47 private $numRowsProcessed = 0;
56 private $verboseStats;
62 private $collationName;
77 private $namespaceInfo;
80 parent::__construct();
83 This script will find all rows in the categorylinks table whose collation is
84 out-of-date and
85 repopulate cl_sortkey using the page title and cl_sortkey_prefix. If all
86 collations are up-to-date, it will do nothing.
91 $this->
addOption(
'force',
'Run on all rows, even if the collation is ' .
92 'supposed to be up-to-date.',
false,
false,
'f' );
93 $this->
addOption(
'previous-collation',
'Set the previous value of ' .
94 '$wgCategoryCollation here to speed up this script, especially if your ' .
95 'categorylinks table is large. This will only update rows with that ' .
96 'collation, though, so it may miss out-of-date rows with a different, ' .
97 'even older collation.',
false,
true );
98 $this->
addOption(
'target-collation',
'Set this to the new collation type to ' .
99 'use instead of $wgCategoryCollation. Usually you should not use this, ' .
100 'you should just update $wgCategoryCollation in LocalSettings.php.',
102 $this->
addOption(
'target-table',
'Copy rows from categorylinks into the ' .
103 'specified table instead of updating them in place.',
false,
true );
104 $this->
addOption(
'remote',
'Use Shellbox to calculate the new sort keys ' .
106 $this->
addOption(
'dry-run',
'Don\'t actually change the collations, just ' .
107 'compile statistics.' );
108 $this->
addOption(
'verbose-stats',
'Show more statistics.' );
114 private function init() {
115 $services = MediaWikiServices::getInstance();
116 $this->namespaceInfo = $services->getNamespaceInfo();
117 $this->lbFactory = $services->getDBLoadBalancerFactory();
119 if ( $this->
hasOption(
'target-collation' ) ) {
120 $this->collationName = $this->
getOption(
'target-collation' );
122 $this->collationName = $this->
getConfig()->get( MainConfigNames::CategoryCollation );
125 $realCollationName =
'remote-' . $this->collationName;
127 $realCollationName = $this->collationName;
129 $this->collation = $services->getCollationFactory()->makeCollation( $realCollationName );
133 $this->collation->getSortKey(
'MediaWiki' );
135 $this->force = $this->
getOption(
'force' );
136 $this->dryRun = $this->
getOption(
'dry-run' );
137 $this->verboseStats = $this->
getOption(
'verbose-stats' );
140 $this->targetTable = $this->
getOption(
'target-table' );
147 if ( $this->targetTable ) {
148 if ( !$this->dbw->tableExists( $this->targetTable, __METHOD__ ) ) {
149 $this->
output(
"Creating table {$this->targetTable}\n" );
151 'CREATE TABLE ' . $this->dbw->tableName( $this->targetTable ) .
152 ' LIKE ' . $this->dbw->tableName(
'categorylinks' ),
162 if ( $this->
hasOption(
'previous-collation' ) ) {
163 $orderBy =
'cl_to, cl_type, cl_from';
165 $orderBy =
'cl_collation, cl_to, cl_type, cl_from';
168 'LIMIT' => $batchSize,
169 'ORDER BY' => $orderBy,
173 $collationConds = [];
174 if ( !$this->force && !$this->targetTable ) {
175 if ( $this->
hasOption(
'previous-collation' ) ) {
176 $collationConds[
'cl_collation'] = $this->
getOption(
'previous-collation' );
179 0 =>
'cl_collation != ' . $this->dbr->addQuotes( $this->collationName )
183 $count = $this->dbr->estimateRowCount(
190 if ( $count < 1000000 ) {
191 $count = $this->dbr->selectField(
199 $this->
output(
"Collations up-to-date.\n" );
203 if ( $this->dryRun ) {
204 $this->
output(
"$count rows would be updated.\n" );
206 $this->
output(
"Fixing collation for $count rows.\n" );
211 $this->
output(
"Selecting next $batchSize rows..." );
215 if ( $this->dbw->getType() ===
'mysql' ) {
216 $clType =
'cl_type+0 AS "cl_type_numeric"';
220 $res = $this->dbw->select(
221 [
'categorylinks',
'page' ],
223 'cl_from',
'cl_to',
'cl_sortkey_prefix',
'cl_collation',
224 'cl_sortkey', $clType,
'cl_timestamp',
225 'page_namespace',
'page_title'
227 array_merge( $collationConds, $batchConds, [
'cl_from = page_id' ] ),
231 $this->
output(
" processing..." );
233 if (
$res->numRows() ) {
234 if ( $this->targetTable ) {
235 $this->copyBatch(
$res );
237 $this->updateBatch(
$res );
240 $lastRow =
$res->fetchObject();
241 $batchConds = [ $this->getBatchCondition( $lastRow, $this->dbw ) ];
244 if ( $this->dryRun ) {
245 $this->
output(
"{$this->numRowsProcessed} rows would be updated so far.\n" );
247 $this->
output(
"{$this->numRowsProcessed} done.\n" );
249 }
while (
$res->numRows() == $batchSize );
251 if ( !$this->dryRun ) {
252 $this->
output(
"{$this->numRowsProcessed} rows processed\n" );
255 if ( $this->verboseStats ) {
257 $this->showSortKeySizeHistogram();
268 private function getBatchCondition( $row, $dbw ) {
269 if ( $this->
hasOption(
'previous-collation' ) ) {
270 $fields = [
'cl_to',
'cl_type',
'cl_from' ];
272 $fields = [
'cl_collation',
'cl_to',
'cl_type',
'cl_from' ];
277 foreach ( $fields as $field ) {
278 if ( $dbw->
getType() ===
'mysql' && $field ===
'cl_type' ) {
281 $encValue = intval( $row->cl_type_numeric );
283 $encValue = $dbw->
addQuotes( $row->$field );
285 $inequality =
"$field > $encValue";
286 $equality =
"$field = $encValue";
293 $cond .=
" OR ($prefix AND $inequality)";
294 $prefix .=
" AND $equality";
306 private function updateBatch(
$res ) {
307 if ( !$this->dryRun ) {
310 foreach (
$res as $row ) {
312 if ( !$row->cl_collation ) {
313 # This is an old-style row, so the sortkey needs to be
315 if ( $row->cl_sortkey ==
$title->getText()
316 || $row->cl_sortkey ==
$title->getPrefixedText()
320 # Custom sortkey, use it as a prefix
321 $prefix = $row->cl_sortkey;
324 $prefix = $row->cl_sortkey_prefix;
326 # cl_type will be wrong for lots of pages if cl_collation is 0,
327 # so let's update it while we're here.
328 $type = $this->namespaceInfo->getCategoryLinkType( $row->page_namespace );
329 $newSortKey = $this->collation->getSortKey(
330 $title->getCategorySortkey( $prefix ) );
331 $this->updateSortKeySizeHistogram( $newSortKey );
333 $newSortKey = substr( $newSortKey, 0, 230 );
335 if ( $this->dryRun ) {
338 $this->numRowsProcessed += ( $row->cl_sortkey !== $newSortKey );
343 'cl_sortkey' => $newSortKey,
344 'cl_sortkey_prefix' => $prefix,
345 'cl_collation' => $this->collationName,
347 'cl_timestamp = cl_timestamp',
349 [
'cl_from' => $row->cl_from,
'cl_to' => $row->cl_to ],
352 $this->numRowsProcessed++;
355 if ( !$this->dryRun ) {
365 private function copyBatch(
$res ) {
367 foreach (
$res as $row ) {
369 $sortKeyInputs[] =
$title->getCategorySortkey( $row->cl_sortkey_prefix );
371 $sortKeys = $this->collation->getSortKeys( $sortKeyInputs );
373 foreach (
$res as $i => $row ) {
374 if ( !isset( $sortKeys[$i] ) ) {
377 $newSortKey = $sortKeys[$i];
378 $this->updateSortKeySizeHistogram( $newSortKey );
380 $newSortKey = substr( $newSortKey, 0, 230 );
381 $type = $this->namespaceInfo->getCategoryLinkType( $row->page_namespace );
383 'cl_from' => $row->cl_from,
384 'cl_to' => $row->cl_to,
385 'cl_sortkey' => $newSortKey,
386 'cl_sortkey_prefix' => $row->cl_sortkey_prefix,
387 'cl_collation' => $this->collationName,
389 'cl_timestamp' => $row->cl_timestamp
392 if ( $this->dryRun ) {
393 $this->numRowsProcessed += count( $rowsToInsert );
396 $this->dbw->insert( $this->targetTable, $rowsToInsert, __METHOD__, [
'IGNORE' ] );
397 $this->numRowsProcessed += $this->dbw->affectedRows();
407 private function updateSortKeySizeHistogram( $key ) {
408 if ( !$this->verboseStats ) {
411 $length = strlen( $key );
412 if ( !isset( $this->sizeHistogram[$length] ) ) {
413 $this->sizeHistogram[$length] = 0;
415 $this->sizeHistogram[$length]++;
421 private function showSortKeySizeHistogram() {
422 if ( !$this->sizeHistogram ) {
425 $maxLength = max( array_keys( $this->sizeHistogram ) );
426 if ( $maxLength == 0 ) {
430 $coarseHistogram = array_fill( 0, $numBins, 0 );
431 $coarseBoundaries = [];
433 for ( $i = 0; $i < $numBins - 1; $i++ ) {
434 $boundary += $maxLength / $numBins;
435 $coarseBoundaries[$i] = round( $boundary );
437 $coarseBoundaries[$numBins - 1] = $maxLength + 1;
439 for ( $i = 0; $i <= $maxLength; $i++ ) {
443 $val = $this->sizeHistogram[$i] ?? 0;
444 for ( $coarseIndex = 0; $coarseIndex < $numBins - 1; $coarseIndex++ ) {
446 if ( $coarseBoundaries[$coarseIndex] > $i ) {
447 $coarseHistogram[$coarseIndex] += $val;
451 if ( $coarseIndex == $numBins - 1 ) {
452 $coarseHistogram[$coarseIndex] += $val;
457 $this->
output(
"Sort key size histogram\nRaw data: $raw\n\n" );
459 $maxBinVal = max( $coarseHistogram );
460 $scale = (int)( 60 / $maxBinVal );
462 for ( $coarseIndex = 0; $coarseIndex < $numBins; $coarseIndex++ ) {
463 $val = $coarseHistogram[$coarseIndex] ?? 0;
465 $boundary = $coarseBoundaries[$coarseIndex];
466 $this->
output( sprintf(
"%-10s %-10d |%s\n",
467 $prevBoundary .
'-' . ( $boundary - 1 ) .
': ',
469 str_repeat(
'*', $scale * $val ) ) );
470 $prevBoundary = $boundary;
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.