/** Histogram of sort key sizes: maps strlen() of a sort key to the number of keys seen with that length. */
46 public $sizeHistogram = [];
/** Running row counter, incremented by the batch-handling code and echoed in the progress output. */
49 private $numRowsProcessed = 0;
/** Value of the 'verbose-stats' option; gates both collecting and printing the size histogram. */
58 private $verboseStats;
/** Collation name taken from the 'target-collation' option or the CategoryCollation config; written to cl_collation. */
64 private $collationName;
/** Object returned by the service container's getNamespaceInfo(); used here for getCategoryLinkType(). */
76 private $namespaceInfo;
79 parent::__construct();
82This script will find all rows in the categorylinks table whose collation is
84repopulate cl_sortkey
using the page title and cl_sortkey_prefix. If all
85collations are up-to-date, it will
do nothing.
90 $this->
addOption(
'force',
'Run on all rows, even if the collation is ' .
91 'supposed to be up-to-date.',
false,
false,
'f' );
92 $this->
addOption(
'previous-collation',
'Set the previous value of ' .
93 '$wgCategoryCollation here to speed up this script, especially if your ' .
94 'categorylinks table is large. This will only update rows with that ' .
95 'collation, though, so it may miss out-of-date rows with a different, ' .
96 'even older collation.',
false,
true );
97 $this->
addOption(
'target-collation',
'Set this to the new collation type to ' .
98 'use instead of $wgCategoryCollation. Usually you should not use this, ' .
99 'you should just update $wgCategoryCollation in LocalSettings.php.',
101 $this->
addOption(
'target-table',
'Copy rows from categorylinks into the ' .
102 'specified table instead of updating them in place.',
false,
true );
103 $this->
addOption(
'remote',
'Use Shellbox to calculate the new sort keys ' .
105 $this->
addOption(
'dry-run',
'Don\'t actually change the collations, just ' .
106 'compile statistics.' );
107 $this->
addOption(
'verbose-stats',
'Show more statistics.' );
/**
 * Resolve services, the collation to use and the command-line options into
 * instance state.
 *
 * NOTE(review): this listing drops several source lines of the method
 * (branch keywords, closing braces), so the comments below describe only the
 * statements that are actually visible.
 */
113 private function init() {
114 $services = $this->getServiceContainer();
115 $this->namespaceInfo = $services->getNamespaceInfo();
// An explicit 'target-collation' option supplies the collation name.
117 if ( $this->hasOption(
'target-collation' ) ) {
118 $this->collationName = $this->getOption(
'target-collation' );
// Alternative source of the name: the CategoryCollation config setting.
// Presumably the else branch of the check above; the branch line itself is not visible here.
120 $this->collationName = $this->
getConfig()->get( MainConfigNames::CategoryCollation );
// Two candidate names for the collation object: prefixed with 'remote-' or
// the plain name. The condition choosing between them is not visible in this
// listing (presumably the 'remote' option; confirm against the full file).
123 $realCollationName =
'remote-' . $this->collationName;
125 $realCollationName = $this->collationName;
127 $this->collation = $services->getCollationFactory()->makeCollation( $realCollationName );
// The return value is discarded: presumably a smoke test so that a collation
// which cannot produce sort keys fails here rather than mid-run (TODO confirm).
131 $this->collation->getSortKey(
'MediaWiki' );
// Copy the remaining command-line options into properties read by the
// rest of the script.
133 $this->force = $this->
getOption(
'force' );
134 $this->dryRun = $this->
getOption(
'dry-run' );
135 $this->verboseStats = $this->
getOption(
'verbose-stats' );
138 $this->targetTable = $this->
getOption(
'target-table' );
145 if ( $this->targetTable ) {
146 if ( !$this->dbw->tableExists( $this->targetTable, __METHOD__ ) ) {
147 $this->
output(
"Creating table {$this->targetTable}\n" );
149 'CREATE TABLE ' . $this->dbw->tableName( $this->targetTable ) .
150 ' LIKE ' . $this->dbw->tableName(
'categorylinks' ),
156 $collationConds = [];
157 if ( !$this->force && !$this->targetTable ) {
158 if ( $this->
hasOption(
'previous-collation' ) ) {
159 $collationConds[
'cl_collation'] = $this->
getOption(
'previous-collation' );
161 $collationConds[] = $this->dbr->expr(
'cl_collation',
'!=', $this->collationName );
164 $maxPageId = (int)$this->dbr->newSelectQueryBuilder()
165 ->select(
'MAX(page_id)' )
167 ->caller( __METHOD__ )->fetchField();
170 $this->
output(
"Selecting next $batchSize pages from cl_from = $batchValue... " );
174 if ( $this->dbw->getType() ===
'mysql' ) {
175 $clType =
'cl_type+0 AS "cl_type_numeric"';
179 $res = $this->dbw->newSelectQueryBuilder()
181 'cl_from',
'cl_to',
'cl_sortkey_prefix',
'cl_collation',
182 'cl_sortkey', $clType,
'cl_timestamp',
183 'page_namespace',
'page_title'
185 ->from(
'categorylinks' )
187 ->straightJoin(
'page',
null,
'cl_from = page_id' )
188 ->where( $collationConds )
190 $this->dbw->expr(
'cl_from',
'>=', $batchValue )
191 ->and(
'cl_from',
'<', $batchValue + $this->getBatchSize() )
193 ->orderBy(
'cl_from' )
194 ->caller( __METHOD__ )->fetchResultSet();
195 $this->
output(
"processing... " );
198 if ( $this->targetTable ) {
199 $this->copyBatch( $res );
201 $this->updateBatch( $res );
206 if ( $this->dryRun ) {
207 $this->
output(
"{$this->numRowsProcessed} rows would be updated so far.\n" );
209 $this->
output(
"{$this->numRowsProcessed} done.\n" );
211 }
while ( $maxPageId >= $batchValue );
213 if ( !$this->dryRun ) {
214 $this->
output(
"{$this->numRowsProcessed} rows processed\n" );
217 if ( $this->verboseStats ) {
219 $this->showSortKeySizeHistogram();
227 if ( !$this->dryRun ) {
228 $this->beginTransaction( $this->dbw, __METHOD__ );
230 foreach ( $res as $row ) {
231 $title = Title::newFromRow( $row );
232 if ( !$row->cl_collation ) {
233 # This is an old-style row, so the sortkey needs to be
235 if ( $row->cl_sortkey === $title->getText()
236 || $row->cl_sortkey === $title->getPrefixedText()
240 # Custom sortkey, so use it as a prefix
241 $prefix = $row->cl_sortkey;
244 $prefix = $row->cl_sortkey_prefix;
246 # cl_type will be wrong for lots of pages if cl_collation is 0,
247 # so let's update it while we're here.
248 $type = $this->namespaceInfo->getCategoryLinkType( $row->page_namespace );
249 $newSortKey = $this->collation->getSortKey(
250 $title->getCategorySortkey( $prefix ) );
251 $this->updateSortKeySizeHistogram( $newSortKey );
253 $newSortKey = substr( $newSortKey, 0, 230 );
255 if ( $this->dryRun ) {
258 $this->numRowsProcessed += ( $row->cl_sortkey !== $newSortKey );
260 $this->dbw->newUpdateQueryBuilder()
261 ->update(
'categorylinks' )
263 'cl_sortkey' => $newSortKey,
264 'cl_sortkey_prefix' => $prefix,
265 'cl_collation' => $this->collationName,
267 'cl_timestamp = cl_timestamp',
269 ->where( [
'cl_from' => $row->cl_from,
'cl_to' => $row->cl_to ] )
270 ->caller( __METHOD__ )
272 $this->numRowsProcessed++;
275 if ( !$this->dryRun ) {
285 foreach ( $res as $row ) {
286 $title = Title::newFromRow( $row );
287 $sortKeyInputs[] = $title->getCategorySortkey( $row->cl_sortkey_prefix );
289 $sortKeys = $this->collation->getSortKeys( $sortKeyInputs );
291 foreach ( $res as $i => $row ) {
292 if ( !isset( $sortKeys[$i] ) ) {
293 throw new RuntimeException(
'Unable to get sort key' );
295 $newSortKey = $sortKeys[$i];
296 $this->updateSortKeySizeHistogram( $newSortKey );
298 $newSortKey = substr( $newSortKey, 0, 230 );
299 $type = $this->namespaceInfo->getCategoryLinkType( $row->page_namespace );
301 'cl_from' => $row->cl_from,
302 'cl_to' => $row->cl_to,
303 'cl_sortkey' => $newSortKey,
304 'cl_sortkey_prefix' => $row->cl_sortkey_prefix,
305 'cl_collation' => $this->collationName,
307 'cl_timestamp' => $row->cl_timestamp
310 if ( $this->dryRun ) {
311 $this->numRowsProcessed += count( $rowsToInsert );
314 $this->dbw->newInsertQueryBuilder()
315 ->insertInto( $this->targetTable )
317 ->rows( $rowsToInsert )
318 ->caller( __METHOD__ )->execute();
319 $this->numRowsProcessed += $this->dbw->affectedRows();
/**
 * Count one sort key in $this->sizeHistogram, bucketed by its strlen().
 *
 * The visible callers pass the key before it is cut down with
 * substr( ..., 0, 230 ), so the histogram reflects untruncated sizes.
 *
 * @param string $key Sort key whose length is recorded.
 */
327 private function updateSortKeySizeHistogram(
string $key ) {
// Guard on the verbose-stats flag. The guard's body is not visible in this
// listing (presumably an early return so nothing is collected; confirm).
328 if ( !$this->verboseStats ) {
331 $length = strlen( $key );
// First key of this length: start its counter at zero before incrementing.
332 if ( !isset( $this->sizeHistogram[$length] ) ) {
333 $this->sizeHistogram[$length] = 0;
335 $this->sizeHistogram[$length]++;
/**
 * Print the collected sort key size histogram: a raw-data line followed by a
 * coarse, fixed-number-of-bins bar chart drawn with '*' characters.
 *
 * NOTE(review): this listing drops several source lines of the method
 * (e.g. where $numBins, $raw and $prevBoundary are initialised), so the
 * comments below describe only the statements that are visible.
 */
341 private function showSortKeySizeHistogram() {
// Guards for "nothing collected" and "longest key has length 0"; their
// bodies are not visible here.
342 if ( !$this->sizeHistogram ) {
345 $maxLength = max( array_keys( $this->sizeHistogram ) );
346 if ( $maxLength === 0 ) {
350 $coarseHistogram = array_fill( 0, $numBins, 0 );
351 $coarseBoundaries = [];
// Exclusive upper boundary of each bin: evenly spaced steps of
// $maxLength / $numBins, rounded to whole lengths.
353 for ( $i = 0; $i < $numBins - 1; $i++ ) {
354 $boundary += $maxLength / $numBins;
355 $coarseBoundaries[$i] = round( $boundary );
// The last bin ends just past the longest key so that key is included.
357 $coarseBoundaries[$numBins - 1] = $maxLength + 1;
// Fold every exact length 0..$maxLength into a coarse bin; lengths that
// never occurred contribute 0.
359 for ( $i = 0; $i <= $maxLength; $i++ ) {
363 $val = $this->sizeHistogram[$i] ?? 0;
364 for ( $coarseIndex = 0; $coarseIndex < $numBins - 1; $coarseIndex++ ) {
// A bin whose boundary lies above this length receives the count.
// Presumably the loop then breaks; that line is not visible in this listing.
366 if ( $coarseBoundaries[$coarseIndex] > $i ) {
367 $coarseHistogram[$coarseIndex] += $val;
// The scan ran off the end without a match: the length belongs in the last bin.
371 if ( $coarseIndex === ( $numBins - 1 ) ) {
372 $coarseHistogram[$coarseIndex] += $val;
// $raw is built on lines not shown in this listing.
377 $this->
output(
"Sort key size histogram\nRaw data: $raw\n\n" );
379 $maxBinVal = max( $coarseHistogram );
// NOTE(review): the (int) cast truncates, so $scale becomes 0 as soon as
// any bin holds more than 60 keys, and str_repeat( '*', $scale * $val )
// below then prints an empty bar for every bin. Consider keeping the scale
// as a float and rounding per bar instead.
380 $scale = (int)( 60 / $maxBinVal );
// One output row per bin: "<from>-<to>: " label, a numeric column, then the bar.
382 for ( $coarseIndex = 0; $coarseIndex < $numBins; $coarseIndex++ ) {
383 $val = $coarseHistogram[$coarseIndex] ?? 0;
385 $boundary = $coarseBoundaries[$coarseIndex];
// The label shows an inclusive range, hence $boundary - 1 for the upper end.
387 sprintf(
"%-10s %-10d |%s\n",
388 $prevBoundary .
'-' . ( $boundary - 1 ) .
': ',
390 str_repeat(
'*', $scale * $val )
393 $prevBoundary = $boundary;
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.