27require_once __DIR__ .
'/Maintenance.php';
44 parent::__construct();
47This script will find all rows in the categorylinks table whose collation is out-of-date, and
49repopulate cl_sortkey
using the page title and cl_sortkey_prefix. If all
50collations are up-to-date, it will
do nothing.
54 $this->
addOption(
'force',
'Run on all rows, even if the collation is ' .
55 'supposed to be up-to-date.',
false,
false,
'f' );
56 $this->
addOption(
'previous-collation',
'Set the previous value of ' .
57 '$wgCategoryCollation here to speed up this script, especially if your ' .
58 'categorylinks table is large. This will only update rows with that ' .
59 'collation, though, so it may miss out-of-date rows with a different, ' .
60 'even older collation.',
false,
true );
61 $this->
addOption(
'target-collation',
'Set this to the new collation type to ' .
62 'use instead of $wgCategoryCollation. Usually you should not use this, ' .
63 'you should just update $wgCategoryCollation in LocalSettings.php.',
65 $this->
addOption(
'dry-run',
'Don\'t actually change the collations, just ' .
66 'compile statistics.' );
67 $this->
addOption(
'verbose-stats',
'Show more statistics.' );
75 $verboseStats = $this->
getOption(
'verbose-stats' );
76 if ( $this->
hasOption(
'target-collation' ) ) {
77 $collationName = $this->
getOption(
'target-collation' );
79 $collationName = $this->
getConfig()->get(
'CategoryCollation' );
81 $collation = MediaWikiServices::getInstance()->getCollationFactory()->makeCollation( $collationName );
85 $collation->getFirstLetter(
'MediaWiki' );
91 if ( $this->
hasOption(
'previous-collation' ) ) {
92 $orderBy =
'cl_to, cl_type, cl_from';
94 $orderBy =
'cl_collation, cl_to, cl_type, cl_from';
98 'ORDER BY' => $orderBy,
102 $collationConds = [];
104 if ( $this->
hasOption(
'previous-collation' ) ) {
105 $collationConds[
'cl_collation'] = $this->
getOption(
'previous-collation' );
107 $collationConds = [ 0 =>
108 'cl_collation != ' . $dbw->addQuotes( $collationName )
112 $count =
$dbr->estimateRowCount(
119 if ( $count < 1000000 ) {
120 $count =
$dbr->selectField(
128 $this->
output(
"Collations up-to-date.\n" );
133 $this->
output(
"$count rows would be updated.\n" );
135 $this->
output(
"Fixing collation for $count rows.\n" );
137 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
142 $this->
output(
"Selecting next " . self::BATCH_SIZE .
" rows..." );
146 if ( $dbw->getType() ===
'mysql' ) {
147 $clType =
'cl_type+0 AS "cl_type_numeric"';
152 [
'categorylinks',
'page' ],
153 [
'cl_from',
'cl_to',
'cl_sortkey_prefix',
'cl_collation',
154 'cl_sortkey', $clType,
155 'page_namespace',
'page_title'
157 array_merge( $collationConds, $batchConds, [
'cl_from = page_id' ] ),
161 $this->
output(
" processing..." );
166 foreach (
$res as $row ) {
167 $title = Title::newFromRow( $row );
168 if ( !$row->cl_collation ) {
169 # This is an old-style row, so the sortkey needs to be
171 if ( $row->cl_sortkey ==
$title->getText()
172 || $row->cl_sortkey ==
$title->getPrefixedText()
176 # Custom sortkey, use it as a prefix
177 $prefix = $row->cl_sortkey;
180 $prefix = $row->cl_sortkey_prefix;
182 # cl_type will be wrong for lots of pages if cl_collation is 0,
183 # so let's update it while we're here.
184 $type = MediaWikiServices::getInstance()->getNamespaceInfo()->
185 getCategoryLinkType(
$title->getNamespace() );
186 $newSortKey = $collation->getSortKey(
187 $title->getCategorySortkey( $prefix ) );
188 if ( $verboseStats ) {
195 $count += ( $row->cl_sortkey !== $newSortKey );
200 'cl_sortkey' => $newSortKey,
201 'cl_sortkey_prefix' => $prefix,
202 'cl_collation' => $collationName,
204 'cl_timestamp = cl_timestamp',
206 [
'cl_from' => $row->cl_from,
'cl_to' => $row->cl_to ],
220 $this->
output(
"$count rows would be updated so far.\n" );
222 $this->
output(
"$count done.\n" );
224 }
while (
$res->numRows() == self::BATCH_SIZE );
227 $this->
output(
"$count rows processed\n" );
230 if ( $verboseStats ) {
244 if ( $this->
hasOption(
'previous-collation' ) ) {
245 $fields = [
'cl_to',
'cl_type',
'cl_from' ];
247 $fields = [
'cl_collation',
'cl_to',
'cl_type',
'cl_from' ];
252 foreach ( $fields as $field ) {
253 if ( $dbw->getType() ===
'mysql' && $field ===
'cl_type' ) {
256 $encValue = intval( $row->cl_type_numeric );
258 $encValue = $dbw->addQuotes( $row->$field );
260 $inequality =
"$field > $encValue";
261 $equality =
"$field = $encValue";
267 $cond .=
" OR ($prefix AND $inequality)";
268 $prefix .=
" AND $equality";
276 $length = strlen( $key );
277 if ( !isset( $this->sizeHistogram[$length] ) ) {
278 $this->sizeHistogram[$length] = 0;
280 $this->sizeHistogram[$length]++;
284 if ( !$this->sizeHistogram ) {
287 $maxLength = max( array_keys( $this->sizeHistogram ) );
288 if ( $maxLength == 0 ) {
292 $coarseHistogram = array_fill( 0, $numBins, 0 );
293 $coarseBoundaries = [];
295 for ( $i = 0; $i < $numBins - 1; $i++ ) {
296 $boundary += $maxLength / $numBins;
297 $coarseBoundaries[$i] = round( $boundary );
299 $coarseBoundaries[$numBins - 1] = $maxLength + 1;
301 for ( $i = 0; $i <= $maxLength; $i++ ) {
305 $val = $this->sizeHistogram[$i] ?? 0;
306 for ( $coarseIndex = 0; $coarseIndex < $numBins - 1; $coarseIndex++ ) {
307 if ( $coarseBoundaries[$coarseIndex] > $i ) {
308 $coarseHistogram[$coarseIndex] += $val;
312 if ( $coarseIndex == $numBins - 1 ) {
313 $coarseHistogram[$coarseIndex] += $val;
318 $this->
output(
"Sort key size histogram\nRaw data: $raw\n\n" );
320 $maxBinVal = max( $coarseHistogram );
321 $scale = 60 / $maxBinVal;
323 for ( $coarseIndex = 0; $coarseIndex < $numBins; $coarseIndex++ ) {
324 $val = $coarseHistogram[$coarseIndex] ?? 0;
325 $boundary = $coarseBoundaries[$coarseIndex];
326 $this->
output( sprintf(
"%-10s %-10d |%s\n",
327 $prevBoundary .
'-' . ( $boundary - 1 ) .
': ',
329 str_repeat(
'*', $scale * $val ) ) );
330 $prevBoundary = $boundary;
336require_once RUN_MAINTENANCE_IF_MAIN;
$wgCategoryCollation
Specify how category names should be sorted, when listed on a category page.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
Maintenance script that will find all rows in the categorylinks table whose collation is out-of-date.
showSortKeySizeHistogram()
execute()
Do the actual work.
__construct()
Default constructor.
updateSortKeySizeHistogram( $key)
getBatchCondition( $row, $dbw)
Return an SQL expression selecting rows which sort above the given row, assuming an ordering of cl_collation, cl_to, cl_type, cl_from.