Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 104 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
| RecountCategories | |
0.00% |
0 / 104 |
|
0.00% |
0 / 3 |
210 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
2 | |||
| execute | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
72 | |||
| doWork | |
0.00% |
0 / 47 |
|
0.00% |
0 / 1 |
30 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Refreshes category counts. |
| 4 | * |
| 5 | * @license GPL-2.0-or-later |
| 6 | * @file |
| 7 | * @ingroup Maintenance |
| 8 | */ |
| 9 | |
| 10 | use MediaWiki\Maintenance\Maintenance; |
| 11 | use Wikimedia\Rdbms\RawSQLExpression; |
| 12 | |
| 13 | // @codeCoverageIgnoreStart |
| 14 | require_once __DIR__ . '/Maintenance.php'; |
| 15 | // @codeCoverageIgnoreEnd |
| 16 | |
/**
 * Maintenance script that refreshes category membership counts in the category
 * table.
 *
 * For each selected mode the script repeatedly looks up a batch of category
 * rows whose stored cat_<mode> value differs from the number of matching
 * categorylinks rows, and rewrites those values on the primary database.
 *
 * @ingroup Maintenance
 */
class RecountCategories extends Maintenance {
	/** @var int Exclusive lower bound on cat_id for the next batch */
	private $minimumId;

	/**
	 * Declares the script description, its command line options and the
	 * default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( <<<'TEXT'
This script refreshes the category membership counts stored in the category
table. As time passes, these counts often drift from the actual number of
category members. The script identifies rows where the value in the category
table does not match the number of categorylinks rows for that category, and
updates the category table accordingly.

To fully refresh the data in the category table, you need to run this script
for all three modes. Alternatively, just one mode can be run if required.
TEXT
		);
		$this->addOption(
			'mode',
			'(REQUIRED) Which category count column to recompute: "pages", "subcats", "files" or "all".',
			true,
			true
		);
		$this->addOption(
			'begin',
			'Only recount categories with cat_id greater than the given value',
			false,
			true
		);
		$this->addOption(
			'throttle',
			'Wait this many milliseconds after each batch. Default: 0',
			false,
			true
		);

		$this->addOption(
			'skip-cleanup',
			'Skip running cleanupEmptyCategories if the "pages" or "all" mode is selected',
			false,
			false
		);

		$this->setBatchSize( 500 );
	}

	/**
	 * Validates the requested mode, recounts every selected column batch by
	 * batch, and finally either runs CleanupEmptyCategories or prints a hint
	 * to run it (only when the "pages" column was recounted).
	 */
	public function execute() {
		$originalMode = $this->getOption( 'mode' );
		// Strict comparison: the mode is later interpolated into a column name,
		// so only the exact known strings may pass.
		if ( !in_array( $originalMode, [ 'pages', 'subcats', 'files', 'all' ], true ) ) {
			$this->fatalError( 'Please specify a valid mode: one of "pages", "subcats", "files" or "all".' );
		}

		if ( $originalMode === 'all' ) {
			$modes = [ 'pages', 'subcats', 'files' ];
		} else {
			$modes = [ $originalMode ];
		}

		// The option arrives as a raw string. Normalise it once to a non-negative
		// integer number of microseconds, so that non-numeric or negative input
		// cannot make the arithmetic or usleep() throw.
		$throttleMicroseconds = max(
			0,
			(int)round( (float)$this->getOption( 'throttle', 0 ) * 1000 )
		);

		foreach ( $modes as $mode ) {
			$this->output( "Starting to recount {$mode} counts.\n" );
			// Every mode starts again from the requested lower bound.
			$this->minimumId = intval( $this->getOption( 'begin', 0 ) );

			// do the work, batch by batch
			$affectedRows = 0;
			// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
			while ( ( $result = $this->doWork( $mode ) ) !== false ) {
				$affectedRows += $result;
				if ( $throttleMicroseconds > 0 ) {
					usleep( $throttleMicroseconds );
				}
			}

			$this->output( "Updated the {$mode} counts of $affectedRows categories.\n" );
		}

		// Finished
		$this->output( "Done!\n" );
		if ( $originalMode !== 'all' ) {
			$this->output( "Now run the script using the other --mode options if you haven't already.\n" );
		}

		if ( in_array( 'pages', $modes, true ) ) {
			if ( $this->hasOption( 'skip-cleanup' ) ) {
				$this->output(
					"Also run 'php cleanupEmptyCategories.php --mode remove' to remove empty,\n" .
					"nonexistent categories from the category table.\n\n" );
			} else {
				$this->output( "Running cleanupEmptyCategories.php\n" );
				$cleanup = $this->runChild( CleanupEmptyCategories::class );
				'@phan-var CleanupEmptyCategories $cleanup';
				// Pass no options into the child because of a parameter collision between "mode", which
				// both scripts use but set to different values. We'll just use the defaults.
				$cleanup->loadParamsAndArgs( $this->mSelf, [], [] );
				// Force execution because we want to run it regardless of whether it's been run before.
				$cleanup->setForce( true );
				$cleanup->execute();
			}
		}
	}

	/**
	 * Processes one batch of drifted category rows for the given mode.
	 *
	 * @param string $mode One of "pages", "subcats" or "files"; selects the
	 *  cat_<mode> column and the cl_type filter of the counting subquery.
	 * @return int|false Number of rows changed by this batch, or false when no
	 *  drifted rows with cat_id above the current lower bound were found.
	 */
	protected function doWork( string $mode ): int|false {
		$this->output( "Finding up to {$this->getBatchSize()} drifted rows " .
			"greater than cat_id {$this->minimumId}...\n" );

		$dbr = $this->getDB( DB_REPLICA, 'vslow' );

		// Correlated subquery: counts the categorylinks rows that point at the
		// category title of the outer "category" row.
		$queryBuilder = $dbr->newSelectQueryBuilder()
			->select( 'COUNT(*)' )
			->from( 'categorylinks' )
			->join( 'linktarget', null, 'cl_target_id = lt_id' )
			->where( [
				new RawSQLExpression( 'lt_title = cat_title' ),
				'lt_namespace' => NS_CATEGORY,
			] );

		if ( $mode === 'subcats' ) {
			$queryBuilder->andWhere( [ 'cl_type' => 'subcat' ] );
		} elseif ( $mode === 'files' ) {
			$queryBuilder->andWhere( [ 'cl_type' => 'file' ] );
		}

		$countingSubquery = $queryBuilder->caller( __METHOD__ )->getSQL();

		// First, let's find out which categories have drifted and need to be updated.
		// The query counts the categorylinks for each category on the replica DB,
		// but this data can't be used for updating the master, so we don't include it
		// in the results.
		// The explicit ordering matters: the last ID of the batch becomes the lower
		// bound of the next one, so the batch has to consist of the lowest matching
		// IDs in ascending order, otherwise drifted rows could be skipped.
		$idsToUpdate = $dbr->newSelectQueryBuilder()
			->select( 'cat_id' )
			->from( 'category' )
			->where( [ $dbr->expr( 'cat_id', '>', (int)$this->minimumId ), "cat_{$mode} != ($countingSubquery)" ] )
			->orderBy( 'cat_id' )
			->limit( $this->getBatchSize() )
			->caller( __METHOD__ )->fetchFieldValues();
		if ( !$idsToUpdate ) {
			return false;
		}
		$this->output( "Updating cat_{$mode} field on " .
			count( $idsToUpdate ) . " rows...\n" );

		// In the next batch, start where this query left off. The rows selected
		// in this iteration shouldn't be selected again after being updated, but
		// we still keep track of where we are up to, as extra protection against
		// infinite loops.
		$this->minimumId = (int)end( $idsToUpdate );

		// Now, on master, find the correct counts for these categories.
		$dbw = $this->getPrimaryDB();
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'cat_id', 'count' => "($countingSubquery)" ] )
			->from( 'category' )
			->where( [ 'cat_id' => $idsToUpdate ] )
			->caller( __METHOD__ )->fetchResultSet();

		// Update the category counts on the rows we just identified.
		// This logic is equivalent to Category::refreshCounts, except here, we
		// don't remove rows when cat_pages is zero and the category description page
		// doesn't exist - instead we print a suggestion to run
		// cleanupEmptyCategories.php.
		$affectedRows = 0;
		foreach ( $res as $row ) {
			$dbw->newUpdateQueryBuilder()
				->update( 'category' )
				->set( [ "cat_{$mode}" => $row->count ] )
				->where( [
					'cat_id' => $row->cat_id,
					// Only touch the row if the stored value still differs.
					$dbw->expr( "cat_{$mode}", '!=', (int)$row->count ),
				] )
				->caller( __METHOD__ )
				->execute();
			$affectedRows += $dbw->affectedRows();
		}

		$this->waitForReplication();

		return $affectedRows;
	}
}
| 198 | |
| 199 | // @codeCoverageIgnoreStart |
| 200 | $maintClass = RecountCategories::class; |
| 201 | require_once RUN_MAINTENANCE_IF_MAIN; |
| 202 | // @codeCoverageIgnoreEnd |