Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 100 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
RecountCategories | |
0.00% |
0 / 100 |
|
0.00% |
0 / 3 |
210 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
72 | |||
doWork | |
0.00% |
0 / 43 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | /** |
3 | * Refreshes category counts. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Maintenance |
22 | */ |
23 | |
24 | use MediaWiki\Maintenance\Maintenance; |
25 | |
26 | // @codeCoverageIgnoreStart |
27 | require_once __DIR__ . '/Maintenance.php'; |
28 | // @codeCoverageIgnoreEnd |
29 | |
30 | /** |
31 | * Maintenance script that refreshes category membership counts in the category |
32 | * table. |
33 | * |
34 | * @ingroup Maintenance |
35 | */ |
class RecountCategories extends Maintenance {
	/** @var string[] Modes that each correspond to one cat_<mode> count column. */
	private const COLUMN_MODES = [ 'pages', 'subcats', 'files' ];

	/** @var int Only categories with a cat_id above this value are examined by the next batch. */
	private $minimumId;

	/**
	 * Declares the script description, its command line options and the default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( <<<'TEXT'
This script refreshes the category membership counts stored in the category
table. As time passes, these counts often drift from the actual number of
category members. The script identifies rows where the value in the category
table does not match the number of categorylinks rows for that category, and
updates the category table accordingly.

To fully refresh the data in the category table, you need to run this script
for all three modes. Alternatively, just one mode can be run if required.
TEXT
		);
		$this->addOption(
			'mode',
			'(REQUIRED) Which category count column to recompute: "pages", "subcats", "files" or "all".',
			true,
			true
		);
		$this->addOption(
			'begin',
			'Only recount categories with cat_id greater than the given value',
			false,
			true
		);
		$this->addOption(
			'throttle',
			'Wait this many milliseconds after each batch. Default: 0',
			false,
			true
		);

		// The cleanup step in execute() runs whenever "pages" is among the processed
		// modes, which is the case for both --mode pages and --mode all.
		$this->addOption(
			'skip-cleanup',
			'Skip running cleanupEmptyCategories when the "pages" or "all" mode is selected',
			false,
			false
		);

		$this->setBatchSize( 500 );
	}

	/**
	 * Recounts the requested column(s) batch by batch and, unless --skip-cleanup
	 * was given, runs CleanupEmptyCategories once the "pages" column has been processed.
	 *
	 * Invalid --mode or --throttle values abort the script through fatalError()
	 * before any row is touched.
	 */
	public function execute() {
		$originalMode = $this->getOption( 'mode' );
		if ( $originalMode !== 'all' && !in_array( $originalMode, self::COLUMN_MODES, true ) ) {
			$this->fatalError( 'Please specify a valid mode: one of "pages", "subcats", "files" or "all".' );
		}
		$modes = $originalMode === 'all' ? self::COLUMN_MODES : [ $originalMode ];

		// Validate the throttle once, up front, instead of multiplying an unchecked
		// string inside the batch loop where a bad value would only fail after the
		// first batch had already been written.
		$throttle = $this->getOption( 'throttle', 0 );
		if ( !is_numeric( $throttle ) || $throttle < 0 ) {
			$this->fatalError( 'The --throttle option must be a non-negative number of milliseconds.' );
		}
		$throttleMicroseconds = (int)round( (float)$throttle * 1000 );

		foreach ( $modes as $mode ) {
			$this->output( "Starting to recount {$mode} counts.\n" );
			// Every mode scans the table again from the requested starting point.
			$this->minimumId = intval( $this->getOption( 'begin', 0 ) );

			// do the work, batch by batch; doWork() returns false once nothing is left
			$affectedRows = 0;
			// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
			while ( ( $result = $this->doWork( $mode ) ) !== false ) {
				$affectedRows += $result;
				if ( $throttleMicroseconds > 0 ) {
					usleep( $throttleMicroseconds );
				}
			}

			$this->output( "Updated the {$mode} counts of $affectedRows categories.\n" );
		}

		// Finished
		$this->output( "Done!\n" );
		if ( $originalMode !== 'all' ) {
			$this->output( "Now run the script using the other --mode options if you haven't already.\n" );
		}

		if ( in_array( 'pages', $modes, true ) ) {
			if ( $this->hasOption( 'skip-cleanup' ) ) {
				$this->output(
					"Also run 'php cleanupEmptyCategories.php --mode remove' to remove empty,\n" .
					"nonexistent categories from the category table.\n\n" );
			} else {
				$this->output( "Running cleanupEmptyCategories.php\n" );
				$cleanup = $this->runChild( CleanupEmptyCategories::class );
				'@phan-var CleanupEmptyCategories $cleanup';
				// Pass no options into the child because of a parameter collision between "mode", which
				// both scripts use but set to different values. We'll just use the defaults.
				$cleanup->loadParamsAndArgs( $this->mSelf, [], [] );
				// Force execution because we want to run it regardless of whether it's been run before.
				$cleanup->setForce( true );
				$cleanup->execute();
			}
		}
	}

	/**
	 * Processes one batch: finds categories whose cat_<mode> value differs from the
	 * number of matching categorylinks rows and rewrites that value.
	 *
	 * @param string $mode One of "pages", "subcats" or "files"; it is interpolated into
	 *  the column name "cat_<mode>", so it must already have been validated.
	 * @return int|false Number of rows changed by this batch, or false when no drifted
	 *  category with a cat_id above the current cursor remains.
	 */
	protected function doWork( $mode ) {
		$batchSize = $this->getBatchSize();
		$this->output( "Finding up to {$batchSize} drifted rows " .
			"greater than cat_id {$this->minimumId}...\n" );

		$dbr = $this->getDB( DB_REPLICA, 'vslow' );

		// Correlated subquery that counts the categorylinks rows of the current category
		// row; "pages" counts every link type, the other modes filter on cl_type.
		$queryBuilder = $dbr->newSelectQueryBuilder()
			->select( 'COUNT(*)' )
			->from( 'categorylinks' )
			->where( 'cl_to = cat_title' );
		if ( $mode === 'subcats' ) {
			$queryBuilder->andWhere( [ 'cl_type' => 'subcat' ] );
		} elseif ( $mode === 'files' ) {
			$queryBuilder->andWhere( [ 'cl_type' => 'file' ] );
		}

		$countingSubquery = $queryBuilder->caller( __METHOD__ )->getSQL();

		// First, let's find out which categories have drifted and need to be updated.
		// The query counts the categorylinks for each category on the replica DB,
		// but this data can't be used for updating the master, so we don't include it
		// in the results.
		// The explicit ordering matters: the last id of the batch becomes the cursor for
		// the next one, so the batch has to hold the lowest matching ids in ascending
		// order, otherwise drifted rows below the cursor would be skipped.
		$idsToUpdate = $dbr->newSelectQueryBuilder()
			->select( 'cat_id' )
			->from( 'category' )
			->where( [ $dbr->expr( 'cat_id', '>', (int)$this->minimumId ), "cat_{$mode} != ($countingSubquery)" ] )
			->orderBy( 'cat_id' )
			->limit( $batchSize )
			->caller( __METHOD__ )->fetchFieldValues();
		if ( !$idsToUpdate ) {
			return false;
		}
		$this->output( "Updating cat_{$mode} field on " .
			count( $idsToUpdate ) . " rows...\n" );

		// In the next batch, start where this query left off. The rows selected
		// in this iteration shouldn't be selected again after being updated, but
		// we still keep track of where we are up to, as extra protection against
		// infinite loops. The cast keeps the property an int, as documented.
		$this->minimumId = (int)end( $idsToUpdate );

		// Now, on master, find the correct counts for these categories.
		$dbw = $this->getPrimaryDB();
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'cat_id', 'count' => "($countingSubquery)" ] )
			->from( 'category' )
			->where( [ 'cat_id' => $idsToUpdate ] )
			->caller( __METHOD__ )->fetchResultSet();

		// Update the category counts on the rows we just identified.
		// This logic is equivalent to Category::refreshCounts, except here, we
		// don't remove rows when cat_pages is zero and the category description page
		// doesn't exist - instead we print a suggestion to run
		// cleanupEmptyCategories.php.
		$affectedRows = 0;
		foreach ( $res as $row ) {
			$dbw->newUpdateQueryBuilder()
				->update( 'category' )
				->set( [ "cat_{$mode}" => $row->count ] )
				->where( [
					'cat_id' => $row->cat_id,
					// Rows that already hold the right value are left alone so they
					// are not counted as affected.
					$dbw->expr( "cat_{$mode}", '!=', (int)$row->count ),
				] )
				->caller( __METHOD__ )
				->execute();
			$affectedRows += $dbw->affectedRows();
		}

		$this->waitForReplication();

		return $affectedRows;
	}
}
205 | |
// Script bootstrap: stores this class's name in $maintClass and then includes the
// file named by the RUN_MAINTENANCE_IF_MAIN constant, which is defined outside this file.
// NOTE(review): presumably that included file runs $maintClass when this script is
// invoked directly from the command line - confirm against Maintenance.php.
206 | // @codeCoverageIgnoreStart |
207 | $maintClass = RecountCategories::class; |
208 | require_once RUN_MAINTENANCE_IF_MAIN; |
209 | // @codeCoverageIgnoreEnd |