Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 103 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
RecountCategories | |
0.00% |
0 / 100 |
|
0.00% |
0 / 3 |
210 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
72 | |||
doWork | |
0.00% |
0 / 43 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | /** |
3 | * Refreshes category counts. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Maintenance |
22 | */ |
23 | |
24 | require_once __DIR__ . '/Maintenance.php'; |
25 | |
/**
 * Maintenance script that refreshes category membership counts in the category
 * table.
 *
 * For every requested mode ("pages", "subcats" or "files") the script
 * repeatedly selects a batch of category rows whose stored cat_<mode> value
 * differs from the number of matching categorylinks rows, recomputes the value
 * on the primary database and writes it back.
 *
 * @ingroup Maintenance
 */
class RecountCategories extends Maintenance {
	/** @var string[] Modes that map to a cat_<mode> column; "all" expands to this list. */
	private const MODES = [ 'pages', 'subcats', 'files' ];

	/** @var int Batch cursor: the next batch only looks at rows with cat_id above this value. */
	private $minimumId;

	/**
	 * Registers the script description, its command line options and the
	 * default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( <<<'TEXT'
This script refreshes the category membership counts stored in the category
table. As time passes, these counts often drift from the actual number of
category members. The script identifies rows where the value in the category
table does not match the number of categorylinks rows for that category, and
updates the category table accordingly.

To fully refresh the data in the category table, you need to run this script
for all three modes. Alternatively, just one mode can be run if required.
TEXT
		);
		$this->addOption(
			'mode',
			'(REQUIRED) Which category count column to recompute: "pages", "subcats", "files" or "all".',
			true,
			true
		);
		$this->addOption(
			'begin',
			'Only recount categories with cat_id greater than the given value',
			false,
			true
		);
		$this->addOption(
			'throttle',
			'Wait this many milliseconds after each batch. Default: 0',
			false,
			true
		);

		// The mode is called "pages"; the help text must use the same spelling
		// that execute() accepts.
		$this->addOption(
			'skip-cleanup',
			'Skip running cleanupEmptyCategories if the "pages" mode is selected',
			false,
			false
		);

		$this->setBatchSize( 500 );
	}

	/**
	 * Recounts the requested mode(s) batch by batch and, when "pages" was among
	 * them, either runs CleanupEmptyCategories or tells the user how to run it.
	 *
	 * Aborts through fatalError() when --mode is not one of "pages", "subcats",
	 * "files" or "all".
	 */
	public function execute() {
		$originalMode = $this->getOption( 'mode' );
		if ( $originalMode !== 'all' && !in_array( $originalMode, self::MODES, true ) ) {
			$this->fatalError( 'Please specify a valid mode: one of "pages", "subcats", "files" or "all".' );
		}

		$modes = $originalMode === 'all' ? self::MODES : [ $originalMode ];

		// Parse the numeric options once. A missing, non-numeric or negative
		// throttle means "do not wait" instead of failing inside usleep().
		$firstId = intval( $this->getOption( 'begin', 0 ) );
		$throttleMicroseconds = max( 0, (int)$this->getOption( 'throttle', 0 ) ) * 1000;

		foreach ( $modes as $mode ) {
			$this->output( "Starting to recount {$mode} counts.\n" );
			// Every mode starts again from the requested lower bound.
			$this->minimumId = $firstId;

			// do the work, batch by batch
			$affectedRows = 0;
			while ( ( $result = $this->doWork( $mode ) ) !== false ) {
				$affectedRows += $result;
				if ( $throttleMicroseconds > 0 ) {
					usleep( $throttleMicroseconds );
				}
			}

			$this->output( "Updated the {$mode} counts of $affectedRows categories.\n" );
		}

		// Finished
		$this->output( "Done!\n" );
		if ( $originalMode !== 'all' ) {
			$this->output( "Now run the script using the other --mode options if you haven't already.\n" );
		}

		if ( !in_array( 'pages', $modes, true ) ) {
			return;
		}

		if ( $this->hasOption( 'skip-cleanup' ) ) {
			$this->output(
				"Also run 'php cleanupEmptyCategories.php --mode remove' to remove empty,\n" .
				"nonexistent categories from the category table.\n\n" );
			return;
		}

		$this->output( "Running cleanupEmptyCategories.php\n" );
		$cleanup = $this->runChild( CleanupEmptyCategories::class );
		'@phan-var CleanupEmptyCategories $cleanup';
		// Pass no options into the child because of a parameter collision between "mode", which
		// both scripts use but set to different values. We'll just use the defaults.
		$cleanup->loadParamsAndArgs( $this->mSelf, [], [] );
		// Force execution because we want to run it regardless of whether it's been run before.
		$cleanup->setForce( true );
		$cleanup->execute();
	}

	/**
	 * Processes one batch: finds drifted rows on the replica, recomputes their
	 * counts on the primary database and updates the category table.
	 *
	 * @param string $mode One of "pages", "subcats" or "files"; selects the
	 *  cat_<mode> column and the cl_type filter of the counting subquery.
	 * @return int|false Number of rows changed by this batch, or false when no
	 *  drifted row above the current cursor was found.
	 */
	protected function doWork( $mode ) {
		$this->output( "Finding up to {$this->getBatchSize()} drifted rows " .
			"greater than cat_id {$this->minimumId}...\n" );

		$dbr = $this->getDB( DB_REPLICA, 'vslow' );

		// Correlated subquery that counts the categorylinks rows of a category.
		// "pages" counts every link; the other modes narrow it down by cl_type.
		$queryBuilder = $dbr->newSelectQueryBuilder()
			->select( 'COUNT(*)' )
			->from( 'categorylinks' )
			->where( 'cl_to = cat_title' );
		if ( $mode === 'subcats' ) {
			$queryBuilder->andWhere( [ 'cl_type' => 'subcat' ] );
		} elseif ( $mode === 'files' ) {
			$queryBuilder->andWhere( [ 'cl_type' => 'file' ] );
		}

		$countingSubquery = $queryBuilder->caller( __METHOD__ )->getSQL();

		// First, let's find out which categories have drifted and need to be updated.
		// The query counts the categorylinks for each category on the replica DB,
		// but this data can't be used for updating the master, so we don't include it
		// in the results.
		// The explicit ordering matters: the cursor below is taken from the last
		// row, so without it rows with a smaller cat_id than the last one returned
		// could be skipped by all following batches.
		$idsToUpdate = $dbr->newSelectQueryBuilder()
			->select( 'cat_id' )
			->from( 'category' )
			->where( [
				$dbr->expr( 'cat_id', '>', (int)$this->minimumId ),
				"cat_{$mode} != ($countingSubquery)",
			] )
			->orderBy( 'cat_id' )
			->limit( $this->getBatchSize() )
			->caller( __METHOD__ )->fetchFieldValues();
		if ( !$idsToUpdate ) {
			return false;
		}
		$this->output( "Updating cat_{$mode} field on " .
			count( $idsToUpdate ) . " rows...\n" );

		// In the next batch, start where this query left off. The rows selected
		// in this iteration shouldn't be selected again after being updated, but
		// we still keep track of where we are up to, as extra protection against
		// infinite loops.
		$this->minimumId = (int)end( $idsToUpdate );

		// Now, on master, find the correct counts for these categories.
		$dbw = $this->getPrimaryDB();
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'cat_id', 'count' => "($countingSubquery)" ] )
			->from( 'category' )
			->where( [ 'cat_id' => $idsToUpdate ] )
			->caller( __METHOD__ )->fetchResultSet();

		// Update the category counts on the rows we just identified.
		// This logic is equivalent to Category::refreshCounts, except here, we
		// don't remove rows when cat_pages is zero and the category description page
		// doesn't exist - instead we print a suggestion to run
		// cleanupEmptyCategories.php.
		$affectedRows = 0;
		foreach ( $res as $row ) {
			$count = (int)$row->count;
			$dbw->newUpdateQueryBuilder()
				->update( 'category' )
				->set( [ "cat_{$mode}" => $count ] )
				->where( [
					'cat_id' => $row->cat_id,
					// Rows that already hold the right value are left alone so
					// they are not counted as updated.
					$dbw->expr( "cat_{$mode}", '!=', $count ),
				] )
				->caller( __METHOD__ )
				->execute();
			$affectedRows += $dbw->affectedRows();
		}

		$this->waitForReplication();

		return $affectedRows;
	}
}
200 | |
// Name the class that implements this script. RUN_MAINTENANCE_IF_MAIN is defined
// outside this file; presumably it resolves to a bootstrap that runs $maintClass
// when this script is the entry point - confirm against Maintenance.php.
201 | $maintClass = RecountCategories::class; |
202 | require_once RUN_MAINTENANCE_IF_MAIN; |