Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
90.16% |
110 / 122 |
|
57.14% |
4 / 7 |
CRAP | |
0.00% |
0 / 1 |
CategoryMembershipChangeJob | |
90.16% |
110 / 122 |
|
57.14% |
4 / 7 |
30.86 | |
0.00% |
0 / 1 |
newSpec | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 | |||
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
run | |
85.71% |
48 / 56 |
|
0.00% |
0 / 1 |
7.14 | |||
notifyUpdatesForRevision | |
89.66% |
26 / 29 |
|
0.00% |
0 / 1 |
13.19 | |||
getExplicitCategoriesChanges | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
getCategoriesAtRev | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
5.02 | |||
getDeduplicationInfo | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | use MediaWiki\MainConfigNames; |
22 | use MediaWiki\MediaWikiServices; |
23 | use MediaWiki\Page\PageIdentity; |
24 | use MediaWiki\Revision\RevisionRecord; |
25 | use MediaWiki\Revision\RevisionStoreRecord; |
26 | use MediaWiki\Title\Title; |
27 | use Wikimedia\Rdbms\IDBAccessObject; |
28 | use Wikimedia\Rdbms\LBFactory; |
29 | use Wikimedia\Rdbms\RawSQLExpression; |
30 | use Wikimedia\Rdbms\SelectQueryBuilder; |
31 | |
/**
 * Job to add recent change entries mentioning category membership changes
 *
 * This allows users to easily scan categories for recent page membership changes
 *
 * Parameters include:
 *   - pageId : page ID
 *   - revTimestamp : timestamp of the triggering revision
 *   - forImport : whether the triggering revision was imported (treated as false when absent)
 *
 * Category changes will be mentioned for revisions at/after the timestamp for this page
 *
 * @since 1.27
 * @ingroup JobQueue
 */
class CategoryMembershipChangeJob extends Job {
	/**
	 * Transaction ticket obtained in run() and handed to commitAndWaitForReplication().
	 * @var int|null
	 */
	private $ticket;

	/**
	 * Number of seconds subtracted from the triggering revision timestamp in run(), so that
	 * jobs enqueued out of revision order still cover the revisions they should.
	 */
	private const ENQUEUE_FUDGE_SEC = 60;

	/**
	 * Build the specification used to enqueue this job for a page.
	 *
	 * The specification is de-duplicated while ignoring 'revTimestamp'.
	 *
	 * @param PageIdentity $page the page for which to update category membership.
	 * @param string $revisionTimestamp The timestamp of the new revision that triggered the job.
	 * @param bool $forImport Whether the new revision that triggered the job was imported
	 * @return JobSpecification
	 */
	public static function newSpec( PageIdentity $page, $revisionTimestamp, bool $forImport ) {
		return new JobSpecification(
			'categoryMembershipChange',
			[
				'pageId' => $page->getId(),
				'revTimestamp' => $revisionTimestamp,
				'forImport' => $forImport,
			],
			[
				'removeDuplicates' => true,
				'removeDuplicatesIgnoreParams' => [ 'revTimestamp' ]
			],
			$page
		);
	}

	/**
	 * Constructor for use by the Job Queue infrastructure.
	 * @note Don't call this when queueing a new instance, use newSpec() instead.
	 * @param PageIdentity $page the categorized page.
	 * @param array $params Job parameters: 'pageId', 'revTimestamp' and (optionally) 'forImport',
	 *   as produced by newSpec().
	 */
	public function __construct( PageIdentity $page, array $params ) {
		parent::__construct( 'categoryMembershipChange', $page, $params );
		// Only need one job per page. Note that ENQUEUE_FUDGE_SEC handles races where an
		// older revision job gets inserted while the newer revision job is de-duplicated.
		$this->removeDuplicates = true;
	}

	/**
	 * Find the revisions of the page that still lack category notifications and emit
	 * the notifications for each of them, oldest first.
	 *
	 * @return bool False (with the last error set) if the page cannot be found, the replica
	 *   DB does not catch up in time, or the per-page lock cannot be acquired; true otherwise.
	 */
	public function run() {
		$services = MediaWikiServices::getInstance();
		$lbFactory = $services->getDBLoadBalancerFactory();
		$lb = $lbFactory->getMainLB();
		$dbw = $lb->getConnection( DB_PRIMARY );

		$this->ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );

		$page = $services->getWikiPageFactory()->newFromID( $this->params['pageId'], IDBAccessObject::READ_LATEST );
		if ( !$page ) {
			$this->setLastError( "Could not find page #{$this->params['pageId']}" );
			return false; // deleted?
		}

		// Cut down on the time spent in waitForPrimaryPos() in the critical section
		$dbr = $lb->getConnection( DB_REPLICA );
		if ( !$lb->waitForPrimaryPos( $dbr ) ) {
			$this->setLastError( "Timed out while pre-waiting for replica DB to catch up" );
			return false;
		}

		// Use a named lock so that jobs for this page see each others' changes
		$lockKey = "{$dbw->getDomainID()}:CategoryMembershipChange:{$page->getId()}"; // per-wiki
		$scopedLock = $dbw->getScopedLockAndFlush( $lockKey, __METHOD__, 3 );
		if ( !$scopedLock ) {
			$this->setLastError( "Could not acquire lock '$lockKey'" );
			return false;
		}

		// Wait till replica DB is caught up so that jobs for this page see each others' changes
		if ( !$lb->waitForPrimaryPos( $dbr ) ) {
			$this->setLastError( "Timed out while waiting for replica DB to catch up" );
			return false;
		}
		// Clear any stale REPEATABLE-READ snapshot
		$dbr->flushSnapshot( __METHOD__ );

		$cutoffUnix = wfTimestamp( TS_UNIX, $this->params['revTimestamp'] );
		// Using ENQUEUE_FUDGE_SEC handles jobs inserted out of revision order due to the delay
		// between COMMIT and actual enqueueing of the CategoryMembershipChangeJob job.
		$cutoffUnix -= self::ENQUEUE_FUDGE_SEC;

		// Get the newest page revision that has a SRC_CATEGORIZE row.
		// Assume that category changes before it were already handled.
		$subQuery = $dbr->newSelectQueryBuilder()
			->select( '1' )
			->from( 'recentchanges' )
			->where( 'rc_this_oldid = rev_id' )
			->andWhere( [ 'rc_source' => RecentChange::SRC_CATEGORIZE ] );
		$row = $dbr->newSelectQueryBuilder()
			->select( [ 'rev_timestamp', 'rev_id' ] )
			->from( 'revision' )
			->where( [ 'rev_page' => $page->getId() ] )
			->andWhere( $dbr->expr( 'rev_timestamp', '>=', $dbr->timestamp( $cutoffUnix ) ) )
			->andWhere( new RawSQLExpression( 'EXISTS (' . $subQuery->getSQL() . ')' ) )
			->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_DESC )
			->caller( __METHOD__ )->fetchRow();

		// Only consider revisions newer than any such revision
		if ( $row ) {
			$cutoffUnix = wfTimestamp( TS_UNIX, $row->rev_timestamp );
			$lastRevId = (int)$row->rev_id;
		} else {
			$lastRevId = 0;
		}

		// Find revisions to this page made around and after this revision which lack category
		// notifications in recent changes. This lets jobs pick up where the last one left off.
		$revisionStore = $services->getRevisionStore();
		$res = $revisionStore->newSelectQueryBuilder( $dbr )
			->joinComment()
			->where( [
				'rev_page' => $page->getId(),
				$dbr->buildComparison( '>', [
					'rev_timestamp' => $dbr->timestamp( $cutoffUnix ),
					'rev_id' => $lastRevId,
				] )
			] )
			->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_ASC )
			->caller( __METHOD__ )->fetchResultSet();

		// Apply all category updates in revision timestamp order
		foreach ( $res as $row ) {
			$this->notifyUpdatesForRevision( $lbFactory, $page, $revisionStore->newRevisionFromRow( $row ) );
		}

		return true;
	}

	/**
	 * Emit "category added" / "category removed" notifications for the categories that
	 * differ between a revision and its parent revision.
	 *
	 * Returns without doing anything when the text of the revision or of its parent is
	 * hidden, when the parent revision cannot be loaded, or when the categories are unchanged.
	 *
	 * @param LBFactory $lbFactory Used to commit and wait for replication between batches
	 * @param WikiPage $page The categorized page
	 * @param RevisionRecord $newRev The revision to emit notifications for
	 */
	protected function notifyUpdatesForRevision(
		LBFactory $lbFactory, WikiPage $page, RevisionRecord $newRev
	) {
		$title = $page->getTitle();

		// Skip revisions whose text is hidden
		if ( $newRev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
			return;
		}

		$services = MediaWikiServices::getInstance();
		// Get the prior revision (the same for null edits)
		if ( $newRev->getParentId() ) {
			$oldRev = $services->getRevisionLookup()
				->getRevisionById( $newRev->getParentId(), IDBAccessObject::READ_LATEST );
			if ( !$oldRev || $oldRev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
				return;
			}
		} else {
			$oldRev = null;
		}

		// Parse the new revision and get the categories
		$categoryChanges = $this->getExplicitCategoriesChanges( $page, $newRev, $oldRev );
		[ $categoryInserts, $categoryDeletes ] = $categoryChanges;
		if ( !$categoryInserts && !$categoryDeletes ) {
			return; // nothing to do
		}

		$blc = $services->getBacklinkCacheFactory()->getBacklinkCache( $title );
		$catMembChange = new CategoryMembershipChange( $title, $blc, $newRev, $this->params['forImport'] ?? false );
		$catMembChange->checkTemplateLinks();

		$batchSize = $services->getMainConfig()->get( MainConfigNames::UpdateRowsPerQuery );
		// Running count of notifications emitted, shared by both loops below. It must be
		// advanced exactly once per notification so that a commit happens every $batchSize rows.
		$insertCount = 0;

		foreach ( $categoryInserts as $categoryName ) {
			$categoryTitle = Title::makeTitle( NS_CATEGORY, $categoryName );
			$catMembChange->triggerCategoryAddedNotification( $categoryTitle );
			if ( $insertCount++ && ( $insertCount % $batchSize ) == 0 ) {
				$lbFactory->commitAndWaitForReplication( __METHOD__, $this->ticket );
			}
		}

		foreach ( $categoryDeletes as $categoryName ) {
			$categoryTitle = Title::makeTitle( NS_CATEGORY, $categoryName );
			$catMembChange->triggerCategoryRemovedNotification( $categoryTitle );
			// Same batching check as the insert loop; the counter is incremented only once here.
			if ( $insertCount++ && ( $insertCount % $batchSize ) == 0 ) {
				$lbFactory->commitAndWaitForReplication( __METHOD__, $this->ticket );
			}
		}
	}

	/**
	 * Compute which categories were added and which were removed between two revisions.
	 *
	 * @param WikiPage $page
	 * @param RevisionRecord $newRev
	 * @param RevisionRecord|null $oldRev Prior revision; null means no prior categories
	 *
	 * @return array[] Two lists of category names: [ inserts, deletes ]
	 */
	private function getExplicitCategoriesChanges(
		WikiPage $page, RevisionRecord $newRev, ?RevisionRecord $oldRev = null
	) {
		// Inject the same timestamp for both revision parses to avoid seeing category changes
		// due to time-based parser functions. Inject the same page title for the parses too.
		// Note that REPEATABLE-READ makes template/file pages appear unchanged between parses.
		$parseTimestamp = $newRev->getTimestamp();
		// Parse the old rev and get the categories. Do not use link tables as that
		// assumes these updates are perfectly FIFO and that link tables are always
		// up to date, neither of which are true.
		$oldCategories = $oldRev
			? $this->getCategoriesAtRev( $page, $oldRev, $parseTimestamp )
			: [];
		// Parse the new revision and get the categories
		$newCategories = $this->getCategoriesAtRev( $page, $newRev, $parseTimestamp );

		$categoryInserts = array_values( array_diff( $newCategories, $oldCategories ) );
		$categoryDeletes = array_values( array_diff( $oldCategories, $newCategories ) );

		return [ $categoryInserts, $categoryDeletes ];
	}

	/**
	 * Get the category names from the parser output of a revision, using the parser
	 * cache entry when the revision is current and the cached output matches it.
	 *
	 * @param WikiPage $page
	 * @param RevisionRecord $rev
	 * @param string $parseTimestamp TS_MW
	 *
	 * @return string[] category names
	 */
	private function getCategoriesAtRev( WikiPage $page, RevisionRecord $rev, $parseTimestamp ) {
		$services = MediaWikiServices::getInstance();
		$options = $page->makeParserOptions( 'canonical' );
		$options->setTimestamp( $parseTimestamp );
		$options->setRenderReason( 'CategoryMembershipChangeJob' );

		$output = $rev instanceof RevisionStoreRecord && $rev->isCurrent()
			? $services->getParserCache()->get( $page, $options )
			: null;

		// Render the revision when there is no cached output or it is for another revision
		if ( !$output || $output->getCacheRevisionId() !== $rev->getId() ) {
			$output = $services->getRevisionRenderer()->getRenderedRevision( $rev, $options )
				->getRevisionParserOutput();
		}

		// array keys will cast numeric category names to ints;
		// ::getCategoryNames() is careful to cast them back to strings
		// to avoid breaking things!
		return $output->getCategoryNames();
	}

	/**
	 * Get the de-duplication info of the parent class with 'revTimestamp' removed from
	 * the parameters, so jobs that differ only in that timestamp count as duplicates.
	 *
	 * @return array
	 */
	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		unset( $info['params']['revTimestamp'] ); // first job wins

		return $info;
	}
}