Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
90.08% |
109 / 121 |
|
57.14% |
4 / 7 |
CRAP | |
0.00% |
0 / 1 |
CategoryMembershipChangeJob | |
90.08% |
109 / 121 |
|
57.14% |
4 / 7 |
30.88 | |
0.00% |
0 / 1 |
newSpec | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
run | |
85.71% |
48 / 56 |
|
0.00% |
0 / 1 |
7.14 | |||
notifyUpdatesForRevision | |
89.66% |
26 / 29 |
|
0.00% |
0 / 1 |
13.19 | |||
getExplicitCategoriesChanges | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
getCategoriesAtRev | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
5.02 | |||
getDeduplicationInfo | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | use MediaWiki\MainConfigNames; |
22 | use MediaWiki\MediaWikiServices; |
23 | use MediaWiki\Page\PageIdentity; |
24 | use MediaWiki\Revision\RevisionRecord; |
25 | use MediaWiki\Revision\RevisionStoreRecord; |
26 | use MediaWiki\Title\Title; |
27 | use Wikimedia\Rdbms\LBFactory; |
28 | use Wikimedia\Rdbms\SelectQueryBuilder; |
29 | |
30 | /** |
31 | * Job to add recent change entries mentioning category membership changes |
32 | * |
33 | * This allows users to easily scan categories for recent page membership changes |
34 | * |
35 | * Parameters include: |
36 | * - pageId : page ID |
37 | * - revTimestamp : timestamp of the triggering revision |
38 | * |
39 | * Category changes will be mentioned for revisions at/after the timestamp for this page |
40 | * |
41 | * @since 1.27 |
42 | * @ingroup JobQueue |
43 | */ |
class CategoryMembershipChangeJob extends Job {
	/**
	 * @var int|null Transaction ticket obtained in run() and handed to
	 *  LBFactory::commitAndWaitForReplication() while notifications are written.
	 */
	private $ticket;

	/**
	 * Number of seconds subtracted from the triggering revision's timestamp in run().
	 * This covers jobs that were enqueued out of revision order.
	 */
	private const ENQUEUE_FUDGE_SEC = 60;

	/**
	 * Build the specification used to enqueue this job for a page.
	 *
	 * Duplicate removal is enabled and ignores 'revTimestamp', so specifications for the
	 * same page compare as duplicates regardless of the triggering revision.
	 *
	 * @param PageIdentity $page the page for which to update category membership.
	 * @param string $revisionTimestamp The timestamp of the new revision that triggered the job.
	 * @return JobSpecification
	 */
	public static function newSpec( PageIdentity $page, $revisionTimestamp ) {
		return new JobSpecification(
			'categoryMembershipChange',
			[
				'pageId' => $page->getId(),
				'revTimestamp' => $revisionTimestamp,
			],
			[
				'removeDuplicates' => true,
				'removeDuplicatesIgnoreParams' => [ 'revTimestamp' ]
			],
			$page
		);
	}

	/**
	 * Constructor for use by the Job Queue infrastructure.
	 * @note Don't call this when queueing a new instance, use newSpec() instead.
	 * @param PageIdentity $page the categorized page.
	 * @param array $params Job parameters; run() reads 'pageId' and 'revTimestamp'.
	 */
	public function __construct( PageIdentity $page, array $params ) {
		parent::__construct( 'categoryMembershipChange', $page, $params );
		// Only need one job per page. Note that ENQUEUE_FUDGE_SEC handles races where an
		// older revision job gets inserted while the newer revision job is de-duplicated.
		$this->removeDuplicates = true;
	}

	/**
	 * Write category membership notifications for every revision of the page that does
	 * not have them yet, starting around the triggering revision.
	 *
	 * @return bool False (with the last error set) if the page cannot be found, the
	 *  replica DB wait times out or the per-page lock cannot be acquired; true otherwise.
	 */
	public function run() {
		$services = MediaWikiServices::getInstance();
		$lbFactory = $services->getDBLoadBalancerFactory();
		$lb = $lbFactory->getMainLB();
		$dbw = $lb->getConnectionRef( DB_PRIMARY );

		$this->ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );

		$page = $services->getWikiPageFactory()->newFromID( $this->params['pageId'], IDBAccessObject::READ_LATEST );
		if ( !$page ) {
			$this->setLastError( "Could not find page #{$this->params['pageId']}" );
			return false; // deleted?
		}

		// Cut down on the time spent in waitForPrimaryPos() in the critical section
		$dbr = $lb->getConnectionRef( DB_REPLICA );
		if ( !$lb->waitForPrimaryPos( $dbr ) ) {
			$this->setLastError( "Timed out while pre-waiting for replica DB to catch up" );
			return false;
		}

		// Use a named lock so that jobs for this page see each others' changes
		$lockKey = "{$dbw->getDomainID()}:CategoryMembershipChange:{$page->getId()}"; // per-wiki
		$scopedLock = $dbw->getScopedLockAndFlush( $lockKey, __METHOD__, 3 );
		if ( !$scopedLock ) {
			$this->setLastError( "Could not acquire lock '$lockKey'" );
			return false;
		}

		// Wait till replica DB is caught up so that jobs for this page see each others' changes
		if ( !$lb->waitForPrimaryPos( $dbr ) ) {
			$this->setLastError( "Timed out while waiting for replica DB to catch up" );
			return false;
		}
		// Clear any stale REPEATABLE-READ snapshot
		$dbr->flushSnapshot( __METHOD__ );

		$cutoffUnix = wfTimestamp( TS_UNIX, $this->params['revTimestamp'] );
		// Using ENQUEUE_FUDGE_SEC handles jobs inserted out of revision order due to the delay
		// between COMMIT and actual enqueueing of the CategoryMembershipChangeJob job.
		$cutoffUnix -= self::ENQUEUE_FUDGE_SEC;

		// Get the newest page revision that has a SRC_CATEGORIZE row.
		// Assume that category changes before it were already handled.
		$subQuery = $dbr->newSelectQueryBuilder()
			->select( '1' )
			->from( 'recentchanges' )
			->where( 'rc_this_oldid = rev_id' )
			->andWhere( [ 'rc_source' => RecentChange::SRC_CATEGORIZE ] );
		$row = $dbr->newSelectQueryBuilder()
			->select( [ 'rev_timestamp', 'rev_id' ] )
			->from( 'revision' )
			->where( [ 'rev_page' => $page->getId() ] )
			->andWhere( $dbr->expr( 'rev_timestamp', '>=', $dbr->timestamp( $cutoffUnix ) ) )
			->andWhere( 'EXISTS (' . $subQuery->caller( __METHOD__ )->getSQL() . ')' )
			->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_DESC )
			->caller( __METHOD__ )->fetchRow();

		// Only consider revisions newer than any such revision
		if ( $row ) {
			$cutoffUnix = wfTimestamp( TS_UNIX, $row->rev_timestamp );
			$lastRevId = (int)$row->rev_id;
		} else {
			$lastRevId = 0;
		}

		// Find revisions to this page made around and after this revision which lack category
		// notifications in recent changes. This lets jobs pick up where the last one left off.
		$revisionStore = $services->getRevisionStore();
		$res = $revisionStore->newSelectQueryBuilder( $dbr )
			->joinComment()
			->where( [
				'rev_page' => $page->getId(),
				$dbr->buildComparison( '>', [
					'rev_timestamp' => $dbr->timestamp( $cutoffUnix ),
					'rev_id' => $lastRevId,
				] )
			] )
			->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_ASC )
			->caller( __METHOD__ )->fetchResultSet();

		// Apply all category updates in revision timestamp order
		foreach ( $res as $row ) {
			$this->notifyUpdatesForRevision( $lbFactory, $page, $revisionStore->newRevisionFromRow( $row ) );
		}

		return true;
	}

	/**
	 * Trigger "category added" / "category removed" notifications for the category
	 * differences between a revision and its parent revision.
	 *
	 * Returns without doing anything when the text of the revision or of its parent is
	 * deleted, when the parent cannot be loaded, or when the category sets are equal.
	 *
	 * @param LBFactory $lbFactory
	 * @param WikiPage $page
	 * @param RevisionRecord $newRev
	 */
	protected function notifyUpdatesForRevision(
		LBFactory $lbFactory, WikiPage $page, RevisionRecord $newRev
	) {
		$title = $page->getTitle();

		// Skip the revision if its text is deleted
		if ( $newRev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
			return;
		}

		$services = MediaWikiServices::getInstance();
		// Get the prior revision (the same for null edits)
		if ( $newRev->getParentId() ) {
			$oldRev = $services->getRevisionLookup()
				->getRevisionById( $newRev->getParentId(), IDBAccessObject::READ_LATEST );
			if ( !$oldRev || $oldRev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
				return;
			}
		} else {
			$oldRev = null;
		}

		// Parse both revisions and work out which categories were added or removed
		$categoryChanges = $this->getExplicitCategoriesChanges( $page, $newRev, $oldRev );
		[ $categoryInserts, $categoryDeletes ] = $categoryChanges;
		if ( !$categoryInserts && !$categoryDeletes ) {
			return; // nothing to do
		}

		$blc = $services->getBacklinkCacheFactory()->getBacklinkCache( $title );
		$catMembChange = new CategoryMembershipChange( $title, $blc, $newRev );
		$catMembChange->checkTemplateLinks();

		$batchSize = $services->getMainConfig()->get( MainConfigNames::UpdateRowsPerQuery );
		// One counter is shared by both loops so that additions and removals together
		// are committed in batches of $batchSize notifications.
		$insertCount = 0;

		foreach ( $categoryInserts as $categoryName ) {
			$categoryTitle = Title::makeTitle( NS_CATEGORY, $categoryName );
			$catMembChange->triggerCategoryAddedNotification( $categoryTitle );
			if ( $insertCount++ && ( $insertCount % $batchSize ) === 0 ) {
				$lbFactory->commitAndWaitForReplication( __METHOD__, $this->ticket );
			}
		}

		foreach ( $categoryDeletes as $categoryName ) {
			$categoryTitle = Title::makeTitle( NS_CATEGORY, $categoryName );
			$catMembChange->triggerCategoryRemovedNotification( $categoryTitle );
			// Increment exactly once per notification, as the loop above does. A second
			// post-increment here would make the counter skip values and break the
			// commit cadence.
			if ( $insertCount++ && ( $insertCount % $batchSize ) === 0 ) {
				$lbFactory->commitAndWaitForReplication( __METHOD__, $this->ticket );
			}
		}
	}

	/**
	 * Compute which categories were added and which were removed between two revisions.
	 *
	 * @param WikiPage $page
	 * @param RevisionRecord $newRev
	 * @param RevisionRecord|null $oldRev Prior revision; null is treated as "no categories"
	 * @return array[] Two lists of category names: [ inserted, deleted ]
	 */
	private function getExplicitCategoriesChanges(
		WikiPage $page, RevisionRecord $newRev, ?RevisionRecord $oldRev = null
	) {
		// Inject the same timestamp for both revision parses to avoid seeing category changes
		// due to time-based parser functions. Inject the same page title for the parses too.
		// Note that REPEATABLE-READ makes template/file pages appear unchanged between parses.
		$parseTimestamp = $newRev->getTimestamp();
		// Parse the old rev and get the categories. Do not use link tables as that
		// assumes these updates are perfectly FIFO and that link tables are always
		// up to date, neither of which are true.
		$oldCategories = $oldRev
			? $this->getCategoriesAtRev( $page, $oldRev, $parseTimestamp )
			: [];
		// Parse the new revision and get the categories
		$newCategories = $this->getCategoriesAtRev( $page, $newRev, $parseTimestamp );

		$categoryInserts = array_values( array_diff( $newCategories, $oldCategories ) );
		$categoryDeletes = array_values( array_diff( $oldCategories, $newCategories ) );

		return [ $categoryInserts, $categoryDeletes ];
	}

	/**
	 * Get the category names present in the parser output of a revision.
	 *
	 * The parser cache is only consulted for a current RevisionStoreRecord; if there is
	 * no cached output, or it belongs to another revision ID, the revision is rendered.
	 *
	 * @param WikiPage $page
	 * @param RevisionRecord $rev
	 * @param string $parseTimestamp TS_MW
	 *
	 * @return string[] category names
	 */
	private function getCategoriesAtRev( WikiPage $page, RevisionRecord $rev, $parseTimestamp ) {
		$services = MediaWikiServices::getInstance();
		$options = $page->makeParserOptions( 'canonical' );
		$options->setTimestamp( $parseTimestamp );
		$options->setRenderReason( 'CategoryMembershipChangeJob' );

		$output = $rev instanceof RevisionStoreRecord && $rev->isCurrent()
			? $services->getParserCache()->get( $page, $options )
			: null;

		if ( !$output || $output->getCacheRevisionId() !== $rev->getId() ) {
			$output = $services->getRevisionRenderer()->getRenderedRevision( $rev, $options )
				->getRevisionParserOutput();
		}

		// array keys will cast numeric category names to ints;
		// ::getCategoryNames() is careful to cast them back to strings
		// to avoid breaking things!
		return $output->getCategoryNames();
	}

	/**
	 * Deduplication info with 'revTimestamp' removed from the parameters, so that jobs
	 * differing only in the triggering revision are treated as duplicates.
	 *
	 * @return array
	 */
	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		unset( $info['params']['revTimestamp'] ); // first job wins

		return $info;
	}
}