MediaWiki master
CategoryMembershipChangeJob.php
Go to the documentation of this file.
1<?php
8
22use Wikimedia\Timestamp\TimestampFormat as TS;
23
/** Transaction ticket; assigned in run() from LBFactory::getEmptyTransactionTicket() and passed to commitAndWaitForReplication() */
42 private $ticket;
43
/** Factory handed to every CategoryMembershipChange created by this job */
44 private RecentChangeFactory $recentChangeFactory;
45
/** Number of seconds subtracted from the job's revision timestamp when computing the revision cutoff in run() */
46 private const ENQUEUE_FUDGE_SEC = 60;
47
/**
 * Build the specification used to enqueue a 'categoryMembershipChange' job.
 *
 * @param PageIdentity $page Page the job is for; its ID is stored as the 'pageId' parameter
 * @param mixed $revisionTimestamp Revision timestamp, stored as 'revTimestamp'
 *   (run() later converts it with wfTimestamp())
 * @param bool $forImport Stored as the 'forImport' parameter
 * @return JobSpecification
 */
public static function newSpec( PageIdentity $page, $revisionTimestamp, bool $forImport ) {
	$jobParams = [
		'pageId' => $page->getId(),
		'revTimestamp' => $revisionTimestamp,
		'forImport' => $forImport,
	];

	// Duplicate jobs are dropped, and 'revTimestamp' is left out of the comparison
	$jobOptions = [
		'removeDuplicates' => true,
		'removeDuplicatesIgnoreParams' => [ 'revTimestamp' ],
	];

	return new JobSpecification( 'categoryMembershipChange', $jobParams, $jobOptions, $page );
}
69
/**
 * @param PageIdentity $page Page the job operates on
 * @param array $params Job parameters; this class reads 'pageId', 'revTimestamp'
 *   and (optionally) 'forImport'
 * @param RecentChangeFactory $recentChangeFactory Factory forwarded to CategoryMembershipChange
 */
public function __construct(
	PageIdentity $page,
	array $params,
	RecentChangeFactory $recentChangeFactory
) {
	parent::__construct( 'categoryMembershipChange', $page, $params );

	// A single job per page is enough. ENQUEUE_FUDGE_SEC covers the race in which a job
	// for an older revision is inserted while the job for a newer revision is de-duplicated.
	$this->removeDuplicates = true;

	$this->recentChangeFactory = $recentChangeFactory;
}
88
/**
 * Run the job: find the revisions of the page that still lack category change
 * entries in recent changes and emit notifications for each of them, oldest first.
 *
 * @return bool False (with the last error set) if the page cannot be found, the
 *   replica DB does not catch up in time, or the per-page lock cannot be acquired;
 *   true otherwise
 */
public function run() {
	// The service container must be fetched before any of the lookups below use it
	$services = MediaWikiServices::getInstance();
	$lbFactory = $services->getDBLoadBalancerFactory();
	$lb = $lbFactory->getMainLB();
	$dbw = $lb->getConnection( DB_PRIMARY );

	// Kept on the instance so notifyUpdatesForRevision() can commit in batches
	$this->ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );

	$page = $services->getWikiPageFactory()->newFromID( $this->params['pageId'], IDBAccessObject::READ_LATEST );
	if ( !$page ) {
		$this->setLastError( "Could not find page #{$this->params['pageId']}" );
		return false; // deleted?
	}

	// Cut down on the time spent in waitForPrimaryPos() in the critical section
	$dbr = $lb->getConnection( DB_REPLICA );
	if ( !$lb->waitForPrimaryPos( $dbr ) ) {
		$this->setLastError( "Timed out while pre-waiting for replica DB to catch up" );
		return false;
	}

	// Use a named lock so that jobs for this page see each others' changes.
	// $scopedLock must stay referenced until this method returns.
	$lockKey = "{$dbw->getDomainID()}:CategoryMembershipChange:{$page->getId()}"; // per-wiki
	$scopedLock = $dbw->getScopedLockAndFlush( $lockKey, __METHOD__, 1 );
	if ( !$scopedLock ) {
		$this->setLastError( "Could not acquire lock '$lockKey'" );
		return false;
	}

	// Wait till replica DB is caught up so that jobs for this page see each others' changes
	if ( !$lb->waitForPrimaryPos( $dbr ) ) {
		$this->setLastError( "Timed out while waiting for replica DB to catch up" );
		return false;
	}
	// Clear any stale REPEATABLE-READ snapshot
	$dbr->flushSnapshot( __METHOD__ );

	$cutoffUnix = wfTimestamp( TS::UNIX, $this->params['revTimestamp'] );
	// Using ENQUEUE_FUDGE_SEC handles jobs inserted out of revision order due to the delay
	// between COMMIT and actual enqueueing of the CategoryMembershipChangeJob job.
	$cutoffUnix -= self::ENQUEUE_FUDGE_SEC;

	// Get the newest page revision that has a SRC_CATEGORIZE row.
	// Assume that category changes before it were already handled.
	$subQuery = $dbr->newSelectQueryBuilder()
		->select( '1' )
		->from( 'recentchanges' )
		->where( 'rc_this_oldid = rev_id' )
		->andWhere( [ 'rc_source' => RecentChange::SRC_CATEGORIZE ] );
	$row = $dbr->newSelectQueryBuilder()
		->select( [ 'rev_timestamp', 'rev_id' ] )
		->from( 'revision' )
		->where( [ 'rev_page' => $page->getId() ] )
		->andWhere( $dbr->expr( 'rev_timestamp', '>=', $dbr->timestamp( $cutoffUnix ) ) )
		->andWhere( new RawSQLExpression( 'EXISTS (' . $subQuery->getSQL() . ')' ) )
		->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_DESC )
		->caller( __METHOD__ )->fetchRow();

	// Only consider revisions newer than any such revision
	if ( $row ) {
		$cutoffUnix = wfTimestamp( TS::UNIX, $row->rev_timestamp );
		$lastRevId = (int)$row->rev_id;
	} else {
		$lastRevId = 0;
	}

	// Find revisions to this page made around and after this revision which lack category
	// notifications in recent changes. This lets jobs pick up where the last one left off.
	$revisionStore = $services->getRevisionStore();
	$res = $revisionStore->newSelectQueryBuilder( $dbr )
		->joinComment()
		->where( [
			'rev_page' => $page->getId(),
			$dbr->buildComparison( '>', [
				'rev_timestamp' => $dbr->timestamp( $cutoffUnix ),
				'rev_id' => $lastRevId,
			] )
		] )
		->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_ASC )
		->caller( __METHOD__ )->fetchResultSet();

	// Apply all category updates in revision timestamp order
	foreach ( $res as $row ) {
		$this->notifyUpdatesForRevision( $lbFactory, $page, $revisionStore->newRevisionFromRow( $row ) );
	}

	return true;
}
178
/**
 * Emit category added/removed notifications for a single revision of the page.
 *
 * Returns without doing anything when the text of the revision or of its parent
 * is deleted, when the parent revision cannot be loaded, or when the set of
 * categories did not change.
 *
 * @param LBFactory $lbFactory Used to commit and wait for replication between batches
 * @param WikiPage $page Page the revision belongs to
 * @param RevisionRecord $newRev Revision to compare against its parent
 */
protected function notifyUpdatesForRevision(
	LBFactory $lbFactory, WikiPage $page, RevisionRecord $newRev
) {
	$title = $page->getTitle();

	// Skip revisions whose text is hidden
	if ( $newRev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
		return;
	}

	$services = MediaWikiServices::getInstance();
	// Get the prior revision (the same for null edits)
	if ( $newRev->getParentId() ) {
		$oldRev = $services->getRevisionLookup()
			->getRevisionById( $newRev->getParentId(), IDBAccessObject::READ_LATEST );
		if ( !$oldRev || $oldRev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
			return;
		}
	} else {
		// No parent revision: every category of the new revision counts as added
		$oldRev = null;
	}

	// Parse both revisions and work out which categories were added and removed
	$categoryChanges = $this->getExplicitCategoriesChanges( $page, $newRev, $oldRev );
	[ $categoryInserts, $categoryDeletes ] = $categoryChanges;
	if ( !$categoryInserts && !$categoryDeletes ) {
		return; // nothing to do
	}

	$blc = $services->getBacklinkCacheFactory()->getBacklinkCache( $title );
	$catMembChange = new CategoryMembershipChange(
		$title,
		$blc,
		$newRev,
		$this->recentChangeFactory,
		$this->params['forImport'] ?? false
	);
	$catMembChange->checkTemplateLinks();

	$batchSize = $services->getMainConfig()->get( MainConfigNames::UpdateRowsPerQuery );
	// Counts notifications across BOTH loops so batches span additions and removals
	$insertCount = 0;

	foreach ( $categoryInserts as $categoryName ) {
		$categoryTitle = Title::makeTitle( NS_CATEGORY, $categoryName );
		$catMembChange->triggerCategoryAddedNotification( $categoryTitle );
		if ( $insertCount++ && ( $insertCount % $batchSize ) === 0 ) {
			$lbFactory->commitAndWaitForReplication( __METHOD__, $this->ticket );
		}
	}

	foreach ( $categoryDeletes as $categoryName ) {
		$categoryTitle = Title::makeTitle( NS_CATEGORY, $categoryName );
		$catMembChange->triggerCategoryRemovedNotification( $categoryTitle );
		// Increment exactly once per notification. Incrementing twice here made the
		// counter skip every other value, so the batch boundary could be missed.
		if ( $insertCount++ && ( $insertCount % $batchSize ) === 0 ) {
			$lbFactory->commitAndWaitForReplication( __METHOD__, $this->ticket );
		}
	}
}
242
/**
 * Work out which categories were added and which were removed between two revisions.
 *
 * @param WikiPage $page Page both revisions are parsed for
 * @param RevisionRecord $newRev Revision after the change
 * @param RevisionRecord|null $oldRev Revision before the change; null means "no categories before"
 * @return array Two lists: [ added category names, removed category names ]
 */
private function getExplicitCategoriesChanges(
	WikiPage $page, RevisionRecord $newRev, ?RevisionRecord $oldRev = null
): array {
	// Both parses share one timestamp (and the same page) so that time-based parser
	// functions cannot produce spurious category differences.
	// Note that REPEATABLE-READ makes template/file pages appear unchanged between parses.
	$parseTimestamp = $newRev->getTimestamp();

	// The old categories come from parsing, not from the link tables: link tables would
	// require these updates to be perfectly FIFO and always up to date, and neither holds.
	$categoriesBefore = [];
	if ( $oldRev ) {
		$categoriesBefore = $this->getCategoriesAtRev( $page, $oldRev, $parseTimestamp );
	}

	// Categories of the new revision
	$categoriesAfter = $this->getCategoriesAtRev( $page, $newRev, $parseTimestamp );

	return [
		array_values( array_diff( $categoriesAfter, $categoriesBefore ) ),
		array_values( array_diff( $categoriesBefore, $categoriesAfter ) ),
	];
}
264
/**
 * Get the category names produced by rendering the given revision.
 *
 * @param WikiPage $page Page used to build the parser options and the parser cache key
 * @param RevisionRecord $rev Revision to get the categories of
 * @param mixed $parseTimestamp Timestamp injected into the parser options
 * @return array Category names as returned by ParserOutput::getCategoryNames()
 */
private function getCategoriesAtRev( WikiPage $page, RevisionRecord $rev, $parseTimestamp ) {
	$services = MediaWikiServices::getInstance();

	$parserOptions = $page->makeParserOptions( 'canonical' );
	$parserOptions->setTimestamp( $parseTimestamp );
	$parserOptions->setRenderReason( 'CategoryMembershipChangeJob' );

	// The parser cache is only consulted for the current stored revision
	$parserOutput = null;
	if ( $rev instanceof RevisionStoreRecord && $rev->isCurrent() ) {
		$parserOutput = $services->getParserCache()->get( $page, $parserOptions );
	}

	// Render when there was no cache hit or the cached output is for another revision
	if ( !$parserOutput || $parserOutput->getCacheRevisionId() !== $rev->getId() ) {
		$renderedRevision = $services->getRevisionRenderer()->getRenderedRevision( $rev, $parserOptions );
		$parserOutput = $renderedRevision->getRevisionParserOutput();
	}

	// array keys will cast numeric category names to ints;
	// ::getCategoryNames() is careful to cast them back to strings
	// to avoid breaking things!
	return $parserOutput->getCategoryNames();
}
292
/**
 * Get the de-duplication info of the parent class with the 'revTimestamp'
 * parameter removed, so jobs differing only in that parameter are duplicates.
 *
 * @return array
 */
public function getDeduplicationInfo() {
	$dedupInfo = parent::getDeduplicationInfo();

	// first job wins
	unset( $dedupInfo['params']['revTimestamp'] );

	return $dedupInfo;
}
300}
const NS_CATEGORY
Definition Defines.php:65
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
const DB_REPLICA
Definition defines.php:26
const DB_PRIMARY
Definition defines.php:28
makeTitle( $linkId)
Convert a link ID to a Title.to override Title
Job queue task description base code.
Describe and execute a background job.
Definition Job.php:28
array $params
Array of job parameters.
Definition Job.php:33
setLastError( $error)
Definition Job.php:424
A class containing constants representing the names of configuration variables.
const UpdateRowsPerQuery
Name constant for the UpdateRowsPerQuery setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Base representation for an editable wiki page.
Definition WikiPage.php:83
getTitle()
Get the title object of the article.
Definition WikiPage.php:251
Job to add recent change entries mentioning category membership changes.
run()
Run the job. If this method returns false or completes exceptionally, the job runner will retry execut...
getDeduplicationInfo()
Subclasses may need to override this to make duplication detection work. The resulting map conveys eve...
__construct(PageIdentity $page, array $params, RecentChangeFactory $recentChangeFactory)
Constructor for use by the Job Queue infrastructure.
static newSpec(PageIdentity $page, $revisionTimestamp, bool $forImport)
notifyUpdatesForRevision(LBFactory $lbFactory, WikiPage $page, RevisionRecord $newRev)
Helper class for category membership changes.
Page revision base class.
getParentId( $wikiId=self::LOCAL)
Get parent revision ID (the original previous page revision).
isDeleted( $field)
MCR migration note: this replaced Revision::isDeleted.
A RevisionRecord representing an existing revision persisted in the revision table.
Represents a title within MediaWiki.
Definition Title.php:69
commitAndWaitForReplication( $fname, $ticket, array $opts=[])
Commit primary DB transactions and wait for replication (if $ticket indicates it is safe)....
Raw SQL expression to be used in query builders.
Build SELECT queries with a fluent interface.
Interface for objects (potentially) representing an editable wiki page.
getId( $wikiId=self::LOCAL)
Returns the page ID.
Interface for database access objects.