MediaWiki fundraising/REL1_35
CategoryMembershipChangeJob.php
Go to the documentation of this file.
1<?php
27
// Transaction ticket: assigned in run() from getEmptyTransactionTicket() and later
// passed to commitAndWaitForReplication() by notifyUpdatesForRevision().
43 private $ticket;
44
// Number of seconds run() subtracts from the job's 'revTimestamp' to obtain its cutoff,
// so that jobs enqueued out of revision order are still handled.
45 private const ENQUEUE_FUDGE_SEC = 60;
46
/**
 * Build the specification of a 'categoryMembershipChange' job for a page.
 *
 * @param Title $title Page whose article ID becomes the 'pageId' job parameter
 * @param mixed $revisionTimestamp Stored as the 'revTimestamp' job parameter; run()
 *  later passes it to wfTimestamp()
 * @return JobSpecification
 */
public static function newSpec( Title $title, $revisionTimestamp ) {
	$jobParams = [
		'pageId' => $title->getArticleID(),
		'revTimestamp' => $revisionTimestamp,
	];
	// Request duplicate removal, leaving 'revTimestamp' out of the comparison
	$jobOptions = [
		'removeDuplicates' => true,
		'removeDuplicatesIgnoreParams' => [ 'revTimestamp' ]
	];

	return new JobSpecification( 'categoryMembershipChange', $jobParams, $jobOptions, $title );
}
66
/**
 * Constructor for use by the Job Queue infrastructure.
 *
 * @param Title $title
 * @param array $params Job parameters; run() reads 'pageId' and 'revTimestamp'
 */
public function __construct( Title $title, array $params ) {
	parent::__construct( 'categoryMembershipChange', $title, $params );

	// One job per page is sufficient. If an older revision's job is inserted while the
	// job for a newer revision gets de-duplicated, ENQUEUE_FUDGE_SEC covers that race.
	$this->removeDuplicates = true;
}
79
/**
 * Send category change notifications for the revisions of the job's page that are
 * newer than the cutoff and not yet covered by a SRC_CATEGORIZE recent change.
 *
 * @return bool False, with the last error set, if the page cannot be found, the
 *  replica DB does not catch up in time, or the per-page lock cannot be acquired;
 *  true otherwise
 */
public function run() {
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$loadBalancer = $lbFactory->getMainLB();
	$masterDb = $loadBalancer->getConnectionRef( DB_MASTER );

	// Stored on the instance: notifyUpdatesForRevision() needs it to commit batches
	$this->ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );

	$page = WikiPage::newFromID( $this->params['pageId'], WikiPage::READ_LATEST );
	if ( !$page ) {
		// The page may have been deleted since the job was queued
		$this->setLastError( "Could not find page #{$this->params['pageId']}" );
		return false;
	}

	// Wait once before locking, to shorten the waitForMasterPos() done under the lock
	$replicaDb = $loadBalancer->getConnectionRef( DB_REPLICA, [ 'recentchanges' ] );
	if ( !$loadBalancer->waitForMasterPos( $replicaDb ) ) {
		$this->setLastError( "Timed out while pre-waiting for replica DB to catch up" );
		return false;
	}

	// A named lock lets jobs for this page see each others' changes; the domain ID
	// prefix makes the lock per-wiki.
	$lockKey = $masterDb->getDomainID() . ':CategoryMembershipChange:' . $page->getId();
	// The returned object must stay referenced until this method returns
	$lockHolder = $masterDb->getScopedLockAndFlush( $lockKey, __METHOD__, 3 );
	if ( !$lockHolder ) {
		$this->setLastError( "Could not acquire lock '$lockKey'" );
		return false;
	}

	// With the lock held, wait again so the replica reflects the other jobs' changes
	if ( !$loadBalancer->waitForMasterPos( $replicaDb ) ) {
		$this->setLastError( "Timed out while waiting for replica DB to catch up" );
		return false;
	}
	// Drop any stale REPEATABLE-READ snapshot
	$replicaDb->flushSnapshot( __METHOD__ );

	// Subtracting ENQUEUE_FUDGE_SEC copes with jobs inserted out of revision order due to
	// the delay between COMMIT and the actual enqueueing of the job.
	$cutoffUnix = wfTimestamp( TS_UNIX, $this->params['revTimestamp'] ) - self::ENQUEUE_FUDGE_SEC;

	// Look for the newest revision of the page that already has a SRC_CATEGORIZE row;
	// category changes before it are assumed to have been handled.
	$handledConds = [
		'rev_page' => $page->getId(),
		'rev_timestamp >= ' . $replicaDb->addQuotes( $replicaDb->timestamp( $cutoffUnix ) ),
		'EXISTS (' . $replicaDb->selectSQLText(
			'recentchanges',
			'1',
			[
				'rc_this_oldid = rev_id',
				'rc_source' => RecentChange::SRC_CATEGORIZE,
			],
			__METHOD__
		) . ')'
	];
	$lastHandled = $replicaDb->selectRow(
		'revision',
		[ 'rev_timestamp', 'rev_id' ],
		$handledConds,
		__METHOD__,
		[ 'ORDER BY' => [ 'rev_timestamp DESC', 'rev_id DESC' ] ]
	);

	// If such a revision exists, only revisions newer than it are considered
	$lastRevId = 0;
	if ( $lastHandled ) {
		$cutoffUnix = wfTimestamp( TS_UNIX, $lastHandled->rev_timestamp );
		$lastRevId = (int)$lastHandled->rev_id;
	}

	// Select the revisions of this page after the cutoff point; this lets a job pick up
	// where the previous one left off.
	$encCutoff = $replicaDb->addQuotes( $replicaDb->timestamp( $cutoffUnix ) );
	$revisionStore = MediaWikiServices::getInstance()->getRevisionStore();
	$revQueryInfo = $revisionStore->getQueryInfo();
	$pendingConds = [
		'rev_page' => $page->getId(),
		"rev_timestamp > $encCutoff OR (rev_timestamp = $encCutoff AND rev_id > $lastRevId)"
	];
	$pendingRows = $replicaDb->select(
		$revQueryInfo['tables'],
		$revQueryInfo['fields'],
		$pendingConds,
		__METHOD__,
		[ 'ORDER BY' => [ 'rev_timestamp ASC', 'rev_id ASC' ] ],
		$revQueryInfo['joins']
	);

	// Process the revisions oldest first
	foreach ( $pendingRows as $revRow ) {
		$revision = $revisionStore->newRevisionFromRow( $revRow );
		$this->notifyUpdatesForRevision( $lbFactory, $page, $revision );
	}

	return true;
}
175
/**
 * Trigger category added/removed notifications for a single revision of the page.
 *
 * Returns without notifying when the revision's text is flagged as deleted, when its
 * parent revision cannot be loaded or has its text flagged as deleted, or when the
 * revision neither adds nor removes categories compared to its parent.
 *
 * @param LBFactory $lbFactory Used to commit and wait for replication between batches
 * @param WikiPage $page
 * @param RevisionRecord $newRev
 */
protected function notifyUpdatesForRevision(
	LBFactory $lbFactory, WikiPage $page, RevisionRecord $newRev
) {
	$config = RequestContext::getMain()->getConfig();
	$title = $page->getTitle();

	// Nothing can be compared if the new revision's text is not available
	if ( $newRev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
		return;
	}

	// Get the prior revision (the same for null edits)
	if ( $newRev->getParentId() ) {
		$oldRev = MediaWikiServices::getInstance()
			->getRevisionLookup()
			->getRevisionById( $newRev->getParentId(), RevisionLookup::READ_LATEST );
		if ( !$oldRev || $oldRev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
			return;
		}
	} else {
		// No parent revision: every category of the new revision counts as added
		$oldRev = null;
	}

	// Parse both revisions and diff their category lists
	[ $categoryInserts, $categoryDeletes ] =
		$this->getExplicitCategoriesChanges( $page, $newRev, $oldRev );
	if ( !$categoryInserts && !$categoryDeletes ) {
		return; // nothing to do
	}

	$catMembChange = new CategoryMembershipChange( $title, $newRev );
	$catMembChange->checkTemplateLinks();

	$batchSize = $config->get( 'UpdateRowsPerQuery' );
	// Running total of notifications across both loops, used to pace the commits
	$insertCount = 0;

	foreach ( $categoryInserts as $categoryName ) {
		$categoryTitle = Title::makeTitle( NS_CATEGORY, $categoryName );
		$catMembChange->triggerCategoryAddedNotification( $categoryTitle );
		if ( $insertCount++ && ( $insertCount % $batchSize ) == 0 ) {
			$lbFactory->commitAndWaitForReplication( __METHOD__, $this->ticket );
		}
	}

	foreach ( $categoryDeletes as $categoryName ) {
		$categoryTitle = Title::makeTitle( NS_CATEGORY, $categoryName );
		$catMembChange->triggerCategoryRemovedNotification( $categoryTitle );
		// Increment exactly once per notification, as in the loop above. Incrementing a
		// second time inside the modulo test would make the counter skip values and
		// throw the batch boundaries off.
		if ( $insertCount++ && ( $insertCount % $batchSize ) == 0 ) {
			$lbFactory->commitAndWaitForReplication( __METHOD__, $this->ticket );
		}
	}
}
234
/**
 * Determine which categories a revision adds and removes relative to a prior revision.
 *
 * @param WikiPage $page
 * @param RevisionRecord $newRev
 * @param RevisionRecord|null $oldRev Prior revision; when null the old category list
 *  is taken to be empty
 * @return array[] Two lists of category names: [ inserts, deletes ]
 */
private function getExplicitCategoriesChanges(
	WikiPage $page, RevisionRecord $newRev, ?RevisionRecord $oldRev = null
) {
	// Inject the same timestamp for both revision parses to avoid seeing category changes
	// due to time-based parser functions. Inject the same page title for the parses too.
	// Note that REPEATABLE-READ makes template/file pages appear unchanged between parses.
	$parseTimestamp = $newRev->getTimestamp();
	// Parse the old rev and get the categories. Do not use link tables as that
	// assumes these updates are perfectly FIFO and that link tables are always
	// up to date, neither of which are true.
	$oldCategories = $oldRev
		? $this->getCategoriesAtRev( $page, $oldRev, $parseTimestamp )
		: [];
	// Parse the new revision and get the categories
	$newCategories = $this->getCategoriesAtRev( $page, $newRev, $parseTimestamp );

	// array_diff() keeps the original keys, so re-index both results into plain lists
	$categoryInserts = array_values( array_diff( $newCategories, $oldCategories ) );
	$categoryDeletes = array_values( array_diff( $oldCategories, $newCategories ) );

	return [ $categoryInserts, $categoryDeletes ];
}
256
/**
 * List the category names found in the parser output of a revision of the page.
 *
 * @param WikiPage $page
 * @param RevisionRecord $rev
 * @param mixed $parseTimestamp Timestamp set on the parser options before rendering
 * @return string[] Category names
 */
private function getCategoriesAtRev( WikiPage $page, RevisionRecord $rev, $parseTimestamp ) {
	$services = MediaWikiServices::getInstance();
	$parserOptions = $page->makeParserOptions( 'canonical' );
	$parserOptions->setTimestamp( $parseTimestamp );

	// The parser cache is consulted only for a stored revision that is the current one
	$parserOutput = null;
	if ( $rev instanceof RevisionStoreRecord && $rev->isCurrent() ) {
		$parserOutput = $services->getParserCache()->get( $page, $parserOptions );
	}

	// Render the revision when there is no cached output or it is for another revision
	if ( !$parserOutput || $parserOutput->getCacheRevisionId() !== $rev->getId() ) {
		$renderedRevision = $services->getRevisionRenderer()
			->getRenderedRevision( $rev, $parserOptions );
		$parserOutput = $renderedRevision->getRevisionParserOutput();
	}

	// Category names are array keys, so numeric names arrive as ints;
	// turn every name back into a string to avoid breaking things.
	$categoryNames = [];
	foreach ( array_keys( $parserOutput->getCategories() ) as $categoryName ) {
		$categoryNames[] = strval( $categoryName );
	}

	return $categoryNames;
}
282
/**
 * Get the duplicate-detection info of this job with 'revTimestamp' left out of the
 * parameters.
 *
 * @return array
 */
public function getDeduplicationInfo() {
	$dedupInfo = parent::getDeduplicationInfo();
	// The revision timestamp must not tell jobs apart: the first job wins
	unset( $dedupInfo['params']['revTimestamp'] );

	return $dedupInfo;
}
289}
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Job to add recent change entries mentioning category membership changes.
getCategoriesAtRev(WikiPage $page, RevisionRecord $rev, $parseTimestamp)
getExplicitCategoriesChanges(WikiPage $page, RevisionRecord $newRev, RevisionRecord $oldRev=null)
__construct(Title $title, array $params)
Constructor for use by the Job Queue infrastructure.
getDeduplicationInfo()
Subclasses may need to override this to make duplication detection work.
notifyUpdatesForRevision(LBFactory $lbFactory, WikiPage $page, RevisionRecord $newRev)
static newSpec(Title $title, $revisionTimestamp)
Job queue task description base code.
Class to both describe a background job and handle jobs.
Definition Job.php:32
setLastError( $error)
Definition Job.php:461
MediaWikiServices is the service locator for the application scope of MediaWiki.
Page revision base class.
getParentId()
Get parent revision ID (the original previous page revision).
isCurrent()
Checks whether the revision record is a stored current revision.
getTimestamp()
MCR migration note: this replaces Revision::getTimestamp.
isDeleted( $field)
MCR migration note: this replaces Revision::isDeleted.
A RevisionRecord representing an existing revision persisted in the revision table.
Represents a title within MediaWiki.
Definition Title.php:42
Class representing a MediaWiki article and history.
Definition WikiPage.php:51
makeParserOptions( $context)
Get parser options suitable for rendering the primary article wikitext.
getTitle()
Get the title object of the article.
Definition WikiPage.php:318
An interface for generating database load balancers.
Definition LBFactory.php:41
commitAndWaitForReplication( $fname, $ticket, array $opts=[])
Convenience method for safely running commitMasterChanges()/waitForReplication()
const NS_CATEGORY
Definition Defines.php:84
Service for looking up page revisions.
const DB_REPLICA
Definition defines.php:25
const DB_MASTER
Definition defines.php:29