MediaWiki  master
CategoryMembershipChangeJob.php
Go to the documentation of this file.
1 <?php
30 
/**
 * Transaction ticket obtained in run() via LBFactory::getEmptyTransactionTicket()
 * and handed to LBFactory::commitAndWaitForReplication() when notifying updates.
 */
private $ticket;

/**
 * Number of seconds subtracted from the job's revision timestamp in run() when
 * computing the cutoff, to cope with jobs being enqueued out of revision order.
 */
private const ENQUEUE_FUDGE_SEC = 60;
50 
/**
 * Build the specification of a 'categoryMembershipChange' job for a page.
 *
 * @param PageIdentity $page Page whose category membership changes should be reported
 * @param mixed $revisionTimestamp Timestamp of the triggering revision; run() converts
 *  it with wfTimestamp(), so presumably any format that function accepts
 * @return JobSpecification
 */
public static function newSpec( PageIdentity $page, $revisionTimestamp ) {
	$jobParams = [
		'pageId' => $page->getId(),
		'revTimestamp' => $revisionTimestamp,
	];

	// De-duplicate per page: the revision timestamp is excluded from the comparison
	$jobOptions = [
		'removeDuplicates' => true,
		'removeDuplicatesIgnoreParams' => [ 'revTimestamp' ]
	];

	return new JobSpecification( 'categoryMembershipChange', $jobParams, $jobOptions, $page );
}
70 
/**
 * Constructor for use by the Job Queue infrastructure.
 *
 * @param PageIdentity $page Page the job operates on
 * @param array $params Job parameters; run() reads 'pageId' and 'revTimestamp'
 */
public function __construct( PageIdentity $page, array $params ) {
	$jobCommand = 'categoryMembershipChange';
	parent::__construct( $jobCommand, $page, $params );

	// A single job per page is sufficient. ENQUEUE_FUDGE_SEC covers the race in which
	// a job for an older revision is inserted while the job for the newer revision
	// gets de-duplicated.
	$this->removeDuplicates = true;
}
83 
/**
 * Run the job: notify category membership changes for every revision of the page
 * that is newer than the last revision already having a SRC_CATEGORIZE entry.
 *
 * @return bool False, with the last error set, when the page cannot be found, the
 *  replica DB does not catch up in time, or the per-page lock cannot be acquired;
 *  true otherwise
 */
public function run() {
	$mwServices = MediaWikiServices::getInstance();
	$lbFactory = $mwServices->getDBLoadBalancerFactory();
	$mainLb = $lbFactory->getMainLB();
	$primaryDb = $mainLb->getConnectionRef( DB_PRIMARY );

	$this->ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );

	$wikiPage = $mwServices->getWikiPageFactory()
		->newFromID( $this->params['pageId'], WikiPage::READ_LATEST );
	if ( !$wikiPage ) {
		// Most likely the page was deleted in the meantime
		$this->setLastError( "Could not find page #{$this->params['pageId']}" );
		return false;
	}

	// Pre-wait outside the critical section so that the wait done while holding
	// the lock below is as short as possible
	$replicaDb = $mainLb->getConnectionRef( DB_REPLICA );
	$preWaitOk = $mainLb->waitForPrimaryPos( $replicaDb );
	if ( !$preWaitOk ) {
		$this->setLastError( "Timed out while pre-waiting for replica DB to catch up" );
		return false;
	}

	// Serialize jobs for this page with a named (per-wiki) lock so that they see
	// each others' changes. The returned handle is kept in a local for the rest of
	// the method; presumably the lock is released when it goes out of scope.
	$lockName = $primaryDb->getDomainID() . ':CategoryMembershipChange:' . $wikiPage->getId();
	$lockHandle = $primaryDb->getScopedLockAndFlush( $lockName, __METHOD__, 3 );
	if ( !$lockHandle ) {
		$this->setLastError( "Could not acquire lock '$lockName'" );
		return false;
	}

	// With the lock held, wait for the replica DB again so that changes made by
	// earlier jobs for this page are visible
	$caughtUp = $mainLb->waitForPrimaryPos( $replicaDb );
	if ( !$caughtUp ) {
		$this->setLastError( "Timed out while waiting for replica DB to catch up" );
		return false;
	}
	// Discard any stale REPEATABLE-READ snapshot
	$replicaDb->flushSnapshot( __METHOD__ );

	// Subtracting ENQUEUE_FUDGE_SEC copes with jobs inserted out of revision order
	// due to the delay between COMMIT and the actual enqueueing of this job.
	$cutoffUnix = wfTimestamp( TS_UNIX, $this->params['revTimestamp'] ) - self::ENQUEUE_FUDGE_SEC;

	// Look up the newest revision of the page that already has a SRC_CATEGORIZE row;
	// category changes before it are assumed to have been handled.
	$categorizeRcQuery = $replicaDb->newSelectQueryBuilder()
		->select( '1' )
		->from( 'recentchanges' )
		->where( 'rc_this_oldid = rev_id' )
		->andWhere( [ 'rc_source' => RecentChange::SRC_CATEGORIZE ] );
	$newestHandled = $replicaDb->newSelectQueryBuilder()
		->select( [ 'rev_timestamp', 'rev_id' ] )
		->from( 'revision' )
		->where( [ 'rev_page' => $wikiPage->getId() ] )
		->andWhere(
			$replicaDb->buildComparison( '>=', [ 'rev_timestamp' => $replicaDb->timestamp( $cutoffUnix ) ] )
		)
		->andWhere( 'EXISTS (' . $categorizeRcQuery->caller( __METHOD__ )->getSQL() . ')' )
		->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_DESC )
		->caller( __METHOD__ )
		->fetchRow();

	// When such a revision exists, only revisions newer than it are of interest
	$lastRevId = 0;
	if ( $newestHandled ) {
		$cutoffUnix = wfTimestamp( TS_UNIX, $newestHandled->rev_timestamp );
		$lastRevId = (int)$newestHandled->rev_id;
	}

	// Collect the revisions of this page made around and after that point which lack
	// category notifications in recent changes. This lets jobs pick up where the last
	// one left off.
	$revisionStore = $mwServices->getRevisionStore();
	$pendingRows = $revisionStore->newSelectQueryBuilder( $replicaDb )
		->joinComment()
		->where( [
			'rev_page' => $wikiPage->getId(),
			$replicaDb->buildComparison( '>', [
				'rev_timestamp' => $replicaDb->timestamp( $cutoffUnix ),
				'rev_id' => $lastRevId,
			] )
		] )
		->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_ASC )
		->caller( __METHOD__ )
		->fetchResultSet();

	// Process the revisions oldest first, i.e. in revision timestamp order
	foreach ( $pendingRows as $revRow ) {
		$revision = $revisionStore->newRevisionFromRow( $revRow );
		$this->notifyUpdatesForRevision( $lbFactory, $wikiPage, $revision );
	}

	return true;
}
172 
/**
 * Trigger category added/removed notifications for a single revision.
 *
 * The categories of $newRev are compared with those of its parent revision, and one
 * notification is triggered per category that was added or removed. Nothing happens
 * when the text of either revision is deleted, when the parent revision cannot be
 * loaded, or when there is no category difference.
 *
 * @param LBFactory $lbFactory Used to commit and wait for replication between batches
 * @param WikiPage $page Page the revision belongs to
 * @param RevisionRecord $newRev Revision whose category changes are to be notified
 */
protected function notifyUpdatesForRevision(
	LBFactory $lbFactory, WikiPage $page, RevisionRecord $newRev
) {
	$title = $page->getTitle();

	// Skip revisions whose text has been deleted
	if ( $newRev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
		return;
	}

	$services = MediaWikiServices::getInstance();
	// Get the prior revision (the same for null edits)
	if ( $newRev->getParentId() ) {
		$oldRev = $services->getRevisionLookup()
			->getRevisionById( $newRev->getParentId(), RevisionLookup::READ_LATEST );
		if ( !$oldRev || $oldRev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
			return;
		}
	} else {
		// No parent revision: every category of the new revision counts as added
		$oldRev = null;
	}

	// Parse both revisions and work out which categories were added and removed
	$categoryChanges = $this->getExplicitCategoriesChanges( $page, $newRev, $oldRev );
	[ $categoryInserts, $categoryDeletes ] = $categoryChanges;
	if ( !$categoryInserts && !$categoryDeletes ) {
		return; // nothing to do
	}

	$blc = $services->getBacklinkCacheFactory()->getBacklinkCache( $title );
	$catMembChange = new CategoryMembershipChange( $title, $blc, $newRev );
	$catMembChange->checkTemplateLinks();

	$batchSize = $services->getMainConfig()->get( MainConfigNames::UpdateRowsPerQuery );
	// Counts notifications from BOTH loops below, so that a commit happens after
	// every $batchSize notifications regardless of their kind.
	$insertCount = 0;

	foreach ( $categoryInserts as $categoryName ) {
		$categoryTitle = Title::makeTitle( NS_CATEGORY, $categoryName );
		$catMembChange->triggerCategoryAddedNotification( $categoryTitle );
		// The post-increment advances the counter exactly once per notification and
		// makes the very first notification skip the batch check.
		if ( $insertCount++ && ( $insertCount % $batchSize ) === 0 ) {
			$lbFactory->commitAndWaitForReplication( __METHOD__, $this->ticket );
		}
	}

	foreach ( $categoryDeletes as $categoryName ) {
		$categoryTitle = Title::makeTitle( NS_CATEGORY, $categoryName );
		$catMembChange->triggerCategoryRemovedNotification( $categoryTitle );
		// Same check as in the insertion loop. The counter must be incremented only
		// once per notification; incrementing it a second time inside the modulo
		// test would make it advance by two and break the batch boundaries.
		if ( $insertCount++ && ( $insertCount % $batchSize ) === 0 ) {
			$lbFactory->commitAndWaitForReplication( __METHOD__, $this->ticket );
		}
	}
}
231 
/**
 * Determine which categories were added and removed between two revisions.
 *
 * @param WikiPage $page Page both revisions belong to
 * @param RevisionRecord $newRev The newer revision
 * @param RevisionRecord|null $oldRev The prior revision, or null when there is none,
 *  in which case the old category list is treated as empty
 * @return array[] Two lists of category names: [ inserts, deletes ]
 */
private function getExplicitCategoriesChanges(
	WikiPage $page, RevisionRecord $newRev, ?RevisionRecord $oldRev = null
) {
	// Inject the same timestamp for both revision parses to avoid seeing category changes
	// due to time-based parser functions. Inject the same page title for the parses too.
	// Note that REPEATABLE-READ makes template/file pages appear unchanged between parses.
	$parseTimestamp = $newRev->getTimestamp();
	// Parse the old rev and get the categories. Do not use link tables as that
	// assumes these updates are perfectly FIFO and that link tables are always
	// up to date, neither of which are true.
	$oldCategories = $oldRev
		? $this->getCategoriesAtRev( $page, $oldRev, $parseTimestamp )
		: [];
	// Parse the new revision and get the categories
	$newCategories = $this->getCategoriesAtRev( $page, $newRev, $parseTimestamp );

	// array_values() re-indexes the differences so both results are plain lists
	$categoryInserts = array_values( array_diff( $newCategories, $oldCategories ) );
	$categoryDeletes = array_values( array_diff( $oldCategories, $newCategories ) );

	return [ $categoryInserts, $categoryDeletes ];
}
253 
/**
 * Get the names of the categories a page is in at a given revision.
 *
 * The parser cache is consulted only for a stored current revision, and its entry
 * is used only when it was generated for exactly this revision; otherwise the
 * revision is rendered.
 *
 * @param WikiPage $page Page used to build the parser options and as parser cache key
 * @param RevisionRecord $rev Revision to get the categories of
 * @param mixed $parseTimestamp Timestamp set on the parser options for the render
 * @return string[] Category names
 */
private function getCategoriesAtRev( WikiPage $page, RevisionRecord $rev, $parseTimestamp ) {
	$services = MediaWikiServices::getInstance();

	$parserOptions = $page->makeParserOptions( 'canonical' );
	$parserOptions->setTimestamp( $parseTimestamp );
	$parserOptions->setRenderReason( 'CategoryMembershipChangeJob' );

	// Array keys cast numeric category names to ints; ::getCategoryNames() is
	// careful to cast them back to strings to avoid breaking things, which is
	// why it is used on both return paths below.
	$cachedOutput = null;
	if ( $rev instanceof RevisionStoreRecord && $rev->isCurrent() ) {
		$cachedOutput = $services->getParserCache()->get( $page, $parserOptions );
	}
	if ( $cachedOutput && $cachedOutput->getCacheRevisionId() === $rev->getId() ) {
		return $cachedOutput->getCategoryNames();
	}

	// No usable cache entry: render the revision itself
	$renderedRevision = $services->getRevisionRenderer()->getRenderedRevision( $rev, $parserOptions );

	return $renderedRevision->getRevisionParserOutput()->getCategoryNames();
}
281 
/**
 * Get the de-duplication info of this job with the revision timestamp left out,
 * so that jobs for the same page compare as duplicates (the first job wins).
 *
 * @return array The parent's de-duplication info minus params['revTimestamp']
 */
public function getDeduplicationInfo() {
	$dedupInfo = parent::getDeduplicationInfo();

	// first job wins
	unset( $dedupInfo['params']['revTimestamp'] );

	return $dedupInfo;
}
288 }
const NS_CATEGORY
Definition: Defines.php:78
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Job to add recent change entries mentioning category membership changes.
static newSpec(PageIdentity $page, $revisionTimestamp)
__construct(PageIdentity $page, array $params)
Constructor for use by the Job Queue infrastructure.
getDeduplicationInfo()
Subclasses may need to override this to make duplication detection work.
notifyUpdatesForRevision(LBFactory $lbFactory, WikiPage $page, RevisionRecord $newRev)
Job queue task description base code.
Class to both describe a background job and handle jobs.
Definition: Job.php:40
Title $title
Definition: Job.php:51
setLastError( $error)
Definition: Job.php:432
array $params
Array of job parameters.
Definition: Job.php:45
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Page revision base class.
getParentId( $wikiId=self::LOCAL)
Get parent revision ID (the original previous page revision).
isCurrent()
Checks whether the revision record is a stored current revision.
getTimestamp()
MCR migration note: this replaced Revision::getTimestamp.
isDeleted( $field)
MCR migration note: this replaced Revision::isDeleted.
getId( $wikiId=self::LOCAL)
Get revision ID.
A RevisionRecord representing an existing revision persisted in the revision table.
Represents a title within MediaWiki.
Definition: Title.php:76
const SRC_CATEGORIZE
Base representation for an editable wiki page.
Definition: WikiPage.php:77
makeParserOptions( $context)
Get parser options suitable for rendering the primary article wikitext.
Definition: WikiPage.php:1890
getId( $wikiId=self::LOCAL)
Definition: WikiPage.php:528
getTitle()
Get the title object of the article.
Definition: WikiPage.php:258
commitAndWaitForReplication( $fname, $ticket, array $opts=[])
Commit primary DB transactions and wait for replication (if $ticket indicates it is safe).
Definition: LBFactory.php:530
Build SELECT queries with a fluent interface.
Interface for objects (potentially) representing an editable wiki page.
getId( $wikiId=self::LOCAL)
Returns the page ID.
Service for looking up page revisions.
const DB_REPLICA
Definition: defines.php:26
const DB_PRIMARY
Definition: defines.php:28