25use Wikimedia\Timestamp\TimestampFormat as TS;
47 private const ENQUEUE_FUDGE_SEC = 60;
57 'categoryMembershipChange',
59 'pageId' => $page->
getId(),
60 'revTimestamp' => $revisionTimestamp,
61 'forImport' => $forImport,
64 'removeDuplicates' =>
true,
65 'removeDuplicatesIgnoreParams' => [
'revTimestamp' ]
83 parent::__construct(
'categoryMembershipChange', $page,
$params );
84 $this->recentChangeFactory = $recentChangeFactory;
87 $this->removeDuplicates =
true;
91 public function run() {
93 $lbFactory = $services->getDBLoadBalancerFactory();
94 $lb = $lbFactory->getMainLB();
97 $this->ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
99 $page = $services->getWikiPageFactory()->newFromID( $this->params[
'pageId'], IDBAccessObject::READ_LATEST );
101 $this->
setLastError(
"Could not find page #{$this->params['pageId']}" );
107 if ( !$lb->waitForPrimaryPos( $dbr ) ) {
108 $this->
setLastError(
"Timed out while pre-waiting for replica DB to catch up" );
113 $lockKey =
"{$dbw->getDomainID()}:CategoryMembershipChange:{$page->getId()}";
114 $scopedLock = $dbw->getScopedLockAndFlush( $lockKey, __METHOD__, 1 );
115 if ( !$scopedLock ) {
116 $this->
setLastError(
"Could not acquire lock '$lockKey'" );
121 if ( !$lb->waitForPrimaryPos( $dbr ) ) {
122 $this->
setLastError(
"Timed out while waiting for replica DB to catch up" );
126 $dbr->flushSnapshot( __METHOD__ );
128 $cutoffUnix =
wfTimestamp( TS::UNIX, $this->params[
'revTimestamp'] );
131 $cutoffUnix -= self::ENQUEUE_FUDGE_SEC;
135 $subQuery = $dbr->newSelectQueryBuilder()
137 ->from(
'recentchanges' )
138 ->where(
'rc_this_oldid = rev_id' )
139 ->andWhere( [
'rc_source' => RecentChange::SRC_CATEGORIZE ] );
140 $row = $dbr->newSelectQueryBuilder()
141 ->select( [
'rev_timestamp',
'rev_id' ] )
143 ->where( [
'rev_page' => $page->getId() ] )
144 ->andWhere( $dbr->expr(
'rev_timestamp',
'>=', $dbr->timestamp( $cutoffUnix ) ) )
145 ->andWhere(
new RawSQLExpression(
'EXISTS (' . $subQuery->getSQL() .
')' ) )
146 ->orderBy( [
'rev_timestamp',
'rev_id' ], SelectQueryBuilder::SORT_DESC )
147 ->caller( __METHOD__ )->fetchRow();
151 $cutoffUnix =
wfTimestamp( TS::UNIX, $row->rev_timestamp );
152 $lastRevId = (int)$row->rev_id;
159 $revisionStore = $services->getRevisionStore();
160 $res = $revisionStore->newSelectQueryBuilder( $dbr )
163 'rev_page' => $page->getId(),
164 $dbr->buildComparison(
'>', [
165 'rev_timestamp' => $dbr->timestamp( $cutoffUnix ),
166 'rev_id' => $lastRevId,
169 ->orderBy( [
'rev_timestamp',
'rev_id' ], SelectQueryBuilder::SORT_ASC )
170 ->caller( __METHOD__ )->fetchResultSet();
173 foreach ( $res as $row ) {
191 if ( $newRev->
isDeleted( RevisionRecord::DELETED_TEXT ) ) {
198 $oldRev = $services->getRevisionLookup()
199 ->getRevisionById( $newRev->
getParentId(), IDBAccessObject::READ_LATEST );
200 if ( !$oldRev || $oldRev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
208 $categoryChanges = $this->getExplicitCategoriesChanges( $page, $newRev, $oldRev );
209 [ $categoryInserts, $categoryDeletes ] = $categoryChanges;
210 if ( !$categoryInserts && !$categoryDeletes ) {
214 $blc = $services->getBacklinkCacheFactory()->getBacklinkCache(
$title );
219 $this->recentChangeFactory,
220 $this->params[
'forImport'] ??
false
222 $catMembChange->checkTemplateLinks();
// Notify for every added category, then for every removed one. Both loops share
// $insertCount and must advance it exactly once per notification, so that the
// $batchSize check fires at the same cadence for additions and removals.
227 foreach ( $categoryInserts as $categoryName ) {
229 $catMembChange->triggerCategoryAddedNotification( $categoryTitle );
230 if ( $insertCount++ && ( $insertCount % $batchSize ) == 0 ) {
235 foreach ( $categoryDeletes as $categoryName ) {
237 $catMembChange->triggerCategoryRemovedNotification( $categoryTitle );
238 if ( $insertCount++ && ( $insertCount % $batchSize ) == 0 ) {
244 private function getExplicitCategoriesChanges(
250 $parseTimestamp = $newRev->getTimestamp();
254 $oldCategories = $oldRev
255 ? $this->getCategoriesAtRev( $page, $oldRev, $parseTimestamp )
258 $newCategories = $this->getCategoriesAtRev( $page, $newRev, $parseTimestamp );
260 $categoryInserts = array_values( array_diff( $newCategories, $oldCategories ) );
261 $categoryDeletes = array_values( array_diff( $oldCategories, $newCategories ) );
263 return [ $categoryInserts, $categoryDeletes ];
273 private function getCategoriesAtRev( WikiPage $page, RevisionRecord $rev, $parseTimestamp ) {
275 $options = $page->makeParserOptions(
'canonical' );
276 $options->setTimestamp( $parseTimestamp );
277 $options->setRenderReason(
'CategoryMembershipChangeJob' );
279 $output = $rev instanceof RevisionStoreRecord && $rev->isCurrent()
280 ? $services->getParserCache()->get( $page, $options )
283 if ( !$output || $output->getCacheRevisionId() !== $rev->getId() ) {
284 $output = $services->getRevisionRenderer()->getRenderedRevision( $rev, $options )
285 ->getRevisionParserOutput();
291 return $output->getCategoryNames();
296 $info = parent::getDeduplicationInfo();
297 unset( $info[
'params'][
'revTimestamp'] );
304class_alias( CategoryMembershipChangeJob::class,
'CategoryMembershipChangeJob' );
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
A class containing constants representing the names of configuration variables.
const UpdateRowsPerQuery
Name constant for the UpdateRowsPerQuery setting, for use with Config::get()
Base representation for an editable wiki page.
getTitle()
Get the title object of the article.
Interface for objects (potentially) representing an editable wiki page.
getId( $wikiId=self::LOCAL)
Returns the page ID.