MediaWiki  1.23.5
RefreshLinksJob.php
Go to the documentation of this file.
1 <?php
/**
 * Job to update link tables for pages.
 *
 * Depending on its parameters, a job of this class does one of three things
 * when run:
 *   - 'recursive' set: partitions the backlinks of the job title into
 *     per-title jobs plus (possibly) a remnant recursive job, and queues them;
 *   - 'pages' set: refreshes the link tables of each listed page;
 *   - otherwise: refreshes the link tables of the job title itself.
 */
class RefreshLinksJob extends Job {
	/** Minimum parse time (in seconds) for a fresh parse to be saved to the parser cache */
	const PARSE_THRESHOLD_SEC = 1.0;

	/**
	 * @param Title $title Title the job is attached to
	 * @param array|string $params Job parameters; the keys read by this class are
	 *   'recursive', 'range', 'pages', 'masterPos' and 'rootJobTimestamp'
	 */
	public function __construct( $title, $params = '' ) {
		parent::__construct( 'refreshLinks', $title, $params );
		// Base backlink update jobs and per-title update jobs can be de-duplicated.
		// If template A changes twice before any jobs run, a clean queue will have:
		//     (A base, A base)
		// The second job is ignored by the queue on insertion.
		// Suppose many pages use template A, and that template itself uses template B.
		// An edit to both will first create two base jobs. A clean FIFO queue will have:
		//     (A base, B base)
		// When these jobs run, the queue will have per-title and remnant partition jobs:
		//     (titleX,titleY,titleZ,...,A remnant,titleM,titleN,titleO,...,B remnant)
		// Some of these jobs will be the same, and will automatically be ignored by
		// the queue upon insertion. Some title jobs will run before the duplicate is
		// inserted, so the work will still be done twice in those cases. More titles
		// can be de-duplicated as the remnant jobs continue to be broken down. How well
		// this works depends on $wgUpdateRowsPerJob; it works best when either the pages
		// have few backlinks and/or the backlink sets for pages A and B are almost identical.
		$this->removeDuplicates = !isset( $params['range'] )
			&& ( !isset( $params['pages'] ) || count( $params['pages'] ) == 1 );
	}

	/**
	 * Run the job.
	 *
	 * Note that the result of runForTitle() is not propagated: failures are only
	 * recorded through setLastError() and the job still reports success.
	 *
	 * @return bool Always true
	 */
	public function run() {
		global $wgUpdateRowsPerJob;

		if ( !empty( $this->params['recursive'] ) ) {
			// Job to update all (or a range of) backlink pages for a page.
			// Carry over information for de-duplication
			$extraParams = $this->getRootJobParams();
			// Avoid slave lag when fetching templates.
			// When the outermost job is run, we know that the caller that enqueued it must have
			// committed the relevant changes to the DB by now. At that point, record the master
			// position and pass it along as the job recursively breaks into smaller range jobs.
			// Hopefully, when leaf jobs are popped, the slaves will have reached that position.
			if ( isset( $this->params['masterPos'] ) ) {
				$extraParams['masterPos'] = $this->params['masterPos'];
			} elseif ( wfGetLB()->getServerCount() > 1 ) {
				$extraParams['masterPos'] = wfGetLB()->getMasterPos();
			} else {
				$extraParams['masterPos'] = false;
			}
			// Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
			// jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$wgUpdateRowsPerJob,
				1, // job-per-title
				array( 'params' => $extraParams )
			);
			JobQueueGroup::singleton()->push( $jobs );
		} elseif ( isset( $this->params['pages'] ) ) {
			// Job to update link tables for a set of titles.
			// Each entry is a (namespace, DB key) pair; the array keys are not needed here.
			foreach ( $this->params['pages'] as $nsAndKey ) {
				list( $ns, $dbKey ) = $nsAndKey;
				$this->runForTitle( Title::makeTitleSafe( $ns, $dbKey ) );
			}
		} else {
			// Job to update link tables for a given title
			$this->runForTitle( $this->title );
		}

		return true;
	}

	/**
	 * Refresh the link tables for a single page.
	 *
	 * @param Title|null $title Page to update; null is reported as an invalid title
	 * @return bool False if the title is invalid or has no current revision, true
	 *   otherwise (including when the update is skipped because the links were
	 *   already refreshed after the root job was created)
	 */
	protected function runForTitle( Title $title = null ) {
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		if ( is_null( $title ) ) {
			$this->setLastError( "refreshLinks: Invalid title" );
			return false;
		}

		// Wait for the DB of the current/next slave DB handle to catch up to the master.
		// This way, we get the correct page_latest for templates or files that just changed
		// milliseconds ago, having triggered this job to begin with.
		if ( isset( $this->params['masterPos'] ) && $this->params['masterPos'] !== false ) {
			wfGetLB()->waitFor( $this->params['masterPos'] );
		}

		$page = WikiPage::factory( $title );

		// Fetch the current revision...
		$revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
		if ( !$revision ) {
			$this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
			return false; // XXX: what if it was just deleted?
		}
		$content = $revision->getContent( Revision::RAW );
		if ( !$content ) {
			// If there is no content, pretend the content is empty
			$content = $revision->getContentHandler()->makeEmptyContent();
		}

		$parserOutput = false;
		$parserOptions = $page->makeParserOptions( 'canonical' );
		// If page_touched changed after this root job (with a good slave lag skew factor),
		// then it is likely that any views of the pages already resulted in re-parses which
		// are now in cache. This can be reused to avoid expensive parsing in some cases.
		if ( isset( $this->params['rootJobTimestamp'] ) ) {
			// Root job time plus a 5 second skew, kept in TS_MW format so that every
			// comparison below is made between timestamps of the same format.
			$skewedTimestamp = wfTimestamp(
				TS_MW,
				wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5
			);
			if ( $page->getLinksTimestamp() > $skewedTimestamp ) {
				// Something already updated the backlinks since this job was made
				return true;
			}
			if ( $page->getTouched() > $skewedTimestamp ) {
				$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
				// Cache times are stored in TS_MW format (see the save() call below), so
				// comparing one against a UNIX timestamp would never flag an entry as stale.
				// Normalise explicitly and compare against the TS_MW threshold instead.
				if ( $parserOutput
					&& wfTimestamp( TS_MW, $parserOutput->getCacheTime() ) <= $skewedTimestamp
				) {
					$parserOutput = false; // too stale
				}
			}
		}
		// Parse the current revision if no usable cached output was found...
		if ( !$parserOutput ) {
			$start = microtime( true );
			// Revision ID must be passed to the parser output to get revision variables correct
			$parserOutput = $content->getParserOutput(
				$title, $revision->getId(), $parserOptions, false );
			$elapsed = microtime( true ) - $start;
			// If it took a long time to render, then save this back to the cache to avoid
			// wasted CPU by other apaches or job runners. We don't want to always save to
			// cache as this can cause high cache I/O and LRU churn when a template changes.
			if ( $elapsed >= self::PARSE_THRESHOLD_SEC
				&& $page->isParserCacheUsed( $parserOptions, $revision->getId() )
				&& $parserOutput->isCacheable()
			) {
				$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
				ParserCache::singleton()->save(
					$parserOutput, $page, $parserOptions, $ctime, $revision->getId()
				);
			}
		}

		$updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput );
		DataUpdate::runUpdates( $updates );

		// Clear the info cache for the page now that its links have been refreshed
		InfoAction::invalidateCache( $title );

		return true;
	}

	/**
	 * Get the job description used for duplicate detection.
	 *
	 * @return array The parent's de-duplication info, minus the fields that would
	 *   make otherwise identical jobs look different
	 */
	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		if ( is_array( $info['params'] ) ) {
			// Don't let highly unique "masterPos" values ruin duplicate detection
			unset( $info['params']['masterPos'] );
			// For per-pages jobs, the job title is that of the template that changed
			// (or similar), so remove that since it ruins duplicate detection.
			// 'pages' is a job parameter, so it has to be looked up under 'params'.
			if ( isset( $info['params']['pages'] ) ) {
				unset( $info['namespace'] );
				unset( $info['title'] );
			}
		}

		return $info;
	}

	/**
	 * @return int Number of pages this job refreshes directly: the size of the
	 *   'pages' parameter when present, otherwise 1
	 */
	public function workItemCount() {
		return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
	}
}
Job\getRootJobParams
getRootJobParams()
Definition: Job.php:251
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
DataUpdate\runUpdates
static runUpdates( $updates)
Convenience method, calls doUpdate() on every DataUpdate in the array.
Definition: DataUpdate.php:79
wfGetLB
wfGetLB( $wiki=false)
Get a load balancer object.
Definition: GlobalFunctions.php:3669
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:2483
Job\$title
Title $title
Definition: Job.php:38
RefreshLinksJob\getDeduplicationInfo
getDeduplicationInfo()
Subclasses may need to override this to make duplication detection work.
Definition: RefreshLinksJob.php:180
RefreshLinksJob\__construct
__construct( $title, $params='')
Definition: RefreshLinksJob.php:40
Job\setLastError
setLastError( $error)
Definition: Job.php:317
BacklinkJobUtils\partitionBacklinkJob
static partitionBacklinkJob(Job $job, $bSize, $cSize, $opts=array())
Break down $job into approximately ($bSize/$cSize) leaf jobs and a single partition job that covers the remaining backlink range (if any).
Definition: BacklinkJobUtils.php:67
title
to move a page</td >< td > &*You are moving the page across *A non empty talk page already exists under the new or *You uncheck the box below In those you will have to move or merge the page manually if desired</td >< td > be sure to &You are responsible for making sure that links continue to point where they are supposed to go Note that the page will &a page at the new title
Definition: All_system_messages.txt:2703
IDBAccessObject\READ_NORMAL
const READ_NORMAL
Definition: IDBAccessObject.php:53
Job
Class to both describe a background job and handle jobs.
Definition: Job.php:31
WikiPage\factory
static factory(Title $title)
Create a WikiPage object of the appropriate class for the given title.
Definition: WikiPage.php:103
RefreshLinksJob\run
run()
Run the job.
Definition: RefreshLinksJob.php:61
InfoAction\invalidateCache
static invalidateCache(Title $title)
Clear the info cache for a given Title.
Definition: InfoAction.php:66
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
RefreshLinksJob\runForTitle
runForTitle(Title $title=null)
Definition: RefreshLinksJob.php:103
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
false
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:188
TS_MW
const TS_MW
MediaWiki concatenated string timestamp (YYYYMMDDHHMMSS)
Definition: GlobalFunctions.php:2431
Revision\newFromTitle
static newFromTitle( $title, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given title.
Definition: Revision.php:106
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:422
RefreshLinksJob
Job to update link tables for pages.
Definition: RefreshLinksJob.php:37
RefreshLinksJob\workItemCount
workItemCount()
Definition: RefreshLinksJob.php:196
ParserCache\singleton
static singleton()
Get an instance of this object.
Definition: ParserCache.php:35
Job\$params
array bool $params
Array of job parameters or false if none.
Definition: Job.php:34
Revision\RAW
const RAW
Definition: Revision.php:74
Title
Represents a title within MediaWiki.
Definition: Title.php:35
TS_UNIX
const TS_UNIX
Unix time - the number of seconds since 1970-01-01 00:00:00 UTC.
Definition: GlobalFunctions.php:2426
JobQueueGroup\singleton
static singleton( $wiki=false)
Definition: JobQueueGroup.php:61
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
LinkCache\singleton
static & singleton()
Get an instance of this class.
Definition: LinkCache.php:49
RefreshLinksJob\PARSE_THRESHOLD_SEC
const PARSE_THRESHOLD_SEC
Definition: RefreshLinksJob.php:38