MediaWiki  1.28.0
BacklinkCache.php
Go to the documentation of this file.
1 <?php
/** TTL handed to the shared cache's set() calls in this class (presumably seconds) */
const CACHE_EXPIRY = 3600;

/** @var BacklinkCache The single retained instance handed out by get() */
protected static $instance;

/**
 * @var array[] Multi-dimensional array representing batches. It is indexed
 *  first by table name and then by batch size; each entry holds the keys
 *  'numRows' and 'batches'.
 */
protected $partitionCache = [];

/**
 * @var ResultWrapper[] Whole link result sets fetched from the database,
 *  indexed by table name.
 */
protected $fullResultCache = [];

/**
 * @var mixed Local copy of a database object. Left unset until getDB()
 *  or setDB() assigns it.
 */
protected $db;

/** @var Title Local copy of the Title object whose backlinks are cached */
protected $title;
85 
/**
 * Create a new BacklinkCache.
 *
 * @param Title $title Title object whose backlinks this instance will cache
 */
public function __construct( Title $title ) {
	$this->title = $title;
}
94 
/**
 * Return a BacklinkCache for the given title, reusing the retained instance
 * when it was built for an equal title.
 *
 * Only one instance is kept at a time: asking for a different title
 * replaces the previously retained one.
 *
 * @param Title $title Title object to get a backlink cache for
 * @return BacklinkCache
 */
public static function get( Title $title ) {
	$current = self::$instance;
	if ( $current && $current->title->equals( $title ) ) {
		return $current;
	}

	self::$instance = new self( $title );

	return self::$instance;
}
109 
/**
 * Serialization handler.
 *
 * Lists the properties to serialize. The database object ($db) is left out
 * on purpose, so that no connection handle ends up in the serialized data.
 *
 * @return string[] Names of the properties to serialize
 */
public function __sleep() {
	return [ 'partitionCache', 'fullResultCache', 'title' ];
}
120 
/**
 * Clear locally stored data and database object.
 *
 * The database property is unset rather than nulled; getDB() checks it
 * with isset() and creates a fresh connection on the next call.
 */
public function clear() {
	unset( $this->db );
	$this->fullResultCache = [];
	$this->partitionCache = [];
}
129 
/**
 * Set the Database object to use.
 *
 * Once set, getDB() returns this object instead of opening its own
 * connection.
 *
 * @param mixed $db Database object to use for subsequent queries
 */
public function setDB( $db ) {
	$this->db = $db;
}
138 
/**
 * Get the database object used for queries.
 *
 * If none has been set yet, one is obtained with wfGetDB( DB_REPLICA )
 * and remembered for later calls.
 *
 * @return mixed The database object held in $this->db
 */
protected function getDB() {
	if ( isset( $this->db ) ) {
		return $this->db;
	}

	$this->db = wfGetDB( DB_REPLICA );

	return $this->db;
}
151 
/**
 * Get the backlinks for a given table.
 *
 * Runs queryLinks() with the default 'all' selection and wraps the result
 * via TitleArray::newFromResult().
 *
 * @param string $table Backlink table name
 * @param int|bool $startId Lower bound on the from-page ID, or false for none
 * @param int|bool $endId Upper bound on the from-page ID, or false for none
 * @param int|float $max Maximum number of rows; INF for no limit
 * @return mixed Whatever TitleArray::newFromResult() returns for the result
 */
public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
	$rows = $this->queryLinks( $table, $startId, $endId, $max );

	return TitleArray::newFromResult( $rows );
}
163 
/**
 * Get the backlinks for a given table.
 *
 * An unbounded request (no start ID, no end ID, infinite $max) is answered
 * from the in-process full result cache when that cache has an entry for
 * the table. Otherwise the database is queried, ordered by the table's
 * "_from" field. An 'all' result without ID bounds whose row count is below
 * $max is stored in the full result cache.
 *
 * @param string $table Backlink table name
 * @param int|bool $startId Lower bound (inclusive) on the from-page ID, or false
 * @param int|bool $endId Upper bound (inclusive) on the from-page ID, or false
 * @param int|float $max Maximum number of rows; INF for no limit
 * @param string $select 'all' to join with the page table and return
 *  namespace, title and ID; 'ids' to return only page IDs from the link table
 * @return mixed Database result object (cached or freshly selected)
 */
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
	$fromField = $this->getPrefix( $table ) . '_from';
	$noIdBounds = !$startId && !$endId;

	if ( $noIdBounds && is_infinite( $max ) && isset( $this->fullResultCache[$table] ) ) {
		wfDebug( __METHOD__ . ": got results from cache\n" );

		return $this->fullResultCache[$table];
	}

	wfDebug( __METHOD__ . ": got results from DB\n" );

	$conds = $this->getConditions( $table );
	// Use the from field in the condition rather than the joined page_id,
	// because databases are stupid and don't necessarily propagate indexes.
	if ( $startId ) {
		$conds[] = "$fromField >= " . intval( $startId );
	}
	if ( $endId ) {
		$conds[] = "$fromField <= " . intval( $endId );
	}

	$options = [ 'ORDER BY' => $fromField ];
	if ( is_finite( $max ) && $max > 0 ) {
		$options['LIMIT'] = $max;
	}

	if ( $select === 'ids' ) {
		// Just select from the backlink table and ignore the page JOIN:
		// any clause that mentions page_id is dropped (kind of janky).
		$fields = [ $this->getPrefix( $table ) . '_from AS page_id' ];
		$linkOnlyConds = array_filter( $conds, function ( $clause ) {
			return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
		} );
		$res = $this->getDB()->select( $table, $fields, $linkOnlyConds, __METHOD__, $options );
	} else {
		// Select from the backlink table and JOIN with page title information
		$res = $this->getDB()->select(
			[ $table, 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			$conds,
			__METHOD__,
			array_merge( [ 'STRAIGHT_JOIN' ], $options )
		);
	}

	if ( $select === 'all' && $noIdBounds && $res->numRows() < $max ) {
		// The full results fit within the limit, so cache them
		$this->fullResultCache[$table] = $res;
	} else {
		wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
	}

	return $res;
}
230 
/**
 * Get the field name prefix for a given table.
 *
 * Built-in link tables are resolved from a fixed map. Any other table name
 * is offered to the 'BacklinkCacheGetPrefix' hook, which may fill in the
 * prefix by reference.
 *
 * @param string $table Backlink table name
 * @return string Field prefix (for example 'pl' for 'pagelinks')
 * @throws MWException If the table is unknown and no hook supplied a prefix
 */
protected function getPrefix( $table ) {
	static $prefixes = [
		'pagelinks' => 'pl',
		'imagelinks' => 'il',
		'categorylinks' => 'cl',
		'templatelinks' => 'tl',
		'redirect' => 'rd',
	];

	if ( isset( $prefixes[$table] ) ) {
		return $prefixes[$table];
	}

	$prefix = null;
	Hooks::run( 'BacklinkCacheGetPrefix', [ $table, &$prefix ] );
	if ( !$prefix ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $prefix;
}
258 
/**
 * Get the SQL condition array for selecting backlinks, with a join on the
 * page table.
 *
 * Tables not handled here are offered to the 'BacklinkCacheGetConditions'
 * hook, which may fill in the conditions by reference.
 *
 * @param string $table Backlink table name
 * @return array Conditions in the form accepted by the database select() call
 * @throws MWException If the table is unknown and no hook supplied conditions
 */
protected function getConditions( $table ) {
	$prefix = $this->getPrefix( $table );
	// Join clause tying each link row to the page it comes from
	$pageJoin = "page_id={$prefix}_from";

	switch ( $table ) {
		case 'pagelinks':
		case 'templatelinks':
			return [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
				$pageJoin
			];
		case 'redirect':
			return [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
				// Only rows whose interwiki field is empty or NULL
				$this->getDB()->makeList( [
					"{$prefix}_interwiki" => '',
					"{$prefix}_interwiki IS NULL",
				], LIST_OR ),
				$pageJoin
			];
		case 'imagelinks':
		case 'categorylinks':
			return [
				"{$prefix}_to" => $this->title->getDBkey(),
				$pageJoin
			];
		default:
			$conds = null;
			Hooks::run( 'BacklinkCacheGetConditions', [ $table, $this->title, &$conds ] );
			if ( !$conds ) {
				throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
			}

			return $conds;
	}
}
306 
/**
 * Check if there are any backlinks.
 *
 * Asks getNumLinks() for a count capped at 1, which is enough to tell
 * "none" from "at least one".
 *
 * @param string $table Backlink table name
 * @return bool True when at least one backlink is reported
 */
public function hasLinks( $table ) {
	$cappedCount = $this->getNumLinks( $table, 1 );

	return $cappedCount > 0;
}
315 
/**
 * Get the approximate number of backlinks.
 *
 * Sources are tried in this order: the in-process partition cache, the
 * in-process full result cache, the shared WAN cache, and finally the
 * database.
 *
 * @param string $table Backlink table name
 * @param int|float $max Cap on the returned count; INF for no cap
 * @return int|float Number of backlinks, never more than $max when a cap is given
 */
public function getNumLinks( $table, $max = INF ) {
	global $wgUpdateRowsPerJob;

	// Shared cache handle; must be obtained before steps 3 and 4 use it
	$cache = ObjectCache::getMainWANInstance();

	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table] ) ) {
		// Any batch size will do: every entry for the table carries the row count
		$entry = reset( $this->partitionCache[$table] );

		return min( $max, $entry['numRows'] );
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		return min( $max, $this->fullResultCache[$table]->numRows() );
	}

	$memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );

	// 3) ... fallback to memcached ...
	// A cached count of zero is falsy and therefore falls through to the database.
	$count = $cache->get( $memcKey );
	if ( $count ) {
		return min( $max, $count );
	}

	// 4) fetch from the database ...
	if ( is_infinite( $max ) ) { // no limit at all
		// Use partition() since it will batch the query and skip the JOIN.
		// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
		$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache
		return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
	} else { // probably some sane limit
		// Fetch the full title info, since the caller will likely need it next
		$count = $this->getLinks( $table, false, false, $max )->count();
		if ( $count < $max ) { // full count
			$cache->set( $memcKey, $count, self::CACHE_EXPIRY );
		}
	}

	return min( $max, $count );
}
362 
/**
 * Partition the backlinks into batches.
 *
 * Sources are tried in this order: the in-process partition cache, the
 * in-process full result cache, the shared WAN cache, and finally the
 * database. A database result is written back to the shared cache, both
 * as partitions and as a plain backlink count.
 *
 * @param string $table Backlink table name
 * @param int $batchSize Number of rows each batch should cover
 * @return array[] List of [ start, end ] page ID pairs; the first start and
 *  the last end are false, meaning "unbounded"
 */
public function partition( $table, $batchSize ) {
	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
		wfDebug( __METHOD__ . ": got from partition cache\n" );

		return $this->partitionCache[$table][$batchSize]['batches'];
	}

	// Shared cache handle; must be obtained before steps 3 and 4 use it
	$cache = ObjectCache::getMainWANInstance();
	// Reserve the slot and fill it through a reference, so each assignment
	// to $cacheEntry below lands directly in the partition cache.
	$this->partitionCache[$table][$batchSize] = false;
	$cacheEntry =& $this->partitionCache[$table][$batchSize];

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		wfDebug( __METHOD__ . ": got from full result cache\n" );

		return $cacheEntry['batches'];
	}

	$memcKey = wfMemcKey(
		'backlinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table,
		$batchSize
	);

	// 3) ... fallback to memcached ...
	$memcValue = $cache->get( $memcKey );
	if ( is_array( $memcValue ) ) {
		$cacheEntry = $memcValue;
		wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

		return $cacheEntry['batches'];
	}

	// 4) ... finally fetch from the slow database :(
	$cacheEntry = [ 'numRows' => 0, 'batches' => [] ]; // final result
	// Do the selects in batches to avoid client-side OOMs (bug 43452).
	// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
	$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
	$start = false;
	do {
		$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
		$partitions = $this->partitionResult( $res, $batchSize, false );
		// Merge the link count and range partitions for this chunk
		$cacheEntry['numRows'] += $partitions['numRows'];
		$cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
		if ( count( $partitions['batches'] ) ) {
			list( , $lEnd ) = end( $partitions['batches'] );
			$start = $lEnd + 1; // pick up after this inclusive range
		}
		// A chunk shorter than the LIMIT means there is nothing left to read
	} while ( $partitions['numRows'] >= $selectSize );
	// Make sure the first range has start=false and the last one has end=false
	if ( count( $cacheEntry['batches'] ) ) {
		$cacheEntry['batches'][0][0] = false;
		$cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
	}

	// Save partitions to memcached
	$cache->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

	// Save backlink count to memcached
	$memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );
	$cache->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

	wfDebug( __METHOD__ . ": got from database\n" );

	return $cacheEntry['batches'];
}
442 
/**
 * Partition a DB result with backlinks in it into batches.
 *
 * Each batch is a [ start, end ] pair of page IDs read from the first and
 * last row of its slice of the result. When $isComplete is true, the first
 * batch gets start=false and the last batch gets end=false, marking them
 * as open-ended.
 *
 * @param mixed $res Database result whose rows expose a page_id field
 * @param int $batchSize Number of rows per batch
 * @param bool $isComplete Whether $res covers every backlink, so the outer
 *  bounds may be left open
 * @return array Map with 'numRows' (row count of $res) and 'batches'
 *  (list of [ start, end ] pairs)
 * @throws MWException If a batch's start ID is greater than its end ID
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
	$numRows = $res->numRows();
	$numBatches = ceil( $numRows / $batchSize );
	$lastBatch = $numBatches - 1;
	$batches = [];

	for ( $i = 0; $i < $numBatches; $i++ ) {
		// Lower bound: open for the first batch of a complete result,
		// otherwise the page ID of the batch's first row.
		$start = false;
		if ( $i != 0 || !$isComplete ) {
			$res->seek( $i * $batchSize );
			$start = (int)$res->fetchObject()->page_id;
		}

		// Upper bound: open for the last batch of a complete result,
		// otherwise the page ID of the batch's last row (clamped to the
		// final row of the result).
		$end = false;
		if ( $i != $lastBatch || !$isComplete ) {
			$res->seek( min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 ) );
			$end = (int)$res->fetchObject()->page_id;
		}

		# Sanity check order
		if ( $start && $end && $start > $end ) {
			throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
		}

		$batches[] = [ $start, $end ];
	}

	return [ 'numRows' => $numRows, 'batches' => $batches ];
}
485 
/**
 * Get a Title iterator for cascade-protected template/file use backlinks.
 *
 * Selects pages that use this title through templatelinks (and through
 * imagelinks when the title is in the file namespace) and that have a
 * page_restrictions row with pr_cascade set. The result sets are merged
 * and de-duplicated by page ID.
 *
 * @return mixed Whatever TitleArray::newFromResult() returns for the merged rows
 */
public function getCascadeProtectedLinks() {
	$dbr = $this->getDB();

	// @todo: use UNION without breaking tests that use temp tables
	$resSets = [];
	$resSets[] = $dbr->select(
		[ 'templatelinks', 'page_restrictions', 'page' ],
		[ 'page_namespace', 'page_title', 'page_id' ],
		[
			'tl_namespace' => $this->title->getNamespace(),
			'tl_title' => $this->title->getDBkey(),
			'tl_from = pr_page',
			'pr_cascade' => 1,
			'page_id = tl_from'
		],
		__METHOD__,
		[ 'DISTINCT' ]
	);
	if ( $this->title->getNamespace() == NS_FILE ) {
		$resSets[] = $dbr->select(
			[ 'imagelinks', 'page_restrictions', 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			[
				'il_to' => $this->title->getDBkey(),
				'il_from = pr_page',
				'pr_cascade' => 1,
				'page_id = il_from'
			],
			__METHOD__,
			[ 'DISTINCT' ]
		);
	}

	// Combine and de-duplicate the results
	$mergedRes = [];
	foreach ( $resSets as $res ) {
		foreach ( $res as $row ) {
			// Keying by page ID makes a page found by both queries appear once
			$mergedRes[$row->page_id] = $row;
		}
	}

	return TitleArray::newFromResult(
		new FakeResultWrapper( array_values( $mergedRes ) ) );
}
536 }
static getMainWANInstance()
Get the main WAN cache object.
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user. For example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
Definition: deferred.txt:11
wfGetDB($db, $groups=[], $wiki=false)
Get a Database object.
getCascadeProtectedLinks()
Get a Title iterator for cascade-protected template/file use backlinks.
__sleep()
Serialization handler; disallows serializing the database object, to prevent failures after this class is d...
__construct(Title $title)
Create a new BacklinkCache.
getNumLinks($table, $max=INF)
Get the approximate number of backlinks.
getPrefix($table)
Get the field name prefix for a given table.
When a variable name is used in a function, it is silently declared as a new local, masking the global.
Definition: design.txt:93
getConditions($table)
Get the SQL condition array for selecting backlinks, with a join on the page table.
title
setDB($db)
Set the Database object to use.
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
static BacklinkCache $instance
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1046
$res
Definition: database.txt:21
$db
Local copy of a database object.
$cache
Definition: mcc.php:33
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
const NS_FILE
Definition: Defines.php:62
static newFromResult($res)
Definition: TitleArray.php:38
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
partitionResult($res, $batchSize, $isComplete=true)
Partition a DB result with backlinks in it into batches.
ResultWrapper[] $fullResultCache
Contains the whole links from a database result.
array[] $partitionCache
Multi dimensions array representing batches.
const LIST_OR
Definition: Defines.php:38
clear()
Clear locally stored data and database object.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
queryLinks($table, $startId, $endId, $max, $select= 'all')
Get the backlinks for a given table.
getLinks($table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
Overloads the relevant methods of the real ResultsWrapper so it doesn't go anywhere near an actual da...
Class for fetching backlink lists, approximate backlink counts and partitions.
$title
Local copy of a Title object.
$count
wfMemcKey()
Make a cache key for the local wiki.
const DB_REPLICA
Definition: defines.php:22
getDB()
Get the replica DB connection to the database. When non-existing, will initialize the connection...
partition($table, $batchSize)
Partition the backlinks into batches.
hasLinks($table)
Check if there are any backlinks.