MediaWiki  1.32.5
BacklinkCache.php
Go to the documentation of this file.
1 <?php
32 
/** @var BacklinkCache Most recently created instance, reused by get() while the title matches */
48  protected static $instance;
49 
/**
 * @var array[] Multi-dimensional array representing batches, indexed as
 *  [table][batch size] with 'numRows' and 'batches' entries
 */
62  protected $partitionCache = [];
63 
/**
 * @var ResultWrapper[] Contains the whole set of links from a database result,
 *  keyed by table name
 */
72  protected $fullResultCache = [];
73 
/** @var WANObjectCache Cache used for backlink counts and partitions */
77  protected $wanCache;
78 
/** @var IDatabase Local copy of a database object; set lazily by getDB() or explicitly by setDB() */
86  protected $db;
87 
/** @var Title Local copy of a Title object */
91  protected $title;
92 
/** Expiry passed to WANObjectCache::set() for cached entries (presumably seconds -- TODO confirm) */
93  const CACHE_EXPIRY = 3600;
94 
/**
 * Create a new BacklinkCache bound to one title.
 *
 * @param Title $title Page whose backlinks this object looks up
 */
public function __construct( Title $title ) {
	$services = MediaWikiServices::getInstance();

	$this->wanCache = $services->getMainWANObjectCache();
	$this->title = $title;
}
104 
/**
 * Return the shared BacklinkCache for a title.
 *
 * Only a single instance is remembered: asking for a different title
 * replaces the remembered instance with a fresh one.
 *
 * @param Title $title Title to get a backlink cache for
 * @return BacklinkCache
 */
public static function get( Title $title ) {
	$current = self::$instance;
	if ( $current && $current->title->equals( $title ) ) {
		return $current;
	}

	self::$instance = new self( $title );

	return self::$instance;
}
119 
/**
 * Serialization handler: lists the properties that are serialized.
 *
 * The database handle and the WAN cache object are deliberately left out,
 * so only the in-process caches and the title are kept.
 *
 * @return string[] Names of the properties to serialize
 */
public function __sleep() {
	return [ 'partitionCache', 'fullResultCache', 'title' ];
}
130 
/**
 * Clear locally stored data and the database object.
 *
 * Also touches this title's check key in the WAN cache, which is the key
 * passed to the cache lookups in getNumLinks() and partition().
 */
public function clear() {
	// Drop both in-process caches
	$this->partitionCache = $this->fullResultCache = [];

	$checkKey = $this->makeCheckKey();
	$this->wanCache->touchCheckKey( $checkKey );

	// Forget the handle; getDB() will fetch one again on demand
	unset( $this->db );
}
140 
/**
 * Set the Database object to use.
 *
 * Once set, getDB() returns this object instead of requesting a replica
 * connection itself.
 *
 * @param IDatabase $db Database handle used for subsequent queries
 */
public function setDB( $db ) {
	$this->db = $db;
}
149 
/**
 * Get the database connection used for backlink queries.
 *
 * If no handle has been stored yet (or it was dropped by clear()),
 * a DB_REPLICA connection is requested and remembered.
 *
 * @return IDatabase
 */
protected function getDB() {
	if ( isset( $this->db ) ) {
		return $this->db;
	}

	$this->db = wfGetDB( DB_REPLICA );

	return $this->db;
}
162 
/**
 * Get the backlinks for a given table, wrapped by TitleArray::newFromResult().
 *
 * @param string $table Links table name
 * @param int|bool $startId Lowest source page ID to include, or false for no lower bound
 * @param int|bool $endId Highest source page ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows; INF for no limit
 * @return TitleArray
 */
public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
	$rows = $this->queryLinks( $table, $startId, $endId, $max );

	return TitleArray::newFromResult( $rows );
}
174 
/**
 * Get the backlinks for a given table.
 *
 * An unbounded request (no start, no end, infinite $max) is answered from the
 * full result cache when that is populated. Otherwise the database is queried,
 * and the result is stored in the full result cache only when it is an
 * unbounded 'all' query that returned fewer rows than $max.
 *
 * @param string $table Links table name
 * @param int|bool $startId Lowest source page ID to include, or false
 * @param int|bool $endId Highest source page ID to include, or false
 * @param int|float $max Maximum number of rows; INF for no limit
 * @param string $select 'all' to join with the page table, 'ids' to select only source page IDs
 * @return ResultWrapper
 */
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
	$fromField = $this->getPrefix( $table ) . '_from';
	$isUnbounded = !$startId && !$endId;

	if ( $isUnbounded && is_infinite( $max ) && isset( $this->fullResultCache[$table] ) ) {
		wfDebug( __METHOD__ . ": got results from cache\n" );

		return $this->fullResultCache[$table];
	}

	wfDebug( __METHOD__ . ": got results from DB\n" );
	$conds = $this->getConditions( $table );
	// Use the from field in the condition rather than the joined page_id,
	// because databases are stupid and don't necessarily propagate indexes.
	if ( $startId ) {
		$conds[] = "$fromField >= " . intval( $startId );
	}
	if ( $endId ) {
		$conds[] = "$fromField <= " . intval( $endId );
	}

	$options = [ 'ORDER BY' => $fromField ];
	if ( is_finite( $max ) && $max > 0 ) {
		$options['LIMIT'] = $max;
	}

	$dbr = $this->getDB();
	if ( $select === 'ids' ) {
		// Just select from the backlink table and ignore the page JOIN
		$idField = $this->getPrefix( $table ) . '_from AS page_id';
		// Strip the conditions that mention page_id, i.e. the join clause (kind of janky)
		$linkConds = array_filter( $conds, function ( $clause ) {
			return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
		} );
		$res = $dbr->select( $table, [ $idField ], $linkConds, __METHOD__, $options );
	} else {
		// Select from the backlink table and JOIN with page title information
		$res = $dbr->select(
			[ $table, 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			$conds,
			__METHOD__,
			array_merge( [ 'STRAIGHT_JOIN' ], $options )
		);
	}

	$fitsWithinLimit = $select === 'all' && $isUnbounded && $res->numRows() < $max;
	if ( $fitsWithinLimit ) {
		// The full results fit within the limit, so cache them
		$this->fullResultCache[$table] = $res;
	} else {
		wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
	}

	return $res;
}
240 
/**
 * Get the field name prefix for a given table.
 *
 * Tables not in the built-in map are offered to the
 * 'BacklinkCacheGetPrefix' hook, which may fill in a prefix.
 *
 * @param string $table Links table name
 * @return string Field name prefix, without the trailing underscore
 * @throws MWException If neither the built-in map nor the hook yields a prefix
 */
protected function getPrefix( $table ) {
	static $prefixes = [
		'pagelinks' => 'pl',
		'imagelinks' => 'il',
		'categorylinks' => 'cl',
		'templatelinks' => 'tl',
		'redirect' => 'rd',
	];

	if ( isset( $prefixes[$table] ) ) {
		return $prefixes[$table];
	}

	// Unknown table: let hook handlers supply the prefix
	$prefix = null;
	Hooks::run( 'BacklinkCacheGetPrefix', [ $table, &$prefix ] );
	if ( !$prefix ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $prefix;
}
268 
/**
 * Get the SQL condition array for selecting backlinks, with a join
 * on the page table.
 *
 * Tables without a built-in case are offered to the
 * 'BacklinkCacheGetConditions' hook, which may fill in the conditions.
 *
 * @param string $table Links table name
 * @return array Conditions in the format accepted by the database select() call
 * @throws MWException If the table is unknown and the hook supplies no conditions
 */
protected function getConditions( $table ) {
	$prefix = $this->getPrefix( $table );
	// Clause joining the link row to the page it comes from
	$joinClause = "page_id={$prefix}_from";

	switch ( $table ) {
		case 'pagelinks':
		case 'templatelinks':
			$conds = [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
			];
			$conds[] = $joinClause;

			return $conds;

		case 'redirect':
			$conds = [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
			];
			// Match rows whose interwiki field is either empty or NULL
			$conds[] = $this->getDB()->makeList( [
				"{$prefix}_interwiki" => '',
				"{$prefix}_interwiki IS NULL",
			], LIST_OR );
			$conds[] = $joinClause;

			return $conds;

		case 'imagelinks':
		case 'categorylinks':
			$conds = [ "{$prefix}_to" => $this->title->getDBkey() ];
			$conds[] = $joinClause;

			return $conds;

		default:
			$conds = null;
			Hooks::run( 'BacklinkCacheGetConditions', [ $table, $this->title, &$conds ] );
			if ( !$conds ) {
				throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
			}

			return $conds;
	}
}
316 
/**
 * Check if there are any backlinks.
 *
 * @param string $table Links table name
 * @return bool True when getNumLinks() reports at least one backlink
 */
public function hasLinks( $table ) {
	// Cap the count at one; only "zero or not" matters here
	$cappedCount = $this->getNumLinks( $table, 1 );

	return $cappedCount > 0;
}
325 
/**
 * Get the approximate number of backlinks.
 *
 * Sources are tried in order: partition cache, full result cache,
 * WAN cache, and finally the database.
 *
 * @param string $table Links table name
 * @param int|float $max Only count up to this many backlinks; INF for no cap
 * @return int
 */
public function getNumLinks( $table, $max = INF ) {
	global $wgUpdateRowsPerJob;

	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table] ) ) {
		// Any batch size will do; take the first entry stored for this table
		$firstEntry = reset( $this->partitionCache[$table] );

		return min( $max, $firstEntry['numRows'] );
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		$cachedRows = $this->fullResultCache[$table];

		return min( $max, $cachedRows->numRows() );
	}

	$titleHash = md5( $this->title->getPrefixedDBkey() );
	$memcKey = $this->wanCache->makeKey( 'numbacklinks', $titleHash, $table );

	// 3) ... fallback to memcached ...
	$curTTL = INF;
	$checkKeys = [ $this->makeCheckKey() ];
	$count = $this->wanCache->get( $memcKey, $curTTL, $checkKeys );
	// Note the truthiness test: a cached count of zero is not used and
	// falls through to the database below.
	if ( $count && ( $curTTL > 0 ) ) {
		return min( $max, $count );
	}

	// 4) fetch from the database ...
	if ( is_infinite( $max ) ) { // no limit at all
		// Use partition() since it will batch the query and skip the JOIN.
		// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
		$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache

		return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
	}

	// Probably some sane limit.
	// Fetch the full title info, since the caller will likely need it next
	$count = $this->getLinks( $table, false, false, $max )->count();
	if ( $count < $max ) { // full count
		$this->wanCache->set( $memcKey, $count, self::CACHE_EXPIRY );
	}

	return min( $max, $count );
}
382 
/**
 * Partition the backlinks into batches.
 *
 * Returns a list of [ start, end ] page ID ranges. When built from the
 * database, the first range has a start of false and the last range has
 * an end of false.
 *
 * Sources are tried in order: partition cache, full result cache,
 * WAN cache, and finally batched database queries. The in-process partition
 * cache is only written once a complete entry is available, so a cache or
 * database error cannot leave a placeholder or half-built entry behind for
 * later calls on the same instance.
 *
 * @param string $table Links table name
 * @param int $batchSize Number of rows per range
 * @return array[] List of [ start, end ] pairs
 */
public function partition( $table, $batchSize ) {
	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
		wfDebug( __METHOD__ . ": got from partition cache\n" );

		return $this->partitionCache[$table][$batchSize]['batches'];
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		wfDebug( __METHOD__ . ": got from full result cache\n" );

		return $cacheEntry['batches'];
	}

	$memcKey = $this->wanCache->makeKey(
		'backlinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table,
		$batchSize
	);

	// 3) ... fallback to memcached ...
	$curTTL = 0;
	$memcValue = $this->wanCache->get(
		$memcKey,
		$curTTL,
		[
			$this->makeCheckKey()
		]
	);
	if ( is_array( $memcValue ) && ( $curTTL > 0 ) ) {
		$this->partitionCache[$table][$batchSize] = $memcValue;
		wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

		return $memcValue['batches'];
	}

	// 4) ... finally fetch from the slow database :(
	// Accumulate into a local variable; the shared cache is only updated
	// after every chunk has been read successfully.
	$cacheEntry = [ 'numRows' => 0, 'batches' => [] ]; // final result
	// Do the selects in batches to avoid client-side OOMs (T45452).
	// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
	$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
	$start = false;
	do {
		$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
		$partitions = $this->partitionResult( $res, $batchSize, false );
		// Merge the link count and range partitions for this chunk
		$cacheEntry['numRows'] += $partitions['numRows'];
		$cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
		if ( count( $partitions['batches'] ) ) {
			list( , $lEnd ) = end( $partitions['batches'] );
			$start = $lEnd + 1; // pick up after this inclusive range
		}
	} while ( $partitions['numRows'] >= $selectSize );
	// Make sure the first range has start=false and the last one has end=false
	if ( count( $cacheEntry['batches'] ) ) {
		$cacheEntry['batches'][0][0] = false;
		$cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
	}

	// The entry is complete now; make it visible to later calls
	$this->partitionCache[$table][$batchSize] = $cacheEntry;

	// Save partitions to memcached
	$this->wanCache->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

	// Save backlink count to memcached
	$memcKey = $this->wanCache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);
	$this->wanCache->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

	wfDebug( __METHOD__ . ": got from database\n" );

	return $cacheEntry['batches'];
}
472 
/**
 * Partition a DB result with backlinks in it into batches.
 *
 * Each batch is a [ start, end ] pair of page_id values read from the
 * result at the batch boundaries. When $isComplete is true, the first
 * start and the last end are replaced by false.
 *
 * @param ResultWrapper $res Result whose rows expose a page_id field
 * @param int $batchSize Number of rows per batch
 * @param bool $isComplete Whether $res includes all the backlinks
 * @return array Map with 'numRows' (row count of $res) and 'batches' (list of pairs)
 * @throws MWException If a batch's start ID is greater than its end ID
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
	$numRows = $res->numRows();
	$numBatches = ceil( $numRows / $batchSize );
	$lastBatch = $numBatches - 1;

	// Read the page_id found at a given row offset of the result
	$pageIdAt = function ( $offset ) use ( $res ) {
		$res->seek( $offset );
		$row = $res->fetchObject();

		return (int)$row->page_id;
	};

	$ranges = [];
	for ( $batch = 0; $batch < $numBatches; $batch++ ) {
		$start = ( $batch == 0 && $isComplete )
			? false
			: $pageIdAt( $batch * $batchSize );

		// The final batch may be short, so clamp to the last row
		$end = ( $batch == $lastBatch && $isComplete )
			? false
			: $pageIdAt( min( $numRows - 1, ( $batch + 1 ) * $batchSize - 1 ) );

		# Sanity check order
		if ( $start && $end && $start > $end ) {
			throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
		}

		$ranges[] = [ $start, $end ];
	}

	return [ 'numRows' => $numRows, 'batches' => $ranges ];
}
515 
/**
 * Get a Title iterator for cascade-protected template/file use backlinks.
 *
 * Selects pages that transclude this title as a template and have a
 * cascading restriction (pr_cascade = 1). When this title is in NS_FILE,
 * pages using it through imagelinks are included as well. Rows from both
 * queries are de-duplicated by page_id.
 *
 * @return TitleArray Result of TitleArray::newFromResult() over the merged rows
 */
public function getCascadeProtectedLinks() {
	$dbr = $this->getDB();

	// @todo: use UNION without breaking tests that use temp tables
	$resSets = [];
	$resSets[] = $dbr->select(
		[ 'templatelinks', 'page_restrictions', 'page' ],
		[ 'page_namespace', 'page_title', 'page_id' ],
		[
			'tl_namespace' => $this->title->getNamespace(),
			'tl_title' => $this->title->getDBkey(),
			'tl_from = pr_page',
			'pr_cascade' => 1,
			'page_id = tl_from'
		],
		__METHOD__,
		[ 'DISTINCT' ]
	);
	if ( $this->title->getNamespace() == NS_FILE ) {
		$resSets[] = $dbr->select(
			[ 'imagelinks', 'page_restrictions', 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			[
				'il_to' => $this->title->getDBkey(),
				'il_from = pr_page',
				'pr_cascade' => 1,
				'page_id = il_from'
			],
			__METHOD__,
			[ 'DISTINCT' ]
		);
	}

	// Combine and de-duplicate the results
	$mergedRes = [];
	foreach ( $resSets as $res ) {
		foreach ( $res as $row ) {
			// Keying by page_id makes a page found by both queries appear once
			$mergedRes[$row->page_id] = $row;
		}
	}

	return TitleArray::newFromResult(
		new FakeResultWrapper( array_values( $mergedRes ) ) );
}
566 
/**
 * Returns the check key for the backlinks cache for this object's title.
 *
 * @return string Key built from 'backlinks' and the MD5 of the prefixed DB key
 */
private function makeCheckKey() {
	$titleHash = md5( $this->title->getPrefixedDBkey() );

	return $this->wanCache->makeKey( 'backlinks', $titleHash );
}
578 }
BacklinkCache\getPrefix
getPrefix( $table)
Get the field name prefix for a given table.
Definition: BacklinkCache.php:247
BacklinkCache\$wanCache
WANObjectCache $wanCache
Definition: BacklinkCache.php:77
TitleArray\newFromResult
static newFromResult( $res)
Definition: TitleArray.php:40
BacklinkCache\$fullResultCache
ResultWrapper[] $fullResultCache
Contains the whole links from a database result.
Definition: BacklinkCache.php:72
captcha-old.count
count
Definition: captcha-old.py:249
NS_FILE
const NS_FILE
Definition: Defines.php:70
BacklinkCache\getDB
getDB()
Get the replica DB connection to the database. If no connection exists yet, it is initialized.
Definition: BacklinkCache.php:155
$res
$res
Definition: database.txt:21
Wikimedia\Rdbms\ResultWrapper
Result wrapper for grabbing data queried from an IDatabase object.
Definition: ResultWrapper.php:24
BacklinkCache
Class for fetching backlink lists, approximate backlink counts and partitions.
Definition: BacklinkCache.php:46
Wikimedia\Rdbms\FakeResultWrapper
Overloads the relevant methods of the real ResultsWrapper so it doesn't go anywhere near an actual da...
Definition: FakeResultWrapper.php:11
BacklinkCache\$title
$title
Local copy of a Title object.
Definition: BacklinkCache.php:91
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
$dbr
$dbr
Definition: testCompression.php:50
BacklinkCache\getNumLinks
getNumLinks( $table, $max=INF)
Get the approximate number of backlinks.
Definition: BacklinkCache.php:332
LIST_OR
const LIST_OR
Definition: Defines.php:46
MWException
MediaWiki exception.
Definition: MWException.php:26
BacklinkCache\partition
partition( $table, $batchSize)
Partition the backlinks into batches.
Definition: BacklinkCache.php:392
BacklinkCache\queryLinks
queryLinks( $table, $startId, $endId, $max, $select='all')
Get the backlinks for a given table.
Definition: BacklinkCache.php:184
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2693
BacklinkCache\__sleep
__sleep()
Serialization handler; disallows serializing the database to prevent failures after this class is d...
Definition: BacklinkCache.php:127
BacklinkCache\partitionResult
partitionResult( $res, $batchSize, $isComplete=true)
Partition a DB result with backlinks in it into batches.
Definition: BacklinkCache.php:481
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
$wgUpdateRowsPerJob
$wgUpdateRowsPerJob
Number of rows to update per job.
Definition: DefaultSettings.php:8480
array
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:988
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
BacklinkCache\clear
clear()
Clear locally stored data and database object.
Definition: BacklinkCache.php:134
BacklinkCache\CACHE_EXPIRY
const CACHE_EXPIRY
Definition: BacklinkCache.php:93
BacklinkCache\makeCheckKey
makeCheckKey()
Returns check key for the backlinks cache for a particular title.
Definition: BacklinkCache.php:572
BacklinkCache\$db
$db
Local copy of a database object.
Definition: BacklinkCache.php:86
title
title
Definition: parserTests.txt:239
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:118
BacklinkCache\__construct
__construct(Title $title)
Create a new BacklinkCache.
Definition: BacklinkCache.php:100
BacklinkCache\hasLinks
hasLinks( $table)
Check if there are any backlinks.
Definition: BacklinkCache.php:322
BacklinkCache\getCascadeProtectedLinks
getCascadeProtectedLinks()
Get a Title iterator for cascade-protected template/file use backlinks.
Definition: BacklinkCache.php:522
Title
Represents a title within MediaWiki.
Definition: Title.php:39
BacklinkCache\$partitionCache
array[] $partitionCache
Multi-dimensional array representing batches.
Definition: BacklinkCache.php:62
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:2044
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
BacklinkCache\setDB
setDB( $db)
Set the Database object to use.
Definition: BacklinkCache.php:146
BacklinkCache\getConditions
getConditions( $table)
Get the SQL condition array for selecting backlinks, with a join on the page table.
Definition: BacklinkCache.php:276
BacklinkCache\$instance
static BacklinkCache $instance
Definition: BacklinkCache.php:48
MediaWikiServices
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
BacklinkCache\getLinks
getLinks( $table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
Definition: BacklinkCache.php:171