MediaWiki  master
BacklinkCache.php
Go to the documentation of this file.
1 <?php
29 use MediaWiki\HookContainer\ProtectedHookAccessorTrait;
36 
51  use ProtectedHookAccessorTrait;
52 
54  protected static $instance;
55 
68  protected $partitionCache = [];
69 
78  protected $fullResultCache = [];
79 
81  protected $wanCache;
82 
90  protected $db;
91 
96  protected $page;
97 
98  private const CACHE_EXPIRY = 3600;
99 
107  $this->page = $page;
108  $this->wanCache = $wanCache;
109  }
110 
/**
 * Fetch the BacklinkCache for a page through the BacklinkCacheFactory service.
 *
 * @param PageReference $page Page whose backlinks are of interest
 * @return self
 */
public static function get( PageReference $page ): self {
	return MediaWikiServices::getInstance()
		->getBacklinkCacheFactory()
		->getBacklinkCache( $page );
}
126 
/**
 * Get the page this cache was created for.
 *
 * @return PageReference
 */
public function getPage(): PageReference {
	return $this->page;
}
134 
/**
 * Clear locally stored data and the database object.
 *
 * Also touches the WAN cache check key for this page, which the cache
 * reads in this class pass along when fetching their values.
 */
public function clear() {
	// Forget everything held in process memory
	$this->partitionCache = [];
	$this->fullResultCache = [];

	$checkKey = $this->makeCheckKey();
	$this->wanCache->touchCheckKey( $checkKey );

	// Drop the DB handle; getDB() fetches a new one on demand
	$this->db = null;
}
144 
/**
 * Set the database object to use for subsequent queries.
 *
 * @param mixed $db Database handle; getDB() returns it as-is
 */
public function setDB( $db ) {
	$this->db = $db;
}
153 
/**
 * Get the database connection used for queries.
 *
 * If no handle has been set yet, one is obtained with wfGetDB( DB_REPLICA )
 * and remembered for later calls.
 *
 * @return mixed The stored database handle
 */
protected function getDB() {
	if ( $this->db !== null ) {
		return $this->db;
	}

	$this->db = wfGetDB( DB_REPLICA );

	return $this->db;
}
166 
/**
 * Get the backlinks for a given table as page identities.
 *
 * This method is a generator: the underlying query is only issued once the
 * returned iterator is first advanced.
 *
 * @param string $table Backlink table name
 * @param int|bool $startId Lowest "from" page ID to include, or false for no lower bound
 * @param int|bool $endId Highest "from" page ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows, or INF for no limit
 * @return Iterator Yields one PageIdentityValue per result row
 */
public function getLinkPages(
	string $table, $startId = false, $endId = false, $max = INF
): Iterator {
	$rows = $this->queryLinks( $table, $startId, $endId, $max );

	foreach ( $rows as $row ) {
		yield PageIdentityValue::localIdentity(
			$row->page_id,
			$row->page_namespace,
			$row->page_title
		);
	}
}
186 
/**
 * Get the backlinks for a given table, wrapped by TitleArray::newFromResult().
 *
 * @param string $table Backlink table name
 * @param int|bool $startId Lowest "from" page ID to include, or false for no lower bound
 * @param int|bool $endId Highest "from" page ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows, or INF for no limit
 * @return mixed Whatever TitleArray::newFromResult() returns for the query result
 */
public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
	$rows = $this->queryLinks( $table, $startId, $endId, $max );

	return TitleArray::newFromResult( $rows );
}
200 
/**
 * Get the backlinks for a given table, served from the in-process full
 * result cache when the request is unbounded and a cached result exists.
 *
 * @param string $table Backlink table name
 * @param int|bool $startId Lowest "from" page ID to include, or false for no lower bound
 * @param int|bool $endId Highest "from" page ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows, or INF for no limit
 * @param string $select 'all' to JOIN with the page table and fetch title fields,
 *   'ids' to read only the page IDs from the backlink table
 * @return mixed Result of the DB handle's select() call (or the cached one)
 */
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
	$isUnbounded = !$startId && !$endId;

	if ( $isUnbounded && is_infinite( $max ) && isset( $this->fullResultCache[$table] ) ) {
		wfDebug( __METHOD__ . ": got results from cache" );

		return $this->fullResultCache[$table];
	}

	wfDebug( __METHOD__ . ": got results from DB" );
	$fromField = $this->getPrefix( $table ) . '_from';
	$conds = $this->getConditions( $table );
	// Use the from field in the condition rather than the joined page_id,
	// because the database does not necessarily propagate indexes.
	if ( $startId ) {
		$conds[] = "$fromField >= " . intval( $startId );
	}
	if ( $endId ) {
		$conds[] = "$fromField <= " . intval( $endId );
	}
	$options = [ 'ORDER BY' => $fromField ];
	if ( is_finite( $max ) && $max > 0 ) {
		$options['LIMIT'] = $max;
	}

	if ( $select === 'ids' ) {
		// Just select from the backlink table and ignore the page JOIN.
		// Only raw SQL fragments (integer keys) can be join clauses on page_id.
		// Keyed conditions are "field => value" pairs whose values (such as a
		// page title that happens to contain "page_id") must never be dropped.
		$idConds = array_filter(
			$conds,
			static function ( $clause, $key ) {
				return !is_int( $key )
					|| !is_string( $clause )
					|| !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
			},
			ARRAY_FILTER_USE_BOTH
		);
		$res = $this->getDB()->select(
			$table,
			[ 'page_id' => $fromField ],
			$idConds,
			__METHOD__,
			$options
		);
	} else {
		// Select from the backlink table and JOIN with page title information
		$res = $this->getDB()->select(
			[ $table, 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			$conds,
			__METHOD__,
			array_merge( [ 'STRAIGHT_JOIN' ], $options )
		);
	}

	if ( $select === 'all' && $isUnbounded && $res->numRows() < $max ) {
		// The full results fit within the limit, so cache them
		$this->fullResultCache[$table] = $res;
	} else {
		wfDebug( __METHOD__ . ": results from DB were uncacheable" );
	}

	return $res;
}
265 
/**
 * Get the field name prefix for a given table.
 *
 * Tables not in the built-in map are offered to the BacklinkCacheGetPrefix
 * hook, which may fill in the prefix.
 *
 * @param string $table Backlink table name
 * @return string Field name prefix (without trailing underscore)
 * @throws MWException If neither the built-in map nor the hook yields a prefix
 */
protected function getPrefix( $table ) {
	static $prefixes = [
		'pagelinks' => 'pl',
		'imagelinks' => 'il',
		'categorylinks' => 'cl',
		'templatelinks' => 'tl',
		'redirect' => 'rd',
	];

	if ( isset( $prefixes[$table] ) ) {
		return $prefixes[$table];
	}

	// Not a built-in table: let hook handlers supply the prefix
	$prefix = null;
	$this->getHookRunner()->onBacklinkCacheGetPrefix( $table, $prefix );
	if ( !$prefix ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $prefix;
}
293 
/**
 * Get the SQL condition array for selecting backlinks, with a join
 * on the page table.
 *
 * Tables not handled here are offered to the BacklinkCacheGetConditions
 * hook, which may fill in the conditions.
 *
 * @param string $table Backlink table name
 * @return array Conditions in the form accepted by the DB handle's select()
 * @throws MWException If the table is unknown and the hook supplies no conditions
 */
protected function getConditions( $table ) {
	$prefix = $this->getPrefix( $table );
	// Raw clause that ties each backlink row to its source page row
	$joinOnPage = "page_id={$prefix}_from";

	switch ( $table ) {
		case 'pagelinks':
		case 'templatelinks':
			return [
				"{$prefix}_namespace" => $this->page->getNamespace(),
				"{$prefix}_title" => $this->page->getDBkey(),
				$joinOnPage
			];

		case 'redirect':
			$conds = [
				"{$prefix}_namespace" => $this->page->getNamespace(),
				"{$prefix}_title" => $this->page->getDBkey(),
			];
			// Only rows whose interwiki field is empty or NULL
			$conds[] = $this->getDB()->makeList( [
				"{$prefix}_interwiki" => '',
				"{$prefix}_interwiki IS NULL",
			], LIST_OR );
			$conds[] = $joinOnPage;

			return $conds;

		case 'imagelinks':
		case 'categorylinks':
			return [
				"{$prefix}_to" => $this->page->getDBkey(),
				$joinOnPage
			];
	}

	// Unknown table: ask hook handlers for the conditions
	$conds = null;
	$this->getHookRunner()->onBacklinkCacheGetConditions(
		$table,
		Title::castFromPageReference( $this->page ),
		$conds
	);
	if ( !$conds ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $conds;
}
342 
/**
 * Check if there are any backlinks.
 *
 * @param string $table Backlink table name
 * @return bool True if at least one backlink is counted
 */
public function hasLinks( $table ) {
	// A limit of 1 is enough to tell "none" from "some"
	$found = $this->getNumLinks( $table, 1 );

	return $found > 0;
}
351 
/**
 * Get the approximate number of backlinks.
 *
 * Sources are tried in order: the in-process partition cache, the in-process
 * full result cache, the WAN cache, and finally the database.
 *
 * @param string $table Backlink table name
 * @param int|float $max Only count up to this many backlinks, or INF for no limit
 * @return int|float The count, capped at $max
 */
public function getNumLinks( $table, $max = INF ) {
	global $wgUpdateRowsPerJob;

	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table] ) ) {
		$entry = reset( $this->partitionCache[$table] );
		// Only trust real entries; an interrupted partition() run may have
		// left a non-array placeholder behind.
		if ( is_array( $entry ) ) {
			return min( $max, $entry['numRows'] );
		}
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		return min( $max, $this->fullResultCache[$table]->numRows() );
	}

	$memcKey = $this->wanCache->makeKey(
		'numbacklinks',
		CacheKeyHelper::getKeyForPage( $this->page ),
		$table
	);

	// 3) ... fallback to memcached ...
	$curTTL = INF;
	$count = $this->wanCache->get(
		$memcKey,
		$curTTL,
		[
			$this->makeCheckKey()
		]
	);
	// Compare against false (cache miss) rather than testing truthiness, so
	// that a cached count of 0 is honoured instead of re-querying every time.
	if ( $count !== false && $curTTL > 0 ) {
		return min( $max, $count );
	}

	// 4) fetch from the database ...
	if ( is_infinite( $max ) ) { // no limit at all
		// Use partition() since it will batch the query and skip the JOIN.
		// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
		$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache
		return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
	}

	// Probably some sane limit.
	// Fetch the full title info, since the caller will likely need it next
	$count = $this->getLinks( $table, false, false, $max )->count();
	if ( $count < $max ) {
		// Below the limit, so this is the full count and is safe to cache
		$this->wanCache->set( $memcKey, $count, self::CACHE_EXPIRY );
	}

	return min( $max, $count );
}
408 
/**
 * Partition the backlinks into batches.
 *
 * Sources are tried in order: the in-process partition cache, the in-process
 * full result cache, the WAN cache, and finally the database. A cache entry is
 * only published once it has been fully built, so a failed query never leaves
 * a broken entry behind.
 *
 * @param string $table Backlink table name
 * @param int $batchSize Number of rows per batch; must be at least 1
 * @return array[] List of [ start, end ] page ID pairs (inclusive); the first
 *   start and the last end are false, meaning "unbounded"
 * @throws InvalidArgumentException If $batchSize is less than 1
 */
public function partition( $table, $batchSize ) {
	if ( $batchSize < 1 ) {
		// Fail early and clearly instead of with a modulo/division by zero later
		throw new InvalidArgumentException( __METHOD__ . ': $batchSize must be at least 1' );
	}

	// 1) try partition cache ...
	$cached = $this->partitionCache[$table][$batchSize] ?? null;
	if ( is_array( $cached ) ) {
		wfDebug( __METHOD__ . ": got from partition cache" );

		return $cached['batches'];
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		wfDebug( __METHOD__ . ": got from full result cache" );

		return $cacheEntry['batches'];
	}

	$memcKey = $this->wanCache->makeKey(
		'backlinks',
		CacheKeyHelper::getKeyForPage( $this->page ),
		$table,
		$batchSize
	);

	// 3) ... fallback to memcached ...
	$curTTL = 0;
	$memcValue = $this->wanCache->get(
		$memcKey,
		$curTTL,
		[
			$this->makeCheckKey()
		]
	);
	if ( is_array( $memcValue ) && ( $curTTL > 0 ) ) {
		$this->partitionCache[$table][$batchSize] = $memcValue;
		wfDebug( __METHOD__ . ": got from memcached $memcKey" );

		return $memcValue['batches'];
	}

	// 4) ... finally fetch from the slow database :(
	$cacheEntry = [ 'numRows' => 0, 'batches' => [] ]; // final result
	// Do the selects in batches to avoid client-side OOMs (T45452).
	// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
	$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
	$start = false;
	do {
		$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
		$partitions = $this->partitionResult( $res, $batchSize, false );
		// Merge the link count and range partitions for this chunk
		$cacheEntry['numRows'] += $partitions['numRows'];
		$cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
		if ( count( $partitions['batches'] ) ) {
			[ , $lEnd ] = end( $partitions['batches'] );
			$start = $lEnd + 1; // pick up after this inclusive range
		}
	} while ( $partitions['numRows'] >= $selectSize );
	// Make sure the first range has start=false and the last one has end=false
	if ( count( $cacheEntry['batches'] ) ) {
		$cacheEntry['batches'][0][0] = false;
		$cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
	}

	// Publish to the in-process cache only now that the entry is complete
	$this->partitionCache[$table][$batchSize] = $cacheEntry;

	// Save partitions to memcached
	$this->wanCache->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

	// Save backlink count to memcached
	$countKey = $this->wanCache->makeKey(
		'numbacklinks',
		CacheKeyHelper::getKeyForPage( $this->page ),
		$table
	);
	$this->wanCache->set( $countKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

	wfDebug( __METHOD__ . ": got from database" );

	return $cacheEntry['batches'];
}
498 
/**
 * Partition a DB result with backlinks in it into batches.
 *
 * @param mixed $res Result object providing numRows(), seek() and fetchObject(),
 *   with rows that expose a page_id field
 * @param int $batchSize Number of rows per batch
 * @param bool $isComplete Whether $res holds all the backlinks; if so, the first
 *   start and the last end are reported as false instead of a page ID
 * @return array [ 'numRows' => row count, 'batches' => list of [ start, end ] pairs ]
 * @throws MWException If a batch starts at a higher page ID than it ends at
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
	$numRows = $res->numRows();
	$numBatches = ceil( $numRows / $batchSize );
	$lastRow = $numRows - 1;

	// Read the page ID stored at the given row offset of the result
	$pageIdAt = static function ( $offset ) use ( $res ) {
		$res->seek( $offset );
		$row = $res->fetchObject();

		return (int)$row->page_id;
	};

	$batches = [];
	for ( $batch = 0; $batch < $numBatches; $batch++ ) {
		$isFirst = ( $batch == 0 );
		$isLast = ( $batch == ( $numBatches - 1 ) );

		// The outer edges of a complete result are left open (false)
		$start = ( $isFirst && $isComplete )
			? false
			: $pageIdAt( $batch * $batchSize );
		$end = ( $isLast && $isComplete )
			? false
			: $pageIdAt( min( $lastRow, ( $batch + 1 ) * $batchSize - 1 ) );

		# Sanity check order
		if ( $start && $end && $start > $end ) {
			throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
		}

		$batches[] = [ $start, $end ];
	}

	return [ 'numRows' => $numRows, 'batches' => $batches ];
}
541 
/**
 * Get a PageIdentity iterator for cascade-protected template/file use backlinks.
 *
 * This method is a generator: the underlying queries are only issued once
 * the returned iterator is first advanced.
 *
 * @return Iterator Yields one PageIdentityValue per backlink row
 */
public function getCascadeProtectedLinkPages(): Iterator {
	$rows = $this->getCascadeProtectedLinksInternal();

	foreach ( $rows as $row ) {
		yield PageIdentityValue::localIdentity(
			$row->page_id,
			$row->page_namespace,
			$row->page_title
		);
	}
}
556 
564  public function getCascadeProtectedLinks() {
567  }
568 
/**
 * Get an array of cascade-protected template/file use backlinks.
 *
 * Template links are always queried; image links are queried as well when
 * this page is in the file namespace. Rows are de-duplicated by page ID.
 *
 * @return stdClass[] Rows with page_namespace, page_title and page_id fields
 */
private function getCascadeProtectedLinksInternal(): array {
	$dbr = $this->getDB();
	$fields = [ 'page_namespace', 'page_title', 'page_id' ];
	$namespace = $this->page->getNamespace();
	$dbKey = $this->page->getDBkey();

	// @todo: use UNION without breaking tests that use temp tables
	$resSets = [
		$dbr->select(
			[ 'templatelinks', 'page_restrictions', 'page' ],
			$fields,
			[
				'tl_namespace' => $namespace,
				'tl_title' => $dbKey,
				'tl_from = pr_page',
				'pr_cascade' => 1,
				'page_id = tl_from'
			],
			__METHOD__,
			[ 'DISTINCT' ]
		),
	];
	if ( $namespace === NS_FILE ) {
		$resSets[] = $dbr->select(
			[ 'imagelinks', 'page_restrictions', 'page' ],
			$fields,
			[
				'il_to' => $dbKey,
				'il_from = pr_page',
				'pr_cascade' => 1,
				'page_id = il_from'
			],
			__METHOD__,
			[ 'DISTINCT' ]
		);
	}

	// Combine the result sets, keyed by page_id so that a page found by
	// both queries is only kept once
	$rowsById = [];
	foreach ( $resSets as $res ) {
		foreach ( $res as $row ) {
			$rowsById[$row->page_id] = $row;
		}
	}

	// The keys were only needed for de-duplication
	return array_values( $rowsById );
}
619 
/**
 * Returns the check key for the backlinks cache of this page.
 *
 * @return string Key built by the WAN cache from 'backlinks' and the page's cache key
 */
private function makeCheckKey() {
	$pageKey = CacheKeyHelper::getKeyForPage( $this->page );

	return $this->wanCache->makeKey( 'backlinks', $pageKey );
}
631 }
BacklinkCache\getPrefix
getPrefix( $table)
Get the field name prefix for a given table.
Definition: BacklinkCache.php:272
LIST_OR
const LIST_OR
Definition: Defines.php:46
BacklinkCache\$wanCache
WANObjectCache $wanCache
Definition: BacklinkCache.php:81
BacklinkCache\__construct
__construct(WANObjectCache $wanCache, PageReference $page)
Create a new BacklinkCache.
Definition: BacklinkCache.php:106
TitleArray\newFromResult
static newFromResult( $res)
Definition: TitleArray.php:44
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:193
BacklinkCache\getPage
getPage()
Definition: BacklinkCache.php:131
BacklinkCache\getCascadeProtectedLinksInternal
getCascadeProtectedLinksInternal()
Get an array of cascade-protected template/file use backlinks.
Definition: BacklinkCache.php:574
BacklinkCache\getDB
getDB()
Get the replica DB connection to the database. If no connection exists yet, it is initialized first.
Definition: BacklinkCache.php:159
BacklinkCache\getLinkPages
getLinkPages(string $table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
Definition: BacklinkCache.php:176
BacklinkCache
Class for fetching backlink lists, approximate backlink counts and partitions.
Definition: BacklinkCache.php:50
$res
$res
Definition: testCompression.php:57
Wikimedia\Rdbms\FakeResultWrapper
Overloads the relevant methods of the real ResultWrapper so it doesn't go anywhere near an actual dat...
Definition: FakeResultWrapper.php:12
Page\PageReference
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
Definition: PageReference.php:49
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
$dbr
$dbr
Definition: testCompression.php:54
BacklinkCache\getNumLinks
getNumLinks( $table, $max=INF)
Get the approximate number of backlinks.
Definition: BacklinkCache.php:358
MWException
MediaWiki exception.
Definition: MWException.php:29
BacklinkCache\partition
partition( $table, $batchSize)
Partition the backlinks into batches.
Definition: BacklinkCache.php:418
Wikimedia\Rdbms\IResultWrapper
Result wrapper for grabbing data queried from an IDatabase object.
Definition: IResultWrapper.php:26
BacklinkCache\queryLinks
queryLinks( $table, $startId, $endId, $max, $select='all')
Get the backlinks for a given table.
Definition: BacklinkCache.php:210
BacklinkCache\$fullResultCache
IResultWrapper[] $fullResultCache
Contains the complete set of links from a database result.
Definition: BacklinkCache.php:78
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2202
BacklinkCache\partitionResult
partitionResult( $res, $batchSize, $isComplete=true)
Partition a DB result with backlinks in it into batches.
Definition: BacklinkCache.php:507
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
$wgUpdateRowsPerJob
$wgUpdateRowsPerJob
Number of rows to update per job.
Definition: DefaultSettings.php:9729
BacklinkCache\getCascadeProtectedLinkPages
getCascadeProtectedLinkPages()
Get a PageIdentity iterator for cascade-protected template/file use backlinks.
Definition: BacklinkCache.php:548
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:894
BacklinkCache\$page
PageReference $page
Local copy of a PageReference object.
Definition: BacklinkCache.php:96
BacklinkCache\clear
clear()
Clear locally stored data and database object.
Definition: BacklinkCache.php:138
BacklinkCache\CACHE_EXPIRY
const CACHE_EXPIRY
Definition: BacklinkCache.php:98
BacklinkCache\makeCheckKey
makeCheckKey()
Returns check key for the backlinks cache for a particular title.
Definition: BacklinkCache.php:625
BacklinkCache\$db
$db
Local copy of a database object.
Definition: BacklinkCache.php:90
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:128
BacklinkCache\hasLinks
hasLinks( $table)
Check if there are any backlinks.
Definition: BacklinkCache.php:348
BacklinkCache\getCascadeProtectedLinks
getCascadeProtectedLinks()
Get a Title iterator for cascade-protected template/file use backlinks.
Definition: BacklinkCache.php:564
BacklinkCache\$partitionCache
array[] $partitionCache
Multi-dimensional array representing batches.
Definition: BacklinkCache.php:68
BacklinkCache\setDB
setDB( $db)
Set the Database object to use.
Definition: BacklinkCache.php:150
MediaWiki\Cache\CacheKeyHelper
Helper class for mapping value objects representing basic entities to cache keys.
Definition: CacheKeyHelper.php:43
Page\PageIdentityValue
Immutable value object representing a page identity.
Definition: PageIdentityValue.php:41
BacklinkCache\getConditions
getConditions( $table)
Get the SQL condition array for selecting backlinks, with a join on the page table.
Definition: BacklinkCache.php:301
Title\castFromPageReference
static castFromPageReference(?PageReference $pageReference)
Return a Title for a given Reference.
Definition: Title.php:345
BacklinkCache\$instance
static BacklinkCache $instance
Definition: BacklinkCache.php:54
NS_FILE
const NS_FILE
Definition: Defines.php:70
BacklinkCache\getLinks
getLinks( $table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
Definition: BacklinkCache.php:197