MediaWiki  master
BacklinkCache.php
Go to the documentation of this file.
1 <?php
28 use MediaWiki\HookContainer\ProtectedHookAccessorTrait;
33 
48  use ProtectedHookAccessorTrait;
49 
/** @var BacklinkCache Most recently created instance; get() reuses it while the title matches */
51  protected static $instance;
52 
/**
 * @var array[] Multi-dimensional array representing batches. Indexed by table name and
 *  then by batch size; partition() stores an entry with 'numRows' and 'batches' keys.
 */
65  protected $partitionCache = [];
66 
/**
 * @var IResultWrapper[] Whole link results from the database, keyed by table name.
 *  queryLinks() only stores a result here when it was unranged and not cut off by the limit.
 */
75  protected $fullResultCache = [];
76 
/** @var WANObjectCache Cache used for the backlink counts and partitions */
80  protected $wanCache;
81 
/** Local copy of a database object; null until setDB() or getDB() provides one */
89  protected $db;
90 
/** @var Title Local copy of the Title object whose backlinks are looked up */
94  protected $title;
95 
/** TTL handed to WANObjectCache::set() for counts and partitions (presumably seconds, i.e. one hour - confirm) */
96  private const CACHE_EXPIRY = 3600;
97 
/**
 * Create a new BacklinkCache bound to one title.
 *
 * @param Title $title Page whose backlinks will be looked up
 */
public function __construct( Title $title ) {
    $services = MediaWikiServices::getInstance();

    $this->wanCache = $services->getMainWANObjectCache();
    $this->title = $title;
}
107 
/**
 * Return the shared BacklinkCache for a title.
 *
 * Only a single instance is retained: it is reused while the requested title
 * equals the retained one and replaced by a fresh instance otherwise.
 *
 * @param Title $title Title to get a BacklinkCache for
 * @return BacklinkCache
 */
public static function get( Title $title ) {
    $current = self::$instance;
    if ( $current && $current->title->equals( $title ) ) {
        return $current;
    }

    self::$instance = new self( $title );

    return self::$instance;
}
122 
/**
 * Serialization handler.
 *
 * Lists the properties to serialize; the database handle and the WAN cache
 * object are deliberately left out.
 *
 * @return string[] Names of the properties to serialize
 */
public function __sleep() {
    $serializedProps = [ 'partitionCache', 'fullResultCache', 'title' ];

    return $serializedProps;
}
133 
/**
 * Clear locally stored data and the database object.
 *
 * Also touches the WAN cache check key of this title, which the cached
 * counts and partitions are read against.
 */
public function clear() {
    // Drop both in-process caches
    $this->fullResultCache = [];
    $this->partitionCache = [];

    $checkKey = $this->makeCheckKey();
    $this->wanCache->touchCheckKey( $checkKey );

    // Forget the connection; getDB() fetches a new one on demand
    $this->db = null;
}
143 
/**
 * Set the Database object to use.
 *
 * The handle is used by every later query of this instance until clear()
 * resets it.
 *
 * @param mixed $db Database object (presumably an IDatabase - confirm against callers)
 */
public function setDB( $db ) {
    $this->db = $db;
}
152 
/**
 * Get the database connection used for queries.
 *
 * When no connection has been set yet, a replica connection is fetched
 * with wfGetDB() and remembered.
 *
 * @return mixed The database object held in $this->db
 */
protected function getDB() {
    if ( $this->db !== null ) {
        return $this->db;
    }

    $this->db = wfGetDB( DB_REPLICA );

    return $this->db;
}
165 
/**
 * Get the backlinks for a given table as titles.
 *
 * @param string $table Backlink table name
 * @param int|bool $startId Lowest source page ID to include, or false for no lower bound
 * @param int|bool $endId Highest source page ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows, or INF for no limit
 * @return mixed Whatever TitleArray::newFromResult() builds from the query result
 */
public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
    $rows = $this->queryLinks( $table, $startId, $endId, $max );

    return TitleArray::newFromResult( $rows );
}
177 
/**
 * Get the backlinks for a given table.
 *
 * An unranged, unlimited request is answered from $fullResultCache when a
 * result for the table is held there. Otherwise the database is queried, and
 * the result is remembered only if it is a complete 'all' result (no range,
 * fewer rows than $max).
 *
 * @param string $table Backlink table name
 * @param int|bool $startId Lowest source page ID to include, or false
 * @param int|bool $endId Highest source page ID to include, or false
 * @param int|float $max Maximum number of rows, or INF for no limit
 * @param string $select 'all' joins the page table for title fields; 'ids' reads
 *  only the source page IDs from the backlink table
 * @return mixed Query result (an IResultWrapper, per $fullResultCache's usage)
 */
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
    $isUnranged = !$startId && !$endId;

    if ( $isUnranged && is_infinite( $max ) && isset( $this->fullResultCache[$table] ) ) {
        wfDebug( __METHOD__ . ": got results from cache" );

        return $this->fullResultCache[$table];
    }

    wfDebug( __METHOD__ . ": got results from DB" );
    $fromField = $this->getPrefix( $table ) . '_from';
    $conds = $this->getConditions( $table );

    // The range is expressed on the "from" field rather than the joined page_id,
    // since the database may not propagate the index usage across the join.
    if ( $startId ) {
        $conds[] = "$fromField >= " . intval( $startId );
    }
    if ( $endId ) {
        $conds[] = "$fromField <= " . intval( $endId );
    }

    $options = [ 'ORDER BY' => $fromField ];
    if ( is_finite( $max ) && $max > 0 ) {
        $options['LIMIT'] = $max;
    }

    $dbr = $this->getDB();
    if ( $select === 'ids' ) {
        // No JOIN with page here, so strip every clause that mentions page_id
        // (a crude textual filter) and read the IDs straight off the backlink table.
        $linkOnlyConds = array_filter( $conds, static function ( $clause ) {
            return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
        } );
        $res = $dbr->select(
            $table,
            [ 'page_id' => $fromField ],
            $linkOnlyConds,
            __METHOD__,
            $options
        );
    } else {
        // JOIN the backlink table with page to get the title information
        $joinOptions = array_merge( [ 'STRAIGHT_JOIN' ], $options );
        $res = $dbr->select(
            [ $table, 'page' ],
            [ 'page_namespace', 'page_title', 'page_id' ],
            $conds,
            __METHOD__,
            $joinOptions
        );
    }

    if ( $select === 'all' && $isUnranged && $res->numRows() < $max ) {
        // Everything fit within the limit, so this is the full result: keep it
        $this->fullResultCache[$table] = $res;
    } else {
        wfDebug( __METHOD__ . ": results from DB were uncacheable" );
    }

    return $res;
}
242 
/**
 * Get the field name prefix for a given table.
 *
 * Built-in tables are resolved from a fixed map; any other table is offered
 * to the BacklinkCacheGetPrefix hook.
 *
 * @param string $table Backlink table name
 * @return string Field prefix, e.g. 'pl' for 'pagelinks'
 * @throws MWException When neither the map nor the hook yields a prefix
 */
protected function getPrefix( $table ) {
    static $builtInPrefixes = [
        'pagelinks' => 'pl',
        'imagelinks' => 'il',
        'categorylinks' => 'cl',
        'templatelinks' => 'tl',
        'redirect' => 'rd',
    ];

    if ( isset( $builtInPrefixes[$table] ) ) {
        return $builtInPrefixes[$table];
    }

    // Unknown table: the hook receives this variable and may fill it in
    $hookPrefix = null;
    $this->getHookRunner()->onBacklinkCacheGetPrefix( $table, $hookPrefix );
    if ( !$hookPrefix ) {
        throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
    }

    return $hookPrefix;
}
270 
/**
 * Get the SQL condition array for selecting backlinks, with a join on the page table.
 *
 * Tables without a built-in case are offered to the BacklinkCacheGetConditions hook.
 *
 * @param string $table Backlink table name
 * @return array Conditions for a select(), including the page_id join clause
 * @throws MWException When the table is unknown and no hook supplies conditions
 */
protected function getConditions( $table ) {
    $prefix = $this->getPrefix( $table );
    $target = $this->title;

    switch ( $table ) {
        case 'pagelinks':
        case 'templatelinks':
            // Target stored as namespace + title
            return [
                "{$prefix}_namespace" => $target->getNamespace(),
                "{$prefix}_title" => $target->getDBkey(),
                "page_id={$prefix}_from"
            ];

        case 'redirect':
            // Namespace + title as well, restricted to rows whose interwiki
            // field is either empty or NULL
            return [
                "{$prefix}_namespace" => $target->getNamespace(),
                "{$prefix}_title" => $target->getDBkey(),
                $this->getDB()->makeList( [
                    "{$prefix}_interwiki" => '',
                    "{$prefix}_interwiki IS NULL",
                ], LIST_OR ),
                "page_id={$prefix}_from"
            ];

        case 'imagelinks':
        case 'categorylinks':
            // Target stored as a bare DB key in the "_to" field
            return [
                "{$prefix}_to" => $target->getDBkey(),
                "page_id={$prefix}_from"
            ];

        default:
            $hookConds = null;
            $this->getHookRunner()->onBacklinkCacheGetConditions( $table, $target, $hookConds );
            if ( !$hookConds ) {
                throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
            }

            return $hookConds;
    }
}
318 
/**
 * Check if there are any backlinks.
 *
 * @param string $table Backlink table name
 * @return bool True when at least one backlink is counted
 */
public function hasLinks( $table ) {
    // A limit of 1 is enough to tell "none" from "some"
    $found = $this->getNumLinks( $table, 1 );

    return $found > 0;
}
327 
/**
 * Get the approximate number of backlinks.
 *
 * Sources are tried in order: the in-process partition cache, the in-process
 * full result cache, the WAN cache, and finally the database.
 *
 * @param string $table Backlink table name
 * @param int|float $max Only count up to this many backlinks; INF for no limit
 * @return int|float Number of backlinks, capped at $max
 */
public function getNumLinks( $table, $max = INF ) {
    global $wgUpdateRowsPerJob;

    // 1) try partition cache ...
    // Skip anything that is not a populated entry: partition() may leave a
    // non-array placeholder behind when it is interrupted by an exception.
    foreach ( $this->partitionCache[$table] ?? [] as $entry ) {
        if ( is_array( $entry ) ) {
            return min( $max, $entry['numRows'] );
        }
    }

    // 2) ... then try full result cache ...
    if ( isset( $this->fullResultCache[$table] ) ) {
        return min( $max, $this->fullResultCache[$table]->numRows() );
    }

    $memcKey = $this->wanCache->makeKey(
        'numbacklinks',
        md5( $this->title->getPrefixedDBkey() ),
        $table
    );

    // 3) ... fallback to memcached ...
    $curTTL = INF;
    $count = $this->wanCache->get(
        $memcKey,
        $curTTL,
        [
            $this->makeCheckKey()
        ]
    );
    // Compare against false rather than testing truthiness: a cached count of
    // zero is a valid hit, otherwise titles without backlinks always reach the DB.
    if ( $count !== false && ( $curTTL > 0 ) ) {
        return min( $max, $count );
    }

    // 4) fetch from the database ...
    if ( is_infinite( $max ) ) { // no limit at all
        // Use partition() since it will batch the query and skip the JOIN.
        // Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
        $this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache

        return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
    }

    // Probably some sane limit: fetch the full title info, since the caller
    // will likely need it next
    $count = $this->getLinks( $table, false, false, $max )->count();
    if ( $count < $max ) {
        // The limit was not reached, so this is the full count and worth caching
        $this->wanCache->set( $memcKey, $count, self::CACHE_EXPIRY );
    }

    return min( $max, $count );
}
384 
/**
 * Partition the backlinks into batches.
 *
 * Sources are tried in order: the in-process partition cache, the in-process
 * full result cache, the WAN cache, and finally the database. The entry is
 * written to $partitionCache only once it is complete, so an exception thrown
 * part-way cannot leave an unusable placeholder behind.
 *
 * @param string $table Backlink table name
 * @param int $batchSize Number of backlinks per batch
 * @return array[] List of [ start, end ] source page ID pairs; the first start and the
 *  last end are false, meaning unbounded
 */
public function partition( $table, $batchSize ) {
    // 1) try partition cache ...
    // Only a populated (array) entry counts as a hit
    $cached = $this->partitionCache[$table][$batchSize] ?? null;
    if ( is_array( $cached ) ) {
        wfDebug( __METHOD__ . ": got from partition cache" );

        return $cached['batches'];
    }

    // 2) ... then try full result cache ...
    if ( isset( $this->fullResultCache[$table] ) ) {
        $cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
        $this->partitionCache[$table][$batchSize] = $cacheEntry;
        wfDebug( __METHOD__ . ": got from full result cache" );

        return $cacheEntry['batches'];
    }

    $memcKey = $this->wanCache->makeKey(
        'backlinks',
        md5( $this->title->getPrefixedDBkey() ),
        $table,
        $batchSize
    );

    // 3) ... fallback to memcached ...
    $curTTL = 0;
    $memcValue = $this->wanCache->get(
        $memcKey,
        $curTTL,
        [
            $this->makeCheckKey()
        ]
    );
    if ( is_array( $memcValue ) && ( $curTTL > 0 ) ) {
        $this->partitionCache[$table][$batchSize] = $memcValue;
        wfDebug( __METHOD__ . ": got from memcached $memcKey" );

        return $memcValue['batches'];
    }

    // 4) ... finally fetch from the slow database :(
    $cacheEntry = [ 'numRows' => 0, 'batches' => [] ]; // final result
    // Do the selects in batches to avoid client-side OOMs (T45452).
    // Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
    $selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
    $start = false;
    do {
        $res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
        $partitions = $this->partitionResult( $res, $batchSize, false );
        // Merge the link count and range partitions for this chunk
        $cacheEntry['numRows'] += $partitions['numRows'];
        $cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
        if ( count( $partitions['batches'] ) ) {
            [ , $lEnd ] = end( $partitions['batches'] );
            $start = $lEnd + 1; // pick up after this inclusive range
        }
        // A chunk that came back full may have more rows behind it
    } while ( $partitions['numRows'] >= $selectSize );
    // Make sure the first range has start=false and the last one has end=false
    if ( count( $cacheEntry['batches'] ) ) {
        $cacheEntry['batches'][0][0] = false;
        $cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
    }

    // The entry is complete now, so it is safe to publish it in-process
    $this->partitionCache[$table][$batchSize] = $cacheEntry;

    // Save partitions to memcached
    $this->wanCache->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

    // Save backlink count to memcached
    $countKey = $this->wanCache->makeKey(
        'numbacklinks',
        md5( $this->title->getPrefixedDBkey() ),
        $table
    );
    $this->wanCache->set( $countKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

    wfDebug( __METHOD__ . ": got from database" );

    return $cacheEntry['batches'];
}
474 
/**
 * Partition a DB result with backlinks in it into batches.
 *
 * Each batch is a [ start, end ] pair of page IDs read from the rows at the
 * batch boundaries. When $isComplete is true, the first start and the last
 * end are replaced by false.
 *
 * @param mixed $res Result set offering numRows(), seek() and fetchObject(), whose
 *  rows carry a page_id field
 * @param int $batchSize Number of rows per batch
 * @param bool $isComplete Whether $res holds all the backlinks
 * @return array Map with 'numRows' (row count of $res) and 'batches' (list of pairs)
 * @throws MWException When a batch starts at a higher page ID than it ends at
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
    $numRows = $res->numRows();
    $numBatches = ceil( $numRows / $batchSize );
    $lastIndex = $numBatches - 1;

    // Reads the page ID stored in the row at the given offset
    $pageIdAt = static function ( $rowNum ) use ( $res ) {
        $res->seek( $rowNum );

        return (int)$res->fetchObject()->page_id;
    };

    $batches = [];
    for ( $i = 0; $i < $numBatches; $i++ ) {
        $start = ( $i == 0 && $isComplete )
            ? false
            : $pageIdAt( $i * $batchSize );

        // The final batch may be short, hence the clamp to the last row
        $end = ( $i == $lastIndex && $isComplete )
            ? false
            : $pageIdAt( min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 ) );

        # Sanity check order
        if ( $start && $end && $start > $end ) {
            throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
        }

        $batches[] = [ $start, $end ];
    }

    return [ 'numRows' => $numRows, 'batches' => $batches ];
}
517 
/**
 * Get a Title iterator for cascade-protected template/file use backlinks.
 *
 * Selects the pages that use this title as a template and have a
 * page_restrictions row with pr_cascade = 1. When the title is in the file
 * namespace, pages that use it as an image are selected as well. The result
 * sets are merged and de-duplicated by page ID.
 *
 * @return mixed Whatever TitleArray::newFromResult() builds from the merged rows
 */
public function getCascadeProtectedLinks() {
    $dbr = $this->getDB();

    // @todo: use UNION without breaking tests that use temp tables
    $resSets = [];
    $resSets[] = $dbr->select(
        [ 'templatelinks', 'page_restrictions', 'page' ],
        [ 'page_namespace', 'page_title', 'page_id' ],
        [
            'tl_namespace' => $this->title->getNamespace(),
            'tl_title' => $this->title->getDBkey(),
            'tl_from = pr_page',
            'pr_cascade' => 1,
            'page_id = tl_from'
        ],
        __METHOD__,
        [ 'DISTINCT' ]
    );
    if ( $this->title->getNamespace() === NS_FILE ) {
        $resSets[] = $dbr->select(
            [ 'imagelinks', 'page_restrictions', 'page' ],
            [ 'page_namespace', 'page_title', 'page_id' ],
            [
                'il_to' => $this->title->getDBkey(),
                'il_from = pr_page',
                'pr_cascade' => 1,
                'page_id = il_from'
            ],
            __METHOD__,
            [ 'DISTINCT' ]
        );
    }

    // Combine and de-duplicate the results; keying by page ID makes a page
    // found by both queries appear only once
    $mergedRes = [];
    foreach ( $resSets as $res ) {
        foreach ( $res as $row ) {
            $mergedRes[$row->page_id] = $row;
        }
    }

    // Wrap the merged rows so they can be consumed like a regular query result
    return TitleArray::newFromResult(
        new FakeResultWrapper( array_values( $mergedRes ) ) );
}
568 
/**
 * Returns check key for the backlinks cache for a particular title.
 *
 * @return string Key built by WANObjectCache::makeKey() from 'backlinks' and the MD5
 *  of the prefixed DB key
 */
private function makeCheckKey() {
    $titleHash = md5( $this->title->getPrefixedDBkey() );

    return $this->wanCache->makeKey( 'backlinks', $titleHash );
}
580 }
BacklinkCache\getPrefix
getPrefix( $table)
Get the field name prefix for a given table.
Definition: BacklinkCache.php:249
BacklinkCache\$wanCache
WANObjectCache $wanCache
Definition: BacklinkCache.php:80
TitleArray\newFromResult
static newFromResult( $res)
Definition: TitleArray.php:42
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:154
NS_FILE
const NS_FILE
Definition: Defines.php:75
BacklinkCache\getDB
getDB()
Get the replica DB connection to the database. When none exists yet, the connection is initialized.
Definition: BacklinkCache.php:158
BacklinkCache
Class for fetching backlink lists, approximate backlink counts and partitions.
Definition: BacklinkCache.php:47
$res
$res
Definition: testCompression.php:57
Wikimedia\Rdbms\FakeResultWrapper
Overloads the relevant methods of the real ResultsWrapper so it doesn't go anywhere near an actual da...
Definition: FakeResultWrapper.php:11
BacklinkCache\$title
$title
Local copy of a Title object.
Definition: BacklinkCache.php:94
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relational database handles.
Definition: IDatabase.php:38
$dbr
$dbr
Definition: testCompression.php:54
BacklinkCache\getNumLinks
getNumLinks( $table, $max=INF)
Get the approximate number of backlinks.
Definition: BacklinkCache.php:334
LIST_OR
const LIST_OR
Definition: Defines.php:51
MWException
MediaWiki exception.
Definition: MWException.php:29
BacklinkCache\partition
partition( $table, $batchSize)
Partition the backlinks into batches.
Definition: BacklinkCache.php:394
Wikimedia\Rdbms\IResultWrapper
Result wrapper for grabbing data queried from an IDatabase object.
Definition: IResultWrapper.php:24
BacklinkCache\queryLinks
queryLinks( $table, $startId, $endId, $max, $select='all')
Get the backlinks for a given table.
Definition: BacklinkCache.php:187
BacklinkCache\$fullResultCache
IResultWrapper[] $fullResultCache
Contains the whole links from a database result.
Definition: BacklinkCache.php:75
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2467
BacklinkCache\__sleep
__sleep()
Serialization handler; disallows serializing the database to prevent failures after this class is d...
Definition: BacklinkCache.php:130
BacklinkCache\partitionResult
partitionResult( $res, $batchSize, $isComplete=true)
Partition a DB result with backlinks in it into batches.
Definition: BacklinkCache.php:483
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
$wgUpdateRowsPerJob
$wgUpdateRowsPerJob
Number of rows to update per job.
Definition: DefaultSettings.php:8957
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:909
BacklinkCache\clear
clear()
Clear locally stored data and database object.
Definition: BacklinkCache.php:137
BacklinkCache\CACHE_EXPIRY
const CACHE_EXPIRY
Definition: BacklinkCache.php:96
BacklinkCache\makeCheckKey
makeCheckKey()
Returns check key for the backlinks cache for a particular title.
Definition: BacklinkCache.php:574
BacklinkCache\$db
$db
Local copy of a database object.
Definition: BacklinkCache.php:89
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:125
BacklinkCache\__construct
__construct(Title $title)
Create a new BacklinkCache.
Definition: BacklinkCache.php:103
BacklinkCache\hasLinks
hasLinks( $table)
Check if there are any backlinks.
Definition: BacklinkCache.php:324
BacklinkCache\getCascadeProtectedLinks
getCascadeProtectedLinks()
Get a Title iterator for cascade-protected template/file use backlinks.
Definition: BacklinkCache.php:524
Title
Represents a title within MediaWiki.
Definition: Title.php:42
BacklinkCache\$partitionCache
array[] $partitionCache
Multi-dimensional array representing batches.
Definition: BacklinkCache.php:65
BacklinkCache\setDB
setDB( $db)
Set the Database object to use.
Definition: BacklinkCache.php:149
BacklinkCache\getConditions
getConditions( $table)
Get the SQL condition array for selecting backlinks, with a join on the page table.
Definition: BacklinkCache.php:278
BacklinkCache\$instance
static BacklinkCache $instance
Definition: BacklinkCache.php:51
BacklinkCache\getLinks
getLinks( $table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
Definition: BacklinkCache.php:174