MediaWiki  master
BacklinkCache.php
Go to the documentation of this file.
1 <?php
32 
/** @var BacklinkCache|null Instance most recently built by get(); reused while the title matches */
48  protected static $instance;
49 
/**
 * @var array[] Partition data held in this process, indexed first by link table
 *  name and then by batch size. Each entry is an array with the keys 'numRows'
 *  and 'batches' (see partition() and partitionResult()).
 */
62  protected $partitionCache = [];
63 
/**
 * @var IResultWrapper[] Complete backlink query results, indexed by link table
 *  name. Only filled by queryLinks() when the whole result fit within the limit.
 */
72  protected $fullResultCache = [];
73 
/** @var WANObjectCache Shared cache used for backlink counts and partitions */
77  protected $wanCache;
78 
/**
 * @var mixed Local copy of a database object; null until getDB() or setDB()
 *  provides one, and reset to null by clear().
 */
86  protected $db;
87 
/** @var Title Local copy of the Title object whose backlinks are being looked up */
91  protected $title;
92 
/**
 * TTL passed to WANObjectCache::set() for counts and partitions.
 * NOTE(review): presumably seconds (i.e. one hour) -- confirm against WANObjectCache.
 */
93  private const CACHE_EXPIRY = 3600;
94 
/**
 * Create a new BacklinkCache.
 *
 * @param Title $title Title object whose backlinks this cache will look up
 */
public function __construct( Title $title ) {
	$services = MediaWikiServices::getInstance();

	$this->title = $title;
	$this->wanCache = $services->getMainWANObjectCache();
}
104 
/**
 * Fetch the BacklinkCache for a title, reusing the stored instance when possible.
 *
 * Only one instance is remembered at a time: asking for a title that does not
 * equal the stored instance's title replaces the stored instance.
 *
 * @param Title $title Title to get a BacklinkCache for
 * @return BacklinkCache
 */
public static function get( Title $title ) {
	$current = self::$instance;
	if ( $current && $current->title->equals( $title ) ) {
		return $current;
	}

	self::$instance = new self( $title );

	return self::$instance;
}
119 
/**
 * Serialization handler.
 *
 * Lists the properties to keep when the object is serialized. The database
 * handle ($db) and the WAN cache ($wanCache) are deliberately not listed.
 *
 * @return string[] Names of the properties to serialize
 */
public function __sleep() {
	$serializable = [ 'partitionCache', 'fullResultCache', 'title' ];

	return $serializable;
}
130 
/**
 * Clear locally stored data and the database object.
 *
 * Also touches the check key for this title, which the WAN cache lookups in
 * getNumLinks() and partition() pass along when reading their entries.
 */
public function clear() {
	// Drop everything cached in this process
	$this->fullResultCache = [];
	$this->partitionCache = [];

	// Bump the check key shared by the count and partition entries
	$checkKey = $this->makeCheckKey();
	$this->wanCache->touchCheckKey( $checkKey );

	// Forget the connection; getDB() will fetch a new one on demand
	$this->db = null;
}
140 
/**
 * Set the Database object to use.
 *
 * The handle given here is what getDB() returns until clear() resets it.
 *
 * @param mixed $db Database handle; presumably an IDatabase -- TODO confirm
 */
public function setDB( $db ) {
	$this->db = $db;
}
149 
/**
 * Get the database connection to query.
 *
 * When no connection has been stored yet, a DB_REPLICA connection is fetched
 * via wfGetDB() and remembered for later calls.
 *
 * @return mixed The stored database handle
 */
protected function getDB() {
	if ( $this->db !== null ) {
		return $this->db;
	}

	$this->db = wfGetDB( DB_REPLICA );

	return $this->db;
}
162 
/**
 * Get the backlinks for a given table.
 *
 * @param string $table Link table name
 * @param int|bool $startId Lowest source page ID to include, or false for no lower bound
 * @param int|bool $endId Highest source page ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows; INF for no limit
 * @return TitleArray Whatever TitleArray::newFromResult() builds from the query result
 */
public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
	$rows = $this->queryLinks( $table, $startId, $endId, $max );

	return TitleArray::newFromResult( $rows );
}
174 
/**
 * Get the backlinks for a given table.
 *
 * An unbounded request (no start, no end, infinite $max) is answered from
 * $fullResultCache when an entry exists for the table. Otherwise the database
 * is queried, and the result is stored in $fullResultCache only when it is an
 * unranged 'all' query that returned fewer than $max rows.
 *
 * @param string $table Link table name
 * @param int|bool $startId Lowest source page ID to include, or false
 * @param int|bool $endId Highest source page ID to include, or false
 * @param int|float $max Maximum number of rows; INF for no limit
 * @param string $select 'all' to join against page and fetch title fields,
 *  'ids' to read only the source page IDs from the link table
 * @return IResultWrapper Result of the select (or the cached one)
 */
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
	$unranged = !$startId && !$endId;

	if ( $unranged && is_infinite( $max ) && isset( $this->fullResultCache[$table] ) ) {
		wfDebug( __METHOD__ . ": got results from cache\n" );

		return $this->fullResultCache[$table];
	}

	wfDebug( __METHOD__ . ": got results from DB\n" );
	$fromField = $this->getPrefix( $table ) . '_from';
	$conds = $this->getConditions( $table );
	// The range is expressed on the link table's own "from" field instead of the
	// joined page_id, since the database may not carry the index over the join.
	if ( $startId ) {
		$conds[] = "$fromField >= " . intval( $startId );
	}
	if ( $endId ) {
		$conds[] = "$fromField <= " . intval( $endId );
	}

	$options = [ 'ORDER BY' => $fromField ];
	if ( is_finite( $max ) && $max > 0 ) {
		$options['LIMIT'] = $max;
	}

	$dbr = $this->getDB();
	if ( $select === 'ids' ) {
		// Only the link table is read here, so strip every condition that
		// mentions page_id (i.e. the join clause) before querying.
		$linkOnlyConds = array_filter( $conds, function ( $clause ) {
			return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
		} );
		$res = $dbr->select(
			$table,
			[ 'page_id' => $fromField ],
			$linkOnlyConds,
			__METHOD__,
			$options
		);
	} else {
		// Read the link table joined with page to get title information
		$res = $dbr->select(
			[ $table, 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			$conds,
			__METHOD__,
			array_merge( [ 'STRAIGHT_JOIN' ], $options )
		);
	}

	if ( $select === 'all' && $unranged && $res->numRows() < $max ) {
		// Everything fit below the limit, so this is the full list: keep it
		$this->fullResultCache[$table] = $res;
	} else {
		wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
	}

	return $res;
}
239 
/**
 * Get the field name prefix for a given table.
 *
 * Tables not in the built-in map are offered to the 'BacklinkCacheGetPrefix'
 * hook, which may fill in the prefix by reference.
 *
 * @param string $table Link table name
 * @return string Field name prefix, e.g. 'pl' for 'pagelinks'
 * @throws MWException When the table is unknown and the hook supplied no prefix
 */
protected function getPrefix( $table ) {
	static $prefixes = [
		'pagelinks' => 'pl',
		'imagelinks' => 'il',
		'categorylinks' => 'cl',
		'templatelinks' => 'tl',
		'redirect' => 'rd',
	];

	if ( isset( $prefixes[$table] ) ) {
		return $prefixes[$table];
	}

	$prefix = null;
	Hooks::run( 'BacklinkCacheGetPrefix', [ $table, &$prefix ] );
	if ( !$prefix ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $prefix;
}
267 
/**
 * Get the SQL condition array for selecting backlinks, with a join on the page table.
 *
 * Tables without a built-in case are offered to the 'BacklinkCacheGetConditions'
 * hook, which may fill in the conditions by reference.
 *
 * @param string $table Link table name
 * @return array Conditions suitable for the database select() call
 * @throws MWException When the table is unknown (here or in getPrefix()) and
 *  no hook supplied conditions
 */
protected function getConditions( $table ) {
	$prefix = $this->getPrefix( $table );
	$title = $this->title;

	switch ( $table ) {
		case 'pagelinks':
		case 'templatelinks':
			return [
				"{$prefix}_namespace" => $title->getNamespace(),
				"{$prefix}_title" => $title->getDBkey(),
				"page_id={$prefix}_from"
			];

		case 'redirect':
			$conds = [
				"{$prefix}_namespace" => $title->getNamespace(),
				"{$prefix}_title" => $title->getDBkey(),
			];
			// The interwiki field must be either the empty string or NULL
			$conds[] = $this->getDB()->makeList( [
				"{$prefix}_interwiki" => '',
				"{$prefix}_interwiki IS NULL",
			], LIST_OR );
			$conds[] = "page_id={$prefix}_from";

			return $conds;

		case 'imagelinks':
		case 'categorylinks':
			return [
				"{$prefix}_to" => $title->getDBkey(),
				"page_id={$prefix}_from"
			];

		default:
			$conds = null;
			Hooks::run( 'BacklinkCacheGetConditions', [ $table, $this->title, &$conds ] );
			if ( !$conds ) {
				throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
			}

			return $conds;
	}
}
315 
/**
 * Check if there are any backlinks.
 *
 * @param string $table Link table name
 * @return bool True when getNumLinks(), capped at 1, reports at least one link
 */
public function hasLinks( $table ) {
	$cappedCount = $this->getNumLinks( $table, 1 );

	return $cappedCount > 0;
}
324 
/**
 * Get the approximate number of backlinks.
 *
 * Sources are tried in order: the in-process partition cache, the in-process
 * full result cache, the WAN cache, and finally the database.
 *
 * @param string $table Link table name
 * @param int|float $max Only count up to this many backlinks; INF for no cap
 * @return int|float Number of backlinks, never more than $max
 */
public function getNumLinks( $table, $max = INF ) {
	global $wgUpdateRowsPerJob;

	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table] ) ) {
		$entry = reset( $this->partitionCache[$table] );
		// Only trust completed entries: reset() yields false for an empty list,
		// and partition() may have left a non-array placeholder behind.
		if ( is_array( $entry ) ) {
			return min( $max, $entry['numRows'] );
		}
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		return min( $max, $this->fullResultCache[$table]->numRows() );
	}

	$memcKey = $this->wanCache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);

	// 3) ... fallback to memcached ...
	$curTTL = INF;
	$count = $this->wanCache->get(
		$memcKey,
		$curTTL,
		[
			$this->makeCheckKey()
		]
	);
	// A stored count of 0 is a legitimate hit (the title has no backlinks), so
	// test for a numeric value rather than truthiness; a miss is not numeric.
	if ( is_numeric( $count ) && ( $curTTL > 0 ) ) {
		return min( $max, $count );
	}

	// 4) fetch from the database ...
	if ( is_infinite( $max ) ) { // no limit at all
		// Use partition() since it will batch the query and skip the JOIN.
		// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
		$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache

		return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
	}

	// Probably some sane limit: fetch the full title info, since the caller
	// will likely need it next
	$count = $this->getLinks( $table, false, false, $max )->count();
	if ( $count < $max ) {
		// Fewer rows than the cap means this is the full count, so share it
		$this->wanCache->set( $memcKey, $count, self::CACHE_EXPIRY );
	}

	return min( $max, $count );
}
381 
/**
 * Partition the backlinks into batches.
 *
 * Sources are tried in order: the in-process partition cache, the in-process
 * full result cache, the WAN cache, and finally the database. An entry is
 * written to the in-process partition cache only once it is complete, so a
 * failure part-way through never leaves a half-built entry for later calls.
 *
 * @param string $table Link table name
 * @param int $batchSize Number of rows per batch
 * @return array[] List of [ start, end ] pairs of inclusive page ID bounds;
 *  when read from the database, the first start and the last end are false
 */
public function partition( $table, $batchSize ) {
	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table][$batchSize] )
		&& is_array( $this->partitionCache[$table][$batchSize] )
	) {
		wfDebug( __METHOD__ . ": got from partition cache\n" );

		return $this->partitionCache[$table][$batchSize]['batches'];
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		wfDebug( __METHOD__ . ": got from full result cache\n" );

		return $cacheEntry['batches'];
	}

	$memcKey = $this->wanCache->makeKey(
		'backlinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table,
		$batchSize
	);

	// 3) ... fallback to memcached ...
	$curTTL = 0;
	$memcValue = $this->wanCache->get(
		$memcKey,
		$curTTL,
		[
			$this->makeCheckKey()
		]
	);
	if ( is_array( $memcValue ) && ( $curTTL > 0 ) ) {
		$this->partitionCache[$table][$batchSize] = $memcValue;
		wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

		return $memcValue['batches'];
	}

	// 4) ... finally fetch from the slow database :(
	$cacheEntry = [ 'numRows' => 0, 'batches' => [] ]; // final result
	// Do the selects in batches to avoid client-side OOMs (T45452).
	// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
	$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
	$start = false;
	do {
		$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
		$partitions = $this->partitionResult( $res, $batchSize, false );
		// Merge the link count and range partitions for this chunk
		$cacheEntry['numRows'] += $partitions['numRows'];
		$cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
		if ( count( $partitions['batches'] ) ) {
			list( , $lEnd ) = end( $partitions['batches'] );
			$start = $lEnd + 1; // pick up after this inclusive range
		}
	} while ( $partitions['numRows'] >= $selectSize );
	// Make sure the first range has start=false and the last one has end=false
	if ( count( $cacheEntry['batches'] ) ) {
		$cacheEntry['batches'][0][0] = false;
		$cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
	}

	// The entry is final now; remember it for the rest of this process
	$this->partitionCache[$table][$batchSize] = $cacheEntry;

	// Save partitions to memcached
	$this->wanCache->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

	// Save backlink count to memcached
	$memcKey = $this->wanCache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);
	$this->wanCache->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

	wfDebug( __METHOD__ . ": got from database\n" );

	return $cacheEntry['batches'];
}
471 
/**
 * Partition a DB result with backlinks in it into batches.
 *
 * Each batch is a [ start, end ] pair of page IDs read from the first and last
 * row of that batch. When $isComplete is true, the first batch's start and the
 * last batch's end are false instead of concrete IDs.
 *
 * @param IResultWrapper $res Result whose rows expose a page_id field
 * @param int $batchSize Number of rows per batch
 * @param bool $isComplete Whether $res covers the complete set of backlinks
 * @return array Array with the keys 'numRows' (int) and 'batches' (array[])
 * @throws MWException When a batch's start ID is greater than its end ID
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
	$numRows = $res->numRows();
	$numBatches = ceil( $numRows / $batchSize );

	// Reads the page ID stored in the given row of the result
	$pageIdAt = function ( $rowNum ) use ( $res ) {
		$res->seek( $rowNum );
		$row = $res->fetchObject();

		return (int)$row->page_id;
	};

	$batches = [];
	for ( $i = 0; $i < $numBatches; $i++ ) {
		$openStart = ( $i == 0 && $isComplete );
		$start = $openStart ? false : $pageIdAt( $i * $batchSize );

		$openEnd = ( $i == ( $numBatches - 1 ) && $isComplete );
		// The final batch may be short, so never look past the last row
		$end = $openEnd
			? false
			: $pageIdAt( min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 ) );

		# Sanity check order
		if ( $start && $end && $start > $end ) {
			throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
		}

		$batches[] = [ $start, $end ];
	}

	return [ 'numRows' => $numRows, 'batches' => $batches ];
}
514 
/**
 * Get a Title iterator for cascade-protected template/file use backlinks.
 *
 * Template links are always queried; image links are queried as well when the
 * title is in the NS_FILE namespace. Rows from both queries are merged and
 * de-duplicated by page ID.
 *
 * @return TitleArray Whatever TitleArray::newFromResult() builds from the merged rows
 */
public function getCascadeProtectedLinks() {
	$dbr = $this->getDB();

	// @todo: use UNION without breaking tests that use temp tables
	$resSets = [];
	$resSets[] = $dbr->select(
		[ 'templatelinks', 'page_restrictions', 'page' ],
		[ 'page_namespace', 'page_title', 'page_id' ],
		[
			'tl_namespace' => $this->title->getNamespace(),
			'tl_title' => $this->title->getDBkey(),
			'tl_from = pr_page',
			'pr_cascade' => 1,
			'page_id = tl_from'
		],
		__METHOD__,
		[ 'DISTINCT' ]
	);
	if ( $this->title->getNamespace() == NS_FILE ) {
		$resSets[] = $dbr->select(
			[ 'imagelinks', 'page_restrictions', 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			[
				'il_to' => $this->title->getDBkey(),
				'il_from = pr_page',
				'pr_cascade' => 1,
				'page_id = il_from'
			],
			__METHOD__,
			[ 'DISTINCT' ]
		);
	}

	// Combine and de-duplicate the results
	$mergedRes = [];
	foreach ( $resSets as $res ) {
		foreach ( $res as $row ) {
			// Keyed by page ID, so a page found by both queries is kept once
			$mergedRes[$row->page_id] = $row;
		}
	}

	return TitleArray::newFromResult(
		new FakeResultWrapper( array_values( $mergedRes ) ) );
}
565 
/**
 * Returns check key for the backlinks cache for a particular title.
 *
 * @return string Key built by the WAN cache from 'backlinks' and the MD5 of
 *  the title's prefixed DB key
 */
private function makeCheckKey() {
	$titleHash = md5( $this->title->getPrefixedDBkey() );

	return $this->wanCache->makeKey( 'backlinks', $titleHash );
}
577 }
BacklinkCache\getPrefix
getPrefix( $table)
Get the field name prefix for a given table.
Definition: BacklinkCache.php:246
BacklinkCache\$wanCache
WANObjectCache $wanCache
Definition: BacklinkCache.php:77
TitleArray\newFromResult
static newFromResult( $res)
Definition: TitleArray.php:42
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:144
NS_FILE
const NS_FILE
Definition: Defines.php:75
BacklinkCache\getDB
getDB()
Get the replica DB connection to the database. When it does not exist yet, the connection will be initialized.
Definition: BacklinkCache.php:155
BacklinkCache
Class for fetching backlink lists, approximate backlink counts and partitions.
Definition: BacklinkCache.php:46
$res
$res
Definition: testCompression.php:57
Wikimedia\Rdbms\FakeResultWrapper
Overloads the relevant methods of the real ResultsWrapper so it doesn't go anywhere near an actual da...
Definition: FakeResultWrapper.php:11
BacklinkCache\$title
$title
Local copy of a Title object.
Definition: BacklinkCache.php:91
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
$dbr
$dbr
Definition: testCompression.php:54
BacklinkCache\getNumLinks
getNumLinks( $table, $max=INF)
Get the approximate number of backlinks.
Definition: BacklinkCache.php:331
LIST_OR
const LIST_OR
Definition: Defines.php:51
MWException
MediaWiki exception.
Definition: MWException.php:26
BacklinkCache\partition
partition( $table, $batchSize)
Partition the backlinks into batches.
Definition: BacklinkCache.php:391
Wikimedia\Rdbms\IResultWrapper
Result wrapper for grabbing data queried from an IDatabase object.
Definition: IResultWrapper.php:24
BacklinkCache\queryLinks
queryLinks( $table, $startId, $endId, $max, $select='all')
Get the backlinks for a given table.
Definition: BacklinkCache.php:184
BacklinkCache\$fullResultCache
IResultWrapper[] $fullResultCache
Contains the whole links from a database result.
Definition: BacklinkCache.php:72
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2463
BacklinkCache\__sleep
__sleep()
Serialization handler, disallows serializing the database to prevent failures after this class is d...
Definition: BacklinkCache.php:127
BacklinkCache\partitionResult
partitionResult( $res, $batchSize, $isComplete=true)
Partition a DB result with backlinks in it into batches.
Definition: BacklinkCache.php:480
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
$wgUpdateRowsPerJob
$wgUpdateRowsPerJob
Number of rows to update per job.
Definition: DefaultSettings.php:8883
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:913
BacklinkCache\clear
clear()
Clear locally stored data and database object.
Definition: BacklinkCache.php:134
BacklinkCache\CACHE_EXPIRY
const CACHE_EXPIRY
Definition: BacklinkCache.php:93
BacklinkCache\makeCheckKey
makeCheckKey()
Returns check key for the backlinks cache for a particular title.
Definition: BacklinkCache.php:571
BacklinkCache\$db
$db
Local copy of a database object.
Definition: BacklinkCache.php:86
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:120
BacklinkCache\__construct
__construct(Title $title)
Create a new BacklinkCache.
Definition: BacklinkCache.php:100
BacklinkCache\hasLinks
hasLinks( $table)
Check if there are any backlinks.
Definition: BacklinkCache.php:321
BacklinkCache\getCascadeProtectedLinks
getCascadeProtectedLinks()
Get a Title iterator for cascade-protected template/file use backlinks.
Definition: BacklinkCache.php:521
Title
Represents a title within MediaWiki.
Definition: Title.php:42
BacklinkCache\$partitionCache
array[] $partitionCache
Multi-dimensional array representing batches.
Definition: BacklinkCache.php:62
BacklinkCache\setDB
setDB( $db)
Set the Database object to use.
Definition: BacklinkCache.php:146
BacklinkCache\getConditions
getConditions( $table)
Get the SQL condition array for selecting backlinks, with a join on the page table.
Definition: BacklinkCache.php:275
BacklinkCache\$instance
static BacklinkCache $instance
Definition: BacklinkCache.php:48
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:133
BacklinkCache\getLinks
getLinks( $table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
Definition: BacklinkCache.php:171