MediaWiki  1.30.1
BacklinkCache.php
Go to the documentation of this file.
1 <?php
31 
47  protected static $instance;
48 
61  protected $partitionCache = [];
62 
71  protected $fullResultCache = [];
72 
80  protected $db;
81 
85  protected $title;
86 
87  const CACHE_EXPIRY = 3600;
88 
/**
 * Create a new BacklinkCache.
 *
 * @param Title $title Title object whose backlinks this cache describes
 */
public function __construct( Title $title ) {
	$this->title = $title;
}
97 
/**
 * Return the shared BacklinkCache for a title.
 *
 * Only one instance is remembered at a time: asking for a different
 * title replaces the stored instance with a fresh one.
 *
 * @param Title $title Title object to get the cache for
 * @return BacklinkCache
 */
public static function get( Title $title ) {
	$current = self::$instance;
	if ( $current && $current->title->equals( $title ) ) {
		// Same title as last time, reuse the stored instance
		return $current;
	}

	self::$instance = new self( $title );

	return self::$instance;
}
112 
/**
 * Serialization handler.
 *
 * Lists the properties to serialize; the database handle ($db) is
 * deliberately left out of the list.
 *
 * @return string[] Names of the properties to serialize
 */
function __sleep() {
	$serializable = [ 'partitionCache', 'fullResultCache', 'title' ];

	return $serializable;
}
123 
/**
 * Clear locally stored data and database object.
 */
public function clear() {
	// Dropping the handle makes getDB() fetch a new one on next use
	unset( $this->db );
	$this->fullResultCache = [];
	$this->partitionCache = [];
}
132 
/**
 * Set the Database object to use.
 *
 * @param mixed $db Database handle that getDB() will return from now on
 */
public function setDB( $db ) {
	$this->db = $db;
}
141 
/**
 * Get the database handle, initializing it with a replica connection
 * (DB_REPLICA) when none has been set yet.
 *
 * @return mixed The handle stored in $this->db
 */
protected function getDB() {
	if ( isset( $this->db ) ) {
		return $this->db;
	}

	$this->db = wfGetDB( DB_REPLICA );

	return $this->db;
}
154 
/**
 * Get the backlinks for a given table, wrapped as a TitleArray.
 *
 * @param string $table Backlink table name
 * @param int|bool $startId Lowest source page ID to include, or false for no lower bound
 * @param int|bool $endId Highest source page ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows, INF for no limit
 * @return TitleArray
 */
public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
	$res = $this->queryLinks( $table, $startId, $endId, $max );

	return TitleArray::newFromResult( $res );
}
166 
/**
 * Get the backlinks for a given table.
 *
 * An unbounded request (no start, no end, infinite $max) is answered from
 * the in-process full result cache when available. A 'all' query without
 * start/end whose row count stays below $max is stored in that cache.
 *
 * @param string $table Backlink table name
 * @param int|bool $startId Lowest source page ID to include, or false
 * @param int|bool $endId Highest source page ID to include, or false
 * @param int|float $max Maximum number of rows, INF for no limit
 * @param string $select 'all' to join with page title information,
 *  'ids' to select only the source page IDs (aliased as page_id)
 * @return mixed Result of the database select() call
 */
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
	$fromField = $this->getPrefix( $table ) . '_from';
	$isUnbounded = !$startId && !$endId;

	if ( $isUnbounded && is_infinite( $max ) && isset( $this->fullResultCache[$table] ) ) {
		wfDebug( __METHOD__ . ": got results from cache\n" );

		return $this->fullResultCache[$table];
	}

	wfDebug( __METHOD__ . ": got results from DB\n" );
	$conds = $this->getConditions( $table );
	// Range conditions use the from field rather than the joined page_id,
	// because databases are stupid and don't necessarily propagate indexes.
	if ( $startId ) {
		$conds[] = "$fromField >= " . intval( $startId );
	}
	if ( $endId ) {
		$conds[] = "$fromField <= " . intval( $endId );
	}

	$options = [ 'ORDER BY' => $fromField ];
	if ( is_finite( $max ) && $max > 0 ) {
		$options['LIMIT'] = $max;
	}

	$dbr = $this->getDB();
	if ( $select === 'ids' ) {
		// Only the backlink table is queried here, so rename its from
		// field to page_id and drop every clause that mentions the page
		// table's page_id (i.e. the JOIN condition).
		$idField = $this->getPrefix( $table ) . '_from AS page_id';
		$idConds = array_filter( $conds, function ( $clause ) {
			return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
		} );
		$res = $dbr->select( $table, [ $idField ], $idConds, __METHOD__, $options );
	} else {
		// Backlink table JOINed with the page table for title information
		$res = $dbr->select(
			[ $table, 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			$conds,
			__METHOD__,
			array_merge( [ 'STRAIGHT_JOIN' ], $options )
		);
	}

	$isCacheable = $select === 'all' && $isUnbounded && $res->numRows() < $max;
	if ( $isCacheable ) {
		// The full results fit within the limit, so cache them
		$this->fullResultCache[$table] = $res;
	} else {
		wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
	}

	return $res;
}
232 
/**
 * Get the field name prefix for a given table.
 *
 * Tables not in the built-in map are offered to the
 * 'BacklinkCacheGetPrefix' hook, which may fill in the prefix.
 *
 * @param string $table Backlink table name
 * @return string Column prefix without the trailing underscore, e.g. 'pl'
 * @throws MWException When neither the map nor the hook yields a prefix
 */
protected function getPrefix( $table ) {
	static $prefixes = [
		'pagelinks' => 'pl',
		'imagelinks' => 'il',
		'categorylinks' => 'cl',
		'templatelinks' => 'tl',
		'redirect' => 'rd',
	];

	if ( isset( $prefixes[$table] ) ) {
		return $prefixes[$table];
	}

	// Unknown table: let hook handlers supply the prefix by reference
	$prefix = null;
	Hooks::run( 'BacklinkCacheGetPrefix', [ $table, &$prefix ] );
	if ( !$prefix ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $prefix;
}
260 
/**
 * Get the SQL condition array for selecting backlinks, with a join
 * on the page table.
 *
 * Tables without a built-in case are offered to the
 * 'BacklinkCacheGetConditions' hook, which may fill in the conditions.
 *
 * @param string $table Backlink table name
 * @return array Conditions in the form accepted by the database select() call
 * @throws MWException When the table is unknown and no hook supplies conditions
 */
protected function getConditions( $table ) {
	$prefix = $this->getPrefix( $table );
	// JOIN condition tying the backlink row to its source page
	$joinCond = "page_id={$prefix}_from";

	switch ( $table ) {
		case 'pagelinks':
		case 'templatelinks':
			return [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
				$joinCond
			];
		case 'redirect':
			return [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
				// Accept both an empty and a NULL interwiki prefix
				$this->getDB()->makeList( [
					"{$prefix}_interwiki" => '',
					"{$prefix}_interwiki IS NULL",
				], LIST_OR ),
				$joinCond
			];
		case 'imagelinks':
		case 'categorylinks':
			return [
				"{$prefix}_to" => $this->title->getDBkey(),
				$joinCond
			];
		default:
			$conds = null;
			Hooks::run( 'BacklinkCacheGetConditions', [ $table, $this->title, &$conds ] );
			if ( !$conds ) {
				throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
			}

			return $conds;
	}
}
308 
/**
 * Check if there are any backlinks.
 *
 * @param string $table Backlink table name
 * @return bool
 */
public function hasLinks( $table ) {
	// Counting up to one link is enough to answer the question
	$count = $this->getNumLinks( $table, 1 );

	return $count > 0;
}
317 
/**
 * Get the approximate number of backlinks.
 *
 * Sources are consulted in this order: the in-process partition cache,
 * the in-process full result cache, the shared object cache, and finally
 * the database.
 *
 * @param string $table Backlink table name
 * @param int|float $max Only count up to this many backlinks, INF for no limit
 * @return int
 */
public function getNumLinks( $table, $max = INF ) {
	global $wgUpdateRowsPerJob;

	$cache = ObjectCache::getMainWANInstance();

	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table] ) ) {
		// Any batch size will do; every entry carries the same total
		$entry = reset( $this->partitionCache[$table] );

		return min( $max, $entry['numRows'] );
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		return min( $max, $this->fullResultCache[$table]->numRows() );
	}

	$memcKey = $cache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);

	// 3) ... fallback to memcached ...
	$count = $cache->get( $memcKey );
	if ( $count ) {
		return min( $max, $count );
	}

	// 4) fetch from the database ...
	if ( is_infinite( $max ) ) { // no limit at all
		// Use partition() since it will batch the query and skip the JOIN.
		// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
		$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache
		return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
	} else { // probably some sane limit
		// Fetch the full title info, since the caller will likely need it next
		$count = $this->getLinks( $table, false, false, $max )->count();
		if ( $count < $max ) { // full count
			// Only a count below the limit is known to be complete
			$cache->set( $memcKey, $count, self::CACHE_EXPIRY );
		}
	}

	return min( $max, $count );
}
368 
/**
 * Partition the backlinks into batches.
 *
 * Returns an array giving the start and end of each range. The first
 * batch has a start of false, and the last batch has an end of false.
 *
 * Sources are consulted in this order: the in-process partition cache,
 * the in-process full result cache, the shared object cache, and finally
 * the database. A database result is written back to the shared cache
 * together with the total backlink count.
 *
 * @param string $table Backlink table name
 * @param int $batchSize Number of rows per batch
 * @return array[] List of [ start, end ] pairs
 */
public function partition( $table, $batchSize ) {
	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
		wfDebug( __METHOD__ . ": got from partition cache\n" );

		return $this->partitionCache[$table][$batchSize]['batches'];
	}

	$cache = ObjectCache::getMainWANInstance();
	// Writes to $cacheEntry below land directly in the partition cache
	$this->partitionCache[$table][$batchSize] = false;
	$cacheEntry =& $this->partitionCache[$table][$batchSize];

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		wfDebug( __METHOD__ . ": got from full result cache\n" );

		return $cacheEntry['batches'];
	}

	$memcKey = $cache->makeKey(
		'backlinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table,
		$batchSize
	);

	// 3) ... fallback to memcached ...
	$memcValue = $cache->get( $memcKey );
	if ( is_array( $memcValue ) ) {
		$cacheEntry = $memcValue;
		wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

		return $cacheEntry['batches'];
	}

	// 4) ... finally fetch from the slow database :(
	$cacheEntry = [ 'numRows' => 0, 'batches' => [] ]; // final result
	// Do the selects in batches to avoid client-side OOMs (T45452).
	// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
	$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
	$start = false;
	do {
		$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
		$partitions = $this->partitionResult( $res, $batchSize, false );
		// Merge the link count and range partitions for this chunk
		$cacheEntry['numRows'] += $partitions['numRows'];
		$cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
		if ( count( $partitions['batches'] ) ) {
			list( , $lEnd ) = end( $partitions['batches'] );
			$start = $lEnd + 1; // pick up after this inclusive range
		}
		// A chunk that came back full means there may be more rows to read
	} while ( $partitions['numRows'] >= $selectSize );
	// Make sure the first range has start=false and the last one has end=false
	if ( count( $cacheEntry['batches'] ) ) {
		$cacheEntry['batches'][0][0] = false;
		$cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
	}

	// Save partitions to memcached
	$cache->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

	// Save backlink count to memcached
	$memcKey = $cache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);
	$cache->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

	wfDebug( __METHOD__ . ": got from database\n" );

	return $cacheEntry['batches'];
}
452 
/**
 * Partition a DB result with backlinks in it into batches.
 *
 * Each batch is an inclusive [ start, end ] pair of page IDs read from
 * the first and last row of the batch. When $isComplete is true the
 * first start and the last end are false instead.
 *
 * @param mixed $res Database result; rows must expose page_id
 * @param int $batchSize Number of rows per batch
 * @param bool $isComplete Whether $res includes all the backlinks
 * @return array [ 'numRows' => row count, 'batches' => list of [ start, end ] ]
 * @throws MWException When a batch's start ID is greater than its end ID
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
	$numRows = $res->numRows();
	$numBatches = ceil( $numRows / $batchSize );
	$lastBatch = $numBatches - 1;
	$batches = [];

	for ( $batch = 0; $batch < $numBatches; $batch++ ) {
		// Lower bound: first row of the batch, or false when open-ended
		$start = false;
		if ( $batch != 0 || !$isComplete ) {
			$res->seek( $batch * $batchSize );
			$firstRow = $res->fetchObject();
			$start = (int)$firstRow->page_id;
		}

		// Upper bound: last row of the batch (clamped to the final row
		// of the result), or false when open-ended
		$end = false;
		if ( $batch != $lastBatch || !$isComplete ) {
			$res->seek( min( $numRows - 1, ( $batch + 1 ) * $batchSize - 1 ) );
			$lastRow = $res->fetchObject();
			$end = (int)$lastRow->page_id;
		}

		# Sanity check order
		if ( $start && $end && $start > $end ) {
			throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
		}

		$batches[] = [ $start, $end ];
	}

	return [ 'numRows' => $numRows, 'batches' => $batches ];
}
495 
/**
 * Get a Title iterator for cascade-protected template/file use backlinks.
 *
 * Pages transcluding this title are always looked up; pages using it as
 * an image are looked up only when the title is in the file namespace.
 * Both result sets are merged and de-duplicated by page ID.
 *
 * @return TitleArray
 */
public function getCascadeProtectedLinks() {
	$dbr = $this->getDB();

	// @todo: use UNION without breaking tests that use temp tables
	$resSets = [];
	$resSets[] = $dbr->select(
		[ 'templatelinks', 'page_restrictions', 'page' ],
		[ 'page_namespace', 'page_title', 'page_id' ],
		[
			'tl_namespace' => $this->title->getNamespace(),
			'tl_title' => $this->title->getDBkey(),
			'tl_from = pr_page',
			'pr_cascade' => 1,
			'page_id = tl_from'
		],
		__METHOD__,
		[ 'DISTINCT' ]
	);
	if ( $this->title->getNamespace() == NS_FILE ) {
		$resSets[] = $dbr->select(
			[ 'imagelinks', 'page_restrictions', 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			[
				'il_to' => $this->title->getDBkey(),
				'il_from = pr_page',
				'pr_cascade' => 1,
				'page_id = il_from'
			],
			__METHOD__,
			[ 'DISTINCT' ]
		);
	}

	// Combine and de-duplicate the results
	$mergedRes = [];
	foreach ( $resSets as $res ) {
		foreach ( $res as $row ) {
			// Keying by page ID makes a page found by both queries appear once
			$mergedRes[$row->page_id] = $row;
		}
	}

	return TitleArray::newFromResult(
		new FakeResultWrapper( array_values( $mergedRes ) ) );
}
546 }
BacklinkCache\getPrefix
getPrefix( $table)
Get the field name prefix for a given table.
Definition: BacklinkCache.php:239
TitleArray\newFromResult
static newFromResult( $res)
Definition: TitleArray.php:40
BacklinkCache\$fullResultCache
ResultWrapper[] $fullResultCache
Contains the whole links from a database result.
Definition: BacklinkCache.php:71
captcha-old.count
count
Definition: captcha-old.py:249
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
NS_FILE
const NS_FILE
Definition: Defines.php:71
BacklinkCache\getDB
getDB()
Get the replica DB connection to the database. When it does not exist yet, the connection will be initialized.
Definition: BacklinkCache.php:147
$res
$res
Definition: database.txt:21
Wikimedia\Rdbms\ResultWrapper
Result wrapper for grabbing data queried from an IDatabase object.
Definition: ResultWrapper.php:24
BacklinkCache
Class for fetching backlink lists, approximate backlink counts and partitions.
Definition: BacklinkCache.php:45
Wikimedia\Rdbms\FakeResultWrapper
Overloads the relevant methods of the real ResultsWrapper so it doesn't go anywhere near an actual da...
Definition: FakeResultWrapper.php:11
BacklinkCache\$title
$title
Local copy of a Title object.
Definition: BacklinkCache.php:85
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:40
BacklinkCache\getNumLinks
getNumLinks( $table, $max=INF)
Get the approximate number of backlinks.
Definition: BacklinkCache.php:324
title
to move a page</td >< td > &*You are moving the page across *A non empty talk page already exists under the new or *You uncheck the box below In those you will have to move or merge the page manually if desired</td >< td > be sure to &You are responsible for making sure that links continue to point where they are supposed to go Note that the page will &a page at the new title
Definition: All_system_messages.txt:2696
LIST_OR
const LIST_OR
Definition: Defines.php:47
MWException
MediaWiki exception.
Definition: MWException.php:26
BacklinkCache\partition
partition( $table, $batchSize)
Partition the backlinks into batches.
Definition: BacklinkCache.php:378
BacklinkCache\queryLinks
queryLinks( $table, $startId, $endId, $max, $select='all')
Get the backlinks for a given table.
Definition: BacklinkCache.php:176
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2856
BacklinkCache\__sleep
__sleep()
Serialization handler, disallows serializing the database to prevent failures after this class is d...
Definition: BacklinkCache.php:120
BacklinkCache\partitionResult
partitionResult( $res, $batchSize, $isComplete=true)
Partition a DB result with backlinks in it into batches.
Definition: BacklinkCache.php:461
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
$wgUpdateRowsPerJob
$wgUpdateRowsPerJob
Number of rows to update per job.
Definition: DefaultSettings.php:8327
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:1047
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
BacklinkCache\clear
clear()
Clear locally stored data and database object.
Definition: BacklinkCache.php:127
BacklinkCache\CACHE_EXPIRY
const CACHE_EXPIRY
Definition: BacklinkCache.php:87
BacklinkCache\$db
$db
Local copy of a database object.
Definition: BacklinkCache.php:80
BacklinkCache\__construct
__construct(Title $title)
Create a new BacklinkCache.
Definition: BacklinkCache.php:94
BacklinkCache\hasLinks
hasLinks( $table)
Check if there are any backlinks.
Definition: BacklinkCache.php:314
BacklinkCache\getCascadeProtectedLinks
getCascadeProtectedLinks()
Get a Title iterator for cascade-protected template/file use backlinks.
Definition: BacklinkCache.php:502
Title
Represents a title within MediaWiki.
Definition: Title.php:39
BacklinkCache\$partitionCache
array[] $partitionCache
Multi-dimensional array representing batches.
Definition: BacklinkCache.php:61
$dbr
if(! $regexes) $dbr
Definition: cleanup.php:94
$cache
$cache
Definition: mcc.php:33
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1965
ObjectCache\getMainWANInstance
static getMainWANInstance()
Get the main WAN cache object.
Definition: ObjectCache.php:370
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
BacklinkCache\setDB
setDB( $db)
Set the Database object to use.
Definition: BacklinkCache.php:138
BacklinkCache\getConditions
getConditions( $table)
Get the SQL condition array for selecting backlinks, with a join on the page table.
Definition: BacklinkCache.php:268
BacklinkCache\$instance
static BacklinkCache $instance
Definition: BacklinkCache.php:47
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:203
array
the array() calling protocol came about after MediaWiki 1.4rc1.
BacklinkCache\getLinks
getLinks( $table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
Definition: BacklinkCache.php:163