MediaWiki  1.29.2
BacklinkCache.php
Go to the documentation of this file.
1 <?php
32 
/**
 * @var BacklinkCache The one retained instance; get() replaces it whenever
 *  a different title is requested.
 */
48  protected static $instance;
49 
/**
 * In-process cache of partition() results, indexed as
 * $partitionCache[$table][$batchSize] = [ 'numRows' => ..., 'batches' => ... ],
 * where 'batches' is a list of [ start, end ] page ID pairs.
 *
 * @var array[]
 */
62  protected $partitionCache = [];
63 
/**
 * In-process cache of complete (unbounded, unlimited) query results,
 * indexed by link table name. Filled by queryLinks().
 *
 * @var array
 */
72  protected $fullResultCache = [];
73 
/**
 * Database handle used for queries. Set through setDB() or lazily by
 * getDB(); removed again by clear().
 */
81  protected $db;
82 
/**
 * @var Title Title whose backlinks are looked up.
 */
86  protected $title;
87 
/**
 * Expiry handed to the shared cache when storing partitions and counts
 * (presumably seconds, i.e. one hour; confirm against the cache API).
 */
88  const CACHE_EXPIRY = 3600;
89 
/**
 * Create a backlink cache bound to a single title.
 *
 * Callers that want the shared instance should go through get() instead.
 *
 * @param Title $title Title whose backlinks will be looked up
 */
public function __construct( Title $title ) {
	$this->title = $title;
}
98 
/**
 * Fetch the shared BacklinkCache for a title.
 *
 * Only one instance is retained at a time: if the retained instance was
 * built for a different title (or none exists yet), it is replaced by a
 * fresh one for $title.
 *
 * @param Title $title Title the returned cache must be bound to
 * @return BacklinkCache
 */
public static function get( Title $title ) {
	$current = self::$instance;

	// Reuse the retained instance only when it serves the very same title.
	if ( $current && $current->title->equals( $title ) ) {
		return $current;
	}

	self::$instance = new self( $title );

	return self::$instance;
}
113 
/**
 * Serialization handler.
 *
 * Names the properties that take part in serialization. The database
 * handle ($db) is deliberately left out of the list.
 *
 * @return string[] Property names to serialize
 */
function __sleep() {
	$serializable = [ 'partitionCache', 'fullResultCache', 'title' ];

	return $serializable;
}
124 
/**
 * Drop all locally held state: the database handle and both in-process
 * caches. The next getDB() call fetches a new handle.
 */
public function clear() {
	unset( $this->db );
	$this->fullResultCache = [];
	$this->partitionCache = [];
}
133 
/**
 * Inject the database handle that subsequent queries will run against,
 * overriding the handle getDB() would otherwise create on demand.
 *
 * @param mixed $db Database handle (presumably an IDatabase; confirm with callers)
 */
public function setDB( $db ) {
	$this->db = $db;
}
142 
/**
 * Return the database handle, creating a replica connection on first use
 * when none has been set.
 *
 * @return mixed The handle previously set, or the result of wfGetDB( DB_REPLICA )
 */
protected function getDB() {
	if ( isset( $this->db ) ) {
		return $this->db;
	}

	$this->db = wfGetDB( DB_REPLICA );

	return $this->db;
}
155 
/**
 * Get the backlinks for a given table as a title list.
 *
 * Runs queryLinks() with the default field selection and wraps the rows
 * with TitleArray::newFromResult().
 *
 * @param string $table Link table name
 * @param int|bool $startId Lowest source page ID to include, or false for no lower bound
 * @param int|bool $endId Highest source page ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows; INF for no limit
 * @return mixed Whatever TitleArray::newFromResult() returns for the rows
 */
public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
	$rows = $this->queryLinks( $table, $startId, $endId, $max );

	return TitleArray::newFromResult( $rows );
}
167 
/**
 * Get the backlink rows for a given table.
 *
 * An unbounded, unlimited request is answered from the in-process full
 * result cache when possible. Otherwise the database is queried, and the
 * result is stored in that cache if it is a full-field, unbounded query
 * whose row count stayed below $max.
 *
 * @param string $table Link table name
 * @param int|bool $startId Lowest source page ID to include, or false
 * @param int|bool $endId Highest source page ID to include, or false
 * @param int|float $max Maximum number of rows; INF for no limit
 * @param string $select 'all' to join with page and select namespace/title/ID,
 *  'ids' to select only the source page ID (aliased to page_id) from the link table
 * @return mixed Database result, or the cached result object
 */
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
	$fromField = $this->getPrefix( $table ) . '_from';
	$unbounded = !$startId && !$endId;

	if ( $unbounded && is_infinite( $max ) && isset( $this->fullResultCache[$table] ) ) {
		wfDebug( __METHOD__ . ": got results from cache\n" );

		return $this->fullResultCache[$table];
	}

	wfDebug( __METHOD__ . ": got results from DB\n" );
	$conds = $this->getConditions( $table );
	// Range-restrict on the link table's own "from" column instead of the
	// joined page_id, since the database may not carry the index across the join.
	if ( $startId ) {
		$conds[] = "$fromField >= " . intval( $startId );
	}
	if ( $endId ) {
		$conds[] = "$fromField <= " . intval( $endId );
	}

	$options = [ 'ORDER BY' => $fromField ];
	if ( is_finite( $max ) && $max > 0 ) {
		$options['LIMIT'] = $max;
	}

	if ( $select === 'ids' ) {
		// No JOIN with page here, so strip every condition that mentions page_id.
		$linkOnlyConds = array_filter( $conds, function ( $clause ) {
			return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
		} );
		$res = $this->getDB()->select(
			$table,
			[ $this->getPrefix( $table ) . '_from AS page_id' ],
			$linkOnlyConds,
			__METHOD__,
			$options
		);
	} else {
		// JOIN the link table with page to obtain the title information.
		$joinOptions = array_merge( [ 'STRAIGHT_JOIN' ], $options );
		$res = $this->getDB()->select(
			[ $table, 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			$conds,
			__METHOD__,
			$joinOptions
		);
	}

	if ( $select === 'all' && $unbounded && $res->numRows() < $max ) {
		// Everything fit below the limit, so this is the complete set: keep it.
		$this->fullResultCache[$table] = $res;
	} else {
		wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
	}

	return $res;
}
234 
/**
 * Get the field name prefix for a given link table.
 *
 * Built-in tables are resolved from a fixed map; any other table is
 * offered to the 'BacklinkCacheGetPrefix' hook, which may fill in a prefix.
 *
 * @param string $table Link table name
 * @return string Column prefix, e.g. 'pl' for 'pagelinks'
 * @throws MWException When the table is unknown and no hook supplied a prefix
 */
protected function getPrefix( $table ) {
	static $prefixes = [
		'pagelinks' => 'pl',
		'imagelinks' => 'il',
		'categorylinks' => 'cl',
		'templatelinks' => 'tl',
		'redirect' => 'rd',
	];

	if ( isset( $prefixes[$table] ) ) {
		return $prefixes[$table];
	}

	// Not a built-in table: give hook handlers a chance to name the prefix.
	$prefix = null;
	Hooks::run( 'BacklinkCacheGetPrefix', [ $table, &$prefix ] );
	if ( !$prefix ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $prefix;
}
262 
/**
 * Get the SQL condition array for selecting backlinks to the title,
 * including the join clause against the page table.
 *
 * Tables other than the built-in ones are offered to the
 * 'BacklinkCacheGetConditions' hook, which may fill in the conditions.
 *
 * @param string $table Link table name
 * @return array Conditions in the form passed to the database select() call
 * @throws MWException When the table is unknown (here or in getPrefix())
 *  and no hook supplied conditions
 */
protected function getConditions( $table ) {
	$prefix = $this->getPrefix( $table );
	// Clause tying each link row to the page row it originates from.
	$joinOnPage = "page_id={$prefix}_from";

	switch ( $table ) {
		case 'pagelinks':
		case 'templatelinks':
			return [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
				$joinOnPage
			];
		case 'redirect':
			return [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
				// Only rows whose interwiki field is empty or NULL.
				$this->getDB()->makeList( [
					"{$prefix}_interwiki" => '',
					"{$prefix}_interwiki IS NULL",
				], LIST_OR ),
				$joinOnPage
			];
		case 'imagelinks':
		case 'categorylinks':
			return [
				"{$prefix}_to" => $this->title->getDBkey(),
				$joinOnPage
			];
	}

	// Not a built-in table: let hook handlers provide the conditions.
	$conds = null;
	Hooks::run( 'BacklinkCacheGetConditions', [ $table, $this->title, &$conds ] );
	if ( !$conds ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $conds;
}
310 
/**
 * Check whether the title has at least one backlink in the given table.
 *
 * @param string $table Link table name
 * @return bool True when getNumLinks() reports one or more links
 */
public function hasLinks( $table ) {
	// A limit of 1 is enough to tell "none" from "some".
	$found = $this->getNumLinks( $table, 1 );

	return $found > 0;
}
319 
/**
 * Get the number of backlinks in a table, capped at $max.
 *
 * Sources are tried in order: the in-process partition cache, the
 * in-process full result cache, the shared object cache, and finally the
 * database. Cached values are not re-validated here, so the number may be
 * out of date.
 *
 * @param string $table Link table name
 * @param int|float $max Upper bound for the returned count; INF for no bound
 * @return int|float Number of links, never more than $max
 */
public function getNumLinks( $table, $max = INF ) {
	global $wgUpdateRowsPerJob;

	// Shared cache used for steps 3 and 4; without this assignment the
	// makeKey()/get()/set() calls below operate on an undefined variable.
	$cache = ObjectCache::getMainWANInstance();

	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table] ) ) {
		// Any batch size will do: every entry for the table carries the row count.
		$entry = reset( $this->partitionCache[$table] );

		return min( $max, $entry['numRows'] );
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		return min( $max, $this->fullResultCache[$table]->numRows() );
	}

	$memcKey = $cache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);

	// 3) ... fallback to memcached ...
	$count = $cache->get( $memcKey );
	if ( $count ) {
		return min( $max, $count );
	}

	// 4) fetch from the database ...
	if ( is_infinite( $max ) ) { // no limit at all
		// Use partition() since it will batch the query and skip the JOIN.
		// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
		$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache

		return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
	}

	// Probably some sane limit: fetch the full title info, since the caller
	// will likely need it next.
	$count = $this->getLinks( $table, false, false, $max )->count();
	if ( $count < $max ) {
		// The limit was not reached, so this is the full count; share it.
		$cache->set( $memcKey, $count, self::CACHE_EXPIRY );
	}

	return min( $max, $count );
}
370 
/**
 * Partition the backlinks into batches of source page ID ranges.
 *
 * Sources are tried in order: the in-process partition cache, the
 * in-process full result cache, the shared object cache, and finally the
 * database. A database fetch also stores the partitions and the total
 * link count in the shared cache.
 *
 * @param string $table Link table name
 * @param int $batchSize Number of rows each batch should cover
 * @return array[] List of [ start, end ] page ID pairs; the first start and
 *  the last end are false, meaning "unbounded"
 */
public function partition( $table, $batchSize ) {
	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
		wfDebug( __METHOD__ . ": got from partition cache\n" );

		return $this->partitionCache[$table][$batchSize]['batches'];
	}

	// Shared cache used for steps 3 and 4; without this assignment the
	// makeKey()/get()/set() calls below operate on an undefined variable.
	$cache = ObjectCache::getMainWANInstance();

	// Create the slot first so that $cacheEntry can alias it: every
	// assignment to $cacheEntry below lands directly in the partition cache.
	$this->partitionCache[$table][$batchSize] = false;
	$cacheEntry =& $this->partitionCache[$table][$batchSize];

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		wfDebug( __METHOD__ . ": got from full result cache\n" );

		return $cacheEntry['batches'];
	}

	$memcKey = $cache->makeKey(
		'backlinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table,
		$batchSize
	);

	// 3) ... fallback to memcached ...
	$memcValue = $cache->get( $memcKey );
	if ( is_array( $memcValue ) ) {
		$cacheEntry = $memcValue;
		wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

		return $cacheEntry['batches'];
	}

	// 4) ... finally fetch from the slow database :(
	$cacheEntry = [ 'numRows' => 0, 'batches' => [] ]; // final result
	// Do the selects in batches to avoid client-side OOMs (T45452).
	// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
	$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
	$start = false;
	do {
		$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
		$partitions = $this->partitionResult( $res, $batchSize, false );
		// Merge the link count and range partitions for this chunk
		$cacheEntry['numRows'] += $partitions['numRows'];
		$cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
		if ( count( $partitions['batches'] ) ) {
			list( , $lEnd ) = end( $partitions['batches'] );
			$start = $lEnd + 1; // pick up after this inclusive range
		}
		// A chunk that came back full may have more rows behind it.
	} while ( $partitions['numRows'] >= $selectSize );
	// Make sure the first range has start=false and the last one has end=false
	if ( count( $cacheEntry['batches'] ) ) {
		$cacheEntry['batches'][0][0] = false;
		$cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
	}

	// Save partitions to memcached
	$cache->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

	// Save backlink count to memcached
	$memcKey = $cache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);
	$cache->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

	wfDebug( __METHOD__ . ": got from database\n" );

	return $cacheEntry['batches'];
}
454 
/**
 * Partition a DB result with backlinks in it into batches.
 *
 * Each batch is an inclusive [ start, end ] pair of page_id values read
 * from the rows at the batch boundaries. When $isComplete is set, the
 * first start and the last end are false instead of concrete IDs.
 *
 * @param mixed $res Result object offering numRows(), seek() and fetchObject(),
 *  whose rows expose a page_id field
 * @param int $batchSize Number of rows per batch
 * @param bool $isComplete Whether $res holds the entire set of backlinks
 * @return array [ 'numRows' => row count of $res, 'batches' => list of [ start, end ] ]
 * @throws MWException When a batch's start ID is greater than its end ID
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
	$total = $res->numRows();
	$batchCount = ceil( $total / $batchSize );
	$lastIndex = $batchCount - 1;
	$ranges = [];

	for ( $n = 0; $n < $batchCount; $n++ ) {
		// Lower edge: left open only for the first batch of a complete result.
		$start = false;
		if ( !$isComplete || $n != 0 ) {
			$res->seek( $n * $batchSize );
			$start = (int)$res->fetchObject()->page_id;
		}

		// Upper edge: left open only for the last batch of a complete result.
		// The final batch may be short, hence the clamp to the last row.
		$end = false;
		if ( !$isComplete || $n != $lastIndex ) {
			$res->seek( min( $total - 1, ( $n + 1 ) * $batchSize - 1 ) );
			$end = (int)$res->fetchObject()->page_id;
		}

		# Sanity check order
		if ( $start && $end && $start > $end ) {
			throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
		}

		$ranges[] = [ $start, $end ];
	}

	return [ 'numRows' => $total, 'batches' => $ranges ];
}
497 
/**
 * Get the titles of cascade-protected pages that use this title as a
 * template, or that link to it as a file when it is in the file namespace.
 *
 * The template and file queries run separately and their rows are merged,
 * keyed by page_id so a page found by both appears once.
 *
 * @return mixed Whatever TitleArray::newFromResult() returns for the merged rows
 */
public function getCascadeProtectedLinks() {
	$dbr = $this->getDB();

	// @todo: use UNION without breaking tests that use temp tables
	$resSets = [];
	$resSets[] = $dbr->select(
		[ 'templatelinks', 'page_restrictions', 'page' ],
		[ 'page_namespace', 'page_title', 'page_id' ],
		[
			'tl_namespace' => $this->title->getNamespace(),
			'tl_title' => $this->title->getDBkey(),
			'tl_from = pr_page',
			'pr_cascade' => 1,
			'page_id = tl_from'
		],
		__METHOD__,
		[ 'DISTINCT' ]
	);
	if ( $this->title->getNamespace() == NS_FILE ) {
		$resSets[] = $dbr->select(
			[ 'imagelinks', 'page_restrictions', 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			[
				'il_to' => $this->title->getDBkey(),
				'il_from = pr_page',
				'pr_cascade' => 1,
				'page_id = il_from'
			],
			__METHOD__,
			[ 'DISTINCT' ]
		);
	}

	// Combine and de-duplicate the results
	$mergedRes = [];
	foreach ( $resSets as $res ) {
		foreach ( $res as $row ) {
			$mergedRes[$row->page_id] = $row;
		}
	}

	// Re-index the merged rows and wrap them as a result so they can be
	// returned the same way getLinks() returns its rows.
	return TitleArray::newFromResult(
		new FakeResultWrapper( array_values( $mergedRes ) ) );
}
548 }
BacklinkCache\getPrefix
getPrefix( $table)
Get the field name prefix for a given table.
Definition: BacklinkCache.php:241
TitleArray\newFromResult
static newFromResult( $res)
Definition: TitleArray.php:40
BacklinkCache\$fullResultCache
ResultWrapper[] $fullResultCache
Contains the complete set of links from a database result.
Definition: BacklinkCache.php:72
captcha-old.count
count
Definition: captcha-old.py:225
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
NS_FILE
const NS_FILE
Definition: Defines.php:68
BacklinkCache\getDB
getDB()
Get the replica DB connection to the database. If no connection exists yet, it is initialized.
Definition: BacklinkCache.php:148
$res
$res
Definition: database.txt:21
Wikimedia\Rdbms\ResultWrapper
Result wrapper for grabbing data queried from an IDatabase object.
Definition: ResultWrapper.php:24
BacklinkCache
Class for fetching backlink lists, approximate backlink counts and partitions.
Definition: BacklinkCache.php:46
Wikimedia\Rdbms\FakeResultWrapper
Overloads the relevant methods of the real ResultsWrapper so it doesn't go anywhere near an actual da...
Definition: FakeResultWrapper.php:11
BacklinkCache\$title
$title
Local copy of a Title object.
Definition: BacklinkCache.php:86
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:40
BacklinkCache\getNumLinks
getNumLinks( $table, $max=INF)
Get the approximate number of backlinks.
Definition: BacklinkCache.php:326
LIST_OR
const LIST_OR
Definition: Defines.php:44
MWException
MediaWiki exception.
Definition: MWException.php:26
BacklinkCache\partition
partition( $table, $batchSize)
Partition the backlinks into batches.
Definition: BacklinkCache.php:380
BacklinkCache\queryLinks
queryLinks( $table, $startId, $endId, $max, $select='all')
Get the backlinks for a given table.
Definition: BacklinkCache.php:177
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:3060
BacklinkCache\__sleep
__sleep()
Serialization handler, disallows serializing the database object to prevent failures after this class is d...
Definition: BacklinkCache.php:121
BacklinkCache\partitionResult
partitionResult( $res, $batchSize, $isComplete=true)
Partition a DB result with backlinks in it into batches.
Definition: BacklinkCache.php:463
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:999
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
BacklinkCache\clear
clear()
Clear locally stored data and database object.
Definition: BacklinkCache.php:128
BacklinkCache\CACHE_EXPIRY
const CACHE_EXPIRY
Definition: BacklinkCache.php:88
BacklinkCache\$db
$db
Local copy of a database object.
Definition: BacklinkCache.php:81
title
title
Definition: parserTests.txt:211
BacklinkCache\__construct
__construct(Title $title)
Create a new BacklinkCache.
Definition: BacklinkCache.php:95
BacklinkCache\hasLinks
hasLinks( $table)
Check if there are any backlinks.
Definition: BacklinkCache.php:316
BacklinkCache\getCascadeProtectedLinks
getCascadeProtectedLinks()
Get a Title iterator for cascade-protected template/file use backlinks.
Definition: BacklinkCache.php:504
Title
Represents a title within MediaWiki.
Definition: Title.php:39
BacklinkCache\$partitionCache
array[] $partitionCache
Multi-dimensional array representing batches.
Definition: BacklinkCache.php:62
$dbr
if(! $regexes) $dbr
Definition: cleanup.php:94
$cache
$cache
Definition: mcc.php:33
ObjectCache\getMainWANInstance
static getMainWANInstance()
Get the main WAN cache object.
Definition: ObjectCache.php:370
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
BacklinkCache\setDB
setDB( $db)
Set the Database object to use.
Definition: BacklinkCache.php:139
BacklinkCache\getConditions
getConditions( $table)
Get the SQL condition array for selecting backlinks, with a join on the page table.
Definition: BacklinkCache.php:270
BacklinkCache\$instance
static BacklinkCache $instance
Definition: BacklinkCache.php:48
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
$options
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist Do not use this to implement individual filters if they are compatible with the ChangesListFilter and ChangesListFilterGroup structure use sub classes of those in conjunction with the ChangesListSpecialPageStructuredFilters hook This hook can be used to implement filters that do not implement that or custom behavior that is not an individual filter e g Watchlist and Watchlist you will want to construct new ChangesListBooleanFilter or ChangesListStringOptionsFilter objects When constructing you specify which group they belong to You can reuse existing or create your you must register them with $special registerFilterGroup removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1049
array
the array() calling protocol came about after MediaWiki 1.4rc1.
BacklinkCache\getLinks
getLinks( $table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
Definition: BacklinkCache.php:164