MediaWiki REL1_33
BacklinkCache.php
Go to the documentation of this file.
1<?php
32
/** @var BacklinkCache Most recently created instance; reused by get() when the title matches */
48 protected static $instance;
49
/**
 * @var array[] Multi-dimensional array representing batches, keyed by table
 *  and then by batch size; each entry holds 'numRows' and 'batches'.
 */
62 protected $partitionCache = [];
63
/** @var ResultWrapper[] Complete backlink results from the database, keyed by table */
72 protected $fullResultCache = [];
73
/** @var WANObjectCache Cache used for backlink counts and partitions */
77 protected $wanCache;
78
/** @var mixed Local copy of a database object; lazily set by getDB(), dropped by clear() */
86 protected $db;
87
/** @var Title Local copy of the Title object whose backlinks are tracked */
91 protected $title;
92
/** TTL passed to WANObjectCache::set() for cached counts/partitions (presumably seconds -- confirm) */
93 const CACHE_EXPIRY = 3600;
94
/**
 * Create a new BacklinkCache.
 *
 * @param Title $title Title object whose backlinks this cache describes
 */
public function __construct( Title $title ) {
	$services = MediaWikiServices::getInstance();

	$this->wanCache = $services->getMainWANObjectCache();
	$this->title = $title;
}
104
/**
 * Return the shared BacklinkCache for a title.
 *
 * Only one instance is remembered at a time: it is reused when it was built
 * for an equal title, otherwise it is replaced by a fresh one.
 *
 * @param Title $title Title object to get a cache for
 * @return BacklinkCache
 */
public static function get( Title $title ) {
	$current = self::$instance;
	if ( $current && $current->title->equals( $title ) ) {
		return $current;
	}

	self::$instance = new self( $title );

	return self::$instance;
}
119
/**
 * Serialization handler.
 *
 * Lists the properties to serialize; the database object, the WAN cache and
 * the static instance are deliberately left out.
 *
 * @return string[] Names of the properties to serialize
 */
public function __sleep() {
	return [ 'partitionCache', 'fullResultCache', 'title' ];
}
130
/**
 * Clear locally stored data and the database object.
 *
 * Also touches the check key so that values previously stored in the WAN
 * cache for this title are no longer treated as current.
 */
public function clear() {
	// Drop the in-process caches first ...
	$this->fullResultCache = [];
	$this->partitionCache = [];

	// ... then invalidate the shared cache entries for this title
	$checkKey = $this->makeCheckKey();
	$this->wanCache->touchCheckKey( $checkKey );

	// Forget the connection; getDB() will fetch a new one on demand
	unset( $this->db );
}
140
/**
 * Set the Database object to use.
 *
 * The given object replaces whatever getDB() would otherwise return.
 *
 * @param mixed $db Database object used for subsequent queries
 */
public function setDB( $db ) {
	$this->db = $db;
}
149
/**
 * Get the database connection, initializing it on first use.
 *
 * When no database object has been set (or it was dropped by clear()),
 * a replica connection is requested via wfGetDB( DB_REPLICA ) and remembered.
 *
 * @return mixed The database object
 */
protected function getDB() {
	if ( isset( $this->db ) ) {
		return $this->db;
	}

	$this->db = wfGetDB( DB_REPLICA );

	return $this->db;
}
162
/**
 * Get the backlinks for a given table as a title list.
 *
 * @param string $table Backlink table name
 * @param int|bool $startId Lowest source page ID to include, or false for no lower bound
 * @param int|bool $endId Highest source page ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows, or INF for no limit
 * @return TitleArray Result of TitleArray::newFromResult()
 */
public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
	$rows = $this->queryLinks( $table, $startId, $endId, $max );

	return TitleArray::newFromResult( $rows );
}
174
/**
 * Get the backlinks for a given table.
 *
 * An unbounded request (no start, no end, infinite $max) is served from the
 * full result cache when one is available. Otherwise the database is
 * queried, ordered by the table's "_from" field, and the result is cached
 * only if it is a complete, unbounded 'all' result.
 *
 * @param string $table Backlink table name
 * @param int|bool $startId Lowest source page ID to include, or false for no lower bound
 * @param int|bool $endId Highest source page ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows, or INF for no limit
 * @param string $select 'all' to JOIN with the page table, 'ids' to fetch page IDs only
 * @return ResultWrapper Database result (rows expose page_id; 'all' also has namespace/title)
 * @throws MWException If $table is not a known backlink table
 */
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
	$fromField = $this->getPrefix( $table ) . '_from';

	if ( !$startId && !$endId && is_infinite( $max )
		&& isset( $this->fullResultCache[$table] )
	) {
		wfDebug( __METHOD__ . ": got results from cache\n" );
		$res = $this->fullResultCache[$table];
	} else {
		wfDebug( __METHOD__ . ": got results from DB\n" );
		$conds = $this->getConditions( $table );
		// Use the from field in the condition rather than the joined page_id,
		// because databases are stupid and don't necessarily propagate indexes.
		if ( $startId ) {
			$conds[] = "$fromField >= " . intval( $startId );
		}
		if ( $endId ) {
			$conds[] = "$fromField <= " . intval( $endId );
		}
		$options = [ 'ORDER BY' => $fromField ];
		if ( is_finite( $max ) && $max > 0 ) {
			$options['LIMIT'] = $max;
		}

		if ( $select === 'ids' ) {
			// Just select from the backlink table and ignore the page JOIN.
			// Only positional entries are raw SQL clauses; string-keyed entries
			// are field => value pairs whose values (e.g. a title containing
			// "page_id") must never be mistaken for a join clause and dropped.
			$idConds = array_filter(
				$conds,
				function ( $clause, $key ) {
					return !is_int( $key )
						|| !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
				},
				ARRAY_FILTER_USE_BOTH
			);
			// $options must be passed here too: partition() relies on the
			// ORDER BY and LIMIT to walk the table in bounded, ordered chunks.
			$res = $this->getDB()->select(
				$table,
				[ $fromField . ' AS page_id' ],
				$idConds,
				__METHOD__,
				$options
			);
		} else {
			// Select from the backlink table and JOIN with page title information
			$res = $this->getDB()->select(
				[ $table, 'page' ],
				[ 'page_namespace', 'page_title', 'page_id' ],
				$conds,
				__METHOD__,
				array_merge( [ 'STRAIGHT_JOIN' ], $options )
			);
		}

		if ( $select === 'all' && !$startId && !$endId && $res->numRows() < $max ) {
			// The full results fit within the limit, so cache them
			$this->fullResultCache[$table] = $res;
		} else {
			wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
		}
	}

	return $res;
}
240
/**
 * Get the field name prefix for a given table.
 *
 * Built-in backlink tables are resolved from a fixed map; any other table is
 * offered to the 'BacklinkCacheGetPrefix' hook.
 *
 * @param string $table Backlink table name
 * @return string Field prefix, e.g. 'pl' for 'pagelinks'
 * @throws MWException If neither the map nor the hook knows the table
 */
protected function getPrefix( $table ) {
	static $knownPrefixes = [
		'pagelinks' => 'pl',
		'imagelinks' => 'il',
		'categorylinks' => 'cl',
		'templatelinks' => 'tl',
		'redirect' => 'rd',
	];

	if ( isset( $knownPrefixes[$table] ) ) {
		return $knownPrefixes[$table];
	}

	// Not a core table: let extensions supply the prefix
	$hookPrefix = null;
	Hooks::run( 'BacklinkCacheGetPrefix', [ $table, &$hookPrefix ] );
	if ( !$hookPrefix ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $hookPrefix;
}
268
/**
 * Get the SQL condition array for selecting backlinks, with a join on the
 * page table.
 *
 * Tables not handled here are offered to the 'BacklinkCacheGetConditions'
 * hook.
 *
 * @param string $table Backlink table name
 * @return array Conditions suitable for the database select() call
 * @throws MWException If the table is unknown and no hook supplies conditions
 */
protected function getConditions( $table ) {
	$prefix = $this->getPrefix( $table );
	// Clause joining the backlink row to the page it comes from
	$joinOnPage = "page_id={$prefix}_from";
	$target = $this->title;

	switch ( $table ) {
		case 'pagelinks':
		case 'templatelinks':
			return [
				"{$prefix}_namespace" => $target->getNamespace(),
				"{$prefix}_title" => $target->getDBkey(),
				$joinOnPage
			];

		case 'redirect':
			// Match redirects whose interwiki field is empty or NULL
			$localOnly = $this->getDB()->makeList( [
				"{$prefix}_interwiki" => '',
				"{$prefix}_interwiki IS NULL",
			], LIST_OR );

			return [
				"{$prefix}_namespace" => $target->getNamespace(),
				"{$prefix}_title" => $target->getDBkey(),
				$localOnly,
				$joinOnPage
			];

		case 'imagelinks':
		case 'categorylinks':
			return [
				"{$prefix}_to" => $target->getDBkey(),
				$joinOnPage
			];
	}

	// Unknown table: ask extensions for the conditions
	$hookConds = null;
	Hooks::run( 'BacklinkCacheGetConditions', [ $table, $this->title, &$hookConds ] );
	if ( !$hookConds ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $hookConds;
}
316
/**
 * Check if there are any backlinks.
 *
 * @param string $table Backlink table name
 * @return bool True when at least one backlink is counted
 */
public function hasLinks( $table ) {
	// A limit of one row is enough to answer a yes/no question
	$atMostOne = $this->getNumLinks( $table, 1 );

	return $atMostOne > 0;
}
325
/**
 * Get the approximate number of backlinks.
 *
 * Sources are consulted in order: the partition cache, the full result
 * cache, the WAN cache, and finally the database.
 *
 * @param string $table Backlink table name
 * @param int|float $max Only count up to this many backlinks; INF for no limit
 * @return int|float Number of backlinks, capped at $max when a cap applies
 */
public function getNumLinks( $table, $max = INF ) {
	global $wgUpdateRowsPerJob;

	// In-process partition cache: every entry for the table carries the row count
	if ( isset( $this->partitionCache[$table] ) ) {
		$firstEntry = reset( $this->partitionCache[$table] );

		return min( $max, $firstEntry['numRows'] );
	}

	// In-process full result cache
	if ( isset( $this->fullResultCache[$table] ) ) {
		$cachedRows = $this->fullResultCache[$table]->numRows();

		return min( $max, $cachedRows );
	}

	// Shared cache, validated against this title's check key
	$titleHash = md5( $this->title->getPrefixedDBkey() );
	$countKey = $this->wanCache->makeKey( 'numbacklinks', $titleHash, $table );
	$curTTL = INF;
	$cachedCount = $this->wanCache->get( $countKey, $curTTL, [ $this->makeCheckKey() ] );
	if ( $cachedCount && $curTTL > 0 ) {
		return min( $max, $cachedCount );
	}

	// Database, with no limit at all
	if ( is_infinite( $max ) ) {
		// Use partition() since it will batch the query and skip the JOIN.
		// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
		$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache

		return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
	}

	// Database, with a (probably sane) limit.
	// Fetch the full title info, since the caller will likely need it next.
	$count = $this->getLinks( $table, false, false, $max )->count();
	if ( $count < $max ) {
		// Below the limit, so this is the full count and is worth caching
		$this->wanCache->set( $countKey, $count, self::CACHE_EXPIRY );
	}

	return min( $max, $count );
}
382
/**
 * Partition the backlinks into batches.
 *
 * Returns an array with an entry for each partition. Each entry is a
 * two-element array [ start, end ] of inclusive source page IDs, where the
 * start of the first partition and the end of the last one are false.
 *
 * Sources are consulted in order: the partition cache, the full result
 * cache, the WAN cache, and finally the database. The in-process partition
 * cache is only written once an entry has been fully computed, so an
 * exception part-way through never leaves a bogus placeholder behind.
 *
 * @param string $table Backlink table name
 * @param int $batchSize Number of rows per partition
 * @return array[] List of [ start, end ] ranges
 * @throws MWException If $table is unknown or the query result is out of order
 */
public function partition( $table, $batchSize ) {
	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
		wfDebug( __METHOD__ . ": got from partition cache\n" );

		return $this->partitionCache[$table][$batchSize]['batches'];
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		wfDebug( __METHOD__ . ": got from full result cache\n" );

		return $cacheEntry['batches'];
	}

	$memcKey = $this->wanCache->makeKey(
		'backlinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table,
		$batchSize
	);

	// 3) ... fallback to memcached ...
	$curTTL = 0;
	$memcValue = $this->wanCache->get(
		$memcKey,
		$curTTL,
		[
			$this->makeCheckKey()
		]
	);
	if ( is_array( $memcValue ) && ( $curTTL > 0 ) ) {
		$this->partitionCache[$table][$batchSize] = $memcValue;
		wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

		return $memcValue['batches'];
	}

	// 4) ... finally fetch from the slow database :(
	$cacheEntry = [ 'numRows' => 0, 'batches' => [] ]; // final result
	// Do the selects in batches to avoid client-side OOMs (T45452).
	// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
	$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
	$start = false;
	do {
		$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
		$partitions = $this->partitionResult( $res, $batchSize, false );
		// Merge the link count and range partitions for this chunk
		$cacheEntry['numRows'] += $partitions['numRows'];
		$cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
		if ( count( $partitions['batches'] ) ) {
			list( , $lEnd ) = end( $partitions['batches'] );
			$start = $lEnd + 1; // pick up after this inclusive range
		}
	} while ( $partitions['numRows'] >= $selectSize );
	// Make sure the first range has start=false and the last one has end=false
	if ( count( $cacheEntry['batches'] ) ) {
		$cacheEntry['batches'][0][0] = false;
		$cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
	}

	// Only now that the entry is complete, remember it for this process
	$this->partitionCache[$table][$batchSize] = $cacheEntry;

	// Save partitions to memcached
	$this->wanCache->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

	// Save backlink count to memcached
	$memcKey = $this->wanCache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);
	$this->wanCache->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

	wfDebug( __METHOD__ . ": got from database\n" );

	return $cacheEntry['batches'];
}
472
/**
 * Partition a DB result with backlinks in it into batches.
 *
 * @param ResultWrapper $res Database result whose rows expose page_id
 * @param int $batchSize Number of rows per batch
 * @param bool $isComplete Whether $res includes all the backlinks; when true
 *  the first batch starts at false and the last batch ends at false
 * @return array Map with 'numRows' (row count of $res) and 'batches'
 *  (list of [ start, end ] page ID ranges)
 * @throws MWException If a batch's start ID is greater than its end ID
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
	$numRows = $res->numRows();
	$numBatches = ceil( $numRows / $batchSize );
	$lastBatch = $numBatches - 1;

	// Read the page ID stored at a given row offset of the result
	$pageIdAt = function ( $rowNum ) use ( $res ) {
		$res->seek( $rowNum );
		$row = $res->fetchObject();

		return (int)$row->page_id;
	};

	$batches = [];
	for ( $i = 0; $i < $numBatches; $i++ ) {
		// Open-ended start for the very first batch of a complete result
		$start = ( $i == 0 && $isComplete )
			? false
			: $pageIdAt( $i * $batchSize );

		// Open-ended end for the very last batch of a complete result;
		// otherwise the last row of this batch, clamped to the result size
		$end = ( $i == $lastBatch && $isComplete )
			? false
			: $pageIdAt( min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 ) );

		# Sanity check order
		if ( $start && $end && $start > $end ) {
			throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
		}

		$batches[] = [ $start, $end ];
	}

	return [ 'numRows' => $numRows, 'batches' => $batches ];
}
515
/**
 * Get a Title iterator for cascade-protected template/file use backlinks.
 *
 * Selects pages that use this title as a template and have a cascading
 * restriction (pr_cascade = 1). When the title is in the file namespace,
 * pages that use it as an image are included as well. Rows from both
 * queries are merged and de-duplicated by page ID.
 *
 * @return TitleArray Result of TitleArray::newFromResult()
 */
public function getCascadeProtectedLinks() {
	$dbr = $this->getDB();

	// @todo: use UNION without breaking tests that use temp tables
	$resSets = [];
	$resSets[] = $dbr->select(
		[ 'templatelinks', 'page_restrictions', 'page' ],
		[ 'page_namespace', 'page_title', 'page_id' ],
		[
			'tl_namespace' => $this->title->getNamespace(),
			'tl_title' => $this->title->getDBkey(),
			'tl_from = pr_page',
			'pr_cascade' => 1,
			'page_id = tl_from'
		],
		__METHOD__,
		[ 'DISTINCT' ]
	);
	if ( $this->title->getNamespace() == NS_FILE ) {
		$resSets[] = $dbr->select(
			[ 'imagelinks', 'page_restrictions', 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			[
				'il_to' => $this->title->getDBkey(),
				'il_from = pr_page',
				'pr_cascade' => 1,
				'page_id = il_from'
			],
			__METHOD__,
			[ 'DISTINCT' ]
		);
	}

	// Combine and de-duplicate the results
	$mergedRes = [];
	foreach ( $resSets as $res ) {
		foreach ( $res as $row ) {
			// Keyed by page ID so a page found by both queries appears once
			$mergedRes[$row->page_id] = $row;
		}
	}

	// Wrap the merged rows so they can be handed back as a title list
	return TitleArray::newFromResult(
		new FakeResultWrapper( array_values( $mergedRes ) ) );
}
566
/**
 * Returns check key for the backlinks cache for a particular title.
 *
 * The key is derived from the MD5 hash of the title's prefixed DB key.
 *
 * @return string
 */
private function makeCheckKey() {
	$titleHash = md5( $this->title->getPrefixedDBkey() );

	return $this->wanCache->makeKey( 'backlinks', $titleHash );
}
578}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
$wgUpdateRowsPerJob
Number of rows to update per job.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Class for fetching backlink lists, approximate backlink counts and partitions.
getCascadeProtectedLinks()
Get a Title iterator for cascade-protected template/file use backlinks.
array[] $partitionCache
Multi-dimensional array representing batches.
queryLinks( $table, $startId, $endId, $max, $select='all')
Get the backlinks for a given table.
getLinks( $table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
partition( $table, $batchSize)
Partition the backlinks into batches.
getPrefix( $table)
Get the field name prefix for a given table.
partitionResult( $res, $batchSize, $isComplete=true)
Partition a DB result with backlinks in it into batches.
__construct(Title $title)
Create a new BacklinkCache.
clear()
Clear locally stored data and database object.
getDB()
Get the replica DB connection to the database. If no connection exists yet, one is initialized.
__sleep()
Serialization handler, disallows serializing the database to prevent failures after this class is d...
getNumLinks( $table, $max=INF)
Get the approximate number of backlinks.
hasLinks( $table)
Check if there are any backlinks.
setDB( $db)
Set the Database object to use.
WANObjectCache $wanCache
$db
Local copy of a database object.
makeCheckKey()
Returns check key for the backlinks cache for a particular title.
ResultWrapper[] $fullResultCache
Contains the complete set of links from a database result.
getConditions( $table)
Get the SQL condition array for selecting backlinks, with a join on the page table.
$title
Local copy of a Title object.
static BacklinkCache $instance
MediaWiki exception.
MediaWikiServices is the service locator for the application scope of MediaWiki.
static newFromResult( $res)
Represents a title within MediaWiki.
Definition Title.php:40
Multi-datacenter aware caching interface.
Overloads the relevant methods of the real ResultsWrapper so it doesn't go anywhere near an actual da...
Result wrapper for grabbing data queried from an IDatabase object.
$res
Definition database.txt:21
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
const NS_FILE
Definition Defines.php:79
const LIST_OR
Definition Defines.php:55
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:1999
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
title
const DB_REPLICA
Definition defines.php:25