MediaWiki REL1_35
BacklinkCache.php
Go to the documentation of this file.
1<?php
28use MediaWiki\HookContainer\ProtectedHookAccessorTrait;
33
48 use ProtectedHookAccessorTrait;
49
/** @var BacklinkCache Instance most recently built by get(); reused while the title matches */
51 protected static $instance;
52
/**
 * @var array[] Partition data indexed by table name, then by batch size.
 *  Completed entries hold 'numRows' (link count) and 'batches'
 *  (list of [ start, end ] page ID ranges), as produced by partition().
 */
65 protected $partitionCache = [];
66
/**
 * @var array Database results indexed by table name. queryLinks() stores a
 *  result here only when it is known to contain every backlink for the table.
 */
75 protected $fullResultCache = [];
76
/** @var WANObjectCache Shared cache obtained from MediaWikiServices in the constructor */
80 protected $wanCache;
81
/**
 * @var mixed Local copy of a database object; null until setDB() is called
 *  or getDB() lazily fetches a DB_REPLICA handle.
 */
89 protected $db;
90
/** @var Title Local copy of the Title object whose backlinks are tracked */
94 protected $title;
95
/** Expiry passed to WANObjectCache::set() for counts and partitions (presumably seconds, i.e. one hour) */
96 private const CACHE_EXPIRY = 3600;
97
/**
 * Create a new BacklinkCache.
 *
 * @param Title $title Page whose backlinks this object will look up
 */
public function __construct( Title $title ) {
	// The shared cache comes from the global service container.
	$services = MediaWikiServices::getInstance();
	$this->wanCache = $services->getMainWANObjectCache();
	$this->title = $title;
}
107
/**
 * Return a BacklinkCache for the given title.
 *
 * Only one instance is remembered at a time: it is handed back while the
 * requested title equals the remembered one, and replaced otherwise.
 *
 * @param Title $title Title to get a backlink cache for
 * @return BacklinkCache
 */
public static function get( Title $title ) {
	$current = self::$instance;
	if ( $current && $current->title->equals( $title ) ) {
		return $current;
	}

	self::$instance = new self( $title );

	return self::$instance;
}
122
/**
 * Serialization handler.
 *
 * Lists the properties to serialize. The database handle and the
 * WAN cache object are deliberately absent from the list.
 *
 * @return string[] Property names to serialize
 */
public function __sleep() {
	$serializable = [ 'partitionCache', 'fullResultCache', 'title' ];

	return $serializable;
}
133
/**
 * Clear locally stored data and the database object.
 *
 * Also touches the check key that guards this title's entries in the
 * shared cache; get() calls in this class pass that key along.
 */
public function clear() {
	// Drop both in-process caches.
	$this->partitionCache = [];
	$this->fullResultCache = [];

	$checkKey = $this->makeCheckKey();
	$this->wanCache->touchCheckKey( $checkKey );

	// Forget the handle so getDB() fetches a new one on next use.
	$this->db = null;
}
143
/**
 * Set the Database object to use.
 *
 * Once set, getDB() returns this object instead of fetching a replica
 * handle, until clear() resets it.
 *
 * @param mixed $db Database object (presumably an IDatabase; not type-checked here)
 */
public function setDB( $db ) {
	$this->db = $db;
}
152
/**
 * Get the database object used for queries.
 *
 * Returns the handle injected through setDB() or fetched earlier;
 * when there is none, a DB_REPLICA handle is fetched and remembered.
 *
 * @return mixed Database object (whatever setDB() supplied or wfGetDB() returned)
 */
protected function getDB() {
	if ( $this->db !== null ) {
		return $this->db;
	}

	$this->db = wfGetDB( DB_REPLICA );

	return $this->db;
}
165
/**
 * Get the backlinks for a given table, wrapped as a TitleArray.
 *
 * @param string $table Link table name
 * @param int|bool $startId Lowest source page ID to include, or false for no lower bound
 * @param int|bool $endId Highest source page ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows; INF for no limit
 * @return mixed Result of TitleArray::newFromResult()
 */
public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
	$rows = $this->queryLinks( $table, $startId, $endId, $max );

	return TitleArray::newFromResult( $rows );
}
177
/**
 * Get the backlinks for a given table.
 *
 * An unbounded, unlimited request is served from the in-process full
 * result cache when possible. Otherwise the database is queried, and
 * the result is stored in that cache only if it is a full-row
 * ('all') query without ID bounds that returned fewer than $max rows.
 *
 * @param string $table Link table name
 * @param int|bool $startId Lowest source page ID to include, or false
 * @param int|bool $endId Highest source page ID to include, or false
 * @param int|float $max Maximum number of rows; INF for no limit
 * @param string $select 'all' to join with page and select title fields,
 *  'ids' to select only the source page ID (aliased as page_id)
 * @return mixed Database result object
 */
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
	$unbounded = !$startId && !$endId;

	if ( $unbounded && is_infinite( $max ) && isset( $this->fullResultCache[$table] ) ) {
		wfDebug( __METHOD__ . ": got results from cache" );

		return $this->fullResultCache[$table];
	}

	wfDebug( __METHOD__ . ": got results from DB" );
	$fromField = $this->getPrefix( $table ) . '_from';
	$conds = $this->getConditions( $table );
	// Bound on the link table's own "from" field rather than the joined
	// page_id, since the database may not propagate the index across the join.
	if ( $startId ) {
		$conds[] = "$fromField >= " . intval( $startId );
	}
	if ( $endId ) {
		$conds[] = "$fromField <= " . intval( $endId );
	}

	$options = [ 'ORDER BY' => $fromField ];
	if ( is_finite( $max ) && $max > 0 ) {
		$options['LIMIT'] = $max;
	}

	$dbr = $this->getDB();
	if ( $select === 'ids' ) {
		// No JOIN with page here, so strip every condition that mentions page_id.
		$linkOnlyConds = array_filter( $conds, function ( $clause ) {
			return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
		} );
		$res = $dbr->select(
			$table,
			[ 'page_id' => $fromField ],
			$linkOnlyConds,
			__METHOD__,
			$options
		);
	} else {
		// JOIN the link table with page to obtain title information.
		$joinOptions = array_merge( [ 'STRAIGHT_JOIN' ], $options );
		$res = $dbr->select(
			[ $table, 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			$conds,
			__METHOD__,
			$joinOptions
		);
	}

	// Fewer rows than the limit means nothing was cut off, so the result is complete.
	$isFullResult = $select === 'all' && $unbounded && $res->numRows() < $max;
	if ( $isFullResult ) {
		$this->fullResultCache[$table] = $res;
	} else {
		wfDebug( __METHOD__ . ": results from DB were uncacheable" );
	}

	return $res;
}
242
/**
 * Get the field name prefix for a given table.
 *
 * Tables not in the built-in map are offered to the
 * BacklinkCacheGetPrefix hook, which may supply a prefix.
 *
 * @param string $table Link table name
 * @return string Field prefix without the trailing underscore (e.g. 'pl')
 * @throws MWException When neither the built-in map nor the hook yields a prefix
 */
protected function getPrefix( $table ) {
	static $knownPrefixes = [
		'pagelinks' => 'pl',
		'imagelinks' => 'il',
		'categorylinks' => 'cl',
		'templatelinks' => 'tl',
		'redirect' => 'rd',
	];

	if ( isset( $knownPrefixes[$table] ) ) {
		return $knownPrefixes[$table];
	}

	// Unknown table: let hook handlers fill in $prefix.
	$prefix = null;
	$this->getHookRunner()->onBacklinkCacheGetPrefix( $table, $prefix );
	if ( !$prefix ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $prefix;
}
270
/**
 * Get the SQL condition array for selecting backlinks, with a join
 * on the page table.
 *
 * Tables without built-in handling are offered to the
 * BacklinkCacheGetConditions hook, which may supply the conditions.
 *
 * @param string $table Link table name
 * @return array Conditions in the form accepted by the database select() call
 * @throws MWException When the table is unknown and no hook supplies conditions
 */
protected function getConditions( $table ) {
	$prefix = $this->getPrefix( $table );
	// Join clause tying the link row to its source page.
	$pageJoin = "page_id={$prefix}_from";

	switch ( $table ) {
		case 'pagelinks':
		case 'templatelinks':
			return [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
				$pageJoin
			];

		case 'redirect':
			$conds = [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
			];
			// Accept only rows whose interwiki field is empty or NULL.
			$conds[] = $this->getDB()->makeList( [
				"{$prefix}_interwiki" => '',
				"{$prefix}_interwiki IS NULL",
			], LIST_OR );
			$conds[] = $pageJoin;

			return $conds;

		case 'imagelinks':
		case 'categorylinks':
			return [
				"{$prefix}_to" => $this->title->getDBkey(),
				$pageJoin
			];

		default:
			// Unknown table: let hook handlers fill in $conds.
			$conds = null;
			$this->getHookRunner()->onBacklinkCacheGetConditions( $table, $this->title, $conds );
			if ( !$conds ) {
				throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
			}

			return $conds;
	}
}
318
/**
 * Check if there are any backlinks.
 *
 * @param string $table Link table name
 * @return bool True when at least one backlink is counted
 */
public function hasLinks( $table ) {
	// A limit of 1 is enough to tell "none" from "some".
	$found = $this->getNumLinks( $table, 1 );

	return $found > 0;
}
327
/**
 * Get the approximate number of backlinks.
 *
 * Sources are tried in order: the in-process partition cache, the
 * in-process full result cache, the shared WAN cache, and finally
 * the database.
 *
 * @param string $table Link table name
 * @param int|float $max Only count up to this many backlinks; INF for no limit
 * @return int|float Number of backlinks, capped at $max
 */
public function getNumLinks( $table, $max = INF ) {
	global $wgUpdateRowsPerJob;

	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table] ) ) {
		$entry = reset( $this->partitionCache[$table] );
		// Use completed entries only. Anything else (such as a placeholder
		// left by an interrupted partition() call) has no usable row count,
		// so fall through to the other sources.
		if ( is_array( $entry ) && isset( $entry['numRows'] ) ) {
			return min( $max, $entry['numRows'] );
		}
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		return min( $max, $this->fullResultCache[$table]->numRows() );
	}

	$memcKey = $this->wanCache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);

	// 3) ... fallback to memcached ...
	$curTTL = INF;
	$count = $this->wanCache->get(
		$memcKey,
		$curTTL,
		[
			$this->makeCheckKey()
		]
	);
	// Compare against false (the cache-miss value) rather than testing
	// truthiness, so that a cached count of 0 is honoured too. Otherwise
	// pages without backlinks would hit the database on every call.
	if ( $count !== false && ( $curTTL > 0 ) ) {
		return min( $max, $count );
	}

	// 4) fetch from the database ...
	if ( is_infinite( $max ) ) { // no limit at all
		// Use partition() since it will batch the query and skip the JOIN.
		// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
		$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache
		return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
	}

	// Probably some sane limit.
	// Fetch the full title info, since the caller will likely need it next
	$count = $this->getLinks( $table, false, false, $max )->count();
	if ( $count < $max ) {
		// Fewer rows than the limit: this is the full count, so share it.
		$this->wanCache->set( $memcKey, $count, self::CACHE_EXPIRY );
	}

	return min( $max, $count );
}
384
/**
 * Partition the backlinks into batches.
 *
 * Returns an array giving the start and end of each range. The first
 * batch has a start of false, and the last batch has an end of false.
 *
 * Sources are tried in order: the in-process partition cache, the
 * in-process full result cache, the shared WAN cache, and finally the
 * database. An entry is stored in the partition cache only once it is
 * complete, so a failure part-way through (e.g. a database error)
 * cannot leave a half-built entry behind for later calls to trip over.
 *
 * @param string $table Link table name
 * @param int $batchSize Number of rows per batch
 * @return array[] List of [ start, end ] pairs of source page IDs
 */
public function partition( $table, $batchSize ) {
	// 1) try partition cache ...
	// Only array entries count as hits: isset() alone would also accept a
	// non-array placeholder and then index into it.
	$cached = $this->partitionCache[$table][$batchSize] ?? null;
	if ( is_array( $cached ) ) {
		wfDebug( __METHOD__ . ": got from partition cache" );

		return $cached['batches'];
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		wfDebug( __METHOD__ . ": got from full result cache" );

		return $cacheEntry['batches'];
	}

	$memcKey = $this->wanCache->makeKey(
		'backlinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table,
		$batchSize
	);

	// 3) ... fallback to memcached ...
	$curTTL = 0;
	$memcValue = $this->wanCache->get(
		$memcKey,
		$curTTL,
		[
			$this->makeCheckKey()
		]
	);
	if ( is_array( $memcValue ) && ( $curTTL > 0 ) ) {
		$this->partitionCache[$table][$batchSize] = $memcValue;
		wfDebug( __METHOD__ . ": got from memcached $memcKey" );

		return $memcValue['batches'];
	}

	// 4) ... finally fetch from the slow database :(
	$cacheEntry = [ 'numRows' => 0, 'batches' => [] ]; // final result
	// Do the selects in batches to avoid client-side OOMs (T45452).
	// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
	$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
	$start = false;
	do {
		$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
		$partitions = $this->partitionResult( $res, $batchSize, false );
		// Merge the link count and range partitions for this chunk
		$cacheEntry['numRows'] += $partitions['numRows'];
		$cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
		if ( count( $partitions['batches'] ) ) {
			[ , $lEnd ] = end( $partitions['batches'] );
			$start = $lEnd + 1; // pick up after this inclusive range
		}
		// A chunk that filled the LIMIT may have more rows after it.
	} while ( $partitions['numRows'] >= $selectSize );
	// Make sure the first range has start=false and the last one has end=false
	if ( count( $cacheEntry['batches'] ) ) {
		$cacheEntry['batches'][0][0] = false;
		$cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
	}

	// The entry is complete now; only at this point is it safe to remember it.
	$this->partitionCache[$table][$batchSize] = $cacheEntry;

	// Save partitions to memcached
	$this->wanCache->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

	// Save backlink count to memcached
	$memcKey = $this->wanCache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);
	$this->wanCache->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

	wfDebug( __METHOD__ . ": got from database" );

	return $cacheEntry['batches'];
}
474
/**
 * Partition a DB result with backlinks in it into batches.
 *
 * Each batch is an inclusive [ start, end ] pair of page IDs read from
 * the first and last row of that batch. When $isComplete is true, the
 * first start and the last end are set to false instead.
 *
 * @param mixed $res Database result exposing numRows(), seek() and
 *  fetchObject(), whose rows carry a page_id field
 * @param int $batchSize Number of rows per batch
 * @param bool $isComplete Whether $res includes all the backlinks
 * @return array Map with 'numRows' (row count of $res) and 'batches' (list of pairs)
 * @throws MWException When a batch's start ID is greater than its end ID
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
	$numRows = $res->numRows();
	$numBatches = ceil( $numRows / $batchSize );
	$lastBatch = $numBatches - 1;

	// Read the page ID stored at a given row offset of the result.
	$pageIdAt = function ( $rowNum ) use ( $res ) {
		$res->seek( $rowNum );
		$row = $res->fetchObject();

		return (int)$row->page_id;
	};

	$batches = [];
	for ( $i = 0; $i < $numBatches; $i++ ) {
		$start = ( $i == 0 && $isComplete )
			? false
			: $pageIdAt( $i * $batchSize );

		// The final batch may be short, so clamp to the last row.
		$end = ( $i == $lastBatch && $isComplete )
			? false
			: $pageIdAt( min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 ) );

		# Sanity check order
		if ( $start && $end && $start > $end ) {
			throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
		}

		$batches[] = [ $start, $end ];
	}

	return [ 'numRows' => $numRows, 'batches' => $batches ];
}
517
/**
 * Get a Title iterator for cascade-protected template/file use backlinks.
 *
 * Template links are always queried; image links are queried as well
 * when this title is in the file namespace. Rows from both queries
 * are merged and de-duplicated by page ID.
 *
 * @return mixed Result of TitleArray::newFromResult()
 */
public function getCascadeProtectedLinks() {
	$dbr = $this->getDB();
	$pageFields = [ 'page_namespace', 'page_title', 'page_id' ];
	$distinct = [ 'DISTINCT' ];

	// @todo: use UNION without breaking tests that use temp tables
	$resSets = [
		$dbr->select(
			[ 'templatelinks', 'page_restrictions', 'page' ],
			$pageFields,
			[
				'tl_namespace' => $this->title->getNamespace(),
				'tl_title' => $this->title->getDBkey(),
				'tl_from = pr_page',
				'pr_cascade' => 1,
				'page_id = tl_from'
			],
			__METHOD__,
			$distinct
		)
	];

	if ( $this->title->getNamespace() == NS_FILE ) {
		$resSets[] = $dbr->select(
			[ 'imagelinks', 'page_restrictions', 'page' ],
			$pageFields,
			[
				'il_to' => $this->title->getDBkey(),
				'il_from = pr_page',
				'pr_cascade' => 1,
				'page_id = il_from'
			],
			__METHOD__,
			$distinct
		);
	}

	// Keying by page ID collapses pages returned by both queries.
	$rowsByPageId = [];
	foreach ( $resSets as $resultSet ) {
		foreach ( $resultSet as $row ) {
			$rowsByPageId[$row->page_id] = $row;
		}
	}

	$merged = new FakeResultWrapper( array_values( $rowsByPageId ) );

	return TitleArray::newFromResult( $merged );
}
568
/**
 * Returns check key for the backlinks cache for a particular title.
 *
 * @return string Key built from 'backlinks' and the MD5 of the prefixed DB key
 */
private function makeCheckKey() {
	$titleHash = md5( $this->title->getPrefixedDBkey() );

	return $this->wanCache->makeKey( 'backlinks', $titleHash );
}
580}
$wgUpdateRowsPerJob
Number of rows to update per job.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Class for fetching backlink lists, approximate backlink counts and partitions.
getCascadeProtectedLinks()
Get a Title iterator for cascade-protected template/file use backlinks.
array[] $partitionCache
Multi-dimensional array representing batches.
queryLinks( $table, $startId, $endId, $max, $select='all')
Get the backlinks for a given table.
getLinks( $table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
partition( $table, $batchSize)
Partition the backlinks into batches.
IResultWrapper[] $fullResultCache
Contains the complete set of links from a database result.
getPrefix( $table)
Get the field name prefix for a given table.
partitionResult( $res, $batchSize, $isComplete=true)
Partition a DB result with backlinks in it into batches.
__construct(Title $title)
Create a new BacklinkCache.
clear()
Clear locally stored data and database object.
getDB()
Get the replica DB connection to the database. If none exists yet, the connection is initialized.
__sleep()
Serialization handler; disallows serializing the database to prevent failures after this class is d...
getNumLinks( $table, $max=INF)
Get the approximate number of backlinks.
hasLinks( $table)
Check if there are any backlinks.
setDB( $db)
Set the Database object to use.
WANObjectCache $wanCache
$db
Local copy of a database object.
makeCheckKey()
Returns check key for the backlinks cache for a particular title.
getConditions( $table)
Get the SQL condition array for selecting backlinks, with a join on the page table.
$title
Local copy of a Title object.
static BacklinkCache $instance
MediaWiki exception.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Represents a title within MediaWiki.
Definition Title.php:42
Multi-datacenter aware caching interface.
Overloads the relevant methods of the real ResultsWrapper so it doesn't go anywhere near an actual da...
const NS_FILE
Definition Defines.php:76
const LIST_OR
Definition Defines.php:52
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
Result wrapper for grabbing data queried from an IDatabase object.
const DB_REPLICA
Definition defines.php:25