MediaWiki REL1_29
BacklinkCache.php
Go to the documentation of this file.
<?php

/** @var BacklinkCache Process-cached instance, reused while the title matches (see self::get()) */
protected static $instance;

/**
 * @var array[] Multi-dimensional array representing batches, in the form
 *  [ table => [ batchSize => [ 'numRows' => int, 'batches' => [ [ start, end ], ... ] ] ] ]
 */
protected $partitionCache = [];

/** @var ResultWrapper[] Full query results from the database, keyed by table name */
protected $fullResultCache = [];

/** @var IDatabase Local copy of a database object; lazily initialized by getDB() */
protected $db;

/** @var Title Local copy of the Title whose backlinks this cache describes */
protected $title;

/** Seconds for which memcached-backed counts and partitions remain valid */
const CACHE_EXPIRY = 3600;
/**
 * Create a new BacklinkCache for the given title.
 *
 * @param Title $title The title whose backlinks will be cached
 */
public function __construct( Title $title ) {
	$this->title = $title;
}
98
/**
 * Get a cached instance for the given title, creating a fresh one when
 * no instance exists yet or the cached one belongs to a different title.
 *
 * @param Title $title
 * @return BacklinkCache
 */
public static function get( Title $title ) {
	$cached = self::$instance;
	if ( $cached === null || !$cached->title->equals( $title ) ) {
		self::$instance = new self( $title );
	}

	return self::$instance;
}
113
/**
 * Serialization handler: list every property except the database handle,
 * so a deserialized instance never carries a stale connection.
 *
 * @return string[] Names of the properties to serialize
 */
function __sleep() {
	return [
		'partitionCache',
		'fullResultCache',
		'title',
	];
}
124
/**
 * Clear locally stored data (partition and full-result caches) and
 * drop the database handle.
 */
public function clear() {
	$this->partitionCache = [];
	$this->fullResultCache = [];
	unset( $this->db );
}
133
/**
 * Set the Database object to use for subsequent queries.
 *
 * @param IDatabase $db
 */
public function setDB( $db ) {
	$this->db = $db;
}
142
/**
 * Get the replica DB connection, lazily acquiring one on first use
 * when none has been injected via setDB().
 *
 * @return IDatabase
 */
protected function getDB() {
	if ( !isset( $this->db ) ) {
		// No handle yet: acquire a replica connection
		$this->db = wfGetDB( DB_REPLICA );
	}

	return $this->db;
}
155
/**
 * Get the backlinks for a given table as a Title iterator.
 *
 * @param string $table
 * @param int|bool $startId Lowest backlink "from" page_id, or false for no bound
 * @param int|bool $endId Highest backlink "from" page_id, or false for no bound
 * @param int|float $max Maximum number of rows (INF for no limit)
 * @return TitleArrayFromResult
 */
public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
	$res = $this->queryLinks( $table, $startId, $endId, $max );

	return TitleArray::newFromResult( $res );
}
167
/**
 * Get the backlinks for a given table, using the locally cached full
 * result when possible and caching uncapped full results for reuse.
 *
 * @param string $table Backlink table name (e.g. 'pagelinks')
 * @param int|bool $startId Lowest backlink "from" page_id, or false for no bound
 * @param int|bool $endId Highest backlink "from" page_id, or false for no bound
 * @param int|float $max Maximum number of rows (INF for no limit)
 * @param string $select 'all' (JOIN with page for title info) or 'ids' (page_id only)
 * @return ResultWrapper
 */
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
	$fromField = $this->getPrefix( $table ) . '_from';

	if ( !$startId && !$endId && is_infinite( $max )
		&& isset( $this->fullResultCache[$table] )
	) {
		wfDebug( __METHOD__ . ": got results from cache\n" );
		$res = $this->fullResultCache[$table];
	} else {
		wfDebug( __METHOD__ . ": got results from DB\n" );
		$conds = $this->getConditions( $table );
		// Use the from field in the condition rather than the joined page_id,
		// because databases are stupid and don't necessarily propagate indexes.
		if ( $startId ) {
			$conds[] = "$fromField >= " . intval( $startId );
		}
		if ( $endId ) {
			$conds[] = "$fromField <= " . intval( $endId );
		}
		$options = [ 'ORDER BY' => $fromField ];
		if ( is_finite( $max ) && $max > 0 ) {
			$options['LIMIT'] = $max;
		}

		if ( $select === 'ids' ) {
			// Just select from the backlink table and ignore the page JOIN
			$res = $this->getDB()->select(
				$table,
				[ $this->getPrefix( $table ) . '_from AS page_id' ],
				array_filter( $conds, function ( $clause ) { // kind of janky
					return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
				} ),
				__METHOD__,
				// BUGFIX: the $options argument was missing here, leaving a
				// dangling comma (syntax error) and dropping ORDER BY/LIMIT.
				$options
			);
		} else {
			// Select from the backlink table and JOIN with page title information
			$res = $this->getDB()->select(
				[ $table, 'page' ],
				[ 'page_namespace', 'page_title', 'page_id' ],
				$conds,
				__METHOD__,
				array_merge( [ 'STRAIGHT_JOIN' ], $options )
			);
		}

		if ( $select === 'all' && !$startId && !$endId && $res->numRows() < $max ) {
			// The full results fit within the limit, so cache them
			$this->fullResultCache[$table] = $res;
		} else {
			wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
		}
	}

	return $res;
}
234
/**
 * Get the field name prefix for a given backlink table.
 *
 * @param string $table
 * @throws MWException When the table is unknown and no hook supplies a prefix
 * @return string
 */
protected function getPrefix( $table ) {
	static $prefixes = [
		'pagelinks' => 'pl',
		'imagelinks' => 'il',
		'categorylinks' => 'cl',
		'templatelinks' => 'tl',
		'redirect' => 'rd',
	];

	if ( isset( $prefixes[$table] ) ) {
		return $prefixes[$table];
	}

	// Unknown table: give extensions a chance to supply a prefix
	$prefix = null;
	Hooks::run( 'BacklinkCacheGetPrefix', [ $table, &$prefix ] );
	if ( !$prefix ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $prefix;
}
262
/**
 * Get the SQL condition array for selecting backlinks, including a join
 * condition on the page table.
 *
 * @param string $table
 * @throws MWException When the table is unknown and no hook supplies conditions
 * @return array|null
 */
protected function getConditions( $table ) {
	$prefix = $this->getPrefix( $table );

	switch ( $table ) {
		case 'pagelinks':
		case 'templatelinks':
			return [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
				"page_id={$prefix}_from"
			];
		case 'redirect':
			return [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
				// Match local redirects only; '' and NULL both mean "not interwiki"
				$this->getDB()->makeList( [
					"{$prefix}_interwiki" => '',
					"{$prefix}_interwiki IS NULL",
				], LIST_OR ),
				"page_id={$prefix}_from"
			];
		case 'imagelinks':
		case 'categorylinks':
			return [
				"{$prefix}_to" => $this->title->getDBkey(),
				"page_id={$prefix}_from"
			];
	}

	// Unknown table: let extensions supply the conditions
	$conds = null;
	Hooks::run( 'BacklinkCacheGetConditions', [ $table, $this->title, &$conds ] );
	if ( !$conds ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $conds;
}
310
/**
 * Check whether there is at least one backlink in the given table.
 *
 * @param string $table
 * @return bool
 */
public function hasLinks( $table ) {
	// Counting up to a single row is enough to answer the question
	return $this->getNumLinks( $table, 1 ) > 0;
}
319
/**
 * Get the approximate number of backlinks, consulting (in order) the
 * partition cache, the full result cache, memcached, and finally the
 * database.
 *
 * @param string $table
 * @param int|float $max Only count up to this many backlinks (INF for no limit)
 * @return int
 */
public function getNumLinks( $table, $max = INF ) {
	// BUGFIX: this declaration was missing; $wgUpdateRowsPerJob was an
	// undefined local when used for the uncapped partition() path below.
	global $wgUpdateRowsPerJob;

	$cache = ObjectCache::getMainWANInstance();
	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table] ) ) {
		$entry = reset( $this->partitionCache[$table] );

		return min( $max, $entry['numRows'] );
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		return min( $max, $this->fullResultCache[$table]->numRows() );
	}

	$memcKey = $cache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);

	// 3) ... fallback to memcached ...
	$count = $cache->get( $memcKey );
	if ( $count ) {
		return min( $max, $count );
	}

	// 4) fetch from the database ...
	if ( is_infinite( $max ) ) { // no limit at all
		// Use partition() since it will batch the query and skip the JOIN.
		// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
		$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache
		return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
	} else { // probably some sane limit
		// Fetch the full title info, since the caller will likely need it next
		$count = $this->getLinks( $table, false, false, $max )->count();
		if ( $count < $max ) { // full count
			$cache->set( $memcKey, $count, self::CACHE_EXPIRY );
		}
	}

	return min( $max, $count );
}
370
/**
 * Partition the backlinks into batches of inclusive [ start, end ] page_id
 * ranges. Consults the partition cache, the full result cache, memcached,
 * and finally the database, storing whatever it computes back into the
 * caches along the way.
 *
 * @param string $table
 * @param int $batchSize
 * @return array[] Array of [ start, end ] page_id ranges
 */
public function partition( $table, $batchSize ) {
	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
		wfDebug( __METHOD__ . ": got from partition cache\n" );

		return $this->partitionCache[$table][$batchSize]['batches'];
	}

	$cache = ObjectCache::getMainWANInstance();
	$this->partitionCache[$table][$batchSize] = false;
	// Alias so that every assignment below writes through to the cache slot
	$entry =& $this->partitionCache[$table][$batchSize];

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		$entry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		wfDebug( __METHOD__ . ": got from full result cache\n" );

		return $entry['batches'];
	}

	$memcKey = $cache->makeKey(
		'backlinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table,
		$batchSize
	);

	// 3) ... fallback to memcached ...
	$fromMemc = $cache->get( $memcKey );
	if ( is_array( $fromMemc ) ) {
		$entry = $fromMemc;
		wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

		return $entry['batches'];
	}

	// 4) ... finally fetch from the slow database :(
	$entry = [ 'numRows' => 0, 'batches' => [] ]; // final result
	// Do the selects in batches to avoid client-side OOMs (T45452).
	// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
	$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
	$start = false;
	do {
		$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
		$chunk = $this->partitionResult( $res, $batchSize, false );
		// Merge the link count and range partitions for this chunk
		$entry['numRows'] += $chunk['numRows'];
		$entry['batches'] = array_merge( $entry['batches'], $chunk['batches'] );
		if ( count( $chunk['batches'] ) ) {
			list( , $lastEnd ) = end( $chunk['batches'] );
			$start = $lastEnd + 1; // pick up after this inclusive range
		}
	} while ( $chunk['numRows'] >= $selectSize );
	// Make sure the first range has start=false and the last one has end=false
	if ( count( $entry['batches'] ) ) {
		$entry['batches'][0][0] = false;
		$entry['batches'][count( $entry['batches'] ) - 1][1] = false;
	}

	// Save partitions to memcached
	$cache->set( $memcKey, $entry, self::CACHE_EXPIRY );

	// Save backlink count to memcached
	$memcKey = $cache->makeKey(
		'numbacklinks',
		md5( $this->title->getPrefixedDBkey() ),
		$table
	);
	$cache->set( $memcKey, $entry['numRows'], self::CACHE_EXPIRY );

	wfDebug( __METHOD__ . ": got from database\n" );

	return $entry['batches'];
}
454
/**
 * Partition a DB result with backlinks in it into batches of inclusive
 * [ start, end ] page_id ranges.
 *
 * @param ResultWrapper $res Result ordered by the backlink "from" field
 * @param int $batchSize
 * @param bool $isComplete Whether $res holds the entire backlink set; when
 *  true the first batch starts at false and the last ends at false
 * @throws MWException If the result rows turn out to be out of order
 * @return array [ 'numRows' => int, 'batches' => [ [ start, end ], ... ] ]
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
	$batches = [];
	$numRows = $res->numRows();
	$numBatches = ceil( $numRows / $batchSize );

	for ( $i = 0; $i < $numBatches; $i++ ) {
		// The first batch of a complete set is open-ended at the start
		if ( $i == 0 && $isComplete ) {
			$start = false;
		} else {
			$res->seek( $i * $batchSize );
			$firstRow = $res->fetchObject();
			$start = (int)$firstRow->page_id;
		}

		// The last batch of a complete set is open-ended at the end
		if ( $i == ( $numBatches - 1 ) && $isComplete ) {
			$end = false;
		} else {
			$res->seek( min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 ) );
			$lastRow = $res->fetchObject();
			$end = (int)$lastRow->page_id;
		}

		# Sanity check order
		if ( $start && $end && $start > $end ) {
			throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
		}

		$batches[] = [ $start, $end ];
	}

	return [ 'numRows' => $numRows, 'batches' => $batches ];
}
497
/**
 * Get a Title iterator for cascade-protected template/file use backlinks:
 * pages that transclude (or, for files, embed) this title while a
 * cascading protection applies to them.
 *
 * @return TitleArray
 */
public function getCascadeProtectedLinks() {
	$dbr = $this->getDB();

	// @todo: use UNION without breaking tests that use temp tables
	$resSets = [];
	$resSets[] = $dbr->select(
		[ 'templatelinks', 'page_restrictions', 'page' ],
		[ 'page_namespace', 'page_title', 'page_id' ],
		[
			'tl_namespace' => $this->title->getNamespace(),
			'tl_title' => $this->title->getDBkey(),
			'tl_from = pr_page',
			'pr_cascade' => 1,
			'page_id = tl_from'
		],
		__METHOD__,
		[ 'DISTINCT' ]
	);
	if ( $this->title->getNamespace() == NS_FILE ) {
		$resSets[] = $dbr->select(
			[ 'imagelinks', 'page_restrictions', 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			[
				'il_to' => $this->title->getDBkey(),
				'il_from = pr_page',
				'pr_cascade' => 1,
				'page_id = il_from'
			],
			__METHOD__,
			[ 'DISTINCT' ]
		);
	}

	// Combine and de-duplicate the results
	$mergedRes = [];
	foreach ( $resSets as $res ) {
		foreach ( $res as $row ) {
			$mergedRes[$row->page_id] = $row;
		}
	}

	// BUGFIX: restore the dropped "return TitleArray::newFromResult(" line;
	// the extracted text left a dangling expression and unbalanced parenthesis.
	return TitleArray::newFromResult(
		new FakeResultWrapper( array_values( $mergedRes ) ) );
}
548}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
$wgUpdateRowsPerJob
Number of rows to update per job.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Class for fetching backlink lists, approximate backlink counts and partitions.
getCascadeProtectedLinks()
Get a Title iterator for cascade-protected template/file use backlinks.
array[] $partitionCache
Multi-dimensional array representing batches.
queryLinks( $table, $startId, $endId, $max, $select='all')
Get the backlinks for a given table.
getLinks( $table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
partition( $table, $batchSize)
Partition the backlinks into batches.
getPrefix( $table)
Get the field name prefix for a given table.
partitionResult( $res, $batchSize, $isComplete=true)
Partition a DB result with backlinks in it into batches.
__construct(Title $title)
Create a new BacklinkCache.
clear()
Clear locally stored data and database object.
getDB()
Get the replica DB connection to the database When non existing, will initialize the connection.
__sleep()
Serialization handler; disallows serializing the database handle to prevent failures after this class is d...
getNumLinks( $table, $max=INF)
Get the approximate number of backlinks.
hasLinks( $table)
Check if there are any backlinks.
setDB( $db)
Set the Database object to use.
$db
Local copy of a database object.
ResultWrapper[] $fullResultCache
Contains the whole links from a database result.
getConditions( $table)
Get the SQL condition array for selecting backlinks, with a join on the page table.
$title
Local copy of a Title object.
static BacklinkCache $instance
MediaWiki exception.
static newFromResult( $res)
Represents a title within MediaWiki.
Definition Title.php:39
Overloads the relevant methods of the real ResultsWrapper so it doesn't go anywhere near an actual da...
Result wrapper for grabbing data queried from an IDatabase object.
$res
Definition database.txt:21
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
when a variable name is used in a it is silently declared as a new local masking the global
Definition design.txt:95
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
const NS_FILE
Definition Defines.php:68
const LIST_OR
Definition Defines.php:44
the array() calling protocol came about after MediaWiki 1.4rc1.
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist Do not use this to implement individual filters if they are compatible with the ChangesListFilter and ChangesListFilterGroup structure use sub classes of those in conjunction with the ChangesListSpecialPageStructuredFilters hook This hook can be used to implement filters that do not implement that or custom behavior that is not an individual filter e g Watchlist and Watchlist you will want to construct new ChangesListBooleanFilter or ChangesListStringOptionsFilter objects When constructing you specify which group they belong to You can reuse existing or create your you must register them with $special registerFilterGroup removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition hooks.txt:1102
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:40
$cache
Definition mcc.php:33
title
const DB_REPLICA
Definition defines.php:25