MediaWiki REL1_28
BacklinkCache.php
Go to the documentation of this file.
1<?php
/** @var BacklinkCache Instance most recently handed out by get() */
protected static $instance;

/**
 * Multi dimensions array representing batches.
 * Indexed by table name, then by batch size; each entry carries the
 * keys 'numRows' and 'batches'.
 *
 * @var array[]
 */
protected $partitionCache = [];

/**
 * Contains the whole links from a database result, indexed by table name.
 *
 * @var ResultWrapper[]
 */
protected $fullResultCache = [];

/**
 * Local copy of a database object.
 * Assigned through setDB(), or lazily by getDB() when still unset.
 */
protected $db;

/** @var Title Local copy of a Title object */
protected $title;

/** Expiry value handed to the WAN cache set() calls made by this class */
const CACHE_EXPIRY = 3600;
85
/**
 * Create a new BacklinkCache.
 *
 * @param Title $title Title object whose backlinks this cache describes
 */
public function __construct( Title $title ) {
	// The title is the only state required up front; caches start out empty.
	$this->title = $title;
}
94
/**
 * Return the shared BacklinkCache for a title.
 *
 * Only one instance is remembered at a time: it is reused when it belongs
 * to an equal title and replaced by a fresh object otherwise.
 *
 * @param Title $title Title object to get the backlink cache for
 * @return BacklinkCache
 */
public static function get( Title $title ) {
	$current = self::$instance;
	if ( $current && $current->title->equals( $title ) ) {
		return $current;
	}

	self::$instance = new self( $title );

	return self::$instance;
}
109
/**
 * Serialization handler.
 *
 * Lists the properties to serialize; $db is left out, so the database
 * object is never part of the serialized form.
 *
 * @return string[] Names of the properties to serialize
 */
function __sleep() {
	$serializable = [ 'partitionCache', 'fullResultCache', 'title' ];

	return $serializable;
}
120
/**
 * Clear locally stored data and database object.
 *
 * After this call getDB() will fetch a new connection on demand, because
 * the $db property is no longer set.
 */
public function clear() {
	unset( $this->db );
	$this->fullResultCache = [];
	$this->partitionCache = [];
}
129
/**
 * Set the Database object to use.
 *
 * @param mixed $db Database object that getDB() should return from now on
 */
public function setDB( $db ) {
	// Overrides whatever connection getDB() would otherwise create lazily.
	$this->db = $db;
}
138
/**
 * Get the replica DB connection to the database.
 *
 * When no database object is set yet, one is obtained via
 * wfGetDB( DB_REPLICA ) and kept for later calls.
 *
 * @return mixed The database object stored in $this->db
 */
protected function getDB() {
	if ( isset( $this->db ) ) {
		return $this->db;
	}

	$this->db = wfGetDB( DB_REPLICA );

	return $this->db;
}
151
/**
 * Get the backlinks for a given table.
 *
 * Thin wrapper that runs queryLinks() in its default 'all' mode and hands
 * the rows to TitleArray::newFromResult().
 *
 * @param string $table Link table name
 * @param int|bool $startId Lowest "from" ID to include, or false for no lower bound
 * @param int|bool $endId Highest "from" ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows, INF for no limit
 * @return TitleArray Result of TitleArray::newFromResult()
 */
public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
	$rows = $this->queryLinks( $table, $startId, $endId, $max );

	return TitleArray::newFromResult( $rows );
}
163
/**
 * Get the backlinks for a given table.
 *
 * Serves the request from $fullResultCache when the whole, unbounded set is
 * asked for and already cached; otherwise queries the database. A complete
 * 'all' result that fits below $max is stored in $fullResultCache.
 *
 * @param string $table Link table name
 * @param int|bool $startId Lowest "from" ID to include, or false for no lower bound
 * @param int|bool $endId Highest "from" ID to include, or false for no upper bound
 * @param int|float $max Maximum number of rows, INF for no limit
 * @param string $select 'all' to join with page title information, 'ids' to
 *   select only the "from" IDs (aliased as page_id) from the link table
 * @return ResultWrapper
 * @throws MWException From getPrefix()/getConditions() for an unknown table
 */
protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
	$fromField = $this->getPrefix( $table ) . '_from';

	if ( !$startId && !$endId && is_infinite( $max )
		&& isset( $this->fullResultCache[$table] )
	) {
		wfDebug( __METHOD__ . ": got results from cache\n" );
		$res = $this->fullResultCache[$table];
	} else {
		wfDebug( __METHOD__ . ": got results from DB\n" );
		$conds = $this->getConditions( $table );
		// Use the from field in the condition rather than the joined page_id,
		// because databases are stupid and don't necessarily propagate indexes.
		if ( $startId ) {
			$conds[] = "$fromField >= " . intval( $startId );
		}
		if ( $endId ) {
			$conds[] = "$fromField <= " . intval( $endId );
		}
		$options = [ 'ORDER BY' => $fromField ];
		if ( is_finite( $max ) && $max > 0 ) {
			$options['LIMIT'] = $max;
		}

		if ( $select === 'ids' ) {
			// Just select from the backlink table and ignore the page JOIN
			$res = $this->getDB()->select(
				$table,
				[ $this->getPrefix( $table ) . '_from AS page_id' ],
				// Drop the join clause(s) mentioning page_id, as the page table
				// is not part of this query.
				array_filter( $conds, function ( $clause ) { // kind of janky
					return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
				} ),
				__METHOD__,
				// ORDER BY and LIMIT must apply here too: partition() walks the
				// IDs in chunks and partitionResult() rejects unordered rows.
				$options
			);
		} else {
			// Select from the backlink table and JOIN with page title information
			$res = $this->getDB()->select(
				[ $table, 'page' ],
				[ 'page_namespace', 'page_title', 'page_id' ],
				$conds,
				__METHOD__,
				array_merge( [ 'STRAIGHT_JOIN' ], $options )
			);
		}

		if ( $select === 'all' && !$startId && !$endId && $res->numRows() < $max ) {
			// The full results fit within the limit, so cache them
			$this->fullResultCache[$table] = $res;
		} else {
			wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
		}
	}

	return $res;
}
230
/**
 * Get the field name prefix for a given table.
 *
 * Known link tables are resolved from a built-in map; any other table name
 * is offered to the 'BacklinkCacheGetPrefix' hook.
 *
 * @param string $table Link table name
 * @return string Field name prefix
 * @throws MWException When neither the map nor the hook yields a prefix
 */
protected function getPrefix( $table ) {
	static $prefixes = [
		'pagelinks' => 'pl',
		'imagelinks' => 'il',
		'categorylinks' => 'cl',
		'templatelinks' => 'tl',
		'redirect' => 'rd',
	];

	if ( isset( $prefixes[$table] ) ) {
		return $prefixes[$table];
	}

	// Not one of the built-in tables: ask hook handlers to fill in $prefix.
	$prefix = null;
	Hooks::run( 'BacklinkCacheGetPrefix', [ $table, &$prefix ] );
	if ( !$prefix ) {
		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	return $prefix;
}
258
/**
 * Get the SQL condition array for selecting backlinks, with a join on the
 * page table.
 *
 * Built-in tables get a fixed set of conditions; any other table is passed
 * to the 'BacklinkCacheGetConditions' hook.
 *
 * @param string $table Link table name
 * @return array Conditions in the form accepted by the database select()
 * @throws MWException When the table is unknown and no hook supplies conditions
 */
protected function getConditions( $table ) {
	$prefix = $this->getPrefix( $table );

	switch ( $table ) {
		case 'pagelinks':
		case 'templatelinks':
			// Target is identified by namespace + DB key.
			return [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
				"page_id={$prefix}_from"
			];

		case 'redirect':
			// As above, additionally restricted to rows whose interwiki field
			// is either the empty string or NULL.
			return [
				"{$prefix}_namespace" => $this->title->getNamespace(),
				"{$prefix}_title" => $this->title->getDBkey(),
				$this->getDB()->makeList( [
					"{$prefix}_interwiki" => '',
					"{$prefix}_interwiki IS NULL",
				], LIST_OR ),
				"page_id={$prefix}_from"
			];

		case 'imagelinks':
		case 'categorylinks':
			// These tables only store the target DB key.
			return [
				"{$prefix}_to" => $this->title->getDBkey(),
				"page_id={$prefix}_from"
			];

		default:
			$conds = null;
			Hooks::run( 'BacklinkCacheGetConditions', [ $table, $this->title, &$conds ] );
			if ( !$conds ) {
				throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
			}

			return $conds;
	}
}
306
/**
 * Check if there are any backlinks.
 *
 * @param string $table Link table name
 * @return bool True when getNumLinks() reports at least one link
 */
public function hasLinks( $table ) {
	// A limit of 1 is enough to tell "none" apart from "some".
	$found = $this->getNumLinks( $table, 1 );

	return $found > 0;
}
315
/**
 * Get the approximate number of backlinks.
 *
 * Sources are tried in order: the partition cache, the full result cache,
 * the WAN cache, and finally the database.
 *
 * @param string $table Link table name
 * @param int|float $max Only count up to this many backlinks; INF for no limit
 * @return int Number of backlinks, capped at $max
 */
public function getNumLinks( $table, $max = INF ) {
	// Must be imported explicitly; without this the name would be an unset
	// local when passed to partition() and used as a cache index below.
	global $wgUpdateRowsPerJob;

	$cache = ObjectCache::getMainWANInstance();
	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table] ) ) {
		// Any batch size will do; every entry for the table carries numRows.
		$entry = reset( $this->partitionCache[$table] );

		return min( $max, $entry['numRows'] );
	}

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		return min( $max, $this->fullResultCache[$table]->numRows() );
	}

	$memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );

	// 3) ... fallback to memcached ...
	// Any falsy value, including a cached count of 0, falls through to the
	// database path below.
	$count = $cache->get( $memcKey );
	if ( $count ) {
		return min( $max, $count );
	}

	// 4) fetch from the database ...
	if ( is_infinite( $max ) ) { // no limit at all
		// Use partition() since it will batch the query and skip the JOIN.
		// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
		$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache
		return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
	} else { // probably some sane limit
		// Fetch the full title info, since the caller will likely need it next
		$count = $this->getLinks( $table, false, false, $max )->count();
		if ( $count < $max ) { // full count
			$cache->set( $memcKey, $count, self::CACHE_EXPIRY );
		}
	}

	return min( $max, $count );
}
362
/**
 * Partition the backlinks into batches.
 *
 * Sources are tried in order: the partition cache, the full result cache,
 * the WAN cache, and finally the database. A database lookup also stores
 * the partitions and the total link count in the WAN cache.
 *
 * @param string $table Link table name
 * @param int $batchSize Number of rows per batch
 * @return array[] List of [ start, end ] pairs; start/end are IDs or false
 */
public function partition( $table, $batchSize ) {
	// 1) try partition cache ...
	if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
		wfDebug( __METHOD__ . ": got from partition cache\n" );

		return $this->partitionCache[$table][$batchSize]['batches'];
	}

	$cache = ObjectCache::getMainWANInstance();
	// Reserve the slot and bind a reference to it, so every assignment to
	// $entry below lands directly in $this->partitionCache.
	$this->partitionCache[$table][$batchSize] = false;
	$entry =& $this->partitionCache[$table][$batchSize];

	// 2) ... then try full result cache ...
	if ( isset( $this->fullResultCache[$table] ) ) {
		$entry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		wfDebug( __METHOD__ . ": got from full result cache\n" );

		return $entry['batches'];
	}

	$titleHash = md5( $this->title->getPrefixedDBkey() );
	$memcKey = wfMemcKey( 'backlinks', $titleHash, $table, $batchSize );

	// 3) ... fallback to memcached ...
	$cached = $cache->get( $memcKey );
	if ( is_array( $cached ) ) {
		$entry = $cached;
		wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

		return $entry['batches'];
	}

	// 4) ... finally fetch from the slow database :(
	$entry = [ 'numRows' => 0, 'batches' => [] ]; // final result
	// Do the selects in batches to avoid client-side OOMs (bug 43452).
	// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
	$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
	$start = false;
	do {
		$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
		$chunk = $this->partitionResult( $res, $batchSize, false );
		// Merge the link count and range partitions for this chunk
		$entry['numRows'] += $chunk['numRows'];
		$entry['batches'] = array_merge( $entry['batches'], $chunk['batches'] );
		if ( count( $chunk['batches'] ) ) {
			// Ranges are inclusive, so resume right after the last end ID.
			$lastRange = end( $chunk['batches'] );
			$start = $lastRange[1] + 1;
		}
	} while ( $chunk['numRows'] >= $selectSize ); // a short chunk means we are done

	// Make sure the first range has start=false and the last one has end=false
	if ( count( $entry['batches'] ) ) {
		$entry['batches'][0][0] = false;
		$lastIndex = count( $entry['batches'] ) - 1;
		$entry['batches'][$lastIndex][1] = false;
	}

	// Save partitions to memcached
	$cache->set( $memcKey, $entry, self::CACHE_EXPIRY );

	// Save backlink count to memcached
	$countKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );
	$cache->set( $countKey, $entry['numRows'], self::CACHE_EXPIRY );

	wfDebug( __METHOD__ . ": got from database\n" );

	return $entry['batches'];
}
442
/**
 * Partition a DB result with backlinks in it into batches.
 *
 * @param ResultWrapper $res Database result whose rows expose page_id
 * @param int $batchSize Number of rows per batch
 * @param bool $isComplete Whether $res holds every backlink; when true the
 *   first batch gets start=false and the last batch gets end=false
 * @return array Map with 'numRows' (row count of $res) and 'batches'
 *   (list of [ start, end ] pairs)
 * @throws MWException When a batch's start ID is greater than its end ID
 */
protected function partitionResult( $res, $batchSize, $isComplete = true ) {
	$total = $res->numRows();
	$batchCount = ceil( $total / $batchSize );
	$ranges = [];

	for ( $n = 0; $n < $batchCount; $n++ ) {
		// Lower bound: open-ended only for the very first batch of a complete set.
		$start = false;
		if ( $n != 0 || !$isComplete ) {
			$res->seek( $n * $batchSize );
			$firstRow = $res->fetchObject();
			$start = (int)$firstRow->page_id;
		}

		// Upper bound: open-ended only for the very last batch of a complete set.
		$end = false;
		if ( $n != ( $batchCount - 1 ) || !$isComplete ) {
			// Clamp to the final row, since the last batch may be short.
			$res->seek( min( $total - 1, ( $n + 1 ) * $batchSize - 1 ) );
			$lastRow = $res->fetchObject();
			$end = (int)$lastRow->page_id;
		}

		# Sanity check order
		if ( $start && $end && $start > $end ) {
			throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
		}

		$ranges[] = [ $start, $end ];
	}

	return [ 'numRows' => $total, 'batches' => $ranges ];
}
485
/**
 * Get a Title iterator for cascade-protected template/file use backlinks.
 *
 * Queries templatelinks for pages with pr_cascade = 1 that use this title,
 * and, when the title is in NS_FILE, imagelinks as well. Rows from both
 * queries are merged by page_id before being wrapped.
 *
 * @return TitleArray Result of TitleArray::newFromResult()
 */
public function getCascadeProtectedLinks() {
	$dbr = $this->getDB();

	// @todo: use UNION without breaking tests that use temp tables
	$resSets = [];
	$resSets[] = $dbr->select(
		[ 'templatelinks', 'page_restrictions', 'page' ],
		[ 'page_namespace', 'page_title', 'page_id' ],
		[
			'tl_namespace' => $this->title->getNamespace(),
			'tl_title' => $this->title->getDBkey(),
			'tl_from = pr_page',
			'pr_cascade' => 1,
			'page_id = tl_from'
		],
		__METHOD__,
		[ 'DISTINCT' ]
	);
	if ( $this->title->getNamespace() == NS_FILE ) {
		$resSets[] = $dbr->select(
			[ 'imagelinks', 'page_restrictions', 'page' ],
			[ 'page_namespace', 'page_title', 'page_id' ],
			[
				'il_to' => $this->title->getDBkey(),
				'il_from = pr_page',
				'pr_cascade' => 1,
				'page_id = il_from'
			],
			__METHOD__,
			[ 'DISTINCT' ]
		);
	}

	// Combine and de-duplicate the results
	$mergedRes = [];
	foreach ( $resSets as $res ) {
		foreach ( $res as $row ) {
			// Keying by page_id lets a page found by both queries appear once.
			$mergedRes[$row->page_id] = $row;
		}
	}

	// Wrap the plain row list so it can be consumed like a database result.
	return TitleArray::newFromResult(
		new FakeResultWrapper( array_values( $mergedRes ) ) );
}
536}
$wgUpdateRowsPerJob
Number of rows to update per job.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfMemcKey()
Make a cache key for the local wiki.
Class for fetching backlink lists, approximate backlink counts and partitions.
getCascadeProtectedLinks()
Get a Title iterator for cascade-protected template/file use backlinks.
array[] $partitionCache
Multi dimensions array representing batches.
queryLinks( $table, $startId, $endId, $max, $select='all')
Get the backlinks for a given table.
getLinks( $table, $startId=false, $endId=false, $max=INF)
Get the backlinks for a given table.
partition( $table, $batchSize)
Partition the backlinks into batches.
getPrefix( $table)
Get the field name prefix for a given table.
partitionResult( $res, $batchSize, $isComplete=true)
Partition a DB result with backlinks in it into batches.
__construct(Title $title)
Create a new BacklinkCache.
clear()
Clear locally stored data and database object.
getDB()
Get the replica DB connection to the database. When none exists yet, the connection is initialized.
__sleep()
Serialization handler, disallows serializing the database to prevent failures after this class is d...
getNumLinks( $table, $max=INF)
Get the approximate number of backlinks.
hasLinks( $table)
Check if there are any backlinks.
setDB( $db)
Set the Database object to use.
$db
Local copy of a database object.
ResultWrapper[] $fullResultCache
Contains the whole links from a database result.
getConditions( $table)
Get the SQL condition array for selecting backlinks, with a join on the page table.
$title
Local copy of a Title object.
static BacklinkCache $instance
Overloads the relevant methods of the real ResultsWrapper so it doesn't go anywhere near an actual da...
MediaWiki exception.
Result wrapper for grabbing data queried from an IDatabase object.
static newFromResult( $res)
Represents a title within MediaWiki.
Definition Title.php:36
$res
Definition database.txt:21
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
when a variable name is used in a it is silently declared as a new local masking the global
Definition design.txt:95
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
const NS_FILE
Definition Defines.php:62
const LIST_OR
Definition Defines.php:38
the array() calling protocol came about after MediaWiki 1.4rc1.
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition hooks.txt:1096
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
$cache
Definition mcc.php:33
title
const DB_REPLICA
Definition defines.php:22