MediaWiki master
LinkCache.php
Go to the documentation of this file.
1<?php
7namespace MediaWiki\Page;
8
use InvalidArgumentException;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Title\NamespaceInfo;
use MediaWiki\Title\Title;
use MediaWiki\Title\TitleFormatter;
use MediaWiki\Title\TitleValue;
use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use stdClass;
use Wikimedia\MapCacheLRU\MapCacheLRU;
use Wikimedia\ObjectCache\WANObjectCache;
use Wikimedia\Parsoid\Core\LinkTarget;
use Wikimedia\Rdbms\Database;
use Wikimedia\Rdbms\IDBAccessObject;
use Wikimedia\Rdbms\ILoadBalancer;
use Wikimedia\Rdbms\IReadableDatabase;
use Wikimedia\Timestamp\TimestampFormat as TS;
28
54class LinkCache implements LoggerAwareInterface {
/** @var MapCacheLRU Process cache; each value is an array indexed by self::ROW and self::FLAGS */
private $entries;
/** @var WANObjectCache Persistent cache used for selected pages */
private $wanCache;
/** @var TitleFormatter Used to build cache keys and log-friendly page names */
private $titleFormatter;
/** @var NamespaceInfo */
private $nsInfo;
/** @var ILoadBalancer|null */
private $loadBalancer;
/** @var LoggerInterface */
private $logger;

/** @var int Capacity handed to the MapCacheLRU process cache */
private const MAX_SIZE = 10000;

/** @var int Entry index holding the page row object, or null for a missing page */
private const ROW = 0;
/** @var int Entry index holding the query flags the entry was stored with */
private const FLAGS = 1;
75
/**
 * @param TitleFormatter $titleFormatter
 * @param WANObjectCache $cache
 * @param NamespaceInfo $nsInfo
 * @param ILoadBalancer|null $loadBalancer
 */
public function __construct(
	TitleFormatter $titleFormatter,
	WANObjectCache $cache,
	NamespaceInfo $nsInfo,
	?ILoadBalancer $loadBalancer = null
) {
	// Injected collaborators
	$this->titleFormatter = $titleFormatter;
	$this->wanCache = $cache;
	$this->nsInfo = $nsInfo;
	$this->loadBalancer = $loadBalancer;

	// Internal state: logging is a no-op until setLogger() is called
	$this->logger = new NullLogger();
	$this->entries = new MapCacheLRU( self::MAX_SIZE );
}
95
/**
 * Replace the logger used for diagnostics about pages that cannot be cached.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ): void {
	$this->logger = $logger;
}
99
/**
 * Derive the process cache key for a page.
 *
 * @param LinkTarget|PageReference|array|string $page Either a page object, an array
 *   with 'page_namespace' and 'page_title' entries, or (only if $passThrough is set)
 *   an already computed key.
 * @param bool $passThrough Whether a string $page is accepted and returned as-is
 * @return string|null The key, or null if the page cannot be tracked by this class
 *   (cross-wiki reference, non-proper page, interwiki or section-only link)
 * @throws InvalidArgumentException If $page is a string and $passThrough is false
 */
private function getCacheKey( $page, $passThrough = false ) {
	if ( is_string( $page ) ) {
		if ( !$passThrough ) {
			throw new InvalidArgumentException( 'The key may not be given as a string here' );
		}

		return $page;
	}

	if ( is_array( $page ) ) {
		$namespace = $page['page_namespace'];
		$dbkey = $page['page_title'];
		// formatTitle() output is turned into DB key form by replacing spaces
		return strtr( $this->titleFormatter->formatTitle( $namespace, $dbkey ), ' ', '_' );
	}

	if ( $page instanceof PageReference && $page->getWikiId() !== PageReference::LOCAL ) {
		// No cross-wiki support yet. Perhaps LinkCache can become wiki-aware in the future.
		$this->logger->info(
			'cross-wiki page reference',
			[
				'page-wiki' => $page->getWikiId(),
				'page-reference' => $this->titleFormatter->getFullText( $page )
			]
		);
		return null;
	}

	if ( $page instanceof PageIdentity && !$page->canExist() ) {
		// Non-proper page, perhaps a special page or interwiki link or relative section link.
		$this->logger->warning(
			'non-proper page reference: {page-reference}',
			[ 'page-reference' => $this->titleFormatter->getFullText( $page ) ]
		);
		return null;
	}

	if ( $page instanceof LinkTarget
		&& ( $page->isExternal() || $page->getText() === '' || $page->getNamespace() < 0 )
	) {
		// Interwiki link or relative section link. These do not have a page ID, so they
		// can neither be "good" nor "bad" in the sense of this class.
		$this->logger->warning(
			'link to non-proper page: {page-link}',
			[ 'page-link' => $this->titleFormatter->getFullText( $page ) ]
		);
		return null;
	}

	return $this->titleFormatter->getPrefixedDBkey( $page );
}
155
/**
 * Get the ID of a page known to the process cache.
 *
 * @param LinkTarget|PageReference|array|string $page The page, or an already
 *   computed cache key
 * @return int The page ID, or 0 if the page is not cached as existing
 */
public function getGoodLinkID( $page ) {
	$cacheKey = $this->getCacheKey( $page, true );
	// Only consult the process cache when a key could be derived
	$cached = $cacheKey === null ? null : $this->entries->get( $cacheKey );
	$pageRow = $cached ? $cached[self::ROW] : null;

	return $pageRow ? (int)$pageRow->page_id : 0;
}
180
/**
 * Get the field of a page known to the process cache.
 *
 * @param LinkTarget|PageReference|array $page
 * @param string $field One of 'id', 'length', 'redirect', 'revision', 'model', 'lang'
 * @return int|string|null The field value, or null if the page is not cached as
 *   existing (or, for 'model' and 'lang', if the row holds an empty value)
 * @throws InvalidArgumentException If $field is not recognised and a row is cached
 */
public function getGoodLinkFieldObj( $page, string $field ) {
	$cacheKey = $this->getCacheKey( $page );
	if ( $cacheKey === null ) {
		return null;
	}

	$cached = $this->entries->get( $cacheKey );
	$pageRow = $cached ? $cached[self::ROW] : null;
	if ( !$pageRow ) {
		return null;
	}

	return match ( $field ) {
		'id' => (int)$pageRow->page_id,
		'length' => (int)$pageRow->page_len,
		'redirect' => (int)$pageRow->page_is_redirect,
		'revision' => (int)$pageRow->page_latest,
		'model' => !empty( $pageRow->page_content_model )
			? (string)$pageRow->page_content_model
			: null,
		'lang' => !empty( $pageRow->page_lang )
			? (string)$pageRow->page_lang
			: null,
		default => throw new InvalidArgumentException( "Unknown field: $field" ),
	};
}
230
/**
 * Check if a page is known to be missing based on the process cache.
 *
 * @param LinkTarget|PageReference|array|string $page The page, or an already
 *   computed cache key
 * @return bool True only if an entry exists and it holds no row
 */
public function isBadLink( $page ) {
	$cacheKey = $this->getCacheKey( $page, true );
	if ( $cacheKey === null ) {
		return false;
	}

	$cached = $this->entries->get( $cacheKey );
	if ( !$cached ) {
		// Nothing known about this page at all
		return false;
	}

	return !$cached[self::ROW];
}
250
/**
 * Add information about an existing page to the process cache.
 *
 * Nothing is stored (and the row is not validated) when no cache key can be
 * derived for $page.
 *
 * @param LinkTarget|PageReference|array $page
 * @param stdClass $row Row providing every field listed by getSelectFields()
 * @param int $queryFlags Query flags stored alongside the row
 * @throws InvalidArgumentException If $row lacks one of the required fields
 */
public function addGoodLinkObjFromRow(
	$page,
	stdClass $row,
	int $queryFlags = IDBAccessObject::READ_NORMAL
) {
	$cacheKey = $this->getCacheKey( $page );
	if ( $cacheKey === null ) {
		return;
	}

	$absent = array_filter(
		self::getSelectFields(),
		static fn ( $name ) => !property_exists( $row, $name )
	);
	if ( $absent ) {
		// Report the first missing field, in getSelectFields() order
		$field = reset( $absent );
		throw new InvalidArgumentException( "Missing field: $field" );
	}

	$this->entries->set( $cacheKey, [ self::ROW => $row, self::FLAGS => $queryFlags ] );
}
285
/**
 * Add information about a missing page to the process cache.
 *
 * @param LinkTarget|PageReference|array $page
 * @param int $queryFlags Query flags stored alongside the entry
 */
public function addBadLinkObj( $page, int $queryFlags = IDBAccessObject::READ_NORMAL ) {
	$cacheKey = $this->getCacheKey( $page );
	if ( $cacheKey !== null ) {
		// A null row marks the page as known to be missing
		$this->entries->set( $cacheKey, [ self::ROW => null, self::FLAGS => $queryFlags ] );
	}
}
308
/**
 * Clear information about a page being missing from the process cache.
 *
 * Entries that hold a row are left alone.
 *
 * @param LinkTarget|PageReference|array|string $page The page, or an already
 *   computed cache key
 */
public function clearBadLink( $page ) {
	$cacheKey = $this->getCacheKey( $page, true );
	if ( $cacheKey === null ) {
		return;
	}

	$cached = $this->entries->get( $cacheKey );
	$isKnownMissing = $cached && !$cached[self::ROW];
	if ( $isKnownMissing ) {
		$this->entries->clear( $cacheKey );
	}
}
328
/**
 * Clear information about a page from the process cache.
 *
 * @param LinkTarget|PageReference|array $page
 */
public function clearLink( $page ) {
	$cacheKey = $this->getCacheKey( $page );
	if ( $cacheKey === null ) {
		return;
	}

	$this->entries->clear( $cacheKey );
}
343
/**
 * Fields that LinkCache needs to select.
 *
 * The list is PageStoreRecord::REQUIRED_FIELDS plus 'page_len' and
 * 'page_content_model', and additionally 'page_lang' when the
 * PageLanguageUseDB setting is enabled.
 *
 * @return string[]
 */
public static function getSelectFields() {
	$pageLanguageUseDB = MediaWikiServices::getInstance()->getMainConfig()
		->get( MainConfigNames::PageLanguageUseDB );

	$fields = array_merge(
		PageStoreRecord::REQUIRED_FIELDS,
		[
			'page_len',
			'page_content_model',
		]
	);

	if ( $pageLanguageUseDB ) {
		$fields[] = 'page_lang';
	}

	return $fields;
}
368
/**
 * Add a title to the link cache, return the page_id or zero if non-existent.
 *
 * @param LinkTarget|PageReference $page Object providing getNamespace() and getDBkey()
 * @param int $queryFlags Query flags forwarded to getGoodLinkRow()
 * @return int Page ID, or 0 if no row was found
 */
public function addLinkObj( $page, int $queryFlags = IDBAccessObject::READ_NORMAL ) {
	$fetcher = $this->fetchPageRow( ... );
	$pageRow = $this->getGoodLinkRow(
		$page->getNamespace(),
		$page->getDBkey(),
		$fetcher,
		$queryFlags
	);

	if ( !$pageRow ) {
		return 0;
	}

	return (int)$pageRow->page_id;
}
393
/**
 * Look up the row for a link in the process cache, falling back to the
 * persistent cache or the database through $fetchCallback.
 *
 * @param TitleValue $link
 * @param callable|null $fetchCallback Invoked as ( $db, $ns, $dbkey, $options ) to
 *   load the row; when omitted, only the process cache is consulted
 * @param int $queryFlags
 * @return array Two elements: whether the caller should store the returned row in
 *   the process cache (true only when a fetch was performed), and the row or null
 */
private function getGoodLinkRowInternal(
	TitleValue $link,
	?callable $fetchCallback = null,
	int $queryFlags = IDBAccessObject::READ_NORMAL
): array {
	$cacheKey = $this->getCacheKey( $link );
	if ( $cacheKey === null ) {
		return [ false, null ];
	}

	// A process cache entry is only reused if it was stored with flags at
	// least as large as the ones requested now.
	$cached = $this->entries->get( $cacheKey );
	if ( $cached && $cached[self::FLAGS] >= $queryFlags ) {
		return [ false, $cached[self::ROW] ?: null ];
	}

	if ( !$fetchCallback ) {
		return [ false, null ];
	}

	$ns = $link->getNamespace();
	$dbkey = $link->getDBkey();

	$persistentKey = $this->getPersistentCacheKey( $link );
	if ( $persistentKey !== null && !( $queryFlags & IDBAccessObject::READ_LATEST ) ) {
		// Some pages are often transcluded heavily, so use persistent caching
		$fetched = $this->wanCache->getWithSetCallback(
			$persistentKey,
			WANObjectCache::TTL_DAY,
			function ( $curValue, &$ttl, array &$setOpts ) use ( $fetchCallback, $ns, $dbkey ) {
				$replica = $this->loadBalancer->getConnection( ILoadBalancer::DB_REPLICA );
				$setOpts += Database::getCacheSetOptions( $replica );

				$freshRow = $fetchCallback( $replica, $ns, $dbkey, [] );
				// Let the TTL depend on when the page was last touched
				$touched = $freshRow
					? (int)wfTimestamp( TS::UNIX, $freshRow->page_touched )
					: false;
				$ttl = $this->wanCache->adaptiveTTL( $touched, $ttl );

				return $freshRow;
			}
		);

		return [ true, $fetched ?: null ];
	}

	// No persistent caching needed, but we can still use the callback.
	$useLatest = ( $queryFlags & IDBAccessObject::READ_LATEST ) == IDBAccessObject::READ_LATEST;
	$db = $this->loadBalancer->getConnection( $useLatest ? DB_PRIMARY : DB_REPLICA );

	$options = match ( true ) {
		( $queryFlags & IDBAccessObject::READ_EXCLUSIVE ) == IDBAccessObject::READ_EXCLUSIVE
			=> [ 'FOR UPDATE' ],
		( $queryFlags & IDBAccessObject::READ_LOCKING ) == IDBAccessObject::READ_LOCKING
			=> [ 'LOCK IN SHARE MODE' ],
		default => [],
	};

	$fetched = $fetchCallback( $db, $ns, $dbkey, $options );

	return [ true, $fetched ?: null ];
}
463
/**
 * Returns the row for the page if the page exists (subject to race conditions).
 *
 * The outcome is recorded in the process cache: a fetched row is stored as a
 * good link, and the absence of a row is stored as a bad link.
 *
 * @param int $ns
 * @param string $dbkey
 * @param callable|null $fetchCallback Invoked as ( $db, $ns, $dbkey, $options ) to
 *   load the row when it is not served from the process cache
 * @param int $queryFlags
 * @return stdClass|null The row, or null if no row is available or the
 *   namespace/DB key pair does not form a valid TitleValue
 */
public function getGoodLinkRow(
	int $ns,
	string $dbkey,
	?callable $fetchCallback = null,
	int $queryFlags = IDBAccessObject::READ_NORMAL
): ?stdClass {
	$link = TitleValue::tryNew( $ns, $dbkey );
	if ( $link === null ) {
		return null;
	}

	[ $shouldAddGoodLink, $row ] = $this->getGoodLinkRowInternal(
		$link,
		$fetchCallback,
		$queryFlags
	);

	if ( $row && $shouldAddGoodLink ) {
		try {
			$this->addGoodLinkObjFromRow( $link, $row, $queryFlags );
		} catch ( InvalidArgumentException ) {
			// A field is missing from $row, possibly because it came from a cache;
			// invalidate it and try again.
			$this->invalidateTitle( $link );
			[ , $row ] = $this->getGoodLinkRowInternal(
				$link,
				$fetchCallback,
				$queryFlags
			);
			// The retry may find no row at all. addGoodLinkObjFromRow() requires a
			// stdClass, so only call it when there is one; otherwise fall through
			// to the bad-link handling below.
			if ( $row ) {
				$this->addGoodLinkObjFromRow( $link, $row, $queryFlags );
			}
		}
	}

	if ( !$row ) {
		// NOTE(review): this also runs when no fetch was attempted (no callback
		// given and nothing cached) — confirm that marking the page as missing
		// is intended in that case.
		$this->addBadLinkObj( $link );
	}

	return $row ?: null;
}
516
/**
 * Build the persistent (WAN) cache key for a page.
 *
 * @param LinkTarget|PageReference $page
 * @return string|null The key, or null if no process cache key can be derived
 *   for the page or the page is not eligible for persistent caching
 */
private function getPersistentCacheKey( $page ) {
	// if no key can be derived, the page isn't cacheable
	$cacheable = $this->getCacheKey( $page ) !== null && $this->usePersistentCache( $page );
	if ( !$cacheable ) {
		return null;
	}

	$titleHash = sha1( $page->getDBkey() );

	return $this->wanCache->makeKey( 'page', $page->getNamespace(), $titleHash );
}
528
/**
 * Decide whether a page (or any page of a namespace) is stored in the
 * persistent cache.
 *
 * @param LinkTarget|PageReference|int $pageOrNamespace Page object or namespace index
 * @return bool
 */
private function usePersistentCache( $pageOrNamespace ) {
	$namespaceOnly = is_int( $pageOrNamespace );
	$ns = $namespaceOnly ? $pageOrNamespace : $pageOrNamespace->getNamespace();

	if ( in_array( $ns, [ NS_TEMPLATE, NS_FILE, NS_CATEGORY, NS_MEDIAWIKI ] ) ) {
		return true;
	}

	if ( !$namespaceOnly ) {
		// Pages whose DB key ends in .css or .js qualify in any namespace
		$dbkey = $pageOrNamespace->getDBkey();
		if ( str_ends_with( $dbkey, '.css' ) || str_ends_with( $dbkey, '.js' ) ) {
			return true;
		}
	}

	// Focus on transcluded pages more than the main content
	if ( $this->nsInfo->isContent( $ns ) ) {
		return false;
	}

	// Non-talk extension namespaces (e.g. NS_MODULE)
	return $ns >= 100 && $this->nsInfo->isSubject( $ns );
}
549
/**
 * Select the LinkCache fields of one page from the 'page' table.
 *
 * @param IReadableDatabase $db
 * @param int $ns
 * @param string $dbkey
 * @param array $options Query options handed to the query builder
 * @return stdClass|false Whatever the query builder's fetchRow() returns
 */
private function fetchPageRow( IReadableDatabase $db, int $ns, string $dbkey, $options = [] ) {
	$conds = [ 'page_namespace' => $ns, 'page_title' => $dbkey ];

	return $db->newSelectQueryBuilder()
		->select( self::getSelectFields() )
		->from( 'page' )
		->where( $conds )
		->options( $options )
		->caller( __METHOD__ )
		->fetchRow();
}
566
/**
 * Resolve a batch of page titles and record each one in the process cache as
 * either a good link or a bad link.
 *
 * Titles eligible for persistent caching are first looked up in the WAN cache;
 * everything still unresolved is queried through a LinkBatch. Results for
 * eligible titles are written back to the WAN cache (null for missing pages).
 * Titles that are not eligible never touch the WAN cache.
 *
 * @param string[] $pages Page titles in text form; unparsable titles are skipped
 * @param string $fname Caller name passed to the LinkBatch
 */
public function executeBatch( array $pages, $fname ) {
	// Titles eligible for the persistent cache, keyed by their WAN cache key
	$pendingByWanKey = [];
	// Titles without a persistent cache key, keyed by "<namespace>:<dbkey>".
	// They are kept apart so that a null key is never used as an array index
	// or as a WAN cache key.
	$pendingLocalOnly = [];

	foreach ( $pages as $page ) {
		$title = Title::newFromText( $page );
		if ( !$title ) {
			continue;
		}

		$wanKey = $this->getPersistentCacheKey( $title );
		if ( $wanKey !== null ) {
			$pendingByWanKey[$wanKey] = $title;
		} else {
			$pendingLocalOnly[$title->getNamespace() . ':' . $title->getDBkey()] = $title;
		}
	}

	// Serve what we can from the persistent cache
	$cachedRows = $pendingByWanKey
		? $this->wanCache->getMulti( array_keys( $pendingByWanKey ) )
		: [];
	foreach ( $cachedRows as $wanKey => $row ) {
		if ( $row ) {
			// Fall back to the requested title if the row does not yield a valid TitleValue
			$target = TitleValue::tryNew( (int)$row->page_namespace, $row->page_title )
				?? $pendingByWanKey[$wanKey];
			$this->addGoodLinkObjFromRow( $target, $row );
		} else {
			$this->addBadLinkObj( $pendingByWanKey[$wanKey] );
		}
		unset( $pendingByWanKey[$wanKey] );
	}

	// Query everything that is still unresolved
	$toQuery = array_merge( array_values( $pendingByWanKey ), array_values( $pendingLocalOnly ) );
	$result = [];
	if ( $toQuery ) {
		$linkBatch = MediaWikiServices::getInstance()->getLinkBatchFactory()
			->newLinkBatch( $toQuery );
		$linkBatch->setCaller( $fname );
		// Guard against a falsy (non-iterable) query result
		$result = $linkBatch->doQuery() ?: [];
		$linkBatch->doGenderQuery();
	}

	foreach ( $result as $row ) {
		$ns = (int)$row->page_namespace;
		$target = TitleValue::tryNew( $ns, $row->page_title );
		if ( $target === null ) {
			continue;
		}

		$this->addGoodLinkObjFromRow( $target, $row );

		$wanKey = $this->getPersistentCacheKey( $target );
		if ( $wanKey !== null ) {
			$this->wanCache->set( $wanKey, $row, WANObjectCache::TTL_DAY );
			unset( $pendingByWanKey[$wanKey] );
		}
		unset( $pendingLocalOnly[$ns . ':' . $row->page_title] );
	}

	// Whatever is left was not found by the query
	foreach ( $pendingByWanKey as $wanKey => $title ) {
		$this->wanCache->set( $wanKey, null, WANObjectCache::TTL_DAY );
		$this->addBadLinkObj( $title );
	}
	foreach ( $pendingLocalOnly as $title ) {
		$this->addBadLinkObj( $title );
	}
}
620
/**
 * Purge the persistent link cache for a title.
 *
 * The process cache entry for the page is cleared as well.
 *
 * @param LinkTarget|PageReference $page
 */
public function invalidateTitle( $page ) {
	// for use by ResourceLoader Wikimodule
	$persistentKey = $this->getPersistentCacheKey( $page );
	if ( $persistentKey !== null ) {
		$this->wanCache->delete( $persistentKey );
	}

	$this->clearLink( $page );
}
637
/**
 * Clear all entries from the process cache.
 */
public function clear() {
	$this->entries->clear();
}
644}
645
// Alias under the global (un-namespaced) class name.
class_alias( LinkCache::class, 'LinkCache' );

// Alias under the MediaWiki\Cache namespace.
class_alias( LinkCache::class, 'MediaWiki\Cache\LinkCache' );
const NS_FILE
Definition Defines.php:57
const NS_MEDIAWIKI
Definition Defines.php:59
const NS_TEMPLATE
Definition Defines.php:61
const NS_CATEGORY
Definition Defines.php:65
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
const DB_REPLICA
Definition defines.php:26
const DB_PRIMARY
Definition defines.php:28
A class containing constants representing the names of configuration variables.
const PageLanguageUseDB
Name constant for the PageLanguageUseDB setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Page existence and metadata cache.
Definition LinkCache.php:54
setLogger(LoggerInterface $logger)
Definition LinkCache.php:96
static getSelectFields()
Fields that LinkCache needs to select.
clearBadLink( $page)
Clear information about a page being missing from the process cache.
invalidateTitle( $page)
Purge the persistent link cache for a title.
addBadLinkObj( $page, int $queryFlags=IDBAccessObject::READ_NORMAL)
Add information about a missing page to the process cache.
isBadLink( $page)
Check if a page is known to be missing based on the process cache.
executeBatch(array $pages, $fname)
getGoodLinkRow(int $ns, string $dbkey, ?callable $fetchCallback=null, int $queryFlags=IDBAccessObject::READ_NORMAL)
Returns the row for the page if the page exists (subject to race conditions).
__construct(TitleFormatter $titleFormatter, WANObjectCache $cache, NamespaceInfo $nsInfo, ?ILoadBalancer $loadBalancer=null)
Definition LinkCache.php:82
getGoodLinkFieldObj( $page, string $field)
Get the field of a page known to the process cache.
clearLink( $page)
Clear information about a page from the process cache.
addGoodLinkObjFromRow( $page, stdClass $row, int $queryFlags=IDBAccessObject::READ_NORMAL)
Add information about an existing page to the process cache.
addLinkObj( $page, int $queryFlags=IDBAccessObject::READ_NORMAL)
Add a title to the link cache, return the page_id or zero if non-existent.
getGoodLinkID( $page)
Get the ID of a page known to the process cache.
const REQUIRED_FIELDS
Fields that must be present in the row object passed to the constructor.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
A title formatter service for MediaWiki.
Represents the target of a wiki link.
getDBkey()
Get the main part of the link target, in canonical database form.
getNamespace()
Get the namespace index.
Represents a title within MediaWiki.
Definition Title.php:69
Multi-datacenter aware caching interface.
static getCacheSetOptions(?IReadableDatabase ... $dbs)
Merge the result of getSessionLagStatus() for several DBs using the most pessimistic values to estima...
const LOCAL
Wiki ID value to use with instances that are defined relative to the local wiki.
Interface for database access objects.
This class is a delegate to ILBFactory for a given database cluster.
A database connection without write operations.
getCacheKey()
Get the cache key used to store status.