MediaWiki  master
PageStore.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Page;
4 
6 use EmptyIterator;
7 use InvalidArgumentException;
8 use Iterator;
9 use LinkCache;
10 use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
15 use NamespaceInfo;
17 use stdClass;
18 use TitleParser;
19 use Wikimedia\Assert\Assert;
23 
29 class PageStore implements PageLookup {
30 
 /** @var ServiceOptions Configuration; read for 'PageLanguageUseDB' in getSelectFields() */
32  private $options;
33 
 /** @var ILoadBalancer Source of database connections for the wiki identified by $wikiId */
35  private $dbLoadBalancer;
36 
 /** @var NamespaceInfo Used to check whether a namespace supports subpages */
38  private $namespaceInfo;
39 
 /** @var TitleParser Used to parse title text in getPageByText() */
41  private $titleParser;
42 
 /** @var LinkCache|null Optional cache; the constructor rejects it for non-local wikis */
44  private $linkCache;
45 
 /** @var StatsdDataFactoryInterface Receives the "PageStore.*" counters */
47  private $stats;
48 
 /** @var string|false ID of the wiki this store reads pages from */
50  private $wikiId;
51 
 /**
  * Names of the options that the ServiceOptions object passed to the
  * constructor is required to provide.
  */
55  public const CONSTRUCTOR_OPTIONS = [
56  'PageLanguageUseDB',
57  ];
58 
/**
 * @param ServiceOptions $options Must provide every option named in CONSTRUCTOR_OPTIONS.
 * @param ILoadBalancer $dbLoadBalancer
 * @param NamespaceInfo $namespaceInfo
 * @param TitleParser $titleParser
 * @param LinkCache|null $linkCache Must be null unless $wikiId is WikiAwareEntity::LOCAL.
 * @param StatsdDataFactoryInterface|null $stats If null, a NullStatsdDataFactory is used.
 * @param string|false $wikiId The wiki to load pages from.
 *
 * @throws InvalidArgumentException if a LinkCache is supplied for a non-local wiki.
 */
public function __construct(
	ServiceOptions $options,
	ILoadBalancer $dbLoadBalancer,
	NamespaceInfo $namespaceInfo,
	TitleParser $titleParser,
	?LinkCache $linkCache,
	?StatsdDataFactoryInterface $stats,
	$wikiId = WikiAwareEntity::LOCAL
) {
	$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

	$this->options = $options;
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->namespaceInfo = $namespaceInfo;
	$this->titleParser = $titleParser;
	$this->wikiId = $wikiId;
	$this->linkCache = $linkCache;
	// Fall back to a no-op stats collector so callers never need a null check.
	$this->stats = $stats ?? new NullStatsdDataFactory();

	if ( $wikiId !== WikiAwareEntity::LOCAL && $linkCache ) {
		// LinkCache currently doesn't support cross-wiki PageReferences.
		// Once it does, this check can go away. At that point, LinkCache should
		// probably also no longer be optional.
		throw new InvalidArgumentException( "Can't use LinkCache with pages from $wikiId" );
	}
}
94 
/**
 * Bump a counter in the stats collector, namespaced under "PageStore.".
 *
 * @param string $metric Metric name without the "PageStore." prefix.
 */
private function incrementStats( string $metric ) {
	$key = 'PageStore.' . $metric;
	$this->stats->increment( $key );
}
101 
/**
 * Resolve a link target to a page identity.
 *
 * Links into NS_MEDIA are mapped to NS_FILE. When no page with the given
 * name is found, a PageIdentityValue with page ID 0 is returned instead.
 *
 * @param LinkTarget $link Must not be external or relative, and must not
 *        point to a virtual (negative) namespace.
 * @param int $queryFlags Passed through to getPageByName().
 *
 * @return ProperPageIdentity
 */
public function getPageForLink(
	LinkTarget $link,
	int $queryFlags = self::READ_NORMAL
): ProperPageIdentity {
	Assert::parameter( !$link->isExternal(), '$link', 'must not be external' );

	$dbKey = $link->getDBkey();
	Assert::parameter( $dbKey !== '', '$link', 'must not be relative' );

	// Map Media links to File namespace
	$linkNs = $link->getNamespace();
	$ns = ( $linkNs === NS_MEDIA ) ? NS_FILE : $linkNs;

	Assert::parameter( $ns >= 0, '$link', 'namespace must not be virtual' );

	$existing = $this->getPageByName( $ns, $dbKey, $queryFlags );
	if ( $existing ) {
		return $existing;
	}

	// Nothing found: hand back an identity with ID 0 for the same name.
	return new PageIdentityValue( 0, $ns, $dbKey, $this->wikiId );
}
132 
/**
 * Look up a page by namespace and DB key.
 *
 * The lookup goes through the LinkCache when one was injected, and queries
 * the database directly otherwise.
 *
 * @param int $namespace Must not be negative (virtual).
 * @param string $dbKey Must be non-empty and must not contain any spaces.
 * @param int $queryFlags Read flags, self::READ_NORMAL by default.
 *
 * @return ExistingPageRecord|null The page record, or null if no matching
 *         page was found.
 */
public function getPageByName(
	int $namespace,
	string $dbKey,
	int $queryFlags = self::READ_NORMAL
): ?ExistingPageRecord {
	Assert::parameter( $dbKey !== '', '$dbKey', 'must not be empty' );
	// Compare against false explicitly: strpos() returns 0 for a leading space,
	// which a plain truthiness check would mistake for "no space found".
	Assert::parameter( strpos( $dbKey, ' ' ) === false, '$dbKey', 'must not contain spaces' );
	Assert::parameter( $namespace >= 0, '$namespace', 'must not be virtual' );

	if ( $this->linkCache ) {
		return $this->getPageByNameViaLinkCache( $namespace, $dbKey, $queryFlags );
	}

	// The query conditions are only needed when bypassing the LinkCache.
	$conds = [
		'page_namespace' => $namespace,
		'page_title' => $dbKey,
	];

	return $this->loadPageFromConditions( $conds, $queryFlags );
}
160 
/**
 * Look up a page by name, using the LinkCache as a read-through cache.
 *
 * Every outcome is reported via incrementStats() under a "LinkCache.*" key.
 *
 * @param int $namespace
 * @param string $dbKey
 * @param int $queryFlags
 *
 * @return ExistingPageRecord|null The page record, or null if the page is
 *         known to be missing or could not be loaded.
 */
private function getPageByNameViaLinkCache(
	int $namespace,
	string $dbKey,
	int $queryFlags = self::READ_NORMAL
): ?ExistingPageRecord {
	$conds = [ 'page_namespace' => $namespace, 'page_title' => $dbKey ];

	// Cheap exit: for plain reads, trust a cached "bad link" entry.
	if ( $queryFlags === self::READ_NORMAL && $this->linkCache->isBadLink( $conds ) ) {
		$this->incrementStats( "LinkCache.hit.bad.early" );
		return null;
	}

	// Capture the method name here; inside the closure __METHOD__ would differ.
	$fname = __METHOD__;
	// Flipped to 'miss' by the callback if LinkCache has to ask the database.
	$cacheOutcome = 'hit';

	$fetchRow = function ( IDatabase $dbr, $ns, $dbkey, array $options )
		use ( $conds, $fname, &$cacheOutcome )
	{
		$cacheOutcome = 'miss';

		return $this->newSelectQueryBuilder( $dbr )
			->fields( $this->getSelectFields() )
			->conds( $conds )
			->options( $options )
			->caller( $fname )
			->fetchRow();
	};

	// Try to get the row from LinkCache, providing a callback to fetch it if it's not cached.
	// When getGoodLinkRow() returns, LinkCache should have an entry for the row, good or bad.
	$row = $this->linkCache->getGoodLinkRow( $namespace, $dbKey, $fetchRow, $queryFlags );

	if ( !$row ) {
		$this->incrementStats( "LinkCache.{$cacheOutcome}.bad.late" );
		return null;
	}

	try {
		$record = $this->newPageRecordFromRow( $row );

		// We were able to use the row we got from link cache.
		$this->incrementStats( "LinkCache.{$cacheOutcome}.good" );

		return $record;
	} catch ( InvalidArgumentException $e ) {
		// The cached row was incomplete or corrupt,
		// just keep going and load from the database.
	}

	$record = $this->loadPageFromConditions( $conds, $queryFlags );

	if ( $record ) {
		// PageSelectQueryBuilder should have added the full row to the LinkCache now.
		$this->incrementStats( "LinkCache.{$cacheOutcome}.incomplete.loaded" );
		return $record;
	}

	// If we get here, an incomplete row was cached, but we failed to
	// load the full row from the database. This should only happen
	// if the page was deleted under our feet, which should be very rare.
	// Update the LinkCache to reflect the new situation.
	$this->linkCache->addBadLinkObj( $conds );
	$this->incrementStats( "LinkCache.{$cacheOutcome}.incomplete.missing" );

	return $record;
}
237 
/**
 * Resolve title text to a page identity.
 *
 * @param string $text Title text to parse.
 * @param int $defaultNamespace Namespace handed to the title parser as the default.
 * @param int $queryFlags
 *
 * @return ProperPageIdentity|null The page identity, or null if the text
 *         cannot be parsed or is not a valid argument for getPageForLink().
 */
public function getPageByText(
	string $text,
	int $defaultNamespace = NS_MAIN,
	int $queryFlags = self::READ_NORMAL
): ?ProperPageIdentity {
	$identity = null;

	try {
		$identity = $this->getPageForLink(
			$this->titleParser->parseTitle( $text, $defaultNamespace ),
			$queryFlags
		);
	} catch ( MalformedTitleException | InvalidArgumentException $e ) {
		// Note that even some well-formed links are still invalid parameters
		// for getPageForLink(), e.g. interwiki links or special pages.
	}

	return $identity;
}
261 
/**
 * Resolve title text to the record of an existing page.
 *
 * @param string $text Title text to parse.
 * @param int $defaultNamespace
 * @param int $queryFlags
 *
 * @return ExistingPageRecord|null Null if getPageByText() yields nothing
 *         or getPageByReference() finds no record.
 */
public function getExistingPageByText(
	string $text,
	int $defaultNamespace = NS_MAIN,
	int $queryFlags = self::READ_NORMAL
): ?ExistingPageRecord {
	$identity = $this->getPageByText( $text, $defaultNamespace, $queryFlags );

	return $identity
		? $this->getPageByReference( $identity, $queryFlags )
		: null;
}
282 
/**
 * Load a page record by page ID, querying the database directly.
 *
 * @param int $pageId Must be greater than zero.
 * @param int $queryFlags
 *
 * @return ExistingPageRecord|null
 */
public function getPageById(
	int $pageId,
	int $queryFlags = self::READ_NORMAL
): ?ExistingPageRecord {
	Assert::parameter( $pageId > 0, '$pageId', 'must be greater than zero' );

	// XXX: no caching needed?

	return $this->loadPageFromConditions( [ 'page_id' => $pageId ], $queryFlags );
}
303 
/**
 * Load the page record that a page reference points to.
 *
 * An ExistingPageRecord is returned unchanged for READ_NORMAL. Otherwise
 * the lookup uses the page ID if the reference is a PageIdentity that
 * reports itself as existing, and falls back to namespace plus DB key.
 *
 * @param PageReference $page Must belong to this store's wiki and must not
 *        be in a virtual (negative) namespace.
 * @param int $queryFlags
 *
 * @return ExistingPageRecord|null
 */
public function getPageByReference(
	PageReference $page,
	int $queryFlags = self::READ_NORMAL
): ?ExistingPageRecord {
	$page->assertWiki( $this->wikiId );
	Assert::parameter( $page->getNamespace() >= 0, '$page', 'namespace must not be virtual' );

	// Already a full record and no fresher read requested: nothing to load.
	if ( $queryFlags === self::READ_NORMAL && $page instanceof ExistingPageRecord ) {
		return $page;
	}

	if ( $page instanceof PageIdentity ) {
		Assert::parameter( $page->canExist(), '$page', 'Must be a proper page' );

		if ( $page->exists() ) {
			// if we have a page ID, use it
			return $this->getPageById( $page->getId( $this->wikiId ), $queryFlags );
		}
	}

	$namespace = $page->getNamespace();
	$dbKey = $page->getDBkey();

	return $this->getPageByName( $namespace, $dbKey, $queryFlags );
}
333 
/**
 * Fetch a single page record matching the given query conditions.
 *
 * @param array $conds Conditions handed to the select query builder.
 * @param int $queryFlags Handed to newSelectQueryBuilder().
 *
 * @return ExistingPageRecord|null
 */
private function loadPageFromConditions(
	array $conds,
	int $queryFlags = self::READ_NORMAL
): ?ExistingPageRecord {
	$pageQuery = $this->newSelectQueryBuilder( $queryFlags )
		->conds( $conds )
		->caller( __METHOD__ );

	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $pageQuery->fetchPageRecord();
}
351 
/**
 * Wrap a database row in a PageStoreRecord bound to this store's wiki.
 *
 * @param stdClass $row
 *
 * @return ExistingPageRecord
 */
public function newPageRecordFromRow( stdClass $row ): ExistingPageRecord {
	$wikiId = $this->wikiId;

	return new PageStoreRecord( $row, $wikiId );
}
365 
/**
 * List the page table fields to select when loading page rows.
 *
 * 'page_lang' is included only when the 'PageLanguageUseDB' option is set.
 * The fields reported by LinkCache::getSelectFields() are merged in, with
 * duplicates removed.
 *
 * @return array Field names.
 */
public function getSelectFields(): array {
	$pageFields = [
		'page_id',
		'page_namespace',
		'page_title',
		'page_is_redirect',
		'page_is_new',
		'page_touched',
		'page_links_updated',
		'page_latest',
		'page_len',
		'page_content_model'
	];

	if ( $this->options->get( 'PageLanguageUseDB' ) ) {
		$pageFields[] = 'page_lang';
	}

	// Since we are putting rows into LinkCache, we need to include all fields
	// that LinkCache needs.
	$allFields = array_merge( $pageFields, LinkCache::getSelectFields() );

	return array_unique( $allFields );
}
397 
/**
 * Create a PageSelectQueryBuilder for querying pages.
 *
 * @param IDatabase|int $dbOrFlags Either a database connection to use as-is,
 *        or read flags from which the connection and the query options are
 *        derived via DBAccessObjectUtils::getDBOptions().
 *
 * @return SelectQueryBuilder
 */
public function newSelectQueryBuilder( $dbOrFlags = self::READ_NORMAL ): SelectQueryBuilder {
	// Defaults for the case where the caller supplied a connection.
	$db = $dbOrFlags;
	$options = [];

	if ( !( $dbOrFlags instanceof IDatabase ) ) {
		[ $mode, $options ] = DBAccessObjectUtils::getDBOptions( $dbOrFlags );
		$db = $this->getDBConnectionRef( $mode );
	}

	$builder = new PageSelectQueryBuilder( $db, $this, $this->linkCache );
	$builder->options( $options );

	return $builder;
}
420 
/**
 * Get a connection from the load balancer for this store's wiki.
 *
 * @param int $mode Connection index passed to the load balancer, DB_REPLICA by default.
 *
 * @return IDatabase
 */
private function getDBConnectionRef( int $mode = DB_REPLICA ): IDatabase {
	$lb = $this->dbLoadBalancer;

	return $lb->getConnectionRef( $mode, [], $this->wikiId );
}
428 
/**
 * Get the subpages of the given page.
 *
 * Selects pages in the same namespace whose title starts with the page's
 * DB key followed by "/", ordered by title.
 *
 * @param PageIdentity $page
 * @param int $limit Value for the query's LIMIT option.
 *
 * @return Iterator An EmptyIterator if the page's namespace has no
 *         subpages, otherwise the result of fetchPageRecords().
 */
public function getSubpages( PageIdentity $page, int $limit ): Iterator {
	$namespace = $page->getNamespace();

	if ( !$this->namespaceInfo->hasSubpages( $namespace ) ) {
		return new EmptyIterator();
	}

	$titlePrefix = $page->getDBkey() . '/';

	return $this->newSelectQueryBuilder()
		->whereTitlePrefix( $namespace, $titlePrefix )
		->orderByTitle()
		->options( [ 'LIMIT' => $limit ] )
		->caller( __METHOD__ )
		->fetchPageRecords();
}
450 
451 }
Page\PageIdentity
Interface for objects (potentially) representing an editable wiki page.
Definition: PageIdentity.php:64
LinkCache
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition: LinkCache.php:40
Page\PageStore\$linkCache
LinkCache null $linkCache
Definition: PageStore.php:44
Page\PageStore\getSubpages
getSubpages(PageIdentity $page, int $limit)
Get all subpages of this page.
Definition: PageStore.php:438
Page\PageStore\getPageByText
getPageByText(string $text, int $defaultNamespace=NS_MAIN, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:247
NullStatsdDataFactory
Definition: NullStatsdDataFactory.php:10
DBAccessObjectUtils\getDBOptions
static getDBOptions( $bitfield)
Get an appropriate DB index, options, and fallback DB index for a query.
Definition: DBAccessObjectUtils.php:52
MediaWiki\DAO\WikiAwareEntity
Marker interface for entities aware of the wiki they belong to.
Definition: WikiAwareEntity.php:34
Page\PageReference
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
Definition: PageReference.php:49
Page\PageStore\__construct
__construct(ServiceOptions $options, ILoadBalancer $dbLoadBalancer, NamespaceInfo $namespaceInfo, TitleParser $titleParser, ?LinkCache $linkCache, ?StatsdDataFactoryInterface $stats, $wikiId=WikiAwareEntity::LOCAL)
Definition: PageStore.php:68
LinkCache\getSelectFields
static getSelectFields()
Fields that LinkCache needs to select.
Definition: LinkCache.php:382
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
Page\PageStoreRecord
Immutable data record representing an editable page on a wiki.
Definition: PageStoreRecord.php:33
NS_MAIN
const NS_MAIN
Definition: Defines.php:64
Page\PageStore\$options
ServiceOptions $options
Definition: PageStore.php:32
$dbr
$dbr
Definition: testCompression.php:54
Page\PageStore\getPageById
getPageById(int $pageId, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:289
MediaWiki\Config\ServiceOptions
A class for passing options to services.
Definition: ServiceOptions.php:27
Page\PageReference\getNamespace
getNamespace()
Returns the page's namespace number.
Page\PageStore\incrementStats
incrementStats(string $metric)
Definition: PageStore.php:98
Wikimedia\Rdbms\SelectQueryBuilder
Definition: SelectQueryBuilder.php:11
Page\PageStore\loadPageFromConditions
loadPageFromConditions(array $conds, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:340
Page\PageStore\getDBConnectionRef
getDBConnectionRef(int $mode=DB_REPLICA)
Definition: PageStore.php:425
$title
$title
Definition: testCompression.php:38
TitleParser
A title parser service for MediaWiki.
Definition: TitleParser.php:33
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
Page\PageStore\$wikiId
string false $wikiId
Definition: PageStore.php:50
DBAccessObjectUtils
Helper class for DAO classes.
Definition: DBAccessObjectUtils.php:29
Page\PageStore\$stats
StatsdDataFactoryInterface $stats
Definition: PageStore.php:47
Page\PageStore\$titleParser
TitleParser $titleParser
Definition: PageStore.php:41
Page\ExistingPageRecord
Data record representing a page that currently exists as an editable page on a wiki.
Definition: ExistingPageRecord.php:15
Page\ProperPageIdentity
Interface for objects representing a page that is (or could be, or used to be) an editable page on a ...
Definition: ProperPageIdentity.php:43
MediaWiki\DAO\WikiAwareEntity\assertWiki
assertWiki( $wikiId)
Throws if $wikiId is different from the return value of getWikiId().
NS_MEDIA
const NS_MEDIA
Definition: Defines.php:52
Page\PageReference\getDBkey
getDBkey()
Get the page title in DB key form.
Page\PageLookup
Service interface for looking up information about wiki pages.
Definition: PageLookup.php:14
Page\PageStore\$namespaceInfo
NamespaceInfo $namespaceInfo
Definition: PageStore.php:38
Page\PageSelectQueryBuilder
Definition: PageSelectQueryBuilder.php:14
Page\PageStore\getPageForLink
getPageForLink(LinkTarget $link, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:108
Page\PageStore\getExistingPageByText
getExistingPageByText(string $text, int $defaultNamespace=NS_MAIN, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:271
Page\PageStore\$dbLoadBalancer
ILoadBalancer $dbLoadBalancer
Definition: PageStore.php:35
Page\PageStore\newPageRecordFromRow
newPageRecordFromRow(stdClass $row)
Definition: PageStore.php:359
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:26
Page\PageStore\newSelectQueryBuilder
newSelectQueryBuilder( $dbOrFlags=self::READ_NORMAL)
Definition: PageStore.php:406
Page\PageStore\getPageByNameViaLinkCache
getPageByNameViaLinkCache(int $namespace, string $dbKey, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:168
Page\PageIdentityValue
Immutable value object representing a page identity.
Definition: PageIdentityValue.php:41
MediaWiki\Page
Definition: ContentModelChangeFactory.php:23
Page\PageStore\getPageByReference
getPageByReference(PageReference $page, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:310
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Definition: NamespaceInfo.php:35
NS_FILE
const NS_FILE
Definition: Defines.php:70
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
Page\PageStore
Definition: PageStore.php:29
Page\PageStore\getPageByName
getPageByName(int $namespace, string $dbKey, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:140
Page\PageStore\CONSTRUCTOR_OPTIONS
const CONSTRUCTOR_OPTIONS
Definition: PageStore.php:55
Wikimedia\Rdbms\ILoadBalancer
Database cluster connection, tracking, load balancing, and transaction manager interface.
Definition: ILoadBalancer.php:81
Page\PageStore\getSelectFields
getSelectFields()
Definition: PageStore.php:371