MediaWiki  master
PageStore.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Page;
4 
6 use EmptyIterator;
7 use InvalidArgumentException;
8 use Iterator;
9 use LinkCache;
10 use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
15 use NamespaceInfo;
17 use stdClass;
18 use TitleParser;
19 use Wikimedia\Assert\Assert;
22 
/**
 * Looks up page records by link target, name, text, ID or reference.
 *
 * Lookups by name go through the LinkCache when one was supplied to the
 * constructor; otherwise they query the database directly.
 */
class PageStore implements PageLookup {

	/** @var ServiceOptions */
	private $options;

	/** @var ILoadBalancer */
	private $dbLoadBalancer;

	/** @var NamespaceInfo */
	private $namespaceInfo;

	/** @var TitleParser */
	private $titleParser;

	/** @var LinkCache|null */
	private $linkCache;

	/** @var StatsdDataFactoryInterface */
	private $stats;

	/** @var string|false */
	private $wikiId;

	/**
	 * Option names that the ServiceOptions passed to the constructor must provide.
	 */
	public const CONSTRUCTOR_OPTIONS = [
		'PageLanguageUseDB',
	];

	/**
	 * @param ServiceOptions $options Must provide all of self::CONSTRUCTOR_OPTIONS.
	 * @param ILoadBalancer $dbLoadBalancer
	 * @param NamespaceInfo $namespaceInfo
	 * @param TitleParser $titleParser
	 * @param LinkCache|null $linkCache Only allowed when $wikiId is WikiAwareEntity::LOCAL.
	 * @param StatsdDataFactoryInterface|null $stats A NullStatsdDataFactory is used when null.
	 * @param string|false $wikiId
	 *
	 * @throws InvalidArgumentException If a LinkCache is given together with a non-local wiki ID.
	 */
	public function __construct(
		ServiceOptions $options,
		ILoadBalancer $dbLoadBalancer,
		NamespaceInfo $namespaceInfo,
		TitleParser $titleParser,
		?LinkCache $linkCache,
		?StatsdDataFactoryInterface $stats,
		$wikiId = WikiAwareEntity::LOCAL
	) {
		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

		$this->options = $options;
		$this->dbLoadBalancer = $dbLoadBalancer;
		$this->namespaceInfo = $namespaceInfo;
		$this->titleParser = $titleParser;
		$this->wikiId = $wikiId;
		$this->linkCache = $linkCache;
		$this->stats = $stats ?? new NullStatsdDataFactory();

		if ( $wikiId !== WikiAwareEntity::LOCAL && $linkCache ) {
			// LinkCache currently doesn't support cross-wiki PageReferences.
			// Once it does, this check can go away. At that point, LinkCache should
			// probably also no longer be optional.
			throw new InvalidArgumentException( "Can't use LinkCache with pages from $wikiId" );
		}
	}

	/**
	 * Increment the counter "PageStore.<metric>" on the stats factory.
	 *
	 * @param string $metric Metric name, without the "PageStore." prefix.
	 */
	private function incrementStats( string $metric ) {
		$this->stats->increment( "PageStore.{$metric}" );
	}

	/**
	 * Get the page identity a link points to.
	 *
	 * Links into NS_MEDIA are mapped to NS_FILE. If no page record is found for
	 * the target, a PageIdentityValue with page ID 0 is returned instead.
	 *
	 * The link must not be external, must have a non-empty DB key, and must not
	 * be in a virtual (negative) namespace; each of these is asserted.
	 *
	 * @param LinkTarget $link
	 * @param int $queryFlags
	 *
	 * @return ProperPageIdentity
	 */
	public function getPageForLink(
		LinkTarget $link,
		int $queryFlags = self::READ_NORMAL
	): ProperPageIdentity {
		Assert::parameter( !$link->isExternal(), '$link', 'must not be external' );
		Assert::parameter( $link->getDBkey() !== '', '$link', 'must not be relative' );

		$ns = $link->getNamespace();

		// Map Media links to File namespace
		if ( $ns === NS_MEDIA ) {
			$ns = NS_FILE;
		}

		Assert::parameter( $ns >= 0, '$link', 'namespace must not be virtual' );

		$page = $this->getPageByName( $ns, $link->getDBkey(), $queryFlags );

		if ( !$page ) {
			$page = new PageIdentityValue( 0, $ns, $link->getDBkey(), $this->wikiId );
		}

		return $page;
	}

	/**
	 * Get the record of the page with the given namespace and DB key.
	 *
	 * Uses the LinkCache if one was provided to the constructor, and queries
	 * the database directly otherwise.
	 *
	 * @param int $namespace Must not be negative.
	 * @param string $dbKey Must not be empty and must not contain spaces.
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null The page record, or null if none was found.
	 */
	public function getPageByName(
		int $namespace,
		string $dbKey,
		int $queryFlags = self::READ_NORMAL
	): ?ExistingPageRecord {
		Assert::parameter( $dbKey !== '', '$dbKey', 'must not be empty' );
		// Compare against false explicitly: strpos() returns 0 for a leading space,
		// which a plain truthiness check would treat as "no space found".
		Assert::parameter( strpos( $dbKey, ' ' ) === false, '$dbKey', 'must not contain spaces' );
		Assert::parameter( $namespace >= 0, '$namespace', 'must not be virtual' );

		if ( $this->linkCache ) {
			return $this->getPageByNameViaLinkCache( $namespace, $dbKey, $queryFlags );
		}

		return $this->loadPageFromConditions(
			[
				'page_namespace' => $namespace,
				'page_title' => $dbKey,
			],
			$queryFlags
		);
	}

	/**
	 * Get the record of the page with the given namespace and DB key, going
	 * through the LinkCache and recording hit/miss statistics.
	 *
	 * Must only be called when $this->linkCache is set.
	 *
	 * @param int $namespace
	 * @param string $dbKey
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null The page record, or null if none was found.
	 */
	private function getPageByNameViaLinkCache(
		int $namespace,
		string $dbKey,
		int $queryFlags = self::READ_NORMAL
	): ?ExistingPageRecord {
		$conds = [
			'page_namespace' => $namespace,
			'page_title' => $dbKey,
		];

		// Only trust a cached "bad link" entry for plain reads.
		if ( $queryFlags === self::READ_NORMAL && $this->linkCache->isBadLink( $conds ) ) {
			$this->incrementStats( "LinkCache.hit.bad.early" );
			return null;
		}

		$caller = __METHOD__;
		// Flipped to 'miss' by the callback below if LinkCache has to hit the database.
		$hitOrMiss = 'hit';

		// Try to get the row from LinkCache, providing a callback to fetch it if it's not cached.
		// When getGoodLinkRow() returns, LinkCache should have an entry for the row, good or bad.
		$row = $this->linkCache->getGoodLinkRow(
			$namespace,
			$dbKey,
			function ( IDatabase $dbr, $ns, $dbkey, array $options )
				use ( $conds, $caller, &$hitOrMiss )
			{
				$hitOrMiss = 'miss';

				return $this->newSelectQueryBuilder( $dbr )
					->fields( $this->getSelectFields() )
					->conds( $conds )
					->options( $options )
					->caller( $caller )
					->fetchRow();
			},
			$queryFlags
		);

		if ( !$row ) {
			$this->incrementStats( "LinkCache.{$hitOrMiss}.bad.late" );
			return null;
		}

		try {
			// NOTE: LinkCache may not include namespace and title in the cached row,
			// since it's already used as the cache key!
			$row->page_namespace = $namespace;
			$row->page_title = $dbKey;
			$page = $this->newPageRecordFromRow( $row );

			// We were able to use the row we got from link cache.
			$this->incrementStats( "LinkCache.{$hitOrMiss}.good" );
		} catch ( InvalidArgumentException $e ) {
			// The cached row was incomplete or corrupt,
			// just keep going and load from the database.
			$page = $this->loadPageFromConditions( $conds, $queryFlags );

			if ( $page ) {
				// PageSelectQueryBuilder should have added the full row to the LinkCache now.
				$this->incrementStats( "LinkCache.{$hitOrMiss}.incomplete.loaded" );
			} else {
				// If we get here, an incomplete row was cached, but we failed to
				// load the full row from the database. This should only happen
				// if the page was deleted under our feet, which should be very rare.
				// Update the LinkCache to reflect the new situation.
				$this->linkCache->addBadLinkObj( $conds );
				$this->incrementStats( "LinkCache.{$hitOrMiss}.incomplete.missing" );
			}
		}

		return $page;
	}

	/**
	 * Get the page identity for a title given as text.
	 *
	 * The text is parsed with the TitleParser and then resolved via getPageForLink().
	 *
	 * @param string $text
	 * @param int $defaultNamespace Namespace passed to the title parser along with $text.
	 * @param int $queryFlags
	 *
	 * @return ProperPageIdentity|null Null if the text cannot be parsed as a title,
	 *  or if the parsed title is not a valid parameter for getPageForLink().
	 */
	public function getPageByText(
		string $text,
		int $defaultNamespace = NS_MAIN,
		int $queryFlags = self::READ_NORMAL
	): ?ProperPageIdentity {
		try {
			$title = $this->titleParser->parseTitle( $text, $defaultNamespace );
			return $this->getPageForLink( $title, $queryFlags );
		} catch ( MalformedTitleException | InvalidArgumentException $e ) {
			// Note that even some well-formed links are still invalid parameters
			// for getPageForLink(), e.g. interwiki links or special pages.
			return null;
		}
	}

	/**
	 * Get the record of an existing page for a title given as text.
	 *
	 * @param string $text
	 * @param int $defaultNamespace Namespace passed to the title parser along with $text.
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null Null if getPageByText() yields no identity for
	 *  the text, or if no page record is found for that identity.
	 */
	public function getExistingPageByText(
		string $text,
		int $defaultNamespace = NS_MAIN,
		int $queryFlags = self::READ_NORMAL
	): ?ExistingPageRecord {
		$pageIdentity = $this->getPageByText( $text, $defaultNamespace, $queryFlags );
		if ( !$pageIdentity ) {
			return null;
		}
		return $this->getPageByReference( $pageIdentity, $queryFlags );
	}

	/**
	 * Get the record of the page with the given page ID.
	 *
	 * This always queries the database; the LinkCache is not consulted.
	 *
	 * @param int $pageId Must be greater than zero.
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null The page record, or null if none was found.
	 */
	public function getPageById(
		int $pageId,
		int $queryFlags = self::READ_NORMAL
	): ?ExistingPageRecord {
		Assert::parameter( $pageId > 0, '$pageId', 'must be greater than zero' );

		$conds = [
			'page_id' => $pageId,
		];

		// XXX: no caching needed?

		return $this->loadPageFromConditions( $conds, $queryFlags );
	}

	/**
	 * Get the record of the page identified by a PageReference.
	 *
	 * If $page already is an ExistingPageRecord and $queryFlags is READ_NORMAL,
	 * $page itself is returned without any lookup.
	 *
	 * @param PageReference $page Must belong to this store's wiki and must not be
	 *  in a virtual (negative) namespace. If it is a PageIdentity, canExist() must be true.
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null The page record, or null if none was found.
	 */
	public function getPageByReference(
		PageReference $page,
		int $queryFlags = self::READ_NORMAL
	): ?ExistingPageRecord {
		$page->assertWiki( $this->wikiId );
		Assert::parameter( $page->getNamespace() >= 0, '$page', 'namespace must not be virtual' );

		if ( $page instanceof ExistingPageRecord && $queryFlags === self::READ_NORMAL ) {
			return $page;
		}
		if ( $page instanceof PageIdentity ) {
			Assert::parameter( $page->canExist(), '$page', 'Must be a proper page' );
		}
		return $this->getPageByName( $page->getNamespace(), $page->getDBkey(), $queryFlags );
	}

	/**
	 * Fetch a single page record matching the given query conditions.
	 *
	 * @param array $conds Conditions handed to the select query builder.
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null
	 */
	private function loadPageFromConditions(
		array $conds,
		int $queryFlags = self::READ_NORMAL
	): ?ExistingPageRecord {
		$queryBuilder = $this->newSelectQueryBuilder( $queryFlags )
			->conds( $conds )
			->caller( __METHOD__ );

		// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
		return $queryBuilder->fetchPageRecord();
	}

	/**
	 * Wrap a database row in a PageStoreRecord bound to this store's wiki ID.
	 *
	 * @param stdClass $row
	 *
	 * @return ExistingPageRecord
	 */
	public function newPageRecordFromRow( stdClass $row ): ExistingPageRecord {
		return new PageStoreRecord(
			$row,
			$this->wikiId
		);
	}

	/**
	 * Get the list of fields to select when loading page rows.
	 *
	 * Includes 'page_lang' when the 'PageLanguageUseDB' option is set, plus
	 * every field returned by LinkCache::getSelectFields(), without duplicates.
	 *
	 * @return array
	 */
	public function getSelectFields(): array {
		$fields = [
			'page_id',
			'page_namespace',
			'page_title',
			'page_is_redirect',
			'page_is_new',
			'page_touched',
			'page_links_updated',
			'page_latest',
			'page_len',
			'page_content_model'
		];

		if ( $this->options->get( 'PageLanguageUseDB' ) ) {
			$fields[] = 'page_lang';
		}

		// Since we are putting rows into LinkCache, we need to include all fields
		// that LinkCache needs.
		$fields = array_unique(
			array_merge( $fields, LinkCache::getSelectFields() )
		);

		return $fields;
	}

	/**
	 * Create a query builder for selecting pages.
	 *
	 * @param IDatabase|int $dbOrFlags Either a database connection to query on, or
	 *  query flags from which the connection and query options are derived via
	 *  DBAccessObjectUtils::getDBOptions().
	 *
	 * @return PageSelectQueryBuilder
	 */
	public function newSelectQueryBuilder( $dbOrFlags = self::READ_NORMAL ): PageSelectQueryBuilder {
		if ( $dbOrFlags instanceof IDatabase ) {
			$db = $dbOrFlags;
			$options = [];
		} else {
			[ $mode, $options ] = DBAccessObjectUtils::getDBOptions( $dbOrFlags );
			$db = $this->getDBConnectionRef( $mode );
		}

		$queryBuilder = new PageSelectQueryBuilder( $db, $this, $this->linkCache );
		$queryBuilder->options( $options );

		return $queryBuilder;
	}

	/**
	 * Get a connection to the database of this store's wiki from the load balancer.
	 *
	 * @param int $mode Connection index passed to the load balancer, DB_REPLICA by default.
	 *
	 * @return IDatabase
	 */
	private function getDBConnectionRef( int $mode = DB_REPLICA ): IDatabase {
		return $this->dbLoadBalancer->getConnectionRef( $mode, [], $this->wikiId );
	}

	/**
	 * Get all subpages of this page.
	 *
	 * Subpages are the pages in the same namespace whose title starts with the
	 * page's DB key followed by "/"; they are returned ordered by title.
	 *
	 * @param PageIdentity $page
	 * @param int $limit Value used as the LIMIT option of the query.
	 *
	 * @return Iterator An EmptyIterator if the page's namespace has no subpages,
	 *  otherwise the page records found by the query.
	 */
	public function getSubpages( PageIdentity $page, int $limit ): Iterator {
		if ( !$this->namespaceInfo->hasSubpages( $page->getNamespace() ) ) {
			return new EmptyIterator();
		}

		return $this->newSelectQueryBuilder()
			->whereTitlePrefix( $page->getNamespace(), $page->getDBkey() . '/' )
			->orderByTitle()
			->options( [ 'LIMIT' => $limit ] )
			->caller( __METHOD__ )
			->fetchPageRecords();
	}

}
Page\PageIdentity
Interface for objects (potentially) representing an editable wiki page.
Definition: PageIdentity.php:64
LinkCache
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition: LinkCache.php:41
Page\PageStore\$linkCache
LinkCache|null $linkCache
Definition: PageStore.php:43
Page\PageStore\getSubpages
getSubpages(PageIdentity $page, int $limit)
Get all subpages of this page.
Definition: PageStore.php:433
Page\PageStore\getPageByText
getPageByText(string $text, int $defaultNamespace=NS_MAIN, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:250
NullStatsdDataFactory
Definition: NullStatsdDataFactory.php:10
DBAccessObjectUtils\getDBOptions
static getDBOptions( $bitfield)
Get an appropriate DB index, options, and fallback DB index for a query.
Definition: DBAccessObjectUtils.php:52
MediaWiki\DAO\WikiAwareEntity
Marker interface for entities aware of the wiki they belong to.
Definition: WikiAwareEntity.php:35
Page\PageReference
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
Definition: PageReference.php:49
Page\PageStore\__construct
__construct(ServiceOptions $options, ILoadBalancer $dbLoadBalancer, NamespaceInfo $namespaceInfo, TitleParser $titleParser, ?LinkCache $linkCache, ?StatsdDataFactoryInterface $stats, $wikiId=WikiAwareEntity::LOCAL)
Definition: PageStore.php:67
LinkCache\getSelectFields
static getSelectFields()
Fields that LinkCache needs to select.
Definition: LinkCache.php:384
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
Page\PageStoreRecord
Immutable data record representing an editable page on a wiki.
Definition: PageStoreRecord.php:33
NS_MAIN
const NS_MAIN
Definition: Defines.php:64
Page\PageStore\$options
ServiceOptions $options
Definition: PageStore.php:31
$dbr
$dbr
Definition: testCompression.php:54
Page\PageStore\getPageById
getPageById(int $pageId, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:292
MediaWiki\Config\ServiceOptions
A class for passing options to services.
Definition: ServiceOptions.php:27
Page\PageReference\getNamespace
getNamespace()
Returns the page's namespace number.
Page\PageStore\incrementStats
incrementStats(string $metric)
Definition: PageStore.php:97
Page\PageStore\loadPageFromConditions
loadPageFromConditions(array $conds, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:335
Page\PageStore\getDBConnectionRef
getDBConnectionRef(int $mode=DB_REPLICA)
Definition: PageStore.php:420
$title
$title
Definition: testCompression.php:38
TitleParser
A title parser service for MediaWiki.
Definition: TitleParser.php:33
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
Page\PageStore\$wikiId
string|false $wikiId
Definition: PageStore.php:49
DBAccessObjectUtils
Helper class for DAO classes.
Definition: DBAccessObjectUtils.php:29
Page\PageStore\$stats
StatsdDataFactoryInterface $stats
Definition: PageStore.php:46
Page\PageStore\$titleParser
TitleParser $titleParser
Definition: PageStore.php:40
Page\ExistingPageRecord
Data record representing a page that currently exists as an editable page on a wiki.
Definition: ExistingPageRecord.php:15
Page\ProperPageIdentity
Interface for objects representing a page that is (or could be, or used to be) an editable page on a ...
Definition: ProperPageIdentity.php:43
MediaWiki\DAO\WikiAwareEntity\assertWiki
assertWiki( $wikiId)
Throws if $wikiId is different from the return value of getWikiId().
NS_MEDIA
const NS_MEDIA
Definition: Defines.php:52
Page\PageReference\getDBkey
getDBkey()
Get the page title in DB key form.
Page\PageLookup
Service interface for looking up information about wiki pages.
Definition: PageLookup.php:14
Page\PageStore\$namespaceInfo
NamespaceInfo $namespaceInfo
Definition: PageStore.php:37
Page\PageSelectQueryBuilder
Definition: PageSelectQueryBuilder.php:14
Page\PageStore\getPageForLink
getPageForLink(LinkTarget $link, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:107
Page\PageStore\getExistingPageByText
getExistingPageByText(string $text, int $defaultNamespace=NS_MAIN, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:274
Page\PageStore\$dbLoadBalancer
ILoadBalancer $dbLoadBalancer
Definition: PageStore.php:34
Page\PageStore\newPageRecordFromRow
newPageRecordFromRow(stdClass $row)
Definition: PageStore.php:354
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:26
Page\PageStore\newSelectQueryBuilder
newSelectQueryBuilder( $dbOrFlags=self::READ_NORMAL)
Definition: PageStore.php:401
Page\PageStore\getPageByNameViaLinkCache
getPageByNameViaLinkCache(int $namespace, string $dbKey, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:167
Page\PageIdentityValue
Immutable value object representing a page identity.
Definition: PageIdentityValue.php:41
MediaWiki\Page
Definition: ContentModelChangeFactory.php:23
Page\PageStore\getPageByReference
getPageByReference(PageReference $page, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:313
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Definition: NamespaceInfo.php:35
NS_FILE
const NS_FILE
Definition: Defines.php:70
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
Page\PageStore
Definition: PageStore.php:28
Page\PageStore\getPageByName
getPageByName(int $namespace, string $dbKey, int $queryFlags=self::READ_NORMAL)
Definition: PageStore.php:139
Page\PageStore\CONSTRUCTOR_OPTIONS
const CONSTRUCTOR_OPTIONS
Definition: PageStore.php:54
Wikimedia\Rdbms\ILoadBalancer
Database cluster connection, tracking, load balancing, and transaction manager interface.
Definition: ILoadBalancer.php:81
Page\PageStore\getSelectFields
getSelectFields()
Definition: PageStore.php:366