Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
57.14% |
80 / 140 |
|
71.43% |
10 / 14 |
CRAP | |
0.00% |
0 / 1 |
PageStore | |
57.14% |
80 / 140 |
|
71.43% |
10 / 14 |
118.72 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
incrementLinkCacheHitOrMiss | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
getPageForLink | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
getPageByName | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
getPageByNameViaLinkCache | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
42 | |||
getPageByText | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getExistingPageByText | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getPageById | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getPageByReference | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
loadPageFromConditions | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
newPageRecordFromRow | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getSelectFields | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
2 | |||
newSelectQueryBuilder | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
getSubpages | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Page; |
4 | |
5 | use EmptyIterator; |
6 | use InvalidArgumentException; |
7 | use Iterator; |
8 | use MediaWiki\Cache\LinkCache; |
9 | use MediaWiki\Config\ServiceOptions; |
10 | use MediaWiki\DAO\WikiAwareEntity; |
11 | use MediaWiki\MainConfigNames; |
12 | use MediaWiki\Title\MalformedTitleException; |
13 | use MediaWiki\Title\NamespaceInfo; |
14 | use MediaWiki\Title\TitleParser; |
15 | use stdClass; |
16 | use Wikimedia\Assert\Assert; |
17 | use Wikimedia\Parsoid\Core\LinkTarget as ParsoidLinkTarget; |
18 | use Wikimedia\Rdbms\IDBAccessObject; |
19 | use Wikimedia\Rdbms\ILoadBalancer; |
20 | use Wikimedia\Rdbms\IReadableDatabase; |
21 | use Wikimedia\Stats\StatsFactory; |
22 | |
/**
 * Looks up page records by name, ID, page reference, link target or title text,
 * optionally going through a LinkCache for lookups by name.
 *
 * @since 1.36
 * @unstable
 */
class PageStore implements PageLookup {

	private ServiceOptions $options;
	private ILoadBalancer $dbLoadBalancer;
	private NamespaceInfo $namespaceInfo;
	private TitleParser $titleParser;
	/** @var LinkCache|null Only usable for the local wiki, see the constructor. */
	private ?LinkCache $linkCache;
	private StatsFactory $stats;
	/** @var string|false */
	private $wikiId;

	/**
	 * @internal for use by service wiring
	 */
	public const CONSTRUCTOR_OPTIONS = [
		MainConfigNames::PageLanguageUseDB,
	];

	/**
	 * @param ServiceOptions $options Must provide all of self::CONSTRUCTOR_OPTIONS
	 * @param ILoadBalancer $dbLoadBalancer
	 * @param NamespaceInfo $namespaceInfo
	 * @param TitleParser $titleParser
	 * @param ?LinkCache $linkCache Must be null unless $wikiId is WikiAwareEntity::LOCAL
	 * @param StatsFactory $stats
	 * @param false|string $wikiId
	 *
	 * @throws InvalidArgumentException If a LinkCache is given for a non-local wiki
	 */
	public function __construct(
		ServiceOptions $options,
		ILoadBalancer $dbLoadBalancer,
		NamespaceInfo $namespaceInfo,
		TitleParser $titleParser,
		?LinkCache $linkCache,
		StatsFactory $stats,
		$wikiId = WikiAwareEntity::LOCAL
	) {
		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

		$this->options = $options;
		$this->dbLoadBalancer = $dbLoadBalancer;
		$this->namespaceInfo = $namespaceInfo;
		$this->titleParser = $titleParser;
		$this->wikiId = $wikiId;
		$this->linkCache = $linkCache;
		$this->stats = $stats;

		if ( $wikiId !== WikiAwareEntity::LOCAL && $linkCache ) {
			// LinkCache currently doesn't support cross-wiki PageReferences.
			// Once it does, this check can go away. At that point, LinkCache should
			// probably also no longer be optional.
			throw new InvalidArgumentException( "Can't use LinkCache with pages from $wikiId" );
		}
	}

	/**
	 * Increment a cache hit or miss counter for LinkCache.
	 * Possible reason labels are:
	 * - `good`: The page was found in LinkCache and was complete.
	 * - `bad_early`: The page was known by LinkCache to not exist.
	 * - `bad_late`: The page was not found in LinkCache and did not exist.
	 * - `incomplete_loaded`: The page was found in LinkCache but was incomplete.
	 * - `incomplete_missing`: Incomplete page data was found in LinkCache, and the page did not exist.
	 *
	 * @param string $hitOrMiss 'hit' or 'miss'
	 * @param string $reason Well-known reason string
	 * @return void
	 */
	private function incrementLinkCacheHitOrMiss( $hitOrMiss, $reason ) {
		// The statsd key uses dots where the reason label uses underscores.
		$legacyReason = strtr( $reason, '_', '.' );

		$this->stats->getCounter( 'pagestore_linkcache_accesses_total' )
			->setLabel( 'reason', $reason )
			->setLabel( 'status', $hitOrMiss )
			->copyToStatsdAt( "LinkCache.$hitOrMiss.$legacyReason" )
			->increment();
	}

	/**
	 * Get the page identity a link points to, whether or not the page exists.
	 *
	 * Links into the Media namespace are mapped to the File namespace. If no page
	 * record is found, a PageIdentityValue with page ID 0 is returned instead.
	 * External links, relative links (empty DB key) and links into virtual
	 * (negative) namespaces are rejected via Assert::parameter().
	 *
	 * @param ParsoidLinkTarget $link
	 * @param int $queryFlags
	 *
	 * @return ProperPageIdentity
	 */
	public function getPageForLink(
		ParsoidLinkTarget $link,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ProperPageIdentity {
		Assert::parameter( !$link->isExternal(), '$link', 'must not be external' );
		Assert::parameter( $link->getDBkey() !== '', '$link', 'must not be relative' );

		$ns = $link->getNamespace();

		// Map Media links to File namespace
		if ( $ns === NS_MEDIA ) {
			$ns = NS_FILE;
		}

		Assert::parameter( $ns >= 0, '$link', 'namespace must not be virtual' );

		$page = $this->getPageByName( $ns, $link->getDBkey(), $queryFlags );

		if ( !$page ) {
			$page = new PageIdentityValue( 0, $ns, $link->getDBkey(), $this->wikiId );
		}

		return $page;
	}

	/**
	 * Look up an existing page by namespace and DB key.
	 *
	 * Goes through LinkCache when one was injected, otherwise queries the
	 * database directly.
	 *
	 * @param int $namespace Must not be negative (virtual)
	 * @param string $dbKey Must not be empty and must not contain spaces
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null
	 */
	public function getPageByName(
		int $namespace,
		string $dbKey,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		Assert::parameter( $dbKey !== '', '$dbKey', 'must not be empty' );
		// strpos() returns 0 for a match at offset 0, which is falsy, so the result
		// has to be compared against false explicitly to also catch a leading space.
		Assert::parameter( strpos( $dbKey, ' ' ) === false, '$dbKey', 'must not contain spaces' );
		Assert::parameter( $namespace >= 0, '$namespace', 'must not be virtual' );

		if ( $this->linkCache ) {
			return $this->getPageByNameViaLinkCache( $namespace, $dbKey, $queryFlags );
		}

		$conds = [
			'page_namespace' => $namespace,
			'page_title' => $dbKey,
		];

		return $this->loadPageFromConditions( $conds, $queryFlags );
	}

	/**
	 * Look up an existing page by namespace and DB key using LinkCache,
	 * recording a hit/miss statistic for every outcome.
	 *
	 * Only called when $this->linkCache is set.
	 *
	 * @param int $namespace
	 * @param string $dbKey
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null
	 */
	private function getPageByNameViaLinkCache(
		int $namespace,
		string $dbKey,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		$conds = [
			'page_namespace' => $namespace,
			'page_title' => $dbKey,
		];

		// The cached "known bad" state is only trusted for plain READ_NORMAL lookups.
		if ( $queryFlags === IDBAccessObject::READ_NORMAL && $this->linkCache->isBadLink( $conds ) ) {
			$this->incrementLinkCacheHitOrMiss( 'hit', 'bad_early' );
			return null;
		}

		$caller = __METHOD__;
		// Stays 'hit' unless the fetch callback below runs, which flips it to 'miss'.
		$hitOrMiss = 'hit';

		// Try to get the row from LinkCache, providing a callback to fetch it if it's not cached.
		// When getGoodLinkRow() returns, LinkCache should have an entry for the row, good or bad.
		$row = $this->linkCache->getGoodLinkRow(
			$namespace,
			$dbKey,
			function ( IReadableDatabase $dbr, $ns, $dbkey, array $options )
				use ( $conds, $caller, &$hitOrMiss )
			{
				$hitOrMiss = 'miss';

				return $this->newSelectQueryBuilder( $dbr )
					->fields( $this->getSelectFields() )
					->conds( $conds )
					->options( $options )
					->caller( $caller )
					->fetchRow();
			},
			$queryFlags
		);

		if ( !$row ) {
			$this->incrementLinkCacheHitOrMiss( $hitOrMiss, 'bad_late' );
			return null;
		}

		try {
			// NOTE: LinkCache may not include namespace and title in the cached row,
			// since it's already used as the cache key!
			$row->page_namespace = $namespace;
			$row->page_title = $dbKey;
			$page = $this->newPageRecordFromRow( $row );

			// We were able to use the row we got from link cache.
			$this->incrementLinkCacheHitOrMiss( $hitOrMiss, 'good' );
		} catch ( InvalidArgumentException $e ) {
			// The cached row was incomplete or corrupt,
			// just keep going and load from the database.
			$page = $this->loadPageFromConditions( $conds, $queryFlags );

			if ( $page ) {
				// PageSelectQueryBuilder should have added the full row to the LinkCache now.
				$this->incrementLinkCacheHitOrMiss( $hitOrMiss, 'incomplete_loaded' );
			} else {
				// If we get here, an incomplete row was cached, but we failed to
				// load the full row from the database. This should only happen
				// if the page was deleted under our feet, which should be very rare.
				// Update the LinkCache to reflect the new situation.
				$this->linkCache->addBadLinkObj( $conds );
				$this->incrementLinkCacheHitOrMiss( $hitOrMiss, 'incomplete_missing' );
			}
		}

		return $page;
	}

	/**
	 * Parse title text and return the page identity it refers to, whether or not
	 * the page exists.
	 *
	 * @since 1.37
	 *
	 * @param string $text
	 * @param int $defaultNamespace Namespace to assume by default (usually NS_MAIN)
	 * @param int $queryFlags
	 *
	 * @return ProperPageIdentity|null Null if the text is malformed or does not
	 *   refer to a page getPageForLink() accepts.
	 */
	public function getPageByText(
		string $text,
		int $defaultNamespace = NS_MAIN,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ProperPageIdentity {
		try {
			$title = $this->titleParser->parseTitle( $text, $defaultNamespace );
			return $this->getPageForLink( $title, $queryFlags );
		} catch ( MalformedTitleException | InvalidArgumentException $e ) {
			// Note that even some well-formed links are still invalid parameters
			// for getPageForLink(), e.g. interwiki links or special pages.
			return null;
		}
	}

	/**
	 * Parse title text and return the record of the existing page it refers to.
	 *
	 * @since 1.37
	 *
	 * @param string $text
	 * @param int $defaultNamespace Namespace to assume by default (usually NS_MAIN)
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null Null if the text cannot be resolved by
	 *   getPageByText() or no matching page record is found.
	 */
	public function getExistingPageByText(
		string $text,
		int $defaultNamespace = NS_MAIN,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		$pageIdentity = $this->getPageByText( $text, $defaultNamespace, $queryFlags );
		if ( !$pageIdentity ) {
			return null;
		}

		return $this->getPageByReference( $pageIdentity, $queryFlags );
	}

	/**
	 * Look up an existing page by its page ID. This never goes through LinkCache.
	 *
	 * @param int $pageId Must be greater than zero
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null
	 */
	public function getPageById(
		int $pageId,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		Assert::parameter( $pageId > 0, '$pageId', 'must be greater than zero' );

		// XXX: no caching needed?

		return $this->loadPageFromConditions( [ 'page_id' => $pageId ], $queryFlags );
	}

	/**
	 * Resolve a page reference to the record of the existing page.
	 *
	 * An ExistingPageRecord passed in with READ_NORMAL is returned as-is; with any
	 * other query flags the page is looked up again by name.
	 *
	 * @param PageReference $page Must belong to this store's wiki and must not be
	 *   in a virtual (negative) namespace
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null The page's PageRecord, or null if the page was not found.
	 */
	public function getPageByReference(
		PageReference $page,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		$page->assertWiki( $this->wikiId );
		Assert::parameter( $page->getNamespace() >= 0, '$page', 'namespace must not be virtual' );

		if ( $page instanceof ExistingPageRecord && $queryFlags === IDBAccessObject::READ_NORMAL ) {
			return $page;
		}
		if ( $page instanceof PageIdentity ) {
			Assert::parameter( $page->canExist(), '$page', 'Must be a proper page' );
		}

		return $this->getPageByName( $page->getNamespace(), $page->getDBkey(), $queryFlags );
	}

	/**
	 * Fetch a single page record matching the given query conditions.
	 *
	 * @param array $conds Conditions handed to the select query builder
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null
	 */
	private function loadPageFromConditions(
		array $conds,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		$queryBuilder = $this->newSelectQueryBuilder( $queryFlags )
			->conds( $conds )
			->caller( __METHOD__ );

		// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
		return $queryBuilder->fetchPageRecord();
	}

	/**
	 * Wrap a database row in a page record bound to this store's wiki.
	 *
	 * @internal
	 *
	 * @param stdClass $row
	 *
	 * @return ExistingPageRecord
	 */
	public function newPageRecordFromRow( stdClass $row ): ExistingPageRecord {
		return new PageStoreRecord( $row, $this->wikiId );
	}

	/**
	 * Get the fields to select for building page records.
	 *
	 * Includes `page_lang` only when the PageLanguageUseDB option is enabled, and
	 * always includes every field LinkCache::getSelectFields() asks for.
	 *
	 * @internal
	 *
	 * @return string[]
	 */
	public function getSelectFields(): array {
		$fields = [
			'page_id',
			'page_namespace',
			'page_title',
			'page_is_redirect',
			'page_is_new',
			'page_touched',
			'page_links_updated',
			'page_latest',
			'page_len',
			'page_content_model'
		];

		if ( $this->options->get( MainConfigNames::PageLanguageUseDB ) ) {
			$fields[] = 'page_lang';
		}

		// Since we are putting rows into LinkCache, we need to include all fields
		// that LinkCache needs.
		return array_unique(
			array_merge( $fields, LinkCache::getSelectFields() )
		);
	}

	/**
	 * Create a query builder for selecting pages.
	 *
	 * @param IReadableDatabase|int $dbOrFlags The database connection to use, or a READ_XXX constant
	 *   indicating what kind of database connection to use. When a connection is passed,
	 *   the builder's recency is set to READ_NORMAL.
	 *
	 * @return PageSelectQueryBuilder
	 */
	public function newSelectQueryBuilder( $dbOrFlags = IDBAccessObject::READ_NORMAL ): PageSelectQueryBuilder {
		if ( $dbOrFlags instanceof IReadableDatabase ) {
			$db = $dbOrFlags;
			$flags = IDBAccessObject::READ_NORMAL;
		} else {
			$flags = $dbOrFlags;
			// READ_LATEST (or any flag set that includes it) needs the primary database.
			$needsPrimary = ( $flags & IDBAccessObject::READ_LATEST ) === IDBAccessObject::READ_LATEST;
			$db = $this->dbLoadBalancer->getConnection(
				$needsPrimary ? DB_PRIMARY : DB_REPLICA,
				[],
				$this->wikiId
			);
		}

		$queryBuilder = new PageSelectQueryBuilder( $db, $this, $this->linkCache );
		$queryBuilder->recency( $flags );

		return $queryBuilder;
	}

	/**
	 * Get all subpages of this page.
	 * Will return an empty list if the namespace doesn't support subpages.
	 *
	 * @param PageIdentity $page
	 * @param int $limit Maximum number of subpages to fetch
	 *
	 * @return Iterator<ExistingPageRecord>
	 */
	public function getSubpages( PageIdentity $page, int $limit ): Iterator {
		if ( !$this->namespaceInfo->hasSubpages( $page->getNamespace() ) ) {
			return new EmptyIterator();
		}

		// Subpages are the titles in the same namespace that start with "<dbkey>/".
		return $this->newSelectQueryBuilder()
			->whereTitlePrefix( $page->getNamespace(), $page->getDBkey() . '/' )
			->orderByTitle()
			->limit( $limit )
			->caller( __METHOD__ )
			->fetchPageRecords();
	}

}