Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
57.97% |
80 / 138 |
|
71.43% |
10 / 14 |
CRAP | |
0.00% |
0 / 1 |
| PageStore | |
57.97% |
80 / 138 |
|
71.43% |
10 / 14 |
113.85 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
| incrementLinkCacheHitOrMiss | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| getPageForLink | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
| getPageByName | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
| getPageByNameViaLinkCache | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
42 | |||
| getPageByText | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| getExistingPageByText | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| getPageById | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
| getPageByReference | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
| loadPageFromConditions | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| newPageRecordFromRow | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| getSelectFields | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
2 | |||
| newSelectQueryBuilder | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
| getSubpages | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Page; |
| 4 | |
| 5 | use EmptyIterator; |
| 6 | use InvalidArgumentException; |
| 7 | use Iterator; |
| 8 | use MediaWiki\Config\ServiceOptions; |
| 9 | use MediaWiki\DAO\WikiAwareEntity; |
| 10 | use MediaWiki\MainConfigNames; |
| 11 | use MediaWiki\Title\MalformedTitleException; |
| 12 | use MediaWiki\Title\NamespaceInfo; |
| 13 | use MediaWiki\Title\TitleParser; |
| 14 | use stdClass; |
| 15 | use Wikimedia\Assert\Assert; |
| 16 | use Wikimedia\Parsoid\Core\LinkTarget as ParsoidLinkTarget; |
| 17 | use Wikimedia\Rdbms\IDBAccessObject; |
| 18 | use Wikimedia\Rdbms\ILoadBalancer; |
| 19 | use Wikimedia\Rdbms\IReadableDatabase; |
| 20 | use Wikimedia\Stats\StatsFactory; |
| 21 | |
/**
 * Looks up pages by name, text, ID or reference.
 *
 * Rows are read through a PageSelectQueryBuilder on a connection obtained from
 * the injected load balancer. When a LinkCache is provided, lookups by name go
 * through it, and hit/miss statistics are recorded via the StatsFactory.
 *
 * @since 1.36
 * @unstable
 */
class PageStore implements PageLookup {

	private ServiceOptions $options;
	private ILoadBalancer $dbLoadBalancer;
	private NamespaceInfo $namespaceInfo;
	private TitleParser $titleParser;
	private ?LinkCache $linkCache;
	private StatsFactory $stats;
	/** @var string|false */
	private $wikiId;

	/**
	 * @internal for use by service wiring
	 */
	public const CONSTRUCTOR_OPTIONS = [
		MainConfigNames::PageLanguageUseDB,
	];

	/**
	 * @param ServiceOptions $options Must provide all of self::CONSTRUCTOR_OPTIONS
	 * @param ILoadBalancer $dbLoadBalancer
	 * @param NamespaceInfo $namespaceInfo
	 * @param TitleParser $titleParser
	 * @param ?LinkCache $linkCache Only allowed when $wikiId is WikiAwareEntity::LOCAL
	 * @param StatsFactory $stats
	 * @param false|string $wikiId
	 *
	 * @throws InvalidArgumentException If a LinkCache is given together with a non-local $wikiId
	 */
	public function __construct(
		ServiceOptions $options,
		ILoadBalancer $dbLoadBalancer,
		NamespaceInfo $namespaceInfo,
		TitleParser $titleParser,
		?LinkCache $linkCache,
		StatsFactory $stats,
		$wikiId = WikiAwareEntity::LOCAL
	) {
		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

		$this->options = $options;
		$this->dbLoadBalancer = $dbLoadBalancer;
		$this->namespaceInfo = $namespaceInfo;
		$this->titleParser = $titleParser;
		$this->wikiId = $wikiId;
		$this->linkCache = $linkCache;
		$this->stats = $stats;

		if ( $wikiId !== WikiAwareEntity::LOCAL && $linkCache ) {
			// LinkCache currently doesn't support cross-wiki PageReferences.
			// Once it does, this check can go away. At that point, LinkCache should
			// probably also no longer be optional.
			throw new InvalidArgumentException( "Can't use LinkCache with pages from $wikiId" );
		}
	}

	/**
	 * Increment a cache hit or miss counter for LinkCache.
	 * Possible reason labels are:
	 * - `good`: The page was found in LinkCache and was complete.
	 * - `bad_early`: The page was known by LinkCache to not exist.
	 * - `bad_late`: The page was not found in LinkCache and did not exist.
	 * - `incomplete_loaded`: The page was found in LinkCache but was incomplete.
	 * - `incomplete_missing`: Incomplete page data was found in LinkCache, and the page did not exist.
	 *
	 * @param string $hitOrMiss 'hit' or 'miss'
	 * @param string $reason Well-known reason string
	 * @return void
	 */
	private function incrementLinkCacheHitOrMiss( $hitOrMiss, $reason ) {
		$this->stats->getCounter( 'pagestore_linkcache_accesses_total' )
			->setLabel( 'reason', $reason )
			->setLabel( 'status', $hitOrMiss )
			->increment();
	}

	/**
	 * Get the page a link points to. If no page record is found, a
	 * PageIdentityValue with page ID 0 is returned instead.
	 *
	 * Links in NS_MEDIA are mapped to NS_FILE. The link must not be external or
	 * relative (empty DB key), and its namespace must not be virtual; these
	 * preconditions are enforced via Assert::parameter().
	 *
	 * @param ParsoidLinkTarget $link
	 * @param int $queryFlags
	 *
	 * @return ProperPageIdentity
	 */
	public function getPageForLink(
		ParsoidLinkTarget $link,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ProperPageIdentity {
		Assert::parameter( !$link->isExternal(), '$link', 'must not be external' );
		Assert::parameter( $link->getDBkey() !== '', '$link', 'must not be relative' );

		$ns = $link->getNamespace();

		// Map Media links to File namespace
		if ( $ns === NS_MEDIA ) {
			$ns = NS_FILE;
		}

		Assert::parameter( $ns >= 0, '$link', 'namespace must not be virtual' );

		$page = $this->getPageByName( $ns, $link->getDBkey(), $queryFlags );

		if ( !$page ) {
			$page = new PageIdentityValue( 0, $ns, $link->getDBkey(), $this->wikiId );
		}

		return $page;
	}

	/**
	 * Look up a page by namespace and DB key, going through the LinkCache
	 * if one was provided to the constructor.
	 *
	 * @param int $namespace Must not be negative (virtual)
	 * @param string $dbKey Must not be empty and must not contain spaces
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null
	 */
	public function getPageByName(
		int $namespace,
		string $dbKey,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		Assert::parameter( $dbKey !== '', '$dbKey', 'must not be empty' );
		// Use str_contains() rather than !strpos(): strpos() returns 0 for a
		// leading space, which is falsy and would let such a key slip through.
		Assert::parameter( !str_contains( $dbKey, ' ' ), '$dbKey', 'must not contain spaces' );
		Assert::parameter( $namespace >= 0, '$namespace', 'must not be virtual' );

		if ( $this->linkCache ) {
			return $this->getPageByNameViaLinkCache( $namespace, $dbKey, $queryFlags );
		}

		$conds = [
			'page_namespace' => $namespace,
			'page_title' => $dbKey,
		];

		return $this->loadPageFromConditions( $conds, $queryFlags );
	}

	/**
	 * Look up a page by namespace and DB key via the LinkCache, recording
	 * hit/miss statistics. Within this class it is only called from
	 * getPageByName() when a LinkCache is set.
	 *
	 * @param int $namespace
	 * @param string $dbKey
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null
	 */
	private function getPageByNameViaLinkCache(
		int $namespace,
		string $dbKey,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		$conds = [
			'page_namespace' => $namespace,
			'page_title' => $dbKey,
		];

		// The cached "bad link" state is only trusted for plain READ_NORMAL lookups.
		if ( $queryFlags === IDBAccessObject::READ_NORMAL && $this->linkCache->isBadLink( $conds ) ) {
			$this->incrementLinkCacheHitOrMiss( 'hit', 'bad_early' );
			return null;
		}

		$caller = __METHOD__;
		// Stays 'hit' unless the fetch callback below is invoked.
		$hitOrMiss = 'hit';

		// Try to get the row from LinkCache, providing a callback to fetch it if it's not cached.
		// When getGoodLinkRow() returns, LinkCache should have an entry for the row, good or bad.
		$row = $this->linkCache->getGoodLinkRow(
			$namespace,
			$dbKey,
			function ( IReadableDatabase $dbr, $ns, $dbkey, array $options )
				use ( $conds, $caller, &$hitOrMiss )
			{
				$hitOrMiss = 'miss';

				return $this->newSelectQueryBuilder( $dbr )
					->fields( $this->getSelectFields() )
					->conds( $conds )
					->options( $options )
					->caller( $caller )
					->fetchRow();
			},
			$queryFlags
		);

		if ( $row ) {
			try {
				// NOTE: LinkCache may not include namespace and title in the cached row,
				// since it's already used as the cache key!
				$row->page_namespace = $namespace;
				$row->page_title = $dbKey;
				$page = $this->newPageRecordFromRow( $row );

				// We were able to use the row we got from link cache.
				$this->incrementLinkCacheHitOrMiss( $hitOrMiss, 'good' );
			} catch ( InvalidArgumentException ) {
				// The cached row was incomplete or corrupt,
				// just keep going and load from the database.
				$page = $this->loadPageFromConditions( $conds, $queryFlags );

				if ( $page ) {
					// PageSelectQueryBuilder should have added the full row to the LinkCache now.
					$this->incrementLinkCacheHitOrMiss( $hitOrMiss, 'incomplete_loaded' );
				} else {
					// If we get here, an incomplete row was cached, but we failed to
					// load the full row from the database. This should only happen
					// if the page was deleted under our feet, which should be very rare.
					// Update the LinkCache to reflect the new situation.
					$this->linkCache->addBadLinkObj( $conds );
					$this->incrementLinkCacheHitOrMiss( $hitOrMiss, 'incomplete_missing' );
				}
			}
		} else {
			$this->incrementLinkCacheHitOrMiss( $hitOrMiss, 'bad_late' );
			$page = null;
		}

		return $page;
	}

	/**
	 * Parse the given text as a title and look up the corresponding page.
	 *
	 * @since 1.37
	 *
	 * @param string $text
	 * @param int $defaultNamespace Namespace to assume by default (usually NS_MAIN)
	 * @param int $queryFlags
	 *
	 * @return ProperPageIdentity|null Null if the text cannot be parsed as a title,
	 *  or if the resulting link is not acceptable to getPageForLink().
	 */
	public function getPageByText(
		string $text,
		int $defaultNamespace = NS_MAIN,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ProperPageIdentity {
		try {
			$title = $this->titleParser->parseTitle( $text, $defaultNamespace );
			return $this->getPageForLink( $title, $queryFlags );
		} catch ( MalformedTitleException | InvalidArgumentException ) {
			// Note that even some well-formed links are still invalid parameters
			// for getPageForLink(), e.g. interwiki links or special pages.
			return null;
		}
	}

	/**
	 * Like getPageByText(), but returns a page record rather than a page
	 * identity. The identity is resolved through getPageByReference().
	 *
	 * @since 1.37
	 *
	 * @param string $text
	 * @param int $defaultNamespace Namespace to assume by default (usually NS_MAIN)
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null
	 */
	public function getExistingPageByText(
		string $text,
		int $defaultNamespace = NS_MAIN,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		$pageIdentity = $this->getPageByText( $text, $defaultNamespace, $queryFlags );
		if ( !$pageIdentity ) {
			return null;
		}
		return $this->getPageByReference( $pageIdentity, $queryFlags );
	}

	/**
	 * Look up a page by its page ID. This lookup does not consult the LinkCache
	 * before querying.
	 *
	 * @param int $pageId Must be greater than zero
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null
	 */
	public function getPageById(
		int $pageId,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		Assert::parameter( $pageId > 0, '$pageId', 'must be greater than zero' );

		$conds = [
			'page_id' => $pageId,
		];

		// XXX: no caching needed?

		return $this->loadPageFromConditions( $conds, $queryFlags );
	}

	/**
	 * Get the page record for the given page reference.
	 *
	 * If $page already is an ExistingPageRecord and $queryFlags is READ_NORMAL,
	 * it is returned as is, without a new lookup.
	 *
	 * @param PageReference $page Must belong to this store's wiki and must not
	 *  be in a virtual namespace; a PageIdentity must be able to exist.
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null The page's PageRecord, or null if the page was not found.
	 */
	public function getPageByReference(
		PageReference $page,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		$page->assertWiki( $this->wikiId );
		Assert::parameter( $page->getNamespace() >= 0, '$page', 'namespace must not be virtual' );

		if ( $page instanceof ExistingPageRecord && $queryFlags === IDBAccessObject::READ_NORMAL ) {
			return $page;
		}
		if ( $page instanceof PageIdentity ) {
			Assert::parameter( $page->canExist(), '$page', 'Must be a proper page' );
		}
		return $this->getPageByName( $page->getNamespace(), $page->getDBkey(), $queryFlags );
	}

	/**
	 * Fetch a single page record matching the given conditions.
	 *
	 * @param array $conds Query conditions, passed to the query builder's conds()
	 * @param int $queryFlags
	 *
	 * @return ExistingPageRecord|null
	 */
	private function loadPageFromConditions(
		array $conds,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?ExistingPageRecord {
		$queryBuilder = $this->newSelectQueryBuilder( $queryFlags )
			->conds( $conds )
			->caller( __METHOD__ );

		// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
		return $queryBuilder->fetchPageRecord();
	}

	/**
	 * Wrap a database row in a PageStoreRecord bound to this store's wiki ID.
	 *
	 * @internal
	 *
	 * @param stdClass $row
	 *
	 * @return ExistingPageRecord
	 */
	public function newPageRecordFromRow( stdClass $row ): ExistingPageRecord {
		return new PageStoreRecord(
			$row,
			$this->wikiId
		);
	}

	/**
	 * Get the fields to select when loading page rows. `page_lang` is included
	 * only if the PageLanguageUseDB option is enabled.
	 *
	 * @internal
	 *
	 * @return string[]
	 */
	public function getSelectFields(): array {
		$fields = [
			'page_id',
			'page_namespace',
			'page_title',
			'page_is_redirect',
			'page_is_new',
			'page_touched',
			'page_links_updated',
			'page_latest',
			'page_len',
			'page_content_model'
		];

		if ( $this->options->get( MainConfigNames::PageLanguageUseDB ) ) {
			$fields[] = 'page_lang';
		}

		// Since we are putting rows into LinkCache, we need to include all fields
		// that LinkCache needs.
		$fields = array_unique(
			array_merge( $fields, LinkCache::getSelectFields() )
		);

		return $fields;
	}

	/**
	 * Create a query builder for selecting pages.
	 *
	 * If flags are given, a primary connection is used when READ_LATEST is set,
	 * and a replica connection otherwise. If a connection is given, it is used
	 * as is and the builder's recency is set to READ_NORMAL.
	 *
	 * @param IReadableDatabase|int $dbOrFlags The database connection to use, or a READ_XXX constant
	 *   indicating what kind of database connection to use.
	 *
	 * @return PageSelectQueryBuilder
	 */
	public function newSelectQueryBuilder( $dbOrFlags = IDBAccessObject::READ_NORMAL ): PageSelectQueryBuilder {
		if ( $dbOrFlags instanceof IReadableDatabase ) {
			$db = $dbOrFlags;
			$flags = IDBAccessObject::READ_NORMAL;
		} else {
			if ( ( $dbOrFlags & IDBAccessObject::READ_LATEST ) == IDBAccessObject::READ_LATEST ) {
				$db = $this->dbLoadBalancer->getConnection( DB_PRIMARY, [], $this->wikiId );
			} else {
				$db = $this->dbLoadBalancer->getConnection( DB_REPLICA, [], $this->wikiId );
			}
			$flags = $dbOrFlags;
		}

		$queryBuilder = new PageSelectQueryBuilder( $db, $this, $this->linkCache );
		$queryBuilder->recency( $flags );

		return $queryBuilder;
	}

	/**
	 * Get all subpages of this page.
	 * Will return an empty list if the namespace doesn't support subpages.
	 *
	 * @param PageIdentity $page
	 * @param int $limit Maximum number of subpages to fetch
	 *
	 * @return Iterator<ExistingPageRecord>
	 */
	public function getSubpages( PageIdentity $page, int $limit ): Iterator {
		if ( !$this->namespaceInfo->hasSubpages( $page->getNamespace() ) ) {
			return new EmptyIterator();
		}

		return $this->newSelectQueryBuilder()
			->whereTitlePrefix( $page->getNamespace(), $page->getDBkey() . '/' )
			->orderByTitle()
			->limit( $limit )
			->caller( __METHOD__ )
			->fetchPageRecords();
	}

}