Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
90.20% |
414 / 459 |
|
46.15% |
6 / 13 |
CRAP | |
0.00% |
0 / 1 |
| ThreadItemStore | |
90.20% |
414 / 459 |
|
46.15% |
6 / 13 |
76.88 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| isDisabled | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| findNewestRevisionsByName | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
5.00 | |||
| findNewestRevisionsById | |
95.65% |
22 / 23 |
|
0.00% |
0 / 1 |
5 | |||
| findNewestRevisionsByHeading | |
91.04% |
61 / 67 |
|
0.00% |
0 / 1 |
6.03 | |||
| findNewestRevisionsByQuery | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
5 | |||
| fetchItemsResultSet | |
100.00% |
32 / 32 |
|
100.00% |
1 / 1 |
1 | |||
| fetchRevisionAndPageForItems | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
| getThreadItemFromRow | |
85.00% |
34 / 40 |
|
0.00% |
0 / 1 |
11.41 | |||
| findThreadItemsInCurrentRevision | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
| getIdsNamesBuilder | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
| findOrInsertId | |
38.89% |
7 / 18 |
|
0.00% |
0 / 1 |
7.65 | |||
| insertThreadItems | |
98.03% |
199 / 203 |
|
0.00% |
0 / 1 |
24 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\DiscussionTools; |
| 4 | |
| 5 | use Exception; |
| 6 | use MediaWiki\Config\Config; |
| 7 | use MediaWiki\Config\ConfigFactory; |
| 8 | use MediaWiki\Extension\DiscussionTools\ThreadItem\CommentItem; |
| 9 | use MediaWiki\Extension\DiscussionTools\ThreadItem\DatabaseCommentItem; |
| 10 | use MediaWiki\Extension\DiscussionTools\ThreadItem\DatabaseHeadingItem; |
| 11 | use MediaWiki\Extension\DiscussionTools\ThreadItem\DatabaseThreadItem; |
| 12 | use MediaWiki\Extension\DiscussionTools\ThreadItem\HeadingItem; |
| 13 | use MediaWiki\Language\Language; |
| 14 | use MediaWiki\Page\PageStore; |
| 15 | use MediaWiki\Revision\RevisionRecord; |
| 16 | use MediaWiki\Revision\RevisionStore; |
| 17 | use MediaWiki\Title\TitleFormatter; |
| 18 | use MediaWiki\Title\TitleValue; |
| 19 | use MediaWiki\User\ActorStore; |
| 20 | use MediaWiki\Utils\MWTimestamp; |
| 21 | use stdClass; |
| 22 | use Wikimedia\NormalizedException\NormalizedException; |
| 23 | use Wikimedia\Rdbms\DBError; |
| 24 | use Wikimedia\Rdbms\IDatabase; |
| 25 | use Wikimedia\Rdbms\IExpression; |
| 26 | use Wikimedia\Rdbms\ILBFactory; |
| 27 | use Wikimedia\Rdbms\ILoadBalancer; |
| 28 | use Wikimedia\Rdbms\IReadableDatabase; |
| 29 | use Wikimedia\Rdbms\IResultWrapper; |
| 30 | use Wikimedia\Rdbms\LikeValue; |
| 31 | use Wikimedia\Rdbms\ReadOnlyMode; |
| 32 | use Wikimedia\Rdbms\SelectQueryBuilder; |
| 33 | use Wikimedia\Timestamp\TimestampException; |
| 34 | |
| 35 | /** |
| 36 | * Stores and fetches ThreadItemSets from the database. |
| 37 | */ |
| 38 | class ThreadItemStore { |
| 39 | |
| 40 | private readonly Config $config; |
| 41 | |
/**
 * Wire up the services this store uses to read and write thread items.
 *
 * Every service except the config factory is kept through constructor promotion;
 * the config factory is only used here to resolve the 'discussiontools' config.
 */
public function __construct(
	ConfigFactory $configFactory,
	private readonly ILBFactory $dbProvider,
	private readonly ReadOnlyMode $readOnlyMode,
	private readonly PageStore $pageStore,
	private readonly RevisionStore $revStore,
	private readonly TitleFormatter $titleFormatter,
	private readonly ActorStore $actorStore,
	private readonly Language $language,
) {
	$extensionConfig = $configFactory->makeConfig( 'discussiontools' );
	$this->config = $extensionConfig;
}
| 54 | |
/**
 * Whether the store should fail softly instead of touching its tables
 * (e.g. because the tables necessary for this feature haven't been created yet).
 *
 * This implementation always reports the store as enabled.
 *
 * @return bool Always false
 */
public function isDisabled(): bool {
	return false;
}
| 62 | |
/**
 * Look up thread items by name, returning them as they appear in the newest revision
 * of every page in which they have appeared.
 *
 * @param string|string[] $itemName One name, or a list of names
 * @param int|null $limit Maximum number of rows to fetch; null for no limit
 * @return DatabaseThreadItem[] Items whose revision is still visible
 */
public function findNewestRevisionsByName( $itemName, ?int $limit = 50 ): array {
	if ( $this->isDisabled() ) {
		return [];
	}

	$replica = $this->dbProvider->getReplicaDatabase();
	$conds = [
		'it_itemname' => $itemName,
		// Disallow querying for headings of sections that contain no comments.
		// They all share the same name, so this would return a huge useless list on most wikis.
		// (But we still store them, as we might need this data elsewhere.)
		$replica->expr( 'it_itemname', '!=', 'h-' ),
	];

	$builder = $this->getIdsNamesBuilder()
		->caller( __METHOD__ )
		->where( $conds );
	if ( $limit !== null ) {
		$builder->limit( $limit );
	}

	$rows = $this->fetchItemsResultSet( $builder );
	$revs = $this->fetchRevisionAndPageForItems( $rows );

	$found = [];
	foreach ( $rows as $row ) {
		$item = $this->getThreadItemFromRow( $row, null, $revs );
		if ( $item === null ) {
			// Revision is deleted or hidden; skip the item
			continue;
		}
		$found[] = $item;
	}
	return $found;
}
| 103 | |
/**
 * Look up thread items by ID, returning them as they appear in the newest revision
 * of every page in which they have appeared.
 *
 * @param string|string[] $itemId One ID, or a list of IDs
 * @param int|null $limit Maximum number of rows to fetch; null for no limit
 * @return DatabaseThreadItem[] Items whose revision is still visible
 */
public function findNewestRevisionsById( $itemId, ?int $limit = 50 ): array {
	if ( $this->isDisabled() ) {
		return [];
	}

	// First find the name associated with the ID; then find by name. Otherwise we wouldn't find the
	// latest revision in case comment ID changed, e.g. the comment was moved elsewhere on the page.
	//
	// There may be more than one name only in case of headings; for comments, any ID
	// corresponds to just one name. The sub-query is deliberately left without limit( 1 ),
	// although it is unclear how many rows it may end up scanning.
	$nameSubquery = $this->getIdsNamesBuilder()
		->where( [ 'itid_itemid' => $itemId ] )
		->field( 'it_itemname' );

	$replica = $this->dbProvider->getReplicaDatabase();
	$builder = $this->getIdsNamesBuilder()
		->caller( __METHOD__ )
		->where( [
			'it_itemname IN (' . $nameSubquery->getSQL() . ')',
			// Same exclusion of comment-less section headings as when searching by name
			$replica->expr( 'it_itemname', '!=', 'h-' ),
		] );
	if ( $limit !== null ) {
		$builder->limit( $limit );
	}

	$rows = $this->fetchItemsResultSet( $builder );
	$revs = $this->fetchRevisionAndPageForItems( $rows );

	$found = [];
	foreach ( $rows as $row ) {
		$item = $this->getThreadItemFromRow( $row, null, $revs );
		if ( $item === null ) {
			// Revision is deleted or hidden; skip the item
			continue;
		}
		$found[] = $item;
	}
	return $found;
}
| 154 | |
/**
 * Find heading items matching some text which:
 *
 * 1. appeared at some point in the history of the target page, or if this returns no results:
 * 2. currently appear on a subpage of the target page, or if this returns no results:
 * 3. currently appear on any page, but only if it is a unique match
 *
 * If none of the above matches and the target page has never contained any comments,
 * an exception is thrown instead of returning an empty list (T374598).
 *
 * NOTE(review): $heading is documented as string|string[], but it is truncated and
 * concatenated into a LIKE prefix below, so only a single string appears to be
 * supported - confirm against callers.
 *
 * @param string|string[] $heading Heading text to match
 * @param int $articleId Article ID of the target page
 * @param TitleValue $title Title of the target page
 * @param int|null $limit Maximum number of items to return; null for no limit
 * @return DatabaseThreadItem[]
 * @throws PageNeverHadThreadsException If nothing matched and the page never had any comments
 */
public function findNewestRevisionsByHeading(
	$heading, int $articleId, TitleValue $title, ?int $limit = 50
): array {
	if ( $this->isDisabled() ) {
		return [];
	}

	// Mirrors CommentParser::truncateForId
	$heading = trim( $this->language->truncateForDatabase( $heading, 80, '' ), '_' );
	// All heading lookups below match item IDs starting with this prefix
	$headingIdPrefix = 'h-' . $heading . '-';

	// Only used to build expressions for read queries, so a replica handle is sufficient
	// (the query builders themselves come from getIdsNamesBuilder(), which also uses a replica).
	$dbr = $this->dbProvider->getReplicaDatabase();

	// 1. Try to find items which have appeared on the page at some point
	// in its history.
	$itemIdInPageHistoryQueryBuilder = $this->getIdsNamesBuilder()
		->caller( __METHOD__ . ' case 1' )
		->join( 'revision', null, [ 'rev_id = itr_revision_id' ] )
		->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
			$headingIdPrefix,
			$dbr->anyString()
		) ) )
		// Has once appeared on the specified page ID
		->where( [ 'rev_page' => $articleId ] )
		->field( 'itid_itemid' );

	$threadItems = $this->findNewestRevisionsByQuery( __METHOD__ . ' case 1',
		$itemIdInPageHistoryQueryBuilder, $limit );

	if ( count( $threadItems ) ) {
		return $threadItems;
	}

	// 2. If the thread item's database hasn't been back-filled with historical revisions
	// then approach (1) may not work, instead look for matching headings that currently
	// appear on subpages, which matches the archiving convention on most wikis.
	$itemIdInSubPageQueryBuilder = $this->getIdsNamesBuilder()
		->caller( __METHOD__ . ' case 2' )
		->join( 'page', null, [ 'page_id = itp_page_id' ] )
		->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
			$headingIdPrefix,
			$dbr->anyString()
		) ) )
		// page_title stores the DB key (underscores, not spaces), so compare against
		// the DB key; the display text would never match titles containing spaces.
		->where( $dbr->expr( 'page_title', IExpression::LIKE, new LikeValue(
			$title->getDBkey() . '/',
			$dbr->anyString()
		) ) )
		->where( [ 'page_namespace' => $title->getNamespace() ] )
		->field( 'itid_itemid' );

	$threadItems = $this->findNewestRevisionsByQuery( __METHOD__ . ' case 2',
		$itemIdInSubPageQueryBuilder, $limit );

	if ( count( $threadItems ) ) {
		return $threadItems;
	}

	// 3. Look for an "exact" match of the heading on any page. Because we are searching
	// so broadly, only return if there is exactly one match to the heading name.
	$itemIdInAnyPageQueryBuilder = $this->getIdsNamesBuilder()
		->caller( __METHOD__ . ' case 3' )
		->join( 'page', null, [ 'page_id = itp_page_id', 'page_latest = itr_revision_id' ] )
		->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
			$headingIdPrefix,
			$dbr->anyString()
		) ) )
		->field( 'itid_itemid' )
		// We only care if there is one, or more than one result
		->limit( 2 );

	// Check there is only one result in the sub-query
	$itemIds = $itemIdInAnyPageQueryBuilder->fetchFieldValues();
	if ( count( $itemIds ) === 1 ) {
		// Honour the caller's limit here too, like cases 1 and 2
		return $this->findNewestRevisionsByQuery( __METHOD__ . ' case 3', $itemIds[ 0 ], $limit );
	}

	// 4. If there are no matches, check if the "talk" page has ever had any discussions
	// on it (comments, not just headings). If not then throw an error instead of
	// returning an empty list. This prevents the "topic could not be found" message
	// from showing in the frontend. (T374598)
	$anyItemsInPageHistoryQueryBuilder = $this->getIdsNamesBuilder()
		->caller( __METHOD__ . ' case 4' )
		->join( 'revision', null, [ 'rev_id = itr_revision_id' ] )
		// Only comments, as non-talk headings are recorded
		->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
			'c-',
			$dbr->anyString()
		) ) )
		// On the specified page ID
		->where( [ 'rev_page' => $articleId ] )
		->field( 'itid_itemid' )
		->limit( 1 );

	// Check whether the page has ever had at least one comment
	$itemIds = $anyItemsInPageHistoryQueryBuilder->fetchFieldValues();
	if ( count( $itemIds ) === 0 ) {
		throw new PageNeverHadThreadsException(
			"Page {page} has never contained any discussions",
			[ 'page' => $articleId ]
		);
	}

	return [];
}
| 271 | |
/**
 * Fetch the newest-revision thread items whose item ID is either the given ID or is
 * returned by the given sub-query.
 *
 * @param string $fname Caller name, prepended to this method's name for query attribution
 * @param SelectQueryBuilder|string $itemIdOrQueryBuilder Sub-query which returns item IDs, or an item ID
 * @param int|null $limit Maximum number of rows to fetch; null for no limit
 * @return DatabaseThreadItem[] Items whose revision is still visible
 */
private function findNewestRevisionsByQuery( $fname, $itemIdOrQueryBuilder, ?int $limit = 50 ): array {
	$builder = $this->getIdsNamesBuilder()->caller( $fname . ' / ' . __METHOD__ );

	$itemIdCond = $itemIdOrQueryBuilder instanceof SelectQueryBuilder
		// Match against everything the sub-query returns
		? [ 'itid_itemid IN (' . $itemIdOrQueryBuilder->getSQL() . ')' ]
		// Match against the literal item ID
		: [ 'itid_itemid' => $itemIdOrQueryBuilder ];
	$builder->where( $itemIdCond );

	if ( $limit !== null ) {
		$builder->limit( $limit );
	}

	$rows = $this->fetchItemsResultSet( $builder );
	$revs = $this->fetchRevisionAndPageForItems( $rows );

	$found = [];
	foreach ( $rows as $row ) {
		$item = $this->getThreadItemFromRow( $row, null, $revs );
		if ( $item === null ) {
			// Revision is deleted or hidden; skip the item
			continue;
		}
		$found[] = $item;
	}
	return $found;
}
| 305 | |
/**
 * Add the fields and joins needed to build thread item objects to the given query
 * builder, then run it.
 *
 * @param SelectQueryBuilder $queryBuilder Builder from getIdsNamesBuilder(), with conditions applied
 * @return IResultWrapper Rows accepted by getThreadItemFromRow()
 */
private function fetchItemsResultSet( SelectQueryBuilder $queryBuilder ): IResultWrapper {
	$itemFields = [
		'itr_id',
		'it_itemname',
		'it_timestamp',
		'it_actor',
		'itid_itemid',
		'itr_parent_id',
		'itr_transcludedfrom',
		'itr_level',
		'itr_headinglevel',
		'itr_revision_id',
	];

	// Sub-query exposing the parent's item ID (the string, not just the primary key)
	$parentSubquery = $this->getIdsNamesBuilder()
		->caller( __METHOD__ )
		->fields( [
			'itr_parent__itr_id' => 'itr_id',
			'itr_parent__itid_itemid' => 'itid_itemid',
		] );

	$queryBuilder->fields( $itemFields );

	// PageStore fields for the transcluded-from page
	$queryBuilder->leftJoin( 'page', null, [ 'page_id = itr_transcludedfrom' ] );
	$queryBuilder->fields( $this->pageStore->getSelectFields() );

	// ActorStore fields for the author
	$queryBuilder->leftJoin( 'actor', null, [ 'actor_id = it_actor' ] );
	$queryBuilder->fields( [ 'actor_id', 'actor_name', 'actor_user' ] );

	// Parent item ID
	$queryBuilder->leftJoin( $parentSubquery, null, [ 'itr_parent_id = itr_parent__itr_id' ] );
	$queryBuilder->field( 'itr_parent__itid_itemid' );

	return $queryBuilder->fetchResultSet();
}
| 341 | |
/**
 * Fetch the revision and page rows for every revision referenced by the given item rows.
 *
 * This could theoretically be done in the same query as fetchItemsResultSet(),
 * but the resulting query would be two screens long
 * and we'd have to alias a lot of fields to avoid conflicts.
 *
 * @param IResultWrapper $result Rows from fetchItemsResultSet()
 * @return array<int|string,stdClass|null> Map of revision ID to its combined revision/page
 *  row, or null when no 'revision' row was found for that ID
 */
private function fetchRevisionAndPageForItems( IResultWrapper $result ): array {
	$revs = [];
	foreach ( $result as $row ) {
		// Placeholder; stays null if the revision row can't be found below
		$revs[ $row->itr_revision_id ] = null;
	}

	if ( !$revs ) {
		// No items, so there is nothing to look up; avoid a pointless database round-trip
		return [];
	}

	$revResult = $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder()
		->caller( __METHOD__ )
		->queryInfo( $this->revStore->getQueryInfo( [ 'page' ] ) )
		->fields( $this->pageStore->getSelectFields() )
		->where( [ 'rev_id' => array_keys( $revs ) ] )
		->fetchResultSet();
	foreach ( $revResult as $row ) {
		$revs[ $row->rev_id ] = $row;
	}
	return $revs;
}
| 364 | |
/**
 * Build a thread item object from a row returned by fetchItemsResultSet().
 *
 * @param stdClass $row Item row
 * @param DatabaseThreadItemSet|null $set Set in which to look up the parent item; when null,
 *  the item is created without a parent
 * @param array $revs Map of revision ID to revision/page row (or null), as returned by
 *  fetchRevisionAndPageForItems()
 * @return DatabaseThreadItem|null Null if the revision is deleted or revision-deleted
 */
private function getThreadItemFromRow(
	stdClass $row, ?DatabaseThreadItemSet $set, array $revs
): ?DatabaseThreadItem {
	$revRow = $revs[ $row->itr_revision_id ];
	if ( $revRow === null ) {
		// We didn't find the 'revision' table row at all, this revision is deleted.
		// (The page may or may not have other non-deleted revisions.)
		// Pretend the thread item doesn't exist to avoid leaking data to users who shouldn't see it.
		// TODO Allow privileged users to see it (we'd need to query from 'archive')
		return null;
	}

	$page = $this->pageStore->newPageRecordFromRow( $revRow );
	$rev = $this->revStore->newRevisionFromRow( $revRow );
	if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
		// This revision is revision-deleted.
		// TODO Allow privileged users to see it
		return null;
	}

	// The parent can only be resolved when we are building a whole set
	$parent = ( $set && $row->itr_parent__itid_itemid )
		? $set->findCommentById( $row->itr_parent__itid_itemid )
		: null;

	// Either the prefixed title of the transcluded-from page (when the joined page row exists),
	// or a boolean which is true only when the stored value is '0'
	$transcludedFrom = ( $row->itr_transcludedfrom && $row->page_id )
		? $this->titleFormatter->getPrefixedText( $this->pageStore->newPageRecordFromRow( $row ) )
		: $row->itr_transcludedfrom === '0';

	$level = (int)$row->itr_level;
	$isComment = $row->it_timestamp !== null && $row->it_actor !== null;

	if ( $isComment ) {
		$author = $this->actorStore->newActorFromRow( $row )->getName();
		$item = new DatabaseCommentItem(
			$page,
			$rev,
			$row->it_itemname,
			$row->itid_itemid,
			$parent,
			$transcludedFrom,
			$level,
			$row->it_timestamp,
			$author
		);
	} else {
		$headingLevel = $row->itr_headinglevel === null ? null : (int)$row->itr_headinglevel;
		$item = new DatabaseHeadingItem(
			$page,
			$rev,
			$row->it_itemname,
			$row->itid_itemid,
			$parent,
			$transcludedFrom,
			$level,
			$headingLevel
		);
	}

	// Link the new item into the tree
	$parent?->addReply( $item );

	return $item;
}
| 430 | |
/**
 * Load the thread item set stored for the given revision, assuming that it is the
 * current revision of its page.
 *
 * @param int $revId Revision ID
 * @return DatabaseThreadItemSet Set of the visible items; empty if the store is disabled
 */
public function findThreadItemsInCurrentRevision( int $revId ): DatabaseThreadItemSet {
	$itemSet = new DatabaseThreadItemSet();
	if ( $this->isDisabled() ) {
		return $itemSet;
	}

	$builder = $this->getIdsNamesBuilder()
		->caller( __METHOD__ )
		->where( [ 'itr_revision_id' => $revId ] )
		// We must process parents before their children in the loop below
		->orderBy( 'itr_id', SelectQueryBuilder::SORT_ASC );

	$rows = $this->fetchItemsResultSet( $builder );
	$revs = $this->fetchRevisionAndPageForItems( $rows );

	foreach ( $rows as $row ) {
		$item = $this->getThreadItemFromRow( $row, $itemSet, $revs );
		if ( $item === null ) {
			// Revision is deleted or hidden; skip the item
			continue;
		}
		$itemSet->addThreadItem( $item );
		$itemSet->updateIdAndNameMaps( $item );
	}
	return $itemSet;
}
| 459 | |
/**
 * Create a replica query builder joining the items, item pages, item revisions and
 * item IDs tables, restricted to the newest known revision of each item on each page.
 *
 * @return SelectQueryBuilder Builder with no fields, conditions or caller set yet
 */
private function getIdsNamesBuilder(): SelectQueryBuilder {
	$itemRevisionsJoinConds = [
		'itr_items_id = it_id',
		// Only the latest revision of the items with each name
		'itr_revision_id = itp_newest_revision_id',
	];

	return $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder()
		->from( 'discussiontools_items' )
		->join( 'discussiontools_item_pages', null, [ 'itp_items_id = it_id' ] )
		->join( 'discussiontools_item_revisions', null, $itemRevisionsJoinConds )
		->join( 'discussiontools_item_ids', null, [ 'itid_id = itr_itemid_id' ] );
}
| 475 | |
/**
 * Return the primary key of a row, inserting the row first if it can't be found.
 *
 * The lookup is tried on the primary connection, then the insert, and if the insert
 * reports nothing either, the lookup is retried on a separate autocommit connection.
 *
 * @param callable $find Function that does a SELECT and returns primary key field
 * @param callable $insert Function that does an INSERT IGNORE and returns last insert ID
 * @param bool &$didInsert Set to true if the insert succeeds
 * @param RevisionRecord $rev For error logging
 * @return int Return value of whichever function succeeded
 * @throws NormalizedException If the row can neither be found nor inserted
 */
private function findOrInsertId(
	callable $find, callable $insert, bool &$didInsert, RevisionRecord $rev
) {
	$dbw = $this->dbProvider->getPrimaryDatabase();

	$existingId = $find( $dbw );
	if ( $existingId ) {
		return $existingId;
	}

	$insertedId = $insert( $dbw );
	if ( $insertedId ) {
		$didInsert = true;
		return $insertedId;
	}

	// Maybe it's there, but we can't see it due to REPEATABLE_READ?
	// Try again in another connection. (T339882, T322701)
	$dbwAnother = $this->dbProvider->getMainLB()
		->getConnection( DB_PRIMARY, [], false, ILoadBalancer::CONN_TRX_AUTOCOMMIT );
	$retriedId = $find( $dbwAnother );
	if ( !$retriedId ) {
		throw new NormalizedException(
			"Database can't find our row and won't let us insert it on page {page} revision {revision}",
			[
				'page' => $rev->getPageId(),
				'revision' => $rev->getId(),
			]
		);
	}
	return $retriedId;
}
| 512 | |
/**
 * Store the thread item set.
 *
 * Works in three steps: find or insert the discussiontools_item_ids rows, find or insert
 * the discussiontools_items rows (both outside of a transaction), then insert or update the
 * discussiontools_item_pages and discussiontools_item_revisions rows in one atomic section.
 *
 * @param RevisionRecord $rev Revision the items were found in
 * @param ContentThreadItemSet $threadItemSet Items to store
 * @return bool True if any row was inserted (or an unused row was re-used); false if
 *  nothing needed to be written or the database is read-only
 * @throws TimestampException
 * @throws DBError
 * @throws Exception
 */
public function insertThreadItems( RevisionRecord $rev, ContentThreadItemSet $threadItemSet ): bool {
	if ( $this->readOnlyMode->isReadOnly() ) {
		return false;
	}

	$dbw = $this->dbProvider->getPrimaryDatabase();
	$didInsert = false;
	$method = __METHOD__;

	// Map of item IDs (strings) to their discussiontools_item_ids.itid_id field values (ints)
	$itemIdsIds = [];
	'@phan-var array<string,int> $itemIdsIds';
	// Map of item IDs (strings) to their discussiontools_items.it_id field values (ints)
	$itemsIds = [];
	'@phan-var array<string,int> $itemsIds';

	// Insert or find discussiontools_item_ids rows, fill in itid_id field values.
	// (This is not in a transaction. Orphaned rows in this table are harmlessly ignored,
	// and long transactions caused performance issues on Wikimedia wikis: T315353#8218914.)
	foreach ( $threadItemSet->getThreadItems() as $item ) {
		$findItemIdsRow = static function ( IReadableDatabase $dbw ) use ( $item, $method ) {
			$ids = [ $item->getId() ];
			$legacyId = $item->getLegacyId();
			if ( $legacyId !== null ) {
				// Avoid duplicates if the item exists under the legacy ID
				// (i.e. with trailing underscores in the title part).
				// The actual fixing of IDs is done by a maintenance script
				// FixTrailingWhitespaceIds, as archived talk pages are unlikely
				// to be edited again in the future.
				// Once FixTrailingWhitespaceIds has run and enough time has
				// passed, we can remove all legacy ID code (again).
				$ids[] = $legacyId;
			}
			return $dbw->newSelectQueryBuilder()
				->from( 'discussiontools_item_ids' )
				->field( 'itid_id' )
				->where( [ 'itid_itemid' => $ids ] )
				->caller( $method )
				->fetchField();
		};
		$insertItemIdsRow = static function ( IDatabase $dbw ) use ( $item, $method ) {
			$dbw->newInsertQueryBuilder()
				->table( 'discussiontools_item_ids' )
				->row( [ 'itid_itemid' => $item->getId() ] )
				->ignore()
				->caller( $method )
				->execute();
			return $dbw->affectedRows() ? $dbw->insertId() : null;
		};

		$itemIdsId = $this->findOrInsertId( $findItemIdsRow, $insertItemIdsRow, $didInsert, $rev );
		$itemIdsIds[ $item->getId() ] = $itemIdsId;
	}

	// Insert or find discussiontools_items rows, fill in it_id field values.
	// (This is not in a transaction. Orphaned rows in this table are harmlessly ignored,
	// and long transactions caused performance issues on Wikimedia wikis: T315353#8218914.)
	foreach ( $threadItemSet->getThreadItems() as $item ) {
		$findItemsRow = static function ( IReadableDatabase $dbw ) use ( $item, $method ) {
			return $dbw->newSelectQueryBuilder()
				->from( 'discussiontools_items' )
				->field( 'it_id' )
				->where( [ 'it_itemname' => $item->getName() ] )
				->caller( $method )
				->fetchField();
		};
		$insertItemsRow = function ( IDatabase $dbw ) use ( $item, $method ) {
			$itemsRow = [
				'it_itemname' => $item->getName(),
			];
			if ( $item instanceof CommentItem ) {
				// Only comments have a timestamp and an author
				$itemsRow['it_timestamp'] = $dbw->timestamp( $item->getTimestampString() );
				$itemsRow['it_actor'] = $this->actorStore->findActorIdByName( $item->getAuthor(), $dbw );
			}
			$dbw->newInsertQueryBuilder()
				->table( 'discussiontools_items' )
				->row( $itemsRow )
				->ignore()
				->caller( $method )
				->execute();
			return $dbw->affectedRows() ? $dbw->insertId() : null;
		};

		$itemsId = $this->findOrInsertId( $findItemsRow, $insertItemsRow, $didInsert, $rev );
		$itemsIds[ $item->getId() ] = $itemsId;
	}

	// Insert or update discussiontools_item_pages and discussiontools_item_revisions rows.
	// This IS in a transaction. We don't really want rows for different items on the same
	// page to point to different revisions.
	$dbw->doAtomicSection( $method, /** @throws TimestampException */ function ( IDatabase $dbw ) use (
		$method, $rev, $threadItemSet, $itemsIds, $itemIdsIds, &$didInsert
	) {
		// Map of item IDs (strings) to their discussiontools_item_revisions.itr_id field values (ints)
		$itemRevisionsIds = [];
		'@phan-var array<string,int> $itemRevisionsIds';

		// Map of discussiontools_items.it_id values to the revision ID that was previously
		// the oldest/newest for that item on this page, and whose rows may now be re-used
		$revUpdateRows = [];

		// Insert or update discussiontools_item_pages rows.
		foreach ( $threadItemSet->getThreadItems() as $item ) {
			// Update (or insert) the references to oldest/newest item revision.
			// The page revision we're processing is usually the newest one, but it doesn't have to be
			// (in case of backfilling using the maintenance script, or in case of revisions being
			// imported), so we need all these funky queries to see if we need to update oldest/newest.
			$itemsId = $itemsIds[ $item->getId() ];

			$itemPagesRow = $dbw->newSelectQueryBuilder()
				->from( 'discussiontools_item_pages' )
				->join( 'revision', 'revision_oldest', [ 'itp_oldest_revision_id = revision_oldest.rev_id' ] )
				->join( 'revision', 'revision_newest', [ 'itp_newest_revision_id = revision_newest.rev_id' ] )
				->field( 'itp_id' )
				->field( 'itp_oldest_revision_id' )
				->field( 'itp_newest_revision_id' )
				->field( 'revision_oldest.rev_timestamp', 'oldest_rev_timestamp' )
				->field( 'revision_newest.rev_timestamp', 'newest_rev_timestamp' )
				->where( [
					'itp_items_id' => $itemsId,
					'itp_page_id' => $rev->getPageId(),
				] )
				->caller( $method )
				->fetchRow();

			if ( $itemPagesRow === false ) {
				// First time we see this item on this page: the revision is both oldest and newest
				$dbw->newInsertQueryBuilder()
					->table( 'discussiontools_item_pages' )
					->row( [
						'itp_items_id' => $itemsId,
						'itp_page_id' => $rev->getPageId(),
						'itp_oldest_revision_id' => $rev->getId(),
						'itp_newest_revision_id' => $rev->getId(),
					] )
					->ignore()
					->caller( $method )
					->execute();
				continue;
			}

			$oldestTime = ( new MWTimestamp( $itemPagesRow->oldest_rev_timestamp ) )->getTimestamp( TS_MW );
			$newestTime = ( new MWTimestamp( $itemPagesRow->newest_rev_timestamp ) )->getTimestamp( TS_MW );
			$currentTime = $rev->getTimestamp();

			$oldestId = (int)$itemPagesRow->itp_oldest_revision_id;
			$newestId = (int)$itemPagesRow->itp_newest_revision_id;
			$currentId = $rev->getId();

			// Order revisions by timestamp, falling back to revision ID for equal timestamps
			$updatePageField = null;
			if ( [ $oldestTime, $oldestId ] > [ $currentTime, $currentId ] ) {
				$updatePageField = 'itp_oldest_revision_id';
			} elseif ( [ $newestTime, $newestId ] < [ $currentTime, $currentId ] ) {
				$updatePageField = 'itp_newest_revision_id';
			}
			if ( $updatePageField === null ) {
				// The revision is neither older than the oldest nor newer than the newest known one
				continue;
			}

			$dbw->newUpdateQueryBuilder()
				->table( 'discussiontools_item_pages' )
				->set( [ $updatePageField => $rev->getId() ] )
				->where( [ 'itp_id' => $itemPagesRow->itp_id ] )
				->caller( $method )
				->execute();
			if ( $oldestId !== $newestId ) {
				// This causes most rows in discussiontools_item_revisions referring to the previously
				// oldest/newest revision to be unused, so try re-using them.
				$revUpdateRows[ $itemsId ] = $itemPagesRow->$updatePageField;
			}
		}

		// Insert or update discussiontools_item_revisions rows, fill in itr_id field values.
		foreach ( $threadItemSet->getThreadItems() as $item ) {
			$itemIdsId = $itemIdsIds[ $item->getId() ];
			$itemsId = $itemsIds[ $item->getId() ];

			$transcl = $item->getTranscludedFrom();

			// This assumes that parent items were processed first
			$parentItem = $item->getParent();
			$parentRevisionsId = $parentItem ? $itemRevisionsIds[ $parentItem->getId() ] : null;

			// false is stored as null, true as 0, and a page title as that page's ID
			if ( $transcl === false ) {
				$transcludedFromId = null;
			} elseif ( $transcl === true ) {
				$transcludedFromId = 0;
			} else {
				$transcludedFromId = $this->pageStore->getPageByText( $transcl )->getId();
			}

			$newOrUpdateRevRow = [
				'itr_itemid_id' => $itemIdsId,
				'itr_revision_id' => $rev->getId(),
				'itr_items_id' => $itemsId,
				'itr_parent_id' => $parentRevisionsId,
				'itr_transcludedfrom' => $transcludedFromId,
				'itr_level' => $item->getLevel(),
			];
			if ( $item instanceof HeadingItem ) {
				$newOrUpdateRevRow['itr_headinglevel'] =
					$item->isPlaceholderHeading() ? null : $item->getHeadingLevel();
			}

			$itemRevisionsConds = [
				'itr_itemid_id' => $itemIdsId,
				'itr_items_id' => $itemsId,
				'itr_revision_id' => $rev->getId(),
			];
			$itemRevisionsId = $dbw->newSelectQueryBuilder()
				->from( 'discussiontools_item_revisions' )
				->field( 'itr_id' )
				->where( $itemRevisionsConds )
				->caller( $method )
				->fetchField();

			if ( $itemRevisionsId === false ) {
				$itemRevisionsUpdateId = null;
				$reusableRevisionId = $revUpdateRows[ $itemsId ] ?? null;
				if ( $reusableRevisionId !== null ) {
					$itemRevisionsUpdateId = $dbw->newSelectQueryBuilder()
						->from( 'discussiontools_item_revisions' )
						->field( 'itr_id' )
						->where( [
							'itr_revision_id' => $reusableRevisionId,
							// We only keep up to 2 discussiontools_item_revisions rows with the same
							// (itr_itemid_id, itr_items_id) pair, for the oldest and newest revision known.
							// Here we find any rows we don't want to keep and re-use them.
							'itr_itemid_id' => $itemIdsId,
							'itr_items_id' => $itemsId,
						] )
						->caller( $method )
						->fetchField();
					// The row to re-use may not be found if it has a different itr_itemid_id than the row
					// we want to add.
				}

				if ( $itemRevisionsUpdateId ) {
					// Re-use the now unused row
					$dbw->newUpdateQueryBuilder()
						->table( 'discussiontools_item_revisions' )
						->set( $newOrUpdateRevRow )
						->where( [ 'itr_id' => $itemRevisionsUpdateId ] )
						->caller( $method )
						->execute();
					$itemRevisionsId = $itemRevisionsUpdateId;
					$didInsert = true;
				} else {
					$findItemRevisionsRow = static function ( IReadableDatabase $dbw ) use (
						$itemRevisionsConds, $method
					) {
						return $dbw->newSelectQueryBuilder()
							->from( 'discussiontools_item_revisions' )
							->field( 'itr_id' )
							->where( $itemRevisionsConds )
							->caller( $method )
							->fetchField();
					};
					$insertItemRevisionsRow = static function ( IDatabase $dbw ) use (
						$newOrUpdateRevRow, $method
					) {
						$dbw->newInsertQueryBuilder()
							->table( 'discussiontools_item_revisions' )
							->row( $newOrUpdateRevRow )
							// Fix rows with corrupted itr_items_id=0,
							// which are causing conflicts (T339882, T343859#9185559)
							->onDuplicateKeyUpdate()
							->uniqueIndexFields( [ 'itr_itemid_id', 'itr_revision_id' ] )
							// Omit redundant updates to avoid warnings (T353432)
							->set( array_diff_key(
								$newOrUpdateRevRow,
								[ 'itr_itemid_id' => true, 'itr_revision_id' => true ]
							) )
							->caller( $method )
							->execute();
						return $dbw->affectedRows() ? $dbw->insertId() : null;
					};

					$itemRevisionsId = $this->findOrInsertId(
						$findItemRevisionsRow, $insertItemRevisionsRow, $didInsert, $rev
					);
				}
			}

			$itemRevisionsIds[ $item->getId() ] = $itemRevisionsId;
		}
	}, $dbw::ATOMIC_CANCELABLE );

	return $didInsert;
}
| 791 | } |