Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
91.11% |
410 / 450 |
|
58.33% |
7 / 12 |
CRAP | |
0.00% |
0 / 1 |
| ThreadItemStore | |
91.11% |
410 / 450 |
|
58.33% |
7 / 12 |
70.15 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| findNewestRevisionsByName | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
4 | |||
| findNewestRevisionsById | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
4 | |||
| findNewestRevisionsByHeading | |
92.31% |
60 / 65 |
|
0.00% |
0 / 1 |
5.01 | |||
| findNewestRevisionsByQuery | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
5 | |||
| fetchItemsResultSet | |
100.00% |
32 / 32 |
|
100.00% |
1 / 1 |
1 | |||
| fetchRevisionAndPageForItems | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
| getThreadItemFromRow | |
85.00% |
34 / 40 |
|
0.00% |
0 / 1 |
11.41 | |||
| findThreadItemsInCurrentRevision | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
12 | |||
| getIdsNamesBuilder | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
| findOrInsertId | |
38.89% |
7 / 18 |
|
0.00% |
0 / 1 |
7.65 | |||
| insertThreadItems | |
98.03% |
199 / 203 |
|
0.00% |
0 / 1 |
24 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\DiscussionTools; |
| 4 | |
| 5 | use Exception; |
| 6 | use MediaWiki\Extension\DiscussionTools\ThreadItem\CommentItem; |
| 7 | use MediaWiki\Extension\DiscussionTools\ThreadItem\DatabaseCommentItem; |
| 8 | use MediaWiki\Extension\DiscussionTools\ThreadItem\DatabaseHeadingItem; |
| 9 | use MediaWiki\Extension\DiscussionTools\ThreadItem\DatabaseThreadItem; |
| 10 | use MediaWiki\Extension\DiscussionTools\ThreadItem\HeadingItem; |
| 11 | use MediaWiki\Language\Language; |
| 12 | use MediaWiki\Page\PageStore; |
| 13 | use MediaWiki\Revision\RevisionRecord; |
| 14 | use MediaWiki\Revision\RevisionStore; |
| 15 | use MediaWiki\Title\TitleFormatter; |
| 16 | use MediaWiki\Title\TitleValue; |
| 17 | use MediaWiki\User\ActorStore; |
| 18 | use MediaWiki\Utils\MWTimestamp; |
| 19 | use stdClass; |
| 20 | use Wikimedia\NormalizedException\NormalizedException; |
| 21 | use Wikimedia\Rdbms\DBError; |
| 22 | use Wikimedia\Rdbms\IDatabase; |
| 23 | use Wikimedia\Rdbms\IExpression; |
| 24 | use Wikimedia\Rdbms\ILBFactory; |
| 25 | use Wikimedia\Rdbms\ILoadBalancer; |
| 26 | use Wikimedia\Rdbms\IReadableDatabase; |
| 27 | use Wikimedia\Rdbms\IResultWrapper; |
| 28 | use Wikimedia\Rdbms\LikeValue; |
| 29 | use Wikimedia\Rdbms\ReadOnlyMode; |
| 30 | use Wikimedia\Rdbms\SelectQueryBuilder; |
| 31 | use Wikimedia\Timestamp\TimestampException; |
| 32 | |
| 33 | /** |
| 34 | * Stores and fetches ThreadItemSets from the database. |
| 35 | */ |
| 36 | class ThreadItemStore { |
| 37 | |
/**
 * @param ILBFactory $dbProvider Supplies the replica and primary database connections
 * @param ReadOnlyMode $readOnlyMode Checked before writing thread items
 * @param PageStore $pageStore Builds page records from rows and looks up pages by title text
 * @param RevisionStore $revStore Supplies revision query info and builds revision records
 * @param TitleFormatter $titleFormatter Formats the title of the transcluded-from page
 * @param ActorStore $actorStore Resolves comment authors to and from actor rows
 * @param Language $language Used to truncate heading text before lookups
 */
public function __construct(
	private readonly ILBFactory $dbProvider,
	private readonly ReadOnlyMode $readOnlyMode,
	private readonly PageStore $pageStore,
	private readonly RevisionStore $revStore,
	private readonly TitleFormatter $titleFormatter,
	private readonly ActorStore $actorStore,
	private readonly Language $language,
) {
}
| 48 | |
/**
 * Look up thread items by name, returning each one as it exists in the newest revision of
 * every page on which it has appeared.
 *
 * Rows whose revision is missing or revision-deleted are silently left out of the result.
 *
 * @param string|string[] $itemName One item name, or a list of item names
 * @param int|null $limit Maximum number of rows to fetch, or null for no limit
 * @return DatabaseThreadItem[]
 */
public function findNewestRevisionsByName( $itemName, ?int $limit = 50 ): array {
	$replica = $this->dbProvider->getReplicaDatabase();

	$conds = [
		'it_itemname' => $itemName,
		// Headings of sections without any comments all share the name 'h-'. Refuse to
		// look those up: on most wikis the result would be an enormous, useless list.
		// (The rows are still stored, since other code may need them.)
		$replica->expr( 'it_itemname', '!=', 'h-' ),
	];

	$itemsQuery = $this->getIdsNamesBuilder()
		->caller( __METHOD__ )
		->where( $conds );
	if ( $limit !== null ) {
		$itemsQuery->limit( $limit );
	}

	$rows = $this->fetchItemsResultSet( $itemsQuery );
	$revisionRows = $this->fetchRevisionAndPageForItems( $rows );

	$found = [];
	foreach ( $rows as $row ) {
		$item = $this->getThreadItemFromRow( $row, null, $revisionRows );
		if ( $item === null ) {
			continue;
		}
		$found[] = $item;
	}
	return $found;
}
| 85 | |
/**
 * Look up thread items by ID, returning each one as it exists in the newest revision of
 * every page on which it has appeared.
 *
 * Rows whose revision is missing or revision-deleted are silently left out of the result.
 *
 * @param string|string[] $itemId One item ID, or a list of item IDs
 * @param int|null $limit Maximum number of rows to fetch, or null for no limit
 * @return DatabaseThreadItem[]
 */
public function findNewestRevisionsById( $itemId, ?int $limit = 50 ): array {
	// The lookup is done in two steps: resolve the ID to its name(s), then search by name.
	// Searching by ID directly would miss the latest revision whenever the comment's ID
	// has changed, e.g. because the comment was moved elsewhere on the page.
	//
	// An ID can probably map to more than one name only for headings; for comments any ID
	// corresponds to exactly one name. It is unclear how costly it is to leave this
	// sub-query without limit( 1 ) - it might scan a bunch of rows.
	$namesForIdSql = $this->getIdsNamesBuilder()
		->where( [ 'itid_itemid' => $itemId ] )
		->field( 'it_itemname' )
		->getSQL();

	$replica = $this->dbProvider->getReplicaDatabase();

	$itemsQuery = $this->getIdsNamesBuilder()
		->caller( __METHOD__ )
		->where( [
			'it_itemname IN (' . $namesForIdSql . ')',
			$replica->expr( 'it_itemname', '!=', 'h-' ),
		] );
	if ( $limit !== null ) {
		$itemsQuery->limit( $limit );
	}

	$rows = $this->fetchItemsResultSet( $itemsQuery );
	$revisionRows = $this->fetchRevisionAndPageForItems( $rows );

	$found = [];
	foreach ( $rows as $row ) {
		$item = $this->getThreadItemFromRow( $row, null, $revisionRows );
		if ( $item === null ) {
			continue;
		}
		$found[] = $item;
	}
	return $found;
}
| 132 | |
/**
 * Find heading items matching some text which:
 *
 * 1. appeared at some point in the history of the target page, or if this returns no results:
 * 2. currently appear on a subpage of the target page, or if this returns no results:
 * 3. currently appear on any page, but only if it is a unique match
 *
 * If none of these match, an empty list is returned, unless no comment item at all is found
 * for the target page, in which case an exception is thrown instead.
 *
 * @param string $heading Heading text to match (it is truncated and concatenated as a string,
 *  so a list of headings is not supported)
 * @param int $articleId Article ID of the target page
 * @param TitleValue $title Title of the target page
 * @param int|null $limit Maximum number of rows to fetch, or null for no limit
 * @return DatabaseThreadItem[]
 * @throws PageNeverHadThreadsException If no heading matched and no comment item was found
 *  for the target page
 */
public function findNewestRevisionsByHeading(
	$heading, int $articleId, TitleValue $title, ?int $limit = 50
): array {
	// Mirrors CommentParser::truncateForId
	$heading = trim( $this->language->truncateForDatabase( $heading, 80, '' ), '_' );

	$dbr = $this->dbProvider->getReplicaDatabase();

	// 1. Try to find items which have appeared on the page at some point
	//    in its history.
	$itemIdInPageHistoryQueryBuilder = $this->getIdsNamesBuilder()
		->caller( __METHOD__ . ' case 1' )
		->join( 'revision', null, [ 'rev_id = itr_revision_id' ] )
		->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
			'h-' . $heading . '-',
			$dbr->anyString()
		) ) )
		// Has once appeared on the specified page ID
		->where( [ 'rev_page' => $articleId ] )
		->field( 'itid_itemid' );

	$threadItems = $this->findNewestRevisionsByQuery( __METHOD__ . ' case 1',
		$itemIdInPageHistoryQueryBuilder, $limit );

	if ( count( $threadItems ) ) {
		return $threadItems;
	}

	// 2. If the thread item's database hasn't been back-filled with historical revisions
	//    then approach (1) may not work, instead look for matching headings that currently
	//    appear on subpages, which matches the archiving convention on most wikis.
	$itemIdInSubPageQueryBuilder = $this->getIdsNamesBuilder()
		->caller( __METHOD__ . ' case 2' )
		->join( 'page', null, [ 'page_id = itp_page_id' ] )
		->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
			'h-' . $heading . '-',
			$dbr->anyString()
		) ) )
		// page_title is stored in DB-key form (underscores, not spaces), so the prefix must
		// be built from the DB key; the display text would never match multi-word titles.
		->where( $dbr->expr( 'page_title', IExpression::LIKE, new LikeValue(
			$title->getDBkey() . '/',
			$dbr->anyString()
		) ) )
		->where( [ 'page_namespace' => $title->getNamespace() ] )
		->field( 'itid_itemid' );

	$threadItems = $this->findNewestRevisionsByQuery( __METHOD__ . ' case 2',
		$itemIdInSubPageQueryBuilder, $limit );

	if ( count( $threadItems ) ) {
		return $threadItems;
	}

	// 3. Look for an "exact" match of the heading on any page. Because we are searching
	//    so broadly, only return if there is exactly one match to the heading name.
	$itemIdInAnyPageQueryBuilder = $this->getIdsNamesBuilder()
		->caller( __METHOD__ . ' case 3' )
		->join( 'page', null, [ 'page_id = itp_page_id', 'page_latest = itr_revision_id' ] )
		->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
			'h-' . $heading . '-',
			$dbr->anyString()
		) ) )
		->field( 'itid_itemid' )
		// We only care if there is one, or more than one result
		->limit( 2 );

	// Check there is only one result in the sub-query
	$itemIds = $itemIdInAnyPageQueryBuilder->fetchFieldValues();
	if ( count( $itemIds ) === 1 ) {
		// Honour the caller's limit here too, as cases 1 and 2 do
		return $this->findNewestRevisionsByQuery( __METHOD__ . ' case 3', $itemIds[ 0 ], $limit );
	}

	// 4. If there are no matches, check if the "talk" page has ever had any discussions
	//    on it (comments, not just headings). If not then throw an error instead of
	//    returning an empty list. This prevents the "topic could not be found" message
	//    from showing in the frontend. (T374598)
	$anyItemsInPageHistoryQueryBuilder = $this->getIdsNamesBuilder()
		->caller( __METHOD__ . ' case 4' )
		->join( 'revision', null, [ 'rev_id = itr_revision_id' ] )
		// Only comments, as non-talk headings are recorded
		->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
			'c-',
			$dbr->anyString()
		) ) )
		// On the specified page ID
		->where( [ 'rev_page' => $articleId ] )
		->field( 'itid_itemid' )
		->limit( 1 );

	// Check whether the sub-query found any comment at all
	$itemIds = $anyItemsInPageHistoryQueryBuilder->fetchFieldValues();
	if ( count( $itemIds ) === 0 ) {
		throw new PageNeverHadThreadsException(
			"Page {page} has never contained any discussions",
			[ 'page' => $articleId ]
		);
	}

	return [];
}
| 245 | |
/**
 * Fetch the thread items whose item ID is either the given ID or is produced by the given
 * sub-query.
 *
 * Rows whose revision is missing or revision-deleted are silently left out of the result.
 *
 * @param string $fname Name of the calling method, prepended to the query's caller name
 * @param SelectQueryBuilder|string $itemIdOrQueryBuilder Sub-query which returns item ID's, or an itemID
 * @param int|null $limit Maximum number of rows to fetch, or null for no limit
 * @return DatabaseThreadItem[]
 */
private function findNewestRevisionsByQuery( $fname, $itemIdOrQueryBuilder, ?int $limit = 50 ): array {
	$conds = $itemIdOrQueryBuilder instanceof SelectQueryBuilder
		? [ 'itid_itemid IN (' . $itemIdOrQueryBuilder->getSQL() . ')' ]
		: [ 'itid_itemid' => $itemIdOrQueryBuilder ];

	$itemsQuery = $this->getIdsNamesBuilder()
		->caller( $fname . ' / ' . __METHOD__ )
		->where( $conds );
	if ( $limit !== null ) {
		$itemsQuery->limit( $limit );
	}

	$rows = $this->fetchItemsResultSet( $itemsQuery );
	$revisionRows = $this->fetchRevisionAndPageForItems( $rows );

	$found = [];
	foreach ( $rows as $row ) {
		$item = $this->getThreadItemFromRow( $row, null, $revisionRows );
		if ( $item === null ) {
			continue;
		}
		$found[] = $item;
	}
	return $found;
}
| 279 | |
/**
 * Add the fields and joins needed to build thread items to the given query, then run it.
 *
 * Besides the item columns, this selects the page fields of the transcluded-from page, the
 * actor fields of the author, and the string ID of the parent item.
 *
 * @param SelectQueryBuilder $queryBuilder Query to extend; it is modified in place
 * @return IResultWrapper
 */
private function fetchItemsResultSet( SelectQueryBuilder $queryBuilder ): IResultWrapper {
	$itemFields = [
		'itr_id',
		'it_itemname',
		'it_timestamp',
		'it_actor',
		'itid_itemid',
		'itr_parent_id',
		'itr_transcludedfrom',
		'itr_level',
		'itr_headinglevel',
		'itr_revision_id',
	];

	// Sub-query used to look up the parent's item ID (the string, not just the primary key)
	$parentSubquery = $this->getIdsNamesBuilder()
		->caller( __METHOD__ )
		->fields( [
			'itr_parent__itr_id' => 'itr_id',
			'itr_parent__itid_itemid' => 'itid_itemid',
		] );

	$queryBuilder->fields( $itemFields );

	// PageStore fields for the transcluded-from page
	$queryBuilder->leftJoin( 'page', null, [ 'page_id = itr_transcludedfrom' ] );
	$queryBuilder->fields( $this->pageStore->getSelectFields() );

	// ActorStore fields for the author
	$queryBuilder->leftJoin( 'actor', null, [ 'actor_id = it_actor' ] );
	$queryBuilder->fields( [ 'actor_id', 'actor_name', 'actor_user' ] );

	$queryBuilder->leftJoin( $parentSubquery, null, [ 'itr_parent_id = itr_parent__itr_id' ] );
	$queryBuilder->field( 'itr_parent__itid_itemid' );

	return $queryBuilder->fetchResultSet();
}
| 315 | |
/**
 * Fetch the revision and page rows for every revision referenced by the given item rows.
 *
 * @param IResultWrapper $result Item rows, each having an itr_revision_id field
 * @return array<int,stdClass|null> Map of revision ID to its combined revision+page row,
 *  or to null when no 'revision' table row was found for that ID
 */
private function fetchRevisionAndPageForItems( IResultWrapper $result ): array {
	// This could theoretically be done in the same query as fetchItemsResultSet(),
	// but the resulting query would be two screens long
	// and we'd have to alias a lot of fields to avoid conflicts.
	$revs = [];
	foreach ( $result as $row ) {
		// Pre-fill with null so that revisions without a row remain detectable
		$revs[ $row->itr_revision_id ] = null;
	}

	if ( !$revs ) {
		// No items, so there is nothing to look up; skip the database round trip
		return [];
	}

	$revResult = $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder()
		->caller( __METHOD__ )
		->queryInfo( $this->revStore->getQueryInfo( [ 'page' ] ) )
		->fields( $this->pageStore->getSelectFields() )
		->where( [ 'rev_id' => array_keys( $revs ) ] )
		->fetchResultSet();
	foreach ( $revResult as $row ) {
		$revs[ $row->rev_id ] = $row;
	}
	return $revs;
}
| 338 | |
/**
 * Build a comment or heading item from a row returned by fetchItemsResultSet().
 *
 * @param stdClass $row Item row
 * @param DatabaseThreadItemSet|null $set Set in which to look up the parent item; when null,
 *  the item is created without a parent
 * @param array $revs Map of revision ID to revision+page row (or null), as returned by
 *  fetchRevisionAndPageForItems()
 * @return DatabaseThreadItem|null Null when the item's revision is missing or has deleted text
 */
private function getThreadItemFromRow(
	stdClass $row, ?DatabaseThreadItemSet $set, array $revs
): ?DatabaseThreadItem {
	$revRow = $revs[ $row->itr_revision_id ];
	if ( $revRow === null ) {
		// No 'revision' table row was found, so this revision is deleted.
		// (The page may or may not have other non-deleted revisions.)
		// Pretend the thread item doesn't exist to avoid leaking data to users who shouldn't see it.
		// TODO Allow privileged users to see it (we'd need to query from 'archive')
		return null;
	}

	$page = $this->pageStore->newPageRecordFromRow( $revRow );
	$rev = $this->revStore->newRevisionFromRow( $revRow );
	if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
		// This revision is revision-deleted.
		// TODO Allow privileged users to see it
		return null;
	}

	$parent = ( $set && $row->itr_parent__itid_itemid )
		? $set->findCommentById( $row->itr_parent__itid_itemid )
		: null;

	// Either the prefixed title of the source page, or a boolean: true when the stored
	// value is '0', false otherwise.
	$transcludedFrom = ( $row->itr_transcludedfrom && $row->page_id )
		? $this->titleFormatter->getPrefixedText( $this->pageStore->newPageRecordFromRow( $row ) )
		: $row->itr_transcludedfrom === '0';

	$level = (int)$row->itr_level;

	// Rows lacking a timestamp or an actor are treated as headings
	$isHeading = $row->it_timestamp === null || $row->it_actor === null;
	if ( $isHeading ) {
		$headingLevel = $row->itr_headinglevel === null ? null : (int)$row->itr_headinglevel;
		$item = new DatabaseHeadingItem(
			$page,
			$rev,
			$row->it_itemname,
			$row->itid_itemid,
			$parent,
			$transcludedFrom,
			$level,
			$headingLevel
		);
	} else {
		$author = $this->actorStore->newActorFromRow( $row )->getName();
		$item = new DatabaseCommentItem(
			$page,
			$rev,
			$row->it_itemname,
			$row->itid_itemid,
			$parent,
			$transcludedFrom,
			$level,
			$row->it_timestamp,
			$author
		);
	}

	if ( $parent ) {
		$parent->addReply( $item );
	}
	return $item;
}
| 404 | |
/**
 * Load the set of thread items stored for the given revision. The revision is assumed to
 * be the current revision of its page.
 *
 * Items whose revision is missing or revision-deleted are left out of the set.
 *
 * @param int $revId Revision ID
 * @return DatabaseThreadItemSet
 */
public function findThreadItemsInCurrentRevision( int $revId ): DatabaseThreadItemSet {
	$itemsQuery = $this->getIdsNamesBuilder();
	$itemsQuery->caller( __METHOD__ );
	$itemsQuery->where( [ 'itr_revision_id' => $revId ] );
	// Ascending itr_id order, so that the loop below sees parents before their children
	$itemsQuery->orderBy( 'itr_id', SelectQueryBuilder::SORT_ASC );

	$rows = $this->fetchItemsResultSet( $itemsQuery );
	$revisionRows = $this->fetchRevisionAndPageForItems( $rows );

	$itemSet = new DatabaseThreadItemSet();
	foreach ( $rows as $row ) {
		$item = $this->getThreadItemFromRow( $row, $itemSet, $revisionRows );
		if ( $item === null ) {
			continue;
		}
		$itemSet->addThreadItem( $item );
		$itemSet->updateIdAndNameMaps( $item );
	}
	return $itemSet;
}
| 429 | |
/**
 * Create the base replica query joining the items, item pages, item revisions and item IDs
 * tables. Only the newest known revision of each item on each page is included.
 *
 * @return SelectQueryBuilder
 */
private function getIdsNamesBuilder(): SelectQueryBuilder {
	$itemRevisionsJoinConds = [
		'itr_items_id = it_id',
		// Only the latest revision of the items with each name
		'itr_revision_id = itp_newest_revision_id',
	];

	return $this->dbProvider->getReplicaDatabase()
		->newSelectQueryBuilder()
		->from( 'discussiontools_items' )
		->join( 'discussiontools_item_pages', null, [ 'itp_items_id = it_id' ] )
		->join( 'discussiontools_item_revisions', null, $itemRevisionsJoinConds )
		->join( 'discussiontools_item_ids', null, [ 'itid_id = itr_itemid_id' ] );
}
| 445 | |
/**
 * Return the primary key of a row, inserting the row first if it can't be found.
 *
 * The lookup is tried on the primary connection, then the insert, and finally the lookup
 * again on a separate autocommit connection.
 *
 * @param callable $find Function that does a SELECT and returns primary key field
 * @param callable $insert Function that does an INSERT IGNORE and returns last insert ID
 * @param bool &$didInsert Set to true if the insert succeeds
 * @param RevisionRecord $rev For error logging
 * @return int Return value of whichever function succeeded
 * @throws NormalizedException If the row can neither be found nor inserted
 */
private function findOrInsertId(
	callable $find, callable $insert, bool &$didInsert, RevisionRecord $rev
) {
	$primary = $this->dbProvider->getPrimaryDatabase();

	$existingId = $find( $primary );
	if ( $existingId ) {
		return $existingId;
	}

	$insertedId = $insert( $primary );
	if ( $insertedId ) {
		$didInsert = true;
		return $insertedId;
	}

	// Maybe it's there, but we can't see it due to REPEATABLE_READ?
	// Try again in another connection. (T339882, T322701)
	$autocommitPrimary = $this->dbProvider->getMainLB()
		->getConnection( DB_PRIMARY, [], false, ILoadBalancer::CONN_TRX_AUTOCOMMIT );
	$retriedId = $find( $autocommitPrimary );
	if ( $retriedId ) {
		return $retriedId;
	}

	throw new NormalizedException(
		"Database can't find our row and won't let us insert it on page {page} revision {revision}",
		[
			'page' => $rev->getPageId(),
			'revision' => $rev->getId(),
		]
	);
}
| 482 | |
/**
 * Store the thread item set.
 *
 * The rows in discussiontools_item_ids and discussiontools_items are found or inserted
 * outside of a transaction; the rows in discussiontools_item_pages and
 * discussiontools_item_revisions are then written in a single atomic section.
 *
 * @param RevisionRecord $rev Revision the thread items were found in
 * @param ContentThreadItemSet $threadItemSet Thread items to store
 * @return bool True if any row was inserted or an unused revision row was re-used;
 *  false otherwise, including when the database is read-only
 * @throws TimestampException
 * @throws DBError
 * @throws Exception
 */
public function insertThreadItems( RevisionRecord $rev, ContentThreadItemSet $threadItemSet ): bool {
	if ( $this->readOnlyMode->isReadOnly() ) {
		return false;
	}

	$dbw = $this->dbProvider->getPrimaryDatabase();
	$didInsert = false;
	// Captured here so that the closures below report this method as their caller
	$method = __METHOD__;

	// Map of item IDs (strings) to their discussiontools_item_ids.itid_id field values (ints)
	$itemIdsIds = [];
	'@phan-var array<string,int> $itemIdsIds';
	// Map of item IDs (strings) to their discussiontools_items.it_id field values (ints)
	$itemsIds = [];
	'@phan-var array<string,int> $itemsIds';

	// Insert or find discussiontools_item_ids rows, fill in itid_id field values.
	// (This is not in a transaction. Orphaned rows in this table are harmlessly ignored,
	// and long transactions caused performance issues on Wikimedia wikis: T315353#8218914.)
	foreach ( $threadItemSet->getThreadItems() as $item ) {
		$itemIdsId = $this->findOrInsertId(
			static function ( IReadableDatabase $dbw ) use ( $item, $method ) {
				$ids = [ $item->getId() ];
				if ( $item->getLegacyId() !== null ) {
					// Avoid duplicates if the item exists under the legacy ID
					// (i.e. with trailing underscores in the title part).
					// The actual fixing of IDs is done by a maintenance script
					// FixTrailingWhitespaceIds, as archived talk pages are unlikely
					// to be edited again in the future.
					// Once FixTrailingWhitespaceIds has run on and enough time has
					// passed, we can remove all legacy ID code (again).
					$ids[] = $item->getLegacyId();
				}
				return $dbw->newSelectQueryBuilder()
					->from( 'discussiontools_item_ids' )
					->field( 'itid_id' )
					->where( [ 'itid_itemid' => $ids ] )
					->caller( $method )
					->fetchField();
			},
			static function ( IDatabase $dbw ) use ( $item, $method ) {
				$dbw->newInsertQueryBuilder()
					->table( 'discussiontools_item_ids' )
					->row( [ 'itid_itemid' => $item->getId() ] )
					->ignore()
					->caller( $method )
					->execute();
				return $dbw->affectedRows() ? $dbw->insertId() : null;
			},
			$didInsert,
			$rev
		);
		$itemIdsIds[ $item->getId() ] = $itemIdsId;
	}

	// Insert or find discussiontools_items rows, fill in it_id field values.
	// (This is not in a transaction. Orphaned rows in this table are harmlessly ignored,
	// and long transactions caused performance issues on Wikimedia wikis: T315353#8218914.)
	foreach ( $threadItemSet->getThreadItems() as $item ) {
		$itemsId = $this->findOrInsertId(
			static function ( IReadableDatabase $dbw ) use ( $item, $method ) {
				return $dbw->newSelectQueryBuilder()
					->from( 'discussiontools_items' )
					->field( 'it_id' )
					->where( [ 'it_itemname' => $item->getName() ] )
					->caller( $method )
					->fetchField();
			},
			// Not static: this closure needs $this->actorStore
			function ( IDatabase $dbw ) use ( $item, $method ) {
				$dbw->newInsertQueryBuilder()
					->table( 'discussiontools_items' )
					->row(
						[
							'it_itemname' => $item->getName(),
						] +
						// Only comments get a timestamp and an author
						( $item instanceof CommentItem ? [
							'it_timestamp' =>
								$dbw->timestamp( $item->getTimestampString() ),
							'it_actor' =>
								$this->actorStore->findActorIdByName( $item->getAuthor(), $dbw ),
						] : [] )
					)
					->ignore()
					->caller( $method )
					->execute();
				return $dbw->affectedRows() ? $dbw->insertId() : null;
			},
			$didInsert,
			$rev
		);
		$itemsIds[ $item->getId() ] = $itemsId;
	}

	// Insert or update discussiontools_item_pages and discussiontools_item_revisions rows.
	// This IS in a transaction. We don't really want rows for different items on the same
	// page to point to different revisions.
	$dbw->doAtomicSection( $method, /** @throws TimestampException */ function ( IDatabase $dbw ) use (
		$method, $rev, $threadItemSet, $itemsIds, $itemIdsIds, &$didInsert
	) {
		// Map of item IDs (strings) to their discussiontools_item_revisions.itr_id field values (ints)
		$itemRevisionsIds = [];
		'@phan-var array<string,int> $itemRevisionsIds';

		// Map of discussiontools_items.it_id values to the revision ID that was just replaced
		// as the oldest/newest one in discussiontools_item_pages
		$revUpdateRows = [];
		// Insert or update discussiontools_item_pages rows.
		foreach ( $threadItemSet->getThreadItems() as $item ) {
			// Update (or insert) the references to oldest/newest item revision.
			// The page revision we're processing is usually the newest one, but it doesn't have to be
			// (in case of backfilling using the maintenance script, or in case of revisions being
			// imported), so we need all these funky queries to see if we need to update oldest/newest.

			$itemPagesRow = $dbw->newSelectQueryBuilder()
				->from( 'discussiontools_item_pages' )
				->join( 'revision', 'revision_oldest', [ 'itp_oldest_revision_id = revision_oldest.rev_id' ] )
				->join( 'revision', 'revision_newest', [ 'itp_newest_revision_id = revision_newest.rev_id' ] )
				->field( 'itp_id' )
				->field( 'itp_oldest_revision_id' )
				->field( 'itp_newest_revision_id' )
				->field( 'revision_oldest.rev_timestamp', 'oldest_rev_timestamp' )
				->field( 'revision_newest.rev_timestamp', 'newest_rev_timestamp' )
				->where( [
					'itp_items_id' => $itemsIds[ $item->getId() ],
					'itp_page_id' => $rev->getPageId(),
				] )
				->caller( $method )
				->fetchRow();
			if ( $itemPagesRow === false ) {
				$dbw->newInsertQueryBuilder()
					->table( 'discussiontools_item_pages' )
					->row( [
						'itp_items_id' => $itemsIds[ $item->getId() ],
						'itp_page_id' => $rev->getPageId(),
						'itp_oldest_revision_id' => $rev->getId(),
						'itp_newest_revision_id' => $rev->getId(),
					] )
					->ignore()
					->caller( $method )
					->execute();
			} else {
				$oldestTime = ( new MWTimestamp( $itemPagesRow->oldest_rev_timestamp ) )->getTimestamp( TS_MW );
				$newestTime = ( new MWTimestamp( $itemPagesRow->newest_rev_timestamp ) )->getTimestamp( TS_MW );
				$currentTime = $rev->getTimestamp();

				$oldestId = (int)$itemPagesRow->itp_oldest_revision_id;
				$newestId = (int)$itemPagesRow->itp_newest_revision_id;
				$currentId = $rev->getId();

				// The arrays are compared element by element: by timestamp first,
				// with the revision ID as the tie-breaker.
				$updatePageField = null;
				if ( [ $oldestTime, $oldestId ] > [ $currentTime, $currentId ] ) {
					$updatePageField = 'itp_oldest_revision_id';
				} elseif ( [ $newestTime, $newestId ] < [ $currentTime, $currentId ] ) {
					$updatePageField = 'itp_newest_revision_id';
				}
				if ( $updatePageField ) {
					$dbw->newUpdateQueryBuilder()
						->table( 'discussiontools_item_pages' )
						->set( [ $updatePageField => $rev->getId() ] )
						->where( [ 'itp_id' => $itemPagesRow->itp_id ] )
						->caller( $method )
						->execute();
					if ( $oldestId !== $newestId ) {
						// This causes most rows in discussiontools_item_revisions referring to the previously
						// oldest/newest revision to be unused, so try re-using them.
						$revUpdateRows[ $itemsIds[ $item->getId() ] ] = $itemPagesRow->$updatePageField;
					}
				}
			}
		}

		// Insert or update discussiontools_item_revisions rows, fill in itr_id field values.
		foreach ( $threadItemSet->getThreadItems() as $item ) {
			$transcl = $item->getTranscludedFrom();
			$newOrUpdateRevRow =
				[
					'itr_itemid_id' => $itemIdsIds[ $item->getId() ],
					'itr_revision_id' => $rev->getId(),
					'itr_items_id' => $itemsIds[ $item->getId() ],
					'itr_parent_id' =>
						// This assumes that parent items were processed first
						$item->getParent() ? $itemRevisionsIds[ $item->getParent()->getId() ] : null,
					'itr_transcludedfrom' =>
						$transcl === false ? null : (
							$transcl === true ? 0 :
								// getPageByText() may return null (e.g. for a title it cannot
								// resolve). Store 0 in that case, the same value used for
								// $transcl === true, instead of failing with a fatal error.
								( $this->pageStore->getPageByText( $transcl )?->getId() ?? 0 )
						),
					'itr_level' => $item->getLevel(),
				] +
				( $item instanceof HeadingItem ? [
					'itr_headinglevel' => $item->isPlaceholderHeading() ? null : $item->getHeadingLevel(),
				] : [] );

			$itemRevisionsConds = [
				'itr_itemid_id' => $itemIdsIds[ $item->getId() ],
				'itr_items_id' => $itemsIds[ $item->getId() ],
				'itr_revision_id' => $rev->getId(),
			];
			$itemRevisionsId = $dbw->newSelectQueryBuilder()
				->from( 'discussiontools_item_revisions' )
				->field( 'itr_id' )
				->where( $itemRevisionsConds )
				->caller( $method )
				->fetchField();
			if ( $itemRevisionsId === false ) {
				$itemRevisionsUpdateId = null;
				if ( isset( $revUpdateRows[ $itemsIds[ $item->getId() ] ] ) ) {
					$itemRevisionsUpdateId = $dbw->newSelectQueryBuilder()
						->from( 'discussiontools_item_revisions' )
						->field( 'itr_id' )
						->where( [
							'itr_revision_id' => $revUpdateRows[ $itemsIds[ $item->getId() ] ],
							// We only keep up to 2 discussiontools_item_revisions rows with the same
							// (itr_itemid_id, itr_items_id) pair, for the oldest and newest revision known.
							// Here we find any rows we don't want to keep and re-use them.
							'itr_itemid_id' => $itemIdsIds[ $item->getId() ],
							'itr_items_id' => $itemsIds[ $item->getId() ],
						] )
						->caller( $method )
						->fetchField();
					// The row to re-use may not be found if it has a different itr_itemid_id than the row
					// we want to add.
				}
				if ( $itemRevisionsUpdateId ) {
					$dbw->newUpdateQueryBuilder()
						->table( 'discussiontools_item_revisions' )
						->set( $newOrUpdateRevRow )
						->where( [ 'itr_id' => $itemRevisionsUpdateId ] )
						->caller( $method )
						->execute();
					$itemRevisionsId = $itemRevisionsUpdateId;
					$didInsert = true;
				} else {
					$itemRevisionsId = $this->findOrInsertId(
						static function ( IReadableDatabase $dbw ) use ( $itemRevisionsConds, $method ) {
							return $dbw->newSelectQueryBuilder()
								->from( 'discussiontools_item_revisions' )
								->field( 'itr_id' )
								->where( $itemRevisionsConds )
								->caller( $method )
								->fetchField();
						},
						static function ( IDatabase $dbw ) use ( $newOrUpdateRevRow, $method ) {
							$dbw->newInsertQueryBuilder()
								->table( 'discussiontools_item_revisions' )
								->row( $newOrUpdateRevRow )
								// Fix rows with corrupted itr_items_id=0,
								// which are causing conflicts (T339882, T343859#9185559)
								->onDuplicateKeyUpdate()
								->uniqueIndexFields( [ 'itr_itemid_id', 'itr_revision_id' ] )
								// Omit redundant updates to avoid warnings (T353432)
								->set( array_diff_key(
									$newOrUpdateRevRow,
									[ 'itr_itemid_id' => true, 'itr_revision_id' => true ]
								) )
								->caller( $method )
								->execute();
							return $dbw->affectedRows() ? $dbw->insertId() : null;
						},
						$didInsert,
						$rev
					);
				}
			}

			$itemRevisionsIds[ $item->getId() ] = $itemRevisionsId;
		}
	}, $dbw::ATOMIC_CANCELABLE );

	return $didInsert;
}
| 761 | } |