Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
90.60% |
424 / 468 |
|
46.15% |
6 / 13 |
CRAP | |
0.00% |
0 / 1 |
ThreadItemStore | |
90.60% |
424 / 468 |
|
46.15% |
6 / 13 |
76.31 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
isDisabled | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
findNewestRevisionsByName | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
5.00 | |||
findNewestRevisionsById | |
95.65% |
22 / 23 |
|
0.00% |
0 / 1 |
5 | |||
findNewestRevisionsByHeading | |
91.04% |
61 / 67 |
|
0.00% |
0 / 1 |
6.03 | |||
findNewestRevisionsByQuery | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
5 | |||
fetchItemsResultSet | |
100.00% |
32 / 32 |
|
100.00% |
1 / 1 |
1 | |||
fetchRevisionAndPageForItems | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
getThreadItemFromRow | |
88.10% |
37 / 42 |
|
0.00% |
0 / 1 |
11.20 | |||
findThreadItemsInCurrentRevision | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
getIdsNamesBuilder | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
findOrInsertId | |
38.89% |
7 / 18 |
|
0.00% |
0 / 1 |
7.65 | |||
insertThreadItems | |
98.03% |
199 / 203 |
|
0.00% |
0 / 1 |
24 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\DiscussionTools; |
4 | |
5 | use Exception; |
6 | use Language; |
7 | use MediaWiki\Config\Config; |
8 | use MediaWiki\Config\ConfigFactory; |
9 | use MediaWiki\Extension\DiscussionTools\ThreadItem\CommentItem; |
10 | use MediaWiki\Extension\DiscussionTools\ThreadItem\DatabaseCommentItem; |
11 | use MediaWiki\Extension\DiscussionTools\ThreadItem\DatabaseHeadingItem; |
12 | use MediaWiki\Extension\DiscussionTools\ThreadItem\DatabaseThreadItem; |
13 | use MediaWiki\Extension\DiscussionTools\ThreadItem\HeadingItem; |
14 | use MediaWiki\Page\PageStore; |
15 | use MediaWiki\Revision\RevisionRecord; |
16 | use MediaWiki\Revision\RevisionStore; |
17 | use MediaWiki\Title\TitleFormatter; |
18 | use MediaWiki\Title\TitleValue; |
19 | use MediaWiki\User\ActorStore; |
20 | use MediaWiki\Utils\MWTimestamp; |
21 | use stdClass; |
22 | use Wikimedia\NormalizedException\NormalizedException; |
23 | use Wikimedia\Rdbms\DBError; |
24 | use Wikimedia\Rdbms\IDatabase; |
25 | use Wikimedia\Rdbms\IExpression; |
26 | use Wikimedia\Rdbms\ILBFactory; |
27 | use Wikimedia\Rdbms\ILoadBalancer; |
28 | use Wikimedia\Rdbms\IReadableDatabase; |
29 | use Wikimedia\Rdbms\IResultWrapper; |
30 | use Wikimedia\Rdbms\LikeValue; |
31 | use Wikimedia\Rdbms\ReadOnlyMode; |
32 | use Wikimedia\Rdbms\SelectQueryBuilder; |
33 | use Wikimedia\Timestamp\TimestampException; |
34 | |
35 | /** |
36 | * Stores and fetches ThreadItemSets from the database. |
37 | */ |
38 | class ThreadItemStore { |
39 | |
40 | private Config $config; |
41 | private ILBFactory $dbProvider; |
42 | private ReadOnlyMode $readOnlyMode; |
43 | private PageStore $pageStore; |
44 | private RevisionStore $revStore; |
45 | private TitleFormatter $titleFormatter; |
46 | private ActorStore $actorStore; |
47 | private Language $language; |
48 | |
49 | 	public function __construct(
50 | 		ConfigFactory $configFactory,
51 | 		ILBFactory $dbProvider,
52 | 		ReadOnlyMode $readOnlyMode,
53 | 		PageStore $pageStore,
54 | 		RevisionStore $revStore,
55 | 		TitleFormatter $titleFormatter,
56 | 		ActorStore $actorStore,
57 | 		Language $language
58 | 	) {
59 | 		$this->config = $configFactory->makeConfig( 'discussiontools' ); // Keep only the extension's own Config, not the factory
60 | 		$this->dbProvider = $dbProvider; // Source of both replica and primary connections used in this class
61 | 		$this->readOnlyMode = $readOnlyMode; // Consulted by insertThreadItems() before writing
62 | 		$this->pageStore = $pageStore;
63 | 		$this->revStore = $revStore;
64 | 		$this->titleFormatter = $titleFormatter;
65 | 		$this->actorStore = $actorStore;
66 | 		$this->language = $language; // Used to truncate heading text in findNewestRevisionsByHeading()
67 | 	}
68 | |
69 | 	/**
70 | 	 * Returns true when the DiscussionToolsEnablePermalinksBackend setting is off (e.g. because the tables
71 | 	 * necessary for this feature haven't been created yet), to allow failing softly in that case.
72 | 	 */
73 | 	public function isDisabled(): bool {
74 | 		return !$this->config->get( 'DiscussionToolsEnablePermalinksBackend' );
75 | 	}
76 | |
77 | 	/**
78 | 	 * Find the thread items with the given name in the newest revision of every page in which they
79 | 	 * have appeared. Returns an empty list if the feature is disabled (see isDisabled()).
80 | 	 *
81 | 	 * @param string|string[] $itemName Item name, or list of names, matched against it_itemname
82 | 	 * @param int|null $limit Maximum number of rows to fetch, or null for no limit
83 | 	 * @return DatabaseThreadItem[] Items whose revision is missing or text-deleted are left out
84 | 	 */
85 | 	public function findNewestRevisionsByName( $itemName, ?int $limit = 50 ): array {
86 | 		if ( $this->isDisabled() ) {
87 | 			return [];
88 | 		}
89 | 
90 | 		$dbr = $this->dbProvider->getReplicaDatabase();
91 | 		$queryBuilder = $this->getIdsNamesBuilder()
92 | 			->caller( __METHOD__ )
93 | 			->where( [
94 | 				'it_itemname' => $itemName,
95 | 				// Disallow querying for headings of sections that contain no comments.
96 | 				// They all share the same name, so this would return a huge useless list on most wikis.
97 | 				// (But we still store them, as we might need this data elsewhere.)
98 | 				$dbr->expr( 'it_itemname', '!=', 'h-' ),
99 | 			] );
100 | 
101 | 		if ( $limit !== null ) {
102 | 			$queryBuilder->limit( $limit );
103 | 		}
104 | 
105 | 		$result = $this->fetchItemsResultSet( $queryBuilder );
106 | 		$revs = $this->fetchRevisionAndPageForItems( $result ); // Revision ID => revision+page row, or null if not found
107 | 
108 | 		$threadItems = [];
109 | 		foreach ( $result as $row ) {
110 | 			$threadItem = $this->getThreadItemFromRow( $row, null, $revs );
111 | 			if ( $threadItem ) { // null means the item must not be shown (deleted revision); skip it
112 | 				$threadItems[] = $threadItem;
113 | 			}
114 | 		}
115 | 		return $threadItems;
116 | 	}
117 | |
118 | 	/**
119 | 	 * Find the thread items with the given ID in the newest revision of every page in which they have
120 | 	 * appeared. Returns an empty list if the feature is disabled (see isDisabled()).
121 | 	 *
122 | 	 * @param string|string[] $itemId Item ID, or list of IDs, matched against itid_itemid
123 | 	 * @param int|null $limit Maximum number of rows to fetch, or null for no limit
124 | 	 * @return DatabaseThreadItem[] Items whose revision is missing or text-deleted are left out
125 | 	 */
126 | 	public function findNewestRevisionsById( $itemId, ?int $limit = 50 ): array {
127 | 		if ( $this->isDisabled() ) {
128 | 			return [];
129 | 		}
130 | 
131 | 		$queryBuilder = $this->getIdsNamesBuilder()
132 | 			->caller( __METHOD__ );
133 | 
134 | 		// First find the name associated with the ID; then find by name. Otherwise we wouldn't find the
135 | 		// latest revision in case the comment ID changed, e.g. the comment was moved elsewhere on the page.
136 | 		$itemNameQueryBuilder = $this->getIdsNamesBuilder()
137 | 			->where( [ 'itid_itemid' => $itemId ] )
138 | 			->field( 'it_itemname' );
139 | 			// I think there may be more than 1 only in case of headings?
140 | 			// For comments, any ID corresponds to just 1 name.
141 | 			// Not sure how bad it is to not have limit( 1 ) here?
142 | 			// It might scan a bunch of rows...
143 | 			// ->limit( 1 );
144 | 
145 | 		$dbr = $this->dbProvider->getReplicaDatabase();
146 | 		$queryBuilder
147 | 			->where( [
148 | 				'it_itemname IN (' . $itemNameQueryBuilder->getSQL() . ')', // Name lookup embedded as a raw sub-select
149 | 				$dbr->expr( 'it_itemname', '!=', 'h-' ), // Same exclusion of the shared 'h-' name as in findNewestRevisionsByName()
150 | 			] );
151 | 
152 | 		if ( $limit !== null ) {
153 | 			$queryBuilder->limit( $limit );
154 | 		}
155 | 
156 | 		$result = $this->fetchItemsResultSet( $queryBuilder );
157 | 		$revs = $this->fetchRevisionAndPageForItems( $result ); // Revision ID => revision+page row, or null if not found
158 | 
159 | 		$threadItems = [];
160 | 		foreach ( $result as $row ) {
161 | 			$threadItem = $this->getThreadItemFromRow( $row, null, $revs );
162 | 			if ( $threadItem ) { // null means the item must not be shown (deleted revision); skip it
163 | 				$threadItems[] = $threadItem;
164 | 			}
165 | 		}
166 | 		return $threadItems;
167 | 	}
168 | |
169 | 	/**
170 | 	 * Find heading items matching some text which:
171 | 	 * 1. appeared at some point in the history of the target page, or if this returns no results:
172 | 	 * 2. currently appear on a subpage of the target page, or if this returns no results:
173 | 	 * 3. currently appear on any page, but only if it is a unique match
174 | 	 *
175 | 	 * @param string $heading Heading text to match (truncated to 80 bytes and stripped of outer underscores)
176 | 	 * @param int $articleId Article ID of the target page
177 | 	 * @param TitleValue $title Title of the target page
178 | 	 * @param int|null $limit Maximum number of rows to fetch per lookup, or null for no limit
179 | 	 * @return DatabaseThreadItem[] Empty if the feature is disabled, or if nothing matched on a page that has had comments
180 | 	 * @throws PageNeverHadThreadsException If nothing matched and no comment was ever found for this page
181 | 	 */
182 | 	public function findNewestRevisionsByHeading(
183 | 		$heading, int $articleId, TitleValue $title, ?int $limit = 50
184 | 	): array {
185 | 		if ( $this->isDisabled() ) {
186 | 			return [];
187 | 		}
188 | 
189 | 		// Mirrors CommentParser::truncateForId
190 | 		$heading = trim( $this->language->truncateForDatabase( $heading, 80, '' ), '_' );
191 | 
192 | 		$dbr = $this->dbProvider->getReplicaDatabase(); // Only used to build expressions; every query below reads from the replica
193 | 
194 | 		// 1. Try to find items which have appeared on the page at some point
195 | 		// in its history.
196 | 		$itemIdInPageHistoryQueryBuilder = $this->getIdsNamesBuilder()
197 | 			->caller( __METHOD__ . ' case 1' )
198 | 			->join( 'revision', null, [ 'rev_id = itr_revision_id' ] )
199 | 			->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
200 | 				'h-' . $heading . '-',
201 | 				$dbr->anyString()
202 | 			) ) )
203 | 			// Has once appeared on the specified page ID
204 | 			->where( [ 'rev_page' => $articleId ] )
205 | 			->field( 'itid_itemid' );
206 | 
207 | 		$threadItems = $this->findNewestRevisionsByQuery( __METHOD__ . ' case 1',
208 | 			$itemIdInPageHistoryQueryBuilder, $limit );
209 | 
210 | 		if ( count( $threadItems ) ) {
211 | 			return $threadItems;
212 | 		}
213 | 
214 | 		// 2. If the thread item's database hasn't been back-filled with historical revisions
215 | 		// then approach (1) may not work, instead look for matching headings that currently
216 | 		// appear on subpages, which matches the archiving convention on most wikis.
217 | 		$itemIdInSubPageQueryBuilder = $this->getIdsNamesBuilder()
218 | 			->caller( __METHOD__ . ' case 2' )
219 | 			->join( 'page', null, [ 'page_id = itp_page_id' ] )
220 | 			->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
221 | 				'h-' . $heading . '-',
222 | 				$dbr->anyString()
223 | 			) ) )
224 | 			->where( $dbr->expr( 'page_title', IExpression::LIKE, new LikeValue(
225 | 				$title->getText() . '/',
226 | 				$dbr->anyString()
227 | 			) ) )
228 | 			->where( [ 'page_namespace' => $title->getNamespace() ] )
229 | 			->field( 'itid_itemid' );
230 | 
231 | 		$threadItems = $this->findNewestRevisionsByQuery( __METHOD__ . ' case 2',
232 | 			$itemIdInSubPageQueryBuilder, $limit );
233 | 
234 | 		if ( count( $threadItems ) ) {
235 | 			return $threadItems;
236 | 		}
237 | 
238 | 		// 3. Look for an "exact" match of the heading on any page. Because we are searching
239 | 		// so broadly, only return if there is exactly one match to the heading name.
240 | 		$itemIdInAnyPageQueryBuilder = $this->getIdsNamesBuilder()
241 | 			->caller( __METHOD__ . ' case 3' )
242 | 			->join( 'page', null, [ 'page_id = itp_page_id', 'page_latest = itr_revision_id' ] )
243 | 			->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
244 | 				'h-' . $heading . '-',
245 | 				$dbr->anyString()
246 | 			) ) )
247 | 			->field( 'itid_itemid' )
248 | 			// We only care if there is one, or more than one result
249 | 			->limit( 2 );
250 | 
251 | 		// Check there is only one result in the sub-query
252 | 		$itemIds = $itemIdInAnyPageQueryBuilder->fetchFieldValues();
253 | 		if ( count( $itemIds ) === 1 ) {
254 | 			return $this->findNewestRevisionsByQuery( __METHOD__ . ' case 3', $itemIds[ 0 ], $limit );
255 | 		}
256 | 
257 | 		// 4. If there are no matches, check if the "talk" page has ever had any discussions
258 | 		// on it (comments, not just headings). If not then throw an error instead of
259 | 		// returning an empty list. This prevents the "topic could not be found" message
260 | 		// from showing in the frontend. (T374598)
261 | 		$anyItemsInPageHistoryQueryBuilder = $this->getIdsNamesBuilder()
262 | 			->caller( __METHOD__ . ' case 4' )
263 | 			->join( 'revision', null, [ 'rev_id = itr_revision_id' ] )
264 | 			// Only comments, as non-talk headings are recorded
265 | 			->where( $dbr->expr( 'itid_itemid', IExpression::LIKE, new LikeValue(
266 | 				'c-',
267 | 				$dbr->anyString()
268 | 			) ) )
269 | 			// On the specified page ID
270 | 			->where( [ 'rev_page' => $articleId ] )
271 | 			->field( 'itid_itemid' )
272 | 			->limit( 1 );
273 | 
274 | 		// Check whether any comment was found at all (one row is enough, hence limit( 1 ) above)
275 | 		$itemIds = $anyItemsInPageHistoryQueryBuilder->fetchFieldValues();
276 | 		if ( count( $itemIds ) === 0 ) {
277 | 			throw new PageNeverHadThreadsException(
278 | 				"Page {page} has never contained any discussions",
279 | 				[ 'page' => $articleId ]
280 | 			);
281 | 		}
282 | 
283 | 		return [];
284 | 	}
285 | |
286 | 	/**
287 | 	 * @param string $fname Caller description; prefixed to this method's name for the query's caller()
288 | 	 * @param SelectQueryBuilder|string $itemIdOrQueryBuilder Sub-query which returns item IDs, or a single item ID
289 | 	 * @param int|null $limit Maximum number of rows to fetch, or null for no limit
290 | 	 * @return DatabaseThreadItem[] Items whose revision is missing or text-deleted are left out
291 | 	 */
292 | 	private function findNewestRevisionsByQuery( $fname, $itemIdOrQueryBuilder, ?int $limit = 50 ): array {
293 | 		$queryBuilder = $this->getIdsNamesBuilder()->caller( $fname . ' / ' . __METHOD__ );
294 | 		if ( $itemIdOrQueryBuilder instanceof SelectQueryBuilder ) {
295 | 			$queryBuilder
296 | 				->where( [
297 | 					'itid_itemid IN (' . $itemIdOrQueryBuilder->getSQL() . ')' // Builder is embedded as a raw sub-select
298 | 				] );
299 | 		} else {
300 | 			$queryBuilder->where( [ 'itid_itemid' => $itemIdOrQueryBuilder ] );
301 | 		}
302 | 
303 | 		if ( $limit !== null ) {
304 | 			$queryBuilder->limit( $limit );
305 | 		}
306 | 
307 | 		$result = $this->fetchItemsResultSet( $queryBuilder );
308 | 		$revs = $this->fetchRevisionAndPageForItems( $result ); // Revision ID => revision+page row, or null if not found
309 | 
310 | 		$threadItems = [];
311 | 		foreach ( $result as $row ) {
312 | 			$threadItem = $this->getThreadItemFromRow( $row, null, $revs );
313 | 			if ( $threadItem ) { // null means the item must not be shown (deleted revision); skip it
314 | 				$threadItems[] = $threadItem;
315 | 			}
316 | 		}
317 | 		return $threadItems;
318 | 	}
319 | |
320 | 	private function fetchItemsResultSet( SelectQueryBuilder $queryBuilder ): IResultWrapper {
321 | 		$queryBuilder // Modified in place: the fields and joins needed by getThreadItemFromRow() are added, then the query is run
322 | 			->fields( [
323 | 				'itr_id',
324 | 				'it_itemname',
325 | 				'it_timestamp',
326 | 				'it_actor',
327 | 				'itid_itemid',
328 | 				'itr_parent_id',
329 | 				'itr_transcludedfrom',
330 | 				'itr_level',
331 | 				'itr_headinglevel',
332 | 				'itr_revision_id',
333 | 			] )
334 | 			// PageStore fields for the transcluded-from page (left join: itr_transcludedfrom may not refer to a page)
335 | 			->leftJoin( 'page', null, [ 'page_id = itr_transcludedfrom' ] )
336 | 			->fields( $this->pageStore->getSelectFields() )
337 | 			// ActorStore fields for the author (left join: it_actor may be null)
338 | 			->leftJoin( 'actor', null, [ 'actor_id = it_actor' ] )
339 | 			->fields( [ 'actor_id', 'actor_name', 'actor_user' ] )
340 | 			// Parent item ID (the string, not just the primary key), fetched via an aliased sub-select
341 | 			->leftJoin(
342 | 				$this->getIdsNamesBuilder()
343 | 					->caller( __METHOD__ )
344 | 					->fields( [
345 | 						'itr_parent__itr_id' => 'itr_id',
346 | 						'itr_parent__itid_itemid' => 'itid_itemid',
347 | 					] ),
348 | 				null,
349 | 				[ 'itr_parent_id = itr_parent__itr_id' ]
350 | 			)
351 | 			->field( 'itr_parent__itid_itemid' );
352 | 
353 | 		return $queryBuilder->fetchResultSet();
354 | 	}
355 | |
356 | 	/**
357 | 	 * @param IResultWrapper $result Rows that each have an itr_revision_id field
358 | 	 * @return (stdClass|null)[] Map of revision ID to its revision+page row, or null where no 'revision' row was found
359 | 	 */
360 | 	private function fetchRevisionAndPageForItems( IResultWrapper $result ): array {
361 | 		// This could theoretically be done in the same query as fetchItemsResultSet(),
362 | 		// but the resulting query would be two screens long
363 | 		// and we'd have to alias a lot of fields to avoid conflicts.
364 | 		$revs = [];
365 | 		foreach ( $result as $row ) {
366 | 			$revs[ $row->itr_revision_id ] = null; // Placeholder; stays null if the revision query below finds nothing
367 | 		}
368 | 		$revQueryBuilder = $this->dbProvider->getReplicaDatabase()->newSelectQueryBuilder()
369 | 			->caller( __METHOD__ )
370 | 			->queryInfo( $this->revStore->getQueryInfo( [ 'page' ] ) )
371 | 			->fields( $this->pageStore->getSelectFields() )
372 | 			->where( $revs ? [ 'rev_id' => array_keys( $revs ) ] : '0=1' ); // No IDs: always-false condition, so no rows come back
373 | 		$revResult = $revQueryBuilder->fetchResultSet();
374 | 		foreach ( $revResult as $row ) {
375 | 			$revs[ $row->rev_id ] = $row;
376 | 		}
377 | 		return $revs;
378 | 	}
379 | |
380 | 	private function getThreadItemFromRow(
381 | 		stdClass $row, ?DatabaseThreadItemSet $set, array $revs
382 | 	): ?DatabaseThreadItem { // Builds a comment or heading item from a fetchItemsResultSet() row; null if it must be hidden
383 | 		if ( $revs[ $row->itr_revision_id ] === null ) {
384 | 			// We didn't find the 'revision' table row at all, this revision is deleted.
385 | 			// (The page may or may not have other non-deleted revisions.)
386 | 			// Pretend the thread item doesn't exist to avoid leaking data to users who shouldn't see it.
387 | 			// TODO Allow privileged users to see it (we'd need to query from 'archive')
388 | 			return null;
389 | 		}
390 | 
391 | 		$revRow = $revs[$row->itr_revision_id];
392 | 		$page = $this->pageStore->newPageRecordFromRow( $revRow );
393 | 		$rev = $this->revStore->newRevisionFromRow( $revRow );
394 | 		if ( $rev->isDeleted( RevisionRecord::DELETED_TEXT ) ) {
395 | 			// This revision is revision-deleted.
396 | 			// TODO Allow privileged users to see it
397 | 			return null;
398 | 		}
399 | 
400 | 		if ( $set && $row->itr_parent__itid_itemid ) { // The parent can only be resolved when a set is being built
401 | 			$parent = $set->findCommentById( $row->itr_parent__itid_itemid );
402 | 		} else {
403 | 			$parent = null;
404 | 		}
405 | 
406 | 		$transcludedFrom = $row->itr_transcludedfrom === null ? false : ( // null => false, '0' => true, else the page's prefixed title
407 | 			$row->itr_transcludedfrom === '0' ? true :
408 | 				$this->titleFormatter->getPrefixedText(
409 | 					$this->pageStore->newPageRecordFromRow( $row )
410 | 				)
411 | 		);
412 | 
413 | 		if ( $row->it_timestamp !== null && $row->it_actor !== null ) { // Timestamp and actor both set: a comment; otherwise a heading
414 | 			$author = $this->actorStore->newActorFromRow( $row )->getName();
415 | 
416 | 			$item = new DatabaseCommentItem(
417 | 				$page,
418 | 				$rev,
419 | 				$row->it_itemname,
420 | 				$row->itid_itemid,
421 | 				$parent,
422 | 				$transcludedFrom,
423 | 				(int)$row->itr_level,
424 | 				$row->it_timestamp,
425 | 				$author
426 | 			);
427 | 		} else {
428 | 			$item = new DatabaseHeadingItem(
429 | 				$page,
430 | 				$rev,
431 | 				$row->it_itemname,
432 | 				$row->itid_itemid,
433 | 				$parent,
434 | 				$transcludedFrom,
435 | 				(int)$row->itr_level,
436 | 				$row->itr_headinglevel === null ? null : (int)$row->itr_headinglevel
437 | 			);
438 | 		}
439 | 
440 | 		if ( $parent ) { // Link the new item into its parent's list of replies
441 | 			$parent->addReply( $item );
442 | 		}
443 | 		return $item;
444 | 	}
445 | |
446 | 	/**
447 | 	 * Find the thread item set for the given revision, assuming that it is the current revision of
448 | 	 * its page. Returns an empty set if the feature is disabled (see isDisabled()).
449 | 	 */
450 | 	public function findThreadItemsInCurrentRevision( int $revId ): DatabaseThreadItemSet {
451 | 		if ( $this->isDisabled() ) {
452 | 			return new DatabaseThreadItemSet();
453 | 		}
454 | 
455 | 		$queryBuilder = $this->getIdsNamesBuilder() // This builder only joins rows for each item's newest stored revision
456 | 			->caller( __METHOD__ )
457 | 			->where( [ 'itr_revision_id' => $revId ] )
458 | 			// We must process parents before their children in the loop later
459 | 			->orderBy( 'itr_id', SelectQueryBuilder::SORT_ASC );
460 | 
461 | 		$result = $this->fetchItemsResultSet( $queryBuilder );
462 | 		$revs = $this->fetchRevisionAndPageForItems( $result ); // Revision ID => revision+page row, or null if not found
463 | 
464 | 		$set = new DatabaseThreadItemSet();
465 | 		foreach ( $result as $row ) {
466 | 			$threadItem = $this->getThreadItemFromRow( $row, $set, $revs ); // Passing $set lets the item be linked to its parent
467 | 			if ( $threadItem ) {
468 | 				$set->addThreadItem( $threadItem );
469 | 				$set->updateIdAndNameMaps( $threadItem );
470 | 			}
471 | 		}
472 | 		return $set;
473 | 	}
474 | |
475 | 	private function getIdsNamesBuilder(): SelectQueryBuilder {
476 | 		$dbr = $this->dbProvider->getReplicaDatabase(); // Every query built from this helper reads from a replica
477 | 
478 | 		$queryBuilder = $dbr->newSelectQueryBuilder() // No fields or conditions are set here; callers add their own
479 | 			->from( 'discussiontools_items' )
480 | 			->join( 'discussiontools_item_pages', null, [ 'itp_items_id = it_id' ] )
481 | 			->join( 'discussiontools_item_revisions', null, [
482 | 				'itr_items_id = it_id',
483 | 				// Only the newest stored revision of each item on each page (itp_newest_revision_id)
484 | 				'itr_revision_id = itp_newest_revision_id',
485 | 			] )
486 | 			->join( 'discussiontools_item_ids', null, [ 'itid_id = itr_itemid_id' ] );
487 | 
488 | 		return $queryBuilder;
489 | 	}
490 | |
491 | 	/**
492 | 	 * @param callable $find Function that does a SELECT on the given connection and returns the primary key field
493 | 	 * @param callable $insert Function that does an INSERT IGNORE and returns last insert ID (falsy if nothing was inserted)
494 | 	 * @param bool &$didInsert Set to true if the insert succeeds; left unchanged otherwise
495 | 	 * @param RevisionRecord $rev For error logging
496 | 	 * @return int Return value of whichever function succeeded; NormalizedException is thrown if none did
497 | 	 */
498 | 	private function findOrInsertId(
499 | 		callable $find, callable $insert, bool &$didInsert, RevisionRecord $rev
500 | 	) {
501 | 		$dbw = $this->dbProvider->getPrimaryDatabase();
502 | 
503 | 		$id = $find( $dbw ); // First attempt: look the row up on the primary
504 | 		if ( !$id ) {
505 | 			$id = $insert( $dbw ); // Not found: try to create it
506 | 			if ( $id ) {
507 | 				$didInsert = true;
508 | 			} else {
509 | 				// Maybe it's there, but we can't see it due to REPEATABLE_READ?
510 | 				// Try again in another connection. (T339882, T322701)
511 | 				$dbwAnother = $this->dbProvider->getMainLB()
512 | 					->getConnection( DB_PRIMARY, [], false, ILoadBalancer::CONN_TRX_AUTOCOMMIT );
513 | 				$id = $find( $dbwAnother );
514 | 				if ( !$id ) {
515 | 					throw new NormalizedException(
516 | 						"Database can't find our row and won't let us insert it on page {page} revision {revision}",
517 | 						[
518 | 							'page' => $rev->getPageId(),
519 | 							'revision' => $rev->getId(),
520 | 						]
521 | 					);
522 | 				}
523 | 			}
524 | 		}
525 | 		return $id;
526 | 	}
527 | |
528 | 	/**
529 | 	 * Store the thread item set for the given revision. Returns false without writing if the database is read-only.
530 | 	 *
531 | 	 * @param RevisionRecord $rev Revision whose ID and page ID are stored with the items
532 | 	 * @param ContentThreadItemSet $threadItemSet Items to store; parents are expected before their replies
533 | 	 * @throws TimestampException
534 | 	 * @throws DBError
535 | 	 * @throws Exception
536 | 	 * @return bool True if an item_ids, items or item_revisions row was inserted, or an item_revisions row was re-used
537 | 	 */
538 | 	public function insertThreadItems( RevisionRecord $rev, ContentThreadItemSet $threadItemSet ): bool {
539 | 		if ( $this->readOnlyMode->isReadOnly() ) {
540 | 			return false;
541 | 		}
542 | 
543 | 		$dbw = $this->dbProvider->getPrimaryDatabase();
544 | 		$didInsert = false;
545 | 		$method = __METHOD__; // Captured so the closures below report this method as their caller
546 | 
547 | 		// Map of item IDs (strings) to their discussiontools_item_ids.itid_id field values (ints)
548 | 		$itemIdsIds = [];
549 | 		'@phan-var array<string,int> $itemIdsIds';
550 | 		// Map of item IDs (strings) to their discussiontools_items.it_id field values (ints)
551 | 		$itemsIds = [];
552 | 		'@phan-var array<string,int> $itemsIds';
553 | 
554 | 		// Insert or find discussiontools_item_ids rows, fill in itid_id field values.
555 | 		// (This is not in a transaction. Orphaned rows in this table are harmlessly ignored,
556 | 		// and long transactions caused performance issues on Wikimedia wikis: T315353#8218914.)
557 | 		foreach ( $threadItemSet->getThreadItems() as $item ) {
558 | 			$itemIdsId = $this->findOrInsertId(
559 | 				static function ( IReadableDatabase $dbw ) use ( $item, $method ) {
560 | 					$ids = [ $item->getId() ];
561 | 					if ( $item->getLegacyId() !== null ) {
562 | 						// Avoid duplicates if the item exists under the legacy ID
563 | 						// (i.e. with trailing underscores in the title part).
564 | 						// The actual fixing of IDs is done by a maintenance script
565 | 						// FixTrailingWhitespaceIds, as archived talk pages are unlikely
566 | 						// to be edited again in the future.
567 | 						// Once FixTrailingWhitespaceIds has run and enough time has
568 | 						// passed, we can remove all legacy ID code (again).
569 | 						$ids[] = $item->getLegacyId();
570 | 					}
571 | 					return $dbw->newSelectQueryBuilder()
572 | 						->from( 'discussiontools_item_ids' )
573 | 						->field( 'itid_id' )
574 | 						->where( [ 'itid_itemid' => $ids ] )
575 | 						->caller( $method )
576 | 						->fetchField();
577 | 				},
578 | 				static function ( IDatabase $dbw ) use ( $item, $method ) {
579 | 					$dbw->newInsertQueryBuilder()
580 | 						->table( 'discussiontools_item_ids' )
581 | 						->row( [ 'itid_itemid' => $item->getId() ] )
582 | 						->ignore()
583 | 						->caller( $method )
584 | 						->execute();
585 | 					return $dbw->affectedRows() ? $dbw->insertId() : null; // null when the INSERT IGNORE inserted nothing
586 | 				},
587 | 				$didInsert,
588 | 				$rev
589 | 			);
590 | 			$itemIdsIds[ $item->getId() ] = $itemIdsId;
591 | 		}
592 | 
593 | 		// Insert or find discussiontools_items rows, fill in it_id field values.
594 | 		// (This is not in a transaction. Orphaned rows in this table are harmlessly ignored,
595 | 		// and long transactions caused performance issues on Wikimedia wikis: T315353#8218914.)
596 | 		foreach ( $threadItemSet->getThreadItems() as $item ) {
597 | 			$itemsId = $this->findOrInsertId(
598 | 				static function ( IReadableDatabase $dbw ) use ( $item, $method ) {
599 | 					return $dbw->newSelectQueryBuilder()
600 | 						->from( 'discussiontools_items' )
601 | 						->field( 'it_id' )
602 | 						->where( [ 'it_itemname' => $item->getName() ] )
603 | 						->caller( $method )
604 | 						->fetchField();
605 | 				},
606 | 				function ( IDatabase $dbw ) use ( $item, $method ) {
607 | 					$dbw->newInsertQueryBuilder()
608 | 						->table( 'discussiontools_items' )
609 | 						->row(
610 | 							[
611 | 								'it_itemname' => $item->getName(),
612 | 							] +
613 | 							( $item instanceof CommentItem ? [ // Timestamp and actor are only stored for comments
614 | 								'it_timestamp' =>
615 | 									$dbw->timestamp( $item->getTimestampString() ),
616 | 								'it_actor' =>
617 | 									$this->actorStore->findActorIdByName( $item->getAuthor(), $dbw ),
618 | 							] : [] )
619 | 						)
620 | 						->ignore()
621 | 						->caller( $method )
622 | 						->execute();
623 | 					return $dbw->affectedRows() ? $dbw->insertId() : null; // null when the INSERT IGNORE inserted nothing
624 | 				},
625 | 				$didInsert,
626 | 				$rev
627 | 			);
628 | 			$itemsIds[ $item->getId() ] = $itemsId;
629 | 		}
630 | 
631 | 		// Insert or update discussiontools_item_pages and discussiontools_item_revisions rows.
632 | 		// This IS in a transaction. We don't really want rows for different items on the same
633 | 		// page to point to different revisions.
634 | 		$dbw->doAtomicSection( $method, /** @throws TimestampException */ function ( IDatabase $dbw ) use (
635 | 			$method, $rev, $threadItemSet, $itemsIds, $itemIdsIds, &$didInsert
636 | 		) {
637 | 			// Map of item IDs (strings) to their discussiontools_item_revisions.itr_id field values (ints)
638 | 			$itemRevisionsIds = [];
639 | 			'@phan-var array<string,int> $itemRevisionsIds';
640 | 
641 | 			$revUpdateRows = []; // Map of it_id => revision ID that was just replaced as oldest/newest (re-use candidates)
642 | 			// Insert or update discussiontools_item_pages rows.
643 | 			foreach ( $threadItemSet->getThreadItems() as $item ) {
644 | 				// Update (or insert) the references to oldest/newest item revision.
645 | 				// The page revision we're processing is usually the newest one, but it doesn't have to be
646 | 				// (in case of backfilling using the maintenance script, or in case of revisions being
647 | 				// imported), so we need all these funky queries to see if we need to update oldest/newest.
648 | 
649 | 				$itemPagesRow = $dbw->newSelectQueryBuilder()
650 | 					->from( 'discussiontools_item_pages' )
651 | 					->join( 'revision', 'revision_oldest', [ 'itp_oldest_revision_id = revision_oldest.rev_id' ] )
652 | 					->join( 'revision', 'revision_newest', [ 'itp_newest_revision_id = revision_newest.rev_id' ] )
653 | 					->field( 'itp_id' )
654 | 					->field( 'itp_oldest_revision_id' )
655 | 					->field( 'itp_newest_revision_id' )
656 | 					->field( 'revision_oldest.rev_timestamp', 'oldest_rev_timestamp' )
657 | 					->field( 'revision_newest.rev_timestamp', 'newest_rev_timestamp' )
658 | 					->where( [
659 | 						'itp_items_id' => $itemsIds[ $item->getId() ],
660 | 						'itp_page_id' => $rev->getPageId(),
661 | 					] )
662 | 					->caller( $method )
663 | 					->fetchRow();
664 | 				if ( $itemPagesRow === false ) {
665 | 					$dbw->newInsertQueryBuilder() // First time this item is seen on this page: this revision is both oldest and newest
666 | 						->table( 'discussiontools_item_pages' )
667 | 						->row( [
668 | 							'itp_items_id' => $itemsIds[ $item->getId() ],
669 | 							'itp_page_id' => $rev->getPageId(),
670 | 							'itp_oldest_revision_id' => $rev->getId(),
671 | 							'itp_newest_revision_id' => $rev->getId(),
672 | 						] )
673 | 						->ignore()
674 | 						->caller( $method )
675 | 						->execute();
676 | 				} else {
677 | 					$oldestTime = ( new MWTimestamp( $itemPagesRow->oldest_rev_timestamp ) )->getTimestamp( TS_MW );
678 | 					$newestTime = ( new MWTimestamp( $itemPagesRow->newest_rev_timestamp ) )->getTimestamp( TS_MW );
679 | 					$currentTime = $rev->getTimestamp();
680 | 
681 | 					$oldestId = (int)$itemPagesRow->itp_oldest_revision_id;
682 | 					$newestId = (int)$itemPagesRow->itp_newest_revision_id;
683 | 					$currentId = $rev->getId();
684 | 
685 | 					$updatePageField = null;
686 | 					if ( [ $oldestTime, $oldestId ] > [ $currentTime, $currentId ] ) { // Pairs compare by timestamp first, then by revision ID
687 | 						$updatePageField = 'itp_oldest_revision_id';
688 | 					} elseif ( [ $newestTime, $newestId ] < [ $currentTime, $currentId ] ) {
689 | 						$updatePageField = 'itp_newest_revision_id';
690 | 					}
691 | 					if ( $updatePageField ) {
692 | 						$dbw->newUpdateQueryBuilder()
693 | 							->table( 'discussiontools_item_pages' )
694 | 							->set( [ $updatePageField => $rev->getId() ] )
695 | 							->where( [ 'itp_id' => $itemPagesRow->itp_id ] )
696 | 							->caller( $method )
697 | 							->execute();
698 | 						if ( $oldestId !== $newestId ) {
699 | 							// This causes most rows in discussiontools_item_revisions referring to the previously
700 | 							// oldest/newest revision to be unused, so try re-using them.
701 | 							$revUpdateRows[ $itemsIds[ $item->getId() ] ] = $itemPagesRow->$updatePageField;
702 | 						}
703 | 					}
704 | 				}
705 | 			}
706 | 
707 | 			// Insert or update discussiontools_item_revisions rows, fill in itr_id field values.
708 | 			foreach ( $threadItemSet->getThreadItems() as $item ) {
709 | 				$transcl = $item->getTranscludedFrom();
710 | 				$newOrUpdateRevRow =
711 | 					[
712 | 						'itr_itemid_id' => $itemIdsIds[ $item->getId() ],
713 | 						'itr_revision_id' => $rev->getId(),
714 | 						'itr_items_id' => $itemsIds[ $item->getId() ],
715 | 						'itr_parent_id' =>
716 | 							// This assumes that parent items were processed first
717 | 							$item->getParent() ? $itemRevisionsIds[ $item->getParent()->getId() ] : null,
718 | 						'itr_transcludedfrom' =>
719 | 							$transcl === false ? null : ( // false => NULL, true => 0, page title => that page's ID
720 | 								$transcl === true ? 0 :
721 | 									$this->pageStore->getPageByText( $transcl )->getId()
722 | 							),
723 | 						'itr_level' => $item->getLevel(),
724 | 					] +
725 | 					( $item instanceof HeadingItem ? [
726 | 						'itr_headinglevel' => $item->isPlaceholderHeading() ? null : $item->getHeadingLevel(),
727 | 					] : [] );
728 | 
729 | 				$itemRevisionsConds = [
730 | 					'itr_itemid_id' => $itemIdsIds[ $item->getId() ],
731 | 					'itr_items_id' => $itemsIds[ $item->getId() ],
732 | 					'itr_revision_id' => $rev->getId(),
733 | 				];
734 | 				$itemRevisionsId = $dbw->newSelectQueryBuilder()
735 | 					->from( 'discussiontools_item_revisions' )
736 | 					->field( 'itr_id' )
737 | 					->where( $itemRevisionsConds )
738 | 					->caller( $method )
739 | 					->fetchField();
740 | 				if ( $itemRevisionsId === false ) { // No row for this item in this revision yet: re-use an unused row or insert one
741 | 					$itemRevisionsUpdateId = null;
742 | 					if ( isset( $revUpdateRows[ $itemsIds[ $item->getId() ] ] ) ) {
743 | 						$itemRevisionsUpdateId = $dbw->newSelectQueryBuilder()
744 | 							->from( 'discussiontools_item_revisions' )
745 | 							->field( 'itr_id' )
746 | 							->where( [
747 | 								'itr_revision_id' => $revUpdateRows[ $itemsIds[ $item->getId() ] ],
748 | 								// We only keep up to 2 discussiontools_item_revisions rows with the same
749 | 								// (itr_itemid_id, itr_items_id) pair, for the oldest and newest revision known.
750 | 								// Here we find any rows we don't want to keep and re-use them.
751 | 								'itr_itemid_id' => $itemIdsIds[ $item->getId() ],
752 | 								'itr_items_id' => $itemsIds[ $item->getId() ],
753 | 							] )
754 | 							->caller( $method )
755 | 							->fetchField();
756 | 						// The row to re-use may not be found if it has a different itr_itemid_id than the row
757 | 						// we want to add.
758 | 					}
759 | 					if ( $itemRevisionsUpdateId ) {
760 | 						$dbw->newUpdateQueryBuilder()
761 | 							->table( 'discussiontools_item_revisions' )
762 | 							->set( $newOrUpdateRevRow )
763 | 							->where( [ 'itr_id' => $itemRevisionsUpdateId ] )
764 | 							->caller( $method )
765 | 							->execute();
766 | 						$itemRevisionsId = $itemRevisionsUpdateId;
767 | 						$didInsert = true; // A re-used row counts as an insert for the return value
768 | 					} else {
769 | 						$itemRevisionsId = $this->findOrInsertId(
770 | 							static function ( IReadableDatabase $dbw ) use ( $itemRevisionsConds, $method ) {
771 | 								return $dbw->newSelectQueryBuilder()
772 | 									->from( 'discussiontools_item_revisions' )
773 | 									->field( 'itr_id' )
774 | 									->where( $itemRevisionsConds )
775 | 									->caller( $method )
776 | 									->fetchField();
777 | 							},
778 | 							static function ( IDatabase $dbw ) use ( $newOrUpdateRevRow, $method ) {
779 | 								$dbw->newInsertQueryBuilder()
780 | 									->table( 'discussiontools_item_revisions' )
781 | 									->row( $newOrUpdateRevRow )
782 | 									// Fix rows with corrupted itr_items_id=0,
783 | 									// which are causing conflicts (T339882, T343859#9185559)
784 | 									->onDuplicateKeyUpdate()
785 | 									->uniqueIndexFields( [ 'itr_itemid_id', 'itr_revision_id' ] )
786 | 									// Omit redundant updates to avoid warnings (T353432)
787 | 									->set( array_diff_key(
788 | 										$newOrUpdateRevRow,
789 | 										[ 'itr_itemid_id' => true, 'itr_revision_id' => true ]
790 | 									) )
791 | 									->caller( $method )
792 | 									->execute();
793 | 								return $dbw->affectedRows() ? $dbw->insertId() : null; // null when no row was affected
794 | 							},
795 | 							$didInsert,
796 | 							$rev
797 | 						);
798 | 					}
799 | 				}
800 | 
801 | 				$itemRevisionsIds[ $item->getId() ] = $itemRevisionsId; // Remembered so replies processed later can point to it as parent
802 | 			}
803 | 		}, $dbw::ATOMIC_CANCELABLE );
804 | 
805 | 		return $didInsert;
806 | 	}
807 | } |