Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
85.71% |
72 / 84 |
|
82.14% |
23 / 28 |
CRAP | |
0.00% |
0 / 1 |
LinksTable | |
85.71% |
72 / 84 |
|
82.14% |
23 / 28 |
52.17 | |
0.00% |
0 / 1 |
injectBaseDependencies | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
setTransactionTicket | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setRevision | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setMoveDetails | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setParserOutput | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getTableName | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getFromField | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getExistingFields | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getNewLinkIDs | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getExistingLinkIDs | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
isExisting | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
isInNewSet | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
insertLink | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
deleteLink | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
needForcedLinkRefresh | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getDB | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLBFactory | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSourcePageId | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSourcePage | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isMove | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isCrossNamespaceMove | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
getMovedPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getBatchSize | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getTransactionTicket | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getRevision | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFromConds | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
fetchExistingRows | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
update | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
7 | |||
insertRow | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
deleteRow | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
beforeLock | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
startUpdate | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
finishUpdate | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
doWrites | |
90.91% |
20 / 22 |
|
0.00% |
0 / 1 |
5.02 | |||
setStrictTestMode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getInsertOptions | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
getLinkIDs | |
36.36% |
4 / 11 |
|
0.00% |
0 / 1 |
19.63 | |||
linksTargetNormalizationStage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Deferred\LinksUpdate; |
4 | |
5 | use InvalidArgumentException; |
6 | use MediaWiki\Linker\LinkTargetLookup; |
7 | use MediaWiki\Page\PageIdentity; |
8 | use MediaWiki\Page\PageReference; |
9 | use MediaWiki\Parser\ParserOutput; |
10 | use MediaWiki\Revision\RevisionRecord; |
11 | use Wikimedia\Rdbms\IDatabase; |
12 | use Wikimedia\Rdbms\IResultWrapper; |
13 | use Wikimedia\Rdbms\LBFactory; |
14 | |
/**
 * The base class for classes which update a single link table.
 *
 * A LinksTable object is a container for new and existing link sets outbound
 * from a single page, and an abstraction of the associated DB schema. The
 * object stores state related to an update of the outbound links of a page.
 *
 * Explanation of link ID concept
 * ------------------------------
 *
 * Link IDs identify a link in the new or old state, or in the change arrays.
 * They are opaque to the base class and are type-hinted here as mixed.
 *
 * Conventionally, the link ID is string|string[] and contains the link target
 * fields.
 *
 * The link ID should contain enough information so that the base class can
 * tell whether an existing link is in the new set, or vice versa, for the
 * purposes of incremental updates. If a change to a field would cause a DB
 * update, the field should be in the link ID.
 *
 * For example, a change to cl_timestamp does not trigger an update, so
 * cl_timestamp is not in the link ID.
 *
 * @stable to extend
 * @since 1.38
 */
abstract class LinksTable {
	/** Link type: Inserted (added) links */
	public const INSERTED = 1;

	/** Link type: Deleted (removed) links */
	public const DELETED = 2;

	/** Link type: Changed (inserted or removed) links */
	public const CHANGED = 3;

	/** Link type: existing/old links */
	public const OLD = 4;

	/** Link type: new links (from the ParserOutput) */
	public const NEW = 5;

	/**
	 * Rows to delete. An array of associative arrays, each associative array
	 * being the conditions for a delete query. Common conditions should be
	 * leftmost in the associative array so that they can be factored out.
	 *
	 * @var array
	 */
	protected $rowsToDelete = [];

	/**
	 * Rows to insert. An array of associative arrays, each associative array
	 * mapping field names to values.
	 *
	 * @var array
	 */
	protected $rowsToInsert = [];

	/** @var array Link IDs for inserted links */
	protected $insertedLinks = [];

	/** @var array Link IDs for deleted links */
	protected $deletedLinks = [];

	/** @var LBFactory */
	private $lbFactory;

	/** @var LinkTargetLookup */
	protected $linkTargetLookup;

	/** @var IDatabase */
	private $db;

	/** @var PageIdentity */
	private $sourcePage;

	/** @var PageReference|null Null unless setMoveDetails() was called */
	private $movedPage;

	/** @var int */
	private $batchSize;

	/** @var mixed */
	private $ticket;

	/** @var RevisionRecord|null Null unless setRevision() was called */
	private $revision;

	/** @var bool Whether insert queries omit the IGNORE option */
	protected $strictTestMode = false;

	/**
	 * This is called by the factory to inject dependencies for the base class.
	 * This is used instead of the constructor so that changes can be made to
	 * the injected parameters without breaking the subclass constructors.
	 *
	 * The primary database handle is obtained from the factory immediately
	 * and kept for the lifetime of the object.
	 *
	 * @param LBFactory $lbFactory
	 * @param LinkTargetLookup $linkTargetLookup
	 * @param PageIdentity $sourcePage
	 * @param int $batchSize
	 */
	final public function injectBaseDependencies(
		LBFactory $lbFactory,
		LinkTargetLookup $linkTargetLookup,
		PageIdentity $sourcePage,
		$batchSize
	) {
		$this->lbFactory = $lbFactory;
		$this->db = $this->lbFactory->getPrimaryDatabase();
		$this->sourcePage = $sourcePage;
		$this->batchSize = $batchSize;
		$this->linkTargetLookup = $linkTargetLookup;
	}

	/**
	 * Set the empty transaction ticket
	 *
	 * @param mixed $ticket
	 */
	public function setTransactionTicket( $ticket ) {
		$this->ticket = $ticket;
	}

	/**
	 * Set the revision associated with the edit.
	 *
	 * @param RevisionRecord $revision
	 */
	public function setRevision( RevisionRecord $revision ) {
		$this->revision = $revision;
	}

	/**
	 * Notify the object that the operation is a page move, and set the
	 * original title.
	 *
	 * @param PageReference $movedPage
	 */
	public function setMoveDetails( PageReference $movedPage ) {
		$this->movedPage = $movedPage;
	}

	/**
	 * Subclasses should implement this to extract the data they need from the
	 * ParserOutput.
	 *
	 * To support a future refactor of LinksDeletionUpdate, if this method is
	 * not called, the subclass should assume that the new state is empty.
	 *
	 * @param ParserOutput $parserOutput
	 */
	abstract public function setParserOutput( ParserOutput $parserOutput );

	/**
	 * Get the table name.
	 *
	 * @return string
	 */
	abstract protected function getTableName();

	/**
	 * Get the name of the field which links to page_id.
	 *
	 * @return string
	 */
	abstract protected function getFromField();

	/**
	 * Get the fields to be used in fetchExistingRows(). Note that
	 * fetchExistingRows() is just a helper for subclasses. The value returned
	 * here is effectively private to the subclass.
	 *
	 * @return array
	 */
	abstract protected function getExistingFields();

	/**
	 * Get an array (or iterator) of link IDs for the new state.
	 *
	 * See the LinksTable doc comment for an explanation of link IDs.
	 *
	 * @return iterable<mixed>
	 */
	abstract protected function getNewLinkIDs();

	/**
	 * Get an array (or iterator) of link IDs for the existing state. The
	 * subclass should load the data from the database. There is
	 * fetchExistingRows() to make this easier but the subclass is responsible
	 * for caching.
	 *
	 * See the LinksTable doc comment for an explanation of link IDs.
	 *
	 * @return iterable<mixed>
	 */
	abstract protected function getExistingLinkIDs();

	/**
	 * Determine whether a link (from the new set) is in the existing set.
	 *
	 * @param mixed $linkId
	 * @return bool
	 */
	abstract protected function isExisting( $linkId );

	/**
	 * Determine whether a link (from the existing set) is in the new set.
	 *
	 * @param mixed $linkId
	 * @return bool
	 */
	abstract protected function isInNewSet( $linkId );

	/**
	 * Insert a link identified by ID. The subclass is expected to queue the
	 * insertion by calling insertRow().
	 *
	 * @param mixed $linkId
	 */
	abstract protected function insertLink( $linkId );

	/**
	 * Delete a link identified by ID. The subclass is expected to queue the
	 * deletion by calling deleteRow().
	 *
	 * @param mixed $linkId
	 */
	abstract protected function deleteLink( $linkId );

	/**
	 * Subclasses can override this to return true in order to force
	 * reinsertion of all the links due to some property of the link
	 * changing for reasons not represented by the link ID.
	 *
	 * @return bool
	 */
	protected function needForcedLinkRefresh() {
		return false;
	}

	/**
	 * Get the database handle that was fetched in injectBaseDependencies().
	 *
	 * @stable to override
	 * @return IDatabase
	 */
	protected function getDB(): IDatabase {
		return $this->db;
	}

	/**
	 * Get the injected LBFactory.
	 *
	 * @return LBFactory
	 */
	protected function getLBFactory(): LBFactory {
		return $this->lbFactory;
	}

	/**
	 * Get the page_id of the source page
	 *
	 * @return int
	 */
	protected function getSourcePageId(): int {
		return $this->sourcePage->getId();
	}

	/**
	 * Get the source page, i.e. the page which is being updated and is the
	 * source of links.
	 *
	 * @return PageIdentity
	 */
	protected function getSourcePage(): PageIdentity {
		return $this->sourcePage;
	}

	/**
	 * Determine whether the page was moved
	 *
	 * @return bool
	 */
	protected function isMove() {
		return $this->movedPage !== null;
	}

	/**
	 * Determine whether the page was moved to a different namespace.
	 *
	 * @return bool
	 */
	protected function isCrossNamespaceMove() {
		return $this->movedPage !== null
			&& $this->sourcePage->getNamespace() !== $this->movedPage->getNamespace();
	}

	/**
	 * Assuming the page was moved, get the original page title before the move.
	 *
	 * If setMoveDetails() was never called the stored value is null, and the
	 * non-nullable return type makes PHP throw a TypeError. Check isMove()
	 * first when the caller cannot be sure.
	 *
	 * @return PageReference
	 */
	protected function getMovedPage(): PageReference {
		return $this->movedPage;
	}

	/**
	 * Get the maximum number of rows to update in a batch.
	 *
	 * @return int
	 */
	protected function getBatchSize(): int {
		return $this->batchSize;
	}

	/**
	 * Get the empty transaction ticket, or null if there is none.
	 *
	 * @return mixed
	 */
	protected function getTransactionTicket() {
		return $this->ticket;
	}

	/**
	 * Get the RevisionRecord of the new revision, if the LinksUpdate caller
	 * injected one.
	 *
	 * @return RevisionRecord|null
	 */
	protected function getRevision(): ?RevisionRecord {
		return $this->revision;
	}

	/**
	 * Get field=>value associative array for the from field(s)
	 *
	 * @stable to override
	 * @return array
	 */
	protected function getFromConds() {
		return [ $this->getFromField() => $this->getSourcePageId() ];
	}

	/**
	 * Do a select query to fetch the existing rows. This is a helper for
	 * subclasses.
	 *
	 * The query selects getExistingFields() from getTableName(), restricted
	 * by getFromConds().
	 *
	 * @return IResultWrapper
	 */
	protected function fetchExistingRows(): IResultWrapper {
		return $this->getDB()->newSelectQueryBuilder()
			->select( $this->getExistingFields() )
			->from( $this->getTableName() )
			->where( $this->getFromConds() )
			->caller( __METHOD__ )
			->fetchResultSet();
	}

	/**
	 * Execute an edit/delete update
	 *
	 * Every new link that is not already present is passed to insertLink(),
	 * and every existing link that is not in the new set is passed to
	 * deleteLink(). When needForcedLinkRefresh() returns true, all links in
	 * both sets are processed regardless. The queued rows are then written by
	 * doWrites(), bracketed by the startUpdate() and finishUpdate() hooks.
	 */
	final public function update() {
		$this->startUpdate();
		$force = $this->needForcedLinkRefresh();
		foreach ( $this->getNewLinkIDs() as $link ) {
			if ( $force || !$this->isExisting( $link ) ) {
				$this->insertLink( $link );
				$this->insertedLinks[] = $link;
			}
		}

		foreach ( $this->getExistingLinkIDs() as $link ) {
			if ( $force || !$this->isInNewSet( $link ) ) {
				$this->deleteLink( $link );
				$this->deletedLinks[] = $link;
			}
		}
		$this->doWrites();
		$this->finishUpdate();
	}

	/**
	 * Queue a row for insertion. Subclasses are expected to call this from
	 * insertLink(). The "from" field should not be included in the row.
	 *
	 * @param array $row Associative array mapping fields to values.
	 */
	protected function insertRow( $row ) {
		// Array union: fields already present in $row take precedence
		$row += $this->getFromConds();
		$this->rowsToInsert[] = $row;
	}

	/**
	 * Queue a deletion operation. Subclasses are expected to call this from
	 * deleteLink(). The "from" field does not need to be included in the
	 * conditions.
	 *
	 * Most often, the conditions match a single row, but this is not required.
	 *
	 * @param array $conds Associative array mapping fields to values,
	 *   specifying the conditions for a delete query.
	 */
	protected function deleteRow( $conds ) {
		// Put the "from" field leftmost, so it can be factored out
		$conds = $this->getFromConds() + $conds;
		$this->rowsToDelete[] = $conds;
	}

	/**
	 * Subclasses can override this to do any necessary setup before the lock
	 * is acquired.
	 *
	 * @stable to override
	 */
	public function beforeLock() {
	}

	/**
	 * Subclasses can override this to do any necessary setup before individual
	 * write operations begin.
	 *
	 * @stable to override
	 */
	protected function startUpdate() {
	}

	/**
	 * Subclasses can override this to do any updates associated with their
	 * link data, for example dispatching HTML update jobs.
	 *
	 * @stable to override
	 */
	protected function finishUpdate() {
	}

	/**
	 * Do the common DB operations
	 *
	 * Queued deletions are executed first, then queued insertions. Each queue
	 * is split into chunks of at most getBatchSize() rows. When a queue needs
	 * more than one chunk, commitAndWaitForReplication() is called after every
	 * chunk of that queue; a queue that fits in a single chunk is written
	 * without such a call.
	 */
	protected function doWrites() {
		$db = $this->getDB();
		$table = $this->getTableName();
		$batchSize = $this->getBatchSize();
		$ticket = $this->getTransactionTicket();

		$deleteBatches = array_chunk( $this->rowsToDelete, $batchSize );
		$commitBetweenDeletes = count( $deleteBatches ) > 1;
		foreach ( $deleteBatches as $chunk ) {
			$db->newDeleteQueryBuilder()
				->deleteFrom( $table )
				->where( $db->factorConds( $chunk ) )
				->caller( __METHOD__ )->execute();
			if ( $commitBetweenDeletes ) {
				$this->lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
			}
		}

		$insertBatches = array_chunk( $this->rowsToInsert, $batchSize );
		$commitBetweenInserts = count( $insertBatches ) > 1;
		foreach ( $insertBatches as $insertBatch ) {
			$db->newInsertQueryBuilder()
				->options( $this->getInsertOptions() )
				->insertInto( $table )
				->rows( $insertBatch )
				->caller( __METHOD__ )->execute();
			if ( $commitBetweenInserts ) {
				$this->lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
			}
		}
	}

	/**
	 * Omit conflict resolution options from the insert query so that testing
	 * can confirm that the incremental update logic was correct.
	 *
	 * @param bool $mode
	 */
	public function setStrictTestMode( $mode = true ) {
		$this->strictTestMode = $mode;
	}

	/**
	 * Get the options for the insert queries
	 *
	 * @return array Empty in strict test mode, otherwise [ 'IGNORE' ]
	 */
	protected function getInsertOptions() {
		if ( $this->strictTestMode ) {
			return [];
		} else {
			return [ 'IGNORE' ];
		}
	}

	/**
	 * Get an array or iterator of link IDs of a given type. Some subclasses
	 * use this to provide typed data to callers. This is not public because
	 * link IDs are a private concept.
	 *
	 * The INSERTED, DELETED and CHANGED sets are only populated by update().
	 *
	 * @param int $setType One of the class constants: self::INSERTED, self::DELETED,
	 *   self::CHANGED, self::OLD or self::NEW.
	 * @return iterable<mixed>
	 * @throws InvalidArgumentException If $setType is not one of the constants
	 */
	protected function getLinkIDs( $setType ) {
		switch ( $setType ) {
			case self::INSERTED:
				return $this->insertedLinks;

			case self::DELETED:
				return $this->deletedLinks;

			case self::CHANGED:
				return array_merge( $this->insertedLinks, $this->deletedLinks );

			case self::OLD:
				return $this->getExistingLinkIDs();

			case self::NEW:
				return $this->getNewLinkIDs();

			default:
				throw new InvalidArgumentException( __METHOD__ . ": Unknown link type" );
		}
	}

	/**
	 * Normalization stage of the links table (see T222224)
	 * @return int
	 */
	protected function linksTargetNormalizationStage(): int {
		return SCHEMA_COMPAT_OLD;
	}
}