Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
85.71% |
72 / 84 |
|
82.14% |
23 / 28 |
CRAP | |
0.00% |
0 / 1 |
LinksTable | |
85.71% |
72 / 84 |
|
82.14% |
23 / 28 |
52.17 | |
0.00% |
0 / 1 |
injectBaseDependencies | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
setTransactionTicket | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setRevision | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setMoveDetails | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setParserOutput | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getTableName | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getFromField | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getExistingFields | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getNewLinkIDs | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getExistingLinkIDs | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
isExisting | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
isInNewSet | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
insertLink | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
deleteLink | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
needForcedLinkRefresh | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getDB | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLBFactory | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSourcePageId | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSourcePage | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isMove | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isCrossNamespaceMove | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
getMovedPage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getBatchSize | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getTransactionTicket | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getRevision | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFromConds | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
fetchExistingRows | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
update | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
7 | |||
insertRow | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
deleteRow | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
beforeLock | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
startUpdate | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
finishUpdate | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
doWrites | |
90.91% |
20 / 22 |
|
0.00% |
0 / 1 |
5.02 | |||
setStrictTestMode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getInsertOptions | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
getLinkIDs | |
36.36% |
4 / 11 |
|
0.00% |
0 / 1 |
19.63 | |||
linksTargetNormalizationStage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Deferred\LinksUpdate; |
4 | |
5 | use MediaWiki\Linker\LinkTargetLookup; |
6 | use MediaWiki\Page\PageIdentity; |
7 | use MediaWiki\Page\PageReference; |
8 | use MediaWiki\Parser\ParserOutput; |
9 | use MediaWiki\Revision\RevisionRecord; |
10 | use Wikimedia\Rdbms\IDatabase; |
11 | use Wikimedia\Rdbms\IResultWrapper; |
12 | use Wikimedia\Rdbms\LBFactory; |
13 | |
14 | /** |
15 | * The base class for classes which update a single link table. |
16 | * |
17 | * A LinksTable object is a container for new and existing link sets outbound |
18 | * from a single page, and an abstraction of the associated DB schema. The |
19 | * object stores state related to an update of the outbound links of a page. |
20 | * |
21 | * Explanation of link ID concept |
22 | * ------------------------------ |
23 | * |
24 | * Link IDs identify a link in the new or old state, or in the change arrays. |
25 | * They are opaque to the base class and are type-hinted here as mixed. |
26 | * |
27 | * Conventionally, the link ID is string|string[] and contains the link target |
28 | * fields. |
29 | * |
30 | * The link ID should contain enough information so that the base class can |
31 | * tell whether an existing link is in the new set, or vice versa, for the |
32 | * purposes of incremental updates. If a change to a field would cause a DB |
33 | * update, the field should be in the link ID. |
34 | * |
35 | * For example, a change to cl_timestamp does not trigger an update, so |
36 | * cl_timestamp is not in the link ID. |
37 | * |
38 | * @stable to extend |
39 | * @since 1.38 |
40 | */ |
41 | abstract class LinksTable { |
/** Link set selector: links inserted (added) by this update */
public const INSERTED = 1;

/** Link set selector: links deleted (removed) by this update */
public const DELETED = 2;

/** Link set selector: changed links, i.e. inserted or removed */
public const CHANGED = 3;

/** Link set selector: existing/old links */
public const OLD = 4;

/** Link set selector: new links (from the ParserOutput) */
public const NEW = 5;

/**
 * Queued delete operations. Each element is an associative array holding
 * the conditions for one delete query. Conditions shared by all elements
 * should come first (leftmost) so that they can be factored out when the
 * batch is turned into a single query.
 *
 * @var array
 */
protected $rowsToDelete = [];

/**
 * Queued rows to insert. Each element is an associative array mapping
 * field names to values.
 *
 * @var array
 */
protected $rowsToInsert = [];

/** @var array Link IDs recorded by update() for inserted links */
protected $insertedLinks = [];

/** @var array Link IDs recorded by update() for deleted links */
protected $deletedLinks = [];

/** @var LBFactory */
private $lbFactory;

/** @var LinkTargetLookup */
protected $linkTargetLookup;

/** @var IDatabase Connection obtained from LBFactory::getPrimaryDatabase() */
private $db;

/** @var PageIdentity The page whose outbound links are being updated */
private $sourcePage;

/** @var PageReference|null The pre-move page, or null if this is not a move */
private $movedPage;

/** @var int Maximum number of rows per write batch */
private $batchSize;

/** @var mixed Empty transaction ticket, or null if none was set */
private $ticket;

/** @var RevisionRecord|null Null until setRevision() has been called */
private $revision;

/** @var bool Whether insert queries omit conflict resolution options */
protected $strictTestMode = false;
106 | |
/**
 * Inject the dependencies needed by the base class. The factory calls
 * this rather than passing them through the constructor, so that the
 * injected parameters can change without breaking subclass constructors.
 *
 * @param LBFactory $lbFactory
 * @param LinkTargetLookup $linkTargetLookup
 * @param PageIdentity $sourcePage
 * @param int $batchSize
 */
final public function injectBaseDependencies(
	LBFactory $lbFactory,
	LinkTargetLookup $linkTargetLookup,
	PageIdentity $sourcePage,
	$batchSize
) {
	$this->lbFactory = $lbFactory;
	// The connection is resolved once here and later served by getDB().
	$this->db = $lbFactory->getPrimaryDatabase();
	$this->sourcePage = $sourcePage;
	$this->batchSize = $batchSize;
	$this->linkTargetLookup = $linkTargetLookup;
}
129 | |
/**
 * Store the empty transaction ticket. doWrites() passes it to
 * LBFactory::commitAndWaitForReplication() between batches.
 *
 * @param mixed $ticket
 */
public function setTransactionTicket( $ticket ) {
	$this->ticket = $ticket;
}
138 | |
/**
 * Record the revision associated with the edit, making it available to
 * subclasses through getRevision().
 *
 * @param RevisionRecord $revision
 */
public function setRevision( RevisionRecord $revision ) {
	$this->revision = $revision;
}
147 | |
/**
 * Mark this operation as a page move and remember the original title.
 * After this call isMove() returns true.
 *
 * @param PageReference $movedPage The page as it was before the move
 */
public function setMoveDetails( PageReference $movedPage ) {
	$this->movedPage = $movedPage;
}
157 | |
/**
 * Extract whatever data the subclass needs from the ParserOutput.
 *
 * If this method is never called, the subclass should treat the new
 * state as empty. This supports a future refactor of LinksDeletionUpdate.
 *
 * @param ParserOutput $parserOutput
 */
abstract public function setParserOutput( ParserOutput $parserOutput );

/**
 * Name of the link table managed by the subclass.
 *
 * @return string
 */
abstract protected function getTableName();

/**
 * Name of the field in the link table which links to page_id.
 *
 * @return string
 */
abstract protected function getFromField();

/**
 * Fields selected by fetchExistingRows(). Since fetchExistingRows() is
 * only a helper for subclasses, the value returned here is effectively
 * private to the subclass.
 *
 * @return array
 */
abstract protected function getExistingFields();

/**
 * Link IDs for the new state, as an array or iterator.
 *
 * See the LinksTable doc comment for an explanation of link IDs.
 *
 * @return iterable<mixed>
 */
abstract protected function getNewLinkIDs();

/**
 * Link IDs for the existing state, as an array or iterator. The subclass
 * loads the data from the database; fetchExistingRows() makes that
 * easier, but caching is the subclass's responsibility.
 *
 * See the LinksTable doc comment for an explanation of link IDs.
 *
 * @return iterable<mixed>
 */
abstract protected function getExistingLinkIDs();

/**
 * Whether a link from the new set is also in the existing set.
 *
 * @param mixed $linkId
 * @return bool
 */
abstract protected function isExisting( $linkId );

/**
 * Whether a link from the existing set is also in the new set.
 *
 * @param mixed $linkId
 * @return bool
 */
abstract protected function isInNewSet( $linkId );

/**
 * Insert the link identified by the given ID. Implementations are
 * expected to queue the insertion by calling insertRow().
 *
 * @param mixed $linkId
 */
abstract protected function insertLink( $linkId );

/**
 * Delete the link identified by the given ID. Implementations are
 * expected to queue the deletion by calling deleteRow().
 *
 * @param mixed $linkId
 */
abstract protected function deleteLink( $linkId );
244 | |
/**
 * Whether all links must be reinserted because some property of the
 * links changed in a way the link ID does not represent. When this
 * returns true, update() inserts every new link and deletes every
 * existing link without consulting isExisting() / isInNewSet().
 *
 * The base implementation returns false; subclasses can override it.
 *
 * @return bool
 */
protected function needForcedLinkRefresh() {
	return false;
}
255 | |
/**
 * Get the database connection that was obtained in
 * injectBaseDependencies().
 *
 * @stable to override
 * @return IDatabase
 */
protected function getDB(): IDatabase {
	return $this->db;
}
263 | |
/**
 * Get the injected load balancer factory.
 *
 * @return LBFactory
 */
protected function getLBFactory(): LBFactory {
	return $this->lbFactory;
}
270 | |
/**
 * Get the page_id of the source page, as reported by its getId().
 *
 * @return int
 */
protected function getSourcePageId(): int {
	return $this->sourcePage->getId();
}
279 | |
/**
 * Get the source page: the page being updated, from which the links
 * originate.
 *
 * @return PageIdentity
 */
protected function getSourcePage(): PageIdentity {
	return $this->sourcePage;
}
289 | |
/**
 * Whether the page was moved, i.e. whether setMoveDetails() supplied a
 * moved page.
 *
 * @return bool
 */
protected function isMove() {
	return $this->movedPage !== null;
}
298 | |
/**
 * Whether the page was moved to a different namespace.
 *
 * @return bool False if the page was not moved at all
 */
protected function isCrossNamespaceMove() {
	if ( $this->movedPage === null ) {
		return false;
	}

	return $this->sourcePage->getNamespace() !== $this->movedPage->getNamespace();
}
308 | |
/**
 * Assuming the page was moved, get the original page title before the move.
 *
 * Callers should check isMove() first: if no moved page was set, this
 * fails with an explicit exception rather than relying on the return
 * type check to reject a null value.
 *
 * @return PageReference
 * @throws \LogicException If the page wasn't moved
 */
protected function getMovedPage(): PageReference {
	if ( $this->movedPage === null ) {
		throw new \LogicException( __METHOD__ . ': the page was not moved' );
	}

	return $this->movedPage;
}
318 | |
/**
 * Get the maximum number of rows to write in a single batch.
 *
 * @return int
 */
protected function getBatchSize(): int {
	return $this->batchSize;
}
327 | |
/**
 * Get the empty transaction ticket set with setTransactionTicket(), or
 * null if there is none.
 *
 * @return mixed
 */
protected function getTransactionTicket() {
	return $this->ticket;
}
336 | |
/**
 * Get the RevisionRecord of the new revision. This is only available if
 * the LinksUpdate caller injected one via setRevision().
 *
 * @return RevisionRecord|null Null if no revision was injected
 */
protected function getRevision(): ?RevisionRecord {
	return $this->revision;
}
346 | |
/**
 * Get the field => value associative array for the "from" field(s). In
 * the base implementation this maps getFromField() to the source page ID.
 *
 * @stable to override
 * @return array
 */
protected function getFromConds() {
	$field = $this->getFromField();
	$pageId = $this->getSourcePageId();

	return [ $field => $pageId ];
}
356 | |
/**
 * Helper for subclasses: select the fields named by getExistingFields()
 * from the link table, restricted by getFromConds().
 *
 * @return IResultWrapper
 */
protected function fetchExistingRows(): IResultWrapper {
	$query = $this->getDB()->newSelectQueryBuilder()
		->select( $this->getExistingFields() )
		->from( $this->getTableName() )
		->where( $this->getFromConds() )
		->caller( __METHOD__ );

	return $query->fetchResultSet();
}
371 | |
/**
 * Execute an edit/delete update.
 *
 * Between the startUpdate() and finishUpdate() hooks, this queues an
 * insertion for each new link that is not already existing and a
 * deletion for each existing link that is not in the new set, then
 * calls doWrites(). If needForcedLinkRefresh() returns true, every new
 * link is inserted and every existing link is deleted.
 */
final public function update() {
	$this->startUpdate();
	$refreshAll = $this->needForcedLinkRefresh();

	foreach ( $this->getNewLinkIDs() as $newId ) {
		if ( !$refreshAll && $this->isExisting( $newId ) ) {
			// Already present and no forced refresh: nothing to insert
			continue;
		}
		$this->insertLink( $newId );
		$this->insertedLinks[] = $newId;
	}

	foreach ( $this->getExistingLinkIDs() as $oldId ) {
		if ( !$refreshAll && $this->isInNewSet( $oldId ) ) {
			// Still wanted and no forced refresh: nothing to delete
			continue;
		}
		$this->deleteLink( $oldId );
		$this->deletedLinks[] = $oldId;
	}

	$this->doWrites();
	$this->finishUpdate();
}
394 | |
/**
 * Queue a row for insertion. Subclasses are expected to call this from
 * insertLink(). The "from" field should not be included in the row; it
 * is filled in here from getFromConds(). Fields already present in $row
 * are not overwritten.
 *
 * @param array $row Associative array mapping fields to values.
 */
protected function insertRow( $row ) {
	$this->rowsToInsert[] = $row + $this->getFromConds();
}
405 | |
/**
 * Queue a deletion operation. Subclasses are expected to call this from
 * deleteLink(). The "from" field does not need to be included in the
 * conditions; it is added here from getFromConds().
 *
 * The conditions usually match a single row, but this is not required.
 *
 * @param array $conds Associative array mapping fields to values,
 *   specifying the conditions for a delete query.
 */
protected function deleteRow( $conds ) {
	// The "from" conditions go leftmost so that they can be factored out
	$this->rowsToDelete[] = $this->getFromConds() + $conds;
}
421 | |
/**
 * Hook for any setup a subclass needs to do before the lock is acquired.
 * The base implementation does nothing.
 *
 * @stable to override
 */
public function beforeLock() {
	// Intentionally empty
}
430 | |
/**
 * Hook for any setup a subclass needs to do before individual write
 * operations begin. update() calls this first. The base implementation
 * does nothing.
 *
 * @stable to override
 */
protected function startUpdate() {
	// Intentionally empty
}
439 | |
/**
 * Hook for any updates a subclass associates with its link data, for
 * example dispatching HTML update jobs. update() calls this after
 * doWrites(). The base implementation does nothing.
 *
 * @stable to override
 */
protected function finishUpdate() {
	// Intentionally empty
}
448 | |
/**
 * Do the common DB operations.
 *
 * The queued deletions are executed first, then the queued insertions.
 * Each queue is split into batches of at most getBatchSize() entries.
 * When a queue needs more than one batch, the transaction is committed
 * and replication is waited for after every batch of that queue, using
 * the empty transaction ticket.
 */
protected function doWrites() {
	$db = $this->getDB();
	$table = $this->getTableName();
	$batchSize = $this->getBatchSize();
	$ticket = $this->getTransactionTicket();

	$deleteBatches = array_chunk( $this->rowsToDelete, $batchSize );
	$commitAfterDelete = count( $deleteBatches ) > 1;
	foreach ( $deleteBatches as $deleteBatch ) {
		$db->newDeleteQueryBuilder()
			->deleteFrom( $table )
			// Collapse the per-row condition arrays into a single condition
			->where( $db->factorConds( $deleteBatch ) )
			->caller( __METHOD__ )->execute();
		if ( $commitAfterDelete ) {
			$this->lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
		}
	}

	$insertBatches = array_chunk( $this->rowsToInsert, $batchSize );
	$commitAfterInsert = count( $insertBatches ) > 1;
	foreach ( $insertBatches as $insertBatch ) {
		$db->newInsertQueryBuilder()
			->options( $this->getInsertOptions() )
			->insertInto( $table )
			->rows( $insertBatch )
			->caller( __METHOD__ )->execute();
		if ( $commitAfterInsert ) {
			$this->lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
		}
	}
}
482 | |
/**
 * Enable or disable strict test mode. In strict mode the insert queries
 * omit conflict resolution options, so that testing can confirm that the
 * incremental update logic was correct.
 *
 * @param bool $mode
 */
public function setStrictTestMode( $mode = true ) {
	$this->strictTestMode = $mode;
}
492 | |
/**
 * Get the options for the insert queries: none in strict test mode,
 * otherwise 'IGNORE'.
 *
 * @return array
 */
protected function getInsertOptions() {
	return $this->strictTestMode ? [] : [ 'IGNORE' ];
}
505 | |
/**
 * Get an array or iterator of link IDs of a given type. Some subclasses
 * use this to provide typed data to callers. It is not public because
 * link IDs are a private concept.
 *
 * @param int $setType One of the class constants: self::INSERTED, self::DELETED,
 *   self::CHANGED, self::OLD or self::NEW.
 * @return iterable<mixed>
 */
protected function getLinkIDs( $setType ) {
	switch ( $setType ) {
		case self::INSERTED:
			$ids = $this->insertedLinks;
			break;

		case self::DELETED:
			$ids = $this->deletedLinks;
			break;

		case self::CHANGED:
			// Inserted links first, followed by deleted links
			$ids = array_merge( $this->insertedLinks, $this->deletedLinks );
			break;

		case self::OLD:
			$ids = $this->getExistingLinkIDs();
			break;

		case self::NEW:
			$ids = $this->getNewLinkIDs();
			break;

		default:
			throw new \InvalidArgumentException( __METHOD__ . ": Unknown link type" );
	}

	return $ids;
}
536 | |
/**
 * Get the normalization stage of the links table (see T222224). The
 * base implementation returns SCHEMA_COMPAT_OLD.
 *
 * @return int
 */
protected function linksTargetNormalizationStage(): int {
	return SCHEMA_COMPAT_OLD;
}
544 | } |