Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
60.75% |
65 / 107 |
|
38.46% |
5 / 13 |
CRAP | |
0.00% |
0 / 1 |
ChangeListener | |
60.75% |
65 / 107 |
|
38.46% |
5 / 13 |
93.93 | |
0.00% |
0 / 1 |
create | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
onArticleRevisionVisibilitySet | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
onLinksUpdateComplete | |
82.76% |
24 / 29 |
|
0.00% |
0 / 1 |
5.13 | |||
onUploadComplete | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
onPageDelete | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
onPageDeleteComplete | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
onTitleMove | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
onPageMoveComplete | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
12 | |||
prepareTitlesForLinksUpdate | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
4 | |||
pickFromArray | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
getConnection | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
onPageUndeleteComplete | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
2.01 |
1 | <?php |
2 | |
3 | namespace CirrusSearch; |
4 | |
5 | use CirrusSearch\Job\CirrusTitleJob; |
6 | use CirrusSearch\Job\DeletePages; |
7 | use CirrusSearch\Job\LinksUpdate; |
8 | use JobQueueGroup; |
9 | use ManualLogEntry; |
10 | use MediaWiki\Config\ConfigFactory; |
11 | use MediaWiki\Deferred\DeferredUpdates; |
12 | use MediaWiki\Hook\ArticleRevisionVisibilitySetHook; |
13 | use MediaWiki\Hook\LinksUpdateCompleteHook; |
14 | use MediaWiki\Hook\PageMoveCompleteHook; |
15 | use MediaWiki\Hook\TitleMoveHook; |
16 | use MediaWiki\Hook\UploadCompleteHook; |
17 | use MediaWiki\Logger\LoggerFactory; |
18 | use MediaWiki\Page\Hook\PageDeleteCompleteHook; |
19 | use MediaWiki\Page\Hook\PageDeleteHook; |
20 | use MediaWiki\Page\Hook\PageUndeleteCompleteHook; |
21 | use MediaWiki\Page\ProperPageIdentity; |
22 | use MediaWiki\Page\RedirectLookup; |
23 | use MediaWiki\Permissions\Authority; |
24 | use MediaWiki\Revision\RevisionRecord; |
25 | use MediaWiki\Status\Status; |
26 | use MediaWiki\Title\Title; |
27 | use MediaWiki\User\User; |
28 | use MediaWiki\Utils\MWTimestamp; |
29 | use Wikimedia\Assert\Assert; |
30 | use Wikimedia\Rdbms\LoadBalancer; |
31 | |
/**
 * Implements every hook CirrusSearch listens to in order to keep its search index
 * in sync with the main SQL database.
 *
 * Each handler translates a MediaWiki page event (edit, upload, delete, undelete, move,
 * revision visibility change) into a job pushed onto the job queue.
 */
class ChangeListener extends PageChangeTracker implements
	LinksUpdateCompleteHook,
	TitleMoveHook,
	PageMoveCompleteHook,
	UploadCompleteHook,
	ArticleRevisionVisibilitySetHook,
	PageDeleteHook,
	PageDeleteCompleteHook,
	PageUndeleteCompleteHook
{
	/** @var JobQueueGroup queue receiving the jobs created by the hook handlers */
	private JobQueueGroup $jobQueue;
	/** @var SearchConfig */
	private SearchConfig $searchConfig;
	/** @var LoadBalancer */
	private LoadBalancer $loadBalancer;
	/** @var Connection|null lazily created, see getConnection() */
	private ?Connection $connection = null;

	/** @var string[] prefixed DB keys of the titles being moved during this request */
	private array $movingTitles = [];

	/** @var RedirectLookup */
	private RedirectLookup $redirectLookup;

	/**
	 * Factory building the listener from a ConfigFactory: the "CirrusSearch" config
	 * is resolved here and handed to the constructor.
	 *
	 * @param JobQueueGroup $jobQueue
	 * @param ConfigFactory $configFactory
	 * @param LoadBalancer $loadBalancer
	 * @param RedirectLookup $redirectLookup
	 * @return ChangeListener
	 */
	public static function create(
		JobQueueGroup $jobQueue,
		ConfigFactory $configFactory,
		LoadBalancer $loadBalancer,
		RedirectLookup $redirectLookup
	): ChangeListener {
		/** @phan-suppress-next-line PhanTypeMismatchArgumentSuperType $config is actually a SearchConfig */
		return new self( $jobQueue, $configFactory->makeConfig( "CirrusSearch" ), $loadBalancer, $redirectLookup );
	}

	/**
	 * @param JobQueueGroup $jobQueue
	 * @param SearchConfig $searchConfig
	 * @param LoadBalancer $loadBalancer
	 * @param RedirectLookup $redirectLookup
	 */
	public function __construct(
		JobQueueGroup $jobQueue,
		SearchConfig $searchConfig,
		LoadBalancer $loadBalancer,
		RedirectLookup $redirectLookup
	) {
		parent::__construct();
		$this->jobQueue = $jobQueue;
		$this->searchConfig = $searchConfig;
		$this->loadBalancer = $loadBalancer;
		$this->redirectLookup = $redirectLookup;
	}

	/**
	 * Called when a revision is deleted. In theory, we shouldn't need to do this since
	 * you can't delete the current text of a page (so we should've already updated when
	 * the page was updated last). But we're paranoid, because deleted revisions absolutely
	 * should not be in the index.
	 *
	 * @param Title $title The page title we've had a revision deleted on
	 * @param int[] $ids IDs to set the visibility for
	 * @param array $visibilityChangeMap Map of revision ID to oldBits and newBits.
	 *   This array can be examined to determine exactly what visibility bits
	 *   have changed for each revision. This array is of the form:
	 *   [id => ['oldBits' => $oldBits, 'newBits' => $newBits], ... ]
	 */
	public function onArticleRevisionVisibilitySet( $title, $ids, $visibilityChangeMap ) {
		$this->jobQueue->lazyPush( LinksUpdate::newPastRevisionVisibilityChange( $title ) );
	}

	/**
	 * Hooked to update the search index when pages change directly or when templates that
	 * they include change.
	 *
	 * @param \MediaWiki\Deferred\LinksUpdate\LinksUpdate $linksUpdate
	 * @param mixed $ticket Prior result of LBFactory::getEmptyTransactionTicket()
	 */
	public function onLinksUpdateComplete( $linksUpdate, $ticket ) {
		// Defer processing the LinksUpdateComplete hook until other hooks tagged in PageChangeTracker
		// have a chance to run. Reason is that we want to detect what are the links updates triggered
		// by a "page change". The definition of a "page change" we use is the one used by EventBus
		// PageChangeHooks.
		DeferredUpdates::addCallableUpdate( function () use ( $linksUpdate ) {
			$title = $linksUpdate->getTitle();

			// Titles that are created by a move don't need their own job.
			// Strict comparison: DB keys are strings and numeric-looking keys
			// (e.g. "1e3" vs "1000") must not be treated as equal.
			if ( in_array( $title->getPrefixedDBkey(), $this->movingTitles, true ) ) {
				return;
			}

			$linkedArticlesToUpdate = $this->searchConfig->get( 'CirrusSearchLinkedArticlesToUpdate' );
			$unLinkedArticlesToUpdate = $this->searchConfig->get( 'CirrusSearchUnlinkedArticlesToUpdate' );
			$updateDelay = $this->searchConfig->get( 'CirrusSearchUpdateDelay' );

			$params = [];
			if ( $this->searchConfig->get( 'CirrusSearchEnableIncomingLinkCounting' ) ) {
				$params['addedLinks'] = self::prepareTitlesForLinksUpdate(
					$linksUpdate->getAddedLinks(),
					$linkedArticlesToUpdate
				);
				// We exclude links that contain invalid UTF-8 sequences: pages created
				// before T13143 was fixed might still have bad links in the pagelinks table,
				// which causes LinksUpdate to believe that these links were removed.
				$params['removedLinks'] = self::prepareTitlesForLinksUpdate(
					$linksUpdate->getRemovedLinks(),
					$unLinkedArticlesToUpdate,
					true
				);
			}

			if ( $this->isPageChange( $linksUpdate->getPageId() ) ) {
				$jobParams = $params + LinksUpdate::buildJobDelayOptions( LinksUpdate::class,
					$updateDelay['prioritized'], $this->jobQueue );
				$job = LinksUpdate::newPageChangeUpdate( $title,
					$linksUpdate->getRevisionRecord(), $jobParams );
				$eventAge = MWTimestamp::time() - $job->params[CirrusTitleJob::ROOT_EVENT_TIME];
				if ( $eventAge > ( 3600 * 24 ) ) {
					LoggerFactory::getInstance( 'CirrusSearch' )->debug(
						"Scheduled a page-change-update for {title} on a revision created more than 24hours ago, " .
						"the cause is {causeAction}",
						[
							'title' => $title->getPrefixedDBkey(),
							'causeAction' => $linksUpdate->getCauseAction()
						] );
				}
			} else {
				$jobParams = $params + LinksUpdate::buildJobDelayOptions( LinksUpdate::class,
					$updateDelay['default'], $this->jobQueue );
				$job = LinksUpdate::newPageRefreshUpdate( $title, $jobParams );
			}

			$this->jobQueue->lazyPush( $job );
		} );
	}

	/**
	 * Hook into UploadComplete, because overwritten files mistakenly do not trigger
	 * LinksUpdateComplete (T344285). Since files do contain indexed metadata
	 * we need to refresh the search index when a file is overwritten on an
	 * existing title.
	 *
	 * @param \UploadBase $uploadBase
	 */
	public function onUploadComplete( $uploadBase ) {
		$title = $uploadBase->getTitle();
		if ( $title->exists() ) {
			$this->jobQueue->lazyPush( LinksUpdate::newPageChangeUpdate( $title, null, [] ) );
		}
	}

	/**
	 * This hook is called before a page is deleted.
	 *
	 * @since 1.37
	 *
	 * @param ProperPageIdentity $page Page being deleted.
	 * @param Authority $deleter Who is deleting the page
	 * @param string $reason Reason the page is being deleted
	 * @param \StatusValue $status Add any error here
	 * @param bool $suppress Whether this is a suppression deletion or not
	 * @return bool|void True or no return value to continue; false to abort, which also requires adding
	 *   a fatal error to $status.
	 */
	public function onPageDelete(
		ProperPageIdentity $page,
		Authority $deleter,
		string $reason,
		\StatusValue $status,
		bool $suppress
	) {
		parent::onPageDelete( $page, $deleter, $reason, $status, $suppress );
		// We use this to pick up redirects so we can update their targets.
		// Can't re-use PageDeleteComplete because the page info's
		// already gone.
		// If we abort or fail deletion it's no big deal because this will
		// end up being a no-op when it executes.
		$targetLink = $this->redirectLookup->getRedirectTarget( $page );
		$target = $targetLink !== null ? Title::castFromLinkTarget( $targetLink ) : null;
		if ( $target !== null ) {
			$this->jobQueue->lazyPush( new LinksUpdate( $target, [] ) );
		}
	}

	/**
	 * Called once a page has been deleted: schedules the removal of its document from the index.
	 *
	 * @param ProperPageIdentity $page
	 * @param Authority $deleter
	 * @param string $reason
	 * @param int $pageID
	 * @param RevisionRecord $deletedRev
	 * @param ManualLogEntry $logEntry
	 * @param int $archivedRevisionCount
	 * @return void
	 */
	public function onPageDeleteComplete( ProperPageIdentity $page, Authority $deleter,
		string $reason, int $pageID, RevisionRecord $deletedRev, ManualLogEntry $logEntry,
		int $archivedRevisionCount
	) {
		// Forward the real archived revision count rather than a hard-coded value.
		parent::onPageDeleteComplete( $page, $deleter, $reason, $pageID, $deletedRev, $logEntry,
			$archivedRevisionCount );
		// Note that we must use the article id provided or it'll be lost in the ether. The job can't
		// load it from the title because the page row has already been deleted.
		$title = Title::castFromPageIdentity( $page );
		Assert::postcondition( $title !== null, '$page can be cast to a Title' );

		// Prefer the timestamp of the deletion log entry, fall back to "now" when it has none.
		$logTimestamp = $logEntry->getTimestamp();
		$eventTime = $logTimestamp !== false
			? MWTimestamp::convert( TS_UNIX, $logTimestamp )
			: MWTimestamp::time();

		$this->jobQueue->lazyPush(
			DeletePages::build( $title, $this->searchConfig->makeId( $pageID ), $eventTime )
		);
	}

	/**
	 * Before we've moved a title from $old to $nt: remember the old title so that
	 * onLinksUpdateComplete can skip it.
	 *
	 * @param Title $old Old title
	 * @param Title $nt New title
	 * @param User $user User who does the move
	 * @param string $reason Reason provided by the user
	 * @param Status &$status To abort the move, add a fatal error to this object
	 *   (i.e. call $status->fatal())
	 * @return bool|void True or no return value to continue or false to abort
	 */
	public function onTitleMove( Title $old, Title $nt, User $user, $reason, Status &$status ) {
		$this->movingTitles[] = $old->getPrefixedDBkey();
	}

	/**
	 * When we've moved a Title from A to B.
	 *
	 * @param \MediaWiki\Linker\LinkTarget $old Old title
	 * @param \MediaWiki\Linker\LinkTarget $new New title
	 * @param \MediaWiki\User\UserIdentity $user User who did the move
	 * @param int $pageid Database ID of the page that's been moved
	 * @param int $redirid Database ID of the created redirect
	 * @param string $reason Reason for the move
	 * @param \MediaWiki\Revision\RevisionRecord $revision RevisionRecord created by the move
	 * @return bool|void True or no return value to continue or false stop other hook handlers,
	 *   doesn't abort the move itself
	 */
	public function onPageMoveComplete(
		$old, $new, $user, $pageid, $redirid,
		$reason, $revision
	) {
		parent::onPageMoveComplete( $old, $new, $user, $pageid, $redirid, $reason, $revision );
		// When a page is moved the update and delete hooks are good enough to catch
		// almost everything. The only thing they miss is if a page moves from one
		// index to another. That only happens if it switches namespace.
		if ( $old->getNamespace() === $new->getNamespace() ) {
			return;
		}

		$conn = $this->getConnection();
		$oldIndexSuffix = $conn->getIndexSuffixForNamespace( $old->getNamespace() );
		$newIndexSuffix = $conn->getIndexSuffixForNamespace( $new->getNamespace() );
		if ( $oldIndexSuffix === $newIndexSuffix ) {
			return;
		}

		$job = new DeletePages( Title::newFromLinkTarget( $old ), [
			'indexSuffix' => $oldIndexSuffix,
			'docId' => $this->searchConfig->makeId( $pageid )
		] );
		// Push the job after DB commit but cancel on rollback
		$this->loadBalancer->getConnection( DB_PRIMARY )->onTransactionCommitOrIdle( function () use ( $job ) {
			$this->jobQueue->lazyPush( $job );
		}, __METHOD__ );
	}

	/**
	 * Take a list of titles either linked or unlinked and prepare them for Job\LinksUpdate.
	 * This includes limiting them to $max titles.
	 *
	 * @param Title[] $titles titles to prepare
	 * @param int $max maximum number of titles to return
	 * @param bool $excludeBadUTF exclude links that contain invalid UTF sequences
	 * @return string[] prefixed DB keys of the retained titles
	 */
	public static function prepareTitlesForLinksUpdate( $titles, int $max, $excludeBadUTF = false ) {
		$dBKeys = [];
		foreach ( self::pickFromArray( $titles, $max ) as $title ) {
			$key = $title->getPrefixedDBkey();
			if ( $excludeBadUTF ) {
				// Re-encoding UTF-8 to UTF-8 alters the string only when it holds invalid sequences.
				$fixedKey = mb_convert_encoding( $key, 'UTF-8', 'UTF-8' );
				if ( $fixedKey !== $key ) {
					LoggerFactory::getInstance( 'CirrusSearch' )
						->warning( "Ignoring title {title} with invalid UTF-8 sequences.",
							[ 'title' => $fixedKey ] );
					continue;
				}
			}
			$dBKeys[] = $key;
		}
		return $dBKeys;
	}

	/**
	 * Pick $num random entries from $array.
	 *
	 * When $num exceeds the size of $array the array is returned untouched (keys included);
	 * otherwise the result is a re-indexed list.
	 *
	 * @param array $array Array to pick from
	 * @param int $num Number of entries to pick
	 * @return array of entries from $array
	 */
	private static function pickFromArray( $array, $num ) {
		if ( $num > count( $array ) ) {
			return $array;
		}
		if ( $num < 1 ) {
			return [];
		}
		$chosen = array_rand( $array, $num );
		// If $num === 1 then array_rand will return a key rather than an array of keys.
		if ( !is_array( $chosen ) ) {
			return [ $array[ $chosen ] ];
		}
		$result = [];
		foreach ( $chosen as $key ) {
			$result[] = $array[ $key ];
		}
		return $result;
	}

	/**
	 * Lazily build the Connection from the search config and reuse it on later calls.
	 *
	 * @return Connection
	 */
	private function getConnection(): Connection {
		$this->connection ??= new Connection( $this->searchConfig );
		return $this->connection;
	}

	/**
	 * When a page is undeleted, schedule a DeleteArchive job for the restored page ids.
	 * Nothing is scheduled unless CirrusSearchIndexDeletes is enabled.
	 *
	 * @param ProperPageIdentity $page
	 * @param Authority $restorer
	 * @param string $reason
	 * @param RevisionRecord $restoredRev
	 * @param ManualLogEntry $logEntry
	 * @param int $restoredRevisionCount
	 * @param bool $created
	 * @param array $restoredPageIds
	 * @return void
	 */
	public function onPageUndeleteComplete(
		ProperPageIdentity $page,
		Authority $restorer,
		string $reason,
		RevisionRecord $restoredRev,
		ManualLogEntry $logEntry,
		int $restoredRevisionCount,
		bool $created,
		array $restoredPageIds
	): void {
		parent::onPageUndeleteComplete( $page, $restorer, $reason, $restoredRev, $logEntry,
			$restoredRevisionCount, $created, $restoredPageIds );
		if ( !$this->searchConfig->get( 'CirrusSearchIndexDeletes' ) ) {
			// Not indexing, thus nothing to remove here.
			return;
		}
		$title = Title::castFromPageIdentity( $page );
		Assert::postcondition( $title !== null, '$page can be cast to a Title' );
		$this->jobQueue->lazyPush(
			new Job\DeleteArchive( $title, [ 'docIds' => $restoredPageIds ] )
		);
	}
}