Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
92.13% covered (success)
92.13%
164 / 178
66.67% covered (warning)
66.67%
6 / 9
CRAP
0.00% covered (danger)
0.00%
0 / 1
PageChangeEventIngress
92.13% covered (success)
92.13%
164 / 178
66.67% covered (warning)
66.67%
6 / 9
33.53
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
1
 lookupRedirectTarget
88.24% covered (warning)
88.24%
15 / 17
0.00% covered (danger)
0.00%
0 / 1
7.08
 sendEvents
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 handlePageLatestRevisionChangedEvent
96.67% covered (success)
96.67%
29 / 30
0.00% covered (danger)
0.00%
0 / 1
5
 isContentChangeCause
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
5
 handlePageDeletedEvent
100.00% covered (success)
100.00%
21 / 21
100.00% covered (success)
100.00%
1 / 1
4
 handlePageMovedEvent
100.00% covered (success)
100.00%
30 / 30
100.00% covered (success)
100.00%
1 / 1
2
 handlePageCreatedEvent
100.00% covered (success)
100.00%
20 / 20
100.00% covered (success)
100.00%
1 / 1
3
 handlePageHistoryVisibilityChangedEvent
69.44% covered (warning)
69.44%
25 / 36
0.00% covered (danger)
0.00%
0 / 1
5.71
1<?php
2/**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @author Andrew Otto <otto@wikimedia.org>
20 * @author Gabriele Modena <gmodena@wikimedia.org>
21 */
22
23declare( strict_types=1 );
24
25namespace MediaWiki\Extension\EventBus\MediaWikiEventSubscribers;
26
27use InvalidArgumentException;
28use MediaWiki\Deferred\DeferredUpdates;
29use MediaWiki\DomainEvent\DomainEventIngress;
30use MediaWiki\Extension\EventBus\EventBusFactory;
31use MediaWiki\Extension\EventBus\Redirects\RedirectTarget;
32use MediaWiki\Extension\EventBus\Serializers\EventSerializer;
33use MediaWiki\Extension\EventBus\Serializers\MediaWiki\PageChangeEventSerializer;
34use MediaWiki\Extension\EventBus\Serializers\MediaWiki\PageEntitySerializer;
35use MediaWiki\Extension\EventBus\Serializers\MediaWiki\RevisionEntitySerializer;
36use MediaWiki\Extension\EventBus\Serializers\MediaWiki\UserEntitySerializer;
37use MediaWiki\Extension\EventBus\StreamNameMapper;
38use MediaWiki\Logger\LoggerFactory;
39use MediaWiki\Page\Event\PageCreatedEvent;
40use MediaWiki\Page\Event\PageCreatedListener;
41use MediaWiki\Page\Event\PageDeletedEvent;
42use MediaWiki\Page\Event\PageDeletedListener;
43use MediaWiki\Page\Event\PageHistoryVisibilityChangedEvent;
44use MediaWiki\Page\Event\PageHistoryVisibilityChangedListener;
45use MediaWiki\Page\Event\PageLatestRevisionChangedEvent;
46use MediaWiki\Page\Event\PageLatestRevisionChangedListener;
47use MediaWiki\Page\Event\PageMovedEvent;
48use MediaWiki\Page\Event\PageMovedListener;
49use MediaWiki\Page\PageLookup;
50use MediaWiki\Page\PageReference;
51use MediaWiki\Page\RedirectLookup;
52use MediaWiki\Page\WikiPage;
53use MediaWiki\Revision\RevisionStore;
54use MediaWiki\Storage\PageUpdateCauses;
55use MediaWiki\User\UserFactory;
56use Psr\Log\LoggerInterface;
57use RuntimeException;
58use UnexpectedValueException;
59use Wikimedia\Rdbms\IDBAccessObject;
60use Wikimedia\Timestamp\TimestampException;
61
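/**
 * Handles page domain events (latest revision changed, page deleted, page moved,
 * page created and page history visibility changed) by serializing them into
 * page change events and sending them to the page change stream via EventBus.
 */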
65class PageChangeEventIngress extends DomainEventIngress implements
66    PageLatestRevisionChangedListener,
67    PageDeletedListener,
68    PageMovedListener,
69    PageCreatedListener,
70    PageHistoryVisibilityChangedListener
71{
72    public const PAGE_CHANGE_STREAM_NAME_DEFAULT = "mediawiki.page_change.v1";
73
74    /**
75     * Name of the stream that events will be produced to.
76     * @var string
77     */
78    private string $streamName;
79
80    /**
81     * @var LoggerInterface
82     */
83    private LoggerInterface $logger;
84
85    /**
86     * @var EventBusFactory
87     */
88    private EventBusFactory $eventBusFactory;
89
90    /**
91     * @var PageChangeEventSerializer
92     */
93    private PageChangeEventSerializer $pageChangeEventSerializer;
94
95    /**
96     * @var UserFactory
97     */
98    private UserFactory $userFactory;
99
100    /**
101     * @var RevisionStore
102     */
103    private RevisionStore $revisionStore;
104
105    /**
106     * @var RedirectLookup
107     */
108    private RedirectLookup $redirectLookup;
109
110    /**
111     * @var PageLookup
112     */
113    private PageLookup $pageLookup;
114
115    public function __construct(
116        EventBusFactory $eventBusFactory,
117        StreamNameMapper $streamNameMapper,
118        EventSerializer $eventSerializer,
119        PageEntitySerializer $pageEntitySerializer,
120        UserEntitySerializer $userEntitySerializer,
121        RevisionEntitySerializer $revisionEntitySerializer,
122        UserFactory $userFactory,
123        RevisionStore $revisionStore,
124        RedirectLookup $redirectLookup,
125        PageLookup $pageLookup,
126    ) {
127        $this->logger = LoggerFactory::getInstance( 'EventBus.PageChangeEventIngress' );
128
129        $this->streamName = $streamNameMapper->resolve(
130            self::PAGE_CHANGE_STREAM_NAME_DEFAULT
131        );
132
133        $this->eventBusFactory = $eventBusFactory;
134
135        $this->pageChangeEventSerializer = new PageChangeEventSerializer(
136            $eventSerializer,
137            $pageEntitySerializer,
138            $userEntitySerializer,
139            $revisionEntitySerializer,
140        );
141
142        $this->userFactory = $userFactory;
143        $this->revisionStore = $revisionStore;
144        $this->redirectLookup = $redirectLookup;
145        $this->pageLookup = $pageLookup;
146    }
147
148    /**
149     * Returns a redirect target of supplied {@link PageReference}, if any.
150     *
151     * If the page reference does not represent a redirect, `null` is returned.
152     *
153     * See {@link RedirectTarget} for the meaning of its properties.
154     *
155     * TODO visible for testing only, move into RedirectLookup?
156     *
157     * @param PageReference $page
158     * @param PageLookup $pageLookup
159     * @param RedirectLookup $redirectLookup
160     * @return RedirectTarget|null
161     * @see RedirectTarget
162     */
163    public static function lookupRedirectTarget(
164        PageReference $page, PageLookup $pageLookup, RedirectLookup $redirectLookup
165    ): ?RedirectTarget {
166        if ( $page instanceof WikiPage ) {
167            // RedirectLookup doesn't support reading from the primary db, but we
168            // need the value from the new edit. Fetch directly through WikiPage which
169            // was updated with the new value as part of saving the new revision.
170            $redirectLinkTarget = $page->getRedirectTarget();
171        } else {
172            $redirectSourcePageReference =
173                $pageLookup->getPageByReference(
174                    $page,
175                    \Wikimedia\Rdbms\IDBAccessObject::READ_LATEST
176                );
177
178            $redirectLinkTarget =
179                $redirectSourcePageReference != null && $redirectSourcePageReference->isRedirect()
180                    ? $redirectLookup->getRedirectTarget( $redirectSourcePageReference ) : null;
181        }
182
183        if ( $redirectLinkTarget != null ) {
184            if ( !$redirectLinkTarget->isExternal() ) {
185                try {
186                    $redirectTargetPage = $pageLookup->getPageForLink( $redirectLinkTarget );
187
188                    return new RedirectTarget( $redirectLinkTarget, $redirectTargetPage );
189                } catch ( InvalidArgumentException ) {
190                    // silently ignore failed lookup, they are expected for anything but page targets
191                }
192            }
193
194            return new RedirectTarget( $redirectLinkTarget );
195        }
196
197        return null;
198    }
199
200    private function sendEvents(
201        string $streamName,
202        array $events
203    ): void {
204        $eventBus = $this->eventBusFactory->getInstanceForStream( $streamName );
205        DeferredUpdates::addCallableUpdate( static function () use ( $eventBus, $events ) {
206            $eventBus->send( $events );
207        } );
208    }
209
210    /**
211     * Handles a `PageLatestRevisionChangedEvent` and emits a corresponding page change event.
212     *
213     * This method is triggered when a page revision is updated. It filters out
214     * null edits (which do not change the page content) and constructs either
215     * a creation or edit event for downstream consumers, depending on the nature
216     * of the change.
217     *
218     * Null edits are ignored, as they are intended only to trigger side-effects
219     * and do not represent a meaningful change to page content.
220     *
221     * @param PageLatestRevisionChangedEvent $event
222     *   The domain event carrying information about the page revision update, including
223     *   the page ID, revision data, user identity, and edit result.
224     */
225    public function handlePageLatestRevisionChangedEvent(
226        PageLatestRevisionChangedEvent $event
227    ): void {
228        if ( $this->isContentChangeCause( $event ) ) {
229            // Null edits are only useful to trigger side-effects, and would be
230            //   confusing to consumers of these events.  Since these would not be able to
231            //   change page state, they also don't belong in here.  If filtering them out
232            //   breaks a downstream consumer, we should send them to a different stream.
233            if ( $event->getEditResult() && $event->getEditResult()->isNullEdit() ) {
234                return;
235            }
236
237            $performer = $this->userFactory->newFromUserIdentity( $event->getPerformer() );
238            $revisionRecord = $event->getLatestRevisionAfter();
239
240            $redirectTarget =
241                self::lookupRedirectTarget(
242                    $event->getPage(),
243                    $this->pageLookup,
244                    $this->redirectLookup
245                );
246
247            $pageChangeEvent = $event->isCreation()
248                ? $this->pageChangeEventSerializer->toCreateEvent(
249                    $this->streamName,
250                    $event->getPage(),
251                    $performer,
252                    $revisionRecord,
253                    $redirectTarget
254                )
255                : $this->pageChangeEventSerializer->toEditEvent(
256                    $this->streamName,
257                    $event->getPage(),
258                    $performer,
259                    $revisionRecord,
260                    $redirectTarget,
261                    $this->revisionStore->getRevisionById(
262                        $event->getPageRecordBefore()->getLatest()
263                    )
264                );
265
266            $this->sendEvents( $this->streamName, [ $pageChangeEvent ] );
267        }
268    }
269
270    /**
271     * Whether $event was emitted as a result of an action that modified content;
272     * this should match the code paths that previously would trigger onPageSaveComplete
273     * callbacks.
274     *
275     * @param PageLatestRevisionChangedEvent $event
276     * @return bool
277     */
278    private function isContentChangeCause( PageLatestRevisionChangedEvent $event ): bool {
279        return $event->getCause() === PageUpdateCauses::CAUSE_EDIT ||
280            $event->getCause() === PageUpdateCauses::CAUSE_IMPORT ||
281            $event->getCause() === PageUpdateCauses::CAUSE_ROLLBACK ||
282            $event->getCause() === PageUpdateCauses::CAUSE_UNDO ||
283            $event->getCause() === PageUpdateCauses::CAUSE_UPLOAD;
284    }
285
286    /**
287     * Handle a page deletion event by creating and sending a corresponding page change event.
288     *
289     * This method processes page deletion events and transforms them into page change events
290     * that can be consumed by event subscribers. It handles both regular deletions and
291     * suppressed deletions (where performer information is withheld).
292     *
293     * The generated event includes:
294     * - Page metadata (ID, title, etc.)
295     * - Deletion details (reason, timestamp, number of revisions deleted)
296     * - Performer information (unless suppressed)
297     * - Redirect target information (if the deleted page was a redirect)
298     *
299     * For suppressed deletions (oversight/revision deletion), performer information
300     * is intentionally omitted from the event for security reasons.
301     * See: https://phabricator.wikimedia.org/T342487
302     *
303     * @param PageDeletedEvent $event The page deletion event to process
304     * @throws TimestampException
305     * @see PageChangeEventSerializer::toDeleteEvent() For the event format
306     */
307    public function handlePageDeletedEvent( PageDeletedEvent $event ): void {
308        $deletedRev = $event->getLatestRevisionBefore();
309
310        // Don't set performer in the event if this delete suppresses the page from other admins.
311        // https://phabricator.wikimedia.org/T342487
312        $performerForEvent = $event->isSuppressed() ?
313            null :
314            $this->userFactory->newFromUserIdentity( $event->getPerformer() );
315
316        $redirectTarget = null;
317
318        if ( $event->wasRedirect() ) {
319            $targetBefore = $event->getRedirectTargetBefore();
320            if ( $targetBefore ) {
321                $redirectTarget = new RedirectTarget( $targetBefore );
322            }
323        }
324
325        $pageChangeEvent = $this->pageChangeEventSerializer->toDeleteEvent(
326            $this->streamName,
327            $event->getDeletedPage(),
328            $performerForEvent,
329            $deletedRev,
330            $event->getReason(),
331            $event->getEventTimestamp()->getTimestamp(),
332            $event->getArchivedRevisionCount(),
333            $redirectTarget,
334            $event->isSuppressed()
335        );
336
337        $this->sendEvents( $this->streamName, [ $pageChangeEvent ] );
338    }
339
340    /**
341     * Handles a page moved event by generating and sending a corresponding
342     * page change event.
343     *
344     * This method processes a `PageMovedEvent`, retrieves the necessary page state
345     * before and after the move, obtains user and revision context, identifies
346     * whether a redirect was created, and serializes all this information into
347     * a page change move event.
348     *
349     * Passes $event->getPageRecordBefore() directly to toMoveEvent, which accepts LinkTarget|PageReference.
350     *
351     * @param PageMovedEvent $event The event representing a page move, including
352     *                              references to the page before and after the move,
353     *                              the performing user, reason for the move, and
354     *                              any redirect that may have been created.
355     *
356     * @throws InvalidArgumentException If the moved-to page could not be found
357     *                                  using the latest page data.
358     */
359    public function handlePageMovedEvent( PageMovedEvent $event ): void {
360        if ( !$event->getPageRecordAfter()->exists() ) {
361            throw new InvalidArgumentException(
362                "No page moved from '{$event->getPageRecordBefore()->getDBkey()}"
363                . "to '{$event->getPageRecordAfter()->getDBkey()}'"
364                . " with ID {$event->getPageId()} could be found"
365            );
366        }
367
368        $performer = $this->userFactory->newFromUserIdentity( $event->getPerformer() );
369
370        $redirectTarget =
371            self::lookupRedirectTarget(
372                $event->getPageRecordAfter(), $this->pageLookup,
373                $this->redirectLookup
374            );
375
376        // The parentRevision is needed since a page move creates a new revision.
377        $revision = $this->revisionStore->getRevisionById(
378            $event->getPageRecordAfter()->getLatest()
379        );
380        $parentRevision = $this->revisionStore->getRevisionById(
381            $event->getPageRecordBefore()->getLatest()
382        );
383
384        $event = $this->pageChangeEventSerializer->toMoveEvent(
385            $this->streamName,
386            $event->getPageRecordAfter(),
387            $performer,
388            $revision,
389            $parentRevision,
390            $event->getPageRecordBefore(),
391            $event->getReason(),
392            $event->getRedirectPage(),
393            $redirectTarget
394        );
395
396        $this->sendEvents( $this->streamName, [ $event ] );
397    }
398
399    /**
400     * Handles `PageCreatedEvent` emitted after a page as been undeleted
401     * (e.g. a proper undelete into a new page).
402     *
403     * @param PageCreatedEvent $event
404     * @return void
405     * @throws TimestampException
406     */
407    public function handlePageCreatedEvent( PageCreatedEvent $event ): void {
408        if ( $event->getCause() === PageUpdateCauses::CAUSE_UNDELETE ) {
409            $performer = $this->userFactory->newFromUserIdentity( $event->getPerformer() );
410
411            $redirectTarget =
412                self::lookupRedirectTarget(
413                    $event->getPageRecordAfter(),
414                    $this->pageLookup,
415                    $this->redirectLookup
416                );
417
418            // TODO: replace with $event->getPageRecordBefore()?->getId();
419            //  once EventBus CI fully adopts php 8.
420            $oldPage = $event->getPageRecordBefore();
421
422            $event = $this->pageChangeEventSerializer->toUndeleteEvent(
423                $this->streamName,
424                $event->getPageRecordAfter(),
425                $performer,
426                $event->getLatestRevisionAfter(),
427                $event->getReason(),
428                $redirectTarget,
429                $event->getEventTimestamp()->getTimestamp(),
430                ( $oldPage !== null ) ? $oldPage->getId() : null
431            );
432
433            $this->sendEvents( $this->streamName, [ $event ] );
434        }
435    }
436
437    /**
438     * Handles `PageHistoryVisibilityChangedEvent` events.
439     *
440     * This method checks whether the visibility of the current revision of a page has changed.
441     * If so, it emits a corresponding `visibility_change` event to the configured stream.
442     *
443     * Notes:
444     * - Uses primary DB reads to prevent leaking suppressed data due to replication lag.
445     * - Emits private events when suppression occurs to match MediaWiki log visibility conventions.
446     *
447     * @param PageHistoryVisibilityChangedEvent $event
448     *
449     * @throws RuntimeException
450     * @throws UnexpectedValueException
451     * @throws TimestampException
452     */
453    public function handlePageHistoryVisibilityChangedEvent( PageHistoryVisibilityChangedEvent $event ): void {
454        $pageId = $event->getPageId();
455        $currentRevId = $event->getCurrentRevisionId();
456
457        // If the visibility was not changed on the current revision of the page,
458        // then we can return early.
459        // PageChange only represents changes to the current state of the page.
460        if ( !$event->wasCurrentRevisionAffected() ) {
461            $this->logger->debug(
462                "Revision visibility on page $pageId current revision $currentRevId " .
463                "was not changed. Not emitting event."
464            );
465            return;
466        }
467
468        // Read from primary since due to replication lag the updated field visibility
469        // might not yet be available on a replica, and we are at risk of leaking
470        // just recently suppressed data.
471        $revisionRecord = $this->revisionStore->getRevisionByPageId(
472            $pageId,
473            $currentRevId,
474            IDBAccessObject::READ_LATEST
475        );
476
477        if ( $revisionRecord === null ) {
478            throw new RuntimeException(
479                "Failed looking up page $pageId revision $currentRevId " .
480                "when checking if a visibility change event should be emitted to stream " .
481                $this->streamName
482            );
483        }
484
485        // current revision's visibility should be the same as we are given in
486        // $visibilityChanges['newBits']. Just in case, assert that this is true.
487        if ( $revisionRecord->getVisibility() != $event->getVisibilityAfter( $currentRevId ) ) {
488            throw new UnexpectedValueException(
489                "Page $pageId revision $currentRevId's' visibility did not match the " .
490                'expected visibility change provided by event. Revision visibility is ' .
491                $revisionRecord->getVisibility() . '. visibility changed to ' .
492                $event->getVisibilityAfter( $currentRevId )
493            );
494        }
495
496        // If this revision is 'suppressed' AKA restricted, then the person performing
497        // 'RevisionDelete' should not be visible in public data.
498        // https://phabricator.wikimedia.org/T342487
499        //
500        // NOTE: This event stream tries to match the visibility of MediaWiki core logs,
501        // where regular delete/revision events are public, and suppress/revision events
502        // are private. In MediaWiki core logs, private events are fully hidden from
503        // the public.  Here, we need to produce a 'private' event to the
504        // mediawiki.page_change stream, to indicate to consumers that
505        // they should also 'suppress' the revision.  When this is done, we need to
506        // make sure that we do not reproduce the data that has been suppressed
507        // in the event itself.  E.g. if the username of the editor of the revision has been
508        // suppressed, we should not include any information about that editor in the event.
509        $performerForEvent = $event->isSuppressed() ? null : $event->getPerformer();
510
511        $event = $this->pageChangeEventSerializer->toVisibilityChangeEvent(
512            $this->streamName,
513            $event->getPage(),
514            $performerForEvent,
515            $revisionRecord,
516            $event->getVisibilityBefore( $currentRevId ),
517            $event->getEventTimestamp()->getTimestamp()
518        );
519
520        $this->sendEvents( $this->streamName, [ $event ] );
521    }
522}