Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
92.31% covered (success)
92.31%
168 / 182
66.67% covered (warning)
66.67%
6 / 9
CRAP
0.00% covered (danger)
0.00%
0 / 1
PageChangeEventIngress
92.31% covered (success)
92.31%
168 / 182
66.67% covered (warning)
66.67%
6 / 9
33.50
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
19 / 19
100.00% covered (success)
100.00%
1 / 1
1
 lookupRedirectTarget
88.24% covered (warning)
88.24%
15 / 17
0.00% covered (danger)
0.00%
0 / 1
7.08
 sendEvents
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 handlePageRevisionUpdatedEvent
96.67% covered (success)
96.67%
29 / 30
0.00% covered (danger)
0.00%
0 / 1
5
 isContentChangeCause
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
5
 handlePageDeletedEvent
100.00% covered (success)
100.00%
21 / 21
100.00% covered (success)
100.00%
1 / 1
4
 handlePageMovedEvent
100.00% covered (success)
100.00%
30 / 30
100.00% covered (success)
100.00%
1 / 1
2
 handlePageCreatedEvent
100.00% covered (success)
100.00%
20 / 20
100.00% covered (success)
100.00%
1 / 1
3
 handlePageHistoryVisibilityChangedEvent
69.44% covered (warning)
69.44%
25 / 36
0.00% covered (danger)
0.00%
0 / 1
5.71
1<?php
2/**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @author Andrew Otto <otto@wikimedia.org>
20 * @author Gabriele Modena <gmodena@wikimedia.org>
21 */
22
23declare( strict_types=1 );
24
25namespace MediaWiki\Extension\EventBus\MediaWikiEventSubscribers;
26
27use IDBAccessObject;
28use InvalidArgumentException;
29use MediaWiki\Config\Config;
30use MediaWiki\Content\ContentHandlerFactory;
31use MediaWiki\Deferred\DeferredUpdates;
32use MediaWiki\DomainEvent\DomainEventIngress;
33use MediaWiki\Extension\EventBus\EventBusFactory;
34use MediaWiki\Extension\EventBus\Redirects\RedirectTarget;
35use MediaWiki\Extension\EventBus\Serializers\EventSerializer;
36use MediaWiki\Extension\EventBus\Serializers\MediaWiki\PageChangeEventSerializer;
37use MediaWiki\Extension\EventBus\Serializers\MediaWiki\PageEntitySerializer;
38use MediaWiki\Extension\EventBus\Serializers\MediaWiki\RevisionEntitySerializer;
39use MediaWiki\Extension\EventBus\Serializers\MediaWiki\RevisionSlotEntitySerializer;
40use MediaWiki\Extension\EventBus\Serializers\MediaWiki\UserEntitySerializer;
41use MediaWiki\Extension\EventBus\StreamNameMapper;
42use MediaWiki\Http\Telemetry;
43use MediaWiki\Logger\LoggerFactory;
44use MediaWiki\Page\Event\PageCreatedEvent;
45use MediaWiki\Page\Event\PageCreatedListener;
46use MediaWiki\Page\Event\PageDeletedEvent;
47use MediaWiki\Page\Event\PageDeletedListener;
48use MediaWiki\Page\Event\PageHistoryVisibilityChangedEvent;
49use MediaWiki\Page\Event\PageHistoryVisibilityChangedListener;
50use MediaWiki\Page\Event\PageMovedEvent;
51use MediaWiki\Page\Event\PageMovedListener;
52use MediaWiki\Page\Event\PageRevisionUpdatedEvent;
53use MediaWiki\Page\Event\PageRevisionUpdatedListener;
54use MediaWiki\Page\PageLookup;
55use MediaWiki\Page\PageReference;
56use MediaWiki\Page\RedirectLookup;
57use MediaWiki\Page\WikiPage;
58use MediaWiki\Revision\RevisionStore;
59use MediaWiki\Storage\PageUpdateCauses;
60use MediaWiki\Title\TitleFormatter;
61use MediaWiki\User\CentralId\CentralIdLookup;
62use MediaWiki\User\UserFactory;
63use MediaWiki\User\UserGroupManager;
64use Psr\Log\LoggerInterface;
65use RuntimeException;
66use UnexpectedValueException;
67use Wikimedia\Timestamp\TimestampException;
68use Wikimedia\UUID\GlobalIdGenerator;
69
/**
 * Forwards MediaWiki page domain events to the page change stream.
 *
 * Listens for page revision updates, deletions, moves, creations caused by
 * an undelete, and history visibility changes. Each handled event is
 * serialized with {@link PageChangeEventSerializer} and queued for sending
 * through the EventBus instance configured for the stream.
 */
class PageChangeEventIngress extends DomainEventIngress implements
    PageRevisionUpdatedListener,
    PageDeletedListener,
    PageMovedListener,
    PageCreatedListener,
    PageHistoryVisibilityChangedListener
{
    public const PAGE_CHANGE_STREAM_NAME_DEFAULT = "mediawiki.page_change.v1";

    /** Name of the stream that events will be produced to. */
    private string $streamName;

    /** Logger for the 'EventBus.PageChangeEventIngress' channel. */
    private LoggerInterface $logger;

    /** Used to obtain the EventBus instance for the target stream. */
    private EventBusFactory $eventBusFactory;

    /** Builds the page change event payloads. */
    private PageChangeEventSerializer $pageChangeEventSerializer;

    /** Used to turn the performer's user identity into a user object. */
    private UserFactory $userFactory;

    /** Used to load revision records by revision ID or page ID. */
    private RevisionStore $revisionStore;

    /** Used to resolve redirect targets of pages that are not WikiPage instances. */
    private RedirectLookup $redirectLookup;

    /** Used to resolve page references and link targets to page records. */
    private PageLookup $pageLookup;

    /**
     * Resolves the stream name and wires up the serializer stack.
     *
     * @param EventBusFactory $eventBusFactory
     * @param StreamNameMapper $streamNameMapper Resolves the default stream name
     *   to the stream name events are actually produced to.
     * @param Config $mainConfig
     * @param GlobalIdGenerator $globalIdGenerator
     * @param UserGroupManager $userGroupManager
     * @param TitleFormatter $titleFormatter
     * @param UserFactory $userFactory
     * @param RevisionStore $revisionStore
     * @param ContentHandlerFactory $contentHandlerFactory
     * @param RedirectLookup $redirectLookup
     * @param PageLookup $pageLookup
     * @param CentralIdLookup $centralIdLookup
     */
    public function __construct(
        EventBusFactory $eventBusFactory,
        StreamNameMapper $streamNameMapper,
        Config $mainConfig,
        GlobalIdGenerator $globalIdGenerator,
        UserGroupManager $userGroupManager,
        TitleFormatter $titleFormatter,
        UserFactory $userFactory,
        RevisionStore $revisionStore,
        ContentHandlerFactory $contentHandlerFactory,
        RedirectLookup $redirectLookup,
        PageLookup $pageLookup,
        CentralIdLookup $centralIdLookup,
    ) {
        $this->logger = LoggerFactory::getInstance( 'EventBus.PageChangeEventIngress' );

        $this->streamName = $streamNameMapper->resolve(
            self::PAGE_CHANGE_STREAM_NAME_DEFAULT
        );

        $this->eventBusFactory = $eventBusFactory;

        // The same user serializer instance is shared by the top-level event
        // serializer and the revision serializer.
        $userEntitySerializer = new UserEntitySerializer( $userFactory, $userGroupManager, $centralIdLookup );

        $this->pageChangeEventSerializer = new PageChangeEventSerializer(
            new EventSerializer( $mainConfig, $globalIdGenerator, Telemetry::getInstance() ),
            new PageEntitySerializer( $mainConfig, $titleFormatter ),
            $userEntitySerializer,
            new RevisionEntitySerializer(
                new RevisionSlotEntitySerializer( $contentHandlerFactory ),
                $userEntitySerializer
            )
        );

        $this->userFactory = $userFactory;
        $this->revisionStore = $revisionStore;
        $this->redirectLookup = $redirectLookup;
        $this->pageLookup = $pageLookup;
    }

    /**
     * Returns a redirect target of supplied {@link PageReference}, if any.
     *
     * If the page reference does not represent a redirect, `null` is returned.
     *
     * For a non-external target, the target page is looked up and attached to
     * the result; if that lookup fails with an InvalidArgumentException, the
     * result carries the link target only.
     *
     * See {@link RedirectTarget} for the meaning of its properties.
     *
     * TODO visible for testing only, move into RedirectLookup?
     *
     * @param PageReference $page
     * @param PageLookup $pageLookup
     * @param RedirectLookup $redirectLookup
     * @return RedirectTarget|null
     * @see RedirectTarget
     */
    public static function lookupRedirectTarget(
        PageReference $page, PageLookup $pageLookup, RedirectLookup $redirectLookup
    ): ?RedirectTarget {
        if ( $page instanceof WikiPage ) {
            // RedirectLookup doesn't support reading from the primary db, but we
            // need the value from the new edit. Fetch directly through WikiPage which
            // was updated with the new value as part of saving the new revision.
            $redirectLinkTarget = $page->getRedirectTarget();
        } else {
            $redirectSourcePage = $pageLookup->getPageByReference(
                $page,
                \Wikimedia\Rdbms\IDBAccessObject::READ_LATEST
            );

            $redirectLinkTarget =
                $redirectSourcePage !== null && $redirectSourcePage->isRedirect()
                    ? $redirectLookup->getRedirectTarget( $redirectSourcePage )
                    : null;
        }

        if ( $redirectLinkTarget === null ) {
            return null;
        }

        if ( !$redirectLinkTarget->isExternal() ) {
            try {
                $redirectTargetPage = $pageLookup->getPageForLink( $redirectLinkTarget );

                return new RedirectTarget( $redirectLinkTarget, $redirectTargetPage );
            } catch ( InvalidArgumentException ) {
                // silently ignore failed lookup, they are expected for anything but page targets
            }
        }

        return new RedirectTarget( $redirectLinkTarget );
    }

    /**
     * Queues the given events for sending to a stream.
     *
     * The EventBus instance is resolved immediately; the actual send happens
     * inside a deferred update.
     *
     * @param string $streamName Stream to obtain the EventBus instance for
     * @param array $events Serialized events to send
     */
    private function sendEvents(
        string $streamName,
        array $events
    ): void {
        $eventBus = $this->eventBusFactory->getInstanceForStream( $streamName );
        DeferredUpdates::addCallableUpdate( static function () use ( $eventBus, $events ) {
            $eventBus->send( $events );
        } );
    }

    /**
     * Handles a `PageRevisionUpdatedEvent` and emits a corresponding page change event.
     *
     * Only events whose cause is a content change (see isContentChangeCause())
     * are processed. Null edits are skipped. Depending on whether the update
     * created the page, either a create or an edit event is produced.
     *
     * @param PageRevisionUpdatedEvent $event
     *   The domain event carrying information about the page revision update, including
     *   the page, revision data, user identity, and edit result.
     */
    public function handlePageRevisionUpdatedEvent( PageRevisionUpdatedEvent $event ): void {
        if ( !$this->isContentChangeCause( $event ) ) {
            return;
        }

        // Null edits are only useful to trigger side-effects, and would be
        //   confusing to consumers of these events.  Since these would not be able to
        //   change page state, they also don't belong in here.  If filtering them out
        //   breaks a downstream consumer, we should send them to a different stream.
        if ( $event->getEditResult()?->isNullEdit() ) {
            return;
        }

        $performer = $this->userFactory->newFromUserIdentity( $event->getPerformer() );
        $revisionRecord = $event->getLatestRevisionAfter();

        $redirectTarget = self::lookupRedirectTarget(
            $event->getPage(),
            $this->pageLookup,
            $this->redirectLookup
        );

        // The parent revision is only looked up for edits; creations have none.
        $pageChangeEvent = $event->isCreation()
            ? $this->pageChangeEventSerializer->toCreateEvent(
                $this->streamName,
                $event->getPage(),
                $performer,
                $revisionRecord,
                $redirectTarget
            )
            : $this->pageChangeEventSerializer->toEditEvent(
                $this->streamName,
                $event->getPage(),
                $performer,
                $revisionRecord,
                $redirectTarget,
                $this->revisionStore->getRevisionById(
                    $event->getPageRecordBefore()->getLatest()
                )
            );

        $this->sendEvents( $this->streamName, [ $pageChangeEvent ] );
    }

    /**
     * Whether $event was emitted as a result of an action that modified content;
     * this should match the code paths that previously would trigger onPageSaveComplete
     * callbacks.
     *
     * @param PageRevisionUpdatedEvent $event
     * @return bool True when the cause is an edit, import, rollback, undo or upload
     */
    private function isContentChangeCause( PageRevisionUpdatedEvent $event ): bool {
        return in_array(
            $event->getCause(),
            [
                PageUpdateCauses::CAUSE_EDIT,
                PageUpdateCauses::CAUSE_IMPORT,
                PageUpdateCauses::CAUSE_ROLLBACK,
                PageUpdateCauses::CAUSE_UNDO,
                PageUpdateCauses::CAUSE_UPLOAD,
            ],
            true
        );
    }

    /**
     * Handle a page deletion event by creating and sending a corresponding page change event.
     *
     * The serializer receives the deleted page, the latest revision before
     * deletion, the reason, the event timestamp, the archived revision count,
     * the previous redirect target (if the page was a redirect and a target
     * is available) and the suppression flag.
     *
     * For suppressed deletions, performer information is intentionally
     * omitted from the event.
     * See: https://phabricator.wikimedia.org/T342487
     *
     * @param PageDeletedEvent $event The page deletion event to process
     * @throws TimestampException
     * @see PageChangeEventSerializer::toDeleteEvent() For the event format
     */
    public function handlePageDeletedEvent( PageDeletedEvent $event ): void {
        $deletedRev = $event->getLatestRevisionBefore();

        // Don't set performer in the event if this delete suppresses the page from other admins.
        // https://phabricator.wikimedia.org/T342487
        $performerForEvent = $event->isSuppressed()
            ? null
            : $this->userFactory->newFromUserIdentity( $event->getPerformer() );

        $redirectTarget = null;

        if ( $event->wasRedirect() ) {
            $targetBefore = $event->getRedirectTargetBefore();
            if ( $targetBefore ) {
                $redirectTarget = new RedirectTarget( $targetBefore );
            }
        }

        $pageChangeEvent = $this->pageChangeEventSerializer->toDeleteEvent(
            $this->streamName,
            $event->getDeletedPage(),
            $performerForEvent,
            $deletedRev,
            $event->getReason(),
            $event->getEventTimestamp()->getTimestamp(),
            $event->getArchivedRevisionCount(),
            $redirectTarget,
            $event->isSuppressed()
        );

        $this->sendEvents( $this->streamName, [ $pageChangeEvent ] );
    }

    /**
     * Handles a page moved event by generating and sending a corresponding
     * page change event.
     *
     * Collects the page records before and after the move, the performer, the
     * latest revisions of both records, the redirect page reported by the
     * event and the redirect target of the moved page, and serializes them
     * into a page change move event.
     *
     * Passes $event->getPageRecordBefore() directly to toMoveEvent, which accepts LinkTarget|PageReference.
     *
     * @param PageMovedEvent $event The event representing a page move, including
     *                              references to the page before and after the move,
     *                              the performing user, reason for the move, and
     *                              any redirect that may have been created.
     *
     * @throws InvalidArgumentException If the page record after the move
     *                                  does not exist.
     */
    public function handlePageMovedEvent( PageMovedEvent $event ): void {
        if ( !$event->getPageRecordAfter()->exists() ) {
            throw new InvalidArgumentException(
                "No page moved from '{$event->getPageRecordBefore()->getDBkey()}' "
                . "to '{$event->getPageRecordAfter()->getDBkey()}'"
                . " with ID {$event->getPageId()} could be found"
            );
        }

        $performer = $this->userFactory->newFromUserIdentity( $event->getPerformer() );

        $redirectTarget = self::lookupRedirectTarget(
            $event->getPageRecordAfter(),
            $this->pageLookup,
            $this->redirectLookup
        );

        // The parentRevision is needed since a page move creates a new revision.
        $revision = $this->revisionStore->getRevisionById(
            $event->getPageRecordAfter()->getLatest()
        );
        $parentRevision = $this->revisionStore->getRevisionById(
            $event->getPageRecordBefore()->getLatest()
        );

        $pageChangeEvent = $this->pageChangeEventSerializer->toMoveEvent(
            $this->streamName,
            $event->getPageRecordAfter(),
            $performer,
            $revision,
            $parentRevision,
            $event->getPageRecordBefore(),
            $event->getReason(),
            $event->getRedirectPage(),
            $redirectTarget
        );

        $this->sendEvents( $this->streamName, [ $pageChangeEvent ] );
    }

    /**
     * Handles `PageCreatedEvent` emitted after a page has been undeleted
     * (e.g. a proper undelete into a new page).
     *
     * Events with any cause other than an undelete are ignored.
     *
     * @param PageCreatedEvent $event
     * @return void
     * @throws TimestampException
     */
    public function handlePageCreatedEvent( PageCreatedEvent $event ): void {
        if ( $event->getCause() !== PageUpdateCauses::CAUSE_UNDELETE ) {
            return;
        }

        $performer = $this->userFactory->newFromUserIdentity( $event->getPerformer() );

        $redirectTarget = self::lookupRedirectTarget(
            $event->getPageRecordAfter(),
            $this->pageLookup,
            $this->redirectLookup
        );

        $pageChangeEvent = $this->pageChangeEventSerializer->toUndeleteEvent(
            $this->streamName,
            $event->getPageRecordAfter(),
            $performer,
            $event->getLatestRevisionAfter(),
            $event->getReason(),
            $redirectTarget,
            $event->getEventTimestamp()->getTimestamp(),
            // ID of the page record before the undelete, or null if there is none.
            $event->getPageRecordBefore()?->getId()
        );

        $this->sendEvents( $this->streamName, [ $pageChangeEvent ] );
    }

    /**
     * Handles `PageHistoryVisibilityChangedEvent` events.
     *
     * This method checks whether the visibility of the current revision of a page has changed.
     * If so, it emits a corresponding `visibility_change` event to the configured stream.
     *
     * Notes:
     * - Uses primary DB reads to prevent leaking suppressed data due to replication lag.
     * - Emits private events when suppression occurs to match MediaWiki log visibility conventions.
     *
     * @param PageHistoryVisibilityChangedEvent $event
     *
     * @throws RuntimeException If the current revision cannot be loaded
     * @throws UnexpectedValueException If the stored visibility differs from
     *   the visibility reported by the event
     * @throws TimestampException
     */
    public function handlePageHistoryVisibilityChangedEvent( PageHistoryVisibilityChangedEvent $event ): void {
        $pageId = $event->getPageId();
        $currentRevId = $event->getCurrentRevisionId();

        // If the visibility was not changed on the current revision of the page,
        // then we can return early.
        // PageChange only represents changes to the current state of the page.
        if ( !$event->wasCurrentRevisionAffected() ) {
            $this->logger->debug(
                "Revision visibility on page $pageId current revision $currentRevId " .
                "was not changed. Not emitting event."
            );
            return;
        }

        // Read from primary since due to replication lag the updated field visibility
        // might not yet be available on a replica, and we are at risk of leaking
        // just recently suppressed data.
        $revisionRecord = $this->revisionStore->getRevisionByPageId(
            $pageId,
            $currentRevId,
            \Wikimedia\Rdbms\IDBAccessObject::READ_LATEST
        );

        if ( $revisionRecord === null ) {
            throw new RuntimeException(
                "Failed looking up page $pageId revision $currentRevId " .
                "when checking if a visibility change event should be emitted to stream " .
                $this->streamName
            );
        }

        // The current revision's visibility should be the same as the value the
        // event reports via getVisibilityAfter(). Just in case, assert that this is true.
        if ( $revisionRecord->getVisibility() != $event->getVisibilityAfter( $currentRevId ) ) {
            throw new UnexpectedValueException(
                "Page $pageId revision $currentRevId's visibility did not match the " .
                'expected visibility change provided by event. Revision visibility is ' .
                $revisionRecord->getVisibility() . '. visibility changed to ' .
                $event->getVisibilityAfter( $currentRevId )
            );
        }

        // If this revision is 'suppressed' AKA restricted, then the person performing
        // 'RevisionDelete' should not be visible in public data.
        // https://phabricator.wikimedia.org/T342487
        //
        // NOTE: This event stream tries to match the visibility of MediaWiki core logs,
        // where regular delete/revision events are public, and suppress/revision events
        // are private. In MediaWiki core logs, private events are fully hidden from
        // the public.  Here, we need to produce a 'private' event to the
        // mediawiki.page_change stream, to indicate to consumers that
        // they should also 'suppress' the revision.  When this is done, we need to
        // make sure that we do not reproduce the data that has been suppressed
        // in the event itself.  E.g. if the username of the editor of the revision has been
        // suppressed, we should not include any information about that editor in the event.
        $performerForEvent = $event->isSuppressed() ? null : $event->getPerformer();

        $pageChangeEvent = $this->pageChangeEventSerializer->toVisibilityChangeEvent(
            $this->streamName,
            $event->getPage(),
            $performerForEvent,
            $revisionRecord,
            $event->getVisibilityBefore( $currentRevId ),
            $event->getEventTimestamp()->getTimestamp()
        );

        $this->sendEvents( $this->streamName, [ $pageChangeEvent ] );
    }
}