Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
6.83% covered (danger)
6.83%
11 / 161
0.00% covered (danger)
0.00%
0 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
PageChangeHooks
6.83% covered (danger)
6.83%
11 / 161
0.00% covered (danger)
0.00%
0 / 10
709.13
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 19
0.00% covered (danger)
0.00%
0 / 1
2
 sendEvents
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
 onPageSaveComplete
0.00% covered (danger)
0.00%
0 / 21
0.00% covered (danger)
0.00%
0 / 1
12
 onPageMoveComplete
0.00% covered (danger)
0.00%
0 / 20
0.00% covered (danger)
0.00%
0 / 1
12
 onPageDelete
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 onPageDeleteComplete
0.00% covered (danger)
0.00%
0 / 16
0.00% covered (danger)
0.00%
0 / 1
6
 onPageUndeleteComplete
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
2
 onArticleRevisionVisibilitySet
0.00% covered (danger)
0.00%
0 / 50
0.00% covered (danger)
0.00%
0 / 1
72
 isSecretRevisionVisibilityChange
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
6
 lookupRedirectTarget
84.62% covered (warning)
84.62%
11 / 13
0.00% covered (danger)
0.00%
0 / 1
7.18
1<?php
2/**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @author Andrew Otto <otto@wikimedia.org>
20 */
21
22namespace MediaWiki\Extension\EventBus\HookHandlers\MediaWiki;
23
24use Exception;
25use IDBAccessObject;
26use InvalidArgumentException;
27use ManualLogEntry;
28use MediaWiki\Config\Config;
29use MediaWiki\Content\ContentHandlerFactory;
30use MediaWiki\Context\RequestContext;
31use MediaWiki\Deferred\DeferredUpdates;
32use MediaWiki\Extension\EventBus\EventBusFactory;
33use MediaWiki\Extension\EventBus\Redirects\RedirectTarget;
34use MediaWiki\Extension\EventBus\Serializers\EventSerializer;
35use MediaWiki\Extension\EventBus\Serializers\MediaWiki\PageChangeEventSerializer;
36use MediaWiki\Extension\EventBus\Serializers\MediaWiki\PageEntitySerializer;
37use MediaWiki\Extension\EventBus\Serializers\MediaWiki\RevisionEntitySerializer;
38use MediaWiki\Extension\EventBus\Serializers\MediaWiki\RevisionSlotEntitySerializer;
39use MediaWiki\Extension\EventBus\Serializers\MediaWiki\UserEntitySerializer;
40use MediaWiki\Extension\EventBus\StreamNameMapper;
41use MediaWiki\Hook\ArticleRevisionVisibilitySetHook;
42use MediaWiki\Hook\PageMoveCompleteHook;
43use MediaWiki\Http\Telemetry;
44use MediaWiki\Logger\LoggerFactory;
45use MediaWiki\Page\Hook\PageDeleteCompleteHook;
46use MediaWiki\Page\Hook\PageDeleteHook;
47use MediaWiki\Page\Hook\PageUndeleteCompleteHook;
48use MediaWiki\Page\PageLookup;
49use MediaWiki\Page\PageReference;
50use MediaWiki\Page\ProperPageIdentity;
51use MediaWiki\Page\RedirectLookup;
52use MediaWiki\Page\WikiPageFactory;
53use MediaWiki\Permissions\Authority;
54use MediaWiki\Revision\RevisionRecord;
55use MediaWiki\Revision\RevisionStore;
56use MediaWiki\Storage\Hook\PageSaveCompleteHook;
57use MediaWiki\Title\TitleFormatter;
58use MediaWiki\User\UserFactory;
59use MediaWiki\User\UserGroupManager;
60use Psr\Log\LoggerInterface;
61use StatusValue;
62use Wikimedia\UUID\GlobalIdGenerator;
63use WikiPage;
64
65/**
66 * HookHandler for sending mediawiki/page/change events
67 * that represent changes to the current state of how a MediaWiki Page
68 * looks to a non-logged-in / anonymous / public user.
69 *
70 * In MediaWiki, what 'state' is part of the Page is not clearly defined,
71 * so we make some choices.
72 * - Updates to past revisions (e.g. deleting old revisions) are not included.
73 * - Information about editing restrictions is not included.
74 * - Content bodies are not included here, although they may be added
75 *   in other streams via enrichment.
76 */
77class PageChangeHooks implements
78    PageSaveCompleteHook,
79    PageMoveCompleteHook,
80    PageDeleteCompleteHook,
81    PageUndeleteCompleteHook,
82    PageDeleteHook,
83    ArticleRevisionVisibilitySetHook
84{
85
86    public const PAGE_CHANGE_STREAM_NAME_DEFAULT = 'mediawiki.page_change.v1';
87
88    /**
89     * Name of the stream that events will be produced to.
90     * @var string
91     */
92    private string $streamName;
93
94    /**
95     * @var LoggerInterface
96     */
97    private LoggerInterface $logger;
98
99    /**
100     * @var EventBusFactory
101     */
102    private EventBusFactory $eventBusFactory;
103
104    /**
105     * @var PageChangeEventSerializer
106     */
107    private PageChangeEventSerializer $pageChangeEventSerializer;
108
109    /**
110     * @var WikiPageFactory
111     */
112    private WikiPageFactory $wikiPageFactory;
113
114    /**
115     * @var UserFactory
116     */
117    private UserFactory $userFactory;
118
119    /**
120     * @var RevisionStore
121     */
122    private RevisionStore $revisionStore;
123
124    /**
125     * @var RedirectLookup
126     */
127    private RedirectLookup $redirectLookup;
128
129    /**
130     * @var PageLookup
131     */
132    private PageLookup $pageLookup;
133
134    /**
135     * Temporarily holds a map of page ID to redirect target between
136     * {@link onPageDelete} and {@link onPageDeleteComplete}.
137     * @var array<int, RedirectTarget>
138     */
139    private array $deletedPageRedirectTarget = [];
140
141    /**
142     * @param EventBusFactory $eventBusFactory
143     * @param StreamNameMapper $streamNameMapper
144     * @param Config $mainConfig
145     * @param GlobalIdGenerator $globalIdGenerator
146     * @param UserGroupManager $userGroupManager
147     * @param TitleFormatter $titleFormatter
148     * @param WikiPageFactory $wikiPageFactory
149     * @param UserFactory $userFactory
150     * @param RevisionStore $revisionStore
151     * @param ContentHandlerFactory $contentHandlerFactory
152     * @param RedirectLookup $redirectLookup
153     * @param PageLookup $pageLookup
154     */
155    public function __construct(
156        EventBusFactory $eventBusFactory,
157        StreamNameMapper $streamNameMapper,
158        Config $mainConfig,
159        GlobalIdGenerator $globalIdGenerator,
160        UserGroupManager $userGroupManager,
161        TitleFormatter $titleFormatter,
162        WikiPageFactory $wikiPageFactory,
163        UserFactory $userFactory,
164        RevisionStore $revisionStore,
165        ContentHandlerFactory $contentHandlerFactory,
166        RedirectLookup $redirectLookup,
167        PageLookup $pageLookup
168    ) {
169        $this->logger = LoggerFactory::getInstance( self::class );
170
171        $this->streamName = $streamNameMapper->resolve(
172            self::PAGE_CHANGE_STREAM_NAME_DEFAULT );
173
174        $this->eventBusFactory = $eventBusFactory;
175
176        $userEntitySerializer = new UserEntitySerializer( $userFactory, $userGroupManager );
177
178        $this->pageChangeEventSerializer = new PageChangeEventSerializer(
179            new EventSerializer( $mainConfig, $globalIdGenerator, Telemetry::getInstance() ),
180            new PageEntitySerializer( $mainConfig, $titleFormatter ),
181            $userEntitySerializer,
182            new RevisionEntitySerializer(
183                new RevisionSlotEntitySerializer( $contentHandlerFactory ),
184                $userEntitySerializer
185            )
186        );
187
188        $this->wikiPageFactory = $wikiPageFactory;
189        $this->userFactory = $userFactory;
190        $this->revisionStore = $revisionStore;
191        $this->redirectLookup = $redirectLookup;
192        $this->pageLookup = $pageLookup;
193    }
194
195    /**
196     * Sends the events to the stream in a DeferredUPdate via the EventBus
197     * configured for the stream.
198     * NOTE: All events here must be destined to be sent $streamName.
199     * Do not use this function to send a batch of events to different streams.
200     *
201     * @param string $streamName
202     *
203     * @param array $events
204     *        This must be given as a list of events.
205     *
206     * @return void
207     * @throws Exception
208     */
209    private function sendEvents(
210        string $streamName,
211        array $events
212    ): void {
213        $eventBus = $this->eventBusFactory->getInstanceForStream( $streamName );
214        DeferredUpdates::addCallableUpdate( static function () use ( $eventBus, $events ) {
215            $eventBus->send( $events );
216        } );
217    }
218
219    /**
220     * @inheritDoc
221     */
222    public function onPageSaveComplete(
223        $wikiPage,
224        $user,
225        $summary,
226        $flags,
227        $revisionRecord,
228        $editResult
229    ) {
230        // Null edits are only useful to trigger side-effects, and would be
231        //   confusing to consumers of these events.  Since these would not be able to
232        //   change page state, they also don't belong in here.  If filtering them out
233        //   breaks a downstream consumer, we should send them to a different stream.
234        if ( $editResult->isNullEdit() ) {
235            return;
236        }
237
238        $performer = $this->userFactory->newFromUserIdentity( $user );
239
240        $redirectTarget = self::lookupRedirectTarget( $wikiPage, $this->pageLookup, $this->redirectLookup );
241
242        if ( $flags & EDIT_NEW ) {
243            // New page state change event for page create
244            $event = $this->pageChangeEventSerializer->toCreateEvent(
245                $this->streamName,
246                $wikiPage,
247                $performer,
248                $revisionRecord,
249                $redirectTarget
250            );
251
252        } else {
253            $event = $this->pageChangeEventSerializer->toEditEvent(
254                $this->streamName,
255                $wikiPage,
256                $performer,
257                $revisionRecord,
258                $redirectTarget,
259                $this->revisionStore->getRevisionById( $revisionRecord->getParentId() )
260            );
261        }
262
263        $this->sendEvents( $this->streamName, [ $event ] );
264    }
265
266    /**
267     * @inheritDoc
268     */
269    public function onPageMoveComplete(
270        $oldTitle,
271        $newTitle,
272        $user,
273        $pageid,
274        $redirid,
275        $reason,
276        $revision
277    ) {
278        // While we have $newTitle, serialization is going to ask for that information from the WikiPage.
279        // We have to read latest to ensure we are seeing the moved page.
280        $wikiPage = $this->wikiPageFactory->newFromID( $pageid, IDBAccessObject::READ_LATEST );
281
282        if ( $wikiPage == null ) {
283            throw new InvalidArgumentException( "No page moved from '$oldTitle' to '$newTitle"
284                . " with ID $pageid could be found" );
285        }
286
287        $performer = $this->userFactory->newFromUserIdentity( $user );
288
289        $redirectTarget = self::lookupRedirectTarget( $wikiPage, $this->pageLookup, $this->redirectLookup );
290
291        $createdRedirectWikiPage = $redirid ? $this->wikiPageFactory->newFromID( $redirid ) : null;
292
293        // The parentRevision is needed since a page move creates a new revision.
294        $parentRevision = $this->revisionStore->getRevisionById( $revision->getParentId() );
295
296        // NOTE: $newTitle not needed by pageChangeEventSerializer,
297        //this is obtained via $wikiPage.
298        $event = $this->pageChangeEventSerializer->toMoveEvent(
299            $this->streamName,
300            $wikiPage,
301            $performer,
302            $revision,
303            $parentRevision,
304            $oldTitle,
305            $reason,
306            $createdRedirectWikiPage,
307            $redirectTarget
308        );
309
310        $this->sendEvents( $this->streamName, [ $event ] );
311    }
312
313    public function onPageDelete(
314        ProperPageIdentity $page,
315        Authority $deleter,
316        string $reason,
317        StatusValue $status,
318        bool $suppress
319    ) {
320        $this->deletedPageRedirectTarget[$page->getId()] =
321            self::lookupRedirectTarget( $page, $this->pageLookup, $this->redirectLookup );
322    }
323
324    // Supercedes ArticleDeleteComplete
325
326    /**
327     * @inheritDoc
328     * @throws Exception
329     */
330    public function onPageDeleteComplete(
331        ProperPageIdentity $page,
332        Authority $deleter,
333        string $reason,
334        int $pageID,
335        RevisionRecord $deletedRev,
336        ManualLogEntry $logEntry,
337        int $archivedRevisionCount
338    ) {
339        $wikiPage = $this->wikiPageFactory->newFromTitle( $page );
340        $isSuppression = $logEntry->getType() === 'suppress';
341
342        // Don't set performer in the event if this delete suppresses the page from other admins.
343        // https://phabricator.wikimedia.org/T342487
344        $performerForEvent = $isSuppression ? null : $this->userFactory->newFromAuthority( $deleter );
345
346        $event = $this->pageChangeEventSerializer->toDeleteEvent(
347            $this->streamName,
348            $wikiPage,
349            $performerForEvent,
350            $deletedRev,
351            $reason,
352            $logEntry->getTimestamp(),
353            $archivedRevisionCount,
354            $this->deletedPageRedirectTarget[$page->getId()] ?? null,
355            $isSuppression
356        );
357
358        $this->sendEvents( $this->streamName, [ $event ] );
359
360        unset( $this->deletedPageRedirectTarget[$page->getId()] );
361    }
362
363    /**
364     * @inheritDoc
365     * @throws Exception
366     */
367    public function onPageUndeleteComplete(
368        ProperPageIdentity $page,
369        Authority $restorer,
370        string $reason,
371        RevisionRecord $restoredRev,
372        ManualLogEntry $logEntry,
373        int $restoredRevisionCount,
374        bool $created,
375        array $restoredPageIds
376    ): void {
377        $wikiPage = $this->wikiPageFactory->newFromTitle( $page );
378        $performer = $this->userFactory->newFromAuthority( $restorer );
379
380        $redirectTarget = self::lookupRedirectTarget( $wikiPage, $this->pageLookup, $this->redirectLookup );
381
382        // Send page change undelete event
383        $event = $this->pageChangeEventSerializer->toUndeleteEvent(
384            $this->streamName,
385            $wikiPage,
386            $performer,
387            $restoredRev,
388            $reason,
389            $redirectTarget,
390            $logEntry->getTimestamp(),
391            $page->getId()
392        );
393
394        $this->sendEvents( $this->streamName, [ $event ] );
395    }
396
397    /**
398     * @inheritDoc
399     */
400    public function onArticleRevisionVisibilitySet(
401        $title,
402        $revIds,
403        $visibilityChangeMap
404    ) {
405        // https://phabricator.wikimedia.org/T321411
406        $performer = RequestContext::getMain()->getUser();
407        $performer->loadFromId();
408
409        // Only send an event if the visible-ness of the current revision has changed.
410        foreach ( $revIds as $revId ) {
411            // Read from primary since due to replication lag the updated field visibility
412            // might not yet be available on a replica, and we are at risk of leaking
413            // just suppressed data.
414            $revisionRecord = $this->revisionStore->getRevisionById(
415                $revId,
416                IDBAccessObject::READ_LATEST
417            );
418
419            if ( $revisionRecord === null ) {
420                $this->logger->warning(
421                    'revision ' . $revId . ' for page ' . $title->getId() .
422                    ' could not be loaded from database and may have been deleted.' .
423                    ' Cannot create visibility change event for ' . $this->streamName . '.'
424                );
425                continue;
426            } elseif ( !array_key_exists( $revId, $visibilityChangeMap ) ) {
427                // This should not happen, log it.
428                $this->logger->error(
429                    'revision ' . $revId . ' for page ' . $title->getId() .
430                    ' not found in visibilityChangeMap.' .
431                    ' Cannot create visibility change event for ' . $this->streamName . '.'
432                );
433                continue;
434            }
435
436            // If this is the current revision of the page,
437            // then we need to represent the fact that the visibility
438            // properties of the current state of the page has changed.
439            // Emit a page change visibility_change event.
440            if ( $revisionRecord->isCurrent() ) {
441
442                $visibilityChanges = $visibilityChangeMap[$revId];
443
444                // current revision's visibility should be the same as we are given in
445                // $visibilityChanges['newBits']. Just in case, assert that this is true.
446                if ( $revisionRecord->getVisibility() != $visibilityChanges['newBits'] ) {
447                    throw new InvalidArgumentException(
448                        "Current revision $revId's' visibility did not match the expected " .
449                        'visibility change provided by hook. Current revision visibility is ' .
450                        $revisionRecord->getVisibility() . '. visibility changed to ' .
451                        $visibilityChanges['newBits']
452                    );
453                }
454
455                // We only need to emit an event if visibility has actually changed.
456                if ( $visibilityChanges['newBits'] === $visibilityChanges['oldBits'] ) {
457                    $this->logger->warning(
458                        "onArticleRevisionVisibilitySet called on revision $revId " .
459                        'when no effective visibility change was made.'
460                    );
461                }
462
463                $wikiPage = $this->wikiPageFactory->newFromTitle( $title );
464
465                // If this revision is 'suppressed' AKA restricted, then the person performing
466                // 'RevisionDelete' should not be visible in public data.
467                // https://phabricator.wikimedia.org/T342487
468                //
469                // NOTE: This event stream tries to match the visibility of MediaWiki core logs,
470                // where regular delete/revision events are public, and suppress/revision events
471                // are private. In MediaWiki core logs, private events are fully hidden from
472                // the public.  Here, we need to produce a 'private' event to the
473                // mediawiki.page_change stream, to indicate to consumers that
474                // they should also 'suppress' the revision.  When this is done, we need to
475                // make sure that we do not reproduce the data that has been suppressed
476                // in the event itself.  E.g. if the username of the editor of the revision has been
477                // suppressed, we should not include any information about that editor in the event.
478                $performerForEvent = self::isSecretRevisionVisibilityChange(
479                    $visibilityChangeMap[$revId]['oldBits'],
480                    $visibilityChangeMap[$revId]['newBits']
481                ) ? null : $performer;
482
483                $event = $this->pageChangeEventSerializer->toVisibilityChangeEvent(
484                    $this->streamName,
485                    $wikiPage,
486                    $performerForEvent,
487                    $revisionRecord,
488                    $visibilityChanges['oldBits'],
489                    // NOTE: ArticleRevisionVisibilitySet hook does not give us a proper event time.
490                    // The best we can do is use the current timestamp :(
491                    // https://phabricator.wikimedia.org/T321411
492                    wfTimestampNow()
493                );
494
495                $this->sendEvents( $this->streamName, [ $event ] );
496                // No need to search any further for the 'current' revision
497                break;
498            }
499        }
500    }
501
502    /**
503     * This function returns true if the visibility bits between the change require the
504     * info about the change to be redacted.
505     * https://phabricator.wikimedia.org/T342487
506     *
507     * Info about a visibility change is secret (in the secret MW action log)
508     * if the revision was either previously or currently is being suppressed.
509     * The admin performing the action should be hidden in both cases.
510     * The admin performing the action should only be shown if the change is not
511     * affecting the revision's suppression status.
512     * https://phabricator.wikimedia.org/T342487#9292715
513     *
514     * @param int $oldBits
515     * @param int $newBits
516     * @return bool
517     */
518    public static function isSecretRevisionVisibilityChange( int $oldBits, int $newBits ) {
519        return $oldBits & RevisionRecord::DELETED_RESTRICTED ||
520            $newBits & RevisionRecord::DELETED_RESTRICTED;
521    }
522
523    /**
524     * Returns a redirect target of supplied {@link PageReference}, if any.
525     *
526     * If the page reference does not represent a redirect, `null` is returned.
527     *
528     * See {@link RedirectTarget} for the meaning of its properties.
529     *
530     * TODO visible for testing only, move into RedirectLookup?
531     *
532     * @param PageReference $page
533     * @param PageLookup $pageLookup
534     * @param RedirectLookup $redirectLookup
535     * @return RedirectTarget|null
536     * @see RedirectTarget
537     */
538    public static function lookupRedirectTarget(
539        PageReference $page,
540        PageLookup $pageLookup,
541        RedirectLookup $redirectLookup
542    ): ?RedirectTarget {
543        if ( $page instanceof WikiPage ) {
544            // RedirectLookup doesn't support reading from the primary db, but we
545            // need the value from the new edit. Fetch directly through WikiPage which
546            // was updated with the new value as part of saving the new revision.
547            $redirectLinkTarget = $page->getRedirectTarget();
548        } else {
549            $redirectSourcePageReference = $pageLookup->getPageByReference( $page, IDBAccessObject::READ_LATEST );
550
551            $redirectLinkTarget = $redirectSourcePageReference != null && $redirectSourcePageReference->isRedirect()
552                ? $redirectLookup->getRedirectTarget( $redirectSourcePageReference )
553                : null;
554        }
555
556        if ( $redirectLinkTarget != null ) {
557            if ( !$redirectLinkTarget->isExternal() ) {
558                try {
559                    $redirectTargetPage = $pageLookup->getPageForLink( $redirectLinkTarget );
560                    return new RedirectTarget( $redirectLinkTarget, $redirectTargetPage );
561                } catch ( InvalidArgumentException $e ) {
562                    // silently ignore failed lookup, they are expected for anything but page targets
563                }
564            }
565            return new RedirectTarget( $redirectLinkTarget );
566        }
567
568        return null;
569    }
570
571}