Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
85.22% covered (warning)
85.22%
496 / 582
62.86% covered (warning)
62.86%
22 / 35
CRAP
0.00% covered (danger)
0.00%
0 / 1
DiscussionParser
85.37% covered (warning)
85.37%
496 / 581
62.86% covered (warning)
62.86%
22 / 35
242.17
0.00% covered (danger)
0.00%
0 / 1
 generateEventsForRevision
87.96% covered (warning)
87.96%
95 / 108
0.00% covered (danger)
0.00%
0 / 1
30.47
 detectSectionTitleAndText
100.00% covered (success)
100.00%
28 / 28
100.00% covered (success)
100.00%
1 / 1
7
 generateMentionEvents
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
6
 collectMentionEvents
82.50% covered (warning)
82.50%
66 / 80
0.00% covered (danger)
0.00%
0 / 1
11.65
 getOverallUserMentionsCount
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getUserMentions
84.62% covered (warning)
84.62%
33 / 39
0.00% covered (danger)
0.00%
0 / 1
10.36
 getUserLinks
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 hasSubpage
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 parseNonEditWikitext
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 getChangeInterpretationForRevision
96.30% covered (success)
96.30%
26 / 27
0.00% covered (danger)
0.00%
0 / 1
9
 interpretDiff
89.66% covered (warning)
89.66%
78 / 87
0.00% covered (danger)
0.00%
0 / 1
18.36
 hasNewSignature
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 convertToUnknownSignedChanges
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
4
 isInSignedSection
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
4
 getFullSection
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 getSectionSpan
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 getSectionStartIndex
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
3
 getSectionEndIndex
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 getSectionCount
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 extractHeader
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 extractSections
100.00% covered (success)
100.00%
22 / 22
100.00% covered (success)
100.00%
1 / 1
5
 stripSignature
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
2
 stripHeader
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 isSignedComment
55.56% covered (warning)
55.56%
5 / 9
0.00% covered (danger)
0.00%
0 / 1
3.79
 getTimestampPosition
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
2
 getMachineReadableDiff
66.67% covered (warning)
66.67%
2 / 3
0.00% covered (danger)
0.00%
0 / 1
2.15
 extractSignatures
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
3
 extractUsersFromLine
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
8
 getUserFromLine
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
4
 getLinkFromLine
0.00% covered (danger)
0.00%
0 / 12
0.00% covered (danger)
0.00%
0 / 1
30
 extractUserFromLink
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
20
 getTimestampRegex
95.45% covered (success)
95.45%
21 / 22
0.00% covered (danger)
0.00%
0 / 1
5
 getTextSnippet
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 getTextSnippetFromSummary
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
 getEditExcerpt
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace MediaWiki\Extension\Notifications;
4
5use Article;
6use IDBAccessObject;
7use Language;
8use MediaWiki\Extension\Notifications\Hooks\HookRunner;
9use MediaWiki\Extension\Notifications\Model\Event;
10use MediaWiki\MediaWikiServices;
11use MediaWiki\Parser\Sanitizer;
12use MediaWiki\Revision\RevisionRecord;
13use MediaWiki\Revision\SlotRecord;
14use MediaWiki\Title\Title;
15use MediaWiki\User\User;
16use MediaWiki\User\UserNameUtils;
17use ParserOptions;
18use ParserOutput;
19use RequestContext;
20use RuntimeException;
21use TextContent;
22
23abstract class DiscussionParser {
24    private const HEADER_REGEX = '^(==+)\h*([^=].*)\h*\1$';
25
26    public const DEFAULT_SNIPPET_LENGTH = 150;
27
28    /** @var string|null */
29    protected static $timestampRegex;
30
31    /**
32     * @var array[][]
33     * FIXME: This static cache can become stale in tests, because it's never reset. We use both rev IDs and title keys
34     * to mitigate that, but it might still break!
35     */
36    protected static $revisionInterpretationCache = [];
37
38    /** @var DiffParser|null */
39    protected static $diffParser = null;
40
41    /**
42     * Given a RevisionRecord object, generates Event objects for
43     * the discussion-related actions that occurred in that Revision.
44     *
45     * @param RevisionRecord $revision
46     * @param bool $isRevert
47     */
48    public static function generateEventsForRevision( RevisionRecord $revision, $isRevert ) {
49        global $wgEchoMentionsOnMultipleSectionEdits;
50        global $wgEchoMentionOnChanges;
51        $services = MediaWikiServices::getInstance();
52        $store = $services->getRevisionStore();
53
54        // use the replica database if there is a previous revision
55        if ( $store->getPreviousRevision( $revision ) ) {
56            $title = Title::newFromID( $revision->getPageId() );
57            // use the primary database for new page
58        } else {
59            $title = Title::newFromID( $revision->getPageId(), IDBAccessObject::READ_LATEST );
60        }
61
62        // not a valid title
63        if ( !$title ) {
64            return;
65        }
66
67        $events = [];
68
69        $interpretation = self::getChangeInterpretationForRevision( $revision );
70
71        $userID = $revision->getUser()->getId();
72        $userName = $revision->getUser()->getName();
73        $user = $userID !== 0 ? User::newFromId( $userID ) : User::newFromName( $userName, false );
74
75        foreach ( $interpretation as $action ) {
76            if ( $action['type'] === 'add-comment' ) {
77                $fullSection = $action['full-section'];
78                $header = self::extractHeader( $fullSection );
79                $userLinks = self::getUserLinks( $action['content'], $title );
80                $events = array_merge(
81                    $events,
82                    self::collectMentionEvents( $header, $userLinks, $action['content'], $revision, $user )
83                );
84            } elseif ( $action['type'] === 'new-section-with-comment' ) {
85                $content = $action['content'];
86                $header = self::extractHeader( $content );
87                $userLinks = self::getUserLinks( $content, $title );
88                $events = array_merge(
89                    $events,
90                    self::collectMentionEvents( $header, $userLinks, $content, $revision, $user )
91                );
92            } elseif ( $action['type'] === 'add-section-multiple' && $wgEchoMentionsOnMultipleSectionEdits ) {
93                $content = self::stripHeader( $action['content'] );
94                $content = self::stripSignature( $content );
95                $userLinks = self::getUserLinks( $content, $title );
96                $events = array_merge(
97                    $events,
98                    self::collectMentionEvents( $action['header'], $userLinks, $content, $revision, $user )
99                );
100            } elseif ( $action['type'] === 'unknown-signed-change' ) {
101                $userLinks = array_diff_key(
102                    self::getUserLinks( $action['new_content'], $title ),
103                    self::getUserLinks( $action['old_content'], $title )
104                );
105                $header = self::extractHeader( $action['full-section'] );
106
107                if ( $wgEchoMentionOnChanges ) {
108                    $events = array_merge(
109                        $events,
110                        self::collectMentionEvents( $header, $userLinks, $action['new_content'], $revision, $user )
111                    );
112                }
113            }
114        }
115
116        if ( $title->getNamespace() === NS_USER_TALK ) {
117            $notifyUser = User::newFromName( $title->getText() );
118            // If the recipient is a valid non-anonymous user generate a talk page post notification.
119            if ( $notifyUser && $notifyUser->getId() ) {
120                $permManager = MediaWikiServices::getInstance()->getPermissionManager();
121                // If this is a minor edit, only notify if the agent doesn't have talk page minor
122                // edit notification blocked
123                if ( !$revision->isMinor() || !$permManager->userHasRight( $user, 'nominornewtalk' ) ) {
124                    $section = self::detectSectionTitleAndText( $interpretation, $title );
125                    if ( $section['section-text'] === '' ) {
126                        $comment = $revision->getComment( RevisionRecord::FOR_PUBLIC, $notifyUser );
127                        if ( $comment ) {
128                            $section['section-text'] = $comment->text;
129                        }
130                    }
131                    $events[] = [
132                        'type' => 'edit-user-talk',
133                        'title' => $title,
134                        'extra' => [
135                            'revid' => $revision->getId(),
136                            'minoredit' => $revision->isMinor(),
137                            'section-title' => $section['section-title'],
138                            'section-text' => $section['section-text'],
139                            'target-page' => $title->getArticleID(),
140                        ],
141                        'agent' => $user,
142                    ];
143                }
144            }
145        } elseif ( $title->inNamespace( NS_USER ) ) {
146            $notifyUser = User::newFromName( $title->getText() );
147            // If the recipient is a valid non-anonymous user and hasn't turned
148            // off their notifications, generate a talk page post Echo notification.
149            if ( $notifyUser && $notifyUser->getId() ) {
150                $events[] = [
151                    'type' => 'edit-user-page',
152                    'title' => $title,
153                    'extra' => [
154                        'revid' => $revision->getId(),
155                    ],
156                    'agent' => $user,
157                ];
158            }
159        }
160
161        // Notify users mentioned in edit summary
162        global $wgEchoMaxMentionsInEditSummary;
163
164        if ( $wgEchoMaxMentionsInEditSummary > 0 && !$user->isBot() && !$isRevert ) {
165            $summaryParser = new SummaryParser();
166            $usersInSummary = $summaryParser->parse( $revision->getComment()->text );
167
168            // Don't allow pinging yourself
169            unset( $usersInSummary[$userName] );
170
171            $count = 0;
172            $mentionedUsers = [];
173            foreach ( $usersInSummary as $summaryUser ) {
174                if ( $summaryUser->getTalkPage()->equals( $title ) ) {
175                    // Users already get a ping when their talk page is edited
176                    continue;
177                }
178                if ( $count >= $wgEchoMaxMentionsInEditSummary ) {
179                    break;
180                }
181                $mentionedUsers[$summaryUser->getId()] = $summaryUser->getId();
182                $count++;
183            }
184
185            if ( $mentionedUsers ) {
186                $events[] = [
187                    'type' => 'mention-summary',
188                    'title' => $title,
189                    'extra' => [
190                        'revid' => $revision->getId(),
191                        'mentioned-users' => $mentionedUsers,
192                    ],
193                    'agent' => $user,
194                ];
195            }
196        }
197
198        // Allow extensions to generate more events for a revision, and de-duplicate
199        // against the standard events created above.
200        ( new HookRunner( $services->getHookContainer() ) )
201            ->onEchoGetEventsForRevision( $events, $revision, $isRevert );
202
203        // Create events
204        foreach ( $events as $event ) {
205            Event::create( $event );
206        }
207    }
208
209    /**
210     * Attempts to determine what section title the edit was performed under (if any)
211     *
212     * @param array[] $interpretation Results of {@see getChangeInterpretationForRevision}
213     * @param Title|null $title
214     * @return string[] Array containing section title and text
215     */
216    public static function detectSectionTitleAndText( array $interpretation, Title $title = null ) {
217        $header = $snippet = '';
218        $found = false;
219
220        foreach ( $interpretation as $action ) {
221            switch ( $action['type'] ) {
222                case 'add-comment':
223                    $header = self::extractHeader( $action['full-section'] );
224                    $snippet = self::getTextSnippet(
225                        self::stripSignature( self::stripHeader( $action['content'] ), $title ),
226                        RequestContext::getMain()->getLanguage(),
227                        self::DEFAULT_SNIPPET_LENGTH,
228                        $title );
229                    break;
230                case 'new-section-with-comment':
231                    $header = self::extractHeader( $action['content'] );
232                    $snippet = self::getTextSnippet(
233                        self::stripSignature( self::stripHeader( $action['content'] ), $title ),
234                        RequestContext::getMain()->getLanguage(),
235                        self::DEFAULT_SNIPPET_LENGTH,
236                        $title );
237                    break;
238            }
239            if ( $header ) {
240                // If we find a second header within the same change interpretation then
241                // we cannot choose just 1 to link to
242                if ( $found ) {
243                    $found = false;
244                    break;
245                }
246                $found = true;
247            }
248        }
249        if ( !$found ) {
250            return [ 'section-title' => '', 'section-text' => '' ];
251        }
252
253        return [ 'section-title' => $header, 'section-text' => $snippet ];
254    }
255
256    /**
257     * For an action taken on a talk page, notify users whose user pages
258     * are linked.
259     * @param string $header The subject line for the discussion.
260     * @param int[] $userLinks
261     * @param string $content The content of the post, as a wikitext string.
262     * @param RevisionRecord $revision
263     * @param User $agent The user who made the comment.
264     */
265    public static function generateMentionEvents(
266        $header,
267        array $userLinks,
268        $content,
269        RevisionRecord $revision,
270        User $agent
271    ) {
272        $events = self::collectMentionEvents( $header, $userLinks, $content, $revision, $agent );
273        foreach ( $events as $event ) {
274            Event::create( $event );
275        }
276    }
277
278    /**
279     * Generate mention event data for a talk page action
280     * @param string $header The subject line for the discussion.
281     * @param int[] $userLinks
282     * @param string $content The content of the post, as a wikitext string.
283     * @param RevisionRecord $revision
284     * @param User $agent The user who made the comment.
285     * @return array List of event info arrays
286     */
287    protected static function collectMentionEvents(
288        $header,
289        array $userLinks,
290        $content,
291        RevisionRecord $revision,
292        User $agent
293    ) {
294        global $wgEchoMaxMentionsCount, $wgEchoMentionStatusNotifications;
295
296        $title = Title::newFromLinkTarget( $revision->getPageAsLinkTarget() );
297        if ( !$title ) {
298            return [];
299        }
300        $content = self::stripHeader( $content );
301        $content = self::stripSignature( $content, $title );
302
303        if ( !$userLinks ) {
304            return [];
305        }
306
307        $userMentions = self::getUserMentions(
308            $title, $revision->getUser( RevisionRecord::RAW )->getId(), $userLinks
309        );
310        $overallMentionsCount = self::getOverallUserMentionsCount( $userMentions );
311        if ( $overallMentionsCount === 0 ) {
312            return [];
313        }
314
315        $events = [];
316        $stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
317
318        if ( $overallMentionsCount > $wgEchoMaxMentionsCount ) {
319            if ( $wgEchoMentionStatusNotifications ) {
320                $events[] = [
321                    'type' => 'mention-failure-too-many',
322                    'title' => $title,
323                    'extra' => [
324                        'max-mentions' => $wgEchoMaxMentionsCount,
325                        'section-title' => $header,
326                    ],
327                    'agent' => $agent,
328                ];
329                $stats->increment( 'echo.event.mention.notification.failure-too-many' );
330            }
331            return $events;
332        }
333
334        if ( $userMentions['validMentions'] ) {
335            $events[] = [
336                'type' => 'mention',
337                'title' => $title,
338                'extra' => [
339                    'content' => $content,
340                    'section-title' => $header,
341                    'revid' => $revision->getId(),
342                    'mentioned-users' => $userMentions['validMentions'],
343                ],
344                'agent' => $agent,
345            ];
346        }
347
348        if ( $wgEchoMentionStatusNotifications ) {
349            // TODO batch?
350            foreach ( $userMentions['validMentions'] as $mentionedUserId ) {
351                $events[] = [
352                    'type' => 'mention-success',
353                    'title' => $title,
354                    'extra' => [
355                        'subject-name' => User::newFromId( $mentionedUserId )->getName(),
356                        'section-title' => $header,
357                        'revid' => $revision->getId(),
358                    ],
359                    'agent' => $agent,
360                ];
361                $stats->increment( 'echo.event.mention.notification.success' );
362            }
363
364            // TODO batch?
365            foreach ( $userMentions['anonymousUsers'] as $anonymousUser ) {
366                $events[] = [
367                    'type' => 'mention-failure',
368                    'title' => $title,
369                    'extra' => [
370                        'failure-type' => 'user-anonymous',
371                        'subject-name' => $anonymousUser,
372                        'section-title' => $header,
373                        'revid' => $revision->getId(),
374                    ],
375                    'agent' => $agent,
376                ];
377                $stats->increment( 'echo.event.mention.notification.failure-user-anonymous' );
378            }
379
380            // TODO batch?
381            foreach ( $userMentions['unknownUsers'] as $unknownUser ) {
382                $events[] = [
383                    'type' => 'mention-failure',
384                    'title' => $title,
385                    'extra' => [
386                        'failure-type' => 'user-unknown',
387                        'subject-name' => $unknownUser,
388                        'section-title' => $header,
389                        'revid' => $revision->getId(),
390                    ],
391                    'agent' => $agent,
392                ];
393                $stats->increment( 'echo.event.mention.notification.failure-user-unknown' );
394            }
395        }
396
397        return $events;
398    }
399
400    private static function getOverallUserMentionsCount( array $userMentions ) {
401        return count( $userMentions, COUNT_RECURSIVE ) - count( $userMentions );
402    }
403
404    /**
405     * @param Title $title
406     * @param int $revisionUserId
407     * @param int[] $userLinks
408     * @return array[]
409     * Set of arrays containing valid mentions and possible intended but failed mentions.
410     * - [validMentions]: An array of valid users to mention with ID => ID.
411     * - [unknownUsers]: An array of DBKey strings representing unknown users.
412     * - [anonymousUsers]: An array of DBKey strings representing anonymous IP users.
413     */
414    public static function getUserMentions( Title $title, $revisionUserId, array $userLinks ) {
415        global $wgEchoMaxMentionsCount;
416
417        $userMentions = [
418            'validMentions' => [],
419            'unknownUsers' => [],
420            'anonymousUsers' => [],
421        ];
422
423        $count = 0;
424        $stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
425        $userNameUtils = MediaWikiServices::getInstance()->getUserNameUtils();
426
427        foreach ( $userLinks as $dbk => $page_id ) {
428            // If more users are being pinged this is likely a spam/attack vector
429            // Don't send any mention notifications.
430            if ( $count > $wgEchoMaxMentionsCount ) {
431                $stats->increment( 'echo.event.mention.error.tooMany' );
432                break;
433            }
434
435            // we should not add user to 'mention' notification list if
436            // 1. the user link links to a subpage
437            if ( self::hasSubpage( $dbk ) ) {
438                continue;
439            }
440
441            // 2. user is an anonymous IP
442            if ( $userNameUtils->isIP( $dbk ) ) {
443                $userMentions['anonymousUsers'][] = $dbk;
444                $count++;
445                $stats->increment( 'echo.event.mention.error.anonUser' );
446                continue;
447            }
448
449            $user = User::newFromName( $dbk );
450            // 3. the user name is not valid
451            if ( !$user ) {
452                $userMentions['unknownUsers'][] = str_replace( '_', ' ', $dbk );
453                $count++;
454                $stats->increment( 'echo.event.mention.error.invalidUser' );
455                continue;
456            }
457
458            // 4. the user mentions themselves
459            if ( $user->getId() === $revisionUserId ) {
460                $stats->increment( 'echo.event.mention.error.sameUser' );
461                continue;
462            }
463
464            // 5. the user is the owner of the talk page
465            if ( $title->getNamespace() === NS_USER_TALK && $title->getDBkey() === $dbk ) {
466                $stats->increment( 'echo.event.mention.error.ownPage' );
467                continue;
468            }
469
470            // 6. user does not exist
471            if ( $user->getId() === 0 ) {
472                $userMentions['unknownUsers'][] = str_replace( '_', ' ', $dbk );
473                $count++;
474                $stats->increment( 'echo.event.mention.error.unknownUser' );
475                continue;
476            }
477
478            $userMentions['validMentions'][$user->getId()] = $user->getId();
479            $count++;
480        }
481
482        return $userMentions;
483    }
484
485    /**
486     * @param string $content
487     * @param Title $title
488     * @return int[]
489     * Array of links in the user namespace with DBKey => ID.
490     */
491    public static function getUserLinks( $content, Title $title ) {
492        $output = self::parseNonEditWikitext( $content, new Article( $title ) );
493        $links = $output->getLinks();
494
495        if ( !isset( $links[NS_USER] ) || !is_array( $links[NS_USER] ) ) {
496            return [];
497        }
498
499        return $links[NS_USER];
500    }
501
502    private static function hasSubpage( $dbk ) {
503        return strpos( $dbk, '/' ) !== false;
504    }
505
506    /**
507     * It's like Article::prepareTextForEdit,
508     *  but not for editing (old wikitext usually)
509     * Stolen from AbuseFilter's VariableHolder
510     *
511     * @param string $wikitext
512     * @param Article $article
513     *
514     * @return ParserOutput
515     */
516    public static function parseNonEditWikitext( $wikitext, Article $article ) {
517        static $cache = [];
518
519        $cacheKey = md5( $wikitext ) . ':' . $article->getTitle()->getPrefixedText();
520
521        if ( isset( $cache[$cacheKey] ) ) {
522            return $cache[$cacheKey];
523        }
524
525        $parser = MediaWikiServices::getInstance()->getParser();
526
527        $options = new ParserOptions( $article->getContext()->getUser() );
528        $output = $parser->parse( $wikitext, $article->getTitle(), $options );
529        $cache[$cacheKey] = $output;
530
531        return $output;
532    }
533
534    /**
535     * Given a Revision object, returns a talk-page-centric interpretation
536     * of the changes made in it.
537     *
538     * @param RevisionRecord $revision
539     * @see DiscussionParser::interpretDiff
540     * @return array[] See {@see interpretDiff} for details.
541     */
542    private static function getChangeInterpretationForRevision( RevisionRecord $revision ) {
543        if ( $revision->getId() ) {
544            $page = $revision->getPage();
545            $cacheKey = $revision->getId() . '|' . $page->getNamespace() . '|' . $page->getDBkey();
546            if ( isset( self::$revisionInterpretationCache[$cacheKey] ) ) {
547                return self::$revisionInterpretationCache[$cacheKey];
548            }
549        } else {
550            $cacheKey = null;
551        }
552
553        $userIdentity = $revision->getUser();
554
555        $prevText = '';
556        if ( $revision->getParentId() ) {
557            $store = MediaWikiServices::getInstance()->getRevisionStore();
558            $prevRevision = $store->getRevisionById( $revision->getParentId() );
559            if ( $prevRevision ) {
560                $prevContent = $prevRevision->getContent( SlotRecord::MAIN );
561                $prevText = ( $prevContent instanceof TextContent ) ? $prevContent->getText() : '';
562            }
563        }
564
565        $content = $revision->getContent( SlotRecord::MAIN );
566        $changes = self::getMachineReadableDiff(
567            $prevText,
568            ( $content instanceof TextContent ) ? $content->getText() : ''
569        );
570        $output = self::interpretDiff(
571            $changes,
572            $userIdentity ? $userIdentity->getName() : '',
573            Title::newFromLinkTarget( $revision->getPageAsLinkTarget() )
574        );
575
576        if ( $cacheKey ) {
577            self::$revisionInterpretationCache[$cacheKey] = $output;
578        }
579
580        return $output;
581    }
582
583    /**
584     * Given a machine-readable diff, interprets the changes
585     * in terms of discussion page actions
586     *
587     * @todo Expand recognisable actions.
588     *
589     * @param array[] $changes Output of {@see getMachineReadableDiff}
590     * @param string $username
591     * @param Title|null $title
592     * @return array[] Array of associative arrays.
593     *
594     * Each entry represents an action, which is classified in the 'type' field.
595     * All types contain a 'content' field except 'unknown'
596     *  (which instead passes through the machine-readable diff in 'details')
597     *  and 'unknown-change' (which provides 'new_content' and 'old_content')
598     * type may be:
599     * - add-comment: A comment signed by the user is added to an
600     *    existing section.
601     * - new-section-with-comment: A new section is added, containing
602     *    a single comment signed by the user in question.
603     * - add-section-multiple: A new section or additions to a section
604     *    while editing multiple sections at once.
605     * - unknown-multi-signed-addition: Some signed content is added,
606     *    but it contains multiple signatures.
607     * - unknown-unsigned-addition: Some content is added, but it is
608     *    unsigned.
609     * - unknown-subtraction: Some content was removed. These actions are
610     *    not currently analysed.
611     * - unknown-change: Some content was replaced with other content.
612     * - unknown-signed-change: Same as unknown-change, but signed.
613     * - unknown-multi-signed-change: Same as unknown-change,
614     *    but it contains multiple signatures.
615     * - unknown: Unrecognised change type.
616     */
617    public static function interpretDiff( array $changes, $username, Title $title = null ) {
618        // One extra item in $changes for _info
619        $actions = [];
620        $signedSections = [];
621
622        foreach ( $changes as $index => $change ) {
623            if ( !is_numeric( $index ) ) {
624                continue;
625            }
626
627            if ( !$change['action'] ) {
628                // Unknown action; skip
629                continue;
630            }
631
632            if ( $change['action'] === 'add' ) {
633                $content = trim( $change['content'] );
634                // The \A means the regex must match at the beginning of the string.
635                // This is slightly different than ^ which matches beginning of each
636                // line in multiline mode.
637                $startSection = preg_match( '/\A' . self::HEADER_REGEX . '/um', $content );
638                $sectionCount = self::getSectionCount( $content );
639                $signedUsers = self::extractSignatures( $content, $title );
640
641                if (
642                    count( $signedUsers ) === 1 &&
643                    isset( $signedUsers[$username] )
644                ) {
645                    if ( $sectionCount === 0 ) {
646                        $signedSections[] = self::getSectionSpan( $change['right-pos'], $changes['_info']['rhs'] );
647                        $fullSection = self::getFullSection( $changes['_info']['rhs'], $change['right-pos'] );
648                        $actions[] = [
649                            'type' => 'add-comment',
650                            'content' => $content,
651                            'full-section' => $fullSection,
652                        ];
653                    } elseif ( $startSection && $sectionCount === 1 ) {
654                        $signedSections[] = self::getSectionSpan( $change['right-pos'], $changes['_info']['rhs'] );
655                        $actions[] = [
656                            'type' => 'new-section-with-comment',
657                            'content' => $content,
658                        ];
659                    } else {
660                        $nextSectionStart = $change['right-pos'];
661                        $sectionData = self::extractSections( $content );
662                        foreach ( $sectionData as $section ) {
663                            $sectionSpan = self::getSectionSpan( $nextSectionStart, $changes['_info']['rhs'] );
664                            $nextSectionStart = $sectionSpan[1] + 1;
665                            $sectionSignedUsers = self::extractSignatures( $section['content'], $title );
666                            if ( $sectionSignedUsers ) {
667                                $signedSections[] = $sectionSpan;
668                                if ( !$section['header'] ) {
669                                    $fullSection = self::getFullSection(
670                                        $changes['_info']['rhs'],
671                                        $change['right-pos']
672                                    );
673                                    $section['header'] = self::extractHeader( $fullSection );
674                                }
675                                $actions[] = [
676                                    'type' => 'add-section-multiple',
677                                    'content' => $section['content'],
678                                    'header' => $section['header'],
679                                ];
680                            } else {
681                                $actions[] = [
682                                    'type' => 'unknown-unsigned-addition',
683                                    'content' => $section['content'],
684                                ];
685                            }
686                        }
687                    }
688                } elseif ( $signedUsers !== [] ) {
689                    $actions[] = [
690                        'type' => 'unknown-multi-signed-addition',
691                        'content' => $content,
692                    ];
693                } else {
694                    $actions[] = [
695                        'type' => 'unknown-unsigned-addition',
696                        'content' => $content,
697                    ];
698                }
699            } elseif ( $change['action'] === 'subtract' ) {
700                $actions[] = [
701                    'type' => 'unknown-subtraction',
702                    'content' => $change['content'],
703                ];
704            } elseif ( $change['action'] === 'change' ) {
705                $actions[] = [
706                    'type' => 'unknown-change',
707                    'old_content' => $change['old_content'],
708                    'new_content' => $change['new_content'],
709                    'right-pos' => $change['right-pos'],
710                    'full-section' => self::getFullSection( $changes['_info']['rhs'], $change['right-pos'] ),
711                ];
712
713                if ( self::hasNewSignature(
714                    $change['old_content'],
715                    $change['new_content'],
716                    $username,
717                    $title
718                ) ) {
719                    $signedSections[] = self::getSectionSpan( $change['right-pos'], $changes['_info']['rhs'] );
720                }
721            } else {
722                $actions[] = [
723                    'type' => 'unknown',
724                    'details' => $change,
725                ];
726            }
727        }
728
729        if ( $signedSections ) {
730            $actions = self::convertToUnknownSignedChanges( $signedSections, $actions );
731        }
732
733        return $actions;
734    }
735
736    private static function hasNewSignature( $oldContent, $newContent, $username, $title ) {
737        $oldSignedUsers = self::extractSignatures( $oldContent, $title );
738        $newSignedUsers = self::extractSignatures( $newContent, $title );
739
740        return !isset( $oldSignedUsers[$username] ) && isset( $newSignedUsers[$username] );
741    }
742
743    /**
744     * Converts actions of type "unknown-change" to "unknown-signed-change" if the change is in a signed section.
745     *
746     * @param array[] $signedSections Array of arrays containing first and last line number of signed sections
747     * @param array[] $actions
748     * @return array[] Converted actions
749     */
750    private static function convertToUnknownSignedChanges( array $signedSections, array $actions ) {
751        return array_map( function ( $action ) use( $signedSections ) {
752            if (
753                $action['type'] === 'unknown-change' &&
754                self::isInSignedSection( $action['right-pos'], $signedSections )
755            ) {
756                $signedUsers = self::extractSignatures( $action['new_content'], null );
757                if ( count( $signedUsers ) === 1 ) {
758                    $action['type'] = 'unknown-signed-change';
759                } else {
760                    $action['type'] = 'unknown-multi-signed-change';
761                }
762            }
763
764            return $action;
765        }, $actions );
766    }
767
768    /**
769     * @param int $line
770     * @param array[] $signedSections
771     * @return bool
772     */
773    private static function isInSignedSection( $line, array $signedSections ) {
774        foreach ( $signedSections as $section ) {
775            if ( $line > $section[0] && $line <= $section[1] ) {
776                return true;
777            }
778        }
779
780        return false;
781    }
782
783    /**
784     * Finds the section that a given line is in.
785     *
786     * @param array $lines of lines in the page.
787     * @param int $offset The line to find the full section for.
788     * @return string Content of the section.
789     */
790    public static function getFullSection( array $lines, $offset ) {
791        $start = self::getSectionStartIndex( $offset, $lines );
792        $end = self::getSectionEndIndex( $offset, $lines );
793        $content = implode( "\n", array_slice( $lines, $start, $end - $start ) );
794
795        return trim( $content, "\n" );
796    }
797
798    /**
799     * Given a line number and a text, find the first and last line of the section the line number is in.
800     * If there are subsections, the last line index will be the line before the beginning of the first subsection.
801     * @param int $offset line number
802     * @param string[] $lines
803     * @return int[] Tuple [$firstLine, $lastLine]
804     */
805    private static function getSectionSpan( $offset, array $lines ) {
806        return [
807            self::getSectionStartIndex( $offset, $lines ),
808            self::getSectionEndIndex( $offset, $lines )
809        ];
810    }
811
812    /**
813     * Finds the line number of the start of the section that $offset is in.
814     * @param int $offset
815     * @param string[] $lines
816     * @return int
817     */
818    private static function getSectionStartIndex( $offset, array $lines ) {
819        for ( $i = $offset - 1; $i >= 0; $i-- ) {
820            if ( self::getSectionCount( $lines[$i] ) ) {
821                break;
822            }
823        }
824
825        return $i;
826    }
827
828    /**
829     * Finds the line number of the end of the section that $offset is in.
830     * @param int $offset
831     * @param array $lines
832     * @return int
833     */
834    private static function getSectionEndIndex( $offset, array $lines ) {
835        $lastLine = count( $lines );
836        for ( $i = $offset; $i < $lastLine; $i++ ) {
837            if ( self::getSectionCount( $lines[$i] ) ) {
838                break;
839            }
840        }
841
842        return $i;
843    }
844
845    /**
846     * Gets the number of section headers in a string.
847     *
848     * @param string $text
849     * @return int Number of section headers found.
850     */
851    public static function getSectionCount( $text ) {
852        $text = trim( $text );
853
854        return (int)preg_match_all( '/' . self::HEADER_REGEX . '/um', $text );
855    }
856
857    /**
858     * Gets the title of a section or sub section
859     *
860     * @param string $text The text of the section.
861     * @return string|false The title of the section or false if not found
862     */
863    public static function extractHeader( $text ) {
864        $text = trim( $text );
865
866        $matches = [];
867
868        if ( !preg_match_all( '/' . self::HEADER_REGEX . '/um', $text, $matches ) ) {
869            return false;
870        }
871
872        return trim( end( $matches[2] ) );
873    }
874
875    /**
876     * Extracts sections and their contents from text.
877     *
878     * @param string $text The text to parse.
879     * @return array[]
880     * Array of arrays containing sections with header and content.
881     * - [header]: The full header string of the section or false if there is preceding text without header.
882     * - [content]: The content of the section including the header string.
883     */
884    private static function extractSections( $text ) {
885        $matches = [];
886
887        if ( !preg_match_all( '/' . self::HEADER_REGEX . '/um', $text, $matches, PREG_OFFSET_CAPTURE ) ) {
888            return [ [
889                'header' => false,
890                'content' => $text
891            ] ];
892        }
893
894        $sectionNum = count( $matches[0] );
895        $sections = [];
896
897        // is there text before the first headline?
898        if ( $matches[0][0][1] > 1 ) {
899            $sections[] = [
900                'header' => false,
901                'content' => substr( $text, 0, $matches[0][0][1] - 1 )
902            ];
903        }
904        for ( $i = 0; $i < $sectionNum; $i++ ) {
905            if ( $i + 1 < $sectionNum ) {
906                $content = substr( $text, $matches[0][$i][1], $matches[0][$i + 1][1] - $matches[0][$i][1] );
907            } else {
908                $content = substr( $text, $matches[0][$i][1] );
909            }
910            $sections[] = [
911                'header' => self::extractHeader( $matches[0][$i][0] ),
912                'content' => trim( $content )
913            ];
914        }
915
916        return $sections;
917    }
918
919    /**
920     * Strips out a signature if possible.
921     *
922     * @param string $text The wikitext to strip
923     * @param Title|null $title
924     * @return string
925     */
926    private static function stripSignature( $text, Title $title = null ) {
927        $output = self::getUserFromLine( $text, $title );
928        if ( $output === false ) {
929            $timestampPos = self::getTimestampPosition( $text );
930
931            return substr( $text, 0, $timestampPos );
932        }
933
934        // Use truncateForDatabase() instead of truncateHTML() because
935        // truncateHTML() would not strip signature if the text contains
936        // < or &.  (And we can't use truncateForVisual() because
937        // self::getUserFromLine() returns byte offsets, not character
938        // offsets.)
939        return MediaWikiServices::getInstance()->getContentLanguage()
940            ->truncateForDatabase( $text, $output[0], '' );
941    }
942
943    /**
944     * Strips out a section header
945     * @param string $text The text to strip out the section header from.
946     * @return string The same text, with the section header stripped out.
947     */
948    private static function stripHeader( $text ) {
949        return preg_replace( '/' . self::HEADER_REGEX . '/um', '', $text );
950    }
951
952    /**
953     * Determines whether the input is a signed comment.
954     *
955     * @param string $text The text to check.
956     * @param User|bool $user If set, will only return true if the comment is
957     *  signed by this user.
958     * @param Title|null $title
959     * @return bool
960     */
961    public static function isSignedComment( $text, $user = false, Title $title = null ) {
962        $userData = self::getUserFromLine( $text, $title );
963
964        if ( $userData === false ) {
965            return false;
966        } elseif ( $user === false ) {
967            return true;
968        }
969
970        [ , $foundUser ] = $userData;
971        $userNameUtils = MediaWikiServices::getInstance()->getUserNameUtils();
972
973        return $userNameUtils->getCanonical( $foundUser, UserNameUtils::RIGOR_NONE ) ===
974            $userNameUtils->getCanonical( $user, UserNameUtils::RIGOR_NONE );
975    }
976
977    /**
978     * Finds the start position, if any, of the timestamp on a line
979     *
980     * @param string $line The line to search for a signature on
981     * @return int|false Integer position
982     */
983    public static function getTimestampPosition( $line ) {
984        $timestampRegex = self::getTimestampRegex();
985        $tsMatches = [];
986        if ( !preg_match(
987            "/$timestampRegex/mu",
988            $line,
989            $tsMatches,
990            PREG_OFFSET_CAPTURE
991        ) ) {
992            return false;
993        }
994
995        return $tsMatches[0][1];
996    }
997
998    /**
999     * Finds differences between $oldText and $newText
1000     * and returns the result in a machine-readable format.
1001     *
1002     * @param string $oldText The "left hand side" of the diff.
1003     * @param string $newText The "right hand side" of the diff.
1004     * @return array[] Array of changes.
1005     * Each change consists of:
1006     * * An 'action', one of:
1007     *   - add
1008     *   - subtract
1009     *   - change
1010     * * 'content' that was added or removed, or in the case
1011     *    of a change, 'old_content' and 'new_content'
1012     * * 'left_pos' and 'right_pos' (in lines) of the change.
1013     */
1014    public static function getMachineReadableDiff( $oldText, $newText ) {
1015        if ( self::$diffParser === null ) {
1016            self::$diffParser = new DiffParser;
1017        }
1018
1019        return self::$diffParser->getChangeSet( $oldText, $newText );
1020    }
1021
1022    /**
1023     * Finds and extracts signatures in $text
1024     *
1025     * @param string $text The text in which to look for signed comments.
1026     * @param Title|null $title
1027     * @return array<string,string> Associative array, the key is the username, the value
1028     *  is the last signature that was found.
1029     */
1030    private static function extractSignatures( $text, Title $title = null ) {
1031        $lines = explode( "\n", $text );
1032
1033        $output = [];
1034
1035        $lineNumber = 0;
1036
1037        foreach ( $lines as $line ) {
1038            ++$lineNumber;
1039
1040            // Look for the last user link on the line.
1041            $userData = self::getUserFromLine( $line, $title );
1042            if ( $userData === false ) {
1043                continue;
1044            }
1045
1046            [ $signaturePos, $user ] = $userData;
1047
1048            $signature = substr( $line, $signaturePos );
1049            $output[$user] = $signature;
1050        }
1051
1052        return $output;
1053    }
1054
1055    /**
1056     * From a line in the signature, extract all the users linked to
1057     *
1058     * @param string $line Line of text potentially including linked user, user talk,
1059     *  and contribution pages
1060     * @return string[] array of usernames, empty array for none detected
1061     */
1062    public static function extractUsersFromLine( $line ) {
1063        /*
1064         * Signatures can look like anything (as defined by i18n messages
1065         * "signature" & "signature-anon").
1066         * A signature can, e.g., be both a link to user & user-talk page.
1067         */
1068        // match all title-like excerpts in this line
1069        if ( !preg_match_all( '/\[\[([^\[]+)\]\]/', $line, $matches ) ) {
1070            return [];
1071        }
1072
1073        $matches = $matches[1];
1074
1075        $usernames = [];
1076
1077        foreach ( $matches as $match ) {
1078            /*
1079             * Create an object out of the link title.
1080             * In theory, links can be [[text]], [[text|text]] or pipe tricks
1081             * [[text|]] or [[|text]].
1082             * In the case of reverse pipe trick, the value we use *could* be
1083             * empty, but Parser::pstPass2 should have normalized that for us
1084             * already.
1085             */
1086            $match = explode( '|', $match, 2 );
1087            $title = Title::newFromText( $match[0] );
1088
1089            // figure out if the link is related to a user
1090            if (
1091                $title &&
1092                ( $title->getNamespace() === NS_USER || $title->getNamespace() === NS_USER_TALK )
1093            ) {
1094                $usernames[] = $title->getText();
1095            } elseif ( $title && $title->isSpecial( 'Contributions' ) ) {
1096                $parts = explode( '/', $title->getText(), 2 );
1097                $usernames[] = end( $parts );
1098            } else {
1099                // move on to next matched title-like excerpt
1100                continue;
1101            }
1102        }
1103
1104        return $usernames;
1105    }
1106
1107    /**
1108     * From a line in a wiki page, determine which user, if any,
1109     *  has signed it.
1110     *
1111     * @param string $line
1112     * @param Title|null $title
1113     * @return array|false False for none, array for success.
1114     * - First element is the position of the signature.
1115     * - Second element is the normalised user name.
1116     */
1117    public static function getUserFromLine( $line, Title $title = null ) {
1118        $parser = MediaWikiServices::getInstance()->getParser();
1119
1120        /*
1121         * First we call extractUsersFromLine to get all the potential usernames
1122         * from the line.  Then, we loop backwards through them, figure out which
1123         * match to a user, regenerate the signature based on that user, and
1124         * see if it matches!
1125         */
1126        $usernames = self::extractUsersFromLine( $line );
1127        $usernames = array_reverse( $usernames );
1128        foreach ( $usernames as $username ) {
1129            // generate (dateless) signature from the user we think we've
1130            // discovered the signature from
1131            // don't validate the username - anon (IP) is fine!
1132            $user = User::newFromName( $username, false );
1133            $sig = $parser->preSaveTransform(
1134                '~~~',
1135                $title ?: Title::newMainPage(),
1136                $user,
1137                new ParserOptions( $user )
1138            );
1139
1140            // see if we can find this user's generated signature in the content
1141            $pos = strrpos( $line, $sig );
1142            if ( $pos !== false ) {
1143                return [ $pos, $username ];
1144            }
1145            // couldn't find sig, move on to next link excerpt and try there
1146        }
1147
1148        // couldn't find any matching signature
1149        return false;
1150    }
1151
1152    /**
1153     * Find the last link beginning with a given prefix on a line.
1154     *
1155     * @param string $line The line to search.
1156     * @param string $linkPrefix The prefix to search for.
1157     * @param int|false $failureOffset
1158     * @return array|false False for failure, array for success.
1159     * - First element is the string offset of the link.
1160     * - Second element is the user the link refers to.
1161     */
1162    private static function getLinkFromLine( $line, $linkPrefix, $failureOffset = false ) {
1163        $offset = 0;
1164
1165        // If extraction failed at another offset, try again.
1166        if ( $failureOffset !== false ) {
1167            $offset = $failureOffset - strlen( $line ) - 1;
1168        }
1169
1170        // Avoid PHP warning: Offset is greater than the length of haystack string
1171        if ( abs( $offset ) > strlen( $line ) ) {
1172            return false;
1173        }
1174
1175        $linkPos = strripos( $line, $linkPrefix, $offset );
1176
1177        if ( $linkPos === false ) {
1178            return false;
1179        }
1180
1181        $linkUser = self::extractUserFromLink( $line, $linkPrefix, $linkPos );
1182
1183        if ( $linkUser === false ) {
1184            // Look for another place.
1185            return self::getLinkFromLine( $line, $linkPrefix, $linkPos );
1186        } else {
1187            return [ $linkPos, $linkUser ];
1188        }
1189    }
1190
1191    /**
1192     * Given text including a link, gives the user that that link refers to
1193     *
1194     * @param string $text The text to extract from.
1195     * @param string $prefix The link prefix that was used to find the link.
1196     * @param int $offset Optionally, the offset of the start of the link.
1197     * @return bool|string Type description
1198     */
1199    private static function extractUserFromLink( $text, $prefix, $offset = 0 ) {
1200        $userPart = substr( $text, strlen( $prefix ) + $offset );
1201
1202        $userMatches = [];
1203        if ( !preg_match(
1204            '/^[^\|\]\#]+/u',
1205            $userPart,
1206            $userMatches
1207        ) ) {
1208            // user link is invalid
1209            return false;
1210        }
1211
1212        $user = $userMatches[0];
1213        $userNameUtils = MediaWikiServices::getInstance()->getUserNameUtils();
1214        if (
1215            !$userNameUtils->isIP( $user ) &&
1216            $userNameUtils->getCanonical( $user ) === false
1217        ) {
1218            // Not a real username
1219            return false;
1220        }
1221
1222        return $userNameUtils->getCanonical( $userMatches[0], UserNameUtils::RIGOR_NONE );
1223    }
1224
1225    /**
1226     * Gets a regular expression that will match this wiki's
1227     * timestamps as given by ~~~~.
1228     *
1229     * @return string regular expression fragment.
1230     */
1231    public static function getTimestampRegex() {
1232        if ( self::$timestampRegex !== null ) {
1233            return self::$timestampRegex;
1234        }
1235
1236        // Step 1: Get an exemplar timestamp
1237        $title = Title::newMainPage();
1238        $user = User::newFromName( 'Test' );
1239        $options = new ParserOptions( $user );
1240
1241        $parser = MediaWikiServices::getInstance()->getParser();
1242        $exemplarTimestamp =
1243            $parser->preSaveTransform( '~~~~~', $title, $user, $options );
1244
1245        // Step 2: Generalise it
1246        // Trim off the timezone to replace at the end
1247        $output = $exemplarTimestamp;
1248        $tzRegex = '/\h*\(\w+\)\h*$/u';
1249        $tzMatches = [];
1250        if ( preg_match( $tzRegex, $output, $tzMatches, PREG_OFFSET_CAPTURE ) ) {
1251            $output = substr( $output, 0, $tzMatches[0][1] );
1252        }
1253        $output = preg_quote( $output, '/' );
1254        $output = preg_replace( '/[^\d\W]+/u', '[^\d\W]+', $output );
1255        $output = preg_replace( '/\d+/u', '\d+', $output );
1256
1257        if ( $tzMatches ) {
1258            $output .= preg_quote( $tzMatches[0][0] );
1259        }
1260
1261        if ( !preg_match( "/$output/u", $exemplarTimestamp ) ) {
1262            throw new RuntimeException( "Timestamp regex does not match exemplar" );
1263        }
1264
1265        self::$timestampRegex = $output;
1266
1267        return $output;
1268    }
1269
1270    /**
1271     * Parse wikitext into truncated plain text.
1272     * @param string $text
1273     * @param Language $lang
1274     * @param int $length Length in characters (not bytes); default DEFAULT_SNIPPET_LENGTH
1275     * @param Title|null $title Page from which the text snippet is being extracted
1276     * @param bool $linestart Whether or not this is at the start of a line
1277     * @return string
1278     */
1279    public static function getTextSnippet(
1280        $text, Language $lang, $length = self::DEFAULT_SNIPPET_LENGTH, $title = null, $linestart = true
1281    ) {
1282        // Parse wikitext
1283        $html = MediaWikiServices::getInstance()->getMessageCache()->parse( $text, $title, $linestart )->getText( [
1284            'enableSectionEditLinks' => false
1285        ] );
1286        $plaintext = trim( Sanitizer::stripAllTags( $html ) );
1287        return $lang->truncateForVisual( $plaintext, $length );
1288    }
1289
1290    /**
1291     * Parse an edit summary into truncated plain text.
1292     * @param string $text
1293     * @param Language $lang
1294     * @param int $length Length in characters (not bytes); default DEFAULT_SNIPPET_LENGTH
1295     * @return string
1296     */
1297    public static function getTextSnippetFromSummary( $text, Language $lang, $length = self::DEFAULT_SNIPPET_LENGTH ) {
1298        // Parse wikitext with summary parser
1299        $html = MediaWikiServices::getInstance()->getCommentFormatter()
1300            ->formatLinks( Sanitizer::escapeHtmlAllowEntities( $text ) );
1301        $plaintext = trim( Sanitizer::stripAllTags( $html ) );
1302        return $lang->truncateForVisual( $plaintext, $length );
1303    }
1304
1305    /**
1306     * Extract an edit excerpt from a revision
1307     *
1308     * @param RevisionRecord $revision
1309     * @param Language $lang
1310     * @param int $length Length in characters (not bytes); default DEFAULT_SNIPPET_LENGTH
1311     * @return string
1312     */
1313    public static function getEditExcerpt(
1314        RevisionRecord $revision, Language $lang, $length = self::DEFAULT_SNIPPET_LENGTH
1315    ) {
1316        $interpretation = self::getChangeInterpretationForRevision( $revision );
1317        $section = self::detectSectionTitleAndText( $interpretation );
1318        return $lang->truncateForVisual( $section['section-title'] . ' ' . $section['section-text'], $length );
1319    }
1320}
1321
// Makes DiscussionParser also available under the name 'EchoDiscussionParser'
// (presumably the class's earlier name, kept for existing callers - confirm before removing).
1322class_alias( DiscussionParser::class, 'EchoDiscussionParser' );