Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
85.22% |
496 / 582 |
|
62.86% |
22 / 35 |
CRAP | |
0.00% |
0 / 1 |
DiscussionParser | |
85.37% |
496 / 581 |
|
62.86% |
22 / 35 |
242.17 | |
0.00% |
0 / 1 |
generateEventsForRevision | |
87.96% |
95 / 108 |
|
0.00% |
0 / 1 |
30.47 | |||
detectSectionTitleAndText | |
100.00% |
28 / 28 |
|
100.00% |
1 / 1 |
7 | |||
generateMentionEvents | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
collectMentionEvents | |
82.50% |
66 / 80 |
|
0.00% |
0 / 1 |
11.65 | |||
getOverallUserMentionsCount | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getUserMentions | |
84.62% |
33 / 39 |
|
0.00% |
0 / 1 |
10.36 | |||
getUserLinks | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
hasSubpage | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
parseNonEditWikitext | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
getChangeInterpretationForRevision | |
96.30% |
26 / 27 |
|
0.00% |
0 / 1 |
9 | |||
interpretDiff | |
89.66% |
78 / 87 |
|
0.00% |
0 / 1 |
18.36 | |||
hasNewSignature | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
convertToUnknownSignedChanges | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 | |||
isInSignedSection | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
4 | |||
getFullSection | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getSectionSpan | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getSectionStartIndex | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
getSectionEndIndex | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
getSectionCount | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
extractHeader | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
extractSections | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
5 | |||
stripSignature | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
stripHeader | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isSignedComment | |
55.56% |
5 / 9 |
|
0.00% |
0 / 1 |
3.79 | |||
getTimestampPosition | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
getMachineReadableDiff | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
extractSignatures | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
3 | |||
extractUsersFromLine | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
8 | |||
getUserFromLine | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
4 | |||
getLinkFromLine | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
extractUserFromLink | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
20 | |||
getTimestampRegex | |
95.45% |
21 / 22 |
|
0.00% |
0 / 1 |
5 | |||
getTextSnippet | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getTextSnippetFromSummary | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
getEditExcerpt | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\Notifications; |
4 | |
5 | use Article; |
6 | use IDBAccessObject; |
7 | use Language; |
8 | use MediaWiki\Extension\Notifications\Hooks\HookRunner; |
9 | use MediaWiki\Extension\Notifications\Model\Event; |
10 | use MediaWiki\MediaWikiServices; |
11 | use MediaWiki\Parser\Sanitizer; |
12 | use MediaWiki\Revision\RevisionRecord; |
13 | use MediaWiki\Revision\SlotRecord; |
14 | use MediaWiki\Title\Title; |
15 | use MediaWiki\User\User; |
16 | use MediaWiki\User\UserNameUtils; |
17 | use ParserOptions; |
18 | use ParserOutput; |
19 | use RequestContext; |
20 | use RuntimeException; |
21 | use TextContent; |
22 | |
23 | abstract class DiscussionParser { |
24 | private const HEADER_REGEX = '^(==+)\h*([^=].*)\h*\1$'; |
25 | |
26 | public const DEFAULT_SNIPPET_LENGTH = 150; |
27 | |
28 | /** @var string|null */ |
29 | protected static $timestampRegex; |
30 | |
31 | /** |
32 | * @var array[][] |
33 | * FIXME: This static cache can become stale in tests, because it's never reset. We use both rev IDs and title keys |
34 | * to mitigate that, but it might still break! |
35 | */ |
36 | protected static $revisionInterpretationCache = []; |
37 | |
38 | /** @var DiffParser|null */ |
39 | protected static $diffParser = null; |
40 | |
/**
 * Given a RevisionRecord object, generates Event objects for
 * the discussion-related actions that occurred in that revision.
 *
 * The following event definitions are collected:
 * - mention related events for signed talk page additions and changes,
 * - 'edit-user-talk' when the page is in NS_USER_TALK and belongs to a registered user,
 * - 'edit-user-page' when the page is in NS_USER and belongs to a registered user,
 * - 'mention-summary' for users linked in the edit summary.
 * The collected list is passed through the EchoGetEventsForRevision hook and every
 * remaining entry is then handed to Event::create().
 *
 * @param RevisionRecord $revision
 * @param bool $isRevert When true, mentions in the edit summary are ignored.
 */
public static function generateEventsForRevision( RevisionRecord $revision, $isRevert ) {
	global $wgEchoMentionsOnMultipleSectionEdits;
	global $wgEchoMentionOnChanges;
	global $wgEchoMaxMentionsInEditSummary;

	$services = MediaWikiServices::getInstance();
	$store = $services->getRevisionStore();

	// use the replica database if there is a previous revision
	if ( $store->getPreviousRevision( $revision ) ) {
		$title = Title::newFromID( $revision->getPageId() );
	// use the primary database for new page
	} else {
		$title = Title::newFromID( $revision->getPageId(), IDBAccessObject::READ_LATEST );
	}

	// not a valid title
	if ( !$title ) {
		return;
	}

	$events = [];

	$interpretation = self::getChangeInterpretationForRevision( $revision );

	$userID = $revision->getUser()->getId();
	$userName = $revision->getUser()->getName();
	$user = $userID !== 0 ? User::newFromId( $userID ) : User::newFromName( $userName, false );

	foreach ( $interpretation as $action ) {
		if ( $action['type'] === 'add-comment' ) {
			$fullSection = $action['full-section'];
			$header = self::extractHeader( $fullSection );
			$userLinks = self::getUserLinks( $action['content'], $title );
			$events = array_merge(
				$events,
				self::collectMentionEvents( $header, $userLinks, $action['content'], $revision, $user )
			);
		} elseif ( $action['type'] === 'new-section-with-comment' ) {
			$content = $action['content'];
			$header = self::extractHeader( $content );
			$userLinks = self::getUserLinks( $content, $title );
			$events = array_merge(
				$events,
				self::collectMentionEvents( $header, $userLinks, $content, $revision, $user )
			);
		} elseif ( $action['type'] === 'add-section-multiple' && $wgEchoMentionsOnMultipleSectionEdits ) {
			$content = self::stripHeader( $action['content'] );
			$content = self::stripSignature( $content );
			$userLinks = self::getUserLinks( $content, $title );
			$events = array_merge(
				$events,
				self::collectMentionEvents( $action['header'], $userLinks, $content, $revision, $user )
			);
		} elseif ( $action['type'] === 'unknown-signed-change' ) {
			// Only links that were not already present in the old content count as new mentions
			$userLinks = array_diff_key(
				self::getUserLinks( $action['new_content'], $title ),
				self::getUserLinks( $action['old_content'], $title )
			);
			$header = self::extractHeader( $action['full-section'] );

			if ( $wgEchoMentionOnChanges ) {
				$events = array_merge(
					$events,
					self::collectMentionEvents( $header, $userLinks, $action['new_content'], $revision, $user )
				);
			}
		}
	}

	if ( $title->getNamespace() === NS_USER_TALK ) {
		$notifyUser = User::newFromName( $title->getText() );
		// If the recipient is a valid non-anonymous user generate a talk page post notification.
		if ( $notifyUser && $notifyUser->getId() ) {
			$permManager = $services->getPermissionManager();
			// If this is a minor edit, only notify if the agent doesn't have talk page minor
			// edit notification blocked
			if ( !$revision->isMinor() || !$permManager->userHasRight( $user, 'nominornewtalk' ) ) {
				$section = self::detectSectionTitleAndText( $interpretation, $title );
				if ( $section['section-text'] === '' ) {
					// Fall back to the edit summary when no section text could be detected
					$comment = $revision->getComment( RevisionRecord::FOR_PUBLIC, $notifyUser );
					if ( $comment ) {
						$section['section-text'] = $comment->text;
					}
				}
				$events[] = [
					'type' => 'edit-user-talk',
					'title' => $title,
					'extra' => [
						'revid' => $revision->getId(),
						'minoredit' => $revision->isMinor(),
						'section-title' => $section['section-title'],
						'section-text' => $section['section-text'],
						'target-page' => $title->getArticleID(),
					],
					'agent' => $user,
				];
			}
		}
	} elseif ( $title->inNamespace( NS_USER ) ) {
		$notifyUser = User::newFromName( $title->getText() );
		// If the recipient is a valid non-anonymous user, generate a user page edit notification.
		if ( $notifyUser && $notifyUser->getId() ) {
			$events[] = [
				'type' => 'edit-user-page',
				'title' => $title,
				'extra' => [
					'revid' => $revision->getId(),
				],
				'agent' => $user,
			];
		}
	}

	// Notify users mentioned in edit summary
	if ( $wgEchoMaxMentionsInEditSummary > 0 && !$user->isBot() && !$isRevert ) {
		// getComment() can yield null (the talk page branch above guards against that too);
		// without a summary there is nothing to scan for mentions.
		$summary = $revision->getComment();
		$usersInSummary = [];
		if ( $summary ) {
			$summaryParser = new SummaryParser();
			$usersInSummary = $summaryParser->parse( $summary->text );
		}

		// Don't allow pinging yourself
		unset( $usersInSummary[$userName] );

		$count = 0;
		$mentionedUsers = [];
		foreach ( $usersInSummary as $summaryUser ) {
			if ( $summaryUser->getTalkPage()->equals( $title ) ) {
				// Users already get a ping when their talk page is edited
				continue;
			}
			if ( $count >= $wgEchoMaxMentionsInEditSummary ) {
				break;
			}
			$mentionedUsers[$summaryUser->getId()] = $summaryUser->getId();
			$count++;
		}

		if ( $mentionedUsers ) {
			$events[] = [
				'type' => 'mention-summary',
				'title' => $title,
				'extra' => [
					'revid' => $revision->getId(),
					'mentioned-users' => $mentionedUsers,
				],
				'agent' => $user,
			];
		}
	}

	// Allow extensions to generate more events for a revision, and de-duplicate
	// against the standard events created above.
	( new HookRunner( $services->getHookContainer() ) )
		->onEchoGetEventsForRevision( $events, $revision, $isRevert );

	// Create events
	foreach ( $events as $event ) {
		Event::create( $event );
	}
}
208 | |
/**
 * Attempts to determine what section title the edit was performed under (if any).
 *
 * Only 'add-comment' and 'new-section-with-comment' actions update the header and the
 * text snippet. Once a header has been found, processing any further action while a
 * header is set makes the result ambiguous, and empty strings are returned instead.
 *
 * @param array[] $interpretation Results of {@see getChangeInterpretationForRevision}
 * @param Title|null $title
 * @return string[] Array with the keys 'section-title' and 'section-text'
 */
public static function detectSectionTitleAndText( array $interpretation, ?Title $title = null ) {
	$header = $snippet = '';
	$found = false;

	foreach ( $interpretation as $action ) {
		$type = $action['type'];
		if ( $type === 'add-comment' || $type === 'new-section-with-comment' ) {
			// A comment added to an existing section takes the header from the full section,
			// a new section carries the header in its own content.
			$headerSource = $type === 'add-comment' ? $action['full-section'] : $action['content'];
			$header = self::extractHeader( $headerSource );
			$snippet = self::getTextSnippet(
				self::stripSignature( self::stripHeader( $action['content'] ), $title ),
				RequestContext::getMain()->getLanguage(),
				self::DEFAULT_SNIPPET_LENGTH,
				$title
			);
		}
		if ( $header ) {
			// If we find a second header within the same change interpretation then
			// we cannot choose just 1 to link to
			if ( $found ) {
				$found = false;
				break;
			}
			$found = true;
		}
	}

	if ( !$found ) {
		return [ 'section-title' => '', 'section-text' => '' ];
	}

	return [ 'section-title' => $header, 'section-text' => $snippet ];
}
255 | |
/**
 * Creates the mention events for an action taken on a talk page, so that
 * users whose user pages are linked get notified.
 *
 * This collects the event definitions via collectMentionEvents() and passes
 * each of them to Event::create().
 *
 * @param string $header The subject line for the discussion.
 * @param int[] $userLinks
 * @param string $content The content of the post, as a wikitext string.
 * @param RevisionRecord $revision
 * @param User $agent The user who made the comment.
 */
public static function generateMentionEvents(
	$header,
	array $userLinks,
	$content,
	RevisionRecord $revision,
	User $agent
) {
	$eventDefinitions = self::collectMentionEvents( $header, $userLinks, $content, $revision, $agent );

	foreach ( $eventDefinitions as $eventInfo ) {
		Event::create( $eventInfo );
	}
}
277 | |
/**
 * Generate mention event data for a talk page action.
 *
 * Returns an empty list when there are no user links, when no title can be built for the
 * revision's page, or when none of the links results in a (valid or failed) mention.
 * When more than $wgEchoMaxMentionsCount mentions were collected, no 'mention' event is
 * produced; a single 'mention-failure-too-many' event is returned instead, and only if
 * $wgEchoMentionStatusNotifications is enabled.
 *
 * @param string $header The subject line for the discussion.
 * @param int[] $userLinks
 * @param string $content The content of the post, as a wikitext string.
 * @param RevisionRecord $revision
 * @param User $agent The user who made the comment.
 * @return array List of event info arrays
 */
protected static function collectMentionEvents(
	$header,
	array $userLinks,
	$content,
	RevisionRecord $revision,
	User $agent
) {
	global $wgEchoMaxMentionsCount, $wgEchoMentionStatusNotifications;

	// Without user links nobody can be mentioned; bail out before doing any other work
	if ( !$userLinks ) {
		return [];
	}

	$title = Title::newFromLinkTarget( $revision->getPageAsLinkTarget() );
	if ( !$title ) {
		return [];
	}

	$userMentions = self::getUserMentions(
		$title, $revision->getUser( RevisionRecord::RAW )->getId(), $userLinks
	);
	$overallMentionsCount = self::getOverallUserMentionsCount( $userMentions );
	if ( $overallMentionsCount === 0 ) {
		return [];
	}

	$events = [];
	$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();

	if ( $overallMentionsCount > $wgEchoMaxMentionsCount ) {
		if ( $wgEchoMentionStatusNotifications ) {
			$events[] = [
				'type' => 'mention-failure-too-many',
				'title' => $title,
				'extra' => [
					'max-mentions' => $wgEchoMaxMentionsCount,
					'section-title' => $header,
				],
				'agent' => $agent,
			];
			$stats->increment( 'echo.event.mention.notification.failure-too-many' );
		}
		return $events;
	}

	if ( $userMentions['validMentions'] ) {
		// The cleaned up post content is only needed for the actual mention event,
		// so the header and signature are stripped here rather than up front.
		$strippedContent = self::stripSignature( self::stripHeader( $content ), $title );
		$events[] = [
			'type' => 'mention',
			'title' => $title,
			'extra' => [
				'content' => $strippedContent,
				'section-title' => $header,
				'revid' => $revision->getId(),
				'mentioned-users' => $userMentions['validMentions'],
			],
			'agent' => $agent,
		];
	}

	if ( $wgEchoMentionStatusNotifications ) {
		// TODO batch?
		foreach ( $userMentions['validMentions'] as $mentionedUserId ) {
			$events[] = [
				'type' => 'mention-success',
				'title' => $title,
				'extra' => [
					'subject-name' => User::newFromId( $mentionedUserId )->getName(),
					'section-title' => $header,
					'revid' => $revision->getId(),
				],
				'agent' => $agent,
			];
			$stats->increment( 'echo.event.mention.notification.success' );
		}

		// TODO batch?
		foreach ( $userMentions['anonymousUsers'] as $anonymousUser ) {
			$events[] = [
				'type' => 'mention-failure',
				'title' => $title,
				'extra' => [
					'failure-type' => 'user-anonymous',
					'subject-name' => $anonymousUser,
					'section-title' => $header,
					'revid' => $revision->getId(),
				],
				'agent' => $agent,
			];
			$stats->increment( 'echo.event.mention.notification.failure-user-anonymous' );
		}

		// TODO batch?
		foreach ( $userMentions['unknownUsers'] as $unknownUser ) {
			$events[] = [
				'type' => 'mention-failure',
				'title' => $title,
				'extra' => [
					'failure-type' => 'user-unknown',
					'subject-name' => $unknownUser,
					'section-title' => $header,
					'revid' => $revision->getId(),
				],
				'agent' => $agent,
			];
			$stats->increment( 'echo.event.mention.notification.failure-user-unknown' );
		}
	}

	return $events;
}
399 | |
/**
 * Counts the entries nested inside the given mention groups.
 *
 * The recursive count also includes the top-level group entries themselves,
 * which is why their number is subtracted again.
 *
 * @param array[] $userMentions
 * @return int
 */
private static function getOverallUserMentionsCount( array $userMentions ) {
	$groupCount = count( $userMentions );
	$recursiveCount = count( $userMentions, COUNT_RECURSIVE );

	return $recursiveCount - $groupCount;
}
403 | |
/**
 * Sorts the users linked in a post into valid mentions and failed mention attempts.
 *
 * Links to subpages, links to the revision author and links to the owner of the user
 * talk page being edited are skipped silently. Processing stops once more than
 * $wgEchoMaxMentionsCount entries have been recorded.
 *
 * @param Title $title
 * @param int $revisionUserId
 * @param int[] $userLinks
 * @return array[]
 * Set of arrays containing valid mentions and possible intended but failed mentions.
 * - [validMentions]: An array of valid users to mention with ID => ID.
 * - [unknownUsers]: An array of DBKey strings representing unknown users.
 * - [anonymousUsers]: An array of DBKey strings representing anonymous IP users.
 */
public static function getUserMentions( Title $title, $revisionUserId, array $userLinks ) {
	global $wgEchoMaxMentionsCount;

	$userMentions = [
		'validMentions' => [],
		'unknownUsers' => [],
		'anonymousUsers' => [],
	];

	$count = 0;
	$services = MediaWikiServices::getInstance();
	$stats = $services->getStatsdDataFactory();
	$userNameUtils = $services->getUserNameUtils();

	foreach ( $userLinks as $dbk => $page_id ) {
		// PHP turns array keys that look like decimal integers into ints. Cast back to a
		// string so that the strict comparison with the title's DB key below still works
		// for user names that consist of digits only.
		$dbk = (string)$dbk;

		// If more users are being pinged this is likely a spam/attack vector
		// Don't send any mention notifications.
		if ( $count > $wgEchoMaxMentionsCount ) {
			$stats->increment( 'echo.event.mention.error.tooMany' );
			break;
		}

		// we should not add user to 'mention' notification list if
		// 1. the user link links to a subpage
		if ( self::hasSubpage( $dbk ) ) {
			continue;
		}

		// 2. user is an anonymous IP
		if ( $userNameUtils->isIP( $dbk ) ) {
			$userMentions['anonymousUsers'][] = $dbk;
			$count++;
			$stats->increment( 'echo.event.mention.error.anonUser' );
			continue;
		}

		$user = User::newFromName( $dbk );
		// 3. the user name is not valid
		if ( !$user ) {
			$userMentions['unknownUsers'][] = str_replace( '_', ' ', $dbk );
			$count++;
			$stats->increment( 'echo.event.mention.error.invalidUser' );
			continue;
		}

		// 4. the user mentions themselves
		if ( $user->getId() === $revisionUserId ) {
			$stats->increment( 'echo.event.mention.error.sameUser' );
			continue;
		}

		// 5. the user is the owner of the talk page
		if ( $title->getNamespace() === NS_USER_TALK && $title->getDBkey() === $dbk ) {
			$stats->increment( 'echo.event.mention.error.ownPage' );
			continue;
		}

		// 6. user does not exist
		if ( $user->getId() === 0 ) {
			$userMentions['unknownUsers'][] = str_replace( '_', ' ', $dbk );
			$count++;
			$stats->increment( 'echo.event.mention.error.unknownUser' );
			continue;
		}

		$userMentions['validMentions'][$user->getId()] = $user->getId();
		$count++;
	}

	return $userMentions;
}
484 | |
/**
 * Parses the given wikitext in the context of $title and returns the links
 * that point into the user namespace.
 *
 * @param string $content
 * @param Title $title
 * @return int[]
 * Array of links in the user namespace with DBKey => ID; empty when
 * the parser output has no array of links for NS_USER.
 */
public static function getUserLinks( $content, Title $title ) {
	$parserOutput = self::parseNonEditWikitext( $content, new Article( $title ) );
	$userNamespaceLinks = $parserOutput->getLinks()[NS_USER] ?? null;

	return is_array( $userNamespaceLinks ) ? $userNamespaceLinks : [];
}
501 | |
/**
 * Tells whether a DB key contains a slash, i.e. points to a subpage.
 *
 * @param string $dbk
 * @return bool
 */
private static function hasSubpage( $dbk ) {
	$slashPosition = strpos( $dbk, '/' );

	// strpos() yields an int position on success and false when there is no slash
	return is_int( $slashPosition );
}
505 | |
/**
 * It's like Article::prepareTextForEdit,
 * but not for editing (old wikitext usually)
 * Stolen from AbuseFilter's VariableHolder
 *
 * Results are memoized in a function-static array, keyed by the MD5 hash of
 * the wikitext combined with the prefixed text of the article's title.
 *
 * @param string $wikitext
 * @param Article $article
 *
 * @return ParserOutput
 */
public static function parseNonEditWikitext( $wikitext, Article $article ) {
	static $cache = [];

	$key = md5( $wikitext ) . ':' . $article->getTitle()->getPrefixedText();

	if ( !isset( $cache[$key] ) ) {
		$parser = MediaWikiServices::getInstance()->getParser();
		$parserOptions = new ParserOptions( $article->getContext()->getUser() );

		$cache[$key] = $parser->parse( $wikitext, $article->getTitle(), $parserOptions );
	}

	return $cache[$key];
}
533 | |
/**
 * Given a RevisionRecord object, returns a talk-page-centric interpretation
 * of the changes made in it.
 *
 * The main slot text of the revision is diffed against the main slot text of its
 * parent revision (or against an empty string when there is none). Content that is
 * not TextContent is treated as an empty string. Results for revisions with an ID are
 * kept in self::$revisionInterpretationCache.
 *
 * @param RevisionRecord $revision
 * @see DiscussionParser::interpretDiff
 * @return array[] See {@see interpretDiff} for details.
 */
private static function getChangeInterpretationForRevision( RevisionRecord $revision ) {
	$cacheKey = null;
	if ( $revision->getId() ) {
		$page = $revision->getPage();
		$cacheKey = implode( '|', [ $revision->getId(), $page->getNamespace(), $page->getDBkey() ] );
		if ( isset( self::$revisionInterpretationCache[$cacheKey] ) ) {
			return self::$revisionInterpretationCache[$cacheKey];
		}
	}

	$userIdentity = $revision->getUser();

	$oldText = '';
	if ( $revision->getParentId() ) {
		$parentRevision = MediaWikiServices::getInstance()
			->getRevisionStore()
			->getRevisionById( $revision->getParentId() );
		if ( $parentRevision ) {
			$parentContent = $parentRevision->getContent( SlotRecord::MAIN );
			if ( $parentContent instanceof TextContent ) {
				$oldText = $parentContent->getText();
			}
		}
	}

	$currentContent = $revision->getContent( SlotRecord::MAIN );
	$newText = '';
	if ( $currentContent instanceof TextContent ) {
		$newText = $currentContent->getText();
	}

	$diff = self::getMachineReadableDiff( $oldText, $newText );
	$authorName = $userIdentity ? $userIdentity->getName() : '';
	$interpretation = self::interpretDiff(
		$diff,
		$authorName,
		Title::newFromLinkTarget( $revision->getPageAsLinkTarget() )
	);

	// Revisions without an ID have no cache key and are never cached
	if ( $cacheKey !== null ) {
		self::$revisionInterpretationCache[$cacheKey] = $interpretation;
	}

	return $interpretation;
}
582 | |
/**
 * Given a machine-readable diff, interprets the changes
 * in terms of discussion page actions
 *
 * @todo Expand recognisable actions.
 *
 * @param array[] $changes Output of {@see getMachineReadableDiff}. Entries with a numeric
 *  key are the individual changes; the '_info' entry holds the lines of the new text in 'rhs'.
 * @param string $username Name of the user who made the change
 * @param Title|null $title
 * @return array[] Array of associative arrays.
 *
 * Each entry represents an action, which is classified in the 'type' field.
 * All types contain a 'content' field except 'unknown'
 * (which instead passes through the machine-readable diff in 'details')
 * and 'unknown-change' (which provides 'new_content' and 'old_content')
 * type may be:
 * - add-comment: A comment signed by the user is added to an
 *    existing section.
 * - new-section-with-comment: A new section is added, containing
 *    a single comment signed by the user in question.
 * - add-section-multiple: A new section or additions to a section
 *    while editing multiple sections at once.
 * - unknown-multi-signed-addition: Some signed content is added,
 *    but it contains multiple signatures.
 * - unknown-unsigned-addition: Some content is added, but it is
 *    unsigned.
 * - unknown-subtraction: Some content was removed. These actions are
 *    not currently analysed.
 * - unknown-change: Some content was replaced with other content.
 * - unknown-signed-change: Same as unknown-change, but signed.
 * - unknown-multi-signed-change: Same as unknown-change,
 *    but it contains multiple signatures.
 * - unknown: Unrecognised change type.
 */
public static function interpretDiff( array $changes, $username, ?Title $title = null ) {
	// One extra item in $changes for _info
	$actions = [];
	// Spans ([first line, last line]) of the sections the user signed in this diff
	$signedSections = [];

	foreach ( $changes as $index => $change ) {
		// Skips the non-numeric '_info' entry
		if ( !is_numeric( $index ) ) {
			continue;
		}

		if ( !$change['action'] ) {
			// Unknown action; skip
			continue;
		}

		if ( $change['action'] === 'add' ) {
			$content = trim( $change['content'] );
			// The \A means the regex must match at the beginning of the string.
			// This is slightly different than ^ which matches beginning of each
			// line in multiline mode.
			$startSection = preg_match( '/\A' . self::HEADER_REGEX . '/um', $content );
			$sectionCount = self::getSectionCount( $content );
			$signedUsers = self::extractSignatures( $content, $title );

			if (
				count( $signedUsers ) === 1 &&
				isset( $signedUsers[$username] )
			) {
				if ( $sectionCount === 0 ) {
					$signedSections[] = self::getSectionSpan( $change['right-pos'], $changes['_info']['rhs'] );
					$fullSection = self::getFullSection( $changes['_info']['rhs'], $change['right-pos'] );
					$actions[] = [
						'type' => 'add-comment',
						'content' => $content,
						'full-section' => $fullSection,
					];
				} elseif ( $startSection && $sectionCount === 1 ) {
					$signedSections[] = self::getSectionSpan( $change['right-pos'], $changes['_info']['rhs'] );
					$actions[] = [
						'type' => 'new-section-with-comment',
						'content' => $content,
					];
				} else {
					// The addition touches several sections; look at each of them separately
					$nextSectionStart = $change['right-pos'];
					$sectionData = self::extractSections( $content );
					foreach ( $sectionData as $section ) {
						$sectionSpan = self::getSectionSpan( $nextSectionStart, $changes['_info']['rhs'] );
						$nextSectionStart = $sectionSpan[1] + 1;
						$sectionSignedUsers = self::extractSignatures( $section['content'], $title );
						if ( $sectionSignedUsers ) {
							$signedSections[] = $sectionSpan;
							if ( !$section['header'] ) {
								// No header in the added text itself, fall back to the
								// header of the section the change was made in
								$fullSection = self::getFullSection(
									$changes['_info']['rhs'],
									$change['right-pos']
								);
								$section['header'] = self::extractHeader( $fullSection );
							}
							$actions[] = [
								'type' => 'add-section-multiple',
								'content' => $section['content'],
								'header' => $section['header'],
							];
						} else {
							$actions[] = [
								'type' => 'unknown-unsigned-addition',
								'content' => $section['content'],
							];
						}
					}
				}
			} elseif ( $signedUsers !== [] ) {
				$actions[] = [
					'type' => 'unknown-multi-signed-addition',
					'content' => $content,
				];
			} else {
				$actions[] = [
					'type' => 'unknown-unsigned-addition',
					'content' => $content,
				];
			}
		} elseif ( $change['action'] === 'subtract' ) {
			$actions[] = [
				'type' => 'unknown-subtraction',
				'content' => $change['content'],
			];
		} elseif ( $change['action'] === 'change' ) {
			$actions[] = [
				'type' => 'unknown-change',
				'old_content' => $change['old_content'],
				'new_content' => $change['new_content'],
				'right-pos' => $change['right-pos'],
				'full-section' => self::getFullSection( $changes['_info']['rhs'], $change['right-pos'] ),
			];

			if ( self::hasNewSignature(
				$change['old_content'],
				$change['new_content'],
				$username,
				$title
			) ) {
				$signedSections[] = self::getSectionSpan( $change['right-pos'], $changes['_info']['rhs'] );
			}
		} else {
			$actions[] = [
				'type' => 'unknown',
				'details' => $change,
			];
		}
	}

	// 'unknown-change' actions located in a signed section are upgraded to signed changes
	if ( $signedSections ) {
		$actions = self::convertToUnknownSignedChanges( $signedSections, $actions );
	}

	return $actions;
}
735 | |
/**
 * Checks whether the given user signed the new content without having signed the old content.
 *
 * @param string $oldContent
 * @param string $newContent
 * @param string $username
 * @param Title|null $title
 * @return bool
 */
private static function hasNewSignature( $oldContent, $newContent, $username, $title ) {
	$signersBefore = self::extractSignatures( $oldContent, $title );
	$signersAfter = self::extractSignatures( $newContent, $title );

	if ( isset( $signersBefore[$username] ) ) {
		// The signature was already there, so it is not new
		return false;
	}

	return isset( $signersAfter[$username] );
}
742 | |
/**
 * Re-labels "unknown-change" actions that are located in a signed section: they become
 * "unknown-signed-change" when their new content carries exactly one signature and
 * "unknown-multi-signed-change" otherwise. All other actions are passed through untouched.
 *
 * @param array[] $signedSections Array of arrays containing first and last line number of signed sections
 * @param array[] $actions
 * @return array[] Converted actions
 */
private static function convertToUnknownSignedChanges( array $signedSections, array $actions ) {
	$converted = [];

	foreach ( $actions as $key => $action ) {
		$isChangeInSignedSection = $action['type'] === 'unknown-change'
			&& self::isInSignedSection( $action['right-pos'], $signedSections );

		if ( $isChangeInSignedSection ) {
			$signatureCount = count( self::extractSignatures( $action['new_content'], null ) );
			$action['type'] = $signatureCount === 1
				? 'unknown-signed-change'
				: 'unknown-multi-signed-change';
		}

		$converted[$key] = $action;
	}

	return $converted;
}
767 | |
/**
 * Checks whether a line number lies within one of the given section spans.
 * The first line of a span is excluded, the last line is included.
 *
 * @param int $line
 * @param array[] $signedSections
 * @return bool
 */
private static function isInSignedSection( $line, array $signedSections ) {
	foreach ( $signedSections as $span ) {
		if ( $line <= $span[0] ) {
			// At or before the start of this span
			continue;
		}
		if ( $line <= $span[1] ) {
			return true;
		}
	}

	return false;
}
782 | |
/**
 * Finds the section that a given line is in.
 *
 * The section reaches from the closest header line above $offset (or from the first
 * line of the page when there is no such header) up to, but not including, the next
 * header line at or after $offset.
 *
 * @param array $lines of lines in the page.
 * @param int $offset The line to find the full section for.
 * @return string Content of the section.
 */
public static function getFullSection( array $lines, $offset ) {
	// getSectionStartIndex() yields -1 when no header precedes $offset. array_slice()
	// would interpret that as "start at the last element", so clamp it to the first line.
	$start = max( 0, self::getSectionStartIndex( $offset, $lines ) );
	$end = self::getSectionEndIndex( $offset, $lines );
	$content = implode( "\n", array_slice( $lines, $start, $end - $start ) );

	return trim( $content, "\n" );
}
797 | |
/**
 * Given a line number and a text, find the first and last line of the section the line number is in.
 * If there are subsections, the last line index will be the line before the beginning of the first subsection.
 *
 * The two values are exactly what getSectionStartIndex() and getSectionEndIndex()
 * return for the same arguments.
 *
 * @param int $offset line number
 * @param string[] $lines
 * @return int[] Tuple [$firstLine, $lastLine]
 */
private static function getSectionSpan( $offset, array $lines ) {
	$firstLine = self::getSectionStartIndex( $offset, $lines );
	$lastLine = self::getSectionEndIndex( $offset, $lines );

	return [ $firstLine, $lastLine ];
}
811 | |
/**
 * Finds the index of the header line that opens the section $offset is in.
 *
 * The search walks backwards starting at index $offset - 1 and stops at the
 * first line that contains a section header.
 *
 * @param int $offset
 * @param string[] $lines
 * @return int Index of the header line, or -1 when no header was found before
 *  reaching the start of the page.
 */
private static function getSectionStartIndex( $offset, array $lines ) {
	$index = $offset - 1;

	while ( $index >= 0 && !self::getSectionCount( $lines[$index] ) ) {
		$index--;
	}

	return $index;
}
827 | |
/**
 * Finds the index at which the section that $offset is in ends.
 *
 * The search walks forwards starting at index $offset and stops at the first
 * line that contains a section header.
 *
 * @param int $offset
 * @param array $lines
 * @return int Index of the next header line, or the number of lines when no
 *  further header was found.
 */
private static function getSectionEndIndex( $offset, array $lines ) {
	$total = count( $lines );
	$index = $offset;

	while ( $index < $total && !self::getSectionCount( $lines[$index] ) ) {
		$index++;
	}

	return $index;
}
844 | |
/**
 * Counts the section headers contained in a string.
 *
 * The text is trimmed and then matched against HEADER_REGEX in multi-line,
 * UTF-8 mode.
 *
 * @param string $text
 * @return int Number of section headers found.
 */
public static function getSectionCount( $text ) {
	$pattern = '/' . self::HEADER_REGEX . '/um';
	$found = preg_match_all( $pattern, trim( $text ) );

	return (int)$found;
}
856 | |
/**
 * Gets the title of a section or sub section.
 *
 * When the text contains several headers, the title of the last one wins.
 *
 * @param string $text The text of the section.
 * @return string|false The trimmed title of the section or false if not found
 */
public static function extractHeader( $text ) {
	$found = [];
	$pattern = '/' . self::HEADER_REGEX . '/um';

	if ( preg_match_all( $pattern, trim( $text ), $found ) ) {
		// The second capture group holds the header titles.
		$titles = $found[2];

		return trim( end( $titles ) );
	}

	return false;
}
874 | |
/**
 * Splits text into its sections.
 *
 * @param string $text The text to parse.
 * @return array[]
 *  One entry per section, each with:
 *  - [header]: The title of the section's header, or false for text that
 *    precedes the first header (or when there is no header at all).
 *  - [content]: The content of the section including the header string.
 */
private static function extractSections( $text ) {
	$found = [];
	$pattern = '/' . self::HEADER_REGEX . '/um';

	if ( !preg_match_all( $pattern, $text, $found, PREG_OFFSET_CAPTURE ) ) {
		// No header anywhere: the whole text is one header-less section.
		return [ [
			'header' => false,
			'content' => $text
		] ];
	}

	// Each entry is a [ matched header string, byte offset ] pair.
	$headers = $found[0];
	$firstOffset = $headers[0][1];
	$result = [];

	// is there text before the first headline?
	if ( $firstOffset > 1 ) {
		$result[] = [
			'header' => false,
			'content' => substr( $text, 0, $firstOffset - 1 )
		];
	}

	foreach ( $headers as $index => [ $headerText, $startsAt ] ) {
		$next = $headers[$index + 1] ?? null;
		if ( $next === null ) {
			// Last header: its section runs to the end of the text.
			$body = substr( $text, $startsAt );
		} else {
			$body = substr( $text, $startsAt, $next[1] - $startsAt );
		}

		$result[] = [
			'header' => self::extractHeader( $headerText ),
			'content' => trim( $body )
		];
	}

	return $result;
}
918 | |
/**
 * Strips out a signature if possible.
 *
 * If a user signature is recognised, the text is cut at its position.
 * Otherwise the text is cut at the first timestamp. If neither is present,
 * the text is returned unchanged.
 *
 * @param string $text The wikitext to strip
 * @param Title|null $title
 * @return string
 */
private static function stripSignature( $text, Title $title = null ) {
	$output = self::getUserFromLine( $text, $title );
	if ( $output === false ) {
		$timestampPos = self::getTimestampPosition( $text );
		if ( $timestampPos === false ) {
			// Nothing to strip. Passing false on to substr() as the length
			// would be read as 0 and wipe out the whole text.
			return $text;
		}

		return substr( $text, 0, $timestampPos );
	}

	// Use truncateForDatabase() instead of truncateHTML() because
	// truncateHTML() would not strip signature if the text contains
	// < or &. (And we can't use truncateForVisual() because
	// self::getUserFromLine() returns byte offsets, not character
	// offsets.)
	return MediaWikiServices::getInstance()->getContentLanguage()
		->truncateForDatabase( $text, $output[0], '' );
}
942 | |
/**
 * Removes every section header from a text.
 *
 * @param string $text The text to strip out the section header from.
 * @return string The same text, with everything matching HEADER_REGEX removed.
 */
private static function stripHeader( $text ) {
	$headerPattern = '/' . self::HEADER_REGEX . '/um';

	return preg_replace( $headerPattern, '', $text );
}
951 | |
/**
 * Determines whether the input is a signed comment.
 *
 * @param string $text The text to check.
 * @param User|bool $user If set, will only return true if the comment is
 *  signed by this user. The comparison is done on the canonical forms of
 *  both names.
 * @param Title|null $title
 * @return bool
 */
public static function isSignedComment( $text, $user = false, Title $title = null ) {
	$signature = self::getUserFromLine( $text, $title );

	if ( $signature === false ) {
		// No signature at all.
		return false;
	}

	if ( $user === false ) {
		// Any signature will do.
		return true;
	}

	$nameUtils = MediaWikiServices::getInstance()->getUserNameUtils();
	// Element 1 of the signature data is the user name that was found.
	$signedBy = $nameUtils->getCanonical( $signature[1], UserNameUtils::RIGOR_NONE );
	$expected = $nameUtils->getCanonical( $user, UserNameUtils::RIGOR_NONE );

	return $signedBy === $expected;
}
976 | |
/**
 * Locates the first timestamp on a line.
 *
 * @param string $line The line to search for a signature on
 * @return int|false Byte offset at which the timestamp starts, or false when
 *  the line does not match the timestamp regex.
 */
public static function getTimestampPosition( $line ) {
	$pattern = '/' . self::getTimestampRegex() . '/mu';
	$found = [];
	$matched = preg_match( $pattern, $line, $found, PREG_OFFSET_CAPTURE );

	// With PREG_OFFSET_CAPTURE every match is a [ text, offset ] pair.
	return $matched ? $found[0][1] : false;
}
997 | |
/**
 * Computes the differences between $oldText and $newText
 * and returns them in a machine-readable format.
 *
 * The DiffParser instance doing the work is created on first use and kept
 * in a static property for later calls.
 *
 * @param string $oldText The "left hand side" of the diff.
 * @param string $newText The "right hand side" of the diff.
 * @return array[] Array of changes.
 *  Each change consists of:
 *  * An 'action', one of:
 *    - add
 *    - subtract
 *    - change
 *  * 'content' that was added or removed, or in the case
 *     of a change, 'old_content' and 'new_content'
 *  * 'left_pos' and 'right_pos' (in lines) of the change.
 */
public static function getMachineReadableDiff( $oldText, $newText ) {
	$differ = self::$diffParser;

	if ( $differ === null ) {
		$differ = new DiffParser;
		self::$diffParser = $differ;
	}

	return $differ->getChangeSet( $oldText, $newText );
}
1021 | |
/**
 * Finds and extracts signatures in $text
 *
 * The text is examined line by line; lines without a recognised signature
 * are ignored.
 *
 * @param string $text The text in which to look for signed comments.
 * @param Title|null $title
 * @return array<string,string> Associative array, the key is the username, the value
 *  is the last signature that was found.
 */
private static function extractSignatures( $text, Title $title = null ) {
	$output = [];

	foreach ( explode( "\n", $text ) as $line ) {
		// Look for the last user link on the line.
		$userData = self::getUserFromLine( $line, $title );
		if ( $userData === false ) {
			continue;
		}

		[ $signaturePos, $user ] = $userData;

		// The signature is everything from its position to the end of the
		// line. A later line by the same user overwrites an earlier one.
		$output[$user] = substr( $line, $signaturePos );
	}

	return $output;
}
1054 | |
/**
 * From a line in the signature, extract all the users linked to
 *
 * Links to pages in the user and user talk namespaces contribute the page
 * text; links to Special:Contributions contribute the part after the first
 * slash. Contributions links without such a part are ignored.
 *
 * @param string $line Line of text potentially including linked user, user talk,
 *  and contribution pages
 * @return string[] array of usernames, empty array for none detected
 */
public static function extractUsersFromLine( $line ) {
	/*
	 * Signatures can look like anything (as defined by i18n messages
	 * "signature" & "signature-anon").
	 * A signature can, e.g., be both a link to user & user-talk page.
	 */
	// match all title-like excerpts in this line
	if ( !preg_match_all( '/\[\[([^\[]+)\]\]/', $line, $matches ) ) {
		return [];
	}

	$usernames = [];

	foreach ( $matches[1] as $match ) {
		/*
		 * Create an object out of the link title.
		 * In theory, links can be [[text]], [[text|text]] or pipe tricks
		 * [[text|]] or [[|text]].
		 * In the case of reverse pipe trick, the value we use *could* be
		 * empty, but Parser::pstPass2 should have normalized that for us
		 * already.
		 */
		[ $target ] = explode( '|', $match, 2 );
		$title = Title::newFromText( $target );
		if ( !$title ) {
			// not a usable title, move on to next matched excerpt
			continue;
		}

		// figure out if the link is related to a user
		$namespace = $title->getNamespace();
		if ( $namespace === NS_USER || $namespace === NS_USER_TALK ) {
			$usernames[] = $title->getText();
		} elseif ( $title->isSpecial( 'Contributions' ) ) {
			$parts = explode( '/', $title->getText(), 2 );
			// Only a link with a target ("Special:Contributions/Name") names
			// a user. Without one there is nothing after the slash, and the
			// page name itself must not be reported as a username.
			if ( isset( $parts[1] ) && $parts[1] !== '' ) {
				$usernames[] = $parts[1];
			}
		}
	}

	return $usernames;
}
1106 | |
/**
 * From a line in a wiki page, determine which user, if any,
 * has signed it.
 *
 * @param string $line
 * @param Title|null $title Page used as context when regenerating the
 *  signatures; the main page is used when none is given.
 * @return array|false False for none, array for success.
 * - First element is the position of the signature.
 * - Second element is the normalised user name.
 */
public static function getUserFromLine( $line, Title $title = null ) {
	/*
	 * First we call extractUsersFromLine to get all the potential usernames
	 * from the line. Then, we loop backwards through them, figure out which
	 * match to a user, regenerate the signature based on that user, and
	 * see if it matches!
	 */
	// A name linked several times (e.g. user page and talk page) always
	// regenerates the same signature, so try each name only once.
	// array_unique() keeps the first occurrence, which preserves the
	// last-link-first order.
	$usernames = array_unique( array_reverse( self::extractUsersFromLine( $line ) ) );
	if ( !$usernames ) {
		// no user links, so there cannot be a signature
		return false;
	}

	$parser = MediaWikiServices::getInstance()->getParser();
	// The context page is the same for every candidate; resolve it once.
	$contextTitle = $title ?: Title::newMainPage();

	foreach ( $usernames as $username ) {
		// generate (dateless) signature from the user we think we've
		// discovered the signature from
		// don't validate the username - anon (IP) is fine!
		$user = User::newFromName( $username, false );
		if ( !$user ) {
			// no user object could be built for this name, try the next one
			continue;
		}

		$sig = $parser->preSaveTransform(
			'~~~',
			$contextTitle,
			$user,
			new ParserOptions( $user )
		);

		// see if we can find this user's generated signature in the content
		$pos = strrpos( $line, $sig );
		if ( $pos !== false ) {
			return [ $pos, $username ];
		}
		// couldn't find sig, move on to next link excerpt and try there
	}

	// couldn't find any matching signature
	return false;
}
1151 | |
/**
 * Find the last link beginning with a given prefix on a line.
 *
 * The search is case-insensitive and runs from the end of the line. When the
 * link that was found does not yield a user, the method calls itself again
 * with the position of that link as $failureOffset to look for another one.
 *
 * @param string $line The line to search.
 * @param string $linkPrefix The prefix to search for.
 * @param int|false $failureOffset Position of a previously rejected link, or
 *  false on the initial call.
 * @return array|false False for failure, array for success.
 * - First element is the string offset of the link.
 * - Second element is the user the link refers to.
 */
private static function getLinkFromLine( $line, $linkPrefix, $failureOffset = false ) {
	$lineLength = strlen( $line );

	// If extraction failed at another offset, try again from there. The
	// offset handed to strripos() is negative in that case.
	$searchOffset = $failureOffset === false
		? 0
		: $failureOffset - $lineLength - 1;

	// Avoid PHP warning: Offset is greater than the length of haystack string
	if ( abs( $searchOffset ) > $lineLength ) {
		return false;
	}

	$linkPos = strripos( $line, $linkPrefix, $searchOffset );
	if ( $linkPos === false ) {
		return false;
	}

	$linkUser = self::extractUserFromLink( $line, $linkPrefix, $linkPos );
	if ( $linkUser !== false ) {
		return [ $linkPos, $linkUser ];
	}

	// Look for another place.
	return self::getLinkFromLine( $line, $linkPrefix, $linkPos );
}
1190 | |
/**
 * Given text including a link, gives the user that that link refers to
 *
 * The user name is taken from the text that follows the prefix, up to the
 * first "|", "]" or "#". It is accepted when it is an IP address or when
 * UserNameUtils::getCanonical() does not reject it.
 *
 * @param string $text The text to extract from.
 * @param string $prefix The link prefix that was used to find the link.
 * @param int $offset Optionally, the offset of the start of the link.
 * @return string|false The result of UserNameUtils::getCanonical() with
 *  RIGOR_NONE for the name, or false when no acceptable name was found.
 */
private static function extractUserFromLink( $text, $prefix, $offset = 0 ) {
	$remainder = substr( $text, strlen( $prefix ) + $offset );

	$found = [];
	if ( !preg_match( '/^[^\|\]\#]+/u', $remainder, $found ) ) {
		// user link is invalid
		return false;
	}

	$candidate = $found[0];
	$nameUtils = MediaWikiServices::getInstance()->getUserNameUtils();

	if ( $nameUtils->isIP( $candidate ) || $nameUtils->getCanonical( $candidate ) !== false ) {
		return $nameUtils->getCanonical( $candidate, UserNameUtils::RIGOR_NONE );
	}

	// Not a real username
	return false;
}
1224 | |
/**
 * Gets a regular expression that will match this wiki's
 * timestamps as given by ~~~~.
 *
 * The expression is derived from an exemplar timestamp generated through the
 * parser's pre-save transform, and is cached in a static property after the
 * first call.
 *
 * @return string regular expression fragment, quoted for use between "/"
 *  delimiters.
 * @throws RuntimeException If the generated expression fails to match the
 *  exemplar it was derived from.
 */
public static function getTimestampRegex() {
	if ( self::$timestampRegex !== null ) {
		return self::$timestampRegex;
	}

	// Step 1: Get an exemplar timestamp
	$title = Title::newMainPage();
	$user = User::newFromName( 'Test' );
	$options = new ParserOptions( $user );

	$parser = MediaWikiServices::getInstance()->getParser();
	$exemplarTimestamp =
		$parser->preSaveTransform( '~~~~~', $title, $user, $options );

	// Step 2: Generalise it
	// Trim off the timezone to replace at the end
	$output = $exemplarTimestamp;
	$tzRegex = '/\h*\(\w+\)\h*$/u';
	$tzMatches = [];
	if ( preg_match( $tzRegex, $output, $tzMatches, PREG_OFFSET_CAPTURE ) ) {
		$output = substr( $output, 0, $tzMatches[0][1] );
	}
	$output = preg_quote( $output, '/' );
	// Runs of letters and runs of digits become generic placeholders, so
	// any month name or number is accepted in their place.
	$output = preg_replace( '/[^\d\W]+/u', '[^\d\W]+', $output );
	$output = preg_replace( '/\d+/u', '\d+', $output );

	if ( $tzMatches ) {
		// The timezone suffix is kept literally. Quote it with the same
		// delimiter as the rest of the pattern, since the result is always
		// embedded between "/" delimiters.
		$output .= preg_quote( $tzMatches[0][0], '/' );
	}

	// Sanity check: the generalised pattern must still match its own source.
	if ( !preg_match( "/$output/u", $exemplarTimestamp ) ) {
		throw new RuntimeException( "Timestamp regex does not match exemplar" );
	}

	self::$timestampRegex = $output;

	return $output;
}
1269 | |
/**
 * Turns wikitext into a truncated plain-text snippet.
 *
 * The wikitext is parsed through the message cache, all tags are stripped
 * from the resulting HTML, and the trimmed remainder is truncated.
 *
 * @param string $text
 * @param Language $lang Language whose truncation rules are applied
 * @param int $length Length in characters (not bytes); default DEFAULT_SNIPPET_LENGTH
 * @param Title|null $title Page from which the text snippet is being extracted
 * @param bool $linestart Whether or not this is at the start of a line
 * @return string
 */
public static function getTextSnippet(
	$text, Language $lang, $length = self::DEFAULT_SNIPPET_LENGTH, $title = null, $linestart = true
) {
	// Parse wikitext
	$parserOutput = MediaWikiServices::getInstance()->getMessageCache()
		->parse( $text, $title, $linestart );
	$html = $parserOutput->getText( [
		'enableSectionEditLinks' => false
	] );

	$stripped = Sanitizer::stripAllTags( $html );

	return $lang->truncateForVisual( trim( $stripped ), $length );
}
1289 | |
/**
 * Turns an edit summary into a truncated plain-text snippet.
 *
 * The summary is HTML-escaped (entities allowed), its links are formatted by
 * the comment formatter, all tags are stripped, and the trimmed remainder is
 * truncated.
 *
 * @param string $text
 * @param Language $lang Language whose truncation rules are applied
 * @param int $length Length in characters (not bytes); default DEFAULT_SNIPPET_LENGTH
 * @return string
 */
public static function getTextSnippetFromSummary( $text, Language $lang, $length = self::DEFAULT_SNIPPET_LENGTH ) {
	// Parse wikitext with summary parser
	$escaped = Sanitizer::escapeHtmlAllowEntities( $text );
	$html = MediaWikiServices::getInstance()->getCommentFormatter()->formatLinks( $escaped );

	$stripped = Sanitizer::stripAllTags( $html );

	return $lang->truncateForVisual( trim( $stripped ), $length );
}
1304 | |
/**
 * Builds an edit excerpt for a revision.
 *
 * The excerpt is the detected section title and section text, joined by a
 * single space and truncated.
 *
 * @param RevisionRecord $revision
 * @param Language $lang Language whose truncation rules are applied
 * @param int $length Length in characters (not bytes); default DEFAULT_SNIPPET_LENGTH
 * @return string
 */
public static function getEditExcerpt(
	RevisionRecord $revision, Language $lang, $length = self::DEFAULT_SNIPPET_LENGTH
) {
	$section = self::detectSectionTitleAndText(
		self::getChangeInterpretationForRevision( $revision )
	);
	$excerpt = $section['section-title'] . ' ' . $section['section-text'];

	return $lang->truncateForVisual( $excerpt, $length );
}
1320 | } |
1321 | |
// Make the class also reachable under the un-namespaced name
// 'EchoDiscussionParser'.
// NOTE(review): presumably kept for backward compatibility with callers that
// still use the old class name; confirm before removing.
class_alias( DiscussionParser::class, 'EchoDiscussionParser' );