Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
97.97% |
193 / 197 |
|
66.67% |
8 / 12 |
CRAP | |
0.00% |
0 / 1 |
CommentParser | |
97.97% |
193 / 197 |
|
66.67% |
8 / 12 |
53 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
1 | |||
preprocess | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
preprocessUnsafe | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
finalize | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
2.00 | |||
preprocessInternal | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
doSectionLinks | |
97.87% |
46 / 47 |
|
0.00% |
0 / 1 |
8 | |||
makeSectionLink | |
92.86% |
13 / 14 |
|
0.00% |
0 / 1 |
5.01 | |||
doWikiLinks | |
100.00% |
60 / 60 |
|
100.00% |
1 / 1 |
17 | |||
addLinkMarker | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
addPageLink | |
100.00% |
25 / 25 |
|
100.00% |
1 / 1 |
9 | |||
addFileLink | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
flushLinkBatches | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 |
1 | <?php |
2 | |
3 | namespace MediaWiki\CommentFormatter; |
4 | |
5 | use File; |
6 | use HtmlArmor; |
7 | use Language; |
8 | use MediaWiki\Cache\LinkBatch; |
9 | use MediaWiki\Cache\LinkBatchFactory; |
10 | use MediaWiki\Cache\LinkCache; |
11 | use MediaWiki\HookContainer\HookContainer; |
12 | use MediaWiki\HookContainer\HookRunner; |
13 | use MediaWiki\Linker\Linker; |
14 | use MediaWiki\Linker\LinkRenderer; |
15 | use MediaWiki\Linker\LinkTarget; |
16 | use MediaWiki\Parser\Parser; |
17 | use MediaWiki\Parser\Sanitizer; |
18 | use MediaWiki\Title\MalformedTitleException; |
19 | use MediaWiki\Title\NamespaceInfo; |
20 | use MediaWiki\Title\Title; |
21 | use MediaWiki\Title\TitleParser; |
22 | use MediaWiki\Title\TitleValue; |
23 | use MediaWiki\WikiMap\WikiMap; |
24 | use RepoGroup; |
25 | use StringUtils; |
26 | |
/**
 * The text processing backend for CommentFormatter.
 *
 * A CommentParser works in two phases:
 *  - preprocess() / preprocessUnsafe() convert a comment to HTML, but emit
 *    short markers in place of links whose rendering needs a page existence
 *    or file lookup. The lookups are queued in a link batch / file batch.
 *  - finalize() executes the queued batches and replaces each marker with
 *    the rendered link.
 *
 * CommentParser objects should be discarded after the comment batch is
 * complete, in order to reduce memory usage.
 *
 * @internal
 */
class CommentParser {
	/** @var LinkRenderer */
	private $linkRenderer;
	/** @var LinkBatchFactory */
	private $linkBatchFactory;
	/** @var RepoGroup */
	private $repoGroup;
	/** @var Language */
	private $userLang;
	/** @var Language */
	private $contLang;
	/** @var TitleParser */
	private $titleParser;
	/** @var NamespaceInfo */
	private $namespaceInfo;
	/** @var HookRunner */
	private $hookRunner;
	/** @var LinkCache */
	private $linkCache;

	/** @var callable[] Deferred link renderers, indexed by marker ID */
	private $links = [];
	/** @var LinkBatch|null Pending page existence batch, created on first deferred page link */
	private $linkBatch;

	/** @var array Input to RepoGroup::findFiles() */
	private $fileBatch = [];
	/** @var File[] Resolved File objects indexed by DB key */
	private $files = [];

	/** @var int The maximum number of digits in a marker ID */
	private const MAX_ID_SIZE = 7;
	/** @var string Prefix for marker. ' and " included to break attributes (T355538) */
	private const MARKER_PREFIX = "\x1B\"'";

	/**
	 * @param LinkRenderer $linkRenderer
	 * @param LinkBatchFactory $linkBatchFactory
	 * @param LinkCache $linkCache
	 * @param RepoGroup $repoGroup
	 * @param Language $userLang
	 * @param Language $contLang
	 * @param TitleParser $titleParser
	 * @param NamespaceInfo $namespaceInfo
	 * @param HookContainer $hookContainer Wrapped in a HookRunner
	 */
	public function __construct(
		LinkRenderer $linkRenderer,
		LinkBatchFactory $linkBatchFactory,
		LinkCache $linkCache,
		RepoGroup $repoGroup,
		Language $userLang,
		Language $contLang,
		TitleParser $titleParser,
		NamespaceInfo $namespaceInfo,
		HookContainer $hookContainer
	) {
		$this->linkRenderer = $linkRenderer;
		$this->linkBatchFactory = $linkBatchFactory;
		$this->linkCache = $linkCache;
		$this->repoGroup = $repoGroup;
		$this->userLang = $userLang;
		$this->contLang = $contLang;
		$this->titleParser = $titleParser;
		$this->namespaceInfo = $namespaceInfo;
		$this->hookRunner = new HookRunner( $hookContainer );
	}

	/**
	 * Convert a comment to HTML, but replace links with markers which are
	 * resolved later.
	 *
	 * The comment is HTML-escaped (entities are allowed through) before
	 * section links and wiki links are processed.
	 *
	 * @param string $comment
	 * @param LinkTarget|null $selfLinkTarget
	 * @param bool $samePage
	 * @param string|false|null $wikiId
	 * @param bool $enableSectionLinks
	 * @return string
	 */
	public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
		$samePage = false, $wikiId = false, $enableSectionLinks = true
	) {
		return $this->preprocessInternal( $comment, false, $selfLinkTarget,
			$samePage, $wikiId, $enableSectionLinks );
	}

	/**
	 * Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
	 *
	 * Unlike preprocess(), the comment is NOT HTML-escaped here, so the
	 * caller is responsible for passing text that is already safe as HTML.
	 *
	 * @param string $comment
	 * @param LinkTarget|null $selfLinkTarget
	 * @param bool $samePage
	 * @param string|false|null $wikiId
	 * @param bool $enableSectionLinks
	 * @return string
	 */
	public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
		$samePage = false, $wikiId = false, $enableSectionLinks = true
	) {
		return $this->preprocessInternal( $comment, true, $selfLinkTarget,
			$samePage, $wikiId, $enableSectionLinks );
	}

	/**
	 * Execute pending batch queries and replace markers in the specified
	 * string(s) with actual links.
	 *
	 * A marker whose ID has no registered callback is replaced with the
	 * HTML comment "<!-- MISSING -->".
	 *
	 * @param string|string[] $comments
	 * @return string|string[] Same shape as the input
	 */
	public function finalize( $comments ) {
		$this->flushLinkBatches();
		return preg_replace_callback(
			'/' . self::MARKER_PREFIX . '([0-9]{' . self::MAX_ID_SIZE . '})/',
			function ( $m ) {
				// The marker ID is zero-padded; the int cast strips the padding
				$callback = $this->links[(int)$m[1]] ?? null;
				if ( $callback ) {
					return $callback();
				} else {
					return '<!-- MISSING -->';
				}
			},
			$comments
		);
	}

	/**
	 * Shared implementation of preprocess() and preprocessUnsafe().
	 *
	 * @param string $comment
	 * @param bool $unsafe If true, skip HTML escaping of the comment
	 * @param LinkTarget|null $selfLinkTarget
	 * @param bool $samePage
	 * @param string|false|null $wikiId
	 * @param bool $enableSectionLinks
	 * @return string
	 */
	private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
		$enableSectionLinks
	) {
		// Sanitize text a bit
		// \x1b needs to be stripped because it is used for link markers
		$comment = strtr( $comment, "\n\x1b", "  " );
		// Allow HTML entities (for T15815)
		if ( !$unsafe ) {
			$comment = Sanitizer::escapeHtmlAllowEntities( $comment );
		}
		if ( $enableSectionLinks ) {
			$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
		}
		return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}

	/**
	 * Converts C-style comments in edit summaries into section links.
	 *
	 * Too many things are called "comments", so these are mostly now called
	 * section links rather than autocomments.
	 *
	 * We look for all comments, match any text before and after the comment,
	 * add a separator where needed and format the comment itself with CSS.
	 *
	 * @param string $comment Comment text
	 * @param LinkTarget|null $selfLinkTarget An optional LinkTarget object used to link to sections
	 * @param bool $samePage Whether section links should refer to local page
	 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
	 *  as used by WikiMap.
	 * @return string Preprocessed comment
	 */
	private function doSectionLinks(
		$comment,
		$selfLinkTarget = null,
		$samePage = false,
		$wikiId = false
	) {
		// @todo $append here is something of a hack to preserve the status
		// quo. Someone who knows more about bidi and such should decide
		// (1) what sensible rendering even *is* for an LTR edit summary on an RTL
		// wiki, both when autocomments exist and when they don't, and
		// (2) what markup will make that actually happen.
		$append = '';
		$comment = preg_replace_callback(
			// To detect the presence of content before or after the
			// auto-comment, we use capturing groups inside optional zero-width
			// assertions. But older versions of PCRE can't directly make
			// zero-width assertions optional, so wrap them in a non-capturing
			// group.
			'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
			function ( $match ) use ( &$append, $selfLinkTarget, $samePage, $wikiId ) {
				// Ensure all match positions are defined
				$match += [ '', '', '', '' ];

				$pre = $match[1] !== '';
				$auto = $match[2];
				$post = $match[3] !== '';
				$comment = null;

				// Give extensions a chance to supply the whole replacement
				$this->hookRunner->onFormatAutocomments(
					$comment, $pre, $auto, $post,
					Title::castFromLinkTarget( $selfLinkTarget ),
					$samePage,
					$wikiId );
				if ( $comment !== null ) {
					return $comment;
				}

				if ( $selfLinkTarget ) {
					$section = $auto;
					# Remove links that a user may have manually put in the autosummary
					# This could be improved by copying as much of Parser::stripSectionName as desired.
					$section = str_replace( [
						'[[:',
						'[[',
						']]'
					], '', $section );

					// We don't want any links in the auto text to be linked, but we still
					// want to show any [[ ]]. Encoding the first bracket as an entity keeps
					// it visible while preventing doWikiLinks() from matching it.
					$sectionText = str_replace( '[[', '&#91;[', $auto );

					$section = substr( Parser::guessSectionNameFromStrippedText( $section ), 1 );
					if ( $section !== '' ) {
						if ( $samePage ) {
							$sectionTitle = new TitleValue( NS_MAIN, '', $section );
						} else {
							$sectionTitle = $selfLinkTarget->createFragmentTarget( $section );
						}
						$auto = $this->makeSectionLink(
							$sectionTitle,
							$this->userLang->getArrow() . $this->userLang->getDirMark() . $sectionText,
							$wikiId
						);
					}
				}
				if ( $pre ) {
					# written summary $presep autocomment (summary /* section */)
					$pre = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
				}
				if ( $post ) {
					# autocomment $postsep written summary (/* section */ summary)
					$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
				}
				if ( $auto ) {
					// The outer dir="auto" span is left open here and closed via $append
					// at the very end of the comment.
					$auto = '<span dir="auto"><span class="autocomment">' . $auto . '</span>';
					$append .= '</span>';
				}
				$comment = $pre . $auto;
				return $comment;
			},
			$comment
		);
		return $comment . $append;
	}

	/**
	 * Make a section link. These don't need to go into the LinkBatch, since
	 * the link class does not depend on whether the link is known.
	 *
	 * @param LinkTarget $target
	 * @param string $text Link text, already HTML-escaped
	 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
	 *  as used by WikiMap.
	 *
	 * @return string HTML link
	 */
	private function makeSectionLink(
		LinkTarget $target, $text, $wikiId
	) {
		if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
			return Linker::makeExternalLink(
				WikiMap::getForeignURL(
					$wikiId,
					$target->getNamespace() === 0
						? $target->getDBkey()
						: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
						':' . $target->getDBkey(),
					$target->getFragment()
				),
				$text,
				/* escape = */ false // Already escaped
			);
		}
		return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
	}

	/**
	 * Formats wiki links and media links in text; all other wiki formatting
	 * is ignored
	 *
	 * @todo FIXME: Doesn't handle sub-links as in image thumb texts like the main parser
	 *
	 * @param string $comment Text to format links in. WARNING! Since the output of this
	 *  function is html, $comment must be sanitized for use as html. You probably want
	 *  to pass $comment through Sanitizer::escapeHtmlAllowEntities() before calling
	 *  this function.
	 * @param LinkTarget|null $selfLinkTarget An optional LinkTarget object used to link to sections
	 * @param bool $samePage Whether section links should refer to local page
	 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
	 *  as used by WikiMap.
	 *
	 * @return string HTML
	 */
	private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
		return preg_replace_callback(
			'/
				\[\[
				\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
				:? # ignore optional leading colon
				([^[\]|]+) # 1. link target; page names cannot include [, ] or |
				(?:\|
					# 2. link text
					# Stop matching at ]] without relying on backtracking.
					((?:]?[^\]])*+)
				)?
				\]\]
				([^[]*) # 3. link trail (the text up until the next link)
			/x',
			function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
				// Regex fragment matching the canonical or content-language
				// name of the Media namespace, followed by a colon
				$medians = '(?:';
				$medians .= preg_quote(
					$this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' );
				$medians .= '|';
				$medians .= preg_quote(
					$this->contLang->getNsText( NS_MEDIA ),
					'/'
				) . '):';

				$comment = $match[0];

				// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
				if ( strpos( $match[1], '%' ) !== false ) {
					// Decoding may introduce raw angle brackets into text that has
					// already been HTML-escaped, so re-escape them.
					$match[1] = strtr(
						rawurldecode( $match[1] ),
						[ '<' => '&lt;', '>' => '&gt;' ]
					);
				}

				// Handle link renaming [[foo|text]] will show link as "text"
				if ( $match[2] != "" ) {
					$text = $match[2];
				} else {
					$text = $match[1];
				}
				$submatch = [];
				$linkMarker = null;
				if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
					// Media link; trail not supported.
					$linkRegexp = '/\[\[(.*?)\]\]/';
					$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
					if ( $linkTarget ) {
						$linkMarker = $this->addFileLink( $linkTarget, $text );
					}
				} else {
					// Other kind of link
					// Make sure its target is non-empty
					if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
						$match[1] = substr( $match[1], 1 );
					}
					// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
					if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
						if ( preg_match(
							$this->contLang->linkTrail(),
							$match[3],
							$submatch
						) ) {
							$trail = $submatch[1];
						} else {
							$trail = "";
						}
						// Replace the link together with the part of the trail
						// that becomes part of the link text
						$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
						[ $inside, $trail ] = Linker::splitTrail( $trail );

						$linkText = $text;
						$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

						try {
							$target = $this->titleParser->parseTitle( $linkTarget );

							// Fragment-only link: resolve it against the page the
							// comment belongs to
							if ( $target->getText() == '' && !$target->isExternal()
								&& !$samePage && $selfLinkTarget
							) {
								$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
							}

							$linkMarker = $this->addPageLink( $target, $linkText . $inside, $wikiId );
							$linkMarker .= $trail;
						} catch ( MalformedTitleException $e ) {
							// Fall through: $linkMarker stays null, so the
							// original text is left in place
						}
					}
				}
				if ( $linkMarker ) {
					// If the link is still valid, go ahead and replace it in!
					$comment = preg_replace(
						// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
						// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal linkRegexp set when used
						$linkRegexp,
						StringUtils::escapeRegexReplacement( $linkMarker ),
						$comment,
						1
					);
				}

				return $comment;
			},
			$comment
		);
	}

	/**
	 * Add a deferred link to the list and return its marker.
	 *
	 * The marker is MARKER_PREFIX followed by the callback's index in
	 * $this->links, zero-padded to MAX_ID_SIZE digits.
	 *
	 * @param callable $callback Called without arguments by finalize(); returns the link HTML
	 * @return string
	 * @throws \RuntimeException If the next ID would need more than MAX_ID_SIZE digits
	 */
	private function addLinkMarker( $callback ) {
		$nextId = count( $this->links );
		if ( strlen( (string)$nextId ) > self::MAX_ID_SIZE ) {
			throw new \RuntimeException( 'Too many links in comment batch' );
		}
		$this->links[] = $callback;
		return sprintf( self::MARKER_PREFIX . "%0" . self::MAX_ID_SIZE . 'd', $nextId );
	}

	/**
	 * Link to a LinkTarget. Return either HTML or a marker depending on whether
	 * existence checks are deferred.
	 *
	 * HTML is returned immediately for foreign-wiki links and for targets
	 * whose status is already available (good or bad in the LinkCache, or
	 * always known). Otherwise the target is added to the pending LinkBatch
	 * and a marker is returned.
	 *
	 * @param LinkTarget $target
	 * @param string $text Link text, already HTML-escaped
	 * @param string|false|null $wikiId
	 * @return string
	 */
	private function addPageLink( LinkTarget $target, $text, $wikiId ) {
		if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
			// Handle links from a foreign wiki ID
			return Linker::makeExternalLink(
				WikiMap::getForeignURL(
					$wikiId,
					$target->getNamespace() === 0
						? $target->getDBkey()
						: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
						':' . $target->getDBkey(),
					$target->getFragment()
				),
				$text,
				/* escape = */ false // Already escaped
			);
		} elseif ( $this->linkCache->getGoodLinkID( $target ) ||
			Title::newFromLinkTarget( $target )->isAlwaysKnown()
		) {
			// Already known
			return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
		} elseif ( $this->linkCache->isBadLink( $target ) ) {
			// Already cached as unknown
			return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
		}

		// Defer page link
		if ( !$this->linkBatch ) {
			$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
			$this->linkBatch->setCaller( __METHOD__ );
		}
		$this->linkBatch->addObj( $target );
		return $this->addLinkMarker( function () use ( $target, $text ) {
			return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
		} );
	}

	/**
	 * Link to a file, returning a marker.
	 *
	 * The file is queued for lookup; the deferred callback renders the link
	 * with the File found by flushLinkBatches(), or false if none was found.
	 *
	 * @param LinkTarget $target The name of the file.
	 * @param string $html The inner HTML of the link
	 * @return string
	 */
	private function addFileLink( LinkTarget $target, $html ) {
		$this->fileBatch[] = [
			'title' => $target
		];
		return $this->addLinkMarker( function () use ( $target, $html ) {
			return Linker::makeMediaLinkFile(
				$target,
				$this->files[$target->getDBkey()] ?? false,
				$html
			);
		} );
	}

	/**
	 * Execute any pending link batch or file batch
	 */
	private function flushLinkBatches() {
		if ( $this->linkBatch ) {
			$this->linkBatch->execute();
			$this->linkBatch = null;
		}
		if ( $this->fileBatch ) {
			// Array union: files resolved by an earlier flush are kept
			$this->files += $this->repoGroup->findFiles( $this->fileBatch );
			$this->fileBatch = [];
		}
	}

}