MediaWiki master
CommentParser.php
Go to the documentation of this file.
1<?php
2
4
5use File;
6use HtmlArmor;
7use Language;
24use RepoGroup;
25use StringUtils;
26
/** @var LinkRenderer Injected via the constructor; used to build link HTML */
private $linkRenderer;
/** @var LinkBatchFactory Injected via the constructor; creates the deferred link batch */
private $linkBatchFactory;
/** @var RepoGroup Injected via the constructor; queried for files when batches are flushed */
private $repoGroup;
/** @var Language Injected via the constructor; supplies the arrow and direction mark for section links */
private $userLang;
/** @var Language Injected via the constructor; supplies the Media namespace name and link trail regex */
private $contLang;
/** @var TitleParser Injected via the constructor; parses link targets */
private $titleParser;
/** @var NamespaceInfo Injected via the constructor; supplies canonical namespace names */
private $namespaceInfo;
/** @var HookRunner Built in the constructor from the injected HookContainer */
private $hookRunner;
/** @var LinkCache Injected via the constructor; consulted for already-known page existence */
private $linkCache;

/** @var callable[] Deferred link renderers, indexed by the numeric ID embedded in each marker */
private $links = [];
/** @var mixed Pending page-existence batch from LinkBatchFactory::newLinkBatch(), or null when nothing is queued */
private $linkBatch;

/** @var array[]|null Pending file lookups; each entry is [ 'title' => LinkTarget ] */
private $fileBatch;
/** @var array Accumulated results of RepoGroup::findFiles(), looked up by DB key when rendering media links */
private $files = [];

/** Number of zero-padded decimal digits in a marker ID */
private const MAX_ID_SIZE = 7;
/** Prefix of every link marker; starts with \x1B, which preprocessing strips from user input */
private const MARKER_PREFIX = "\x1B\"'";
69
/**
 * Store the injected services on the instance.
 *
 * The hook container is not kept directly; it is wrapped in a HookRunner.
 *
 * @param LinkRenderer $linkRenderer
 * @param LinkBatchFactory $linkBatchFactory
 * @param LinkCache $linkCache
 * @param RepoGroup $repoGroup
 * @param Language $userLang
 * @param Language $contLang
 * @param TitleParser $titleParser
 * @param NamespaceInfo $namespaceInfo
 * @param HookContainer $hookContainer
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	// Link rendering and existence lookups
	$this->linkRenderer = $linkRenderer;
	$this->linkCache = $linkCache;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->repoGroup = $repoGroup;

	// Title and namespace handling
	$this->titleParser = $titleParser;
	$this->namespaceInfo = $namespaceInfo;

	// Languages
	$this->userLang = $userLang;
	$this->contLang = $contLang;

	// Hooks
	$this->hookRunner = new HookRunner( $hookContainer );
}
102
/**
 * Convert a comment to HTML, but replace links with markers which are
 * resolved later by finalize().
 *
 * The comment is HTML-escaped (entities are allowed) before any link
 * processing takes place.
 *
 * @param string $comment Comment text
 * @param LinkTarget|null $selfLinkTarget Target that section links and
 *   fragment-only links are resolved against; null disables section linking
 * @param bool $samePage Passed through to section and wiki link handling
 * @param string|false|null $wikiId When neither null nor false, non-external
 *   links are built as URLs on that wiki
 * @param bool $enableSectionLinks Whether autocomment section markers are processed
 * @return string Comment HTML containing link markers
 */
public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	// Second argument false: the comment is untrusted and must be escaped.
	return $this->preprocessInternal( $comment, false, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
120
/**
 * Convert a comment in pseudo-HTML format to HTML, replacing links with
 * markers which are resolved later by finalize().
 *
 * Unlike preprocess(), the input is NOT HTML-escaped, so the caller is
 * responsible for passing only markup that is safe to output.
 *
 * @param string $comment Comment text, treated as already-safe HTML
 * @param LinkTarget|null $selfLinkTarget Target that section links and
 *   fragment-only links are resolved against; null disables section linking
 * @param bool $samePage Passed through to section and wiki link handling
 * @param string|false|null $wikiId When neither null nor false, non-external
 *   links are built as URLs on that wiki
 * @param bool $enableSectionLinks Whether autocomment section markers are processed
 * @return string Comment HTML containing link markers
 */
public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	// Second argument true: skip HTML escaping.
	return $this->preprocessInternal( $comment, true, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
137
/**
 * Execute pending batch queries and replace markers in the specified
 * string(s) with actual links.
 *
 * A marker whose ID has no registered callback is replaced with an HTML
 * comment reading "MISSING".
 *
 * @param string|string[] $comments Preprocessed comment(s) containing markers
 * @return string|string[] The input with every marker replaced
 */
public function finalize( $comments ) {
	// Existence data must be loaded before any deferred link is rendered.
	$this->flushLinkBatches();

	$markerRegex = '/' . self::MARKER_PREFIX . '([0-9]{' . self::MAX_ID_SIZE . '})/';

	$resolveMarker = function ( $m ) {
		$id = (int)$m[1];
		$callback = $this->links[$id] ?? null;
		if ( !$callback ) {
			return '<!-- MISSING -->';
		}
		return $callback();
	};

	return preg_replace_callback( $markerRegex, $resolveMarker, $comments );
}
160
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * @param string $comment Comment text
 * @param bool $unsafe If true, skip HTML escaping of the comment
 * @param LinkTarget|null $selfLinkTarget Passed to section and wiki link handling
 * @param bool $samePage Passed to section and wiki link handling
 * @param string|false|null $wikiId Passed to section and wiki link handling
 * @param bool $enableSectionLinks Whether autocomment section markers are processed
 * @return string Comment HTML containing link markers
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit.
	// \x1b needs to be stripped because it is used for link markers; if it
	// survived, user input could forge a marker that finalize() would expand.
	// An explicit map is used because the two-string form of strtr() silently
	// ignores surplus characters when the "from" and "to" strings differ in
	// length, which would leave \x1b untouched.
	$comment = strtr( $comment, [ "\n" => ' ', "\x1b" => ' ' ] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
185
/**
 * Format autocomments: section names wrapped in C-style comment delimiters
 * inside an edit summary.
 *
 * Each autocomment is wrapped in autocomment markup and, when a self link
 * target is given, turned into a link to that section. The
 * FormatAutocomments hook may replace the output for any autocomment.
 *
 * @param string $comment Comment text (already escaped by the caller when required)
 * @param LinkTarget|null $selfLinkTarget Page whose sections are linked; when
 *   null the autocomment is styled but not linked
 * @param bool $samePage If true, link to a bare fragment instead of a
 *   fragment on $selfLinkTarget
 * @param string|false|null $wikiId Passed to makeSectionLink()
 * @return string Comment with autocomments formatted
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	// @todo $append here is something of a hack to preserve the status
	// quo. Someone who knows more about bidi and such should decide
	// (1) what sensible rendering even *is* for an LTR edit summary on an RTL
	// wiki, both when autocomments exist and when they don't, and
	// (2) what markup will make that actually happen.
	$append = '';
	$comment = preg_replace_callback(
		// To detect the presence of content before or after the
		// auto-comment, we use capturing groups inside optional zero-width
		// assertions. But older versions of PCRE can't directly make
		// zero-width assertions optional, so wrap them in a non-capturing
		// group.
		'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
		function ( $match ) use ( &$append, $selfLinkTarget, $samePage, $wikiId ) {
			// Ensure all match positions are defined
			$match += [ '', '', '', '' ];

			// $pre / $post: is there any text before / after the autocomment?
			$pre = $match[1] !== '';
			$auto = $match[2];
			$post = $match[3] !== '';
			$comment = null;

			// A hook handler may supply the complete replacement via $comment.
			$this->hookRunner->onFormatAutocomments(
				$comment, $pre, $auto, $post,
				Title::castFromLinkTarget( $selfLinkTarget ),
				$samePage,
				$wikiId );
			if ( $comment !== null ) {
				return $comment;
			}

			if ( $selfLinkTarget ) {
				$section = $auto;
				# Remove links that a user may have manually put in the autosummary
				# This could be improved by copying as much of Parser::stripSectionName as desired.
				$section = str_replace( [
					'[[:',
					'[[',
					']]'
				], '', $section );

				// We don't want any links in the auto text to be linked, but we still
				// want to show any [[ ]]
				$sectionText = str_replace( '[[', '&#91;[', $auto );

				// Drop the first character of the guessed section anchor.
				$section = substr( Parser::guessSectionNameFromStrippedText( $section ), 1 );
				if ( $section !== '' ) {
					if ( $samePage ) {
						$sectionTitle = new TitleValue( NS_MAIN, '', $section );
					} else {
						$sectionTitle = $selfLinkTarget->createFragmentTarget( $section );
					}
					$auto = $this->makeSectionLink(
						$sectionTitle,
						$this->userLang->getArrow() . $this->userLang->getDirMark() . $sectionText,
						$wikiId
					);
				}
			}
			if ( $pre ) {
				# written summary $presep autocomment (summary /* section */)
				$pre = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
			}
			if ( $post ) {
				# autocomment $postsep written summary (/* section */ summary)
				$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
			}
			// Compare against the empty string rather than relying on
			// truthiness, so that an autocomment consisting of just "0" is
			// still wrapped in the autocomment markup.
			if ( $auto !== '' ) {
				$auto = '<span dir="auto"><span class="autocomment">' . $auto . '</span>';
				// The outer dir="auto" span is closed at the very end of the comment.
				$append .= '</span>';
			}
			$comment = $pre . $auto;
			return $comment;
		},
		$comment
	);
	return $comment . $append;
}
286
/**
 * Build the HTML link for an autocomment section.
 *
 * When a wiki ID is given (neither null nor false) and the target is not
 * external, the link is an external-style link to the foreign wiki's URL.
 * Otherwise it is built by the link renderer's makePreloadedLink().
 *
 * @param LinkTarget $target Section link target
 * @param string $text Link text, already escaped HTML
 * @param string|false|null $wikiId Foreign wiki ID, or null/false for the local wiki
 * @return string HTML link
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId
) {
	$isForeign = $wikiId !== null && $wikiId !== false && !$target->isExternal();
	if ( !$isForeign ) {
		return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
	}

	// Namespace 0 titles are used bare; others get the canonical namespace prefix.
	if ( $target->getNamespace() === 0 ) {
		$pageName = $target->getDBkey();
	} else {
		$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
			':' . $target->getDBkey();
	}
	$url = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );

	return Linker::makeExternalLink(
		$url,
		$text,
		/* escape = */ false // Already escaped
	);
}
317
/**
 * Replace [[wiki links]] in a comment with link HTML or link markers.
 *
 * Media-namespace links become deferred file links. All other links are
 * parsed as page links; a link whose title cannot be parsed is left in the
 * text unchanged.
 *
 * @param string $comment Comment text (already escaped by the caller when required)
 * @param LinkTarget|null $selfLinkTarget Base for subpage links and
 *   fragment-only links
 * @param bool $samePage If true, fragment-only links are not rewritten to
 *   point at $selfLinkTarget
 * @param string|false|null $wikiId Passed to addPageLink()
 * @return string Comment with links replaced
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	return preg_replace_callback(
		'/
			\[\[
			\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
			:? # ignore optional leading colon
			([^[\]|]+) # 1. link target; page names cannot include [, ] or |
			(?:\|
				# 2. link text
				# Stop matching at ]] without relying on backtracking.
				((?:]?[^\]])*+)
			)?
			\]\]
			([^[]*) # 3. link trail (the text up until the next link)
		/x',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
			// Alternation matching the Media namespace prefix, by canonical
			// or content-language name.
			$medians = '(?:';
			$medians .= preg_quote(
				$this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' );
			$medians .= '|';
			$medians .= preg_quote(
				$this->contLang->getNsText( NS_MEDIA ),
				'/'
			) . '):';

			// Default result: the matched text, unchanged.
			$comment = $match[0];

			// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
			if ( strpos( $match[1], '%' ) !== false ) {
				$match[1] = strtr(
					rawurldecode( $match[1] ),
					[ '<' => '&lt;', '>' => '&gt;' ]
				);
			}

			// Handle link renaming [[foo|text]] will show link as "text"
			if ( $match[2] != "" ) {
				$text = $match[2];
			} else {
				$text = $match[1];
			}
			$submatch = [];
			$linkMarker = null;
			if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
				// Media link; trail not supported.
				$linkRegexp = '/\[\[(.*?)\]\]/';
				$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
				if ( $linkTarget ) {
					$linkMarker = $this->addFileLink( $linkTarget, $text );
				}
			} else {
				// Other kind of link
				// Make sure its target is non-empty
				if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
					$match[1] = substr( $match[1], 1 );
				}
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
					if ( preg_match(
						$this->contLang->linkTrail(),
						$match[3],
						$submatch
					) ) {
						$trail = $submatch[1];
					} else {
						$trail = "";
					}
					// The regex consumes the link plus its trail, so both are
					// replaced by the generated link below.
					$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
					[ $inside, $trail ] = Linker::splitTrail( $trail );

					$linkText = $text;
					$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

					try {
						$target = $this->titleParser->parseTitle( $linkTarget );

						// Fragment-only link: point it at the page the comment belongs to.
						if ( $target->getText() == '' && !$target->isExternal()
							&& !$samePage && $selfLinkTarget
						) {
							$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
						}

						$linkMarker = $this->addPageLink( $target, $linkText . $inside, $wikiId );
						$linkMarker .= $trail;
					} catch ( MalformedTitleException $e ) {
						// Fall through: $linkMarker stays null and the text is left as is.
					}
				}
			}
			if ( $linkMarker ) {
				// If the link is still valid, go ahead and replace it in!
				$comment = preg_replace(
					// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal linkRegexp set when used
					$linkRegexp,
					// The marker/HTML is literal text, so neutralise "$" and "\"
					// which preg_replace() would otherwise treat as backreferences.
					StringUtils::escapeRegexReplacement( $linkMarker ),
					$comment,
					1
				);
			}

			return $comment;
		},
		$comment
	);
}
442
/**
 * Register a deferred link callback and return the marker that stands in
 * for it until finalize() is called.
 *
 * The marker is MARKER_PREFIX followed by the callback's index, zero-padded
 * to MAX_ID_SIZE digits.
 *
 * @param callable $callback Invoked without arguments by finalize(); its
 *   return value replaces the marker
 * @return string The marker
 * @throws \RuntimeException If the next index needs more than MAX_ID_SIZE digits
 */
private function addLinkMarker( $callback ) {
	$id = count( $this->links );
	$idLength = strlen( (string)$id );
	if ( $idLength > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}

	$this->links[] = $callback;

	$format = self::MARKER_PREFIX . "%0" . self::MAX_ID_SIZE . 'd';
	return sprintf( $format, $id );
}
457
/**
 * Produce the replacement for a page link.
 *
 * Returns finished link HTML when the result is already determined (foreign
 * wiki link, or existence known from the link cache). Otherwise the target
 * is queued in the link batch and a marker is returned for finalize() to
 * resolve.
 *
 * @param LinkTarget $target Link target
 * @param string $text Link text, already escaped HTML
 * @param string|false|null $wikiId Foreign wiki ID, or null/false for the local wiki
 * @return string Link HTML or a link marker
 */
private function addPageLink( LinkTarget $target, $text, $wikiId ) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		// Handle links from a foreign wiki ID
		if ( $target->getNamespace() === 0 ) {
			$pageName = $target->getDBkey();
		} else {
			$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
				':' . $target->getDBkey();
		}
		return Linker::makeExternalLink(
			WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() ),
			$text,
			/* escape = */ false // Already escaped
		);
	}

	if ( $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown()
	) {
		// Already known
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	}

	if ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Defer page link: create the batch lazily on first use.
	if ( !$this->linkBatch ) {
		$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
		$this->linkBatch->setCaller( __METHOD__ );
	}
	$this->linkBatch->addObj( $target );

	$renderLater = function () use ( $target, $text ) {
		return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
	};
	return $this->addLinkMarker( $renderLater );
}
502
/**
 * Queue a file lookup for a media link and return a marker for it.
 *
 * When the marker is resolved, the file found for the target's DB key (or
 * false if none was found) is passed to Linker::makeMediaLinkFile().
 *
 * @param LinkTarget $target File link target
 * @param string $html Link text, passed through to Linker::makeMediaLinkFile()
 * @return string The link marker
 */
private function addFileLink( LinkTarget $target, $html ) {
	$this->fileBatch[] = [ 'title' => $target ];

	$renderLater = function () use ( $target, $html ) {
		$file = $this->files[$target->getDBkey()] ?? false;
		return Linker::makeMediaLinkFile( $target, $file, $html );
	};

	return $this->addLinkMarker( $renderLater );
}
522
/**
 * Execute any pending page and file batches and reset them.
 *
 * File lookup results are merged into the accumulated file map; entries
 * already present are kept.
 */
private function flushLinkBatches() {
	$pendingLinks = $this->linkBatch;
	if ( $pendingLinks ) {
		$pendingLinks->execute();
		$this->linkBatch = null;
	}

	$pendingFiles = $this->fileBatch;
	if ( $pendingFiles ) {
		$found = $this->repoGroup->findFiles( $pendingFiles );
		$this->files += $found;
		$this->fileBatch = [];
	}
}
536
537}
const NS_FILE
Definition Defines.php:70
const NS_MAIN
Definition Defines.php:64
const NS_MEDIA
Definition Defines.php:52
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:73
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
Base class for language-specific code.
Definition Language.php:63
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:48
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition LinkCache.php:53
The text processing backend for CommentFormatter.
__construct(LinkRenderer $linkRenderer, LinkBatchFactory $linkBatchFactory, LinkCache $linkCache, RepoGroup $repoGroup, Language $userLang, Language $contLang, TitleParser $titleParser, NamespaceInfo $namespaceInfo, HookContainer $hookContainer)
finalize( $comments)
Execute pending batch queries and replace markers in the specified string(s) with actual links.
preprocessUnsafe( $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
preprocess(string $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment to HTML, but replace links with markers which are resolved later.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Class that generates HTML for internal links.
Some internal bits split off from Skin.php.
Definition Linker.php:65
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:156
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:46
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Represents the target of a wiki link.
Represents a title within MediaWiki.
Definition Title.php:78
Tools for dealing with other locally-hosted wikis.
Definition WikiMap.php:31
Prioritized list of file repositories.
Definition RepoGroup.php:30
A collection of static methods to play with strings.
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.
Represents the target of a wiki link.
A title parser service for MediaWiki.