MediaWiki master
CommentParser.php
Go to the documentation of this file.
1<?php
2
4
5use File;
6use HtmlArmor;
25use RepoGroup;
26use StringUtils;
27
/** @var LinkRenderer Produces the HTML for every link emitted by this class */
private $linkRenderer;
/** @var LinkBatchFactory Creates the batch used for deferred page lookups */
private $linkBatchFactory;
/** @var RepoGroup Queried in flushLinkBatches() to resolve queued media links */
private $repoGroup;
/** @var Language User language; supplies the arrow and direction mark for section links */
private $userLang;
/** @var Language Content language; supplies the Media namespace name and the link trail regex */
private $contLang;
/** @var TitleParser Parses link target text into title values */
private $titleParser;
/** @var NamespaceInfo Supplies canonical namespace names */
private $namespaceInfo;
/** @var HookRunner Wraps the HookContainer passed to the constructor */
private $hookRunner;
/** @var LinkCache Consulted to decide whether a page link can be rendered immediately */
private $linkCache;

/** @var callable[] Deferred link renderers; the array index is the marker ID */
private $links = [];
/** @var LinkBatch|null Pending page lookups, or null when nothing is queued */
private $linkBatch;

/**
 * Pending file lookups, each of the form [ 'title' => LinkTarget ].
 * Starts out empty, which is also the state flushLinkBatches() resets it to.
 * @var array[]
 */
private $fileBatch = [];
/** @var array Results of RepoGroup::findFiles(); looked up by DB key when rendering media links */
private $files = [];

/** Number of zero-padded decimal digits in a marker ID */
private const MAX_ID_SIZE = 7;
/** Prefix of every link marker; it starts with \x1B, which is stripped from incoming comments */
private const MARKER_PREFIX = "\x1B\"'";
70
/**
 * Store the injected services. The hook container is not kept directly;
 * it is wrapped in a HookRunner.
 *
 * @param LinkRenderer $linkRenderer
 * @param LinkBatchFactory $linkBatchFactory
 * @param LinkCache $linkCache
 * @param RepoGroup $repoGroup
 * @param Language $userLang
 * @param Language $contLang
 * @param TitleParser $titleParser
 * @param NamespaceInfo $namespaceInfo
 * @param HookContainer $hookContainer
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	// Hooks
	$this->hookRunner = new HookRunner( $hookContainer );

	// Title and namespace handling
	$this->namespaceInfo = $namespaceInfo;
	$this->titleParser = $titleParser;

	// Languages
	$this->contLang = $contLang;
	$this->userLang = $userLang;

	// Link and file lookup / rendering
	$this->repoGroup = $repoGroup;
	$this->linkCache = $linkCache;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->linkRenderer = $linkRenderer;
}
103
/**
 * Convert a plain-text comment to HTML, replacing links with markers that
 * finalize() resolves later.
 *
 * The comment is HTML-escaped (entities are still allowed) before any link
 * processing takes place.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Page that section links and
 *   relative links are resolved against, or null if there is none
 * @param bool $samePage Whether section links should be fragment-only links
 *   instead of links to $selfLinkTarget
 * @param string|false|null $wikiId When neither false nor null, non-external
 *   links are rendered as external links to this wiki
 * @param bool $enableSectionLinks Whether to process autocomments
 * @return string HTML that may contain link markers
 */
public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	return $this->preprocessInternal( $comment, false, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
121
/**
 * Convert a comment that is already in HTML form, replacing links with
 * markers that finalize() resolves later.
 *
 * Unlike preprocess(), the input is NOT HTML-escaped here, so the caller is
 * responsible for making sure it is safe to output.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Page that section links and
 *   relative links are resolved against, or null if there is none
 * @param bool $samePage Whether section links should be fragment-only links
 *   instead of links to $selfLinkTarget
 * @param string|false|null $wikiId When neither false nor null, non-external
 *   links are rendered as external links to this wiki
 * @param bool $enableSectionLinks Whether to process autocomments
 * @return string HTML that may contain link markers
 */
public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	return $this->preprocessInternal( $comment, true, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
138
/**
 * Run the pending batch lookups, then swap every link marker in the input
 * for the HTML produced by its deferred renderer.
 *
 * @param string|string[] $comments Output of preprocess()/preprocessUnsafe()
 * @return string|string[] Same shape as the input, with markers replaced.
 *   A marker whose ID has no registered renderer becomes an HTML comment.
 */
public function finalize( $comments ) {
	$this->flushLinkBatches();

	$markerRegex = '/' . self::MARKER_PREFIX . '([0-9]{' . self::MAX_ID_SIZE . '})/';

	$resolveMarker = function ( $m ) {
		// The captured digits are the zero-padded index into $this->links
		$render = $this->links[(int)$m[1]] ?? null;
		if ( !$render ) {
			return '<!-- MISSING -->';
		}
		return $render();
	};

	return preg_replace_callback( $markerRegex, $resolveMarker, $comments );
}
161
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * @param string $comment
 * @param bool $unsafe True to skip HTML escaping of the input
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks Whether to process autocomments
 * @return string HTML that may contain link markers
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit
	// \x1b needs to be stripped because it is used for link markers.
	// The array form is used on purpose: with the three-argument string form,
	// strtr() ignores surplus characters when "from" is longer than "to",
	// which would silently leave \x1b in place.
	$comment = strtr( $comment, [ "\n" => ' ', "\x1b" => ' ' ] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
186
/**
 * Format autocomments, i.e. the slash-star ... star-slash parts of a comment.
 *
 * Each autocomment is wrapped in autocomment markup. When $selfLinkTarget
 * is given, it is also turned into a link to the section it names. The
 * FormatAutocomments hook may replace the output for an autocomment.
 *
 * @param string $comment Comment text, already escaped
 * @param LinkTarget|null $selfLinkTarget Page the section links point to;
 *   without it the autocomment text is not linked
 * @param bool $samePage Whether to make fragment-only section links
 * @param string|false|null $wikiId Passed on to the hook and to makeSectionLink()
 * @return string
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	// @todo $append here is something of a hack to preserve the status
	// quo. Someone who knows more about bidi and such should decide
	// (1) what sensible rendering even *is* for an LTR edit summary on an RTL
	// wiki, both when autocomments exist and when they don't, and
	// (2) what markup will make that actually happen.
	$append = '';
	$comment = preg_replace_callback(
		// To detect the presence of content before or after the
		// auto-comment, we use capturing groups inside optional zero-width
		// assertions. But older versions of PCRE can't directly make
		// zero-width assertions optional, so wrap them in a non-capturing
		// group.
		'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
		function ( $match ) use ( &$append, $selfLinkTarget, $samePage, $wikiId ) {
			// Ensure all match positions are defined
			$match += [ '', '', '', '' ];

			$pre = $match[1] !== '';
			$auto = $match[2];
			$post = $match[3] !== '';
			$comment = null;

			// A hook handler takes over by setting $comment
			$this->hookRunner->onFormatAutocomments(
				$comment, $pre, $auto, $post,
				Title::castFromLinkTarget( $selfLinkTarget ),
				$samePage,
				$wikiId );
			if ( $comment !== null ) {
				return $comment;
			}

			if ( $selfLinkTarget ) {
				$section = $auto;
				# Remove links that a user may have manually put in the autosummary
				# This could be improved by copying as much of Parser::stripSectionName as desired.
				$section = str_replace( [
					'[[:',
					'[[',
					']]'
				], '', $section );

				// We don't want any links in the auto text to be linked, but we still
				// want to show any [[ ]]
				$sectionText = str_replace( '[[', '&#91;[', $auto );

				// Drop the first character of the guessed name; what is left is the fragment
				$section = substr( Parser::guessSectionNameFromStrippedText( $section ), 1 );
				if ( $section !== '' ) {
					if ( $samePage ) {
						$sectionTitle = new TitleValue( NS_MAIN, '', $section );
					} else {
						$sectionTitle = $selfLinkTarget->createFragmentTarget( $section );
					}
					$auto = $this->makeSectionLink(
						$sectionTitle,
						$this->userLang->getArrow() . $this->userLang->getDirMark() . $sectionText,
						$wikiId,
						$selfLinkTarget
					);
				}
			}
			if ( $pre ) {
				# written summary $presep autocomment (summary /* section */)
				$pre = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
			}
			if ( $post ) {
				# autocomment $postsep written summary (/* section */ summary)
				$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
			}
			// Compare against '' rather than relying on truthiness, so that an
			// autocomment consisting of just "0" still gets the autocomment markup.
			if ( $auto !== '' ) {
				$auto = '<span dir="auto"><span class="autocomment">' . $auto . '</span>';
				// The outer span is closed after the rest of the comment
				$append .= '</span>';
			}
			$comment = $pre . $auto;
			return $comment;
		},
		$comment
	);
	return $comment . $append;
}
288
/**
 * Build the HTML link for an autocomment section.
 *
 * With a wiki ID (neither null nor false) and a non-external target, the
 * link is an external link to the page on that wiki; otherwise it is a
 * preloaded link to the target.
 *
 * @param LinkTarget $target Section to link to
 * @param string $text Link text as HTML, already escaped
 * @param string|false|null $wikiId
 * @param LinkTarget $contextTitle Passed to the external link renderer
 * @return string HTML
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId, LinkTarget $contextTitle
) {
	$pointsToForeignWiki = $wikiId !== null && $wikiId !== false && !$target->isExternal();

	if ( !$pointsToForeignWiki ) {
		return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
	}

	// Main-namespace pages carry no prefix; others get the canonical namespace name
	if ( $target->getNamespace() === 0 ) {
		$pageName = $target->getDBkey();
	} else {
		$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
			':' . $target->getDBkey();
	}
	$foreignUrl = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );

	// $text is already escaped, hence HtmlArmor
	return $this->linkRenderer->makeExternalLink(
		$foreignUrl,
		new HtmlArmor( $text ),
		$contextTitle
	);
}
320
/**
 * Replace [[wiki links]] in a comment with rendered links or link markers.
 *
 * Media links become file link markers. Every other link goes through
 * addPageLink(), which returns either finished HTML or a marker. A link
 * whose target cannot be parsed is left in the text unchanged.
 *
 * @param string $comment Comment text, already escaped
 * @param LinkTarget|null $selfLinkTarget Base for relative links and
 *   fragment-only links, or null if there is none
 * @param bool $samePage When true, fragment-only links are not rebased onto
 *   $selfLinkTarget
 * @param string|false|null $wikiId Passed on to addPageLink()
 * @return string
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	return preg_replace_callback(
		'/
			\[\[
			\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
			:? # ignore optional leading colon
			([^[\]|]+) # 1. link target; page names cannot include [, ] or |
			(?:\|
				# 2. link text
				# Stop matching at ]] without relying on backtracking.
				((?:]?[^\]])*+)
			)?
			\]\]
			([^[]*) # 3. link trail (the text up until the next link)
		/x',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
			// Matches the Media namespace prefix, by canonical or content-language name
			$medians = '(?:';
			$medians .= preg_quote(
				$this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' );
			$medians .= '|';
			$medians .= preg_quote(
				$this->contLang->getNsText( NS_MEDIA ),
				'/'
			) . '):';

			// Returned as-is unless a link marker is substituted in below
			$comment = $match[0];

			// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
			if ( strpos( $match[1], '%' ) !== false ) {
				$match[1] = strtr(
					rawurldecode( $match[1] ),
					[ '<' => '&lt;', '>' => '&gt;' ]
				);
			}

			// Handle link renaming [[foo|text]] will show link as "text"
			if ( $match[2] != "" ) {
				$text = $match[2];
			} else {
				$text = $match[1];
			}
			$submatch = [];
			$linkMarker = null;
			if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
				// Media link; trail not supported.
				$linkRegexp = '/\[\[(.*?)\]\]/';
				$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
				if ( $linkTarget ) {
					$linkMarker = $this->addFileLink( $linkTarget, $text );
				}
			} else {
				// Other kind of link
				// Make sure its target is non-empty
				if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
					$match[1] = substr( $match[1], 1 );
				}
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
					if ( preg_match(
						$this->contLang->linkTrail(),
						$match[3],
						$submatch
					) ) {
						$trail = $submatch[1];
					} else {
						$trail = "";
					}
					// Built from the unsplit trail, so the replacement below consumes all of it
					$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
					[ $inside, $trail ] = Linker::splitTrail( $trail );

					$linkText = $text;
					$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

					try {
						$target = $this->titleParser->parseTitle( $linkTarget );

						if ( $target->getText() == '' && !$target->isExternal()
							&& !$samePage && $selfLinkTarget
						) {
							$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
						}

						// We should deprecate `null` as a valid value for
						// $selfLinkTarget to ensure that we can use it as
						// the title context for the external link.
						global $wgTitle;
						$linkMarker = $this->addPageLink(
							$target,
							$linkText . $inside,
							$wikiId,
							$selfLinkTarget ?? $wgTitle ?? SpecialPage::getTitleFor( 'Badtitle' )
						);
						$linkMarker .= $trail;
					} catch ( MalformedTitleException $e ) {
						// Fall through
					}
				}
			}
			if ( $linkMarker ) {
				// If the link is still valid, go ahead and replace it in!
				$comment = preg_replace(
					// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal linkRegexp set when used
					$linkRegexp,
					// Escaped so that "$" and "\" in the marker or trail are not
					// treated as backreferences by preg_replace()
					StringUtils::escapeRegexReplacement( $linkMarker ),
					$comment,
					1
				);
			}

			return $comment;
		},
		$comment
	);
}
454
/**
 * Register a deferred link renderer and return the marker that stands in
 * for its output until finalize() runs.
 *
 * @param callable $callback Called without arguments; returns the link HTML
 * @return string Marker prefix followed by the zero-padded renderer index
 * @throws \RuntimeException If the index needs more digits than a marker can hold
 */
private function addLinkMarker( $callback ) {
	$id = count( $this->links );
	$digitCount = strlen( (string)$id );
	if ( $digitCount > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}

	$this->links[] = $callback;

	$markerFormat = self::MARKER_PREFIX . "%0" . self::MAX_ID_SIZE . 'd';
	return sprintf( $markerFormat, $id );
}
469
/**
 * Produce a page link, either as finished HTML or as a marker.
 *
 * HTML is returned right away for links to a foreign wiki and for pages
 * whose existence is already settled (good link ID in the link cache,
 * always-known title, or cached bad link). Anything else is queued in the
 * link batch and represented by a marker.
 *
 * @param LinkTarget $target
 * @param string $text Link text as HTML, already escaped
 * @param string|false|null $wikiId
 * @param LinkTarget $contextTitle Passed to the external link renderer
 * @return string HTML or link marker
 */
private function addPageLink( LinkTarget $target, $text, $wikiId, LinkTarget $contextTitle ) {
	$pointsToForeignWiki = $wikiId !== null && $wikiId !== false && !$target->isExternal();

	if ( $pointsToForeignWiki ) {
		// Handle links from a foreign wiki ID
		if ( $target->getNamespace() === 0 ) {
			$pageName = $target->getDBkey();
		} else {
			$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
				':' . $target->getDBkey();
		}
		$foreignUrl = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );

		// $text is already escaped, hence HtmlArmor
		return $this->linkRenderer->makeExternalLink(
			$foreignUrl,
			new HtmlArmor( $text ),
			$contextTitle
		);
	}

	$alreadyKnown = $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown();
	if ( $alreadyKnown ) {
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	}

	if ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Existence unknown: queue the lookup and render when finalize() runs
	if ( !$this->linkBatch ) {
		$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
		$this->linkBatch->setCaller( __METHOD__ );
	}
	$this->linkBatch->addObj( $target );

	$renderLater = function () use ( $target, $text ) {
		return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
	};
	return $this->addLinkMarker( $renderLater );
}
515
/**
 * Queue a media file for lookup and return the marker for its link.
 *
 * @param LinkTarget $target File title
 * @param string $html Link text, handed to Linker::makeMediaLinkFile() as is
 * @return string Link marker
 */
private function addFileLink( LinkTarget $target, $html ) {
	$this->fileBatch[] = [ 'title' => $target ];

	$renderLater = function () use ( $target, $html ) {
		// false when flushLinkBatches() stored no result under this DB key
		$file = $this->files[$target->getDBkey()] ?? false;
		return Linker::makeMediaLinkFile( $target, $file, $html );
	};

	return $this->addLinkMarker( $renderLater );
}
535
/**
 * Run the queued page and file lookups, then clear both queues.
 */
private function flushLinkBatches() {
	$pendingPages = $this->linkBatch;
	if ( $pendingPages ) {
		$pendingPages->execute();
		$this->linkBatch = null;
	}

	$pendingFiles = $this->fileBatch;
	if ( $pendingFiles ) {
		// Array union: results already stored under a key are kept
		$found = $this->repoGroup->findFiles( $pendingFiles );
		$this->files += $found;
		$this->fileBatch = [];
	}
}
549
550}
const NS_FILE
Definition Defines.php:71
const NS_MAIN
Definition Defines.php:65
const NS_MEDIA
Definition Defines.php:53
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
if(!defined( 'MW_NO_SESSION') &&MW_ENTRY_POINT !=='cli' $wgTitle
Definition Setup.php:540
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:74
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
Class representing a list of titles. The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:48
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition LinkCache.php:52
The text processing backend for CommentFormatter.
__construct(LinkRenderer $linkRenderer, LinkBatchFactory $linkBatchFactory, LinkCache $linkCache, RepoGroup $repoGroup, Language $userLang, Language $contLang, TitleParser $titleParser, NamespaceInfo $namespaceInfo, HookContainer $hookContainer)
finalize( $comments)
Execute pending batch queries and replace markers in the specified string(s) with actual links.
preprocessUnsafe( $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
preprocess(string $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment to HTML, but replace links with markers which are resolved later.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Base class for language-specific code.
Definition Language.php:78
Class that generates HTML for internal links.
Some internal bits split off from Skin.php.
Definition Linker.php:63
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:155
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:46
Parent class for all special pages.
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name. If you don't need a full Title object,...
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Represents the target of a wiki link.
Represents a title within MediaWiki.
Definition Title.php:78
Tools for dealing with other locally-hosted wikis.
Definition WikiMap.php:31
Prioritized list of file repositories.
Definition RepoGroup.php:30
A collection of static methods to play with strings.
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.
Represents the target of a wiki link.
A title parser service for MediaWiki.