MediaWiki master
CommentParser.php
Go to the documentation of this file.
1<?php
2
4
5use File;
6use HtmlArmor;
26use RepoGroup;
27use StringUtils;
28
/** @var LinkRenderer Produces the HTML for page links and external links */
private $linkRenderer;
/** @var LinkBatchFactory Creates the batch used for deferred page existence checks */
private $linkBatchFactory;
/** @var RepoGroup Used to look up the files referenced by media links */
private $repoGroup;
/** @var Language User language; supplies the arrow and text direction of section links */
private $userLang;
/** @var Language Content language; supplies the Media namespace name and the link trail regex */
private $contLang;
/** @var TitleParser */
private $titleParser;
/** @var NamespaceInfo */
private $namespaceInfo;
/** @var HookRunner Wraps the HookContainer passed to the constructor */
private $hookRunner;
/** @var LinkCache Consulted so that already-known pages need not be deferred */
private $linkCache;

/** @var callable[] Deferred link renderers, indexed by numeric marker ID */
private $links = [];
/** @var LinkBatch|null Pending page existence batch; created lazily, cleared when flushed */
private $linkBatch;

/**
 * @var array[] Pending file lookups, each of the form [ 'title' => LinkTarget ].
 *   Initialised to an empty array so that it always holds the same type it is
 *   reset to after a flush.
 */
private $fileBatch = [];
/** @var array File lookup results, read by DB key when a media link is rendered */
private $files = [];

/** Number of zero-padded decimal digits in the ID part of a link marker */
private const MAX_ID_SIZE = 7;
/** Prefix that introduces a link marker in a preprocessed comment */
private const MARKER_PREFIX = "\x1B\"'";
71
/**
 * Store the injected services and wrap the hook container in a HookRunner.
 *
 * @param LinkRenderer $linkRenderer
 * @param LinkBatchFactory $linkBatchFactory
 * @param LinkCache $linkCache
 * @param RepoGroup $repoGroup
 * @param Language $userLang
 * @param Language $contLang
 * @param TitleParser $titleParser
 * @param NamespaceInfo $namespaceInfo
 * @param HookContainer $hookContainer
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	// Hooks are always invoked through a typed runner.
	$this->hookRunner = new HookRunner( $hookContainer );

	// Title parsing and namespace naming.
	$this->titleParser = $titleParser;
	$this->namespaceInfo = $namespaceInfo;

	// Languages.
	$this->contLang = $contLang;
	$this->userLang = $userLang;

	// Link rendering, existence lookups and file lookups.
	$this->repoGroup = $repoGroup;
	$this->linkCache = $linkCache;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->linkRenderer = $linkRenderer;
}
104
/**
 * Convert a comment to HTML, but replace links with markers which are
 * resolved later by finalize().
 *
 * The comment text is HTML-escaped (entities are allowed) before links are
 * processed.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Page that section links and
 *   fragment-only links are resolved against
 * @param bool $samePage Passed through to section and wiki link handling;
 *   when true, section links carry only a fragment
 * @param string|false|null $wikiId When neither null nor false, links to
 *   non-interwiki targets are rendered as external links to that wiki
 * @param bool $enableSectionLinks Whether autocomments are processed
 * @return string HTML containing link markers
 */
public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	$unsafe = false;
	return $this->preprocessInternal(
		$comment,
		$unsafe,
		$selfLinkTarget,
		$samePage,
		$wikiId,
		$enableSectionLinks
	);
}
122
/**
 * Convert a comment in pseudo-HTML format to HTML, replacing links with
 * markers which are resolved later by finalize().
 *
 * Unlike preprocess(), the comment text is NOT HTML-escaped here, so the
 * caller is responsible for the safety of the input.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Page that section links and
 *   fragment-only links are resolved against
 * @param bool $samePage Passed through to section and wiki link handling;
 *   when true, section links carry only a fragment
 * @param string|false|null $wikiId When neither null nor false, links to
 *   non-interwiki targets are rendered as external links to that wiki
 * @param bool $enableSectionLinks Whether autocomments are processed
 * @return string HTML containing link markers
 */
public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	$unsafe = true;
	return $this->preprocessInternal(
		$comment,
		$unsafe,
		$selfLinkTarget,
		$samePage,
		$wikiId,
		$enableSectionLinks
	);
}
139
/**
 * Execute pending batch queries and replace markers in the specified
 * string(s) with actual links.
 *
 * @param string|string[] $comments Output of preprocess() or preprocessUnsafe()
 * @return string|string[] The input with each marker replaced by its link
 *   HTML, or by an HTML comment if no link is registered under the marker ID
 */
public function finalize( $comments ) {
	// Resolve all deferred lookups first so the renderers below see their results.
	$this->flushLinkBatches();

	// A marker is the fixed prefix followed by a zero-padded decimal ID.
	$markerPattern = '/' . self::MARKER_PREFIX . '([0-9]{' . self::MAX_ID_SIZE . '})/';

	$expandMarker = function ( $m ) {
		$renderer = $this->links[(int)$m[1]] ?? null;
		return $renderer ? $renderer() : '<!-- MISSING -->';
	};

	return preg_replace_callback( $markerPattern, $expandMarker, $comments );
}
162
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * @param string $comment
 * @param bool $unsafe True to skip HTML escaping of the comment
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks Whether autocomments are processed
 * @return string HTML containing link markers
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit.
	// \x1b needs to be stripped because it is used for link markers; a
	// comment containing it could otherwise forge a marker.
	// The map form of strtr() gives every character an explicit replacement.
	// The three-argument form silently ignores the surplus characters of the
	// longer string, so a shorter "to" string would leave \x1b in place.
	$comment = strtr( $comment, [ "\n" => ' ', "\x1b" => ' ' ] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
187
/**
 * Convert autocomments of the form "/* section name *" + "/" into HTML.
 *
 * For each autocomment the FormatAutocomments hook is run first; if it sets
 * a replacement, that replacement is used verbatim. Otherwise, when a self
 * link target is given and a section name can be derived, the autocomment
 * becomes a link to that section. A prefix message is emitted when text
 * precedes the autocomment, a colon separator is appended when text follows
 * it, and the result is wrapped in a span with class "autocomment".
 *
 * @param string $comment Comment text, already escaped as required
 * @param LinkTarget|null $selfLinkTarget Page the section belongs to
 * @param bool $samePage True to link to the bare fragment instead of the page
 * @param string|false|null $wikiId Foreign wiki ID, passed to makeSectionLink()
 * @return string
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	return preg_replace_callback(
		// To detect the presence of content before or after the
		// auto-comment, we use capturing groups inside optional zero-width
		// assertions. But older versions of PCRE can't directly make
		// zero-width assertions optional, so wrap them in a non-capturing
		// group.
		'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
			// Ensure all match positions are defined
			$match += [ '', '', '', '' ];

			$pre = $match[1] !== '';
			$auto = $match[2];
			$post = $match[3] !== '';
			$comment = null;

			// Give extensions the chance to supply the whole replacement.
			$this->hookRunner->onFormatAutocomments(
				$comment, $pre, $auto, $post,
				Title::castFromLinkTarget( $selfLinkTarget ),
				$samePage,
				$wikiId );
			if ( $comment !== null ) {
				return $comment;
			}

			if ( $selfLinkTarget ) {
				$section = $auto;
				# Remove links that a user may have manually put in the autosummary
				# This could be improved by copying as much of Parser::stripSectionName as desired.
				$section = str_replace( [
					'[[:',
					'[[',
					']]'
				], '', $section );

				// We don't want any links in the auto text to be linked, but we still
				// want to show any [[ ]]
				$sectionText = str_replace( '[[', '&#91;[', $auto );

				// Drop the first character of the guessed section name.
				$section = substr( Parser::guessSectionNameFromStrippedText( $section ), 1 );
				if ( $section !== '' ) {
					if ( $samePage ) {
						$sectionTitle = new TitleValue( NS_MAIN, '', $section );
					} else {
						$sectionTitle = $selfLinkTarget->createFragmentTarget( $section );
					}
					$auto = $this->makeSectionLink(
						$sectionTitle,
						$this->userLang->getArrow() .
							Html::rawElement( 'bdi', [ 'dir' => $this->userLang->getDir() ], $sectionText ),
						$wikiId,
						$selfLinkTarget
					);
				}
			}
			if ( $pre ) {
				# written summary $presep autocomment (summary /* section */)
				$pre = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
			}
			if ( $post ) {
				# autocomment $postsep written summary (/* section */ summary)
				$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
			}
			// Compare against the empty string rather than testing truthiness,
			// so that an autocomment whose whole text is "0" is still wrapped.
			if ( $auto !== '' ) {
				$auto = Html::rawElement( 'span', [ 'class' => 'autocomment' ], $auto );
			}
			// $pre is either false (concatenates as '') or the prefix message.
			return $pre . $auto;
		},
		$comment
	);
}
282
/**
 * Build the HTML link for a section autocomment.
 *
 * @param LinkTarget $target Section to link to
 * @param string $text Link content as HTML; it is not escaped again
 * @param string|false|null $wikiId When neither null nor false, and the
 *   target is not an interwiki link, an external link to that wiki is made
 * @param LinkTarget $contextTitle Context passed along for external links
 * @return string HTML
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId, LinkTarget $contextTitle
) {
	$hasForeignWiki = $wikiId !== null && $wikiId !== false;
	if ( !$hasForeignWiki || $target->isExternal() ) {
		// Local rendering
		return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
	}

	// Page name on the foreign wiki: main namespace titles carry no prefix,
	// everything else gets the canonical namespace name.
	if ( $target->getNamespace() === 0 ) {
		$pageName = $target->getDBkey();
	} else {
		$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
			':' . $target->getDBkey();
	}
	$url = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );

	// $text is already escaped, hence HtmlArmor
	return $this->linkRenderer->makeExternalLink( $url, new HtmlArmor( $text ), $contextTitle );
}
314
/**
 * Replace [[wiki links]] in a comment with link HTML or link markers.
 *
 * Media links become file link markers. Other links go through
 * addPageLink(), which returns either finished HTML or a marker to be
 * resolved by finalize(). Links whose target cannot be parsed are left
 * as they are.
 *
 * @param string $comment Comment text, already escaped as required
 * @param LinkTarget|null $selfLinkTarget Base for subpage links and for
 *   links that consist of a fragment only
 * @param bool $samePage When true, fragment-only links are not rewritten
 *   to point at $selfLinkTarget
 * @param string|false|null $wikiId Foreign wiki ID, passed to addPageLink()
 * @return string
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	return preg_replace_callback(
		'/
			\[\[
			\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
			:? # ignore optional leading colon
			([^[\]|]+) # 1. link target; page names cannot include [, ] or |
			(?:\|
				# 2. link text
				# Stop matching at ]] without relying on backtracking.
				((?:]?[^\]])*+)
			)?
			\]\]
			([^[]*) # 3. link trail (the text up until the next link)
		/x',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
			// Matches both the canonical and the localised Media: prefix.
			$medians = '(?:';
			$medians .= preg_quote(
				$this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' );
			$medians .= '|';
			$medians .= preg_quote(
				$this->contLang->getNsText( NS_MEDIA ),
				'/'
			) . '):';

			// Start from the unmodified match; it is returned as-is if no
			// link can be made.
			$comment = $match[0];

			// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
			if ( strpos( $match[1], '%' ) !== false ) {
				$match[1] = strtr(
					rawurldecode( $match[1] ),
					[ '<' => '&lt;', '>' => '&gt;' ]
				);
			}

			// Handle link renaming [[foo|text]] will show link as "text"
			if ( $match[2] != "" ) {
				$text = $match[2];
			} else {
				$text = $match[1];
			}
			$submatch = [];
			$linkMarker = null;
			if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
				// Media link; trail not supported.
				$linkRegexp = '/\[\[(.*?)\]\]/';
				$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
				if ( $linkTarget ) {
					$linkMarker = $this->addFileLink( $linkTarget, $text );
				}
			} else {
				// Other kind of link
				// Make sure its target is non-empty
				if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
					$match[1] = substr( $match[1], 1 );
				}
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
					if ( preg_match(
						$this->contLang->linkTrail(),
						$match[3],
						$submatch
					) ) {
						$trail = $submatch[1];
					} else {
						$trail = "";
					}
					// The trail becomes part of the text that is replaced.
					$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
					[ $inside, $trail ] = Linker::splitTrail( $trail );

					$linkText = $text;
					$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

					try {
						$target = $this->titleParser->parseTitle( $linkTarget );

						// A fragment-only link refers to the page the comment belongs to.
						if ( $target->getText() == '' && !$target->isExternal()
							&& !$samePage && $selfLinkTarget
						) {
							$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
						}

						// We should deprecate `null` as a valid value for
						// $selfLinkTarget to ensure that we can use it as
						// the title context for the external link.
						// phpcs:ignore MediaWiki.Usage.DeprecatedGlobalVariables.Deprecated$wgTitle
						global $wgTitle;
						$linkMarker = $this->addPageLink(
							$target,
							$linkText . $inside,
							$wikiId,
							$selfLinkTarget ?? $wgTitle ?? SpecialPage::getTitleFor( 'Badtitle' )
						);
						$linkMarker .= $trail;
					} catch ( MalformedTitleException $e ) {
						// Fall through
					}
				}
			}
			if ( $linkMarker ) {
				// If the link is still valid, go ahead and replace it in!
				$comment = preg_replace(
					// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal linkRegexp set when used
					$linkRegexp,
					// The marker or link HTML is literal text: escape it so that
					// "$" and "\" are not read as backreferences.
					StringUtils::escapeRegexReplacement( $linkMarker ),
					$comment,
					1
				);
			}

			return $comment;
		},
		$comment
	);
}
449
/**
 * Register a deferred link renderer and return the marker that stands in
 * for its output until finalize() is called.
 *
 * @param callable $callback Called without arguments; returns the link HTML
 * @return string The marker prefix followed by the zero-padded ID
 * @throws \RuntimeException If the ID no longer fits in MAX_ID_SIZE digits
 */
private function addLinkMarker( $callback ) {
	// IDs are handed out sequentially, so the next ID is the current count.
	$id = count( $this->links );
	$digits = (string)$id;
	if ( strlen( $digits ) > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}

	$this->links[] = $callback;

	return self::MARKER_PREFIX . str_pad( $digits, self::MAX_ID_SIZE, '0', STR_PAD_LEFT );
}
464
/**
 * Produce the replacement for a page link.
 *
 * Links to a foreign wiki, and links whose existence is already known,
 * are rendered immediately. Anything else is added to the link batch and
 * represented by a marker until finalize() is called.
 *
 * @param LinkTarget $target
 * @param string $text Link content as HTML; it is not escaped again
 * @param string|false|null $wikiId When neither null nor false, and the
 *   target is not an interwiki link, an external link to that wiki is made
 * @param LinkTarget $contextTitle Context passed along for external links
 * @return string Link HTML or a link marker
 */
private function addPageLink( LinkTarget $target, $text, $wikiId, LinkTarget $contextTitle ) {
	$hasForeignWiki = $wikiId !== null && $wikiId !== false;
	if ( $hasForeignWiki && !$target->isExternal() ) {
		// Handle links from a foreign wiki ID: main namespace titles carry
		// no prefix, everything else gets the canonical namespace name.
		if ( $target->getNamespace() === 0 ) {
			$pageName = $target->getDBkey();
		} else {
			$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
				':' . $target->getDBkey();
		}
		$url = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );

		// $text is already escaped, hence HtmlArmor
		return $this->linkRenderer->makeExternalLink( $url, new HtmlArmor( $text ), $contextTitle );
	}

	$alreadyKnown = $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown();
	if ( $alreadyKnown ) {
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	}

	if ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Existence unknown: defer the link until the batch has been executed.
	if ( !$this->linkBatch ) {
		$batch = $this->linkBatchFactory->newLinkBatch();
		$batch->setCaller( __METHOD__ );
		$this->linkBatch = $batch;
	}
	$this->linkBatch->addObj( $target );

	$renderLater = function () use ( $target, $text ) {
		return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
	};
	return $this->addLinkMarker( $renderLater );
}
510
/**
 * Queue a file lookup for a media link and return the marker that stands
 * in for the link until finalize() is called.
 *
 * @param LinkTarget $target The file
 * @param string $html Link content, passed on to Linker::makeMediaLinkFile()
 * @return string Link marker
 */
private function addFileLink( LinkTarget $target, $html ) {
	$this->fileBatch[] = [ 'title' => $target ];

	$renderLater = function () use ( $target, $html ) {
		// False when the flushed lookup produced no entry for this DB key.
		$file = $this->files[$target->getDBkey()] ?? false;
		return Linker::makeMediaLinkFile( $target, $file, $html );
	};

	return $this->addLinkMarker( $renderLater );
}
530
/**
 * Execute the pending page and file batches, if there are any, and reset
 * them.
 */
private function flushLinkBatches() {
	$pageBatch = $this->linkBatch;
	if ( $pageBatch ) {
		$pageBatch->execute();
		$this->linkBatch = null;
	}

	if ( $this->fileBatch ) {
		$found = $this->repoGroup->findFiles( $this->fileBatch );
		// The array union keeps results already stored under the same key.
		$this->files += $found;
		$this->fileBatch = [];
	}
}
544
545}
const NS_FILE
Definition Defines.php:71
const NS_MAIN
Definition Defines.php:65
const NS_MEDIA
Definition Defines.php:53
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
if(!defined( 'MW_NO_SESSION') &&MW_ENTRY_POINT !=='cli' $wgTitle
Definition Setup.php:572
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:79
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
Class representing a list of titles. The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:48
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition LinkCache.php:52
The text processing backend for CommentFormatter.
__construct(LinkRenderer $linkRenderer, LinkBatchFactory $linkBatchFactory, LinkCache $linkCache, RepoGroup $repoGroup, Language $userLang, Language $contLang, TitleParser $titleParser, NamespaceInfo $namespaceInfo, HookContainer $hookContainer)
finalize( $comments)
Execute pending batch queries and replace markers in the specified string(s) with actual links.
preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
preprocess(string $comment, ?LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment to HTML, but replace links with markers which are resolved later.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
This class is a collection of static functions that serve two purposes:
Definition Html.php:56
Base class for language-specific code.
Definition Language.php:80
Class that generates HTML for internal links.
Some internal bits split off from Skin.php.
Definition Linker.php:63
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:145
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:46
Parent class for all special pages.
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name. If you don't need a full Title object,...
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Represents the target of a wiki link.
Represents a title within MediaWiki.
Definition Title.php:78
Tools for dealing with other locally-hosted wikis.
Definition WikiMap.php:31
Prioritized list of file repositories.
Definition RepoGroup.php:32
A collection of static methods to play with strings.
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.
Represents the target of a wiki link.
A title parser service for MediaWiki.