MediaWiki master
CommentParser.php
Go to the documentation of this file.
1<?php
2
4
28
/** @var LinkRenderer Builds the HTML for page, section and foreign-wiki links */
39 private $linkRenderer;
/** @var LinkBatchFactory Creates the LinkBatch used for deferred page links */
41 private $linkBatchFactory;
/** @var RepoGroup Queried with findFiles() when pending media links are flushed */
43 private $repoGroup;
/** @var Language User language: supplies the arrow, text direction and "autocomment-top" message */
45 private $userLang;
/** @var Language Content language: supplies the Media namespace name and the link trail regex */
47 private $contLang;
/** @var TitleParser Parses link targets into title values */
49 private $titleParser;
/** @var NamespaceInfo Supplies canonical namespace names */
51 private $namespaceInfo;
/** @var HookRunner Wraps the HookContainer given to the constructor; runs FormatAutocomments */
53 private $hookRunner;
/** @var LinkCache Consulted for already-known good/bad links before deferring a page link */
55 private $linkCache;
56
/** @var callable[] Deferred link renderers; the array index is the numeric ID embedded in the marker */
58 private $links = [];
/** @var LinkBatch|null Page links waiting to be looked up; created lazily, reset to null on flush */
60 private $linkBatch;
61
/** @var array[]|null Pending file lookups, each of the form [ 'title' => LinkTarget ] */
63 private $fileBatch;
/** @var array Accumulated findFiles() results, read by the target's DB key */
65 private $files = [];
66
/** Number of zero-padded decimal digits used for the ID part of a link marker */
68 private const MAX_ID_SIZE = 7;
/** Prefix of every link marker; starts with \x1B, which is removed from incoming comments */
70 private const MARKER_PREFIX = "\x1B\"'";
71
/**
 * Store the injected services.
 *
 * The hook container is not kept directly; it is wrapped in a HookRunner.
 *
 * @param LinkRenderer $linkRenderer
 * @param LinkBatchFactory $linkBatchFactory
 * @param LinkCache $linkCache
 * @param RepoGroup $repoGroup
 * @param Language $userLang
 * @param Language $contLang
 * @param TitleParser $titleParser
 * @param NamespaceInfo $namespaceInfo
 * @param HookContainer $hookContainer
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	// Hooks
	$this->hookRunner = new HookRunner( $hookContainer );

	// Languages
	$this->contLang = $contLang;
	$this->userLang = $userLang;

	// Title and namespace handling
	$this->namespaceInfo = $namespaceInfo;
	$this->titleParser = $titleParser;

	// Link rendering and existence lookups
	$this->linkCache = $linkCache;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->linkRenderer = $linkRenderer;

	// File lookups
	$this->repoGroup = $repoGroup;
}
104
/**
 * Convert a comment to HTML, but replace links with markers which are
 * resolved later by finalize().
 *
 * This is the escaping variant: it delegates to preprocessInternal() with
 * the "unsafe" flag off, so the text is HTML-escaped before link processing.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks Whether autocomments are turned into section links
 * @return string
 */
public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	$unsafe = false;
	return $this->preprocessInternal(
		$comment,
		$unsafe,
		$selfLinkTarget,
		$samePage,
		$wikiId,
		$enableSectionLinks
	);
}
122
/**
 * Convert a comment in pseudo-HTML format to HTML, replacing links with
 * markers which are resolved later by finalize().
 *
 * This is the non-escaping variant: it delegates to preprocessInternal()
 * with the "unsafe" flag on, so the HTML-escaping step is skipped.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks Whether autocomments are turned into section links
 * @return string
 */
public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	$unsafe = true;
	return $this->preprocessInternal(
		$comment,
		$unsafe,
		$selfLinkTarget,
		$samePage,
		$wikiId,
		$enableSectionLinks
	);
}
139
/**
 * Execute pending batch queries and replace markers in the specified
 * string(s) with actual links.
 *
 * A marker whose ID has no registered callback is replaced by the HTML
 * comment "<!-- MISSING -->".
 *
 * @param string|string[] $comments
 * @return string|string[]
 */
public function finalize( $comments ) {
	// Pending lookups must run first so the deferred renderers see their results.
	$this->flushLinkBatches();

	$markerPattern = '/' . self::MARKER_PREFIX . '([0-9]{' . self::MAX_ID_SIZE . '})/';

	$resolveMarker = function ( $found ) {
		$render = $this->links[(int)$found[1]] ?? null;
		return $render ? $render() : '<!-- MISSING -->';
	};

	return preg_replace_callback( $markerPattern, $resolveMarker, $comments );
}
162
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * Newlines and escape characters are replaced by spaces, the text is
 * optionally HTML-escaped, autocomments are optionally converted to section
 * links, and finally wikilinks are converted to links or link markers.
 *
 * @param string $comment
 * @param bool $unsafe True to skip HTML escaping of $comment
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks
 * @return string
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit
	// \x1b needs to be stripped because it is used for link markers.
	// The array form of strtr() is used on purpose: with the string form, a
	// replacement string shorter than the search string makes strtr() ignore the
	// surplus search characters, so "\n\x1b" => " " would leave \x1b in place and
	// let user input forge link markers.
	$comment = strtr( $comment, [ "\n" => ' ', "\x1b" => ' ' ] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
187
/**
 * Convert autocomments (text wrapped in slash-star ... star-slash
 * delimiters) into "autocomment" spans, linking to the section when a self
 * link target is available.
 *
 * A FormatAutocomments hook handler may supply the replacement HTML itself;
 * if it leaves $comment null the default formatting below is used.
 *
 * @param string $comment Comment text, treated as HTML from here on
 * @param LinkTarget|null $selfLinkTarget Page the section links point at
 * @param bool $samePage True to link to a bare fragment instead of $selfLinkTarget
 * @param string|false|null $wikiId
 * @return string
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	$comment = preg_replace_callback(
		// To detect the presence of content before or after the
		// auto-comment, we use capturing groups inside optional zero-width
		// assertions. But older versions of PCRE can not directly make
		// zero-width assertions optional, so wrap them in a non-capturing
		// group.
		'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
			// $pre / $post: is there any text before / after the autocomment?
			$pre = ( $match[1] ?? '' ) !== '';
			$section = ( $match[2] ?? '' );
			$post = ( $match[3] ?? '' ) !== '';
			$comment = null;

			$this->hookRunner->onFormatAutocomments(
				$comment, $pre, $section, $post,
				Title::castFromLinkTarget( $selfLinkTarget ),
				$samePage,
				$wikiId );
			if ( $comment !== null ) {
				return $comment;
			}

			// HTML has already been escaped in preprocessInternal(), so treat this as HTML from this point
			$parsedSection = $section;

			if ( $selfLinkTarget ) {
				$decodedSection = substr( Parser::guessSectionNameFromStrippedText(
					// Remove links that a user may have manually put in the autosummary
					// This could be improved by copying as much of Parser::stripSectionName as desired.
					str_replace( [ '[[:', '[[', ']]' ], '', $section )
				), 1 );
				if ( $decodedSection !== '' || $section === '' ) {
					if ( $samePage ) {
						$targetWithSection = new TitleValue( NS_MAIN, '', $decodedSection );
					} else {
						$targetWithSection = $selfLinkTarget->createFragmentTarget( $decodedSection );
					}
					if ( $section === '' ) {
						// Special case for edits to the zeroth section (T412472).
						$linkHtml = wfMessage( 'autocomment-top' )->inLanguage( $this->userLang )->escaped();
					} else {
						$linkHtml = $parsedSection;
					}
					$parsedSection = $this->makeSectionLink(
						$targetWithSection,
						$this->userLang->getArrow() .
							Html::rawElement( 'bdi', [ 'dir' => $this->userLang->getDir() ], $linkHtml ),
						$wikiId,
						$selfLinkTarget
					);
				}
			}
			if ( $post ) {
				# autocomment $postsep written summary (/* section */ summary)
				$parsedSection .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
			}
			// Compare against '' explicitly: a plain truthiness test would treat a
			// section named "0" as empty and leave it outside the autocomment span.
			if ( $parsedSection !== '' ) {
				$parsedSection = Html::rawElement( 'span', [ 'class' => 'autocomment' ], $parsedSection );
			}
			if ( $pre ) {
				# written summary $presep autocomment (summary /* section */)
				$parsedSection = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped()
					. $parsedSection;
			}

			// Make sure any brackets (which the user could have input in the edit summary)
			// in the generated autocomment HTML don't trigger additional link processing (T406664).
			return str_replace( [ '[', ']' ], [ '&#91;', '&#93;' ], $parsedSection );
		},
		$comment
	);
	return $comment;
}
283
/**
 * Build the link HTML for an autocomment section.
 *
 * When a wiki ID is given and the target is not external, the link is
 * rendered as an external link to the URL WikiMap reports for that wiki;
 * otherwise it is rendered with LinkRenderer::makePreloadedLink().
 *
 * @param LinkTarget $target
 * @param string $text Link HTML, already escaped
 * @param string|false|null $wikiId
 * @param LinkTarget $contextTitle Passed through to makeExternalLink()
 * @return string HTML
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId, LinkTarget $contextTitle
) {
	$useLocalLink = $wikiId === null || $wikiId === false || $target->isExternal();
	if ( $useLocalLink ) {
		return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
	}

	// Namespace 0 has no prefix; everything else gets "<canonical name>:".
	$namespace = $target->getNamespace();
	if ( $namespace === 0 ) {
		$pageName = $target->getDBkey();
	} else {
		$pageName = $this->namespaceInfo->getCanonicalName( $namespace ) .
			':' . $target->getDBkey();
	}
	$foreignUrl = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );

	return $this->linkRenderer->makeExternalLink(
		$foreignUrl,
		new HtmlArmor( $text ), // Already escaped
		$contextTitle
	);
}
315
/**
 * Replace [[wikilinks]] in a comment with link HTML or link markers.
 *
 * Media links are turned into file link markers; other links are parsed as
 * titles and handed to addPageLink(). Links whose target cannot be parsed
 * are left in the text unchanged.
 *
 * @param string $comment Comment text, treated as HTML
 * @param LinkTarget|null $selfLinkTarget Context for subpage and fragment-only links
 * @param bool $samePage True to keep fragment-only links as bare fragments
 * @param string|false|null $wikiId
 * @return string
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	return preg_replace_callback(
		'/
			\[\[
			\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
			:? # ignore optional leading colon
			([^[\]|]+) # 1. link target; page names cannot include [, ] or |
			(?:\|
				# 2. link text
				# Stop matching at ]] without relying on backtracking.
				((?:]?[^\]])*+)
			)?
			\]\]
			([^[]*) # 3. link trail (the text up until the next link)
		/x',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
			// Alternation of the canonical and the content-language Media namespace names
			$medians = '(?:';
			$medians .= preg_quote(
				$this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' );
			$medians .= '|';
			$medians .= preg_quote(
				$this->contLang->getNsText( NS_MEDIA ),
				'/'
			) . '):';

			$comment = $match[0];

			// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
			if ( str_contains( $match[1], '%' ) ) {
				$match[1] = strtr(
					rawurldecode( $match[1] ),
					[ '<' => '&lt;', '>' => '&gt;' ]
				);
			}

			// Handle link renaming [[foo|text]] will show link as "text"
			if ( $match[2] != "" ) {
				$text = $match[2];
			} else {
				$text = $match[1];
			}
			$submatch = [];
			$linkMarker = null;
			if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
				// Media link; trail not supported.
				$linkRegexp = '/\[\[(.*?)\]\]/';
				$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
				if ( $linkTarget ) {
					$linkMarker = $this->addFileLink( $linkTarget, $text );
				}
			} else {
				// Other kind of link
				// Make sure its target is non-empty
				if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
					$match[1] = substr( $match[1], 1 );
				}
				if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
					if ( preg_match(
						$this->contLang->linkTrail(),
						$match[3],
						$submatch
					) ) {
						$trail = $submatch[1];
					} else {
						$trail = "";
					}
					$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
					[ $inside, $trail ] = Linker::splitTrail( $trail );

					$linkText = $text;
					$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

					try {
						$target = $this->titleParser->parseTitle( $linkTarget );

						if ( $target->getText() == '' && !$target->isExternal()
							&& !$samePage && $selfLinkTarget
						) {
							$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
						}

						// We should deprecate `null` as a valid value for
						// $selfLinkTarget to ensure that we can use it as
						// the title context for the external link.
						// phpcs:ignore MediaWiki.Usage.DeprecatedGlobalVariables.Deprecated$wgTitle
						global $wgTitle;
						$linkMarker = $this->addPageLink(
							$target,
							$linkText . $inside,
							$wikiId,
							$selfLinkTarget ?? $wgTitle ?? SpecialPage::getTitleFor( 'Badtitle' )
						);
						$linkMarker .= $trail;
					} catch ( MalformedTitleException ) {
						// Fall through
					}
				}
			}
			if ( $linkMarker ) {
				// If the link is still valid, go ahead and replace it in!
				$comment = preg_replace(
					// @phan-suppress-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
					$linkRegexp,
					// The replacement is the marker/link HTML. Escape it so that any "$" or
					// "\" it contains is not interpreted as a backreference by preg_replace().
					StringUtils::escapeRegexReplacement( $linkMarker ),
					$comment,
					1
				);
			}

			return $comment;
		},
		$comment
	);
}
448
/**
 * Register a deferred link renderer and return the marker that stands in
 * for it until finalize() runs.
 *
 * The marker is MARKER_PREFIX followed by the callback's index in
 * $this->links, zero-padded to MAX_ID_SIZE digits.
 *
 * @param callable $callback Returns the link HTML when invoked without arguments
 * @return string The marker
 * @throws \RuntimeException If the next ID no longer fits in MAX_ID_SIZE digits
 */
private function addLinkMarker( $callback ) {
	$id = count( $this->links );
	$digits = strlen( (string)$id );
	if ( $digits > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}

	$this->links[] = $callback;

	$markerFormat = self::MARKER_PREFIX . "%0" . self::MAX_ID_SIZE . 'd';
	return sprintf( $markerFormat, $id );
}
463
/**
 * Produce the HTML for a page link, or a marker if the link must be deferred.
 *
 * Checked in order: links on a foreign wiki, links already known to exist
 * (or always known), links already cached as missing. Anything else is
 * added to the link batch and represented by a marker.
 *
 * @param LinkTarget $target
 * @param string $text Link HTML, already escaped
 * @param string|false|null $wikiId
 * @param LinkTarget $contextTitle Passed through to makeExternalLink()
 * @return string HTML or link marker
 */
private function addPageLink( LinkTarget $target, $text, $wikiId, LinkTarget $contextTitle ) {
	$isForeign = $wikiId !== null && $wikiId !== false && !$target->isExternal();
	if ( $isForeign ) {
		// Handle links from a foreign wiki ID
		$namespace = $target->getNamespace();
		if ( $namespace === 0 ) {
			$pageName = $target->getDBkey();
		} else {
			$pageName = $this->namespaceInfo->getCanonicalName( $namespace ) .
				':' . $target->getDBkey();
		}
		$foreignUrl = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );

		return $this->linkRenderer->makeExternalLink(
			$foreignUrl,
			new HtmlArmor( $text ), // Already escaped
			$contextTitle
		);
	}

	if ( $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown()
	) {
		// Already known
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	}

	if ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Defer page link
	if ( !$this->linkBatch ) {
		$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
		$this->linkBatch->setCaller( __METHOD__ );
	}
	$this->linkBatch->addObj( $target );

	return $this->addLinkMarker(
		fn () => $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) )
	);
}
509
/**
 * Queue a file lookup for a media link and return a marker for it.
 *
 * The deferred renderer reads the looked-up file by the target's DB key,
 * passing false when no entry is present.
 *
 * @param LinkTarget $target
 * @param string $html Link text HTML
 * @return string The marker
 */
private function addFileLink( LinkTarget $target, $html ) {
	$this->fileBatch[] = [ 'title' => $target ];

	$renderMediaLink = function () use ( $target, $html ) {
		$file = $this->files[$target->getDBkey()] ?? false;
		return Linker::makeMediaLinkFile( $target, $file, $html );
	};

	return $this->addLinkMarker( $renderMediaLink );
}
529
/**
 * Execute any pending page-link and file lookups.
 *
 * The link batch is executed and discarded; the file batch is resolved
 * through RepoGroup::findFiles(), merged into $this->files, and emptied.
 */
private function flushLinkBatches() {
	$pendingLinks = $this->linkBatch;
	if ( $pendingLinks ) {
		$pendingLinks->execute();
		$this->linkBatch = null;
	}

	$pendingFiles = $this->fileBatch;
	if ( $pendingFiles ) {
		// "+=" keeps entries already present in $this->files.
		$this->files += $this->repoGroup->findFiles( $pendingFiles );
		$this->fileBatch = [];
	}
}
543
544}
const NS_FILE
Definition Defines.php:57
const NS_MAIN
Definition Defines.php:51
const NS_MEDIA
Definition Defines.php:39
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
if(MW_ENTRY_POINT==='index') if(!defined( 'MW_NO_SESSION') &&MW_ENTRY_POINT !=='cli' $wgTitle
Definition Setup.php:551
The text processing backend for CommentFormatter.
__construct(LinkRenderer $linkRenderer, LinkBatchFactory $linkBatchFactory, LinkCache $linkCache, RepoGroup $repoGroup, Language $userLang, Language $contLang, TitleParser $titleParser, NamespaceInfo $namespaceInfo, HookContainer $hookContainer)
finalize( $comments)
Execute pending batch queries and replace markers in the specified string(s) with actual links.
preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
preprocess(string $comment, ?LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment to HTML, but replace links with markers which are resolved later.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:79
Prioritized list of file repositories.
Definition RepoGroup.php:30
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
This class is a collection of static functions that serve two purposes:
Definition Html.php:43
Base class for language-specific code.
Definition Language.php:68
Class that generates HTML for internal links.
Some internal bits split off from Skin.php.
Definition Linker.php:47
Factory for LinkBatch objects to batch query page metadata.
Batch query for page metadata and feed to LinkCache.
Definition LinkBatch.php:36
Page existence and metadata cache.
Definition LinkCache.php:54
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:135
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:32
Parent class for all special pages.
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name If you don't need a full Title object,...
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
A title parser service for MediaWiki.
Represents the target of a wiki link.
Represents a title within MediaWiki.
Definition Title.php:69
Tools for dealing with other locally-hosted wikis.
Definition WikiMap.php:19
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:18
A collection of static methods to play with strings.
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.
Represents the target of a wiki link.