MediaWiki 1.39.10
CommentParser.php
Go to the documentation of this file.
1<?php
2
4
5use File;
6use HtmlArmor;
7use Language;
8use LinkBatch;
9use LinkCache;
10use Linker;
18use Parser;
19use RepoGroup;
20use Title;
21use TitleParser;
22use TitleValue;
23
/** @var LinkRenderer Used to render the final page links */
private $linkRenderer;

/** @var LinkBatchFactory Creates the batch used for deferred page lookups */
private $linkBatchFactory;

/** @var RepoGroup Used to look up the files collected in $fileBatch */
private $repoGroup;

/** @var Language Supplies the arrow and direction mark for section links */
private $userLang;

/** @var Language Supplies the Media namespace name and the link trail regex */
private $contLang;

/** @var TitleParser */
private $titleParser;

/** @var NamespaceInfo */
private $namespaceInfo;

/** @var HookRunner */
private $hookRunner;

/** @var LinkCache Consulted for links that are already known good or bad */
private $linkCache;

/** @var callable[] Deferred link renderers, indexed by marker ID */
private $links = [];

/** @var LinkBatch|null Page links waiting for an existence check, if any */
private $linkBatch;

/** @var array[]|null Pending file lookups, each of the form [ 'title' => LinkTarget ] */
private $fileBatch;

/** @var array Results of the file lookups, indexed by DB key */
private $files = [];

/** Number of decimal digits in the ID part of a link marker */
private const MAX_ID_SIZE = 7;

/** Prefix of a link marker; \x1b is stripped from comments so it cannot be forged */
private const MARKER_PREFIX = "\x1B\"'";
66
/**
 * Store the injected services and wrap the hook container in a HookRunner.
 *
 * @param LinkRenderer $linkRenderer
 * @param LinkBatchFactory $linkBatchFactory
 * @param LinkCache $linkCache
 * @param RepoGroup $repoGroup
 * @param Language $userLang
 * @param Language $contLang
 * @param TitleParser $titleParser
 * @param NamespaceInfo $namespaceInfo
 * @param HookContainer $hookContainer
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	// Hook dispatch
	$this->hookRunner = new HookRunner( $hookContainer );

	// Link rendering and existence lookups
	$this->linkRenderer = $linkRenderer;
	$this->linkCache = $linkCache;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->repoGroup = $repoGroup;

	// Title and namespace handling
	$this->titleParser = $titleParser;
	$this->namespaceInfo = $namespaceInfo;

	// Languages
	$this->userLang = $userLang;
	$this->contLang = $contLang;
}
99
/**
 * Convert a comment to HTML, but replace links with markers which are
 * resolved later by finalize().
 *
 * The comment is HTML-escaped (entities are allowed) before links are
 * processed.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Target used for section links and
 *   for resolving subpage and fragment-only links
 * @param bool $samePage If true, section links are fragment-only links
 *   instead of links to $selfLinkTarget
 * @param string|false|null $wikiId If neither null nor false, non-external
 *   links are built as foreign URLs for this wiki ID
 * @param bool $enableSectionLinks Whether to format autocomments
 *   (the /* ... *​/ parts) as section links
 * @return string HTML which may contain link markers
 */
public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	return $this->preprocessInternal( $comment, false, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
117
/**
 * Convert a comment in pseudo-HTML format to HTML, replacing links with
 * markers which are resolved later by finalize().
 *
 * Unlike preprocess(), the comment is NOT HTML-escaped here, so the caller
 * is responsible for passing text that is safe to output.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Target used for section links and
 *   for resolving subpage and fragment-only links
 * @param bool $samePage If true, section links are fragment-only links
 *   instead of links to $selfLinkTarget
 * @param string|false|null $wikiId If neither null nor false, non-external
 *   links are built as foreign URLs for this wiki ID
 * @param bool $enableSectionLinks Whether to format autocomments
 *   (the /* ... *​/ parts) as section links
 * @return string HTML which may contain link markers
 */
public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	return $this->preprocessInternal( $comment, true, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
134
/**
 * Execute pending batch queries and replace markers in the specified
 * string(s) with actual links.
 *
 * A marker whose ID has no registered callback is replaced with an HTML
 * comment saying so.
 *
 * @param string|string[] $comments
 * @return string|string[]
 */
public function finalize( $comments ) {
	$this->flushLinkBatches();

	$markerPattern = '/' . self::MARKER_PREFIX . '([0-9]{' . self::MAX_ID_SIZE . '})/';
	$resolveMarker = function ( $m ) {
		$callback = $this->links[(int)$m[1]] ?? null;
		return $callback ? $callback() : '<!-- MISSING -->';
	};

	return preg_replace_callback( $markerPattern, $resolveMarker, $comments );
}
157
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * @param string $comment
 * @param bool $unsafe If true, the comment is not HTML-escaped
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks Whether to run doSectionLinks()
 * @return string HTML which may contain link markers
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit
	// \x1b needs to be stripped because it is used for link markers.
	// The array form is used on purpose: with two string arguments strtr()
	// silently ignores the surplus characters of the longer one, so a
	// one-character replacement string would leave \x1b untouched.
	$comment = strtr( $comment, [ "\n" => ' ', "\x1b" => ' ' ] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = \Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
182
/**
 * Format autocomments, i.e. the parts of a comment wrapped in /* ... *​/.
 *
 * Each autocomment is first offered to the FormatAutocomments hook. If the
 * hook does not provide a replacement and $selfLinkTarget is given, the
 * autocomment text becomes a section link. The result is wrapped in
 * <span dir="auto"><span class="autocomment">; the outer span is closed at
 * the very end of the returned string.
 *
 * @param string $comment Comment text, already escaped
 * @param LinkTarget|null $selfLinkTarget Page the section links point to
 * @param bool $samePage If true, build fragment-only section links
 * @param string|false|null $wikiId Passed on to the hook and to makeSectionLink()
 * @return string
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	// @todo $closingTags here is something of a hack to preserve the status
	// quo. Someone who knows more about bidi and such should decide
	// (1) what sensible rendering even *is* for an LTR edit summary on an RTL
	// wiki, both when autocomments exist and when they don't, and
	// (2) what markup will make that actually happen.
	$closingTags = '';

	$formatAutocomment = function ( $match ) use ( &$closingTags, $selfLinkTarget, $samePage, $wikiId ) {
		// Ensure all match positions are defined
		$match += [ '', '', '', '' ];

		$pre = $match[1] !== '';
		$auto = $match[2];
		$post = $match[3] !== '';

		// Let extensions supply their own rendering
		$hookResult = null;
		$this->hookRunner->onFormatAutocomments(
			$hookResult, $pre, $auto, $post,
			Title::castFromLinkTarget( $selfLinkTarget ),
			$samePage,
			$wikiId
		);
		if ( $hookResult !== null ) {
			return $hookResult;
		}

		if ( $selfLinkTarget ) {
			# Remove links that a user may have manually put in the autosummary
			# This could be improved by copying as much of Parser::stripSectionName as desired.
			$stripped = str_replace( [ '[[:', '[[', ']]' ], '', $auto );

			// We don't want any links in the auto text to be linked, but we still
			// want to show any [[ ]]
			$sectionText = str_replace( '[[', '&#91;[', $auto );

			// Drop the first character of the guessed section name
			$section = substr( Parser::guessSectionNameFromStrippedText( $stripped ), 1 );
			if ( $section !== '' ) {
				$sectionTitle = $samePage
					? new TitleValue( NS_MAIN, '', $section )
					: $selfLinkTarget->createFragmentTarget( $section );
				$label = $this->userLang->getArrow() . $this->userLang->getDirMark() . $sectionText;
				$auto = $this->makeSectionLink( $sectionTitle, $label, $wikiId );
			}
		}

		if ( $pre ) {
			# written summary $presep autocomment (summary /* section */)
			$pre = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
		}
		if ( $post ) {
			# autocomment $postsep written summary (/* section */ summary)
			$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
		}
		if ( $auto ) {
			$auto = '<span dir="auto"><span class="autocomment">' . $auto . '</span>';
			// The outer span stays open until the end of the whole comment
			$closingTags .= '</span>';
		}
		return $pre . $auto;
	};

	$comment = preg_replace_callback(
		// To detect the presence of content before or after the
		// auto-comment, we use capturing groups inside optional zero-width
		// assertions. But older versions of PCRE can't directly make
		// zero-width assertions optional, so wrap them in a non-capturing
		// group.
		'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
		$formatAutocomment,
		$comment
	);
	return $comment . $closingTags;
}
283
/**
 * Make a section link. These links do not need an existence check, so they
 * are rendered immediately instead of being replaced by a marker.
 *
 * @param LinkTarget $target
 * @param string $text Link text, already escaped HTML
 * @param string|false|null $wikiId If neither null nor false, and $target
 *   is not external, link to the page on this foreign wiki
 * @return string HTML
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId
) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		return Linker::makeExternalLink(
			\WikiMap::getForeignURL(
				$wikiId,
				// Main namespace titles have no prefix
				$target->getNamespace() === 0
					? $target->getDBkey()
					: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
						':' . $target->getDBkey(),
				$target->getFragment()
			),
			$text,
			/* escape = */ false // Already escaped
		);
	}
	return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
}
314
/**
 * Replace [[wiki links]] in a comment with rendered links or link markers.
 *
 * Links into the Media namespace go through addFileLink(); everything else
 * is parsed as a title and goes through addPageLink(). A link whose target
 * cannot be turned into a title is left in the text unchanged.
 *
 * @param string $comment Text to format links in, already escaped
 * @param LinkTarget|null $selfLinkTarget Context for subpage links and for
 *   fragment-only links
 * @param bool $samePage If true, fragment-only links are not redirected to
 *   $selfLinkTarget
 * @param string|false|null $wikiId Passed on to addPageLink()
 * @return string
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	$wikiLinkPattern = '/
		\[\[
		\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
		:? # ignore optional leading colon
		([^[\]|]+) # 1. link target; page names cannot include [, ] or |
		(?:\|
			# 2. link text
			# Stop matching at ]] without relying on backtracking.
			((?:]?[^\]])*+)
		)?
		\]\]
		([^[]*) # 3. link trail (the text up until the next link)
		/x';

	$replaceLink = function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
		// Matches either the canonical or the content-language name of the
		// Media namespace, followed by a colon
		$mediaPrefix = '(?:'
			. preg_quote( $this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' )
			. '|'
			. preg_quote( $this->contLang->getNsText( NS_MEDIA ), '/' )
			. '):';

		$original = $match[0];

		// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
		if ( strpos( $match[1], '%' ) !== false ) {
			$match[1] = strtr(
				rawurldecode( $match[1] ),
				[ '<' => '&lt;', '>' => '&gt;' ]
			);
		}

		// Handle link renaming [[foo|text]] will show link as "text"
		$text = $match[2] != "" ? $match[2] : $match[1];

		$mediaMatch = [];
		$linkMarker = null;
		if ( preg_match( '/^' . $mediaPrefix . '(.*)$/i', $match[1], $mediaMatch ) ) {
			// Media link; trail not supported.
			$linkRegexp = '/\[\[(.*?)\]\]/';
			$fileTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $mediaMatch[1] );
			if ( $fileTarget ) {
				$linkMarker = $this->addFileLink( $fileTarget, $text );
			}
		} else {
			// Other kind of link
			// Make sure its target is non-empty
			if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
				$match[1] = substr( $match[1], 1 );
			}
			// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
			if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
				$trailMatch = [];
				$trail = preg_match( $this->contLang->linkTrail(), $match[3], $trailMatch )
					? $trailMatch[1]
					: "";
				// The trail is consumed together with the link when replacing
				$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
				[ $inside, $trail ] = Linker::splitTrail( $trail );

				$linkText = $text;
				$titleText = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

				try {
					$target = $this->titleParser->parseTitle( $titleText );

					// A fragment-only link refers to the page the comment belongs to
					if ( $target->getText() == '' && !$target->isExternal()
						&& !$samePage && $selfLinkTarget
					) {
						$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
					}

					$linkMarker = $this->addPageLink( $target, $linkText . $inside, $wikiId );
					$linkMarker .= $trail;
				} catch ( MalformedTitleException $e ) {
					// Fall through
				}
			}
		}

		if ( !$linkMarker ) {
			// Nothing usable was produced; leave the text as it was
			return $original;
		}

		// If the link is still valid, go ahead and replace it in!
		return preg_replace(
			// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal linkRegexp set when used
			$linkRegexp,
			$linkMarker,
			$original,
			1
		);
	};

	return preg_replace_callback( $wikiLinkPattern, $replaceLink, $comment );
}
439
/**
 * Register a deferred link and return the marker that stands in for it.
 *
 * The marker is MARKER_PREFIX followed by the index of the callback in
 * $this->links, zero-padded to MAX_ID_SIZE digits.
 *
 * @param callable $callback Called by finalize() to produce the link HTML
 * @return string The marker
 * @throws \RuntimeException If the next ID does not fit in MAX_ID_SIZE digits
 */
private function addLinkMarker( $callback ) {
	$id = count( $this->links );
	$digits = (string)$id;
	if ( strlen( $digits ) > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}
	$this->links[] = $callback;
	return self::MARKER_PREFIX . str_pad( $digits, self::MAX_ID_SIZE, '0', STR_PAD_LEFT );
}
454
/**
 * Make a link to a page, deferring the existence check where necessary.
 *
 * Foreign-wiki links and links whose status is already known (good, always
 * known, or cached as bad) are rendered immediately. Anything else is added
 * to the link batch and replaced by a marker which finalize() resolves.
 *
 * @param LinkTarget $target
 * @param string $text Link text, already escaped HTML
 * @param string|false|null $wikiId If neither null nor false, and $target
 *   is not external, link to the page on this foreign wiki
 * @return string HTML, or a link marker
 */
private function addPageLink( LinkTarget $target, $text, $wikiId ) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		// Handle links from a foreign wiki ID
		return Linker::makeExternalLink(
			\WikiMap::getForeignURL(
				$wikiId,
				// Main namespace titles have no prefix
				$target->getNamespace() === 0
					? $target->getDBkey()
					: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
						':' . $target->getDBkey(),
				$target->getFragment()
			),
			$text,
			/* escape = */ false // Already escaped
		);
	} elseif ( $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown()
	) {
		// Already known
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	} elseif ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Defer page link
	if ( !$this->linkBatch ) {
		$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
		$this->linkBatch->setCaller( __METHOD__ );
	}
	$this->linkBatch->addObj( $target );
	return $this->addLinkMarker( function () use ( $target, $text ) {
		return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
	} );
}
499
/**
 * Queue a file lookup and return a marker for a media link.
 *
 * The link itself is rendered when finalize() runs, after the pending file
 * lookups have been flushed into $this->files.
 *
 * @param LinkTarget $target The file to link to
 * @param string $html Link text, already escaped HTML
 * @return string A link marker
 */
private function addFileLink( LinkTarget $target, $html ) {
	$this->fileBatch[] = [
		'title' => $target
	];
	return $this->addLinkMarker( function () use ( $target, $html ) {
		return Linker::makeMediaLinkFile(
			$target,
			// false when the lookup produced no entry for this DB key
			$this->files[$target->getDBkey()] ?? false,
			$html
		);
	} );
}
519
/**
 * Execute any pending link batch and file batch, then reset both so that
 * the same lookups are not repeated.
 */
private function flushLinkBatches() {
	$pendingPages = $this->linkBatch;
	if ( $pendingPages ) {
		$pendingPages->execute();
		$this->linkBatch = null;
	}

	if ( $this->fileBatch ) {
		// Results already present in $this->files take precedence
		$this->files += $this->repoGroup->findFiles( $this->fileBatch );
		$this->fileBatch = [];
	}
}
533
534}
const NS_FILE
Definition Defines.php:70
const NS_MAIN
Definition Defines.php:64
const NS_MEDIA
Definition Defines.php:52
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:67
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
Base class for language-specific code.
Definition Language.php:53
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:44
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition LinkCache.php:42
Some internal bits split off from Skin.php.
Definition Linker.php:42
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition Linker.php:993
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition Linker.php:1488
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition Linker.php:1796
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition Linker.php:1061
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
The text processing backend for CommentFormatter.
__construct(LinkRenderer $linkRenderer, LinkBatchFactory $linkBatchFactory, LinkCache $linkCache, RepoGroup $repoGroup, Language $userLang, Language $contLang, TitleParser $titleParser, NamespaceInfo $namespaceInfo, HookContainer $hookContainer)
finalize( $comments)
Execute pending batch queries and replace markers in the specified string(s) with actual links.
preprocessUnsafe( $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
preprocess(string $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment to HTML, but replace links with markers which are resolved later.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Class that generates HTML anchor link elements for pages.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:96
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition Parser.php:6253
Prioritized list of file repositories.
Definition RepoGroup.php:29
Represents a page (or page fragment) title within MediaWiki.
Represents a title within MediaWiki.
Definition Title.php:49
A title parser service for MediaWiki.