MediaWiki REL1_39
CommentParser.php
Go to the documentation of this file.
1<?php
2
4
5use File;
6use HtmlArmor;
7use Language;
8use LinkBatch;
9use LinkCache;
10use Linker;
18use Parser;
19use RepoGroup;
20use Title;
21use TitleParser;
22use TitleValue;
23
/** @var LinkRenderer Builds the HTML anchors for page and section links */
34 private $linkRenderer;
/** @var LinkBatchFactory Creates the batch used for deferred page lookups */
36 private $linkBatchFactory;
/** @var RepoGroup Queried in bulk for the files referenced by media links */
38 private $repoGroup;
/** @var Language Supplies the arrow and direction mark used in section links */
40 private $userLang;
/** @var Language Supplies the media namespace name and the link trail pattern */
42 private $contLang;
/** @var TitleParser Turns link target text into title objects */
44 private $titleParser;
/** @var NamespaceInfo Supplies canonical namespace names */
46 private $namespaceInfo;
/** @var HookRunner Wraps the hook container; used to run the FormatAutocomments hook */
48 private $hookRunner;
/** @var LinkCache Consulted for pages already cached as good or bad links */
50 private $linkCache;
51
/** @var callable[] Deferred link renderers; the array index is the marker ID */
53 private $links = [];
/** @var LinkBatch|null Pending page lookups, or null when nothing is queued */
55 private $linkBatch;
56
/** @var array[]|null Pending file lookups, each entry of the form [ 'title' => LinkTarget ] */
58 private $fileBatch;
/** @var array File lookup results, read by DB key (presumably File objects; confirm against RepoGroup::findFiles) */
60 private $files = [];
61
/** Number of zero-padded decimal digits in a marker ID */
63 private const MAX_ID_SIZE = 7;
/** Byte sequence that introduces a link marker in preprocessed text */
65 private const MARKER_PREFIX = "\x1B\"'";
66
/**
 * Store the services this parser depends on.
 *
 * @param LinkRenderer $linkRenderer
 * @param LinkBatchFactory $linkBatchFactory
 * @param LinkCache $linkCache
 * @param RepoGroup $repoGroup
 * @param Language $userLang
 * @param Language $contLang
 * @param TitleParser $titleParser
 * @param NamespaceInfo $namespaceInfo
 * @param HookContainer $hookContainer Wrapped in a HookRunner rather than stored directly
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	// Hooks are only ever invoked through the runner
	$this->hookRunner = new HookRunner( $hookContainer );

	// Link rendering and existence lookups
	$this->linkRenderer = $linkRenderer;
	$this->linkCache = $linkCache;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->repoGroup = $repoGroup;

	// Title and namespace handling
	$this->titleParser = $titleParser;
	$this->namespaceInfo = $namespaceInfo;

	// Languages
	$this->userLang = $userLang;
	$this->contLang = $contLang;
}
99
/**
 * Convert a comment to HTML, but replace links with markers which are
 * resolved later by finalize().
 *
 * The comment text is HTML-escaped (entities are allowed) before links
 * are processed.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Target used for section links and
 *   for resolving relative links
 * @param bool $samePage Passed through to the section and wiki link handlers
 * @param string|false|null $wikiId ID of a foreign wiki to link to, or
 *   false/null for the local wiki
 * @param bool $enableSectionLinks Whether to format autocomments
 * @return string HTML, possibly containing link markers
 */
public function preprocess( string $comment, LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	$unsafe = false;
	return $this->preprocessInternal(
		$comment,
		$unsafe,
		$selfLinkTarget,
		$samePage,
		$wikiId,
		$enableSectionLinks
	);
}
117
/**
 * Convert a comment in pseudo-HTML format to HTML, replacing links with
 * markers which are resolved later by finalize().
 *
 * Unlike preprocess(), the comment text is NOT HTML-escaped here, so the
 * caller is responsible for passing text that is safe to output.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Target used for section links and
 *   for resolving relative links
 * @param bool $samePage Passed through to the section and wiki link handlers
 * @param string|false|null $wikiId ID of a foreign wiki to link to, or
 *   false/null for the local wiki
 * @param bool $enableSectionLinks Whether to format autocomments
 * @return string HTML, possibly containing link markers
 */
public function preprocessUnsafe( $comment, LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	$unsafe = true;
	return $this->preprocessInternal(
		$comment,
		$unsafe,
		$selfLinkTarget,
		$samePage,
		$wikiId,
		$enableSectionLinks
	);
}
134
/**
 * Execute pending batch queries and replace markers in the specified
 * string(s) with actual links.
 *
 * @param string|string[] $comments Preprocessed comment(s) which may
 *   contain link markers
 * @return string|string[] The input with every marker replaced
 */
public function finalize( $comments ) {
	// The deferred renderers depend on the batched lookups having run
	$this->flushLinkBatches();

	$markerPattern = '/' . self::MARKER_PREFIX . '([0-9]{' . self::MAX_ID_SIZE . '})/';

	$resolveMarker = function ( $hit ) {
		$markerId = (int)$hit[1];
		$renderer = $this->links[$markerId] ?? null;
		// A marker with no registered renderer is replaced by a placeholder
		return $renderer ? $renderer() : '<!-- MISSING -->';
	};

	return preg_replace_callback( $markerPattern, $resolveMarker, $comments );
}
157
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * @param string $comment
 * @param bool $unsafe If true, skip HTML escaping of the comment
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks Whether to format autocomments
 * @return string HTML, possibly containing link markers
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit
	// \x1b needs to be stripped because it is used for link markers.
	// The array form is used deliberately: with the three-argument form,
	// strtr() silently ignores surplus characters when "from" and "to"
	// differ in length, which would leave \x1b in place.
	$comment = strtr( $comment, [ "\n" => ' ', "\x1b" => ' ' ] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = \Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
182
/**
 * Format autocomments, i.e. the parts of a comment written as
 * "/* section name *" followed by "/", turning them into styled (and,
 * when a self link target is given, linked) section references.
 *
 * @param string $comment Comment text
 * @param LinkTarget|null $selfLinkTarget Page the section links point to;
 *   when null the autocomment is styled but not linked
 * @param bool $samePage If true, section links are fragment-only links
 * @param string|false|null $wikiId Passed to the hook and to makeSectionLink()
 * @return string Comment with autocomments replaced by HTML
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	// To detect the presence of content before or after the
	// auto-comment, we use capturing groups inside optional zero-width
	// assertions. But older versions of PCRE can't directly make
	// zero-width assertions optional, so wrap them in a non-capturing
	// group.
	$autoCommentPattern = '!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!';

	// @todo $closingTags here is something of a hack to preserve the status
	// quo. Someone who knows more about bidi and such should decide
	// (1) what sensible rendering even *is* for an LTR edit summary on an RTL
	// wiki, both when autocomments exist and when they don't, and
	// (2) what markup will make that actually happen.
	$closingTags = '';

	$formatAutoComment = function ( $groups ) use (
		&$closingTags, $selfLinkTarget, $samePage, $wikiId
	) {
		// Ensure all match positions are defined
		$groups += [ '', '', '', '' ];

		$lead = $groups[1] !== '';
		$body = $groups[2];
		$tail = $groups[3] !== '';

		// Give hook handlers the chance to supply the whole replacement
		$hookResult = null;
		$this->hookRunner->onFormatAutocomments(
			$hookResult, $lead, $body, $tail,
			Title::castFromLinkTarget( $selfLinkTarget ),
			$samePage,
			$wikiId
		);
		if ( $hookResult !== null ) {
			return $hookResult;
		}

		if ( $selfLinkTarget ) {
			# Remove links that a user may have manually put in the autosummary
			# This could be improved by copying as much of Parser::stripSectionName as desired.
			$stripped = str_replace( [ '[[:', '[[', ']]' ], '', $body );

			// Drop the first character of the guessed section name
			$anchor = substr( Parser::guessSectionNameFromStrippedText( $stripped ), 1 );
			if ( $anchor !== '' ) {
				$sectionTarget = $samePage
					? new TitleValue( NS_MAIN, '', $anchor )
					: $selfLinkTarget->createFragmentTarget( $anchor );
				$label = $this->userLang->getArrow() . $this->userLang->getDirMark() . $body;
				$body = $this->makeSectionLink( $sectionTarget, $label, $wikiId );
			}
		}

		if ( $lead ) {
			# written summary $presep autocomment (summary /* section */)
			$lead = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
		}
		if ( $tail ) {
			# autocomment $postsep written summary (/* section */ summary)
			$body .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
		}
		if ( $body ) {
			// The outer span is left open here and closed after the whole comment
			$body = '<span dir="auto"><span class="autocomment">' . $body . '</span>';
			$closingTags .= '</span>';
		}

		// Make sure any brackets (which the user could have input in the edit summary)
		// in the generated autocomment HTML don't trigger additional link processing (T406664).
		return str_replace( [ '[', ']' ], [ '&#91;', '&#93;' ], $lead . $body );
	};

	$formatted = preg_replace_callback( $autoCommentPattern, $formatAutoComment, $comment );
	return $formatted . $closingTags;
}
281
/**
 * Make a section link. These don't need to go into the LinkBatch, since
 * the link class does not depend on whether the link is known.
 *
 * @param LinkTarget $target Link target, including the section fragment
 * @param string $text Link text as HTML (already escaped)
 * @param string|false|null $wikiId ID of the wiki to link to, or
 *   false/null for the local wiki
 * @return string HTML anchor element
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId
) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		// Foreign wiki: build an external link to the page on that wiki
		return Linker::makeExternalLink(
			\WikiMap::getForeignURL(
				$wikiId,
				$target->getNamespace() === 0
					? $target->getDBkey()
					: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
						':' . $target->getDBkey(),
				$target->getFragment()
			),
			$text,
			/* escape = */ false // Already escaped
		);
	}
	return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
}
312
/**
 * Format wiki links ([[target]], [[target|text]], media links) in a comment.
 *
 * Each link is replaced either by finished HTML or by a marker which
 * finalize() later resolves, depending on what addPageLink() and
 * addFileLink() return. Links whose target cannot be parsed are left as
 * plain text.
 *
 * @param string $comment Text to format links in. WARNING! Since the output
 *   of this function is HTML, $comment must already be escaped.
 * @param LinkTarget|null $selfLinkTarget Used for resolving subpage links
 *   and fragment-only links
 * @param bool $samePage If true, fragment-only links are not expanded to
 *   point at $selfLinkTarget
 * @param string|false|null $wikiId ID of the wiki to link to, or
 *   false/null for the local wiki
 * @return string HTML, possibly containing link markers
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	return preg_replace_callback(
		'/
			\[\[
			\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
			:? # ignore optional leading colon
			([^[\]|]+) # 1. link target; page names cannot include [, ] or |
			(?:\|
				# 2. link text
				# Stop matching at ]] without relying on backtracking.
				((?:]?[^\]])*+)
			)?
			\]\]
			([^[]*) # 3. link trail (the text up until the next link)
		/x',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
			// Alternation of the canonical and the localised media namespace name
			$medians = '(?:';
			$medians .= preg_quote(
				$this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' );
			$medians .= '|';
			$medians .= preg_quote(
				$this->contLang->getNsText( NS_MEDIA ),
				'/'
			) . '):';

			$comment = $match[0];

			// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
			if ( strpos( $match[1], '%' ) !== false ) {
				$match[1] = strtr(
					rawurldecode( $match[1] ),
					[ '<' => '&lt;', '>' => '&gt;' ]
				);
			}

			// Handle link renaming [[foo|text]] will show link as "text"
			if ( $match[2] != "" ) {
				$text = $match[2];
			} else {
				$text = $match[1];
			}
			$submatch = [];
			$linkMarker = null;
			if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
				// Media link; trail not supported.
				$linkRegexp = '/\[\[(.*?)\]\]/';
				$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
				if ( $linkTarget ) {
					$linkMarker = $this->addFileLink( $linkTarget, $text );
				}
			} else {
				// Other kind of link
				// Make sure its target is non-empty
				if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
					$match[1] = substr( $match[1], 1 );
				}
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
					if ( preg_match(
						$this->contLang->linkTrail(),
						$match[3],
						$submatch
					) ) {
						$trail = $submatch[1];
					} else {
						$trail = "";
					}
					// The replaced span covers the link plus the whole trail
					$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
					list( $inside, $trail ) = Linker::splitTrail( $trail );

					$linkText = $text;
					$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

					try {
						$target = $this->titleParser->parseTitle( $linkTarget );

						// Fragment-only link: point it at the page the comment belongs to
						if ( $target->getText() == '' && !$target->isExternal()
							&& !$samePage && $selfLinkTarget
						) {
							$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
						}

						$linkMarker = $this->addPageLink( $target, $linkText . $inside, $wikiId );
						$linkMarker .= $trail;
					} catch ( MalformedTitleException $e ) {
						// Fall through
					}
				}
			}
			if ( $linkMarker ) {
				// $linkMarker may be finished link HTML containing user-supplied
				// text. preg_replace() would expand "$n" and "\n" sequences in
				// it as backreferences, so escape backslashes and dollar signs
				// to have it inserted literally.
				$replacement = strtr( $linkMarker, [ '\\' => '\\\\', '$' => '\\$' ] );

				// If the link is still valid, go ahead and replace it in!
				$comment = preg_replace(
					// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal linkRegexp set when used
					$linkRegexp,
					$replacement,
					$comment,
					1
				);
			}

			return $comment;
		},
		$comment
	);
}
437
/**
 * Register a deferred link renderer and return the marker that stands in
 * for its output until finalize() is called.
 *
 * @param callable $callback Called with no arguments by finalize(); its
 *   return value replaces the marker
 * @return string The marker: the marker prefix followed by a zero-padded ID
 * @throws \RuntimeException If the ID would need more digits than a
 *   marker can hold
 */
private function addLinkMarker( $callback ) {
	// IDs are simply positions in the renderer list
	$markerId = count( $this->links );
	$idLength = strlen( (string)$markerId );
	if ( $idLength > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}

	$this->links[] = $callback;

	$markerFormat = self::MARKER_PREFIX . "%0" . self::MAX_ID_SIZE . 'd';
	return sprintf( $markerFormat, $markerId );
}
452
/**
 * Link to a page. Returns finished HTML when the result is already
 * determined (foreign wiki, or a page the link cache already knows about);
 * otherwise queues the page for a batched lookup and returns a marker.
 *
 * @param LinkTarget $target
 * @param string $text Link text as HTML (already escaped)
 * @param string|false|null $wikiId ID of the wiki to link to, or
 *   false/null for the local wiki
 * @return string HTML anchor element, or a link marker
 */
private function addPageLink( LinkTarget $target, $text, $wikiId ) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		// Handle links from a foreign wiki ID
		return Linker::makeExternalLink(
			\WikiMap::getForeignURL(
				$wikiId,
				$target->getNamespace() === 0
					? $target->getDBkey()
					: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
						':' . $target->getDBkey(),
				$target->getFragment()
			),
			$text,
			/* escape = */ false // Already escaped
		);
	} elseif ( $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown()
	) {
		// Already known
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	} elseif ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Defer page link
	if ( !$this->linkBatch ) {
		$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
		$this->linkBatch->setCaller( __METHOD__ );
	}
	$this->linkBatch->addObj( $target );
	// Rendering happens in finalize(), after the batch has been executed
	return $this->addLinkMarker( function () use ( $target, $text ) {
		return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
	} );
}
497
/**
 * Link to a file. The file is queued for a batched lookup and a marker is
 * returned; the actual link is rendered by finalize().
 *
 * @param LinkTarget $target Target of the file to link to
 * @param string $html Link text as HTML
 * @return string Link marker
 */
private function addFileLink( LinkTarget $target, $html ) {
	$this->fileBatch[] = [
		'title' => $target
	];
	return $this->addLinkMarker( function () use ( $target, $html ) {
		return Linker::makeMediaLinkFile(
			$target,
			// false when the batched lookup did not return this file
			$this->files[$target->getDBkey()] ?? false,
			$html
		);
	} );
}
517
/**
 * Execute any pending page and file lookups and clear both queues.
 */
private function flushLinkBatches() {
	// Page lookups: run the batch, then drop it so a fresh one is created on demand
	$pendingPages = $this->linkBatch;
	if ( $pendingPages ) {
		$pendingPages->execute();
		$this->linkBatch = null;
	}

	// File lookups: results already stored take precedence over new ones
	$pendingFiles = $this->fileBatch;
	if ( $pendingFiles ) {
		$found = $this->repoGroup->findFiles( $pendingFiles );
		$this->files = $this->files + $found;
		$this->fileBatch = [];
	}
}
531
532}
const NS_FILE
Definition Defines.php:70
const NS_MAIN
Definition Defines.php:64
const NS_MEDIA
Definition Defines.php:52
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:67
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
Base class for language-specific code.
Definition Language.php:53
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:44
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition LinkCache.php:42
Some internal bits split off from Skin.php.
Definition Linker.php:42
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition Linker.php:993
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition Linker.php:1488
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition Linker.php:1796
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition Linker.php:1061
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
The text processing backend for CommentFormatter.
__construct(LinkRenderer $linkRenderer, LinkBatchFactory $linkBatchFactory, LinkCache $linkCache, RepoGroup $repoGroup, Language $userLang, Language $contLang, TitleParser $titleParser, NamespaceInfo $namespaceInfo, HookContainer $hookContainer)
finalize( $comments)
Execute pending batch queries and replace markers in the specified string(s) with actual links.
preprocessUnsafe( $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
preprocess(string $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment to HTML, but replace links with markers which are resolved later.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Class that generates HTML anchor link elements for pages.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:96
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition Parser.php:6266
Prioritized list of file repositories.
Definition RepoGroup.php:29
Represents a page (or page fragment) title within MediaWiki.
Represents a title within MediaWiki.
Definition Title.php:49
A title parser service for MediaWiki.