MediaWiki 1.40.4
CommentParser.php
Go to the documentation of this file.
1<?php
2
4
5use File;
6use HtmlArmor;
7use Language;
8use LinkBatch;
9use LinkCache;
20use Parser;
21use RepoGroup;
22use StringUtils;
23use TitleParser;
24use TitleValue;
25
/** Number of zero-padded decimal digits used for the ID part of a link marker */
private const MAX_ID_SIZE = 7;

/** Byte sequence that introduces a link marker in preprocessed text */
private const MARKER_PREFIX = "\x1B\"'";

/** @var LinkRenderer Injected through the constructor */
private $linkRenderer;

/** @var LinkBatchFactory Injected through the constructor */
private $linkBatchFactory;

/** @var LinkCache Injected through the constructor */
private $linkCache;

/** @var RepoGroup Injected through the constructor */
private $repoGroup;

/** @var Language User language, injected through the constructor */
private $userLang;

/** @var Language Content language, injected through the constructor */
private $contLang;

/** @var TitleParser Injected through the constructor */
private $titleParser;

/** @var NamespaceInfo Injected through the constructor */
private $namespaceInfo;

/** @var HookRunner Built in the constructor from the injected HookContainer */
private $hookRunner;

/** @var callable[] Deferred link renderers; the array index is the marker ID */
private $links = [];

/** @var LinkBatch|null Page links waiting for an existence check, created lazily */
private $linkBatch;

/** @var array[]|null Pending file lookups, each of the form [ 'title' => LinkTarget ] */
private $fileBatch;

/** @var array Results of RepoGroup::findFiles(), looked up by DB key when rendering */
private $files = [];
68
/**
 * Store the injected services used for parsing comments and rendering links.
 *
 * @param LinkRenderer $linkRenderer
 * @param LinkBatchFactory $linkBatchFactory
 * @param LinkCache $linkCache
 * @param RepoGroup $repoGroup
 * @param Language $userLang
 * @param Language $contLang
 * @param TitleParser $titleParser
 * @param NamespaceInfo $namespaceInfo
 * @param HookContainer $hookContainer
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	// Hooks are only ever invoked through the HookRunner wrapper
	$this->hookRunner = new HookRunner( $hookContainer );

	// Link rendering and existence checking
	$this->linkRenderer = $linkRenderer;
	$this->linkCache = $linkCache;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->repoGroup = $repoGroup;

	// Languages
	$this->contLang = $contLang;
	$this->userLang = $userLang;

	// Title handling
	$this->namespaceInfo = $namespaceInfo;
	$this->titleParser = $titleParser;
}
101
/**
 * Convert a comment to HTML, but replace links with markers which are
 * resolved later by finalize().
 *
 * The comment is HTML-escaped (entities are allowed) before links are processed.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Passed through to section and wiki link processing
 * @param bool $samePage Passed through to section and wiki link processing
 * @param string|false|null $wikiId Passed through to section and wiki link processing
 * @param bool $enableSectionLinks Whether autocomment section links are processed
 * @return string Preprocessed comment
 */
public function preprocess( string $comment, LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	$unsafe = false;
	return $this->preprocessInternal(
		$comment,
		$unsafe,
		$selfLinkTarget,
		$samePage,
		$wikiId,
		$enableSectionLinks
	);
}
119
/**
 * Convert a comment in pseudo-HTML format to HTML, replacing links with
 * markers which are resolved later by finalize().
 *
 * Unlike preprocess(), the input is NOT HTML-escaped here.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Passed through to section and wiki link processing
 * @param bool $samePage Passed through to section and wiki link processing
 * @param string|false|null $wikiId Passed through to section and wiki link processing
 * @param bool $enableSectionLinks Whether autocomment section links are processed
 * @return string Preprocessed comment
 */
public function preprocessUnsafe( $comment, LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	$unsafe = true;
	return $this->preprocessInternal(
		$comment,
		$unsafe,
		$selfLinkTarget,
		$samePage,
		$wikiId,
		$enableSectionLinks
	);
}
136
/**
 * Execute pending batch queries and replace markers in the specified
 * string(s) with actual links.
 *
 * @param string|string[] $comments Text produced by preprocess() or preprocessUnsafe()
 * @return string|string[] The same text with every marker replaced
 */
public function finalize( $comments ) {
	$this->flushLinkBatches();

	// A marker is the prefix followed by a fixed-width decimal ID
	$markerPattern = '/' . self::MARKER_PREFIX . '([0-9]{' . self::MAX_ID_SIZE . '})/';

	$resolveMarker = function ( $m ) {
		$callback = $this->links[(int)$m[1]] ?? null;
		if ( !$callback ) {
			// No renderer was registered under this ID
			return '<!-- MISSING -->';
		}
		return $callback();
	};

	return preg_replace_callback( $markerPattern, $resolveMarker, $comments );
}
159
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * @param string $comment
 * @param bool $unsafe If true, the comment is not HTML-escaped
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks Whether doSectionLinks() is applied
 * @return string Comment with links replaced by markers or link HTML
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit
	// \x1b needs to be stripped because it is used for link markers.
	// The array form is used on purpose: with the three-argument form of
	// strtr(), a "to" string shorter than the "from" string silently leaves
	// the extra "from" characters (here \x1b) untouched.
	$comment = strtr( $comment, [
		"\n" => ' ',
		"\x1b" => ' ',
	] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = \Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
184
/**
 * Format autocomments, i.e. the parts of a comment written as /* ... *​/.
 *
 * Each autocomment is first offered to the FormatAutocomments hook. If the
 * hook does not supply a replacement, the autocomment is wrapped in
 * autocomment markup and, when $selfLinkTarget is given, turned into a link
 * to the section it names.
 *
 * @param string $comment Comment text
 * @param LinkTarget|null $selfLinkTarget Page the section links point to
 * @param bool $samePage If true, section links are fragment-only
 * @param string|false|null $wikiId Passed to the hook and to makeSectionLink()
 * @return string Formatted comment
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	// @todo $append here is something of a hack to preserve the status
	// quo. Someone who knows more about bidi and such should decide
	// (1) what sensible rendering even *is* for an LTR edit summary on an RTL
	// wiki, both when autocomments exist and when they don't, and
	// (2) what markup will make that actually happen.
	$append = '';
	$comment = preg_replace_callback(
		// To detect the presence of content before or after the
		// auto-comment, we use capturing groups inside optional zero-width
		// assertions. But older versions of PCRE can't directly make
		// zero-width assertions optional, so wrap them in a non-capturing
		// group.
		'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
		function ( $match ) use ( &$append, $selfLinkTarget, $samePage, $wikiId ) {
			// Ensure all match positions are defined
			$match += [ '', '', '', '' ];

			$pre = $match[1] !== '';
			$auto = $match[2];
			$post = $match[3] !== '';
			$comment = null;

			$this->hookRunner->onFormatAutocomments(
				$comment, $pre, $auto, $post,
				Title::castFromLinkTarget( $selfLinkTarget ),
				$samePage,
				$wikiId );
			if ( $comment !== null ) {
				// A hook handler supplied the replacement
				return $comment;
			}

			if ( $selfLinkTarget ) {
				$section = $auto;
				# Remove links that a user may have manually put in the autosummary
				# This could be improved by copying as much of Parser::stripSectionName as desired.
				$section = str_replace( [
					'[[:',
					'[[',
					']]'
				], '', $section );

				// We don't want any links in the auto text to be linked, but we still
				// want to show any [[ ]]
				$sectionText = str_replace( '[[', '&#91;[', $auto );

				// Drop the first character of the guessed section name
				$section = substr( Parser::guessSectionNameFromStrippedText( $section ), 1 );
				if ( $section !== '' ) {
					if ( $samePage ) {
						$sectionTitle = new TitleValue( NS_MAIN, '', $section );
					} else {
						$sectionTitle = $selfLinkTarget->createFragmentTarget( $section );
					}
					$auto = $this->makeSectionLink(
						$sectionTitle,
						$this->userLang->getArrow() . $this->userLang->getDirMark() . $sectionText,
						$wikiId
					);
				}
			}
			if ( $pre ) {
				# written summary $presep autocomment (summary /* section */)
				$pre = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
			}
			if ( $post ) {
				# autocomment $postsep written summary (/* section */ summary)
				$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
			}
			// Compare against the empty string explicitly: a plain truthiness
			// test would skip the wrapper for an autocomment that is just "0".
			if ( (string)$auto !== '' ) {
				$auto = '<span dir="auto"><span class="autocomment">' . $auto . '</span>';
				// The outer span is closed at the very end of the comment
				$append .= '</span>';
			}
			$comment = $pre . $auto;
			return $comment;
		},
		$comment
	);
	return $comment . $append;
}
285
/**
 * Build the link HTML for an autocomment section.
 *
 * When a wiki ID is given and the target is not external, an external link
 * to the foreign wiki's URL is returned. Otherwise a preloaded link is
 * produced by the link renderer.
 *
 * @param LinkTarget $target
 * @param string $text Link text; used as HTML without further escaping
 * @param string|false|null $wikiId
 * @return string HTML
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId
) {
	if ( $wikiId === null || $wikiId === false || $target->isExternal() ) {
		return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
	}

	// Pages outside namespace 0 need the canonical namespace name as a prefix
	if ( $target->getNamespace() === 0 ) {
		$pageName = $target->getDBkey();
	} else {
		$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
			':' . $target->getDBkey();
	}
	$foreignUrl = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );

	return Linker::makeExternalLink(
		$foreignUrl,
		$text,
		/* escape = */ false // Already escaped
	);
}
316
/**
 * Replace [[wiki links]] in a comment with link markers or link HTML.
 *
 * Links whose target starts with the Media namespace prefix are registered
 * through addFileLink(). All other links are registered through
 * addPageLink(), with any link trail split off by Linker::splitTrail().
 * Links whose target cannot be parsed are left in the text unchanged.
 *
 * @param string $comment Text to process
 * @param LinkTarget|null $selfLinkTarget Base for subpage and fragment-only links
 * @param bool $samePage If true, fragment-only links are not re-targeted to $selfLinkTarget
 * @param string|false|null $wikiId Passed to addPageLink()
 * @return string Text with links replaced
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	// The Media namespace prefix does not depend on the matched link, so
	// build its regex once per call rather than once per link.
	$mediaLinkRegexp = '/^(?:' .
		preg_quote( $this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' ) .
		'|' .
		preg_quote( $this->contLang->getNsText( NS_MEDIA ), '/' ) .
		'):(.*)$/i';

	return preg_replace_callback(
		'/
			\[\[
			\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
			:? # ignore optional leading colon
			([^[\]|]+) # 1. link target; page names cannot include [, ] or |
			(?:\|
				# 2. link text
				# Stop matching at ]] without relying on backtracking.
				((?:]?[^\]])*+)
			)?
			\]\]
			([^[]*) # 3. link trail (the text up until the next link)
		/x',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId, $mediaLinkRegexp ) {
			$comment = $match[0];

			// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
			if ( strpos( $match[1], '%' ) !== false ) {
				$match[1] = strtr(
					rawurldecode( $match[1] ),
					[ '<' => '&lt;', '>' => '&gt;' ]
				);
			}

			// Handle link renaming [[foo|text]] will show link as "text"
			if ( $match[2] != "" ) {
				$text = $match[2];
			} else {
				$text = $match[1];
			}
			$submatch = [];
			$linkMarker = null;
			if ( preg_match( $mediaLinkRegexp, $match[1], $submatch ) ) {
				// Media link; trail not supported.
				$linkRegexp = '/\[\[(.*?)\]\]/';
				$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
				if ( $linkTarget ) {
					$linkMarker = $this->addFileLink( $linkTarget, $text );
				}
			} else {
				// Other kind of link
				// Make sure its target is non-empty
				if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
					$match[1] = substr( $match[1], 1 );
				}
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
					if ( preg_match(
						$this->contLang->linkTrail(),
						$match[3],
						$submatch
					) ) {
						$trail = $submatch[1];
					} else {
						$trail = "";
					}
					// The replacement below must consume the trail as well as the link
					$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
					[ $inside, $trail ] = Linker::splitTrail( $trail );

					$linkText = $text;
					$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

					try {
						$target = $this->titleParser->parseTitle( $linkTarget );

						// Fragment-only link: point it at the page the comment belongs to
						if ( $target->getText() == '' && !$target->isExternal()
							&& !$samePage && $selfLinkTarget
						) {
							$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
						}

						$linkMarker = $this->addPageLink( $target, $linkText . $inside, $wikiId );
						$linkMarker .= $trail;
					} catch ( MalformedTitleException $e ) {
						// Fall through: $linkMarker stays null and the text is left as is
					}
				}
			}
			if ( $linkMarker ) {
				// If the link is still valid, go ahead and replace it in!
				$comment = preg_replace(
					// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal linkRegexp set when used
					$linkRegexp,
					StringUtils::escapeRegexReplacement( $linkMarker ),
					$comment,
					1
				);
			}

			return $comment;
		},
		$comment
	);
}
441
/**
 * Register a deferred link renderer and return the marker that stands in
 * for it until finalize() is called.
 *
 * @param callable $callback Called without arguments by finalize(); its
 *   return value replaces the marker
 * @return string Marker: the prefix followed by the zero-padded ID
 * @throws \RuntimeException If the next ID no longer fits in MAX_ID_SIZE digits
 */
private function addLinkMarker( $callback ) {
	// The next free index in $this->links doubles as the marker ID
	$markerId = count( $this->links );
	if ( strlen( (string)$markerId ) > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}
	$this->links[] = $callback;

	$markerFormat = self::MARKER_PREFIX . "%0" . self::MAX_ID_SIZE . 'd';
	return sprintf( $markerFormat, $markerId );
}
456
/**
 * Produce the HTML for a page link, or a marker when the existence of the
 * page still has to be checked.
 *
 * Foreign-wiki links and links whose state is already known to the link
 * cache are rendered immediately. Everything else is added to the pending
 * link batch and rendered when finalize() resolves the marker.
 *
 * @param LinkTarget $target
 * @param string $text Link text; used as HTML without further escaping
 * @param string|false|null $wikiId
 * @return string Link HTML or a link marker
 */
private function addPageLink( LinkTarget $target, $text, $wikiId ) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		// Handle links from a foreign wiki ID
		if ( $target->getNamespace() === 0 ) {
			$pageName = $target->getDBkey();
		} else {
			$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
				':' . $target->getDBkey();
		}
		$foreignUrl = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );
		return Linker::makeExternalLink(
			$foreignUrl,
			$text,
			/* escape = */ false // Already escaped
		);
	}

	if ( $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown()
	) {
		// Already known
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	}

	if ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Defer page link: create the batch on first use
	if ( !$this->linkBatch ) {
		$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
		$this->linkBatch->setCaller( __METHOD__ );
	}
	$this->linkBatch->addObj( $target );

	$renderLink = function () use ( $target, $text ) {
		return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
	};
	return $this->addLinkMarker( $renderLink );
}
501
/**
 * Queue a media file lookup and return a marker for the resulting link.
 *
 * @param LinkTarget $target Target of the media link
 * @param string $html Link text passed to Linker::makeMediaLinkFile()
 * @return string Link marker
 */
private function addFileLink( LinkTarget $target, $html ) {
	// Looked up in bulk by flushLinkBatches()
	$this->fileBatch[] = [ 'title' => $target ];

	$renderLink = function () use ( $target, $html ) {
		// false when the lookup returned nothing for this DB key
		$file = $this->files[$target->getDBkey()] ?? false;
		return Linker::makeMediaLinkFile( $target, $file, $html );
	};
	return $this->addLinkMarker( $renderLink );
}
521
/**
 * Execute the pending page-link batch and file lookups, if there are any,
 * and reset both queues.
 */
private function flushLinkBatches() {
	$pendingLinks = $this->linkBatch;
	if ( $pendingLinks ) {
		$pendingLinks->execute();
		$this->linkBatch = null;
	}

	$pendingFiles = $this->fileBatch;
	if ( $pendingFiles ) {
		// += keeps entries already present from earlier lookups
		$this->files += $this->repoGroup->findFiles( $pendingFiles );
		$this->fileBatch = [];
	}
}
535
536}
const NS_FILE
Definition Defines.php:70
const NS_MAIN
Definition Defines.php:64
const NS_MEDIA
Definition Defines.php:52
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:68
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
Base class for language-specific code.
Definition Language.php:56
Class representing a list of titles. The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:44
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition LinkCache.php:42
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
The text processing backend for CommentFormatter.
__construct(LinkRenderer $linkRenderer, LinkBatchFactory $linkBatchFactory, LinkCache $linkCache, RepoGroup $repoGroup, Language $userLang, Language $contLang, TitleParser $titleParser, NamespaceInfo $namespaceInfo, HookContainer $hookContainer)
finalize( $comments)
Execute pending batch queries and replace markers in the specified string(s) with actual links.
preprocessUnsafe( $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
preprocess(string $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment to HTML, but replace links with markers which are resolved later.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Class that generates HTML for internal links.
Some internal bits split off from Skin.php.
Definition Linker.php:67
castFromLinkTarget(?LinkTarget $linkTarget)
newFromLinkTarget(LinkTarget $linkTarget, $forceClone='')
Represents a title within MediaWiki.
Definition Title.php:82
Helper tools for dealing with other locally-hosted wikis.
Definition WikiMap.php:33
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:107
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition Parser.php:6209
Prioritized list of file repositories.
Definition RepoGroup.php:30
A collection of static methods to play with strings.
Represents a page (or page fragment) title within MediaWiki.
A title parser service for MediaWiki.