MediaWiki  master
CommentParser.php
Go to the documentation of this file.
1 <?php
2 
4 
5 use File;
6 use HtmlArmor;
7 use Language;
8 use LinkBatch;
9 use LinkCache;
19 use NamespaceInfo;
20 use Parser;
21 use RepoGroup;
22 use StringUtils;
23 use TitleParser;
24 use TitleValue;
25 
/** @var LinkRenderer Builds the HTML for page links */
private $linkRenderer;
/** @var LinkBatchFactory Creates the batch used for deferred page existence checks */
private $linkBatchFactory;
/** @var RepoGroup Queried in bulk for the files behind queued media links */
private $repoGroup;
/** @var Language Supplies the arrow and direction mark used in section links */
private $userLang;
/** @var Language Supplies the local Media namespace name and the link trail regex */
private $contLang;
/** @var TitleParser */
private $titleParser;
/** @var NamespaceInfo */
private $namespaceInfo;
/** @var HookRunner */
private $hookRunner;
/** @var LinkCache Consulted to render already-known or already-bad links immediately */
private $linkCache;

/** @var callable[] Deferred link renderers, indexed by marker ID */
private $links = [];
/** @var LinkBatch|null Pending page existence batch, or null when nothing is queued */
private $linkBatch;

/** @var array[]|null Pending file lookups, each of the form [ 'title' => LinkTarget ] */
private $fileBatch;
/** @var array Accumulated results of RepoGroup::findFiles(), read by DB key */
private $files = [];

/** Number of zero-padded decimal digits in a link marker ID */
private const MAX_ID_SIZE = 7;
66 
/**
 * Store the injected services. The hook container is not kept directly;
 * it is wrapped in a HookRunner.
 *
 * @param LinkRenderer $linkRenderer
 * @param LinkBatchFactory $linkBatchFactory
 * @param LinkCache $linkCache
 * @param RepoGroup $repoGroup
 * @param Language $userLang
 * @param Language $contLang
 * @param TitleParser $titleParser
 * @param NamespaceInfo $namespaceInfo
 * @param HookContainer $hookContainer
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	// Hooks
	$this->hookRunner = new HookRunner( $hookContainer );

	// Link and file lookup services
	$this->linkRenderer = $linkRenderer;
	$this->linkCache = $linkCache;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->repoGroup = $repoGroup;

	// Title handling
	$this->titleParser = $titleParser;
	$this->namespaceInfo = $namespaceInfo;

	// Languages
	$this->userLang = $userLang;
	$this->contLang = $contLang;
}
99 
/**
 * Convert a comment to HTML, but replace links with markers which are
 * resolved later by finalize(). HTML in the input is escaped first.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Target that section links and
 *   fragment-only links are resolved against
 * @param bool $samePage If true, section links carry only a fragment
 *   instead of pointing at $selfLinkTarget
 * @param string|false|null $wikiId ID of a foreign wiki to link to; null or
 *   false means links are rendered for the local wiki
 * @param bool $enableSectionLinks Whether autocomment (section) syntax is processed
 * @return string HTML containing link markers
 */
public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	return $this->preprocessInternal( $comment, false, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
117 
/**
 * Convert a comment in pseudo-HTML format to HTML, replacing links with
 * markers which are resolved later by finalize().
 *
 * Unlike preprocess(), the input is NOT HTML-escaped here, so the caller is
 * responsible for making sure it is safe to output.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Target that section links and
 *   fragment-only links are resolved against
 * @param bool $samePage If true, section links carry only a fragment
 *   instead of pointing at $selfLinkTarget
 * @param string|false|null $wikiId ID of a foreign wiki to link to; null or
 *   false means links are rendered for the local wiki
 * @param bool $enableSectionLinks Whether autocomment (section) syntax is processed
 * @return string HTML containing link markers
 */
public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	return $this->preprocessInternal( $comment, true, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
134 
/**
 * Execute pending batch queries and replace markers in the specified
 * string(s) with actual links.
 *
 * @param string|string[] $comments Output of preprocess()/preprocessUnsafe();
 *   passed straight through to preg_replace_callback() as the subject
 * @return string|string[] The input with every marker replaced. A marker
 *   with no registered renderer becomes an HTML comment.
 */
public function finalize( $comments ) {
	// Resolve everything that was deferred before any renderer runs.
	$this->flushLinkBatches();

	// A marker is ESC followed by exactly MAX_ID_SIZE decimal digits.
	$markerPattern = '/\x1b([0-9]{' . self::MAX_ID_SIZE . '})/';

	$resolveMarker = function ( $m ) {
		$renderer = $this->links[(int)$m[1]] ?? null;
		return $renderer ? $renderer() : '<!-- MISSING -->';
	};

	return preg_replace_callback( $markerPattern, $resolveMarker, $comments );
}
157 
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * @param string $comment
 * @param bool $unsafe If true, the HTML-escaping step is skipped
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks If false, autocomment syntax is left alone
 * @return string HTML containing link markers
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit
	// \x1b needs to be stripped because it is used for link markers
	//
	// The array form is used on purpose: with the three-argument string form
	// of strtr(), a $to shorter than $from silently drops the extra $from
	// characters, which would leave \x1b in place.
	$comment = strtr( $comment, [ "\n" => ' ', "\x1b" => ' ' ] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = \Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
182 
/**
 * Convert autocomments (a section name wrapped in C-style comment
 * delimiters) into styled, and where possible linked, section references.
 *
 * The FormatAutocomments hook runs first for every autocomment and may
 * supply the replacement itself. Otherwise the autocomment is wrapped in
 * <span dir="auto"><span class="autocomment">; the outer span is closed at
 * the very end of the returned string via $append.
 *
 * @param string $comment Comment text, already HTML-escaped if required
 * @param LinkTarget|null $selfLinkTarget If set, the section name is linked;
 *   if null, the autocomment text is only styled
 * @param bool $samePage If true, the section link carries only a fragment
 * @param string|false|null $wikiId Passed on to the hook and to makeSectionLink()
 * @return string
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	// @todo $append here is something of a hack to preserve the status
	// quo. Someone who knows more about bidi and such should decide
	// (1) what sensible rendering even *is* for an LTR edit summary on an RTL
	// wiki, both when autocomments exist and when they don't, and
	// (2) what markup will make that actually happen.
	$append = '';
	$comment = preg_replace_callback(
		// To detect the presence of content before or after the
		// auto-comment, we use capturing groups inside optional zero-width
		// assertions. But older versions of PCRE can't directly make
		// zero-width assertions optional, so wrap them in a non-capturing
		// group.
		'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
		function ( $match ) use ( &$append, $selfLinkTarget, $samePage, $wikiId ) {
			// Ensure all match positions are defined
			$match += [ '', '', '', '' ];

			// $pre / $post: is there any text before / after the autocomment?
			$pre = $match[1] !== '';
			$auto = $match[2];
			$post = $match[3] !== '';
			$comment = null;

			// A hook handler may take over by assigning $comment.
			$this->hookRunner->onFormatAutocomments(
				$comment, $pre, $auto, $post,
				Title::castFromLinkTarget( $selfLinkTarget ),
				$samePage,
				$wikiId );
			if ( $comment !== null ) {
				return $comment;
			}

			if ( $selfLinkTarget ) {
				$section = $auto;
				# Remove links that a user may have manually put in the autosummary
				# This could be improved by copying as much of Parser::stripSectionName as desired.
				$section = str_replace( [
					'[[:',
					'[[',
					']]'
				], '', $section );

				// We don't want any links in the auto text to be linked, but we still
				// want to show any [[ ]]
				$sectionText = str_replace( '[[', '&#91;[', $auto );

				// Drop the first character of the guessed name; only the part
				// after it is used as the fragment.
				$section = substr( Parser::guessSectionNameFromStrippedText( $section ), 1 );
				if ( $section !== '' ) {
					if ( $samePage ) {
						$sectionTitle = new TitleValue( NS_MAIN, '', $section );
					} else {
						$sectionTitle = $selfLinkTarget->createFragmentTarget( $section );
					}
					$auto = $this->makeSectionLink(
						$sectionTitle,
						$this->userLang->getArrow() . $this->userLang->getDirMark() . $sectionText,
						$wikiId
					);
				}
			}
			if ( $pre ) {
				# written summary $presep autocomment (summary /* section */)
				$pre = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
			}
			if ( $post ) {
				# autocomment $postsep written summary (/* section */ summary)
				$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
			}
			// Compare against the empty string rather than testing truthiness,
			// so that an autocomment whose text is exactly "0" is still wrapped.
			if ( $auto !== '' ) {
				$auto = '<span dir="auto"><span class="autocomment">' . $auto . '</span>';
				$append .= '</span>';
			}
			$comment = $pre . $auto;
			return $comment;
		},
		$comment
	);
	return $comment . $append;
}
283 
/**
 * Make a section link. Section links are rendered immediately; they do not
 * go through the deferred marker mechanism.
 *
 * @param LinkTarget $target
 * @param string $text Link HTML, already escaped
 * @param string|false|null $wikiId ID of the wiki to link to; null or false
 *   means the local wiki
 * @return string HTML
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId
) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		// Link to a foreign wiki: build its URL and emit an external link.
		// NOTE(review): WikiMap is written fully qualified because no import
		// for it is visible in this view of the file - confirm.
		return Linker::makeExternalLink(
			\WikiMap::getForeignURL(
				$wikiId,
				// Main-namespace pages have no prefix; others use the
				// canonical namespace name.
				$target->getNamespace() === 0
					? $target->getDBkey()
					: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
						':' . $target->getDBkey(),
				$target->getFragment()
			),
			$text,
			/* escape = */ false // Already escaped
		);
	}
	return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
}
314 
/**
 * Replace [[wikilinks]] in a comment with link HTML or link markers.
 *
 * Media links are queued through addFileLink(); every other link goes
 * through addPageLink(). A link whose target cannot be turned into a valid
 * title is left in the text unchanged.
 *
 * @param string $comment Text to format links in. WARNING! Since the output
 *   of this function is HTML, $comment must already be escaped.
 * @param LinkTarget|null $selfLinkTarget Context for subpage links and for
 *   fragment-only links
 * @param bool $samePage If true, fragment-only links are not rebased onto
 *   $selfLinkTarget
 * @param string|false|null $wikiId ID of the wiki to link to; null or false
 *   means the local wiki
 * @return string HTML containing link markers
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	return preg_replace_callback(
		'/
			\[\[
			\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
			:? # ignore optional leading colon
			([^[\]|]+) # 1. link target; page names cannot include [, ] or |
			(?:\|
				# 2. link text
				# Stop matching at ]] without relying on backtracking.
				((?:]?[^\]])*+)
			)?
			\]\]
			([^[]*) # 3. link trail (the text up until the next link)
		/x',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
			// Alternation matching the Media: prefix, under both its canonical
			// and its content-language name.
			$medians = '(?:';
			$medians .= preg_quote(
				$this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' );
			$medians .= '|';
			$medians .= preg_quote(
				$this->contLang->getNsText( NS_MEDIA ),
				'/'
			) . '):';

			$comment = $match[0];

			// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
			if ( strpos( $match[1], '%' ) !== false ) {
				$match[1] = strtr(
					rawurldecode( $match[1] ),
					[ '<' => '&lt;', '>' => '&gt;' ]
				);
			}

			// Handle link renaming [[foo|text]] will show link as "text"
			if ( $match[2] != "" ) {
				$text = $match[2];
			} else {
				$text = $match[1];
			}
			$submatch = [];
			$linkMarker = null;
			if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
				// Media link; trail not supported.
				$linkRegexp = '/\[\[(.*?)\]\]/';
				$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
				if ( $linkTarget ) {
					$linkMarker = $this->addFileLink( $linkTarget, $text );
				}
			} else {
				// Other kind of link
				// Make sure its target is non-empty
				if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
					$match[1] = substr( $match[1], 1 );
				}
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
					if ( preg_match(
						$this->contLang->linkTrail(),
						$match[3],
						$submatch
					) ) {
						$trail = $submatch[1];
					} else {
						$trail = "";
					}
					// The trail is consumed together with the link itself.
					$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
					[ $inside, $trail ] = Linker::splitTrail( $trail );

					$linkText = $text;
					$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

					try {
						$target = $this->titleParser->parseTitle( $linkTarget );

						// A fragment-only link refers to a section of the
						// page the comment belongs to.
						if ( $target->getText() == '' && !$target->isExternal()
							&& !$samePage && $selfLinkTarget
						) {
							$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
						}

						$linkMarker = $this->addPageLink( $target, $linkText . $inside, $wikiId );
						$linkMarker .= $trail;
					} catch ( MalformedTitleException $e ) {
						// Fall through
					}
				}
			}
			if ( $linkMarker ) {
				// If the link is still valid, go ahead and replace it in!
				$comment = preg_replace(
					// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal linkRegexp set when used
					$linkRegexp,
					// The marker or link HTML is literal text, so backslashes
					// and dollar signs in it must not be read as backreferences.
					StringUtils::escapeRegexReplacement( $linkMarker ),
					$comment,
					1
				);
			}

			return $comment;
		},
		$comment
	);
}
439 
/**
 * Register a deferred link renderer and return the marker that stands in
 * for its output until finalize() runs.
 *
 * @param callable $callback Invoked with no arguments by finalize(); its
 *   return value replaces the marker
 * @return string ESC followed by the zero-padded decimal ID
 * @throws \RuntimeException If the next ID needs more than MAX_ID_SIZE digits
 */
private function addLinkMarker( $callback ) {
	// IDs are simply positions in the list of registered renderers.
	$id = count( $this->links );
	$digitCount = strlen( (string)$id );
	if ( $digitCount > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}

	$this->links[] = $callback;

	$markerFormat = "\x1b%0" . self::MAX_ID_SIZE . 'd';
	return sprintf( $markerFormat, $id );
}
454 
/**
 * Link to a page. Links that can be decided right away (foreign wiki,
 * already known, already cached as missing) are returned as HTML; anything
 * else is added to the link batch and a marker is returned instead.
 *
 * @param LinkTarget $target
 * @param string $text Link HTML, already escaped
 * @param string|false|null $wikiId ID of the wiki to link to; null or false
 *   means the local wiki
 * @return string HTML or a link marker
 */
private function addPageLink( LinkTarget $target, $text, $wikiId ) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		// Handle links from a foreign wiki ID
		// NOTE(review): WikiMap is written fully qualified because no import
		// for it is visible in this view of the file - confirm.
		return Linker::makeExternalLink(
			\WikiMap::getForeignURL(
				$wikiId,
				// Main-namespace pages have no prefix; others use the
				// canonical namespace name.
				$target->getNamespace() === 0
					? $target->getDBkey()
					: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
						':' . $target->getDBkey(),
				$target->getFragment()
			),
			$text,
			/* escape = */ false // Already escaped
		);
	} elseif ( $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown()
	) {
		// Already known
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	} elseif ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Defer page link
	if ( !$this->linkBatch ) {
		$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
		$this->linkBatch->setCaller( __METHOD__ );
	}
	$this->linkBatch->addObj( $target );
	return $this->addLinkMarker( function () use ( $target, $text ) {
		return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
	} );
}
499 
/**
 * Queue a media link. The file lookup is added to the file batch and the
 * link itself is rendered when finalize() resolves the returned marker.
 *
 * @param LinkTarget $target
 * @param string $html Link HTML
 * @return string Link marker
 */
private function addFileLink( LinkTarget $target, $html ) {
	$this->fileBatch[] = [
		'title' => $target
	];
	return $this->addLinkMarker( function () use ( $target, $html ) {
		return Linker::makeMediaLinkFile(
			$target,
			// false when the batch lookup produced no entry for this DB key
			$this->files[$target->getDBkey()] ?? false,
			$html
		);
	} );
}
519 
/**
 * Run the pending page existence batch and the pending file lookups, then
 * clear both queues. Does nothing for a queue that is empty.
 */
private function flushLinkBatches() {
	$pendingLinks = $this->linkBatch;
	if ( $pendingLinks ) {
		$pendingLinks->execute();
		$this->linkBatch = null;
	}

	if ( $this->fileBatch ) {
		// Array union: entries already present in $this->files are kept.
		$found = $this->repoGroup->findFiles( $this->fileBatch );
		$this->files += $found;
		$this->fileBatch = [];
	}
}
533 
534 }
const NS_FILE
Definition: Defines.php:70
const NS_MAIN
Definition: Defines.php:64
const NS_MEDIA
Definition: Defines.php:52
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition: File.php:68
Marks HTML that shouldn't be escaped.
Definition: HtmlArmor.php:30
Base class for language-specific code.
Definition: Language.php:56
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:44
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition: LinkCache.php:42
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
The text processing backend for CommentFormatter.
__construct(LinkRenderer $linkRenderer, LinkBatchFactory $linkBatchFactory, LinkCache $linkCache, RepoGroup $repoGroup, Language $userLang, Language $contLang, TitleParser $titleParser, NamespaceInfo $namespaceInfo, HookContainer $hookContainer)
finalize( $comments)
Execute pending batch queries and replace markers in the specified string(s) with actual links.
preprocessUnsafe( $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
preprocess(string $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment to HTML, but replace links with markers which are resolved later.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:568
Class that generates HTML for internal links.
Some internal bits split off from Skin.php.
Definition: Linker.php:65
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1521
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:1066
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:977
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1845
Represents a title within MediaWiki.
Definition: Title.php:82
static newFromLinkTarget(LinkTarget $linkTarget, $forceClone='')
Returns a Title given a LinkTarget.
Definition: Title.php:315
static castFromLinkTarget( $linkTarget)
Same as newFromLinkTarget, but if passed null, returns null.
Definition: Title.php:339
Helper tools for dealing with other locally-hosted wikis.
Definition: WikiMap.php:33
static getForeignURL( $wikiID, $page, $fragmentId=null)
Convenience to get a url to a page on a foreign wiki.
Definition: WikiMap.php:171
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition: Parser.php:107
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition: Parser.php:6201
Prioritized list of file repositories.
Definition: RepoGroup.php:30
static escapeHtmlAllowEntities( $html)
Given HTML input, escape with htmlspecialchars but un-escape entities.
Definition: Sanitizer.php:1122
A collection of static methods to play with strings.
Definition: StringUtils.php:29
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.
Represents a page (or page fragment) title within MediaWiki.
Definition: TitleValue.php:40
A title parser service for MediaWiki.
Definition: TitleParser.php:33