MediaWiki  master
CommentParser.php
Go to the documentation of this file.
1 <?php
2 
4 
5 use File;
6 use HtmlArmor;
7 use Language;
8 use LinkBatch;
9 use LinkCache;
23 use Parser;
24 use RepoGroup;
25 use StringUtils;
26 
/** @var LinkRenderer Renders known, broken, preloaded and deferred page links */
private $linkRenderer;
/** @var LinkBatchFactory Creates the batch that collects deferred page links */
private $linkBatchFactory;
/** @var RepoGroup Looks up the files referenced by media links */
private $repoGroup;
/** @var Language Supplies the arrow and direction mark that prefix section link text */
private $userLang;
/** @var Language Supplies the localised Media namespace name and the link trail regex */
private $contLang;
/** @var TitleParser Parses link target text into title values */
private $titleParser;
/** @var NamespaceInfo Supplies canonical namespace names */
private $namespaceInfo;
/** @var HookRunner Wraps the HookContainer passed to the constructor */
private $hookRunner;
/** @var LinkCache Consulted for already-known good and bad links */
private $linkCache;

/** @var callable[] Deferred link renderers, indexed by the numeric ID in their marker */
private $links = [];
/** @var LinkBatch|null Pending page-existence batch; null when nothing is queued */
private $linkBatch;

/**
 * @var array[] Pending input for RepoGroup::findFiles(); each entry is [ 'title' => LinkTarget ].
 * Initialised to an empty array so it has the same type before the first
 * media link is queued as it has after flushLinkBatches() resets it.
 */
private $fileBatch = [];
/** @var array Accumulated results of RepoGroup::findFiles(), looked up by DB key */
private $files = [];

/** Number of zero-padded decimal digits in a link marker ID */
private const MAX_ID_SIZE = 7;
67 
/**
 * Store the injected services. The hook container is wrapped in a
 * HookRunner; every other argument is kept as passed.
 *
 * @param LinkRenderer $linkRenderer
 * @param LinkBatchFactory $linkBatchFactory
 * @param LinkCache $linkCache
 * @param RepoGroup $repoGroup
 * @param Language $userLang
 * @param Language $contLang
 * @param TitleParser $titleParser
 * @param NamespaceInfo $namespaceInfo
 * @param HookContainer $hookContainer
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	// Link rendering and existence lookups
	$this->linkRenderer = $linkRenderer;
	$this->linkCache = $linkCache;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->repoGroup = $repoGroup;

	// Title and namespace handling
	$this->titleParser = $titleParser;
	$this->namespaceInfo = $namespaceInfo;

	// Languages
	$this->userLang = $userLang;
	$this->contLang = $contLang;

	// Hooks
	$this->hookRunner = new HookRunner( $hookContainer );
}
100 
/**
 * Convert a comment to HTML, but replace links with markers which are
 * resolved later by finalize().
 *
 * The comment is HTML-escaped (entities are still allowed) before links
 * are processed.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Target that section links and
 *   fragment-only links resolve against, if any
 * @param bool $samePage Passed through to section and wiki link handling
 * @param string|false|null $wikiId Passed through to link generation
 * @param bool $enableSectionLinks Whether /* ... *​/ autocomments are processed
 * @return string HTML that may still contain link markers
 */
public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	// The nullable type is spelled out explicitly: relying on the "= null"
	// default to make the type nullable is deprecated as of PHP 8.4.
	return $this->preprocessInternal( $comment, false, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
118 
/**
 * Convert a comment in pseudo-HTML format to HTML, replacing links with
 * markers which are resolved later by finalize().
 *
 * Unlike preprocess(), the comment is NOT HTML-escaped here, so the
 * caller is responsible for its safety.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Target that section links and
 *   fragment-only links resolve against, if any
 * @param bool $samePage Passed through to section and wiki link handling
 * @param string|false|null $wikiId Passed through to link generation
 * @param bool $enableSectionLinks Whether /* ... *​/ autocomments are processed
 * @return string HTML that may still contain link markers
 */
public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	// The nullable type is spelled out explicitly: relying on the "= null"
	// default to make the type nullable is deprecated as of PHP 8.4.
	return $this->preprocessInternal( $comment, true, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
135 
/**
 * Execute pending batch queries and replace markers in the specified
 * string(s) with actual links.
 *
 * @param string|string[] $comments Output of preprocess()/preprocessUnsafe()
 * @return string|string[] The input with each marker replaced by its link
 *   HTML, or by an HTML comment when the marker ID is unknown
 */
public function finalize( $comments ) {
	// Resolve all queued lookups first so the deferred renderers can use them
	$this->flushLinkBatches();

	// A marker is ESC followed by exactly MAX_ID_SIZE decimal digits
	$markerPattern = '/\x1b([0-9]{' . self::MAX_ID_SIZE . '})/';

	$resolveMarker = function ( $m ) {
		$renderer = $this->links[(int)$m[1]] ?? null;
		return $renderer ? $renderer() : '<!-- MISSING -->';
	};

	return preg_replace_callback( $markerPattern, $resolveMarker, $comments );
}
158 
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * @param string $comment
 * @param bool $unsafe True to skip HTML escaping of the comment
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks Whether to run doSectionLinks()
 * @return string
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit
	// \x1b needs to be stripped because it is used for link markers.
	// The array form of strtr() is used deliberately: with the string form,
	// a replacement string shorter than the search string makes PHP ignore
	// the extra search characters, so "\x1b" would silently be left in.
	$comment = strtr( $comment, [ "\n" => ' ', "\x1b" => ' ' ] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
183 
/**
 * Convert /* ... *​/ autocomments in a comment into formatted section
 * links.
 *
 * Each autocomment is first offered to the FormatAutocomments hook; if the
 * hook supplies a replacement it is used verbatim. Otherwise, when a self
 * link target is available, the autocomment text is turned into a link to
 * the corresponding section.
 *
 * @param string $comment Comment text
 * @param LinkTarget|null $selfLinkTarget Page the section links point into
 * @param bool $samePage If true, link to a bare fragment instead of
 *   $selfLinkTarget plus fragment
 * @param string|false|null $wikiId Passed through to makeSectionLink()
 * @return string
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	// @todo $append here is something of a hack to preserve the status
	// quo. Someone who knows more about bidi and such should decide
	// (1) what sensible rendering even *is* for an LTR edit summary on an RTL
	// wiki, both when autocomments exist and when they don't, and
	// (2) what markup will make that actually happen.
	$append = '';

	// Presence of content before or after the autocomment is detected with
	// capturing groups inside optional zero-width assertions. Older PCRE
	// versions cannot make a zero-width assertion optional directly, hence
	// the non-capturing wrappers.
	$autoCommentPattern = '!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!';

	$formatAutoComment = function ( $match ) use ( &$append, $selfLinkTarget, $samePage, $wikiId ) {
		// Make sure groups 1-3 exist even when PCRE omitted them
		$match += [ '', '', '', '' ];

		$pre = $match[1] !== '';
		$auto = $match[2];
		$post = $match[3] !== '';
		$override = null;

		$this->hookRunner->onFormatAutocomments(
			$override, $pre, $auto, $post,
			Title::castFromLinkTarget( $selfLinkTarget ),
			$samePage,
			$wikiId );
		if ( $override !== null ) {
			// A hook handler produced the final markup itself
			return $override;
		}

		if ( $selfLinkTarget ) {
			# Remove links that a user may have manually put in the autosummary
			# This could be improved by copying as much of Parser::stripSectionName as desired.
			$stripped = str_replace( [ '[[:', '[[', ']]' ], '', $auto );
			// Drop the first character of the guessed section name
			$fragment = substr( Parser::guessSectionNameFromStrippedText( $stripped ), 1 );

			if ( $fragment !== '' ) {
				// We don't want any links in the auto text to be linked, but we
				// still want to show any [[ ]]
				$visibleText = str_replace( '[[', '&#91;[', $auto );

				$sectionTitle = $samePage
					? new TitleValue( NS_MAIN, '', $fragment )
					: $selfLinkTarget->createFragmentTarget( $fragment );

				$auto = $this->makeSectionLink(
					$sectionTitle,
					$this->userLang->getArrow() . $this->userLang->getDirMark() . $visibleText,
					$wikiId
				);
			}
		}

		# written summary $presep autocomment (summary /* section */)
		$prefix = $pre
			? wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped()
			: $pre;

		if ( $post ) {
			# autocomment $postsep written summary (/* section */ summary)
			$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
		}

		if ( $auto ) {
			// The outer span is left open here; it is closed through $append
			// once the whole comment has been processed.
			$auto = '<span dir="auto"><span class="autocomment">' . $auto . '</span>';
			$append .= '</span>';
		}

		return $prefix . $auto;
	};

	$processed = preg_replace_callback( $autoCommentPattern, $formatAutoComment, $comment );

	return $processed . $append;
}
284 
/**
 * Make the HTML for a section link.
 *
 * When a wiki ID is given and the target is not external, an external
 * link to the foreign wiki's URL is produced; otherwise the link is
 * rendered locally as a preloaded link.
 *
 * @param LinkTarget $target Section link target
 * @param string $text Link text; must already be HTML-escaped
 * @param string|false|null $wikiId Foreign wiki ID, or false/null for the local wiki
 * @return string HTML
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId
) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		return Linker::makeExternalLink(
			WikiMap::getForeignURL(
				$wikiId,
				// Main-namespace pages have no prefix; every other namespace
				// uses its canonical name.
				$target->getNamespace() === 0
					? $target->getDBkey()
					: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
						':' . $target->getDBkey(),
				$target->getFragment()
			),
			$text,
			/* escape = */ false // Already escaped
		);
	}
	return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
}
315 
/**
 * Replace [[wiki links]] in a comment with link HTML or link markers.
 *
 * Media links are queued as file links. Any other link is parsed as a
 * title and handed to addPageLink(). Links whose target cannot be parsed
 * are left in the text unchanged.
 *
 * @param string $comment Text to format links in
 * @param LinkTarget|null $selfLinkTarget Context for subpage links and
 *   fragment-only links
 * @param bool $samePage If true, fragment-only links are not re-targeted
 *   at $selfLinkTarget
 * @param string|false|null $wikiId Passed through to addPageLink()
 * @return string
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	// Prefix that identifies a media link: the canonical or the content
	// language name of the Media namespace, followed by a colon. It does not
	// depend on the individual match, so build it once per call.
	$medians = '(?:' .
		preg_quote( $this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' ) .
		'|' .
		preg_quote( $this->contLang->getNsText( NS_MEDIA ), '/' ) .
		'):';

	return preg_replace_callback(
		'/
			\[\[
			\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
			:? # ignore optional leading colon
			([^[\]|]+) # 1. link target; page names cannot include [, ] or |
			(?:\|
				# 2. link text
				# Stop matching at ]] without relying on backtracking.
				((?:]?[^\]])*+)
			)?
			\]\]
			([^[]*) # 3. link trail (the text up until the next link)
		/x',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId, $medians ) {
			$comment = $match[0];

			// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
			if ( strpos( $match[1], '%' ) !== false ) {
				$match[1] = strtr(
					rawurldecode( $match[1] ),
					[ '<' => '&lt;', '>' => '&gt;' ]
				);
			}

			// Handle link renaming [[foo|text]] will show link as "text"
			if ( $match[2] != "" ) {
				$text = $match[2];
			} else {
				$text = $match[1];
			}
			$submatch = [];
			$linkMarker = null;
			if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
				// Media link; trail not supported.
				$linkRegexp = '/\[\[(.*?)\]\]/';
				$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
				if ( $linkTarget ) {
					$linkMarker = $this->addFileLink( $linkTarget, $text );
				}
			} else {
				// Other kind of link
				// Make sure its target is non-empty
				if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
					$match[1] = substr( $match[1], 1 );
				}
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
					if ( preg_match(
						$this->contLang->linkTrail(),
						$match[3],
						$submatch
					) ) {
						$trail = $submatch[1];
					} else {
						$trail = "";
					}
					// The trail is part of what gets replaced, so it has to be
					// part of the pattern too.
					$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
					[ $inside, $trail ] = Linker::splitTrail( $trail );

					$linkText = $text;
					$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

					try {
						$target = $this->titleParser->parseTitle( $linkTarget );

						// A fragment-only link refers to the page the comment belongs to
						if ( $target->getText() == '' && !$target->isExternal()
							&& !$samePage && $selfLinkTarget
						) {
							$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
						}

						$linkMarker = $this->addPageLink( $target, $linkText . $inside, $wikiId );
						$linkMarker .= $trail;
					} catch ( MalformedTitleException $e ) {
						// Fall through
					}
				}
			}
			if ( $linkMarker ) {
				// If the link is still valid, go ahead and replace it in!
				$comment = preg_replace(
					// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal linkRegexp set when used
					$linkRegexp,
					// The marker or link HTML is inserted literally, so anything
					// that preg_replace() would read as a back-reference must be
					// escaped.
					StringUtils::escapeRegexReplacement( $linkMarker ),
					$comment,
					1
				);
			}

			return $comment;
		},
		$comment
	);
}
440 
/**
 * Register a deferred link renderer and return the marker that stands in
 * for its output until finalize() runs.
 *
 * @param callable $callback Invoked without arguments by finalize(); its
 *   return value replaces the marker
 * @return string ESC followed by the zero-padded numeric ID
 * @throws \RuntimeException If the ID no longer fits in MAX_ID_SIZE digits
 */
private function addLinkMarker( $callback ) {
	// The ID is the index the callback will occupy in $this->links
	$id = count( $this->links );
	$digitCount = strlen( (string)$id );
	if ( $digitCount > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}

	$this->links[] = $callback;

	$markerFormat = "\x1b%0" . self::MAX_ID_SIZE . 'd';
	return sprintf( $markerFormat, $id );
}
455 
/**
 * Produce the HTML for a page link, or a marker for it if its existence
 * has to be looked up first.
 *
 * Links to a foreign wiki and links whose state is already known or cached
 * are rendered immediately. Anything else is added to the pending link
 * batch and rendered when finalize() resolves the returned marker.
 *
 * @param LinkTarget $target
 * @param string $text Link text; must already be HTML-escaped
 * @param string|false|null $wikiId Foreign wiki ID, or false/null for the local wiki
 * @return string Link HTML or a link marker
 */
private function addPageLink( LinkTarget $target, $text, $wikiId ) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		// Handle links from a foreign wiki ID
		return Linker::makeExternalLink(
			WikiMap::getForeignURL(
				$wikiId,
				// Main-namespace pages have no prefix; every other namespace
				// uses its canonical name.
				$target->getNamespace() === 0
					? $target->getDBkey()
					: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
						':' . $target->getDBkey(),
				$target->getFragment()
			),
			$text,
			/* escape = */ false // Already escaped
		);
	} elseif ( $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown()
	) {
		// Already known
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	} elseif ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Defer page link
	if ( !$this->linkBatch ) {
		$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
		$this->linkBatch->setCaller( __METHOD__ );
	}
	$this->linkBatch->addObj( $target );
	return $this->addLinkMarker( function () use ( $target, $text ) {
		return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
	} );
}
500 
/**
 * Queue a media link: record the target for the next file lookup and
 * return a marker that finalize() replaces with the link HTML.
 *
 * @param LinkTarget $target File the link points to
 * @param string $html Link text, passed to Linker::makeMediaLinkFile() as HTML
 * @return string Link marker
 */
private function addFileLink( LinkTarget $target, $html ) {
	$this->fileBatch[] = [
		'title' => $target
	];
	return $this->addLinkMarker( function () use ( $target, $html ) {
		return Linker::makeMediaLinkFile(
			$target,
			// False when the batch lookup has no entry for this DB key
			$this->files[$target->getDBkey()] ?? false,
			$html
		);
	} );
}
520 
/**
 * Run the queued page and file lookups, then clear both queues.
 */
private function flushLinkBatches() {
	// Page links: executing the batch is what performs the lookup
	if ( $this->linkBatch ) {
		$this->linkBatch->execute();
		$this->linkBatch = null;
	}

	// Media links: keep results from earlier flushes, add the new ones
	if ( $this->fileBatch ) {
		$found = $this->repoGroup->findFiles( $this->fileBatch );
		$this->files = $this->files + $found;
		$this->fileBatch = [];
	}
}
534 
535 }
const NS_FILE
Definition: Defines.php:70
const NS_MAIN
Definition: Defines.php:64
const NS_MEDIA
Definition: Defines.php:52
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition: File.php:70
Marks HTML that shouldn't be escaped.
Definition: HtmlArmor.php:30
Base class for language-specific code.
Definition: Language.php:61
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:44
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition: LinkCache.php:45
The text processing backend for CommentFormatter.
__construct(LinkRenderer $linkRenderer, LinkBatchFactory $linkBatchFactory, LinkCache $linkCache, RepoGroup $repoGroup, Language $userLang, Language $contLang, TitleParser $titleParser, NamespaceInfo $namespaceInfo, HookContainer $hookContainer)
finalize( $comments)
Execute pending batch queries and replace markers in the specified string(s) with actual links.
preprocessUnsafe( $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
preprocess(string $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment to HTML, but replace links with markers which are resolved later.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:568
Class that generates HTML for internal links.
Some internal bits split off from Skin.php.
Definition: Linker.php:65
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1578
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:1130
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:1041
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1911
HTML sanitizer for MediaWiki.
Definition: Sanitizer.php:46
static escapeHtmlAllowEntities( $html)
Given HTML input, escape with htmlspecialchars but un-escape entities.
Definition: Sanitizer.php:1127
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Represents the target of a wiki link.
Definition: TitleValue.php:44
Represents a title within MediaWiki.
Definition: Title.php:76
static castFromLinkTarget(?LinkTarget $linkTarget)
Same as newFromLinkTarget(), but if passed null, returns null.
Definition: Title.php:314
static newFromLinkTarget(LinkTarget $linkTarget, $forceClone='')
Returns a Title given a LinkTarget.
Definition: Title.php:290
Tools for dealing with other locally-hosted wikis.
Definition: WikiMap.php:31
static getForeignURL( $wikiID, $page, $fragmentId=null)
Convenience to get a url to a page on a foreign wiki.
Definition: WikiMap.php:173
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition: Parser.php:115
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition: Parser.php:6161
Prioritized list of file repositories.
Definition: RepoGroup.php:30
A collection of static methods to play with strings.
Definition: StringUtils.php:29
static escapeRegexReplacement( $string)
Escape a string to make it suitable for inclusion in a preg_replace() replacement parameter.
Represents the target of a wiki link.
Definition: LinkTarget.php:30
A title parser service for MediaWiki.
Definition: TitleParser.php:35