MediaWiki  master
CommentParser.php
Go to the documentation of this file.
1 <?php
2 
4 
5 use File;
6 use HtmlArmor;
7 use Language;
8 use LinkBatch;
9 use LinkCache;
10 use Linker;
17 use NamespaceInfo;
18 use Parser;
19 use RepoGroup;
20 use Title;
21 use TitleParser;
22 use TitleValue;
23 
/** @var LinkRenderer Renderer used to build local page links */
private $linkRenderer;
/** @var LinkBatchFactory Factory for the deferred existence-check batch */
private $linkBatchFactory;
/** @var RepoGroup Used to resolve queued media links to files */
private $repoGroup;
/** @var Language Supplies the arrow and direction mark for section links */
private $userLang;
/** @var Language Supplies the localised Media namespace name and the link trail regex */
private $contLang;
/** @var TitleParser */
private $titleParser;
/** @var NamespaceInfo */
private $namespaceInfo;
/** @var HookRunner */
private $hookRunner;
/** @var LinkCache Consulted for already-known good/bad links */
private $linkCache;

/** @var callable[] Deferred link callbacks, indexed by marker ID */
private $links = [];
/** @var LinkBatch|null Pending existence-check batch, or null when nothing is queued */
private $linkBatch;

/** @var array Input to RepoGroup::findFiles() */
private $fileBatch;
/** @var File[] Resolved File objects indexed by DB key */
private $files = [];

/** Number of zero-padded decimal digits in a link marker ID */
private const MAX_ID_SIZE = 7;
64 
/**
 * @param LinkRenderer $linkRenderer Renders links to local pages
 * @param LinkBatchFactory $linkBatchFactory Creates the batch used for deferred page links
 * @param LinkCache $linkCache Consulted for links already known to be good or bad
 * @param RepoGroup $repoGroup Resolves queued media links to files
 * @param Language $userLang Supplies the arrow and direction mark for section links
 * @param Language $contLang Supplies the Media namespace name and the link trail regex
 * @param TitleParser $titleParser Parses link targets
 * @param NamespaceInfo $namespaceInfo Supplies canonical namespace names
 * @param HookContainer $hookContainer Wrapped in a HookRunner for the FormatAutocomments hook
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	$this->linkRenderer = $linkRenderer;
	$this->linkBatchFactory = $linkBatchFactory;
	$this->linkCache = $linkCache;
	$this->repoGroup = $repoGroup;
	$this->userLang = $userLang;
	$this->contLang = $contLang;
	$this->titleParser = $titleParser;
	$this->namespaceInfo = $namespaceInfo;
	$this->hookRunner = new HookRunner( $hookContainer );
}
97 
/**
 * Convert a comment to HTML, but replace links with markers which are
 * resolved later by finalize().
 *
 * The comment is HTML-escaped (entities are still allowed) before links
 * are processed.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Target that section links and
 *   fragment-only links point at, or null for none
 * @param bool $samePage When true, section links are fragment-only
 * @param string|false|null $wikiId When neither null nor false, non-interwiki
 *   links are rendered as external links to that wiki
 * @param bool $enableSectionLinks Whether C-style autocomments become section links
 * @return string HTML containing link markers
 */
public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	return $this->preprocessInternal( $comment, false, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
115 
/**
 * Convert a comment in pseudo-HTML format to HTML, replacing links with
 * markers which are resolved later by finalize().
 *
 * Unlike preprocess(), the comment is NOT HTML-escaped here, so the caller
 * is responsible for its safety.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget Target that section links and
 *   fragment-only links point at, or null for none
 * @param bool $samePage When true, section links are fragment-only
 * @param string|false|null $wikiId When neither null nor false, non-interwiki
 *   links are rendered as external links to that wiki
 * @param bool $enableSectionLinks Whether C-style autocomments become section links
 * @return string HTML containing link markers
 */
public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	return $this->preprocessInternal( $comment, true, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
132 
/**
 * Execute pending batch queries and replace markers in the specified
 * string(s) with actual links.
 *
 * @param string|string[] $comments Text produced by the preprocess methods
 * @return string|string[] The same text with every marker substituted
 */
public function finalize( $comments ) {
	// Resolve everything that was queued so the callbacks can render.
	$this->flushLinkBatches();

	$markerPattern = '/\x1b([0-9]{' . self::MAX_ID_SIZE . '})/';
	$resolveMarker = function ( $m ) {
		$index = (int)$m[1];
		$render = $this->links[$index] ?? null;
		// A marker with no registered callback becomes a visible placeholder.
		return $render ? $render() : '<!-- MISSING -->';
	};

	return preg_replace_callback( $markerPattern, $resolveMarker, $comments );
}
155 
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * @param string $comment
 * @param bool $unsafe When true, skip HTML escaping of the comment
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks
 * @return string HTML containing link markers
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit.
	// \x1b needs to be stripped because it is used for link markers; if it
	// survived, user input could forge a marker. The array form of strtr()
	// is used on purpose: with string arguments of unequal length the extra
	// "from" characters are silently ignored, which left \x1b untouched.
	$comment = strtr( $comment, [ "\n" => ' ', "\x1b" => ' ' ] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = \Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
180 
/**
 * Converts C-style comments in edit summaries into section links.
 *
 * Each autocomment is first offered to the FormatAutocomments hook; if the
 * hook supplies a replacement it is used verbatim. Otherwise, when a self
 * link target is given, the autocomment text is turned into a link to the
 * corresponding section.
 *
 * @param string $comment Comment text
 * @param LinkTarget|null $selfLinkTarget Page the section links point at
 * @param bool $samePage When true, the section link is fragment-only
 * @param string|false|null $wikiId Passed to the hook and to makeSectionLink()
 * @return string Comment with autocomments replaced
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	// @todo $append here is something of a hack to preserve the status
	// quo. Someone who knows more about bidi and such should decide
	// (1) what sensible rendering even *is* for an LTR edit summary on an RTL
	// wiki, both when autocomments exist and when they don't, and
	// (2) what markup will make that actually happen.
	$append = '';
	$comment = preg_replace_callback(
		// To detect the presence of content before or after the
		// auto-comment, we use capturing groups inside optional zero-width
		// assertions. But older versions of PCRE can't directly make
		// zero-width assertions optional, so wrap them in a non-capturing
		// group.
		'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
		function ( $match ) use ( &$append, $selfLinkTarget, $samePage, $wikiId ) {
			// Ensure all match positions are defined
			$match += [ '', '', '', '' ];

			$pre = $match[1] !== '';
			$auto = $match[2];
			$post = $match[3] !== '';
			$comment = null;

			$this->hookRunner->onFormatAutocomments(
				$comment, $pre, $auto, $post,
				Title::castFromLinkTarget( $selfLinkTarget ),
				$samePage,
				$wikiId );
			if ( $comment !== null ) {
				return $comment;
			}

			if ( $selfLinkTarget ) {
				# Remove links that a user may have manually put in the autosummary
				# This could be improved by copying as much of Parser::stripSectionName as desired.
				$section = str_replace( [
					'[[:',
					'[[',
					']]'
				], '', $auto );

				// We don't want any links in the auto text to be linked, but we still
				// want to show any [[ ]]
				$sectionText = str_replace( '[[', '&#91;[', $auto );

				// Drop the first character of the guessed section name.
				$section = substr( Parser::guessSectionNameFromStrippedText( $section ), 1 );
				if ( $section !== '' ) {
					if ( $samePage ) {
						$sectionTitle = new TitleValue( NS_MAIN, '', $section );
					} else {
						$sectionTitle = $selfLinkTarget->createFragmentTarget( $section );
					}
					$auto = $this->makeSectionLink(
						$sectionTitle,
						$this->userLang->getArrow() . $this->userLang->getDirMark() . $sectionText,
						$wikiId
					);
				}
			}

			$prefix = '';
			if ( $pre ) {
				# written summary $presep autocomment (summary /* section */)
				$prefix = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
			}
			if ( $post ) {
				# autocomment $postsep written summary (/* section */ summary)
				$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
			}
			// Compare against the empty string rather than testing truthiness,
			// so that an autocomment whose text is "0" is still wrapped.
			if ( (string)$auto !== '' ) {
				$auto = '<span dir="auto"><span class="autocomment">' . $auto . '</span>';
				// The outer span is closed at the very end of the comment.
				$append .= '</span>';
			}
			return $prefix . $auto;
		},
		$comment
	);
	return $comment . $append;
}
281 
/**
 * Make a section link.
 *
 * When a foreign wiki ID is given and the target has no interwiki
 * component, the link is rendered as an external link to that wiki;
 * otherwise it is rendered through the link renderer.
 *
 * @param LinkTarget $target
 * @param string $text Link text; already-escaped HTML
 * @param string|false|null $wikiId Foreign wiki ID, or null/false for the local wiki
 * @return string HTML
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId
) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		return Linker::makeExternalLink(
			\WikiMap::getForeignURL(
				$wikiId,
				$target->getNamespace() === 0
					? $target->getDBkey()
					: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
					':' . $target->getDBkey(),
				$target->getFragment()
			),
			$text,
			/* escape = */ false // Already escaped
		);
	}
	return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
}
312 
/**
 * Formats wiki links and media links in text; all other wiki formatting
 * is ignored.
 *
 * Every [[...]] link that resolves to a valid target is replaced either
 * with rendered HTML or with a marker that finalize() substitutes later.
 * Links that cannot be resolved are left in the text unchanged.
 *
 * @param string $comment Text to format links in
 * @param LinkTarget|null $selfLinkTarget Base for subpage links and target
 *   for fragment-only links
 * @param bool $samePage When true, fragment-only links are not rewritten to
 *   point at $selfLinkTarget
 * @param string|false|null $wikiId Passed on to addPageLink()
 * @return string
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	// The Media namespace prefix does not depend on the individual link, so
	// build its regex once instead of once per match.
	$mediaRegexp = '/^(?:' .
		preg_quote( $this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' ) .
		'|' .
		preg_quote( $this->contLang->getNsText( NS_MEDIA ), '/' ) .
		'):(.*)$/i';

	return preg_replace_callback(
		'/
			\[\[
			\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
			:? # ignore optional leading colon
			([^[\]|]+) # 1. link target; page names cannot include [, ] or |
			(?:\|
				# 2. link text
				# Stop matching at ]] without relying on backtracking.
				((?:]?[^\]])*+)
			)?
			\]\]
			([^[]*) # 3. link trail (the text up until the next link)
		/x',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId, $mediaRegexp ) {
			$comment = $match[0];

			// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
			if ( strpos( $match[1], '%' ) !== false ) {
				$match[1] = strtr(
					rawurldecode( $match[1] ),
					[ '<' => '&lt;', '>' => '&gt;' ]
				);
			}

			// Handle link renaming [[foo|text]] will show link as "text"
			$text = $match[2] !== '' ? $match[2] : $match[1];

			$submatch = [];
			$linkMarker = null;
			$linkRegexp = null;
			if ( preg_match( $mediaRegexp, $match[1], $submatch ) ) {
				// Media link; trail not supported.
				$linkRegexp = '/\[\[(.*?)\]\]/';
				$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
				if ( $linkTarget ) {
					$linkMarker = $this->addFileLink( $linkTarget, $text );
				}
			} else {
				// Other kind of link
				// Make sure its target is non-empty
				if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
					$match[1] = substr( $match[1], 1 );
				}
				if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
					if ( preg_match(
						$this->contLang->linkTrail(),
						$match[3],
						$submatch
					) ) {
						$trail = $submatch[1];
					} else {
						$trail = "";
					}
					$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
					list( $inside, $trail ) = Linker::splitTrail( $trail );

					$linkText = $text;
					$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

					try {
						$target = $this->titleParser->parseTitle( $linkTarget );

						// A fragment-only link refers to the self link target
						// unless the comment is shown on that same page.
						if ( $target->getText() == '' && !$target->isExternal()
							&& !$samePage && $selfLinkTarget
						) {
							$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
						}

						$linkMarker = $this->addPageLink( $target, $linkText . $inside, $wikiId );
						$linkMarker .= $trail;
					} catch ( MalformedTitleException $e ) {
						// Fall through: the link text is left as it was
					}
				}
			}
			if ( $linkMarker ) {
				// If the link is still valid, go ahead and replace it in!
				// The link may already be rendered HTML containing user text.
				// Substitute it through a callback so that "$1" or "\1" in
				// that text is inserted literally instead of being read as
				// a backreference in a replacement string.
				$comment = preg_replace_callback(
					$linkRegexp,
					static function () use ( $linkMarker ) {
						return $linkMarker;
					},
					$comment,
					1
				);
			}

			return $comment;
		},
		$comment
	);
}
437 
/**
 * Add a deferred link to the list and return its marker.
 *
 * The marker is an escape character followed by the callback's list index,
 * zero-padded to MAX_ID_SIZE digits.
 *
 * @param callable $callback Invoked by finalize() to produce the link HTML
 * @return string The marker
 * @throws \RuntimeException If the index no longer fits in MAX_ID_SIZE digits
 */
private function addLinkMarker( $callback ) {
	$id = count( $this->links );
	$digitCount = strlen( (string)$id );
	if ( $digitCount > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}

	$marker = sprintf( "\x1b%0" . self::MAX_ID_SIZE . 'd', $id );
	$this->links[] = $callback;
	return $marker;
}
452 
/**
 * Link to a LinkTarget.
 *
 * Returns finished HTML when the link can be rendered immediately (foreign
 * wiki link, or a target the link cache already knows as good or bad).
 * Otherwise the target is queued in the link batch and a marker is
 * returned for finalize() to resolve.
 *
 * @param LinkTarget $target
 * @param string $text Link text; already-escaped HTML
 * @param string|false|null $wikiId Foreign wiki ID, or null/false for the local wiki
 * @return string HTML or a link marker
 */
private function addPageLink( LinkTarget $target, $text, $wikiId ) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		// Handle links from a foreign wiki ID
		return Linker::makeExternalLink(
			\WikiMap::getForeignURL(
				$wikiId,
				$target->getNamespace() === 0
					? $target->getDBkey()
					: $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
					':' . $target->getDBkey(),
				$target->getFragment()
			),
			$text,
			/* escape = */ false // Already escaped
		);
	} elseif ( $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown()
	) {
		// Already known
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	} elseif ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Defer page link
	if ( !$this->linkBatch ) {
		$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
		$this->linkBatch->setCaller( __METHOD__ );
	}
	$this->linkBatch->addObj( $target );
	return $this->addLinkMarker( function () use ( $target, $text ) {
		return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
	} );
}
497 
/**
 * Link to a file, returning a marker.
 *
 * The target is queued in the file batch; the marker's callback renders
 * the media link once the batch has been resolved, passing false as the
 * file when no entry exists for the target's DB key.
 *
 * @param LinkTarget $target
 * @param string $html Link text as HTML
 * @return string The link marker
 */
private function addFileLink( LinkTarget $target, $html ) {
	$this->fileBatch[] = [
		'title' => $target
	];
	return $this->addLinkMarker( function () use ( $target, $html ) {
		return Linker::makeMediaLinkFile(
			$target,
			$this->files[$target->getDBkey()] ?? false,
			$html
		);
	} );
}
517 
/**
 * Execute any pending link batch or file batch.
 *
 * After this call the link batch is cleared, and any files found for the
 * queued file batch have been added to the resolved file list.
 */
private function flushLinkBatches() {
	$pendingLinks = $this->linkBatch;
	if ( $pendingLinks ) {
		$pendingLinks->execute();
		$this->linkBatch = null;
	}

	$pendingFiles = $this->fileBatch;
	if ( $pendingFiles ) {
		$found = $this->repoGroup->findFiles( $pendingFiles );
		// Array union: entries already resolved earlier are kept.
		$this->files += $found;
		$this->fileBatch = [];
	}
}
531 
532 }
const NS_FILE
Definition: Defines.php:70
const NS_MAIN
Definition: Defines.php:64
const NS_MEDIA
Definition: Defines.php:52
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition: File.php:68
Marks HTML that shouldn't be escaped.
Definition: HtmlArmor.php:30
Internationalisation code. See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Definition: Language.php:45
Class representing a list of titles. The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:44
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition: LinkCache.php:42
Some internal bits split off from Skin.php.
Definition: Linker.php:40
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:948
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1441
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1749
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:1016
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
The text processing backend for CommentFormatter.
preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId, $enableSectionLinks)
doWikiLinks( $comment, $selfLinkTarget=null, $samePage=false, $wikiId=false)
Formats wiki links and media links in text; all other wiki formatting is ignored.
array $fileBatch
Input to RepoGroup::findFiles()
__construct(LinkRenderer $linkRenderer, LinkBatchFactory $linkBatchFactory, LinkCache $linkCache, RepoGroup $repoGroup, Language $userLang, Language $contLang, TitleParser $titleParser, NamespaceInfo $namespaceInfo, HookContainer $hookContainer)
flushLinkBatches()
Execute any pending link batch or file batch.
addPageLink(LinkTarget $target, $text, $wikiId)
Link to a LinkTarget.
finalize( $comments)
Execute pending batch queries and replace markers in the specified string(s) with actual links.
doSectionLinks( $comment, $selfLinkTarget=null, $samePage=false, $wikiId=false)
Converts C-style comments in edit summaries into section links.
addFileLink(LinkTarget $target, $html)
Link to a file, returning a marker.
addLinkMarker( $callback)
Add a deferred link to the list and return its marker.
makeSectionLink(LinkTarget $target, $text, $wikiId)
Make a section link.
preprocessUnsafe( $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
File[] $files
Resolved File objects indexed by DB key.
preprocess(string $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment to HTML, but replace links with markers which are resolved later.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:562
Class that generates HTML anchor link elements for pages.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition: Parser.php:95
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition: Parser.php:6239
Prioritized list of file repositories.
Definition: RepoGroup.php:29
static escapeHtmlAllowEntities( $html)
Given HTML input, escape with htmlspecialchars but un-escape entities.
Definition: Sanitizer.php:1122
Represents a page (or page fragment) title within MediaWiki.
Definition: TitleValue.php:40
Represents a title within MediaWiki.
Definition: Title.php:49
static newFromLinkTarget(LinkTarget $linkTarget, $forceClone='')
Returns a Title given a LinkTarget.
Definition: Title.php:282
static castFromLinkTarget( $linkTarget)
Same as newFromLinkTarget, but if passed null, returns null.
Definition: Title.php:306
static getForeignURL( $wikiID, $page, $fragmentId=null)
Convenience to get a url to a page on a foreign wiki.
Definition: WikiMap.php:171
getFragment()
Get the link fragment in text form (i.e.
getNamespace()
Get the namespace index.
getDBkey()
Get the main part of the link target, in canonical database form.
isExternal()
Whether this LinkTarget has an interwiki component.
A title parser service for MediaWiki.
Definition: TitleParser.php:33