MediaWiki  master
CommentParser.php
Go to the documentation of this file.
1 <?php
2 
4 
5 use File;
6 use HtmlArmor;
7 use Language;
8 use LinkBatch;
9 use LinkCache;
10 use Linker;
17 use NamespaceInfo;
18 use Parser;
19 use RepoGroup;
20 use Title;
21 use TitleParser;
22 use TitleValue;
23 
34  private $linkRenderer;
38  private $repoGroup;
40  private $userLang;
42  private $contLang;
44  private $titleParser;
46  private $namespaceInfo;
48  private $hookRunner;
50  private $linkCache;
51 
53  private $links = [];
55  private $linkBatch;
56 
58  private $fileBatch;
60  private $files = [];
61 
63  private const MAX_ID_SIZE = 7;
64 
/**
 * Store the injected services used while parsing comments.
 *
 * @param LinkRenderer $linkRenderer Renders page links (known, broken, preloaded)
 * @param LinkBatchFactory $linkBatchFactory Creates the batch used for deferred
 *   page-existence lookups
 * @param LinkCache $linkCache Consulted to resolve links immediately when the
 *   target's status is already cached
 * @param RepoGroup $repoGroup Used to look up files for media links
 * @param Language $userLang Supplies the arrow and direction mark for section links
 * @param Language $contLang Supplies the localised Media namespace name and the
 *   link trail pattern
 * @param TitleParser $titleParser Parses link targets
 * @param NamespaceInfo $namespaceInfo Supplies canonical namespace names
 * @param HookContainer $hookContainer Wrapped in a HookRunner for hook calls
 */
public function __construct(
    LinkRenderer $linkRenderer,
    LinkBatchFactory $linkBatchFactory,
    LinkCache $linkCache,
    RepoGroup $repoGroup,
    Language $userLang,
    Language $contLang,
    TitleParser $titleParser,
    NamespaceInfo $namespaceInfo,
    HookContainer $hookContainer
) {
    $this->linkRenderer = $linkRenderer;
    $this->linkBatchFactory = $linkBatchFactory;
    $this->linkCache = $linkCache;
    $this->repoGroup = $repoGroup;
    $this->userLang = $userLang;
    $this->contLang = $contLang;
    $this->titleParser = $titleParser;
    $this->namespaceInfo = $namespaceInfo;
    $this->hookRunner = new HookRunner( $hookContainer );
}
97 
/**
 * Convert a comment to HTML, but replace links with markers which are
 * resolved later by finalize().
 *
 * The comment is HTML-escaped (entities are allowed) before links are parsed.
 *
 * @param string $comment Comment text
 * @param LinkTarget|null $selfLinkTarget Page the comment belongs to; used as
 *   the target of section links and as the context for relative links
 * @param bool $samePage When true, section links carry only a fragment
 *   instead of pointing at $selfLinkTarget
 * @param string|false|null $wikiId When neither null nor false, links are
 *   generated as external links to that wiki
 * @param bool $enableSectionLinks Whether C-style autocomments are converted
 *   into section links
 * @return string Preprocessed comment containing link markers
 */
public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
    $samePage = false, $wikiId = false, $enableSectionLinks = true
) {
    return $this->preprocessInternal( $comment, false, $selfLinkTarget,
        $samePage, $wikiId, $enableSectionLinks );
}
115 
/**
 * Convert a comment in pseudo-HTML format to HTML, replacing links with
 * markers which are resolved later by finalize().
 *
 * Unlike preprocess(), the input is NOT HTML-escaped, so the caller is
 * responsible for making sure it is safe to output.
 *
 * @param string $comment Comment text, already safe HTML apart from links
 * @param LinkTarget|null $selfLinkTarget Page the comment belongs to; used as
 *   the target of section links and as the context for relative links
 * @param bool $samePage When true, section links carry only a fragment
 *   instead of pointing at $selfLinkTarget
 * @param string|false|null $wikiId When neither null nor false, links are
 *   generated as external links to that wiki
 * @param bool $enableSectionLinks Whether C-style autocomments are converted
 *   into section links
 * @return string Preprocessed comment containing link markers
 */
public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
    $samePage = false, $wikiId = false, $enableSectionLinks = true
) {
    return $this->preprocessInternal( $comment, true, $selfLinkTarget,
        $samePage, $wikiId, $enableSectionLinks );
}
132 
/**
 * Execute pending batch queries and replace markers in the specified
 * string(s) with actual links.
 *
 * A marker whose ID has no registered callback is replaced with an HTML
 * comment so that the gap stays visible in the page source.
 *
 * @param string|string[] $comments Output of preprocess()/preprocessUnsafe()
 * @return string|string[] Same shape as the input, with markers expanded
 */
public function finalize( $comments ) {
    // Resolve deferred page and file lookups before any link is rendered
    $this->flushLinkBatches();

    $markerPattern = '/\x1b([0-9]{' . self::MAX_ID_SIZE . '})/';
    $expandMarker = function ( $groups ) {
        $render = $this->links[(int)$groups[1]] ?? null;
        return $render ? $render() : '<!-- MISSING -->';
    };

    return preg_replace_callback( $markerPattern, $expandMarker, $comments );
}
155 
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * @param string $comment Comment text
 * @param bool $unsafe When true, skip HTML escaping of the input
 * @param LinkTarget|null $selfLinkTarget Page the comment belongs to
 * @param bool $samePage When true, section links carry only a fragment
 * @param string|false|null $wikiId When neither null nor false, links are
 *   generated as external links to that wiki
 * @param bool $enableSectionLinks Whether autocomments become section links
 * @return string Preprocessed comment containing link markers
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
    $enableSectionLinks
) {
    // Sanitize text a bit
    // \x1b needs to be stripped because it is used for link markers.
    // The three-argument form of strtr() maps characters pairwise and ignores
    // the surplus of the longer string, so the replacement must supply one
    // space per stripped character; with a single space \x1b passes through.
    $comment = strtr( $comment, "\n\x1b", '  ' );
    // Allow HTML entities (for T15815)
    if ( !$unsafe ) {
        $comment = \Sanitizer::escapeHtmlAllowEntities( $comment );
    }
    if ( $enableSectionLinks ) {
        $comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
    }
    return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
180 
/**
 * Converts C-style comments in edit summaries into section links.
 *
 * Each autocomment is first offered to the FormatAutocomments hook; if the
 * hook sets a replacement, that is used verbatim. Otherwise the autocomment
 * is wrapped in autocomment markup and, when $selfLinkTarget is given and a
 * section name can be derived, turned into a link to that section.
 *
 * @param string $comment Comment text
 * @param LinkTarget|null $selfLinkTarget Page the section links point to
 * @param bool $samePage When true, section links carry only a fragment
 * @param string|false|null $wikiId Passed on to the hook and to makeSectionLink()
 * @return string Comment with autocomments replaced
 */
private function doSectionLinks(
    $comment,
    $selfLinkTarget = null,
    $samePage = false,
    $wikiId = false
) {
    // @todo $append here is something of a hack to preserve the status
    // quo. Someone who knows more about bidi and such should decide
    // (1) what sensible rendering even *is* for an LTR edit summary on an RTL
    // wiki, both when autocomments exist and when they don't, and
    // (2) what markup will make that actually happen.
    $append = '';
    $comment = preg_replace_callback(
        // To detect the presence of content before or after the
        // auto-comment, we use capturing groups inside optional zero-width
        // assertions. But older versions of PCRE can't directly make
        // zero-width assertions optional, so wrap them in a non-capturing
        // group.
        '!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
        function ( $match ) use ( &$append, $selfLinkTarget, $samePage, $wikiId ) {
            // Ensure all match positions are defined
            $match += [ '', '', '', '' ];

            $pre = $match[1] !== '';
            $auto = $match[2];
            $post = $match[3] !== '';
            $comment = null;

            $this->hookRunner->onFormatAutocomments(
                $comment, $pre, $auto, $post,
                Title::castFromLinkTarget( $selfLinkTarget ),
                $samePage,
                $wikiId );
            if ( $comment !== null ) {
                return $comment;
            }

            if ( $selfLinkTarget ) {
                $section = $auto;
                # Remove links that a user may have manually put in the autosummary
                # This could be improved by copying as much of Parser::stripSectionName as desired.
                $section = str_replace( [
                    '[[:',
                    '[[',
                    ']]'
                ], '', $section );

                // We don't want any links in the auto text to be linked, but we still
                // want to show any [[ ]]
                $sectionText = str_replace( '[[', '&#91;[', $auto );

                // Drop the first character of the guessed section name
                // (presumably the leading "#" - confirm against Parser)
                $section = substr( Parser::guessSectionNameFromStrippedText( $section ), 1 );
                if ( $section !== '' ) {
                    if ( $samePage ) {
                        $sectionTitle = new TitleValue( NS_MAIN, '', $section );
                    } else {
                        $sectionTitle = $selfLinkTarget->createFragmentTarget( $section );
                    }
                    $auto = $this->makeSectionLink(
                        $sectionTitle,
                        $this->userLang->getArrow() . $this->userLang->getDirMark() . $sectionText,
                        $wikiId
                    );
                }
            }

            $prefix = '';
            if ( $pre ) {
                # written summary $presep autocomment (summary /* section */)
                $prefix = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
            }
            if ( $post ) {
                # autocomment $postsep written summary (/* section */ summary)
                $auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
            }
            // Compare against the empty string rather than relying on
            // truthiness, so that an autocomment consisting of "0" still
            // receives the wrapper markup.
            if ( (string)$auto !== '' ) {
                $auto = '<span dir="auto"><span class="autocomment">' . $auto . '</span>';
                // The outer span stays open here and is closed after the
                // whole comment via $append.
                $append .= '</span>';
            }
            return $prefix . $auto;
        },
        $comment
    );
    return $comment . $append;
}
281 
/**
 * Make a section link.
 *
 * When a wiki ID is given and the target has no interwiki component, the
 * link is built as an external link to the foreign wiki's URL; otherwise it
 * is rendered through the link renderer as a preloaded link.
 *
 * @param LinkTarget $target Link target, including the section fragment
 * @param string $text Link text; must already be HTML-escaped
 * @param string|false|null $wikiId ID of the wiki to link to, or null/false
 *   for the local wiki
 * @return string HTML of the link
 */
private function makeSectionLink(
    LinkTarget $target, $text, $wikiId
) {
    if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
        return Linker::makeExternalLink(
            // Fully qualified: no import of WikiMap is visible in this file
            \WikiMap::getForeignURL(
                $wikiId,
                $target->getNamespace() === 0
                    ? $target->getDBkey()
                    : $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
                        ':' . $target->getDBkey(),
                $target->getFragment()
            ),
            $text,
            /* escape = */ false // Already escaped
        );
    }
    return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
}
312 
/**
 * Formats wiki links and media links in text; all other wiki formatting is
 * ignored.
 *
 * Each [[...]] link is replaced either by finished link HTML or by a marker
 * that finalize() expands later. Links whose target cannot be parsed are
 * left in the text unchanged.
 *
 * @param string $comment Text to format links in; must already be HTML-escaped
 * @param LinkTarget|null $selfLinkTarget Context page for relative links and
 *   for fragment-only links
 * @param bool $samePage When true, fragment-only links are not redirected to
 *   $selfLinkTarget
 * @param string|false|null $wikiId ID of the wiki to link to, or null/false
 *   for the local wiki
 * @return string Text with links replaced
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
    return preg_replace_callback(
        '/
            \[\[
            \s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
            :? # ignore optional leading colon
            ([^[\]|]+) # 1. link target; page names cannot include [, ] or |
            (?:\|
                # 2. link text
                # Stop matching at ]] without relying on backtracking.
                ((?:]?[^\]])*+)
            )?
            \]\]
            ([^[]*) # 3. link trail (the text up until the next link)
        /x',
        function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
            // Prefix pattern matching the canonical or localised Media namespace
            $medians = '(?:';
            $medians .= preg_quote(
                $this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' );
            $medians .= '|';
            $medians .= preg_quote(
                $this->contLang->getNsText( NS_MEDIA ),
                '/'
            ) . '):';

            $comment = $match[0];

            // Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
            if ( strpos( $match[1], '%' ) !== false ) {
                $match[1] = strtr(
                    rawurldecode( $match[1] ),
                    [ '<' => '&lt;', '>' => '&gt;' ]
                );
            }

            // Handle link renaming [[foo|text]] will show link as "text"
            if ( $match[2] != "" ) {
                $text = $match[2];
            } else {
                $text = $match[1];
            }
            $submatch = [];
            $linkMarker = null;
            if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
                // Media link; trail not supported.
                $linkRegexp = '/\[\[(.*?)\]\]/';
                $linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
                if ( $linkTarget ) {
                    $linkMarker = $this->addFileLink( $linkTarget, $text );
                }
            } else {
                // Other kind of link
                // Make sure its target is non-empty
                if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
                    $match[1] = substr( $match[1], 1 );
                }
                if ( $match[1] !== false && $match[1] !== '' ) {
                    if ( preg_match(
                        $this->contLang->linkTrail(),
                        $match[3],
                        $submatch
                    ) ) {
                        $trail = $submatch[1];
                    } else {
                        $trail = "";
                    }
                    $linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
                    [ $inside, $trail ] = Linker::splitTrail( $trail );

                    $linkText = $text;
                    $linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

                    try {
                        $target = $this->titleParser->parseTitle( $linkTarget );

                        // A fragment-only link refers to the page the comment belongs to
                        if ( $target->getText() == '' && !$target->isExternal()
                            && !$samePage && $selfLinkTarget
                        ) {
                            $target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
                        }

                        $linkMarker = $this->addPageLink( $target, $linkText . $inside, $wikiId );
                        $linkMarker .= $trail;
                    } catch ( MalformedTitleException $e ) {
                        // Fall through
                    }
                }
            }
            if ( $linkMarker ) {
                // If the link is still valid, go ahead and replace it in!
                // preg_replace() expands "$n" and "\n" in the replacement as
                // backreferences, and the link HTML or trail may contain such
                // sequences from user-supplied text, so escape them first.
                $comment = preg_replace(
                    $linkRegexp,
                    strtr( $linkMarker, [ '\\' => '\\\\', '$' => '\\$' ] ),
                    $comment,
                    1
                );
            }

            return $comment;
        },
        $comment
    );
}
434 
/**
 * Add a deferred link to the list and return its marker.
 *
 * The marker is an escape character followed by the zero-padded index of
 * the callback in $this->links, which is the form finalize() searches for.
 *
 * @param callable $callback Invoked by finalize() to produce the link HTML
 * @return string Marker to embed in the comment text
 * @throws \RuntimeException If the index no longer fits in MAX_ID_SIZE digits
 */
private function addLinkMarker( $callback ) {
    $markerId = count( $this->links );
    $idTooLong = strlen( $markerId ) > self::MAX_ID_SIZE;
    if ( $idTooLong ) {
        throw new \RuntimeException( 'Too many links in comment batch' );
    }

    $this->links[] = $callback;

    $markerFormat = "\x1b%0" . self::MAX_ID_SIZE . 'd';
    return sprintf( $markerFormat, $markerId );
}
449 
/**
 * Link to a LinkTarget.
 *
 * Returns finished HTML when the link can be resolved immediately (a link
 * to another wiki, or a target whose status is already in the link cache).
 * Otherwise the target is added to the pending link batch and a marker is
 * returned for finalize() to expand.
 *
 * @param LinkTarget $target Page to link to
 * @param string $text Link text; must already be HTML-escaped
 * @param string|false|null $wikiId ID of the wiki to link to, or null/false
 *   for the local wiki
 * @return string Link HTML or a link marker
 */
private function addPageLink( LinkTarget $target, $text, $wikiId ) {
    // Handle external links (not including interwiki links)
    if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
        return Linker::makeExternalLink(
            // Fully qualified: no import of WikiMap is visible in this file
            \WikiMap::getForeignURL(
                $wikiId,
                $target->getNamespace() === 0
                    ? $target->getDBkey()
                    : $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
                        ':' . $target->getDBkey(),
                $target->getFragment()
            ),
            $text,
            /* escape = */ false // Already escaped
        );
    }

    if ( $this->linkCache->getGoodLinkID( $target ) ) {
        // Already known
        return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
    } elseif ( $this->linkCache->isBadLink( $target ) ) {
        // Already cached as unknown
        return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
    }

    // Defer page link
    if ( !$this->linkBatch ) {
        $this->linkBatch = $this->linkBatchFactory->newLinkBatch();
        $this->linkBatch->setCaller( __METHOD__ );
    }
    $this->linkBatch->addObj( $target );
    return $this->addLinkMarker( function () use ( $target, $text ) {
        return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
    } );
}
494 
/**
 * Link to a file, returning a marker.
 *
 * The file lookup is queued in $this->fileBatch and performed by
 * flushLinkBatches(); the link itself is rendered when finalize() expands
 * the marker.
 *
 * @param LinkTarget $target The file's title
 * @param string $html Link contents; must already be safe HTML
 * @return string Link marker
 */
private function addFileLink( LinkTarget $target, $html ) {
    $this->fileBatch[] = [
        'title' => $target
    ];
    return $this->addLinkMarker( function () use ( $target, $html ) {
        return Linker::makeMediaLinkFile(
            $target,
            // false when the batch lookup did not find the file
            $this->files[$target->getDBkey()] ?? false,
            $html
        );
    } );
}
514 
/**
 * Execute any pending link batch or file batch.
 *
 * Afterwards both queues are empty; files that were found are merged into
 * $this->files without overwriting entries that are already present.
 */
private function flushLinkBatches() {
    $pendingLinks = $this->linkBatch;
    if ( $pendingLinks ) {
        $pendingLinks->execute();
        $this->linkBatch = null;
    }

    $pendingFiles = $this->fileBatch;
    if ( $pendingFiles ) {
        $foundFiles = $this->repoGroup->findFiles( $pendingFiles );
        $this->files += $foundFiles;
        $this->fileBatch = [];
    }
}
528 
529 }
MediaWiki\CommentFormatter\CommentParser\$fileBatch
array $fileBatch
Input to RepoGroup::findFiles()
Definition: CommentParser.php:58
MediaWiki\CommentFormatter\CommentParser\$links
callable[] $links
Definition: CommentParser.php:53
MediaWiki\CommentFormatter\CommentParser\addLinkMarker
addLinkMarker( $callback)
Add a deferred link to the list and return its marker.
Definition: CommentParser.php:441
LinkCache
Cache for article titles (prefixed DB keys) and ids linked from one source.
Definition: LinkCache.php:41
HtmlArmor
Marks HTML that shouldn't be escaped.
Definition: HtmlArmor.php:30
Linker
Some internal bits split off from Skin.php.
Definition: Linker.php:39
LinkBatch
Class representing a list of titles. The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:42
MediaWiki\CommentFormatter\CommentParser\flushLinkBatches
flushLinkBatches()
Execute any pending link batch or file batch.
Definition: CommentParser.php:518
MediaWiki\CommentFormatter\CommentParser\doSectionLinks
doSectionLinks( $comment, $selfLinkTarget=null, $samePage=false, $wikiId=false)
Converts C-style comments in edit summaries into section links.
Definition: CommentParser.php:197
MediaWiki\CommentFormatter\CommentParser\addPageLink
addPageLink(LinkTarget $target, $text, $wikiId)
Link to a LinkTarget.
Definition: CommentParser.php:459
MediaWiki\Linker\LinkRenderer
Class that generates HTML anchor link elements for pages.
Definition: LinkRenderer.php:43
MediaWiki\CommentFormatter\CommentParser\$contLang
Language $contLang
Definition: CommentParser.php:42
WikiMap\getForeignURL
static getForeignURL( $wikiID, $page, $fragmentId=null)
Convenience to get a url to a page on a foreign wiki.
Definition: WikiMap.php:171
MediaWiki\CommentFormatter\CommentParser\makeSectionLink
makeSectionLink(LinkTarget $target, $text, $wikiId)
Make a section link.
Definition: CommentParser.php:293
MediaWiki\CommentFormatter\CommentParser\$titleParser
TitleParser $titleParser
Definition: CommentParser.php:44
MediaWiki\CommentFormatter\CommentParser\finalize
finalize( $comments)
Execute pending batch queries and replace markers in the specified string(s) with actual links.
Definition: CommentParser.php:140
MediaWiki\CommentFormatter\CommentParser
The text processing backend for CommentFormatter.
Definition: CommentParser.php:32
wfMessage
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Definition: GlobalFunctions.php:1167
MediaWiki\CommentFormatter\CommentParser\$repoGroup
RepoGroup $repoGroup
Definition: CommentParser.php:38
MediaWiki\Linker\LinkTarget\isExternal
isExternal()
Whether this LinkTarget has an interwiki component.
NS_MAIN
const NS_MAIN
Definition: Defines.php:64
MediaWiki\CommentFormatter\CommentParser\$linkRenderer
LinkRenderer $linkRenderer
Definition: CommentParser.php:34
MediaWiki\CommentFormatter\CommentParser\preprocessInternal
preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId, $enableSectionLinks)
Definition: CommentParser.php:165
MediaWiki\Linker\LinkTarget\getNamespace
getNamespace()
Get the namespace index.
File
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition: File.php:67
MediaWiki\CommentFormatter\CommentParser\$linkCache
LinkCache $linkCache
Definition: CommentParser.php:50
MediaWiki\Cache\LinkBatchFactory
Definition: LinkBatchFactory.php:39
Linker\makeExternalLink
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:1027
TitleParser
A title parser service for MediaWiki.
Definition: TitleParser.php:33
MediaWiki\CommentFormatter\CommentParser\$linkBatchFactory
LinkBatchFactory $linkBatchFactory
Definition: CommentParser.php:36
MediaWiki\CommentFormatter\CommentParser\doWikiLinks
doWikiLinks( $comment, $selfLinkTarget=null, $samePage=false, $wikiId=false)
Formats wiki links and media links in text; all other wiki formatting is ignored.
Definition: CommentParser.php:331
MediaWiki\CommentFormatter\CommentParser\$linkBatch
LinkBatch|null $linkBatch
Definition: CommentParser.php:55
Linker\splitTrail
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1760
MediaWiki\CommentFormatter\CommentParser\preprocessUnsafe
preprocessUnsafe( $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
Definition: CommentParser.php:126
NS_MEDIA
const NS_MEDIA
Definition: Defines.php:52
MediaWiki\CommentFormatter\CommentParser\__construct
__construct(LinkRenderer $linkRenderer, LinkBatchFactory $linkBatchFactory, LinkCache $linkCache, RepoGroup $repoGroup, Language $userLang, Language $contLang, TitleParser $titleParser, NamespaceInfo $namespaceInfo, HookContainer $hookContainer)
Definition: CommentParser.php:76
MediaWiki\CommentFormatter\CommentParser\$userLang
Language $userLang
Definition: CommentParser.php:40
MediaWiki\Linker\LinkTarget\getDBkey
getDBkey()
Get the main part of the link target, in canonical database form.
MediaWiki\Linker\LinkTarget\getFragment
getFragment()
Get the link fragment in text form (i.e.
Parser
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition: Parser.php:91
Linker\makeMediaLinkFile
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:959
MediaWiki\CommentFormatter\CommentParser\addFileLink
addFileLink(LinkTarget $target, $html)
Link to a file, returning a marker.
Definition: CommentParser.php:502
MediaWiki\CommentFormatter\CommentParser\$files
File[] $files
Resolved File objects indexed by DB key.
Definition: CommentParser.php:60
Title
Represents a title within MediaWiki.
Definition: Title.php:47
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:26
MediaWiki\CommentFormatter
Definition: CommentBatch.php:3
RepoGroup
Prioritized list of file repositories.
Definition: RepoGroup.php:32
Linker\normalizeSubpageLink
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1452
MediaWiki\HookContainer\HookContainer
HookContainer class.
Definition: HookContainer.php:45
Parser\guessSectionNameFromStrippedText
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition: Parser.php:6112
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Definition: NamespaceInfo.php:35
MediaWiki\HookContainer\HookRunner
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:557
Title\castFromLinkTarget
static castFromLinkTarget( $linkTarget)
Same as newFromLinkTarget, but if passed null, returns null.
Definition: Title.php:313
NS_FILE
const NS_FILE
Definition: Defines.php:70
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
MediaWiki\CommentFormatter\CommentParser\$hookRunner
HookRunner $hookRunner
Definition: CommentParser.php:48
Language
Internationalisation code. See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Definition: Language.php:42
Sanitizer\escapeHtmlAllowEntities
static escapeHtmlAllowEntities( $html)
Given HTML input, escape with htmlspecialchars but un-escape entities.
Definition: Sanitizer.php:987
MediaWiki\CommentFormatter\CommentParser\$namespaceInfo
NamespaceInfo $namespaceInfo
Definition: CommentParser.php:46
MediaWiki\CommentFormatter\CommentParser\preprocess
preprocess(string $comment, LinkTarget $selfLinkTarget=null, $samePage=false, $wikiId=false, $enableSectionLinks=true)
Convert a comment to HTML, but replace links with markers which are resolved later.
Definition: CommentParser.php:109
TitleValue
Represents a page (or page fragment) title within MediaWiki.
Definition: TitleValue.php:40