MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
33 
74 class Parser {
# Version string of this parser implementation
80  const VERSION = '1.6.4';
81 
82  # Flags for Parser::setFunctionHook
83  const SFH_NO_HASH = 1;
84  const SFH_OBJECT_ARGS = 2;
85 
86  # Constants needed for external link processing
87  # Everything except bracket, space, or control characters
88  # \p{Zs} is the Unicode "separator, space" category. It covers the ASCII
89  # space (0x20) as well as U+3000 IDEOGRAPHIC SPACE (see T21052).
90  # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
91  # uses to replace invalid HTML characters.
92  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
93  # Simplified expression to match an IPv4 or IPv6 address, or
94  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
95  const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
96  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
97  // phpcs:ignore Generic.Files.LineLength
98  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
99  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
100 
101  # Regular expression for a non-newline space
102  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
103 
104  # Flags for preprocessToDom
105  const PTD_FOR_INCLUSION = 1;
106 
107  # Allowed values for $this->mOutputType
108  # Parameter to startExternalParse().
109  const OT_HTML = 1; # like parse()
110  const OT_WIKI = 2; # like preSaveTransform()
111  const OT_PREPROCESS = 3; # like preprocess()
112  const OT_MSG = 3; # same value as OT_PREPROCESS
113  const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
114 
# Delimiters placed around strip markers (see extractTagsAndParams() and
# insertStripItem()). Both contain U+007F DELETE, which parse() replaces
# in its input text when $clearState is true.
132  const MARKER_SUFFIX = "-QINU`\"'\x7f";
133  const MARKER_PREFIX = "\x7f'\"`UNIQ-";
134 
135  # Markers used for wrapping the table of contents
136  const TOC_START = '<mw:toc>';
137  const TOC_END = '</mw:toc>';
138 
140  const MAX_TTS = 900;
141 
142  # Persistent:
143  public $mTagHooks = [];
145  public $mFunctionHooks = [];
146  public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
147  public $mFunctionTagHooks = [];
# Returned as-is by getStripList()
148  public $mStripList = [];
149  public $mDefaultStripList = [];
# Reset to an empty array by clearState()
150  public $mVarCache = [];
151  public $mImageParams = [];
# Counter embedded in the markers generated by insertStripItem()
153  public $mMarkerIndex = 0;
# True until firstCallInit() has run once
157  public $mFirstCall = true;
158 
159  # Initialised by initializeVariables()
160 
164  public $mVariables;
165 
169  public $mSubstWords;
170 
# Holds the 'class' and 'preprocessorClass' settings; filled in by the constructor
175  public $mConf;
176 
177  # Initialised in constructor
179 
180  # Initialized in getPreprocessor()
181 
183 
184  # Cleared with clearState():
185 
# ParserOutput for the current parse, created by resetOutput()
188  public $mOutput;
189  public $mAutonumber;
190 
# StripState instance, recreated by clearState()
194  public $mStripState;
195 
201 
# Next value handed out by nextLinkID()
202  public $mLinkID;
206  public $mExpensiveFunctionCount; # number of expensive parser function calls
210 
214  public $mUser; # User object; only used when doing pre-save transform
215 
216  # Temporary
217  # These are variables reset at least once per parse regardless of $clearState
218 
222  public $mOptions;
223 
231  public $mTitle; # Title context, used for self-link rendering and similar things
232  public $mOutputType; # Output type, one of the OT_xxx constants
233  public $ot; # Shortcut alias, see setOutputType()
234  public $mRevisionObject; # The revision object of the specified revision ID
235  public $mRevisionId; # ID to display in {{REVISIONID}} tags
236  public $mRevisionTimestamp; # The timestamp of the specified revision ID
237  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
238  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
239  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
240  public $mInputSize = false; # For {{PAGESIZE}} on current page.
241 
248 
256 
# Forced back to false on the copy in __clone()
261  public $mInParse = false;
262 
# SectionProfiler instance, recreated by clearState()
264  protected $mProfiler;
265 
# Created on first use by getLinkRenderer() and then reused
269  protected $mLinkRenderer;
270 
273 
# The remaining private members are assigned in the constructor
275  private $contLang;
276 
278  private $factory;
279 
282 
290  private $svcOptions;
291 
294 
296  private $nsInfo;
297 
299  private $logger;
300 
302  private $badFileLookup;
303 
# Names of the options the constructor requires. With the ServiceOptions
# calling convention they are checked via assertRequiredOptions(); with the
# legacy convention they are passed as the key list of a new ServiceOptions.
308  public const CONSTRUCTOR_OPTIONS = [
309  // See $wgParserConf documentation
310  'class',
311  'preprocessorClass',
312  // See documentation for the corresponding config options
313  'ArticlePath',
314  'EnableScaryTranscluding',
315  'ExtraInterlanguageLinkPrefixes',
316  'FragmentMode',
317  'LanguageCode',
318  'MaxSigChars',
319  'MaxTocLevel',
320  'MiserMode',
321  'ScriptPath',
322  'Server',
323  'ServerName',
324  'ShowHostnames',
325  'Sitename',
326  'StylePath',
327  'TranscludeCacheExpiry',
328  ];
329 
/**
 * Set up the parser's configuration and the services it collaborates with.
 *
 * Two calling conventions are supported:
 *  - Pre-1.34: the first argument is the ParserConf array (or empty), the
 *    seventh positional argument is a Config object, the eighth a
 *    LinkRendererFactory and the ninth a NamespaceInfo.
 *  - Current: the first argument is a ServiceOptions object that provides
 *    every key listed in self::CONSTRUCTOR_OPTIONS.
 *
 * Every service argument left as null is fetched from MediaWikiServices,
 * except the logger, which falls back to a NullLogger.
 *
 * $magicWordFactory and $badFileLookup were read by the body but were not
 * declared as parameters, so they could never be injected and the positions
 * of the other arguments did not line up with the func_get_arg() indices
 * used for the legacy convention. They are declared here, untyped, at the
 * positions those indices imply.
 *
 * @param ServiceOptions|array|null $svcOptions
 * @param MagicWordFactory|null $magicWordFactory
 * @param Language|null $contLang Content language
 * @param ParserFactory|null $factory
 * @param string|null $urlProtocols Defaults to wfUrlProtocols()
 * @param SpecialPageFactory|null $spFactory
 * @param LinkRendererFactory|null $linkRendererFactory
 * @param NamespaceInfo|null $nsInfo
 * @param LoggerInterface|null $logger
 * @param BadFileLookup|null $badFileLookup
 */
public function __construct(
	$svcOptions = null,
	$magicWordFactory = null,
	Language $contLang = null,
	ParserFactory $factory = null,
	$urlProtocols = null,
	SpecialPageFactory $spFactory = null,
	$linkRendererFactory = null,
	$nsInfo = null,
	$logger = null,
	$badFileLookup = null
) {
	if ( !$svcOptions || is_array( $svcOptions ) ) {
		// Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
		// Config, and the eighth is LinkRendererFactory.
		$this->mConf = (array)$svcOptions;
		if ( empty( $this->mConf['class'] ) ) {
			$this->mConf['class'] = self::class;
		}
		if ( empty( $this->mConf['preprocessorClass'] ) ) {
			$this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
		}
		$this->svcOptions = new ServiceOptions( self::CONSTRUCTOR_OPTIONS,
			$this->mConf, func_num_args() > 6
				? func_get_arg( 6 ) : MediaWikiServices::getInstance()->getMainConfig()
		);
		// In the legacy layout these two services sit one position later
		// than in the current signature, so re-read them by position.
		$linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
		$nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
	} else {
		// New calling convention
		$svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		// $this->mConf is public, so we'll keep those two options there as well for
		// compatibility until it's removed
		$this->mConf = [
			'class' => $svcOptions->get( 'class' ),
			'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
		];
		$this->svcOptions = $svcOptions;
	}

	$this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
	// Pattern for bracketed external links: [protocol + address + url chars, optional label]
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';

	$this->magicWordFactory = $magicWordFactory ??
		MediaWikiServices::getInstance()->getMagicWordFactory();

	$this->contLang = $contLang ?? MediaWikiServices::getInstance()->getContentLanguage();

	$this->factory = $factory ?? MediaWikiServices::getInstance()->getParserFactory();
	$this->specialPageFactory = $spFactory ??
		MediaWikiServices::getInstance()->getSpecialPageFactory();
	$this->linkRendererFactory = $linkRendererFactory ??
		MediaWikiServices::getInstance()->getLinkRendererFactory();
	$this->nsInfo = $nsInfo ?? MediaWikiServices::getInstance()->getNamespaceInfo();
	$this->logger = $logger ?: new NullLogger();
	$this->badFileLookup = $badFileLookup ??
		MediaWikiServices::getInstance()->getBadFileLookup();
}
404 
/**
 * Drop every property held by this object when it is destroyed,
 * starting with the link holder array.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}

	// Work from a snapshot of the property names so the object is not
	// modified while it is being iterated.
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
418 
/**
 * Fix up the copy after cloning: mark it as not being inside a parse,
 * detach reference-bound fields from the source object, and notify
 * 'ParserCloned' hook handlers.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: taking a reference to an object field turns the field itself
	// into a reference (for as long as the other reference lives), and a
	// clone copies such fields as references. Both are defined PHP5
	// behaviours, inconvenient as they are when old PHP4-era hooks pass
	// fields by reference. Give the clone its own storage for these fields.
	$referenceProneFields = [ 'mStripState', 'mVarCache' ];
	foreach ( $referenceProneFields as $field ) {
		// Take a plain (non-reference) copy, then rebind the field to it.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
441 
/**
 * Name of the preprocessor class used when the configuration does not
 * specify one.
 *
 * @return string Class name
 */
public static function getDefaultPreprocessorClass() {
	$defaultClass = Preprocessor_Hash::class;

	return $defaultClass;
}
452 
/**
 * One-time initialisation, performed on the first call only: registers the
 * core tag hooks, initialises the variables and runs the
 * 'ParserFirstCallInit' hook. Later calls do nothing.
 *
 * NOTE(review): only CoreTagHooks is registered here; confirm that the core
 * parser functions are registered elsewhere.
 */
public function firstCallInit() {
	if ( $this->mFirstCall ) {
		$this->mFirstCall = false;

		CoreTagHooks::register( $this );
		$this->initializeVariables();

		// Hand the hook a local alias so that $this itself is never passed
		// by reference (avoids a PHP 7.1 warning).
		$self = $this;
		Hooks::run( 'ParserFirstCallInit', [ &$self ] );
	}
}
470 
/**
 * Reset all per-parse state so the parser can start a fresh parse, then
 * run the 'ParserClearState' hook.
 */
public function clearState() {
	$this->firstCallInit();
	$this->resetOutput();

	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	// Forget everything known about the revision being parsed
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor still bound to another parser is discarded
	$hasForeignPreprocessor = isset( $this->mPreprocessor )
		&& $this->mPreprocessor->parser !== $this;
	if ( $hasForeignPreprocessor ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Hand the hook a local alias so that $this itself is never passed by
	// reference (avoids a PHP 7.1 warning).
	$self = $this;
	Hooks::run( 'ParserClearState', [ &$self ] );
}
520 
/**
 * Replace the current output with a new ParserOutput and register its
 * recordOption() method as the watcher on the parser options.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput();
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
528 
/**
 * Convert wikitext to HTML and return the resulting ParserOutput.
 *
 * @param string $text Wikitext to parse
 * @param Title $title Passed to startParse() and used for title conversion
 * @param ParserOptions $options
 * @param bool $linestart Forwarded to internalParseHalfParsed()
 * @param bool $clearState When true, the parser lock is taken and U+007F is
 *   replaced in the input; the value is also forwarded to startParse()
 * @param int|null $revid If not null, used as the revision ID for the
 *   duration of this parse; the previous revision state is restored afterwards
 * @return ParserOutput The object held in $this->mOutput
 */
546  public function parse(
547  $text, Title $title, ParserOptions $options,
548  $linestart = true, $clearState = true, $revid = null
549  ) {
550  if ( $clearState ) {
551  // We use U+007F DELETE to construct strip markers, so we have to make
552  // sure that this character does not occur in the input text.
553  $text = strtr( $text, "\x7f", "?" );
// Keep the lock object in a local so it stays alive until this method returns
554  $magicScopeVariable = $this->lock();
555  }
556  // Strip U+0000 NULL (T159174)
557  $text = str_replace( "\000", '', $text );
558 
559  $this->startParse( $title, $options, self::OT_HTML, $clearState );
560 
561  $this->currentRevisionCache = null;
562  $this->mInputSize = strlen( $text );
563  if ( $this->mOptions->getEnableLimitReport() ) {
564  $this->mOutput->resetParseStartTime();
565  }
566 
// Remember the current revision state so it can be restored before returning
567  $oldRevisionId = $this->mRevisionId;
568  $oldRevisionObject = $this->mRevisionObject;
569  $oldRevisionTimestamp = $this->mRevisionTimestamp;
570  $oldRevisionUser = $this->mRevisionUser;
571  $oldRevisionSize = $this->mRevisionSize;
572  if ( $revid !== null ) {
573  $this->mRevisionId = $revid;
574  $this->mRevisionObject = null;
575  $this->mRevisionTimestamp = null;
576  $this->mRevisionUser = null;
577  $this->mRevisionSize = null;
578  }
579 
580  // Avoid PHP 7.1 warning from passing $this by reference
581  $parser = $this;
582  Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
583  # No more strip!
584  Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
585  $text = $this->internalParse( $text );
586  Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );
587 
588  $text = $this->internalParseHalfParsed( $text, true, $linestart );
589 
// Set the title text from the target language's conversion, unless
// conversion is disabled by the options or a double-underscore switch,
// or a display title has already been set.
597  if ( !( $options->getDisableTitleConversion()
598  || isset( $this->mDoubleUnderscores['nocontentconvert'] )
599  || isset( $this->mDoubleUnderscores['notitleconvert'] )
600  || $this->mOutput->getDisplayTitle() !== false )
601  ) {
602  $convruletitle = $this->getTargetLanguage()->getConvRuleTitle();
603  if ( $convruletitle ) {
604  $this->mOutput->setTitleText( $convruletitle );
605  } else {
606  $titleText = $this->getTargetLanguage()->convertTitle( $title );
607  $this->mOutput->setTitleText( $titleText );
608  }
609  }
610 
611  # Compute runtime adaptive expiry if set
612  $this->mOutput->finalizeAdaptiveCacheExpiry();
613 
614  # Warn if too many heavyweight parser functions were used
615  if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
616  $this->limitationWarn( 'expensive-parserfunction',
617  $this->mExpensiveFunctionCount,
618  $this->mOptions->getExpensiveParserFunctionLimit()
619  );
620  }
621 
622  # Information on limits, for the benefit of users who try to skirt them
623  if ( $this->mOptions->getEnableLimitReport() ) {
624  $text .= $this->makeLimitReport();
625  }
626 
627  # Wrap non-interface parser output in a <div> so it can be targeted
628  # with CSS (T37247)
629  $class = $this->mOptions->getWrapOutputClass();
630  if ( $class !== false && !$this->mOptions->getInterfaceMessage() ) {
631  $this->mOutput->addWrapperDivClass( $class );
632  }
633 
634  $this->mOutput->setText( $text );
635 
// Restore the revision state saved above and clear per-parse values
636  $this->mRevisionId = $oldRevisionId;
637  $this->mRevisionObject = $oldRevisionObject;
638  $this->mRevisionTimestamp = $oldRevisionTimestamp;
639  $this->mRevisionUser = $oldRevisionUser;
640  $this->mRevisionSize = $oldRevisionSize;
641  $this->mInputSize = false;
642  $this->currentRevisionCache = null;
643 
644  return $this->mOutput;
645  }
646 
/**
 * Record limit-report data on the ParserOutput and build the HTML comments
 * that parse() appends to the parsed text.
 *
 * @return string Two HTML comments: the "NewPP limit report" and the
 *   transclusion expansion time report
 */
653  protected function makeLimitReport() {
654  $maxIncludeSize = $this->mOptions->getMaxIncludeSize();
655 
656  $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
657  if ( $cpuTime !== null ) {
658  $this->mOutput->setLimitReportData( 'limitreport-cputime',
659  sprintf( "%.3f", $cpuTime )
660  );
661  }
662 
// NOTE(review): unlike the CPU time, the wall time is formatted without a null check
663  $wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
664  $this->mOutput->setLimitReportData( 'limitreport-walltime',
665  sprintf( "%.3f", $wallTime )
666  );
667 
// Each of the following entries is stored as a [ used, limit ] pair
668  $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
669  [ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
670  );
671  $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
672  [ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
673  );
674  $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
675  [ $this->mIncludeSizes['post-expand'], $maxIncludeSize ]
676  );
677  $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
678  [ $this->mIncludeSizes['arg'], $maxIncludeSize ]
679  );
680  $this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
681  [ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
682  );
683  $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
684  [ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
685  );
686 
687  foreach ( $this->mStripState->getLimitReport() as list( $key, $value ) ) {
688  $this->mOutput->setLimitReportData( $key, $value );
689  }
690 
691  Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );
692 
// Build the plain-text report that goes into the first HTML comment
693  $limitReport = "NewPP limit report\n";
694  if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
695  $limitReport .= 'Parsed by ' . wfHostname() . "\n";
696  }
697  $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
698  $limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
699  $limitReport .= 'Dynamic content: ' .
700  ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
701  "\n";
702  $limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";
703 
// A hook handler returning false suppresses the default "key: value" line for that entry
704  foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
705  if ( Hooks::run( 'ParserLimitReportFormat',
706  [ $key, &$value, &$limitReport, false, false ]
707  ) ) {
708  $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
709  $valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
710  ->inLanguage( 'en' )->useDatabase( false );
711  if ( !$valueMsg->exists() ) {
712  $valueMsg = new RawMessage( '$1' );
713  }
714  if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
715  $valueMsg->params( $value );
716  $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
717  }
718  }
719  }
720  // Since we're not really outputting HTML, decode the entities and
721  // then re-encode the things that need hiding inside HTML comments.
722  $limitReport = htmlspecialchars_decode( $limitReport );
723 
724  // Sanitize for comment. Note '‐' in the replacement is U+2010,
725  // which looks much like the problematic '-'.
726  $limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
727  $text = "\n<!-- \n$limitReport-->\n";
728 
729  // Add on template profiling data in human/machine readable way
730  $dataByFunc = $this->mProfiler->getFunctionStats();
731  uasort( $dataByFunc, function ( $a, $b ) {
732  return $b['real'] <=> $a['real']; // descending order
733  } );
734  $profileReport = [];
// Only the ten entries with the largest 'real' value are reported
735  foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
736  $profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
737  $item['%real'], $item['real'], $item['calls'],
738  htmlspecialchars( $item['name'] ) );
739  }
740  $text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
741  $text .= implode( "\n", $profileReport ) . "\n-->\n";
742 
743  $this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );
744 
745  // Add other cache related metadata
746  if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
747  $this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
748  }
749  $this->mOutput->setLimitReportData( 'cachereport-timestamp',
750  $this->mOutput->getCacheTime() );
751  $this->mOutput->setLimitReportData( 'cachereport-ttl',
752  $this->mOutput->getCacheExpiry() );
753  $this->mOutput->setLimitReportData( 'cachereport-transientcontent',
754  $this->mOutput->hasDynamicContent() );
755 
// Log pages whose generated node count exceeds a tenth of the configured maximum
756  if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
757  wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
758  $this->getTitle()->getPrefixedDBkey() );
759  }
760  return $text;
761  }
762 
/**
 * Half-parse wikitext from within an ongoing parse: runs the strip hooks
 * and internalParse() in non-main mode. The second pass,
 * internalParseHalfParsed(), is not applied here.
 *
 * @param string $text
 * @param PPFrame|bool $frame Frame to expand in, or false for none
 * @return string Result of internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Hand the hooks a local alias so that $this itself is never passed by
	// reference (avoids a PHP 7.1 warning).
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );

	return $this->internalParse( $text, false, $frame );
}
795 
/**
 * Like recursiveTagParse(), but also runs the result through
 * internalParseHalfParsed() in non-main mode.
 *
 * @param string $text
 * @param PPFrame|bool $frame Frame to expand in, or false for none
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );

	return $this->internalParseHalfParsed( $halfParsed, false );
}
820 
/**
 * Parse the content of an extension tag as if it were a top-level
 * document: half-parse it, run the 'ParserAfterParse' hook, then finish
 * with internalParseHalfParsed() in main mode.
 *
 * @param string $text
 * @return string
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$halfParsed = $this->recursiveTagParse( $text );

	// Local alias so $this itself is not passed by reference
	$self = $this;
	Hooks::run( 'ParserAfterParse', [ &$self, &$halfParsed, &$this->mStripState ] );

	return $this->internalParseHalfParsed( $halfParsed, true );
}
847 
/**
 * Expand the variables in the given text and unstrip the result, without
 * rendering it to HTML. Takes the parser lock and starts a fresh parse in
 * OT_PREPROCESS mode.
 *
 * @param string $text
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid If not null, stored as the current revision ID
 * @param PPFrame|bool $frame Forwarded to replaceVariables()
 * @return string
 */
public function preprocess( $text, ?Title $title,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Kept in a local so the lock object lives until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	// Local alias so $this itself is not passed by reference (PHP 7.1 warning)
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );

	$expanded = $this->replaceVariables( $text, $frame );

	return $this->mStripState->unstripBoth( $expanded );
}
875 
/**
 * Expand the variables in the given text and unstrip the result, for use
 * during an ongoing parse (no lock is taken and no state is reset).
 *
 * @param string $text
 * @param PPFrame|bool $frame Forwarded to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );

	return $this->mStripState->unstripBoth( $expanded );
}
890 
/**
 * Process wikitext used as preload text: substitute $1-style parameters,
 * preprocess it for inclusion, expand it and unstrip the result.
 *
 * @param string $text Raw preload text; may contain message-style parameters
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Values substituted into $text through RawMessage
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	// Kept in a local so the lock object lives until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	// $flags was previously passed to expand() without ever being assigned.
	// Preload text is expanded without substituting template arguments or
	// expanding templates.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
918 
/**
 * Set the user returned by getUser().
 *
 * @param User|null $user Null makes getUser() fall back to the user from
 *   the parser options
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
928 
/**
 * Set the context title. A missing title is replaced by the special page
 * 'Badtitle/Parser', and any fragment is removed.
 *
 * @param Title|null $t
 */
public function setTitle( Title $t = null ) {
	$title = $t ?: Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
946 
/**
 * Get the context title.
 *
 * @return Title
 */
public function getTitle() : Title {
	$title = $this->mTitle;

	return $title;
}
955 
/**
 * Combined accessor/mutator for the context title, implemented with
 * wfSetVar() on $this->mTitle.
 *
 * @param Title|null $x Forwarded to wfSetVar() as the new value
 * @return Title|null Whatever wfSetVar() returns
 */
public function Title( Title $x = null ) : ?Title {
	$result = wfSetVar( $this->mTitle, $x );

	return $result;
}
965 
/**
 * Set the output type and rebuild the $this->ot shortcut array, which
 * holds one boolean per output mode.
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$modeFlags = [];
	$modeFlags['html'] = ( $ot == self::OT_HTML );
	$modeFlags['wiki'] = ( $ot == self::OT_WIKI );
	$modeFlags['pre'] = ( $ot == self::OT_PREPROCESS );
	$modeFlags['plain'] = ( $ot == self::OT_PLAIN );
	$this->ot = $modeFlags;
}
981 
/**
 * Combined accessor/mutator for the output type, implemented with
 * wfSetVar() on $this->mOutputType. Unlike setOutputType(), this does not
 * rebuild $this->ot.
 *
 * @param int|null $x Forwarded to wfSetVar() as the new value
 * @return int Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );

	return $result;
}
991 
/**
 * Get the output object of the current parse.
 *
 * @return ParserOutput
 */
public function getOutput() {
	$output = $this->mOutput;

	return $output;
}
1000 
/**
 * Get the options object of the current parse.
 *
 * @return ParserOptions
 */
public function getOptions() {
	$options = $this->mOptions;

	return $options;
}
1009 
/**
 * Combined accessor/mutator for the parser options, implemented with
 * wfSetVar() on $this->mOptions.
 *
 * @param ParserOptions|null $x Forwarded to wfSetVar() as the new value
 * @return ParserOptions Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	$result = wfSetVar( $this->mOptions, $x );

	return $result;
}
1019 
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The ID before incrementing
 */
public function nextLinkID() {
	$currentId = $this->mLinkID;
	$this->mLinkID++;

	return $currentId;
}
1026 
/**
 * Set the value that the next call to nextLinkID() will return.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1033 
/**
 * Language object for parser functions; identical to getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	$language = $this->getTargetLanguage();

	return $language;
}
1041 
/**
 * Determine the language the output is written in. In order of
 * precedence: the target language set in the options, the user language
 * for interface messages, and finally the page language of the title.
 *
 * @return Language
 */
public function getTargetLanguage() {
	$explicitTarget = $this->mOptions->getTargetLanguage();
	if ( $explicitTarget !== null ) {
		return $explicitTarget;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	return $this->getTitle()->getPageLanguage();
}
1061 
/**
 * Get the user for this parse: the one set with setUser() if there is
 * one, otherwise the user from the parser options.
 *
 * @return User
 */
public function getUser() {
	return $this->mUser ?? $this->mOptions->getUser();
}
1074 
/**
 * Get the preprocessor, creating it on first use from the configured
 * 'preprocessorClass' option.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->svcOptions->get( 'preprocessorClass' );
	$this->mPreprocessor = new $preprocessorClass( $this );

	return $this->mPreprocessor;
}
1087 
/**
 * Get the link renderer, creating it on first use with the stub threshold
 * taken from the current options.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	// XXX The renderer is built with the options current at the time of the
	// first call and is then cached forever.
	if ( !$this->mLinkRenderer ) {
		$renderer = $this->linkRendererFactory->create();
		$this->mLinkRenderer = $renderer;

		$stubThreshold = $this->getOptions()->getStubThreshold();
		$renderer->setStubThreshold( $stubThreshold );
	}

	return $this->mLinkRenderer;
}
1105 
/**
 * Get the magic word factory assigned in the constructor.
 *
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	$factory = $this->magicWordFactory;

	return $factory;
}
1115 
/**
 * Get the content language assigned in the constructor.
 *
 * @return Language
 */
public function getContentLanguage() {
	$language = $this->contLang;

	return $language;
}
1125 
/**
 * Replace every occurrence of the given tags (and every HTML comment) in
 * $text with a unique marker, and collect what was replaced.
 *
 * Tag names are passed through preg_quote() before being embedded in the
 * regular expressions, so a name containing a regex metacharacter or the
 * "/" delimiter cannot corrupt the pattern.
 *
 * @param array $elements Tag names to extract
 * @param string $text Source text
 * @param array &$matches Filled with one entry per marker:
 *   marker => [ tag name, content (null for a self-closing tag), decoded
 *   attributes, original text of the element ]
 * @return string $text with each extracted element replaced by its marker
 */
public static function extractTagsAndParams( $elements, $text, &$matches ) {
	// Shared across calls so that markers remain unique
	static $n = 1;
	$stripped = '';
	$matches = [];

	$quotedNames = array_map(
		static function ( $name ) {
			return preg_quote( $name, '/' );
		},
		$elements
	);
	$taglist = implode( '|', $quotedNames );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			// No further tag or comment in the remaining text
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			list( , $element, $attributes, $close, $inside ) = $p;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				$end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				list( , $tail, $text ) = $q;
			}
		}

		$matches[$marker] = [ $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" ];
	}
	return $stripped;
}
1202 
/**
 * Get the current strip list.
 *
 * @return array
 */
public function getStripList() {
	$stripList = $this->mStripList;

	return $stripList;
}
1211 
/**
 * Get the StripState of the current parse.
 *
 * @return StripState
 */
public function getStripState() {
	$stripState = $this->mStripState;

	return $stripState;
}
1220 
/**
 * Store a piece of text in the strip state as a general item and return
 * the marker that stands in for it.
 *
 * @param string $text
 * @return string The new marker
 */
public function insertStripItem( $text ) {
	// Use the current index in the marker, then advance it for the next item
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;

	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1236 
/**
 * Convert wikitext table markup ({| ... |}, |-, |, !, |+) into HTML table
 * elements, one input line at a time.
 *
 * Tables may nest, so the state of each open table is kept on parallel
 * stacks; the top entry of every stack belongs to the innermost table.
 *
 * @param string $text
 * @return string Text with table markup replaced by HTML; lines outside
 *   any table are passed through unchanged
 */
1243  private function handleTables( $text ) {
1244  $lines = StringUtils::explode( "\n", $text );
1245  $out = '';
1246  $td_history = []; # Is currently a td tag open?
1247  $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1248  $tr_history = []; # Is currently a tr tag open?
1249  $tr_attributes = []; # history of tr attributes
1250  $has_opened_tr = []; # Did this table open a <tr> element?
1251  $indent_level = 0; # indent level of the table
1252 
1253  foreach ( $lines as $outLine ) {
1254  $line = trim( $outLine );
1255 
1256  if ( $line === '' ) { # empty line, go to next line
1257  $out .= $outLine . "\n";
1258  continue;
1259  }
1260 
1261  $first_character = $line[0];
1262  $first_two = substr( $line, 0, 2 );
1263  $matches = [];
1264 
1265  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1266  # First check if we are starting a new table
# The number of leading colons gives the indentation depth
1267  $indent_level = strlen( $matches[1] );
1268 
1269  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1270  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1271 
1272  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
# Push a fresh state entry for the new table onto every stack
1273  array_push( $td_history, false );
1274  array_push( $last_tag_history, '' );
1275  array_push( $tr_history, false );
1276  array_push( $tr_attributes, '' );
1277  array_push( $has_opened_tr, false );
1278  } elseif ( count( $td_history ) == 0 ) {
1279  # Don't do any of the following
# (no table is open, so the line is copied through untouched)
1280  $out .= $outLine . "\n";
1281  continue;
1282  } elseif ( $first_two === '|}' ) {
1283  # We are ending a table
1284  $line = '</table>' . substr( $line, 2 );
1285  $last_tag = array_pop( $last_tag_history );
1286 
# A table that never opened a row gets one empty row
1287  if ( !array_pop( $has_opened_tr ) ) {
1288  $line = "<tr><td></td></tr>{$line}";
1289  }
1290 
1291  if ( array_pop( $tr_history ) ) {
1292  $line = "</tr>{$line}";
1293  }
1294 
1295  if ( array_pop( $td_history ) ) {
1296  $line = "</{$last_tag}>{$line}";
1297  }
1298  array_pop( $tr_attributes );
1299  if ( $indent_level > 0 ) {
1300  $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1301  } else {
1302  $outLine = $line;
1303  }
1304  } elseif ( $first_two === '|-' ) {
1305  # Now we have a table row
1306  $line = preg_replace( '#^\|-+#', '', $line );
1307 
1308  # What's after the tag is now only attributes
1309  $attributes = $this->mStripState->unstripBoth( $line );
1310  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
# Remember the attributes; the <tr> itself is emitted with the row's first cell
1311  array_pop( $tr_attributes );
1312  array_push( $tr_attributes, $attributes );
1313 
1314  $line = '';
1315  $last_tag = array_pop( $last_tag_history );
1316  array_pop( $has_opened_tr );
1317  array_push( $has_opened_tr, true );
1318 
1319  if ( array_pop( $tr_history ) ) {
1320  $line = '</tr>';
1321  }
1322 
1323  if ( array_pop( $td_history ) ) {
1324  $line = "</{$last_tag}>{$line}";
1325  }
1326 
1327  $outLine = $line;
1328  array_push( $tr_history, false );
1329  array_push( $td_history, false );
1330  array_push( $last_tag_history, '' );
1331  } elseif ( $first_character === '|'
1332  || $first_character === '!'
1333  || $first_two === '|+'
1334  ) {
1335  # This might be cell elements, td, th or captions
1336  if ( $first_two === '|+' ) {
1337  $first_character = '+';
1338  $line = substr( $line, 2 );
1339  } else {
1340  $line = substr( $line, 1 );
1341  }
1342 
1343  // Implies both are valid for table headings.
1344  if ( $first_character === '!' ) {
1345  $line = StringUtils::replaceMarkup( '!!', '||', $line );
1346  }
1347 
1348  # Split up multiple cells on the same line.
1349  # FIXME : This can result in improper nesting of tags processed
1350  # by earlier parser steps.
1351  $cells = explode( '||', $line );
1352 
1353  $outLine = '';
1354 
1355  # Loop through each table cell
1356  foreach ( $cells as $cell ) {
1357  $previous = '';
# Captions do not open a row; td and th cells open one if none is open
1358  if ( $first_character !== '+' ) {
1359  $tr_after = array_pop( $tr_attributes );
1360  if ( !array_pop( $tr_history ) ) {
1361  $previous = "<tr{$tr_after}>\n";
1362  }
1363  array_push( $tr_history, true );
1364  array_push( $tr_attributes, '' );
1365  array_pop( $has_opened_tr );
1366  array_push( $has_opened_tr, true );
1367  }
1368 
1369  $last_tag = array_pop( $last_tag_history );
1370 
# Close the previously opened cell before starting the new one
1371  if ( array_pop( $td_history ) ) {
1372  $previous = "</{$last_tag}>\n{$previous}";
1373  }
1374 
1375  if ( $first_character === '|' ) {
1376  $last_tag = 'td';
1377  } elseif ( $first_character === '!' ) {
1378  $last_tag = 'th';
1379  } elseif ( $first_character === '+' ) {
1380  $last_tag = 'caption';
1381  } else {
1382  $last_tag = '';
1383  }
1384 
1385  array_push( $last_tag_history, $last_tag );
1386 
1387  # A cell could contain both parameters and data
1388  $cell_data = explode( '|', $cell, 2 );
1389 
1390  # T2553: Note that a '|' inside an invalid link should not
1391  # be mistaken as delimiting cell parameters
1392  # Bug T153140: Neither should language converter markup.
1393  if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1394  $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1395  } elseif ( count( $cell_data ) == 1 ) {
1396  // Whitespace in cells is trimmed
1397  $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1398  } else {
1399  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1400  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1401  // Whitespace in cells is trimmed
1402  $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1403  }
1404 
1405  $outLine .= $cell;
1406  array_push( $td_history, true );
1407  }
1408  }
1409  $out .= $outLine . "\n";
1410  }
1411 
1412  # Closing open td, tr && table
# NOTE(review): an open cell is always closed with </td> here, even when it
# was a th or caption; $last_tag_history is not consulted, and the
# </dd></dl> wrappers of indented tables are not closed. Confirm that a
# later clean-up pass is expected to repair this.
1413  while ( count( $td_history ) > 0 ) {
1414  if ( array_pop( $td_history ) ) {
1415  $out .= "</td>\n";
1416  }
1417  if ( array_pop( $tr_history ) ) {
1418  $out .= "</tr>\n";
1419  }
1420  if ( !array_pop( $has_opened_tr ) ) {
1421  $out .= "<tr><td></td></tr>\n";
1422  }
1423 
1424  $out .= "</table>\n";
1425  }
1426 
1427  # Remove trailing line-ending (b/c)
1428  if ( substr( $out, -1 ) === "\n" ) {
1429  $out = substr( $out, 0, -1 );
1430  }
1431 
1432  # special case: don't return empty table
1433  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1434  $out = '';
1435  }
1436 
1437  return $out;
1438  }
1439 
/**
 * Run the core wikitext transformation passes over a piece of text.
 *
 * Order of work: the ParserBeforeInternalParse hook (which may abort),
 * variable/template expansion, HTML sanitization, then tables, horizontal
 * rules, double-underscore words, headings, internal links, quotes,
 * external links, magic links and heading finalization.
 *
 * @param string $text Text to transform
 * @param bool $isMain Passed through to finalizeHeadings()
 * @param object|bool $frame Object exposing ->depth and ->expand()
 *   (presumably a PPFrame - confirm), or false to fall back to
 *   replaceVariables()
 * @return string The transformed text, or the (possibly hook-modified)
 *   input if the ParserBeforeInternalParse hook returned false
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	// Kept untouched so finalizeHeadings() can see the raw input
	$rawInput = $text;

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	# Hook to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( !$frame ) {
		# No frame provided: use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# Use frame depth to infer how include/noinclude tags should be handled:
		# depth=0 means this is the top-level document; otherwise it's an included document
		$ptdFlags = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$text = $frame->expand( $this->preprocessToDom( $text, $ptdFlags ) );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		array_keys( $this->mTransparentTagHooks ),
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->handleTables( $text );

	# A run of four or more dashes at the start of a line becomes a rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->handleDoubleUnderscore( $text );

	$text = $this->handleHeadings( $text );
	$text = $this->handleInternalLinks( $text );
	$text = $this->handleAllQuotes( $text );
	$text = $this->handleExternalLinks( $text );

	# handleInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from handleExternalLinks;
	# drop that masking marker now that external links are done
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->handleMagicLinks( $text );

	return $this->finalizeHeadings( $text, $rawInput, $isMain );
}
1515 
/**
 * Finish processing of text already handled by the earlier parsing passes:
 * unstrip, block levels, link holder replacement, language conversion,
 * transparent tags, character reference normalization and tidying.
 *
 * @param string $text Text to finish
 * @param bool $isMain When true, the ParserAfterUnstrip, ParserBeforeTidy
 *   and ParserAfterTidy hooks are run
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$text = Sanitizer::armorFrenchSpaces( $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHoldersPrivate( $text );

	# Conversion is skipped when disabled in the options, when the
	# 'nocontentconvert' double underscore is set, or for interface messages
	$skipConversion = $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| $this->mOptions->getInterfaceMessage();
	if ( !$skipConversion ) {
		# The position of the convert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $this->getTargetLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( !MWTidy::isEnabled() ) {
		# attempt to sanitize at least some nesting problems
		# (T4702 and quite a few others)
		# This code path is buggy and deprecated!
		wfDeprecated( 'disabling tidy', '1.33' );
		$tidyregs = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (T5695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		];

		# Patterns are applied in declaration order, each on the previous result
		$patterns = array_keys( $tidyregs );
		$replacements = array_values( $tidyregs );
		$text = preg_replace( $patterns, $replacements, $text );
	} elseif ( $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1610 
/**
 * Find free external links and "RFC n" / "PMID n" / "ISBN n" magic links
 * in the text and hand every match to magicLinkCallback().
 *
 * Existing <a>...</a> elements and anything inside an HTML tag are matched
 * as well so that the callback can pass them through unchanged.
 *
 * @param string $text
 * @return string Result of preg_replace_callback()
 */
private function handleMagicLinks( $text ) {
	$protocols = wfUrlProtocolsWithoutProtRel();
	$urlClass = self::EXT_LINK_URL_CLASS;
	$address = self::EXT_LINK_ADDR;
	$nbSpace = self::SPACE_NOT_NL; # non-newline space
	$dashOrSpace = "(?:-|{$nbSpace})"; # a dash or a non-newline space
	$spaceRun = "{$nbSpace}++"; # possessive match of 1 or more spaces

	# Extended (x) mode: whitespace and #-comments inside the pattern are ignored
	$regex = '!(?:                            # Start cases
			(<a[ \t\r\n>].*?</a>) |          # m[1]: Skip link text
			(<.*?>) |                        # m[2]: Skip stuff inside HTML elements' . "
			(\b                              # m[3]: Free external links
				(?i:{$protocols})
				({$address}{$urlClass}*)       # m[4]: Post-protocol path
			) |
			\b(?:RFC|PMID) {$spaceRun}       # m[5]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN {$spaceRun} (             # m[6]: ISBN, capture number
				(?: 97[89] {$dashOrSpace}? )?  # optional 13-digit ISBN prefix
				(?: [0-9] {$dashOrSpace}? ){9} # 9 digits with opt. delimiters
				[0-9Xx]                        # check digit
			)\b
		)!xu";

	return preg_replace_callback( $regex, [ $this, 'magicLinkCallback' ], $text );
}
1646 
/**
 * Callback for the regex built in handleMagicLinks().
 *
 * @param array $m Match array: [1] anchor element, [2] HTML tag,
 *   [3] free external link, [4] its post-protocol part, [5] RFC/PMID
 *   number, [6] ISBN number
 * @return string Replacement for the whole match $m[0]
 * @throws MWException If group 5 matched but the text starts with neither
 *   "RFC" nor "PMID"
 */
public function magicLinkCallback( $m ) {
	# Anchors and HTML elements are passed through untouched
	if ( isset( $m[1] ) && $m[1] !== '' ) {
		return $m[0];
	}
	if ( isset( $m[2] ) && $m[2] !== '' ) {
		return $m[0];
	}

	# Free external link
	if ( isset( $m[3] ) && $m[3] !== '' ) {
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	# RFC or PMID
	if ( isset( $m[5] ) && $m[5] !== '' ) {
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			$enabled = $this->mOptions->getMagicRFCLinks();
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$trackingCat = 'magiclink-tracking-rfc';
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			$enabled = $this->mOptions->getMagicPMIDLinks();
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$trackingCat = 'magiclink-tracking-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		if ( !$enabled ) {
			# This kind of magic link is switched off: leave the text alone
			return $m[0];
		}

		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink(
			$url,
			"{$keyword} {$id}",
			true,
			$cssClass,
			[],
			$this->getTitle()
		);
	}

	# ISBN
	if ( isset( $m[6] ) && $m[6] !== '' && $this->mOptions->getMagicISBNLinks() ) {
		$nbSpace = self::SPACE_NOT_NL; # non-newline space
		# Display form: every non-newline space variant becomes a plain space
		$isbn = preg_replace( "/$nbSpace/", ' ', $m[6] );
		# Lookup form: no separators, upper-case check digit
		$num = str_replace( [ '-', ' ', 'x' ], [ '', '', 'X' ], $isbn );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $m[0];
}
1721 
/**
 * Turn a free (unbracketed) external URL into a link or an external image.
 *
 * Escaped angle brackets / non-breaking spaces and trailing punctuation are
 * split off the URL and re-appended after the generated HTML.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Number of characters matched after the protocol
 * @return string HTML, or the unchanged text if nothing but the protocol
 *   would remain after removing the trail
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$entityHit = [];
	$hasEntity = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entityHit,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasEntity ) {
		$cutAt = $entityHit[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Move trailing punctuation to $trail
	$sep = ',;\.:!?';
	# If there is no left bracket, then consider right brackets fair game too
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$reversed = strrev( $url );
	$sepCount = strspn( $reversed, $sep );
	# Don't break a trailing HTML entity by moving the ; into $trail
	# This is in hot code, so use substr_compare to avoid having to
	# create a new string object for the comparison.
	# More optimization: instead of running preg_match with a $
	# anchor, which can be slow, do the match on the reversed
	# string starting at the desired offset.
	# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	$unused = [];
	if ( $sepCount
		&& substr_compare( $url, ";", -$sepCount, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $unused, 0, $sepCount )
	) {
		$sepCount--;
	}
	if ( $sepCount ) {
		$trail = substr( $url, -$sepCount ) . $trail;
		$url = substr( $url, 0, -$sepCount );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
		$attribs = $this->getExternalLinkAttribs( $url );
		$html = Linker::makeExternalLink( $url, $label, true, 'free', $attribs, $this->getTitle() );
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}
	return $html . $trail;
}
1796 
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that longer runs of '=' are
 * consumed before shorter ones can match them.
 *
 * @param string $text
 * @return string
 */
private function handleHeadings( $text ) {
	foreach ( range( 6, 1 ) as $level ) {
		$marker = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = "/^(?:{$marker})[ \\t]*(.+?)[ \\t]*(?:{$marker})\\s*$/m";
		$replacement = "<h{$level}>\\1</h{$level}>";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	return $text;
}
1812 
/**
 * Apply doQuotes() to every line of the text.
 *
 * @param string $text
 * @return string The processed lines joined by "\n"
 */
private function handleAllQuotes( $text ) {
	$converted = '';
	$isFirstLine = true;
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		# Put the newline separator back between lines, not after the last one
		if ( $isFirstLine ) {
			$isFirstLine = false;
		} else {
			$converted .= "\n";
		}
		$converted .= $this->doQuotes( $line );
	}
	return $converted;
}
1829 
/**
 * Convert runs of apostrophes on a single line into <i> / <b> markup.
 *
 * Two apostrophes toggle italics, three toggle bold and five toggle both.
 * Runs of four, or of more than five, apostrophes keep their leading
 * surplus as literal text. When both the bold and italic counts are odd,
 * one bold marker is reinterpreted as an apostrophe followed by italics.
 *
 * @param string $text One line of text
 * @return string The line with apostrophe markup replaced by HTML tags;
 *   returned unchanged if it contains no run of two or more apostrophes
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold the apostrophe runs, even indexes the text between them
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state lists the open tags in opening order ('', 'i', 'b', 'bi', 'ib');
	// 'both' means a ''''' was seen and the tag order is not decided yet, so
	// the text is held in $buffer until the next marker decides it.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would treat
	// the buffered text "0" as empty and silently drop it from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2010 
/**
 * Replace bracketed external links ([url text]) with HTML anchors and
 * register every linked URL in the parser output.
 *
 * @param string $text
 * @return string
 * @throws MWException If splitting on $this->mExtLinkBracketedRegex fails
 */
private function handleExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# Everything before the first link
	$s = array_shift( $bits );

	# The remaining entries are consumed four at a time:
	# URL, protocol (unused here), link text, text following the link
	$total = count( $bits );
	for ( $i = 0; $i < $total; $i += 4 ) {
		$url = $bits[$i];
		$label = $bits[$i + 2];
		$trail = $bits[$i + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$m2 = [];
		if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $m2[0][1];
			$label = substr( $url, $cutAt ) . ' ' . $label;
			$url = substr( $url, 0, $cutAt );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';

		if ( $label != '' ) {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Set linktype for CSS and check for trail
			$linktype = 'text';
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		} else {
			# No link text, e.g. [http://domain.tld/some.link]
			# Autonumber
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
			$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$anchor = Linker::makeExternalLink( $url, $label, false, $linktype,
			$this->getExternalLinkAttribs( $url ), $this->getTitle() );
		$s .= $anchor . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $s;
}
2090 
/**
 * Get the rel attribute value for an external link.
 *
 * @param string|bool $url URL checked against $wgNoFollowDomainExceptions
 * @param object|null $title Object exposing getNamespace() (presumably a
 *   Title - confirm); its namespace is checked against
 *   $wgNoFollowNsExceptions
 * @return string|null 'nofollow' when $wgNoFollowLinks is on and neither
 *   the namespace nor the domain is exempted, null otherwise
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	// The configuration variables must be imported into the local scope;
	// without this declaration they are undefined here and the method
	// could never return 'nofollow'.
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	$ns = $title ? $title->getNamespace() : false;
	if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
2111 
/**
 * Build the HTML attributes for an external link to the given URL.
 *
 * @param string $url
 * @return array Always contains 'rel' (string, or null when no rel value
 *   applies); contains 'target' when an external link target is configured
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	// Either 'nofollow' or null
	$rel = self::getExternalLinkRel( $url, $this->getTitle() );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ], true ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// Only add the separating space when there is an existing
			// value: $rel is null (not '') when no rel applies, and
			// appending to null unconditionally would produce a value
			// with a leading space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2143 
/**
 * Normalize the percent-encoding of a URL: unsafe raw characters are
 * encoded, and existing %XX escapes are either decoded or upper-cased per
 * URL component by normalizeUrlComponent(). A bracketed host that
 * IP::isValid() accepts keeps its literal [ ] brackets.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	$isIPv6 = false;
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
		IP::isValid( rawurldecode( $m[1] ) )
	) {
		$isIPv6 = rawurldecode( $m[1] );
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $hit ) {
			return rawurlencode( $hit[0] );
		},
		$url
	);

	$ret = '';
	$end = strlen( $url );

	# Peel components off the end of the URL, each with its own list of
	# characters that must stay escaped:
	#  - fragment part: 'fragment'
	#  - query part: 'query' minus &=+;
	$tailParts = [
		'#' => '"#%<>[\]^`{|}',
		'?' => '"#%<>[\]^`{|}&=+;',
	];
	foreach ( $tailParts as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start !== false && $start < $end ) {
			$piece = substr( $url, $start, $end - $start );
			$ret = self::normalizeUrlComponent( $piece, $unsafe ) . $ret;
			$end = $start;
		}
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$head = substr( $url, 0, $end );
	$ret = self::normalizeUrlComponent( $head, '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax: the brackets were percent-encoded above, restore them
	if ( $isIPv6 !== false ) {
		$encodedHost = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$encodedHost}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2211 
/**
 * Normalize every %XX escape in one URL component.
 *
 * An escape is decoded when it stands for a character with a byte value
 * from 33 to 126 that is not listed in $unsafe; every other escape is
 * kept, with its hex digits upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must remain escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$keepEscaped = $code <= 32 || $code >= 127 || strpos( $unsafe, $decoded ) !== false;
			if ( $keepEscaped ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape[0] );
			}
			# Unescape it
			return $decoded;
		},
		$component
	);
}
2226 
/**
 * Make an external image from a URL if the parser options allow it.
 *
 * An image is produced when the URL matches EXT_IMAGE_REGEX and either
 * external images are allowed, the URL starts with one of the configured
 * "allow external images from" prefixes, or (if the whitelist is enabled)
 * the URL matches an entry of the 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|bool Result of Linker::makeExternalImage(), or false
 */
private function maybeMakeExternalImage( $url ) {
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
	$imagesexception = !empty( $imagesfrom );

	# $imagesfrom could be either a single string or an array of strings
	$imagematch = false;
	if ( $imagesexception ) {
		$prefixes = is_array( $imagesfrom ) ? $imagesfrom : [ $imagesfrom ];
		foreach ( $prefixes as $prefix ) {
			if ( strpos( $url, $prefix ) === 0 ) {
				$imagematch = true;
				break;
			}
		}
	}

	$text = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $imagematch )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$text = Linker::makeExternalImage( $url );
	}

	if ( !$text && $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$whitelistText = wfMessage( 'external_image_whitelist' )->inContentLanguage()->text();

		foreach ( explode( "\n", $whitelistText ) as $entry ) {
			# Ignore blank entries/comments
			if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
				continue;
			}
			# Sanitize the regex fragment, make it case-insensitive
			$entryRegex = '/' . str_replace( '/', '\\/', $entry ) . '/i';
			if ( preg_match( $entryRegex, $url ) ) {
				# Image matches a whitelist entry
				$text = Linker::makeExternalImage( $url );
				break;
			}
		}
	}
	return $text;
}
2284 
/**
 * Process [[ ]] wikilinks in the text and collect the resulting link
 * holders into $this->mLinkHolders.
 *
 * @param string $text
 * @return string The text as rewritten by handleInternalLinks2()
 */
private function handleInternalLinks( $text ) {
	$collected = $this->mLinkHolders;
	# handleInternalLinks2() takes $text by reference and rewrites it in place;
	# its return value is the set of link holders it created
	$found = $this->handleInternalLinks2( $text );
	$collected->merge( $found );
	return $text;
}
2296 
2302  private function handleInternalLinks2( &$s ) {
2303  static $tc = false, $e1, $e1_img;
2304  # the % is needed to support urlencoded titles as well
2305  if ( !$tc ) {
2306  $tc = Title::legalChars() . '#%';
2307  # Match a link having the form [[namespace:link|alternate]]trail
2308  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2309  # Match cases where there is no "]]", which might still be images
2310  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2311  }
2312 
2313  $holders = new LinkHolderArray( $this );
2314 
2315  # split the entire text string on occurrences of [[
2316  $a = StringUtils::explode( '[[', ' ' . $s );
2317  # get the first element (all text up to first [[), and remove the space we added
2318  $s = $a->current();
2319  $a->next();
2320  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2321  $s = substr( $s, 1 );
2322 
2323  $nottalk = !$this->getTitle()->isTalkPage();
2324 
2325  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2326  $e2 = null;
2327  if ( $useLinkPrefixExtension ) {
2328  # Match the end of a line for a word that's not followed by whitespace,
2329  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2330  $charset = $this->contLang->linkPrefixCharset();
2331  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2332  $m = [];
2333  if ( preg_match( $e2, $s, $m ) ) {
2334  $first_prefix = $m[2];
2335  } else {
2336  $first_prefix = false;
2337  }
2338  } else {
2339  $prefix = '';
2340  }
2341 
2342  # Some namespaces don't allow subpages
2343  $useSubpages = $this->nsInfo->hasSubpages(
2344  $this->getTitle()->getNamespace()
2345  );
2346 
2347  # Loop for each link
2348  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2349  # Check for excessive memory usage
2350  if ( $holders->isBig() ) {
2351  # Too big
2352  # Do the existence check, replace the link holders and clear the array
2353  $holders->replace( $s );
2354  $holders->clear();
2355  }
2356 
2357  if ( $useLinkPrefixExtension ) {
2358  if ( preg_match( $e2, $s, $m ) ) {
2359  list( , $s, $prefix ) = $m;
2360  } else {
2361  $prefix = '';
2362  }
2363  # first link
2364  if ( $first_prefix ) {
2365  $prefix = $first_prefix;
2366  $first_prefix = false;
2367  }
2368  }
2369 
2370  $might_be_img = false;
2371 
2372  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2373  $text = $m[2];
2374  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2375  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2376  # the real problem is with the $e1 regex
2377  # See T1500.
2378  # Still some problems for cases where the ] is meant to be outside punctuation,
2379  # and no image is in sight. See T4095.
2380  if ( $text !== ''
2381  && substr( $m[3], 0, 1 ) === ']'
2382  && strpos( $text, '[' ) !== false
2383  ) {
2384  $text .= ']'; # so that handleExternalLinks($text) works later
2385  $m[3] = substr( $m[3], 1 );
2386  }
2387  # fix up urlencoded title texts
2388  if ( strpos( $m[1], '%' ) !== false ) {
2389  # Should anchors '#' also be rejected?
2390  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2391  }
2392  $trail = $m[3];
2393  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2394  # Invalid, but might be an image with a link in its caption
2395  $might_be_img = true;
2396  $text = $m[2];
2397  if ( strpos( $m[1], '%' ) !== false ) {
2398  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2399  }
2400  $trail = "";
2401  } else { # Invalid form; output directly
2402  $s .= $prefix . '[[' . $line;
2403  continue;
2404  }
2405 
2406  $origLink = ltrim( $m[1], ' ' );
2407 
2408  # Don't allow internal links to pages containing
2409  # PROTO: where PROTO is a valid URL protocol; these
2410  # should be external links.
2411  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2412  $s .= $prefix . '[[' . $line;
2413  continue;
2414  }
2415 
2416  # Make subpage if necessary
2417  if ( $useSubpages ) {
2419  $this->getTitle(), $origLink, $text
2420  );
2421  } else {
2422  $link = $origLink;
2423  }
2424 
2425  // \x7f isn't a default legal title char, so most likely strip
2426  // markers will force us into the "invalid form" path above. But,
2427  // just in case, let's assert that xmlish tags aren't valid in
2428  // the title position.
2429  $unstrip = $this->mStripState->killMarkers( $link );
2430  $noMarkers = ( $unstrip === $link );
2431 
2432  $nt = $noMarkers ? Title::newFromText( $link ) : null;
2433  if ( $nt === null ) {
2434  $s .= $prefix . '[[' . $line;
2435  continue;
2436  }
2437 
2438  $ns = $nt->getNamespace();
2439  $iw = $nt->getInterwiki();
2440 
2441  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2442 
2443  if ( $might_be_img ) { # if this is actually an invalid link
2444  if ( $ns == NS_FILE && $noforce ) { # but might be an image
2445  $found = false;
2446  while ( true ) {
2447  # look at the next 'line' to see if we can close it there
2448  $a->next();
2449  $next_line = $a->current();
2450  if ( $next_line === false || $next_line === null ) {
2451  break;
2452  }
2453  $m = explode( ']]', $next_line, 3 );
2454  if ( count( $m ) == 3 ) {
2455  # the first ]] closes the inner link, the second the image
2456  $found = true;
2457  $text .= "[[{$m[0]}]]{$m[1]}";
2458  $trail = $m[2];
2459  break;
2460  } elseif ( count( $m ) == 2 ) {
2461  # if there's exactly one ]] that's fine, we'll keep looking
2462  $text .= "[[{$m[0]}]]{$m[1]}";
2463  } else {
2464  # if $next_line is invalid too, we need look no further
2465  $text .= '[[' . $next_line;
2466  break;
2467  }
2468  }
2469  if ( !$found ) {
2470  # we couldn't find the end of this imageLink, so output it raw
2471  # but don't ignore what might be perfectly normal links in the text we've examined
2472  $holders->merge( $this->handleInternalLinks2( $text ) );
2473  $s .= "{$prefix}[[$link|$text";
2474  # note: no $trail, because without an end, there *is* no trail
2475  continue;
2476  }
2477  } else { # it's not an image, so output it raw
2478  $s .= "{$prefix}[[$link|$text";
2479  # note: no $trail, because without an end, there *is* no trail
2480  continue;
2481  }
2482  }
2483 
2484  $wasblank = ( $text == '' );
2485  if ( $wasblank ) {
2486  $text = $link;
2487  if ( !$noforce ) {
2488  # Strip off leading ':'
2489  $text = substr( $text, 1 );
2490  }
2491  } else {
2492  # T6598 madness. Handle the quotes only if they come from the alternate part
2493  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2494  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2495  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2496  $text = $this->doQuotes( $text );
2497  }
2498 
2499  # Link not escaped by : , create the various objects
2500  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2501  # Interwikis
2502  if (
2503  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2504  Language::fetchLanguageName( $iw, null, 'mw' ) ||
2505  in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2506  )
2507  ) {
2508  # T26502: filter duplicates
2509  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2510  $this->mLangLinkLanguages[$iw] = true;
2511  $this->mOutput->addLanguageLink( $nt->getFullText() );
2512  }
2513 
2517  $s = rtrim( $s . $prefix ) . $trail; # T175416
2518  continue;
2519  }
2520 
2521  if ( $ns == NS_FILE ) {
2522  if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->getTitle() ) ) {
2523  if ( $wasblank ) {
2524  # if no parameters were passed, $text
2525  # becomes something like "File:Foo.png",
2526  # which we don't want to pass on to the
2527  # image generator
2528  $text = '';
2529  } else {
2530  # recursively parse links inside the image caption
2531  # actually, this will parse them in any other parameters, too,
2532  # but it might be hard to fix that, and it doesn't matter ATM
2533  $text = $this->handleExternalLinks( $text );
2534  $holders->merge( $this->handleInternalLinks2( $text ) );
2535  }
2536  # cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
2537  $s .= $prefix . $this->armorLinks(
2538  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2539  continue;
2540  }
2541  } elseif ( $ns == NS_CATEGORY ) {
2545  $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2546 
2547  if ( $wasblank ) {
2548  $sortkey = $this->getDefaultSort();
2549  } else {
2550  $sortkey = $text;
2551  }
2552  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2553  $sortkey = str_replace( "\n", '', $sortkey );
2554  $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2555  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2556 
2557  continue;
2558  }
2559  }
2560 
2561  # Self-link checking. For some languages, variants of the title are checked in
2562  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2563  # for linking to a different variant.
2564  if ( $ns != NS_SPECIAL && $nt->equals( $this->getTitle() ) && !$nt->hasFragment() ) {
2565  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2566  continue;
2567  }
2568 
2569  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2570  # @todo FIXME: Should do batch file existence checks, see comment below
2571  if ( $ns == NS_MEDIA ) {
2572  # Give extensions a chance to select the file revision for us
2573  $options = [];
2574  $descQuery = false;
2575  Hooks::run( 'BeforeParserFetchFileAndTitle',
2576  [ $this, $nt, &$options, &$descQuery ] );
2577  # Fetch and register the file (file title may be different via hooks)
2578  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2579  # Cloak with NOPARSE to avoid replacement in handleExternalLinks
2580  $s .= $prefix . $this->armorLinks(
2581  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2582  continue;
2583  }
2584 
2585  # Some titles, such as valid special pages or files in foreign repos, should
2586  # be shown as bluelinks even though they're not included in the page table
2587  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2588  # batch file existence checks for NS_FILE and NS_MEDIA
2589  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2590  $this->mOutput->addLink( $nt );
2591  $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2592  } else {
2593  # Links will be added to the output link list after checking
2594  $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2595  }
2596  }
2597  return $holders;
2598  }
2599 
/**
 * Build a "known" link through the link renderer and armour it so that
 * URL protocols inside the generated HTML are not picked up again later.
 *
 * The trail is split with Linker::splitTrail(); the first part is placed
 * inside the link text and the second part is appended after the link.
 *
 * @param Title $nt Link target (presumably a Title; only getPrefixedText()
 *   is called on it here, and it is handed to makeKnownLink())
 * @param string $text Link text; it is wrapped in HtmlArmor unescaped, so it
 *   is presumably expected to be HTML already. When it compares equal to ''
 *   the HTML-escaped prefixed title text is used instead
 * @param string $trail Text following the link
 * @param string $prefix Text placed inside the link, before $text
 * @return string Armoured link followed by the remaining trail
 */
private function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	$trailParts = Linker::splitTrail( $trail );
	$inside = $trailParts[0];
	$trail = $trailParts[1];

	# Fall back to the (escaped) title when no link text was supplied
	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$renderer = $this->getLinkRenderer();
	$html = $renderer->makeKnownLink( $nt, new HtmlArmor( "$prefix$label$inside" ) );

	return $this->armorLinks( $html ) . $trail;
}
2626 
/**
 * Insert a NOPARSE marker in front of every URL protocol found in $text.
 *
 * Protocols are matched case-insensitively at a word boundary using
 * $this->mUrlProtocols; each match is prefixed with
 * self::MARKER_PREFIX . "NOPARSE". Callers use this to cloak URLs in
 * generated markup so that handleExternalLinks() won't touch them.
 *
 * @param string $text Text that may contain absolute URLs
 * @return string Result of the preg_replace() call
 */
private function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$cloaked = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $cloaked, $text );
}
2641 
/**
 * Run the block-level pass over $text.
 *
 * Pure delegate to BlockLevelPass::doBlockLevels(); both arguments are
 * forwarded unchanged.
 *
 * @param string $text Text to process
 * @param bool $linestart Forwarded as-is (presumably whether $text begins
 *   at the start of a line)
 * @return string Whatever BlockLevelPass::doBlockLevels() returns
 */
public function doBlockLevels( $text, $linestart ) {
	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $processed;
}
2653 
/**
 * Return the value of a built-in magic variable.
 *
 * Flow:
 *  - the 'ParserGetVariableValueVarCache' hook runs first; unless it returns
 *    false, a value already present in $this->mVarCache is returned as-is
 *  - the timestamp from the parser options is converted to TS_UNIX and can
 *    be altered by the 'ParserGetVariableValueTs' hook
 *  - known variables are computed in the switch below; unknown ones are
 *    handed to the 'ParserGetVariableValueSwitch' hook
 *
 * Values computed by a `break`-ing case are stored in $this->mVarCache
 * (when $index is truthy). Cases that `return` directly (the config-backed
 * variables, 'directionmark' and the default case) bypass that cache write.
 *
 * @param string $index Magic variable ID, e.g. 'currentday'
 * @param bool|PPFrame $frame Only forwarded (by reference) to the
 *   'ParserGetVariableValueSwitch' hook
 * @return mixed Usually a string or an integer; null when no handler of the
 *   switch hook sets a value for an unknown variable
 */
private function expandMagicVariable( $index, $frame = false ) {
	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;

	// A hook handler returning false disables the cache lookup for this call
	if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$self, &$this->mVarCache ] ) ) {
		if ( isset( $this->mVarCache[$index] ) ) {
			return $this->mVarCache[$index];
		}
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', [ &$self, &$ts ] );

	$lang = $this->getFunctionLang();

	// Format the (possibly hook-adjusted) timestamp via MWTimestamp::getInstance()
	$utc = static function ( $format ) use ( $ts ) {
		return MWTimestamp::getInstance( $ts )->format( $format );
	};
	// Same, but via MWTimestamp::getLocalInstance()
	$local = static function ( $format ) use ( $ts ) {
		return MWTimestamp::getLocalInstance( $ts )->format( $format );
	};

	switch ( $index ) {
		case '!':
			$result = '|';
			break;

		# --- Month and day ---
		case 'currentmonth':
			$result = $lang->formatNum( $utc( 'm' ), true );
			break;
		case 'currentmonth1':
			$result = $lang->formatNum( $utc( 'n' ), true );
			break;
		case 'currentmonthname':
			$result = $lang->getMonthName( $utc( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$result = $lang->getMonthNameGen( $utc( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$result = $lang->getMonthAbbreviation( $utc( 'n' ) );
			break;
		case 'currentday':
			$result = $lang->formatNum( $utc( 'j' ), true );
			break;
		case 'currentday2':
			$result = $lang->formatNum( $utc( 'd' ), true );
			break;
		case 'localmonth':
			$result = $lang->formatNum( $local( 'm' ), true );
			break;
		case 'localmonth1':
			$result = $lang->formatNum( $local( 'n' ), true );
			break;
		case 'localmonthname':
			$result = $lang->getMonthName( $local( 'n' ) );
			break;
		case 'localmonthnamegen':
			$result = $lang->getMonthNameGen( $local( 'n' ) );
			break;
		case 'localmonthabbrev':
			$result = $lang->getMonthAbbreviation( $local( 'n' ) );
			break;
		case 'localday':
			$result = $lang->formatNum( $local( 'j' ), true );
			break;
		case 'localday2':
			$result = $lang->formatNum( $local( 'd' ), true );
			break;

		# --- Page names ---
		case 'pagename':
			$result = wfEscapeWikiText( $this->getTitle()->getText() );
			break;
		case 'pagenamee':
			$result = wfEscapeWikiText( $this->getTitle()->getPartialURL() );
			break;
		case 'fullpagename':
			$result = wfEscapeWikiText( $this->getTitle()->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$result = wfEscapeWikiText( $this->getTitle()->getPrefixedURL() );
			break;
		case 'subpagename':
			$result = wfEscapeWikiText( $this->getTitle()->getSubpageText() );
			break;
		case 'subpagenamee':
			$result = wfEscapeWikiText( $this->getTitle()->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$result = wfEscapeWikiText( $this->getTitle()->getRootText() );
			break;
		case 'rootpagenamee':
			$rootDbForm = str_replace( ' ', '_', $this->getTitle()->getRootText() );
			$result = wfEscapeWikiText( wfUrlencode( $rootDbForm ) );
			break;
		case 'basepagename':
			$result = wfEscapeWikiText( $this->getTitle()->getBaseText() );
			break;
		case 'basepagenamee':
			$baseDbForm = str_replace( ' ', '_', $this->getTitle()->getBaseText() );
			$result = wfEscapeWikiText( wfUrlencode( $baseDbForm ) );
			break;
		case 'talkpagename':
			# Empty when the page cannot have a talk page
			$result = $this->getTitle()->canHaveTalkPage()
				? wfEscapeWikiText( $this->getTitle()->getTalkPage()->getPrefixedText() )
				: '';
			break;
		case 'talkpagenamee':
			$result = $this->getTitle()->canHaveTalkPage()
				? wfEscapeWikiText( $this->getTitle()->getTalkPage()->getPrefixedURL() )
				: '';
			break;
		case 'subjectpagename':
			$result = wfEscapeWikiText( $this->getTitle()->getSubjectPage()->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$result = wfEscapeWikiText( $this->getTitle()->getSubjectPage()->getPrefixedURL() );
			break;

		# --- Page and revision metadata ---
		case 'pageid': // requested in T25427
			# Inform the edit saving system that getting the canonical output
			# after page insertion requires a parse that used that exact page ID
			$this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
			$result = $this->getTitle()->getArticleID();
			if ( !$result ) {
				$result = $this->mOptions->getSpeculativePageId();
				if ( $result ) {
					$this->mOutput->setSpeculativePageIdUsed( $result );
				}
			}
			break;
		case 'revisionid':
			if (
				$this->svcOptions->get( 'MiserMode' ) &&
				!$this->mOptions->getInterfaceMessage() &&
				// @TODO: disallow this word on all namespaces
				$this->nsInfo->isContent( $this->getTitle()->getNamespace() )
			) {
				// Use a stub result instead of the actual revision ID in order to avoid
				// double parses on page save but still allow preview detection (T137900)
				if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
					$result = '-';
				} else {
					$this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
					$result = '';
				}
			} else {
				# Inform the edit saving system that getting the canonical output after
				# revision insertion requires a parse that used that exact revision ID
				$this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
				$result = $this->getRevisionId();
				if ( $result === 0 ) {
					# Try the revision object; keep the 0 if there is none
					$revision = $this->getRevisionObject();
					if ( $revision ) {
						$result = $revision->getId();
					}
				}
				if ( !$result ) {
					$result = $this->mOptions->getSpeculativeRevId();
					if ( $result ) {
						$this->mOutput->setSpeculativeRevIdUsed( $result );
					}
				}
			}
			break;
		case 'revisionday':
			$result = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
			break;
		case 'revisionday2':
			$result = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
			break;
		case 'revisionmonth':
			$result = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
			break;
		case 'revisionmonth1':
			$result = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
			break;
		case 'revisionyear':
			$result = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
			break;
		case 'revisiontimestamp':
			$result = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
			break;
		case 'revisionuser':
			# Inform the edit saving system that getting the canonical output after
			# revision insertion requires a parse that used the actual user ID
			$this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
			$result = $this->getRevisionUser();
			break;
		case 'revisionsize':
			$result = $this->getRevisionSize();
			break;

		# --- Namespaces ---
		case 'namespace':
			$nsText = $this->contLang->getNsText( $this->getTitle()->getNamespace() );
			$result = str_replace( '_', ' ', $nsText );
			break;
		case 'namespacee':
			$nsText = $this->contLang->getNsText( $this->getTitle()->getNamespace() );
			$result = wfUrlencode( $nsText );
			break;
		case 'namespacenumber':
			$result = $this->getTitle()->getNamespace();
			break;
		case 'talkspace':
			if ( $this->getTitle()->canHaveTalkPage() ) {
				$result = str_replace( '_', ' ', $this->getTitle()->getTalkNsText() );
			} else {
				$result = '';
			}
			break;
		case 'talkspacee':
			if ( $this->getTitle()->canHaveTalkPage() ) {
				$result = wfUrlencode( $this->getTitle()->getTalkNsText() );
			} else {
				$result = '';
			}
			break;
		case 'subjectspace':
			$result = str_replace( '_', ' ', $this->getTitle()->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$result = wfUrlencode( $this->getTitle()->getSubjectNsText() );
			break;

		# --- Remaining date and time variables ---
		case 'currentdayname':
			# format( 'w' ) is 0-based; getWeekdayName() is called with that value plus one
			$result = $lang->getWeekdayName( (int)$utc( 'w' ) + 1 );
			break;
		case 'currentyear':
			$result = $lang->formatNum( $utc( 'Y' ), true );
			break;
		case 'currenttime':
			$result = $lang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$result = $lang->formatNum( $utc( 'H' ), true );
			break;
		case 'currentweek':
			# @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$result = $lang->formatNum( (int)$utc( 'W' ) );
			break;
		case 'currentdow':
			$result = $lang->formatNum( $utc( 'w' ) );
			break;
		case 'localdayname':
			$result = $lang->getWeekdayName( (int)$local( 'w' ) + 1 );
			break;
		case 'localyear':
			$result = $lang->formatNum( $local( 'Y' ), true );
			break;
		case 'localtime':
			$result = $lang->time( $local( 'YmdHis' ), false, false );
			break;
		case 'localhour':
			$result = $lang->formatNum( $local( 'H' ), true );
			break;
		case 'localweek':
			# @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$result = $lang->formatNum( (int)$local( 'W' ) );
			break;
		case 'localdow':
			$result = $lang->formatNum( $local( 'w' ) );
			break;

		# --- Site statistics ---
		case 'numberofarticles':
			$result = $lang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$result = $lang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$result = $lang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$result = $lang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$result = $lang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$result = $lang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$result = $lang->formatNum( SiteStats::edits() );
			break;

		case 'currenttimestamp':
			$result = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$result = $local( 'YmdHis' );
			break;
		case 'currentversion':
			$result = SpecialVersion::getVersion();
			break;

		# --- Returned directly, without being written to mVarCache ---
		case 'articlepath':
			return $this->svcOptions->get( 'ArticlePath' );
		case 'sitename':
			return $this->svcOptions->get( 'Sitename' );
		case 'server':
			return $this->svcOptions->get( 'Server' );
		case 'servername':
			return $this->svcOptions->get( 'ServerName' );
		case 'scriptpath':
			return $this->svcOptions->get( 'ScriptPath' );
		case 'stylepath':
			return $this->svcOptions->get( 'StylePath' );
		case 'directionmark':
			return $lang->getDirMark();
		case 'contentlanguage':
			return $this->svcOptions->get( 'LanguageCode' );

		case 'pagelanguage':
			$result = $lang->getCode();
			break;
		case 'cascadingsources':
			$result = CoreParserFunctions::cascadingsources( $this );
			break;
		default:
			# Unknown variable: let hook handlers supply a value
			$hookValue = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				[ &$self, &$this->mVarCache, &$index, &$hookValue, &$frame ]
			);

			return $hookValue;
	}

	if ( $index ) {
		$this->mVarCache[$index] = $result;
	}

	return $result;
}
3001 
/**
 * Return a slice of the revision timestamp and, when no revision object is
 * available yet, flag the output if that slice could change shortly.
 *
 * @param int $start Offset passed to substr()
 * @param int $len Length passed to substr()
 * @param int $mtts Number of seconds added to time() to obtain the
 *   "future" timestamp the slice is compared against
 * @param string $variable Name used in the reason text of the output flag
 * @return string substr() of $this->getRevisionTimestamp()
 */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
	# Slice of the timestamp to be used for this revision
	$current = substr( $this->getRevisionTimestamp(), $start, $len );

	# With an associated revision there is nothing that could still vary
	if ( $this->getRevisionObject() ) {
		return $current;
	}

	# Compute the same slice for a timestamp $mtts seconds in the future.
	# This future is relative to the current time and not that of the
	# parser options, so the two rendered values can be compared.
	$futureTimestamp = wfTimestamp( TS_MW, time() + $mtts );
	$future = substr( $this->contLang->userAdjust( $futureTimestamp, '' ), $start, $len );

	if ( $current !== $future ) {
		# Inform the edit saving system that getting the canonical output after
		# revision insertion requires a parse that used an actual revision timestamp
		$this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
	}

	return $current;
}
3033 
/**
 * Populate $this->mVariables and $this->mSubstWords with magic word arrays
 * built by the magic word factory from its variable IDs and subst IDs.
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$idsOfVariables = $factory->getVariableIDs();
	$idsOfSubstWords = $factory->getSubstIDs();

	$this->mVariables = $factory->newArray( $idsOfVariables );
	$this->mSubstWords = $factory->newArray( $idsOfSubstWords );
}
3045 
/**
 * Preprocess wikitext and return whatever the preprocessor's
 * preprocessToObj() produces for it.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Bit field forwarded unchanged to preprocessToObj();
 *   self::PTD_FOR_INCLUSION is the flag declared for this method
 * @return PPNode Presumably a PPNode tree - confirm against the
 *   Preprocessor implementation in use
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3072 
/**
 * Preprocess $text and expand it in the given frame.
 *
 * Empty text, and text longer than the max include size from the parser
 * options, is returned unchanged.
 *
 * @param string $text Wikitext to expand
 * @param bool|PPFrame|array $frame false to create a fresh frame; a PPFrame
 *   to use as-is; anything else is handed to newCustomFrame() (a debug
 *   message is logged in that case)
 * @param bool $argsOnly When true, expansion uses PPFrame::NO_TEMPLATES
 * @return string Result of $frame->expand(), or the untouched input
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text? Also, prevent too big inclusions!
	$length = strlen( $text );
	if ( $length < 1 ) {
		return $text;
	}
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			$this->logger->debug(
				__METHOD__ . " called using plain parameters instead of " .
				"a PPFrame instance. Creating custom frame."
			);
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$tree = $this->preprocessToDom( $text );

	return $frame->expand( $tree, $argsOnly ? PPFrame::NO_TEMPLATES : 0 );
}
3116 
/**
 * Record that a parser limitation was hit: add a warning to the output and
 * put the page into the matching tracking category.
 *
 * @param string $limitationType Base key; the message "<type>-warning" and
 *   the tracking category "<type>-category" are derived from it
 * @param string|int $current Passed to the message as a numeric parameter
 * @param string|int $max Passed to the message as a numeric parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# It does no harm if $current and $max are passed but the message does not need them.
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( "$limitationType-warning" );
	$message = $message->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( "$limitationType-category" );
}
3152 
/**
 * Expand one double-brace construct ({{...}}) and return its replacement.
 *
 * The candidates are tried in this order, stopping at the first that
 * produces text: subst handling, magic variables (only when there are no
 * arguments), parser functions, and finally template / special page /
 * interwiki transclusion. If nothing matches, the original wikitext is
 * reconstructed and returned.
 *
 * @param array $piece Parts of the construct; this method reads
 *   $piece['title'] (the part before the first |), $piece['parts'] (the
 *   argument nodes) and $piece['lineStart']
 * @param PPFrame $frame Frame the construct is expanded in
 * @return array Either [ 'object' => ... ] (unexpanded result that still
 *   belongs to the current frame) or [ 'text' => ... ]
 */
public function braceSubstitution( $piece, $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	# @todo FIXME: If piece['parts'] is null then the call to getLength()
	# below won't work b/c this $args isn't an object
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->expandMagicVariable( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = Linker::normalizeSubpageLink(
			$this->getTitle(), $part1, $subpage
		);
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# preg_replace() returns the new string rather than modifying its
			# subject, so the result must be assigned back; otherwise a title
			# that already starts with ':' would end up as "[[::...]]".
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3500 
/**
 * Invoke a registered parser function and normalise what it returns.
 *
 * The name is resolved through the case-sensitive synonym table first and,
 * failing that, lower-cased with the content language and resolved through
 * the case-insensitive table.
 *
 * @param object $frame Frame used to expand PPNode arguments and handed to
 *   SFH_OBJECT_ARGS callbacks (presumably a PPFrame -- confirm)
 * @param string $function Parser function name as written
 * @param array $args Arguments; values may be PPNode objects or plain values
 * @return array Always has the key 'found'. When the function is known it
 *   also carries whatever the callback returned, with a positional [0]
 *   folded into 'text'; 'isChildObj' is set to true when the callback
 *   asked for parsing ('noparse' false) and 'text' was turned into a DOM.
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	# Resolve the synonym: case sensitive table first, then case insensitive
	if ( !isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->contLang->lc( $function );
		if ( !isset( $this->mFunctionSynonyms[0][$function] ) ) {
			return [ 'found' => false ];
		}
		$function = $this->mFunctionSynonyms[0][$function];
	} else {
		$function = $this->mFunctionSynonyms[1][$function];
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	$allArgs = [ &$parser ];

	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Callback wants nodes: wrap everything that is not already a PPNode,
		# except the entry at key 0 which is passed through untouched
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			$funcArgs[] = ( $v instanceof PPNode || $k === 0 )
				? $v
				: $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
		}

		# Frame first, then all arguments bundled as one array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Callback wants trimmed plain text, one positional parameter each
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$plain = $frame->expand( $v );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$plain = $v;
			} else {
				$plain = "$k=$v";
			}
			$allArgs[] = trim( $plain );
		}
	}

	$result = $callback( ...$allArgs );

	# Function hooks may return either a wikitext string or an array holding
	# the string plus flags; bring both forms to the array shape we return.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [ 'found' => true ];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	$noparse = $result['noparse'] ?? true;
	$preprocessFlags = $result['preprocessFlags'] ?? 0;

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3602 
/**
 * Get the preprocessor DOM of a template, using the per-parser caches.
 *
 * @param Title $title Template title (type inferred from the methods
 *   called on it; no type hint is declared)
 * @return array Two elements: the DOM (or false when no text could be
 *   fetched) and the title that was finally used
 */
public function getTemplateDom( $title ) {
	$requestedTitle = $title;
	$domKey = $title->getPrefixedDBkey();

	# Follow a previously recorded redirect before consulting the DOM cache
	if ( isset( $this->mTplRedirCache[$domKey] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$domKey];
		$title = Title::makeTitle( $ns, $dbk );
		$domKey = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$domKey] ) ) {
		return [ $this->mTplDomCache[$domKey], $title ];
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		$this->mTplDomCache[$domKey] = false;
		return [ false, $title ];
	}

	# NOTE(review): the DOM is stored under the key computed before the
	# fetch. If the fetch resolved to a different title, a later call that
	# follows mTplRedirCache looks up the resolved key instead -- confirm
	# whether that extra fetch is intended.
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$domKey] = $dom;

	if ( !$title->equals( $requestedTitle ) ) {
		$this->mTplRedirCache[$requestedTitle->getPrefixedDBkey()] = [
			$title->getNamespace(),
			$title->getDBkey()
		];
	}

	return [ $dom, $title ];
}
3642 
3655  $cacheKey = $title->getPrefixedDBkey();
3656  if ( !$this->currentRevisionCache ) {
3657  $this->currentRevisionCache = new MapCacheLRU( 100 );
3658  }
3659  if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
3660  $this->currentRevisionCache->set( $cacheKey,
3661  // Defaults to Parser::statelessFetchRevision()
3662  call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
3663  );
3664  }
3665  return $this->currentRevisionCache->get( $cacheKey );
3666  }
3667 
3674  return (
3675  $this->currentRevisionCache &&
3676  $this->currentRevisionCache->has( $title->getPrefixedText() )
3677  );
3678  }
3679 
/**
 * Load the known-current revision of a title from a replica database.
 *
 * @param Title $title
 * @param Parser|bool $parser Not used by this implementation
 * @return mixed Whatever Revision::newKnownCurrent() returns
 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	return Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );
}
3694 
/**
 * Fetch the unparsed text of a template and register its dependencies
 * with the parser output.
 *
 * @param Title $title (type inferred; no type hint is declared)
 * @return array Two elements: the text as returned by the template
 *   callback (strings have U+007F replaced by "?") and the final title
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$stuff = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

	$rev = $stuff['revision'] ?? null;
	$finalTitle = $stuff['finalTitle'] ?? $title;
	$deps = $stuff['deps'] ?? [];

	$text = $stuff['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}

	foreach ( $deps as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
		if ( !$dep['title']->equals( $this->getTitle() ) || !( $rev instanceof Revision ) ) {
			continue;
		}
		// Self-transclusion; final result may change based on the new page version
		$this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $rev->getSha1() );
	}

	return [ $text, $finalTitle ];
}
3722 
/**
 * Fetch the unparsed text of a template; the title half of
 * fetchTemplateAndTitle()'s result is discarded.
 *
 * @param Title $title (type inferred; no type hint is declared)
 * @return mixed First element returned by fetchTemplateAndTitle()
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );
	return $text;
}
3731 
/**
 * Static template fetcher: loads the wikitext for a title, following at
 * most one redirect, and collects the pages it depended on.
 *
 * @param Title $title (type inferred; no type hint is declared)
 * @param Parser|bool $parser Parser to fetch current revisions through,
 *   or a falsy value to load straight from Revision
 * @return array With keys 'revision' (last revision object loaded, or
 *   null), 'text' (wikitext or false), 'finalTitle' and 'deps' (list of
 *   arrays with 'title', 'page_id' and 'rev_id')
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = false;
	$skip = false;
	$finalTitle = $title;
	$deps = [];
	$rev = null;

	# Two passes at most: the requested page, then one redirect target
	for ( $pass = 0; $pass < 2 && is_object( $title ); $pass++ ) {
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}

		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			$rev = Revision::newFromTitle( $title );
		}
		$revId = $rev ? $rev->getId() : 0;

		# If there is no current revision, there is no page
		if ( !$rev && $id === false ) {
			MediaWikiServices::getInstance()->getLinkCache()->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $revId
		];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $revId
			];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			Hooks::run( 'ParserFetchTemplate',
				[ $parser, $title, $rev, &$text, &$deps ] );

			if ( $text === null || $text === false ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but a MediaWiki: page may still exist as a message
			$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();
			$message = wfMessage( $contentLanguage->lcfirst( $title->getText() ) )
				->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}

		if ( !$content ) {
			break;
		}

		# Redirect? The loop ends on the next check if there is no target
		# object to follow.
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	return [
		'revision' => $rev,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
}
3829 
/**
 * Fetch a file and register it as an image dependency of the output.
 *
 * @param Title $title (type inferred; no type hint is declared)
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (falsy when none was found) and
 *   the title, replaced by the file's own title when the two differ
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( $file && !$title->equals( $file->getTitle() ) ) {
		# ...and once more under the title the file actually lives at
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}

	return [ $file, $title ];
}
3851 
/**
 * Look up a file without registering it as a dependency.
 *
 * Both lookups go through the RepoGroup service; previously the sha1
 * lookup used the static RepoGroup::singleton() accessor while the name
 * lookup used the service container.
 *
 * @param Title $title (type inferred; no type hint is declared)
 * @param array $options Recognised keys here: 'broken' (forces a false
 *   result) and 'sha1' (look up by key instead of by name). The whole
 *   array is also passed on to the repo lookup.
 * @return mixed false when 'broken' is set, otherwise the result of the
 *   repo lookup
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $title, $options );
}
3872 
/**
 * Transclude a page from another wiki by fetching it over HTTP, with the
 * response cached in the main WAN object cache.
 *
 * @param Title $title (type inferred; no type hint is declared)
 * @param string $action Value of the 'action' URL parameter to request
 * @return string The fetched content, or a content-language message
 *   describing why nothing could be transcluded
 */
public function interwikiTransclude( $title, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$hasWikiId = ( $wikiId !== false );

	$fname = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$cacheTtl = $this->svcOptions->get( 'TranscludeCacheExpiry' );

	$fetch = function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
		$req = MWHttpRequest::factory( $url, [], $fname );

		$status = $req->execute(); // Status object
		if ( !$status->isOK() ) {
			// Failed requests must not be stored
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// Remote side reported lag; cap how long we keep the result
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $status->isOK() ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback( $cacheKey, $cacheTtl, $fetch, $cacheOptions );

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// No content, and the HTTP status tells the reader what went wrong
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3939 
/**
 * Substitute a triple-brace argument ({{{name|default}}}).
 *
 * @param array $piece Uses the keys 'title' and 'parts'
 * @param object $frame Frame the argument is looked up in (presumably a
 *   PPFrame -- confirm)
 * @return array Either [ 'object' => ... ] when a default or the literal
 *   triple-brace construct is used, or [ 'text' => ... ] with the
 *   argument value
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$argName = trim( $nameWithSpaces );

	$text = $frame->getArgument( $argName );
	$object = false;
	$error = false;

	if (
		$text === false
		&& $parts->getLength() > 0
		&& (
			$this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		)
	) {
		# No match in frame, use the supplied default
		$object = $parts->item( 0 )->getChildren();
	}

	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$error = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $object === false && $text === false ) {
		# No match anywhere: keep the construct as written
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $error !== false ) {
		$text .= $error;
	}

	if ( $object !== false ) {
		return [ 'object' => $object ];
	}

	return [ 'text' => $text ];
}
3985 
/**
 * Return the text to be used for a given extension tag.
 *
 * In HTML output (or for function tag hooks in HTML/pre output) the
 * registered hook is called; otherwise the tag is reassembled as text.
 * Unless the marker type is 'none', the result is stored in the strip
 * state and a strip marker is returned in its place.
 *
 * @param array $params Uses the keys 'name', 'attr', 'inner', 'close'
 *   and 'attributes'; all but 'name' are optional
 * @param object $frame Frame used for expansion (presumably a PPFrame --
 *   confirm)
 * @throws MWException When a hook returns an unknown 'markerType'
 * @return string Strip marker, raw hook output, or an expansion error
 *   that is passed through unchanged
 */
public function extensionSubstitution( $params, $frame ) {
	static $errorStr = '<span class="error">';
	static $errorLen = 20;

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	// Only inspect $attrText when there is one: passing null to substr()
	// is deprecated as of PHP 8.1 and can never match the error prefix.
	if ( $attrText !== null && substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];

			// Avoid PHP 7.1 warning from passing $this by reference
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// Not rendering the tag: rebuild its source form instead
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			// isset() instead of is_null(): same result for a null value,
			// but no undefined-index notice when 'close' is absent.
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4096 
/**
 * Add to one of the include size counters, unless doing so would push it
 * past the configured maximum include size.
 *
 * @param string $type Key into mIncludeSizes (callers in this file use
 *   'post-expand' and 'arg')
 * @param int $size Amount to add
 * @return bool False if the limit would be exceeded (counter is left
 *   untouched), true if the counter was increased
 */
public function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4112 
4119  $this->mExpensiveFunctionCount++;
4120  return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4121  }
4122 
/**
 * Strip double-underscore magic words (such as __NOTOC__) from the text
 * and apply their effects to the parser state and output.
 *
 * @param string $text
 * @return string Text with the magic words removed; the first __TOC__ is
 *   replaced by a placeholder comment
 */
private function handleDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Put a placeholder where the first one was; the TOC is filled in
		# at the end.
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );

		# Drop any further occurrences.
		$text = $tocWord->replace( '', $text );
	}

	# Now match and remove the rest of them
	$this->mDoubleUnderscores = $this->magicWordFactory
		->getDoubleUnderscoreArray()
		->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}

	# An explicit __TOC__ wins over __NOTOC__
	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if (
		isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->getTitle()->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ always overrides __NOINDEX__, see T16899 -- which is why
	# 'index' is handled after 'noindex'.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( $this->mDoubleUnderscores as $magicId => $unused ) {
		$this->mOutput->setProperty( $magicId, '' );
	}

	return $text;
}
4177 
/**
 * Add a tracking category for the current title by delegating to the
 * parser output object.
 *
 * @param string $msg Passed through as the first argument of the output
 *   object's addTrackingCategory()
 * @return mixed Whatever the output object's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$currentTitle = $this->getTitle();

	return $this->mOutput->addTrackingCategory( $msg, $currentTitle );
}
4186 
/**
 * Number the headings, give them unique anchors, attach edit-section
 * markers, build the table of contents and splice everything back into
 * the text.
 *
 * Fix relative to the previous revision: when a heading has no fallback
 * anchor, nothing is recorded for it in the fallback bookkeeping any
 * more. Previously `$refers[false]` was written, which PHP stores under
 * integer key 0, so a later heading whose id is "0" was wrongly treated
 * as a duplicate and suffixed with "_2".
 *
 * @param string $text HTML containing <h1>..<h6> elements
 * @param string $origText Original wikitext; preprocessed here to locate
 *   each section's offset
 * @param bool $isMain When true, the section list is stored on the output
 *   and the TOC may be appended to the lead section
 * @return string
 */
private function finalizeHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page.
	# When true, actual presence still depends on post-cache transforms.
	$maybeShowEditLink = !isset( $this->mDoubleUnderscores['noeditsection'] );

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$full = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->getTitle()->getPrefixedDBkey();
	# Temporarily switch to OT_WIKI while walking the original wikitext;
	# the previous output type is restored after the loop.
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Lower-cased ids already handed out, used to keep anchors unique
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only track the fallback id when there is one. Writing
		# $refers[false] would land on integer key 0 and collide with a
		# heading whose id is "0".
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine( $linkAnchor, $tocline,
				$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			# Note: despite the variable name, this adds up mb_strlen()
			# results, i.e. character counts.
			$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
				$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->getTitle()->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline( $level,
			$matches['attrib'][$headlineCount], $anchor, $headline,
			$editlink, $fallbackAnchor );

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		// Let hook handlers see (and, via the reference, alter) each
		// section as it is assembled.
		Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full .= implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	} else {
		return $full;
	}
}
4590 
/**
 * Run the pre-save transform over wikitext in OT_WIKI mode.
 *
 * NULL bytes are stripped and line endings normalised unconditionally;
 * pstPass2() (subst expansion, signatures, pipe tricks) only runs when
 * the parser options enable the pre-save transform.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title the text belongs to
 * @param User $user User performing the save (used for signatures)
 * @param ParserOptions $options
 * @param bool $clearState Whether to lock the parser and reset its state first
 * @return string Transformed wikitext with strip markers expanded
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	// The lock handle is only kept alive; it is never read.
	// NOTE(review): presumably released when the variable leaves scope - confirm in lock().
	$magicScopeVariable = $clearState ? $this->lock() : null;

	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalise line endings. The latter
	// should already be handled in TextContent subclasses but is kept for
	// backwards-compatibility with callers that only invoke PST.
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$transformed = $this->mStripState->unstripBoth( $text );

	// Reset the user set above
	$this->setUser( null );

	return $transformed;
}
4628 
/**
 * Pre-save transform helper: expands substitutions, signatures and
 * "pipe trick" links.
 *
 * @param string $text Wikitext to transform
 * @param User $user User whose signature replaces tilde sequences
 * @return string Transformed wikitext
 */
private function pstPass2( $text, $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		# strtr() tries the longest key first, so five tildes win over four and three
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( 'user-signature', 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257).
	// The brackets must be U+FF08/U+FF09: with ASCII parentheses this becomes
	// a plain nested group that matches every "[[Foo|]]" link.
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\u{FF08}$tc+\u{FF09})\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma,
	// i.e. ", " or U+FF0C)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |\u{FF0C})$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4698 
/**
 * Build the wikitext signature for a user.
 *
 * A validated "fancy" signature is returned after cleaning; in every other
 * case the result is the 'signature' / 'signature-anon' message rendered
 * in the content language with the escaped user name and nickname.
 *
 * @param User &$user User to sign for
 * @param string|bool $nickname Nickname to use; false reads the user's 'nickname' option
 * @param bool|null $fancySig Whether the nickname is raw wikitext; null reads
 *   the user's 'fancysig' option
 * @return string Signature wikitext
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Fill in whatever the caller did not supply from the user's options
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	$hasNickname = ( $nickname !== null && $nickname !== '' );
	if ( !$hasNickname ) {
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' ) ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# The signature may contain markup, so it has to validate first
		if ( $this->validateSig( $nickname ) !== false ) {
			# Valid: clean it up (if needed) and use it as-is
			return $this->cleanSig( $nickname, true );
		}
		# Invalid: fall back to the plain user name
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
	}

	# Make sure the nickname doesn't get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# Still here, so produce a link to the user page via the signature message
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	$msgName = $user->isAnon() ? 'signature-anon' : 'signature';

	$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();
	return $message->title( $this->getTitle() )->text();
}
4753 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|bool The unchanged text when well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}
	return false;
}
4763 
/**
 * Clean up signature text: force every template call to be substituted,
 * remove nested tilde signatures and expand the result through the
 * preprocessor.
 *
 * @param string $text Signature wikitext
 * @param bool $parsing True when called from inside a running parse; when
 *   false the parser is locked and a fresh OT_PREPROCESS parse is started
 *   against $wgTitle
 * @return string Cleaned signature (returned unchanged when the
 *   clean-signatures option is off)
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;
	if ( $standalone ) {
		global $wgTitle;
		$magicScopeVariable = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	$cleaningEnabled = $this->mOptions->getCleanSignatures();
	if ( !$cleaningEnabled ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$substWord = $this->magicWordFactory->get( 'subst' );
	# Matches any "{{" that is not already followed by a subst synonym
	$substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
	$text = preg_replace( $substRegex, '{{' . $substWord->getSynonym( 0 ), $text );

	$text = self::cleanSigInSig( $text );

	$dom = $this->preprocessToDom( $text );
	$frame = $this->getPreprocessor()->newFrame();
	$text = $frame->expand( $dom );

	return $standalone ? $this->mStripState->unstripBoth( $text ) : $text;
}
4804 
/**
 * Remove runs of three to five tildes so a signature cannot itself
 * contain signature markup.
 *
 * @param string $text
 * @return string Text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4815 
/**
 * Public wrapper around startParse() that can additionally pin the
 * revision ID.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState
 * @param int|null $revId Stored in mRevisionId when not null
 */
public function startExternalParse( ?Title $title, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $title, $options, $outputType, $clearState );
	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4834 
/**
 * Set the title, options and output type for a parse, optionally
 * clearing the parser state afterwards.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to call clearState() once everything is set
 */
private function startParse( ?Title $title, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4851 
/**
 * Preprocess message text, guarding against re-entrant calls.
 *
 * While one call is in progress, any nested call returns its input
 * unchanged.
 *
 * @param string $text Message wikitext
 * @param ParserOptions $options Passed through to preprocess()
 * @param Title|null $title Context title; falls back to $wgTitle when falsy
 * @return string Preprocessed text, or $text unchanged on a re-entrant call
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Always release the guard: if preprocess() threw and the flag stayed
		# set, every later call would silently return untransformed text.
		$executing = false;
	}
}
4879 
/**
 * Register a callback for an extension tag and add the tag to the
 * strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}

	$previous = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;

	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
4917 
/**
 * Register a callback for a "transparent" tag. Unlike setHook(), the tag
 * is not added to the strip list; these hooks are invoked from
 * replaceTransparentTags().
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setTransparentTagHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# The message used to name a non-existent "setTransparentHook" method
		throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
	}
	$oldVal = $this->mTransparentTagHooks[$tag] ?? null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
4945 
/**
 * Drop all tag hooks and function tag hooks and restore the strip list
 * to its default. Transparent tag hooks are left untouched.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
4954 
/**
 * Register a parser function callback and index every synonym of its
 * magic word for lookup.
 *
 * @param string $id Magic word ID of the function
 * @param callable $callback
 * @param int $flags Bitfield of self::SFH_* constants; SFH_NO_HASH
 *   suppresses the leading '#' on the registered synonyms
 * @throws MWException If no magic word is found for $id
 * @return callable|null The callback previously registered for $id, if any
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$names = $magicWord->getSynonyms();
	# 0 or 1, used as the first-level key of mFunctionSynonyms
	$sensitive = (int)$magicWord->isCaseSensitive();
	$addHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $names as $name ) {
		# Case-insensitive words are stored lower-cased
		if ( !$sensitive ) {
			$name = $this->contLang->lc( $name );
		}
		# Add leading hash
		if ( $addHash ) {
			$name = '#' . $name;
		}
		# Remove a single trailing colon
		if ( substr( $name, -1 ) === ':' ) {
			$name = substr( $name, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$name] = $id;
	}

	return $previous;
}
5028 
/**
 * List the IDs of all registered parser function hooks.
 *
 * @return array Keys of mFunctionHooks
 */
public function getFunctionHooks() {
	# Run first-call initialisation before reading the hook table
	$this->firstCallInit();
	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}
5038 
/**
 * Register a function tag hook and add the tag to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback
 * @param int $flags Stored alongside the callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return array|null The [ callback, flags ] pair previously registered, if any
 */
public function setFunctionTagHook( $tag, callable $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}

	$previous = $this->mFunctionTagHooks[$tag] ?? null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
5063 
/**
 * Replace link placeholders in $text in place. Public wrapper that
 * forwards to replaceLinkHoldersPrivate().
 *
 * @param string &$text Text to modify
 * @param int $options Forwarded unchanged
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$this->replaceLinkHoldersPrivate(
		$text,
		$options
	);
}
5075 
/**
 * Replace link placeholders in $text in place using mLinkHolders.
 *
 * @param string &$text Text to modify
 * @param int $options Unused by this method
 */
private function replaceLinkHoldersPrivate( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5086 
/**
 * Return $text with link placeholders replaced by
 * mLinkHolders->replaceText().
 *
 * @param string $text
 * @return string
 */
private function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;
	return $holders->replaceText( $text );
}
5097 
/**
 * Render the contents of a gallery tag to HTML.
 *
 * Each non-empty line of $text has the form "File name|options and caption".
 * Recognised per-image options are alt=, link= and whatever parameters
 * the file's media handler accepts (except width); anything else becomes
 * the caption, the last unrecognised part winning.
 *
 * @param string $text Inner text of the gallery tag
 * @param array $params Tag attributes; 'mode', 'showfilename', 'caption',
 *   'perrow', 'widths' and 'heights' are interpreted here, and the whole
 *   array is passed to the gallery as additional options
 * @return string Gallery HTML
 */
public function renderImageGallery( $text, $params ) {
	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->getTitle() );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	// Presence of the attribute is what matters, not its value
	$ig->setShowFilename( isset( $params['showfilename'] ) );
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# Resolve the file name, defaulting to the File namespace. Without
		# this assignment $title is undefined in everything below.
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = $this->stripAltText( $match, false );
							// The pipe must be escaped: unescaped it is an
							// alternation, and any value ending in "}-" would
							// match and be truncated below.
							if ( preg_match( '/^-\{R\|(.*)\}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								// Drop the leading "-{R|" and the trailing "}-"
								$linkValue = substr( $linkValue, 4, -2 );
							}
							list( $type, $target ) = $this->parseLinkParameter( $linkValue );
							if ( $type === 'link-url' ) {
								$link = $target;
								$this->mOutput->addExternalLink( $target );
							} elseif ( $type === 'link-title' ) {
								$link = $target->getLinkURL();
								$this->mOutput->addLink( $target );
							}
							break;
						default:
							// Must be a handler specific parameter; such names only
							// enter $paramMap when $handler is set.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								$this->logger->debug(
									"$parameterMatch failed parameter validation" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5270 
/**
 * Get the image parameter map and matching magic word array for a media
 * handler, cached per handler class.
 *
 * @param MediaHandler|bool $handler Handler object, or a falsy value when
 *   the file has none
 * @return array [ map of magic word ID => [ type, parameter name ],
 *   magic word array built from the map's keys ]
 */
private function getImageParams( $handler ) {
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$handlerClass] ) ) {
		# Built-in parameters, grouped by the type they are filed under
		static $internalParamNames = [
			'horizAlign' => [ 'left', 'right', 'center', 'none' ],
			'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
				'bottom', 'text-bottom' ],
			'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
				'upright', 'border', 'link', 'alt', 'class' ],
		];
		# Built once per request and shared by all handler classes
		static $internalParamMap;
		if ( !$internalParamMap ) {
			$internalParamMap = [];
			foreach ( $internalParamNames as $group => $groupNames ) {
				foreach ( $groupNames as $paramName ) {
					// For grep: img_left, img_right, img_center, img_none,
					// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
					// img_bottom, img_text_bottom,
					// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
					// img_border, img_link, img_alt, img_class
					$magicId = str_replace( '-', '_', "img_$paramName" );
					$internalParamMap[$magicId] = [ $group, $paramName ];
				}
			}
		}

		# Handler parameters are layered on top of the built-in ones
		$combined = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
				$combined[$magicId] = [ 'handler', $handlerParam ];
			}
		}

		$this->mImageParams[$handlerClass] = $combined;
		$this->mImageParamsMagicArray[$handlerClass] =
			$this->magicWordFactory->newArray( array_keys( $combined ) );
	}

	return [
		$this->mImageParams[$handlerClass],
		$this->mImageParamsMagicArray[$handlerClass]
	];
}
5320 
/**
 * Parse image options text and hand the result to Linker::makeImageLink().
 *
 * @param Title $title Title of the file to embed
 * @param string $options Pipe-separated option text ("options|alt text")
 * @param LinkHolderArray|bool $holders Passed to stripAltText(); when falsy
 *   the parser's own link holders are used
 * @return string HTML returned by Linker::makeImageLink()
 */
public function makeImage( $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Protect LanguageConverter markup when splitting into parts.
	# $parts must be assigned here; the loop below iterates over it.
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us.
	# From here on $options is the hook's option array, not the option text.
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							# parseLinkParameter() replaces the parameter name with the
							# link type, or null when the value is not a usable link
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		# Anything that is not a valid parameter becomes the caption; the last one wins
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5530 
/**
 * Classify the value of a link= image parameter and register the link
 * target with the parser output.
 *
 * @param string $value Raw parameter value
 * @return array [ type, target ]:
 *   - [ 'no-link', false ] for an empty value
 *   - [ 'link-url', string ] for a valid external URL
 *   - [ 'link-title', Title ] for a valid internal title
 *   - [ null, false ] when the value is neither
 */
private function parseLinkParameter( $value ) {
	$invalid = [ null, false ];

	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	$prots = $this->mUrlProtocols;
	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		# Starts with a known protocol, so the whole value must be a URL
		$chars = self::EXT_LINK_URL_CLASS;
		$addr = self::EXT_LINK_ADDR;
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return $invalid;
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return $invalid;
	}
	$this->mOutput->addLink( $linkTitle );
	return [ 'link-title', $linkTitle ];
}
5573 
/**
 * Reduce caption wikitext to plain text that is safe to use in an
 * alt or title attribute.
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Link holders to resolve placeholders
 *   with; when falsy the parser's own mLinkHolders are used
 * @return string Caption with placeholders expanded and all tags stripped
 */
private function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from handleInternalLinks2(), mLinkHolders won't be up-to-date.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$plain = $this->mStripState->unstripBoth( $plain );

	# Compatibility hack!  In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$plain = preg_replace( "/
		& # 1. entity prefix
		(?= # 2. followed by:
		(?: # a. one of the legacy semicolon-less named entities
		A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
		C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
		GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
		O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
		U(?:acute|circ|grave|uml)|Yacute|
		a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
		c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
		divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
		frac(?:1(?:2|4)|34)|
		gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
		i(?:acute|circ|excl|grave|quest|uml)|laquo|
		lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
		m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
		not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
		o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
		p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
		s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
		u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$)) # b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx", '&amp;', $plain );

	return Sanitizer::stripAllTags( $plain );
}
5631 
/**
 * Expand variables in attribute text and then expand strip markers.
 * The result is written back through the reference and also returned.
 *
 * @param string &$text Text to expand; modified in place
 * @param PPFrame|bool $frame Frame passed on to replaceVariables()
 * @return string The expanded text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $expanded );
	return $text;
}
5645 
/**
 * List every registered tag name: transparent tag hooks first, then
 * ordinary tag hooks, then function tag hooks.
 *
 * @return array Tag names
 */
public function getTags() {
	$this->firstCallInit();

	$transparent = array_keys( $this->mTransparentTagHooks );
	$regular = array_keys( $this->mTagHooks );
	$functional = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparent, $regular, $functional );
}
5659 
/**
 * Get the parser function synonym table, as filled in by
 * setFunctionHook().
 *
 * @return array mFunctionSynonyms
 */
public function getFunctionSynonyms() {
	# Run first-call initialisation before reading the table
	$this->firstCallInit();
	$synonyms = $this->mFunctionSynonyms;
	return $synonyms;
}
5668 
/**
 * Get the URL protocol pattern held by this parser. It is interpolated
 * into regular expressions by parseLinkParameter().
 *
 * @return string mUrlProtocols
 */
public function getUrlProtocols() {
	$protocols = $this->mUrlProtocols;
	return $protocols;
}
5676 
/**
 * Replace every transparent tag in $text with the output of its
 * registered callback.
 *
 * @param string $text
 * @return string Text with the tags replaced
 */
public function replaceTransparentTags( $text ) {
	$found = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	# Swaps each matched tag for a marker and records it in $found
	$text = self::extractTagsAndParams( $tagNames, $text, $found );

	$replacements = [];
	foreach ( $found as $marker => $data ) {
		[ $element, $content, $params, $tag ] = $data;
		$tagName = strtolower( $element );

		# Without a hook the original tag text is put back unchanged
		$replacements[$marker] = isset( $this->mTransparentTagHooks[$tagName] )
			? call_user_func( $this->mTransparentTagHooks[$tagName], $content, $params, $this )
			: $tag;
	}

	return strtr( $text, $replacements );
}
5708 
/**
 * Break wikitext input into sections, and either pull or replace some
 * particular section's text.
 *
 * $sectionId is split on '-'. The last piece is the section index to look
 * for; any earlier piece equal to 'T' switches on self::PTD_FOR_INCLUSION
 * for preprocessing (for example "T-2").
 *
 * A section runs from its heading up to, but not including, the next heading
 * whose level is less than or equal to its own, so nested subsections are
 * part of it. Section 0 is everything before the first heading.
 *
 * @param string $text Wikitext to look in
 * @param string|int $sectionId Section identifier, see above
 * @param string $mode 'get' to return the section, 'replace' to return the
 *  whole text with the section swapped for $newText
 * @param string $newText Replacement text in 'replace' mode; in 'get' mode
 *  the value returned when the section does not exist
 * @return string
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	// Keep the returned object alive until this function exits: lock() ties
	// the release of the parser lock to it.
	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$frame = $this->getPreprocessor()->newFrame();

	$wantGet = ( $mode === 'get' );
	$wantReplace = ( $mode === 'replace' );

	# Process section extraction flags
	$idParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $idParts );
	$flags = in_array( 'T', $idParts, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			return $wantGet ? '' : $newText;
		}
		return $wantGet ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();
	$result = '';

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			# In replace mode everything ahead of the target is copied through
			if ( $wantReplace ) {
				$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $wantGet ? $newText : $text;
	}

	# Find the end of the section, including nested sections.
	# $node is known to be set here, so the first pass always runs.
	while ( $node ) {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			if ( $heading['i'] != $sectionIndex && $heading['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $wantGet ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	}

	# Write out the remainder (in replace mode only)
	if ( $wantReplace ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$result .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $result ) ) {
		# Re-insert stripped tags
		$result = rtrim( $this->mStripState->unstripBoth( $result ) );
	}

	return $result;
}
5848 
/**
 * Return the text of one section of $text.
 *
 * Thin wrapper: delegates to extractSections() in 'get' mode.
 *
 * @param string $text Wikitext to look in
 * @param string|int $sectionId Section identifier, passed through unchanged
 * @param string $defaultText Value returned when the section is not found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';
	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5866 
/**
 * Return $oldText with the content of one section replaced by $newText.
 *
 * Thin wrapper: delegates to extractSections() in 'replace' mode.
 *
 * @param string $oldText Wikitext to operate on
 * @param string|int $sectionId Section identifier, passed through unchanged
 * @param string $newText Replacement text for the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';
	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
5882 
/**
 * Get an array of preprocessor section information.
 *
 * The top-level preprocessor nodes are walked in order. Each 'h' node starts
 * a new entry; every other node is appended to the entry being built. The
 * first entry (index 0, level 0, empty heading) collects whatever comes
 * before the first heading and is always present.
 *
 * @param string $text Wikitext to inspect
 * @return array[] List of arrays with the keys:
 *  - index: 'i' value reported by the heading node (0 for the lead entry)
 *  - level: 'level' value reported by the heading node (0 for the lead entry)
 *  - offset: strlen()-based offset at which the entry starts
 *  - heading: original text of the heading node ('' for the lead entry)
 *  - text: original text of the whole entry, heading included
 */
public function getFlatSectionInfo( $text ) {
	// Held until return; lock() ties the release of the parser lock to it.
	$magicScopeVariable = $this->lock();
	$this->startParse( null, new ParserOptions, self::OT_PLAIN, true );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $text, 0 );

	$sections = [];
	$pending = [
		'index' => 0,
		'level' => 0,
		'offset' => 0,
		'heading' => '',
		'text' => ''
	];
	$byteOffset = 0;

	for ( $node = $root->getFirstChild(); $node; $node = $node->getNextSibling() ) {
		$original = $frame->expand( $node, PPFrame::RECOVER_ORIG );

		if ( $node->getName() !== 'h' ) {
			$pending['text'] .= $original;
		} else {
			# A heading closes the entry under construction and opens the next
			$heading = $node->splitHeading();
			$sections[] = $pending;
			$pending = [
				'index' => $heading['i'],
				'level' => $heading['level'],
				'offset' => $byteOffset,
				'heading' => $original,
				'text' => $original
			];
		}

		$byteOffset += strlen( $original );
	}

	$sections[] = $pending;
	return $sections;
}
5948 
/**
 * Get the ID of the revision we are parsing.
 *
 * @return int|null Value of $this->mRevisionId; getRevisionObject() treats
 *  null as preview mode
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;
	return $revisionId;
}
5962 
/**
 * Get the revision object for $this->mRevisionId.
 *
 * The result is cached in $this->mRevisionObject once a lookup has completed
 * without taking the preview-mode early return.
 *
 * @return Revision|null Null in preview mode when the callback handed back
 *  an already-saved revision; otherwise whatever the lookup produced
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject ) {
		return $this->mRevisionObject;
	}

	// NOTE: try to get the RevisionObject even if mRevisionId is null.
	// This is useful when parsing a revision that has not yet been saved.
	// However, if we get back a saved revision even though we are in
	// preview mode, we'll have to ignore it, see below.
	// NOTE: This callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. We can't just
	// skip it if mRevisionId is set.
	$fetchRevision = $this->mOptions->getCurrentRevisionCallback();
	$revision = call_user_func( $fetchRevision, $this->getTitle(), $this );

	if ( $revision ) {
		if ( $this->mRevisionId === null ) {
			if ( $revision->getId() ) {
				// We are in preview mode (mRevisionId is null), and the current revision callback
				// returned an existing revision. Ignore it and return null, it's probably the page's
				// current revision, which is not what we want here. Note that we do want to call the
				// callback to allow the unsaved revision to be injected here, e.g. for
				// self-transclusion previews.
				return null;
			}
		} elseif ( $this->mRevisionId && $revision->getId() != $this->mRevisionId ) {
			// If the parse is for a new revision, then the callback should have
			// already been set to force the object and should match mRevisionId.
			// If not, try to fetch by mRevisionId for sanity.
			$revision = Revision::newFromId( $this->mRevisionId );
		}
	}

	$this->mRevisionObject = $revision;

	return $this->mRevisionObject;
}
6007 
/**
 * Get the timestamp associated with the current revision, adjusted for the
 * default server-local timezone.
 *
 * The adjusted value is computed once and cached in
 * $this->mRevisionTimestamp. The unadjusted timestamp is reported to the
 * parser output through setRevisionTimestampUsed().
 *
 * @return string Adjusted timestamp as produced by contLang->userAdjust()
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp !== null ) {
		// Already computed: reuse the cached value.
		return $this->mRevisionTimestamp;
	}

	# Use specified revision timestamp, falling back to the current timestamp
	$revObject = $this->getRevisionObject();
	$timestamp = $revObject ? $revObject->getTimestamp() : $this->mOptions->getTimestamp();
	$this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone

	# The cryptic '' timezone parameter tells to use the site-default
	# timezone offset instead of the user settings.
	# Since this value will be saved into the parser cache, served
	# to other users, and potentially even used inside links and such,
	# it needs to be consistent for all visitors.
	$this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );

	return $this->mRevisionTimestamp;
}
6032 
/**
 * Get the name of the user that edited the last revision.
 *
 * The name is resolved on first use and cached in $this->mRevisionUser.
 *
 * @return string|null User name, or null when there is no revision object
 *  and the parser is neither in 'wiki' output mode nor previewing
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionObject();

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $revision ) {
		$this->mRevisionUser = $revision->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6052 
/**
 * Get the size of the revision.
 *
 * The size is resolved on first use and cached in $this->mRevisionSize.
 *
 * @return int|null Size reported by the revision object, or
 *  $this->mInputSize when no revision object is available
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revision = $this->getRevisionObject();

	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the own substitution
	# will change the size.
	$this->mRevisionSize = $revision ? $revision->getSize() : $this->mInputSize;

	return $this->mRevisionSize;
}
6073 
/**
 * Mutator for $mDefaultSort.
 *
 * Besides updating the parser's own field, the value is recorded on the
 * parser output as the 'defaultsort' property.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$propertyName = 'defaultsort';

	$this->mDefaultSort = $sort;
	$this->mOutput->setProperty( $propertyName, $sort );
}
6083 
/**
 * Accessor for $mDefaultSort that never returns false.
 *
 * @return string The stored default sort key, or '' when the stored value
 *  is exactly false
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6101 
/**
 * Accessor for $mDefaultSort that returns the stored value untouched.
 *
 * Unlike getDefaultSort(), a stored false is not converted to ''.
 *
 * @return string|bool Raw value of $this->mDefaultSort
 */
public function getCustomDefaultSort() {
	$storedSortKey = $this->mDefaultSort;
	return $storedSortKey;
}
6111 
/**
 * Turn already-stripped heading text into a normalized section name.
 *
 * Steps, in order: normalize whitespace, decode character references,
 * then apply normalizeSectionName().
 *
 * NOTE(review): the whitespace-normalization call restores a statement that
 * is missing from this listing (the embedded numbering skips a line between
 * the signature and the decodeCharReferences() call). Confirm against the
 * upstream file.
 *
 * @param string $text Section name with wikitext markup already removed
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );
	$text = Sanitizer::decodeCharReferences( $text );
	$text = self::normalizeSectionName( $text );
	return $text;
}
6118 
/**
 * Build a link fragment ('#' followed by the escaped ID) for a section name.
 *
 * @param string $sectionName
 * @return string
 */
private static function makeAnchor( $sectionName ) {
	$escapedId = Sanitizer::escapeIdForLink( $sectionName );
	return '#' . $escapedId;
}
6122 
/**
 * Build a link fragment for a section name, using the fallback ID encoding
 * when the second configured fragment mode is 'legacy'.
 *
 * When 'FragmentMode' has no second entry, or it is not 'legacy', the result
 * is the same as makeAnchor().
 *
 * @param string $sectionName
 * @return string '#' followed by the escaped ID
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );
	if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
		// ForAttribute() and ForLink() are the same for legacy encoding
		$id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
	} else {
		$id = Sanitizer::escapeIdForLink( $sectionName );
	}

	return "#$id";
}
6134 
/**
 * Guess the link fragment for a section heading given as wikitext.
 *
 * @param string $text Heading wikitext
 * @return string Anchor, '#' included
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6149 
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead, if possible.
 *
 * @param string $text Heading wikitext
 * @return string Anchor, '#' included
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6165 
/**
 * Guess the link fragment for a section name whose markup has already been
 * stripped. No stripSectionName() pass is performed here.
 *
 * @param string $text Stripped section name
 * @return string Anchor, '#' included
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6175 
/**
 * Apply the same normalization as code making links to this section would.
 *
 * The text is parsed as the fragment of a title string ("#$text") and the
 * resulting fragment is returned.
 *
 * @param string $text Section name
 * @return string Normalized fragment, or $text unchanged when the title
 *  parser rejects the string
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';

	try {
		return $titleParser->splitTitleString( "#$text" )['fragment'];
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}
}
6195 
/**
 * Strips a text string of wikitext for use in a section anchor.
 *
 * Applied in order: internal link markup is reduced to its label (or its
 * target when there is no label), bracketed external links are reduced to
 * their label, quote markup is run through doQuotes(), and finally
 * everything between '<' and '>' is removed.
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function stripSectionName( $text ) {
	# Strip internal link markup
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6228 
/**
 * Fuzz-test entry point: strip/replace variables/unstrip.
 *
 * Starts a parse, expands variables in $text, unstrips the result and runs
 * it through Sanitizer::removeHTMLtags().
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @return string
 */
private function fuzzTestSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	// Held until return; lock() ties the release of the parser lock to it.
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6250 
/**
 * Fuzz-test entry point for the pre-save transform.
 *
 * Delegates to preSaveTransform(), taking the user from $options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
private function fuzzTestPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $title, $user, $options );
}
6260 
/**
 * Fuzz-test entry point for preprocessing.
 *
 * Same as fuzzTestSrvus(), with the output type fixed to self::OT_PREPROCESS.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
private function fuzzTestPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;
	return $this->fuzzTestSrvus( $text, $title, $options, $outputType );
}
6270 
/**
 * Run $callback over every stretch of $s that lies outside a strip marker
 * and splice the returned text back in; the markers themselves are copied
 * through untouched.
 *
 * A marker is the span from self::MARKER_PREFIX up to and including the next
 * self::MARKER_SUFFIX. If a prefix has no matching suffix, the remainder of
 * the string from that prefix on is copied verbatim, without calling
 * $callback on it.
 *
 * @param string $s Text that may contain strip markers
 * @param callable $callback Receives one string, returns its replacement
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$out = '';
	$pos = 0;
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	while ( $pos < $length ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			# No more markers: the rest is ordinary text
			$out .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Ordinary text in front of the marker (may be empty)
		$out .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			# Unterminated marker: pass the tail through as-is
			$out .= substr( $s, $start );
			break;
		}

		$pos = $end + $suffixLength;
		$out .= substr( $s, $start, $pos - $start );
	}

	return $out;
}
6310 
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the strip state's killMarkers().
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
6320 
/**
 * Parse a width param of imagelink like 300px or 200x300px.
 *
 * @param string $value Raw parameter value
 * @param bool $parseHeight Whether the "<width>x<height>" form is accepted
 * @return array Empty when $value is '' or matches neither form; otherwise
 *  contains 'width' and, for the "<width>x<height>" form, 'height'.
 *  Missing digits yield 0.
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$m = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
		return [
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6349 
/**
 * Recursive call protection: mark the parser as busy.
 *
 * $this->mInParse is set to the current backtrace string and is reset to
 * false by the callback wrapped in the returned ScopedCallback. Callers keep
 * the returned object in a local variable for the duration of their work.
 *
 * @throws MWException If $this->mInParse is already set
 * @return ScopedCallback Wrapper around the callback that clears mInParse
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Save the backtrace when locking, so that if some code tries locking again,
	// we can print the lock owner's backtrace for easier debugging
	$this->mInParse = ( new Exception )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6376 
/**
 * Strip an outer <p></p> pair from HTML that consists of a single paragraph.
 *
 * The input is returned unchanged unless it starts with "<p>", ends with
 * "</p>" (optionally followed by a newline), and the text in between
 * contains no further "</p>".
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );

	if ( !$isWrapped || strpos( $m[1], '</p>' ) !== false ) {
		return $html;
	}

	return $m[1];
}
6395 
/**
 * Return a parser that is not currently in the middle of a parse.
 *
 * @return Parser $this when $this->mInParse is not set, otherwise a new
 *  instance obtained from $this->factory
 */
public function getFreshParser() {
	return $this->mInParse ? $this->factory->create() : $this;
}
6413 
/**
 * Set up the PHP implementation of OOUI for this request and flag the
 * parser output as using OOUI.
 *
 * NOTE(review): the setupOOUI() call restores a statement that is missing
 * from this listing (the embedded numbering skips a line right after the
 * signature). Confirm the owning class against the upstream file.
 */
public function enableOOUI() {
	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
6424 
/**
 * Set a flag on the parser output and write a debug log entry about it.
 *
 * @param string $flag Flag name handed to the output's setFlag()
 * @param string $reason Free text appended to the log message
 */
protected function setOutputFlag( $flag, $reason ) {
	$this->mOutput->setFlag( $flag );

	$name = $this->getTitle()->getPrefixedText();
	$message = __METHOD__ . ": set $flag flag on '$name'; $reason";
	$this->logger->debug( $message );
}
6434 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:5969
extensionSubstitution( $params, $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:4002
getFunctionSynonyms()
Definition: Parser.php:5664
static armorFrenchSpaces( $text, $space='&#160;')
Armor French spaces with a replacement character.
Definition: Sanitizer.php:1141
static register( $parser)
$mAutonumber
Definition: Parser.php:189
$mPPNodeCount
Definition: Parser.php:203
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
bool string $mInParse
Recursive call protection.
Definition: Parser.php:261
const MARKER_PREFIX
Definition: Parser.php:133
wfUrlProtocols( $includeProtocolRelative=true)
Returns a regular expression of url protocols.
setLinkID( $id)
Definition: Parser.php:1030
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1663
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4774
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
$mTplRedirCache
Definition: Parser.php:205
LinkRenderer $mLinkRenderer
Definition: Parser.php:269
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:6038
handleMagicLinks( $text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1621
parseLinkParameter( $value)
Parse the value of 'link' parameter in image syntax ([[File:Foo.jpg|link=<value>]]).
Definition: Parser.php:5549
const OT_PREPROCESS
Definition: Defines.php:166
static element( $element, $attribs=[], $contents='')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:231
static tidy( $text)
Interface with Remex tidy.
Definition: MWTidy.php:42
$mDoubleUnderscores
Definition: Parser.php:205
SpecialPageFactory $specialPageFactory
Definition: Parser.php:281
killMarkers( $text)
Remove any strip markers found in the given text.
Definition: Parser.php:6317
$context
Definition: load.php:40
static getExternalLinkRel( $url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:2101
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:5863
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:255
BadFileLookup $badFileLookup
Definition: Parser.php:302
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
handleAllQuotes( $text)
Replace single quotes with HTML markup.
Definition: Parser.php:1820
bool $mFirstCall
Whether firstCallInit still needs to be called.
Definition: Parser.php:157
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition: Sanitizer.php:66
getRevisionTimestampSubstring( $start, $len, $mtts, $variable)
Definition: Parser.php:3009
nextLinkID()
Definition: Parser.php:1023
getTemplateDom( $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3611
const SPACE_NOT_NL
Definition: Parser.php:102
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1450
const OT_PLAIN
Definition: Parser.php:113
getTags()
Accessor.
Definition: Parser.php:5651
const OT_WIKI
Definition: Parser.php:110
User $mUser
Definition: Parser.php:214
replaceLinkHoldersPrivate(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:5083
static cleanUrl( $url)
Definition: Sanitizer.php:2041
Title null $mTitle
Since 1.34, leaving mTitle uninitialized or setting mTitle to null is deprecated. ...
Definition: Parser.php:231
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1771
static isEnabled()
Definition: MWTidy.php:54
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:5034
finalizeHeadings( $text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4202
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:2123
callParserFunction( $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3519
magicLinkCallback( $m)
Definition: Parser.php:1652
wfHostname()
Fetch server name for use in error reporting etc.
braceSubstitution( $piece, $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:3166
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:1038
startParse(?Title $title, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4841
preprocessToDom( $text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:3068
Title(Title $x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:962
const TOC_START
Definition: Parser.php:136
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
SectionProfiler $mProfiler
Definition: Parser.php:264
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <...
getFlatSectionInfo( $text)
Get an array of preprocessor section information.
Definition: Parser.php:5911
There are three types of nodes:
Definition: PPNode.php:35
$mHeadings
Definition: Parser.php:205
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:4949
const NS_SPECIAL
Definition: Defines.php:49
clearState()
Clear Parser state.
Definition: Parser.php:476
const EXT_LINK_ADDR
Definition: Parser.php:95
static factory( $mode=false, IContextSource $context=null)
Get a new image gallery.
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6210
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
static statelessFetchRevision(Title $title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3689
armorLinks( $text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2637
static activeUsers()
Definition: SiteStats.php:130
$mLinkID
Definition: Parser.php:202
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4904
LinkRendererFactory $linkRendererFactory
Definition: Parser.php:293
fuzzTestPst( $text, Title $title, ParserOptions $options)
Definition: Parser.php:6257
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:781
$mGeneratedPPNodeCount
Definition: Parser.php:203
$mRevisionId
Definition: Parser.php:235
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4811
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:1094
const NS_TEMPLATE
Definition: Defines.php:70
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:139
const NO_ARGS
Definition: PPFrame.php:29
fetchFileNoRegister( $title, $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3862
MagicWordArray $mVariables
Definition: Parser.php:164
const SFH_NO_HASH
Definition: Parser.php:83
static setupOOUI( $skinName='default', $dir='ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
setTransparentTagHook( $tag, callable $callback)
As setHook(), but letting the contents be parsed.
Definition: Parser.php:4935
$mForceTocPosition
Definition: Parser.php:207
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5959
const OT_PREPROCESS
Definition: Parser.php:111
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached Will warn at most once the user per limitation type...
Definition: Parser.php:3143
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition: Parser.php:6159
$mFunctionSynonyms
Definition: Parser.php:146
getPreSaveTransform()
Transform wiki markup when saving the page?
$mOutputType
Definition: Parser.php:232
interwikiTransclude( $title, $action)
Transclude an interwiki link.
Definition: Parser.php:3881
$mDefaultStripList
Definition: Parser.php:149
$mExtLinkBracketedRegex
Definition: Parser.php:178
makeKnownLinkHolder( $nt, $text='', $trail='', $prefix='')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2613
static stripOuterParagraph( $html)
Strip outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6387
A class for passing options to services.
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition: Sanitizer.php:74
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).
Definition: User.php:51
static getInstance( $ts=false)
Get a timestamp instance in GMT.
Definition: MWTimestamp.php:39
static numberingroup( $group)
Find the number of users in a given user group.
Definition: SiteStats.php:150
stripAltText( $caption, $holders)
Definition: Parser.php:5579
setDefaultSort( $sort)
Mutator for $mDefaultSort.
Definition: Parser.php:6079
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage()...
Definition: Linker.php:247
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:94
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
fetchFileAndTitle( $title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3837
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:791
if( $line===false) $args
Definition: mcc.php:124
const NO_TEMPLATES
Definition: PPFrame.php:30
array $mTplDomCache
Definition: Parser.php:209
static parseWidthParam( $value, $parseHeight=true)
Parsed a width param of imagelink like 300px or 200x300px.
Definition: Parser.php:6330
$mVarCache
Definition: Parser.php:150
$mRevisionObject
Definition: Parser.php:234
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:2003
getPreloadText( $text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:904
preprocess( $text, ?Title $title, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:859
makeImage( $title, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5329
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:408
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:6013
wfUrlencode( $s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
getImageParams( $handler)
Definition: Parser.php:5275
fetchCurrentRevisionOfTitle( $title)
Fetch the current revision of a given title.
Definition: Parser.php:3654
Factory for handling the special page list and generating SpecialPage objects.
static extractTagsAndParams( $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:1145
$mRevIdForTs
Definition: Parser.php:239
setUser( $user)
Set the current user.
Definition: Parser.php:925
$mStripList
Definition: Parser.php:148
$mFunctionTagHooks
Definition: Parser.php:147
const OT_PLAIN
Definition: Defines.php:168
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:163
$mRevisionTimestamp
Definition: Parser.php:236
$mImageParams
Definition: Parser.php:151
makeLimitReport()
Set the limit report data in the current ParserOutput, and return the limit report HTML comment...
Definition: Parser.php:653
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
Definition: Sanitizer.php:1257
static newKnownCurrent(IDatabase $db, $pageIdOrTitle, $revId=0)
Load a revision based on a known page ID and current revision ID from the DB.
Definition: Revision.php:1124
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:80
const OT_WIKI
Definition: Defines.php:165
Preprocessor $mPreprocessor
Definition: Parser.php:182
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:1080
const NS_MEDIA
Definition: Defines.php:48
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:5879
static getVersion( $flags='', $lang=null)
Return a string of the MediaWiki version with Git revision if available.
static singleton()
Definition: RepoGroup.php:60
static normalizeSectionName( $text)
Apply the same normalization as code making links to this section would.
Definition: Parser.php:6182
replaceTransparentTags( $text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5687
handleDoubleUnderscore( $text)
Strip double-underscore items like NOGALLERY and NOTOC Fills $this->mDoubleUnderscores, returns the modified text.
Definition: Parser.php:4130
argSubstitution( $piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:3949
const RECOVER_ORIG
Definition: PPFrame.php:36
static normalizeUrlComponent( $component, $unsafe)
Definition: Parser.php:2212
static isValid( $ip)
Validate an IP address.
Definition: IP.php:111
handleHeadings( $text)
Parse headers and return html.
Definition: Parser.php:1803
StripState $mStripState
Definition: Parser.php:194
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3093
$mDefaultSort
Definition: Parser.php:204
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:1068
setOutputFlag( $flag, $reason)
Definition: Parser.php:6429
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1208
const EXT_IMAGE_REGEX
Definition: Parser.php:98
$cache
Definition: mcc.php:33
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1639
const NS_CATEGORY
Definition: Defines.php:74
setFunctionHook( $id, callable $callback, $flags=0)
Create a function, e.g.
Definition: Parser.php:4998
doQuotes( $text)
Helper function for handleAllQuotes()
Definition: Parser.php:1838
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:848
extractSections( $text, $sectionId, $mode, $newText='')
Break wikitext input into sections, and either pull or replace some particular section's text...
Definition: Parser.php:5738
setOutputType( $ot)
Set the output type.
Definition: Parser.php:971
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:6058
$mImageParamsMagicArray
Definition: Parser.php:152
LinkHolderArray $mLinkHolders
Definition: Parser.php:200
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition: Parser.php:1112
handleInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2302
setTitle(Title $t=null)
Set the context title.
Definition: Parser.php:934
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:1050
initializeVariables()
Initialize the magic variables (like CURRENTMONTHNAME) and substitution modifiers.
Definition: Parser.php:3038
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1412
internalParseHalfParsed( $text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1525
$mInputSize
Definition: Parser.php:240
getUserSig(&$user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext...
Definition: Parser.php:4713
const NS_FILE
Definition: Defines.php:66
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:456
static makeAnchor( $sectionName)
Definition: Parser.php:6119
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name. If you don't need a full Title object...
Definition: SpecialPage.php:83
static normalizeCharReferences( $text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1531
const PTD_FOR_INCLUSION
Definition: Parser.php:105
renderImageGallery( $text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5111
$mTagHooks
Definition: Parser.php:143
NamespaceInfo $nsInfo
Definition: Parser.php:296
fetchTemplateAndTitle( $title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3700
const NS_MEDIAWIKI
Definition: Defines.php:68
MagicWordFactory $magicWordFactory
Definition: Parser.php:272
handleTables( $text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1243
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6420
const OT_HTML
Definition: Defines.php:164
addTrackingCategory( $msg)
Definition: Parser.php:4183
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys, without regard for order.
static images()
Definition: SiteStats.php:139
$mTransparentTagHooks
Definition: Parser.php:144
$mExpensiveFunctionCount
Definition: Parser.php:206
$mUrlProtocols
Definition: Parser.php:178
static isWellFormedXmlFragment( $text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:730
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:584
makeFreeExternalLink( $url, $numPostProto)
Make a free external link, given a user-supplied URL.
Definition: Parser.php:1731
markerSkipCallback( $s, $callback)
Call a callback function on all regions of the given text that are not inside strip markers...
Definition: Parser.php:6287
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:422
Options( $x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:1016
maybeMakeExternalImage( $url)
Make an image if it's allowed, either through the global option, through the exception, or through the on-wiki whitelist.
Definition: Parser.php:2235
getUser()
Current user.
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:247
static makeImageLink(Parser $parser, LinkTarget $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:303
const OT_MSG
Definition: Parser.php:112
replaceLinkHoldersText( $text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:5094
$mFunctionHooks
Definition: Parser.php:145
$lines
Definition: router.php:61
static makeHeadline( $level, $attribs, $anchor, $html, $link, $fallbackAnchor=false)
Create a headline for content.
Definition: Linker.php:1750
MagicWordArray $mSubstWords
Definition: Parser.php:169
incrementIncludeSize( $type, $size)
Increment an include size counter.
Definition: Parser.php:4104
static delimiterExplode( $startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...
Definition: StringUtils.php:59
const TOC_END
Definition: Parser.php:137
pstPass2( $text, $user)
Pre-save transform helper function.
Definition: Parser.php:4637
ServiceOptions $svcOptions
This is called $svcOptions instead of $options like elsewhere to avoid confusion with $mOptions...
Definition: Parser.php:290
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer. Placeholders created in Link...
Definition: Parser.php:5072
resetOutput()
Reset the ParserOutput.
Definition: Parser.php:524
static escapeIdForLink( $id)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid URL...
Definition: Sanitizer.php:1284
Variant of the Message class.
Definition: RawMessage.php:34
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:6406
handleInternalLinks( $text)
Process [[ ]] wikilinks.
Definition: Parser.php:2292
parseExtensionTagAsTopLevelDoc( $text)
Needed by Parsoid/PHP to ensure all the hooks for extensions are run in the right order...
Definition: Parser.php:840
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not...
getContentLanguage()
Get the content language that this Parser is using.
Definition: Parser.php:1122
static articles()
Definition: SiteStats.php:103
$mRevisionUser
Definition: Parser.php:237
lock()
Lock the current instance of the parser.
Definition: Parser.php:6359
fuzzTestPreprocess( $text, Title $title, ParserOptions $options)
Definition: Parser.php:6267
static pages()
Definition: SiteStats.php:112
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
static tocList( $toc, Language $lang=null)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1675
static decodeCharReferences( $text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1631
getStripState()
Get the StripState.
Definition: Parser.php:1217
const SFH_OBJECT_ARGS
Definition: Parser.php:84
handleExternalLinks( $text)
Replace external links (REL)
Definition: Parser.php:2021
fuzzTestSrvus( $text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
Strip/replaceVariables/unstrip for preprocessor regression testing.
Definition: Parser.php:6239
$mIncludeCount
Definition: Parser.php:196
$mMarkerIndex
Definition: Parser.php:153
transformMsg( $text, $options, $title=null)
Wrapper for preprocess()
Definition: Parser.php:4860
getTitle()
Accessor for the Title object.
Definition: Parser.php:952
ParserOutput $mOutput
Definition: Parser.php:188
recursiveTagParse( $text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:787
getOutput()
Get the ParserOutput object.
Definition: Parser.php:997
recursivePreprocess( $text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:885
fetchTemplate( $title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3728
expandMagicVariable( $index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2662
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1613
static register( $parser)
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:692
$line
Definition: mcc.php:119
static fixTagAttributes( $text, $element, $sorted=false)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML...
Definition: Sanitizer.php:1098
isCurrentRevisionOfTitleCached( $title)
Definition: Parser.php:3673
makeLegacyAnchor( $sectionName)
Definition: Parser.php:6123
recursiveTagParseFully( $text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition: Parser.php:815
incrementExpensiveFunctionCount()
Increment the expensive function count.
Definition: Parser.php:4118
getDisableTitleConversion()
Whether title conversion should be disabled.
$mShowToc
Definition: Parser.php:207
const DB_REPLICA
Definition: defines.php:25
$content
Definition: router.php:78
static normalizeLinkUrl( $url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:2154
const EXT_LINK_URL_CLASS
Definition: Parser.php:92
Language $contLang
Definition: Parser.php:275
if(isset( $_SERVER['PATH_INFO']) && $_SERVER['PATH_INFO'] !='') $wgTitle
Definition: api.php:53
static removeHTMLtags( $text, $processCallback=null, $args=[], $extratags=[], $removetags=[], $warnCallback=null)
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:497
static getSectionNameFromStrippedText( $text)
Definition: Parser.php:6112
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
ParserOptions $mOptions
Definition: Parser.php:222
static newFromName( $name, $validate='valid')
Static factory method for creation from username.
Definition: User.php:536
const STRIP_COMMENTS
Definition: PPFrame.php:31
static cascadingsources( $parser, $title='')
Returns the sources of any cascading protection acting on a specified page.
static newFromId( $id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:120
static normalizeLineEndings( $text)
Do a "\r\n" -> "\n" and "\r" -> "\n" transformation as well as trim trailing whitespace...
$mHighestExpansionDepth
Definition: Parser.php:203
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition: Parser.php:6171
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition: Parser.php:5640
ParserFactory $factory
Definition: Parser.php:278
__construct( $svcOptions=null, MagicWordFactory $magicWordFactory=null, Language $contLang=null, ParserFactory $factory=null, $urlProtocols=null, SpecialPageFactory $spFactory=null, $linkRendererFactory=null, $nsInfo=null, $logger=null, BadFileLookup $badFileLookup=null)
Constructing parsers directly is deprecated! Use a ParserFactory.
Definition: Parser.php:344
getCustomDefaultSort()
Accessor for $mDefaultSort. Unlike getDefaultSort(), will return false if none is set.
Definition: Parser.php:6108
static getDefaultPreprocessorClass()
Which class should we use for the preprocessor if not otherwise specified?
Definition: Parser.php:449
getUrlProtocols()
Definition: Parser.php:5673
insertStripItem( $text)
Add an item to the strip state. Returns the unique tag which must be inserted into the stripped text T...
Definition: Parser.php:1230
static tocUnindent( $level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1624
$mRevisionSize
Definition: Parser.php:238
static users()
Definition: SiteStats.php:121
validateSig( $text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4760
const SFH_OBJECT_ARGS
Definition: Defines.php:178
startExternalParse(?Title $title, ParserOptions $options, $outputType, $clearState=true, $revId=null)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4826
static normalizeSectionNameWhitespace( $section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the IDs that are used for section links.
Definition: Sanitizer.php:1512
doBlockLevels( $text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: Parser.php:2650
LoggerInterface $logger
Definition: Parser.php:299
array $mConf
Definition: Parser.php:175
const OT_HTML
Definition: Parser.php:109
$mIncludeSizes
Definition: Parser.php:203
internalParse( $text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1453
preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\r\n" -> "\n" conversion, substituting signatur...
Definition: Parser.php:4602
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
guessSectionNameFromWikiText( $text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition: Parser.php:6143
getOptions()
Get the ParserOptions object.
Definition: Parser.php:1006
getDefaultSort()
Accessor for $mDefaultSort. Will use the empty string if none is set.
Definition: Parser.php:6094
static statelessFetchTemplate( $title, $parser=false)
Static function to get a template. Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3741
OutputType( $x=null)
Accessor/mutator for the output type.
Definition: Parser.php:988
setFunctionTagHook( $tag, callable $callback, $flags)
Create a tag function, e.g.
Definition: Parser.php:5049
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to preg_replace() with flags.
const SFH_NO_HASH
Definition: Defines.php:177
parse( $text, Title $title, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:546
$matches
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:317