MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
33 
74 class Parser {
80  const VERSION = '1.6.4';
81 
87 
88  # Flags for Parser::setFunctionHook
89  const SFH_NO_HASH = 1;
90  const SFH_OBJECT_ARGS = 2;
91 
92  # Constants needed for external link processing
93  # Everything except bracket, space, or control characters
94  # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
95  # as well as U+3000 IDEOGRAPHIC SPACE (see T21052)
96  # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
97  # uses to replace invalid HTML characters.
98  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
99  # Simplified expression to match an IPv4 or IPv6 address, or
100  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
101  const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
102  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
103  // phpcs:ignore Generic.Files.LineLength
104  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
105  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
106 
107  # Regular expression for a non-newline space
108  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
109 
110  # Flags for preprocessToDom
111  const PTD_FOR_INCLUSION = 1;
112 
113  # Allowed values for $this->mOutputType
114  # Parameter to startExternalParse().
115  const OT_HTML = 1; # like parse()
116  const OT_WIKI = 2; # like preSaveTransform()
117  const OT_PREPROCESS = 3; # like preprocess()
118  const OT_MSG = 3;
119  const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
120 
138  const MARKER_SUFFIX = "-QINU`\"'\x7f";
139  const MARKER_PREFIX = "\x7f'\"`UNIQ-";
140 
141  # Markers used for wrapping the table of contents
142  const TOC_START = '<mw:toc>';
143  const TOC_END = '</mw:toc>';
144 
146  const MAX_TTS = 900;
147 
148  # Persistent:
149  public $mTagHooks = [];
151  public $mFunctionHooks = [];
152  public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
153  public $mFunctionTagHooks = [];
154  public $mStripList = [];
155  public $mDefaultStripList = [];
156  public $mVarCache = [];
157  public $mImageParams = [];
159  public $mMarkerIndex = 0;
163  public $mFirstCall = true;
164 
165  # Initialised by initialiseVariables()
166 
170  public $mVariables;
171 
175  public $mSubstWords;
176 
181  public $mConf;
182 
183  # Initialised in constructor
185 
186  # Initialized in getPreprocessor()
187 
189 
190  # Cleared with clearState():
191 
194  public $mOutput;
195  public $mAutonumber;
196 
200  public $mStripState;
201 
207 
208  public $mLinkID;
212  public $mExpensiveFunctionCount; # number of expensive parser function calls
216 
220  public $mUser; # User object; only used when doing pre-save transform
221 
222  # Temporary
223  # These are variables reset at least once per parse regardless of $clearState
224 
228  public $mOptions;
229 
233  public $mTitle; # Title context, used for self-link rendering and similar things
234  public $mOutputType; # Output type, one of the OT_xxx constants
235  public $ot; # Shortcut alias, see setOutputType()
236  public $mRevisionObject; # The revision object of the specified revision ID
237  public $mRevisionId; # ID to display in {{REVISIONID}} tags
238  public $mRevisionTimestamp; # The timestamp of the specified revision ID
239  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
240  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
241  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
242  public $mInputSize = false; # For {{PAGESIZE}} on current page.
243 
250 
258 
263  public $mInParse = false;
264 
266  protected $mProfiler;
267 
271  protected $mLinkRenderer;
272 
275 
277  private $contLang;
278 
280  private $factory;
281 
284 
292  private $svcOptions;
293 
296 
298  private $nsInfo;
299 
301  private $logger;
302 
304  private $badFileLookup;
305 
310  public const CONSTRUCTOR_OPTIONS = [
311  // See $wgParserConf documentation
312  'class',
313  'preprocessorClass',
314  // See documentation for the corresponding config options
315  'ArticlePath',
316  'EnableScaryTranscluding',
317  'ExtraInterlanguageLinkPrefixes',
318  'FragmentMode',
319  'LanguageCode',
320  'MaxSigChars',
321  'MaxTocLevel',
322  'MiserMode',
323  'ScriptPath',
324  'Server',
325  'ServerName',
326  'ShowHostnames',
327  'Sitename',
328  'StylePath',
329  'TranscludeCacheExpiry',
330  ];
331 
346  public function __construct(
347  $svcOptions = null,
349  Language $contLang = null,
350  ParserFactory $factory = null,
351  $urlProtocols = null,
352  SpecialPageFactory $spFactory = null,
353  $linkRendererFactory = null,
354  $nsInfo = null,
355  $logger = null,
357  ) {
358  if ( !$svcOptions || is_array( $svcOptions ) ) {
359  // Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
360  // Config, and the eighth is LinkRendererFactory.
361  $this->mConf = (array)$svcOptions;
362  if ( empty( $this->mConf['class'] ) ) {
363  $this->mConf['class'] = self::class;
364  }
365  if ( empty( $this->mConf['preprocessorClass'] ) ) {
366  $this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
367  }
368  $this->svcOptions = new ServiceOptions( self::CONSTRUCTOR_OPTIONS,
369  $this->mConf, func_num_args() > 6
370  ? func_get_arg( 6 ) : MediaWikiServices::getInstance()->getMainConfig()
371  );
372  $linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
373  $nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
374  } else {
375  // New calling convention
376  $svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
377  // $this->mConf is public, so we'll keep those two options there as well for
378  // compatibility until it's removed
379  $this->mConf = [
380  'class' => $svcOptions->get( 'class' ),
381  'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
382  ];
383  $this->svcOptions = $svcOptions;
384  }
385 
386  $this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
387  $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
388  self::EXT_LINK_ADDR .
389  self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
390 
391  $this->magicWordFactory = $magicWordFactory ??
392  MediaWikiServices::getInstance()->getMagicWordFactory();
393 
394  $this->contLang = $contLang ?? MediaWikiServices::getInstance()->getContentLanguage();
395 
396  $this->factory = $factory ?? MediaWikiServices::getInstance()->getParserFactory();
397  $this->specialPageFactory = $spFactory ??
398  MediaWikiServices::getInstance()->getSpecialPageFactory();
399  $this->linkRendererFactory = $linkRendererFactory ??
400  MediaWikiServices::getInstance()->getLinkRendererFactory();
401  $this->nsInfo = $nsInfo ?? MediaWikiServices::getInstance()->getNamespaceInfo();
402  $this->logger = $logger ?: new NullLogger();
403  $this->badFileLookup = $badFileLookup ??
404  MediaWikiServices::getInstance()->getBadFileLookup();
405  }
406 
410  public function __destruct() {
411  if ( isset( $this->mLinkHolders ) ) {
412  // @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
413  unset( $this->mLinkHolders );
414  }
415  // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
416  foreach ( $this as $name => $value ) {
417  unset( $this->$name );
418  }
419  }
420 
424  public function __clone() {
425  $this->mInParse = false;
426 
427  // T58226: When you create a reference "to" an object field, that
428  // makes the object field itself be a reference too (until the other
429  // reference goes out of scope). When cloning, any field that's a
430  // reference is copied as a reference in the new object. Both of these
431  // are defined PHP5 behaviors, as inconvenient as it is for us when old
432  // hooks from PHP4 days are passing fields by reference.
433  foreach ( [ 'mStripState', 'mVarCache' ] as $k ) {
434  // Make a non-reference copy of the field, then rebind the field to
435  // reference the new copy.
436  $tmp = $this->$k;
437  $this->$k =& $tmp;
438  unset( $tmp );
439  }
440 
441  Hooks::run( 'ParserCloned', [ $this ] );
442  }
443 
451  public static function getDefaultPreprocessorClass() {
452  return Preprocessor_Hash::class;
453  }
454 
458  public function firstCallInit() {
459  if ( !$this->mFirstCall ) {
460  return;
461  }
462  $this->mFirstCall = false;
463 
465  CoreTagHooks::register( $this );
466  $this->initialiseVariables();
467 
468  // Avoid PHP 7.1 warning from passing $this by reference
469  $parser = $this;
470  Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
471  }
472 
478  public function clearState() {
479  $this->firstCallInit();
480  $this->resetOutput();
481  $this->mAutonumber = 0;
482  $this->mIncludeCount = [];
483  $this->mLinkHolders = new LinkHolderArray( $this );
484  $this->mLinkID = 0;
485  $this->mRevisionObject = $this->mRevisionTimestamp =
486  $this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
487  $this->mVarCache = [];
488  $this->mUser = null;
489  $this->mLangLinkLanguages = [];
490  $this->currentRevisionCache = null;
491 
492  $this->mStripState = new StripState( $this );
493 
494  # Clear these on every parse, T6549
495  $this->mTplRedirCache = $this->mTplDomCache = [];
496 
497  $this->mShowToc = true;
498  $this->mForceTocPosition = false;
499  $this->mIncludeSizes = [
500  'post-expand' => 0,
501  'arg' => 0,
502  ];
503  $this->mPPNodeCount = 0;
504  $this->mGeneratedPPNodeCount = 0;
505  $this->mHighestExpansionDepth = 0;
506  $this->mDefaultSort = false;
507  $this->mHeadings = [];
508  $this->mDoubleUnderscores = [];
509  $this->mExpensiveFunctionCount = 0;
510 
511  # Fix cloning
512  if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
513  $this->mPreprocessor = null;
514  }
515 
516  $this->mProfiler = new SectionProfiler();
517 
518  // Avoid PHP 7.1 warning from passing $this by reference
519  $parser = $this;
520  Hooks::run( 'ParserClearState', [ &$parser ] );
521  }
522 
526  public function resetOutput() {
527  $this->mOutput = new ParserOutput;
528  $this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
529  }
530 
548  public function parse(
549  $text, Title $title, ParserOptions $options,
550  $linestart = true, $clearState = true, $revid = null
551  ) {
552  if ( $clearState ) {
553  // We use U+007F DELETE to construct strip markers, so we have to make
554  // sure that this character does not occur in the input text.
555  $text = strtr( $text, "\x7f", "?" );
556  $magicScopeVariable = $this->lock();
557  }
558  // Strip U+0000 NULL (T159174)
559  $text = str_replace( "\000", '', $text );
560 
561  $this->startParse( $title, $options, self::OT_HTML, $clearState );
562 
563  $this->currentRevisionCache = null;
564  $this->mInputSize = strlen( $text );
565  if ( $this->mOptions->getEnableLimitReport() ) {
566  $this->mOutput->resetParseStartTime();
567  }
568 
569  $oldRevisionId = $this->mRevisionId;
570  $oldRevisionObject = $this->mRevisionObject;
571  $oldRevisionTimestamp = $this->mRevisionTimestamp;
572  $oldRevisionUser = $this->mRevisionUser;
573  $oldRevisionSize = $this->mRevisionSize;
574  if ( $revid !== null ) {
575  $this->mRevisionId = $revid;
576  $this->mRevisionObject = null;
577  $this->mRevisionTimestamp = null;
578  $this->mRevisionUser = null;
579  $this->mRevisionSize = null;
580  }
581 
582  // Avoid PHP 7.1 warning from passing $this by reference
583  $parser = $this;
584  Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
585  # No more strip!
586  Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
587  $text = $this->internalParse( $text );
588  Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );
589 
590  $text = $this->internalParseHalfParsed( $text, true, $linestart );
591 
599  if ( !( $options->getDisableTitleConversion()
600  || isset( $this->mDoubleUnderscores['nocontentconvert'] )
601  || isset( $this->mDoubleUnderscores['notitleconvert'] )
602  || $this->mOutput->getDisplayTitle() !== false )
603  ) {
604  $convruletitle = $this->getTargetLanguage()->getConvRuleTitle();
605  if ( $convruletitle ) {
606  $this->mOutput->setTitleText( $convruletitle );
607  } else {
608  $titleText = $this->getTargetLanguage()->convertTitle( $title );
609  $this->mOutput->setTitleText( $titleText );
610  }
611  }
612 
613  # Compute runtime adaptive expiry if set
614  $this->mOutput->finalizeAdaptiveCacheExpiry();
615 
616  # Warn if too many heavyweight parser functions were used
617  if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
618  $this->limitationWarn( 'expensive-parserfunction',
619  $this->mExpensiveFunctionCount,
620  $this->mOptions->getExpensiveParserFunctionLimit()
621  );
622  }
623 
624  # Information on limits, for the benefit of users who try to skirt them
625  if ( $this->mOptions->getEnableLimitReport() ) {
626  $text .= $this->makeLimitReport();
627  }
628 
629  # Wrap non-interface parser output in a <div> so it can be targeted
630  # with CSS (T37247)
631  $class = $this->mOptions->getWrapOutputClass();
632  if ( $class !== false && !$this->mOptions->getInterfaceMessage() ) {
633  $this->mOutput->addWrapperDivClass( $class );
634  }
635 
636  $this->mOutput->setText( $text );
637 
638  $this->mRevisionId = $oldRevisionId;
639  $this->mRevisionObject = $oldRevisionObject;
640  $this->mRevisionTimestamp = $oldRevisionTimestamp;
641  $this->mRevisionUser = $oldRevisionUser;
642  $this->mRevisionSize = $oldRevisionSize;
643  $this->mInputSize = false;
644  $this->currentRevisionCache = null;
645 
646  return $this->mOutput;
647  }
648 
655  protected function makeLimitReport() {
656  $maxIncludeSize = $this->mOptions->getMaxIncludeSize();
657 
658  $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
659  if ( $cpuTime !== null ) {
660  $this->mOutput->setLimitReportData( 'limitreport-cputime',
661  sprintf( "%.3f", $cpuTime )
662  );
663  }
664 
665  $wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
666  $this->mOutput->setLimitReportData( 'limitreport-walltime',
667  sprintf( "%.3f", $wallTime )
668  );
669 
670  $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
671  [ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
672  );
673  $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
674  [ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
675  );
676  $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
677  [ $this->mIncludeSizes['post-expand'], $maxIncludeSize ]
678  );
679  $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
680  [ $this->mIncludeSizes['arg'], $maxIncludeSize ]
681  );
682  $this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
683  [ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
684  );
685  $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
686  [ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
687  );
688 
689  foreach ( $this->mStripState->getLimitReport() as list( $key, $value ) ) {
690  $this->mOutput->setLimitReportData( $key, $value );
691  }
692 
693  Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );
694 
695  $limitReport = "NewPP limit report\n";
696  if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
697  $limitReport .= 'Parsed by ' . wfHostname() . "\n";
698  }
699  $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
700  $limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
701  $limitReport .= 'Dynamic content: ' .
702  ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
703  "\n";
704  $limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";
705 
706  foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
707  if ( Hooks::run( 'ParserLimitReportFormat',
708  [ $key, &$value, &$limitReport, false, false ]
709  ) ) {
710  $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
711  $valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
712  ->inLanguage( 'en' )->useDatabase( false );
713  if ( !$valueMsg->exists() ) {
714  $valueMsg = new RawMessage( '$1' );
715  }
716  if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
717  $valueMsg->params( $value );
718  $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
719  }
720  }
721  }
722  // Since we're not really outputting HTML, decode the entities and
723  // then re-encode the things that need hiding inside HTML comments.
724  $limitReport = htmlspecialchars_decode( $limitReport );
725 
726  // Sanitize for comment. Note '‐' in the replacement is U+2010,
727  // which looks much like the problematic '-'.
728  $limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
729  $text = "\n<!-- \n$limitReport-->\n";
730 
731  // Add on template profiling data in human/machine readable way
732  $dataByFunc = $this->mProfiler->getFunctionStats();
733  uasort( $dataByFunc, function ( $a, $b ) {
734  return $b['real'] <=> $a['real']; // descending order
735  } );
736  $profileReport = [];
737  foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
738  $profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
739  $item['%real'], $item['real'], $item['calls'],
740  htmlspecialchars( $item['name'] ) );
741  }
742  $text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
743  $text .= implode( "\n", $profileReport ) . "\n-->\n";
744 
745  $this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );
746 
747  // Add other cache related metadata
748  if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
749  $this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
750  }
751  $this->mOutput->setLimitReportData( 'cachereport-timestamp',
752  $this->mOutput->getCacheTime() );
753  $this->mOutput->setLimitReportData( 'cachereport-ttl',
754  $this->mOutput->getCacheExpiry() );
755  $this->mOutput->setLimitReportData( 'cachereport-transientcontent',
756  $this->mOutput->hasDynamicContent() );
757 
758  if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
759  wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
760  $this->getTitle()->getPrefixedDBkey() );
761  }
762  return $text;
763  }
764 
789  public function recursiveTagParse( $text, $frame = false ) {
790  // Avoid PHP 7.1 warning from passing $this by reference
791  $parser = $this;
792  Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
793  Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
794  $text = $this->internalParse( $text, false, $frame );
795  return $text;
796  }
797 
817  public function recursiveTagParseFully( $text, $frame = false ) {
818  $text = $this->recursiveTagParse( $text, $frame );
819  $text = $this->internalParseHalfParsed( $text, false );
820  return $text;
821  }
822 
834  public function preprocess( $text, ?Title $title,
835  ParserOptions $options, $revid = null, $frame = false
836  ) {
837  $magicScopeVariable = $this->lock();
838  $this->startParse( $title, $options, self::OT_PREPROCESS, true );
839  if ( $revid !== null ) {
840  $this->mRevisionId = $revid;
841  }
842  // Avoid PHP 7.1 warning from passing $this by reference
843  $parser = $this;
844  Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
845  Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
846  $text = $this->replaceVariables( $text, $frame );
847  $text = $this->mStripState->unstripBoth( $text );
848  return $text;
849  }
850 
860  public function recursivePreprocess( $text, $frame = false ) {
861  $text = $this->replaceVariables( $text, $frame );
862  $text = $this->mStripState->unstripBoth( $text );
863  return $text;
864  }
865 
879  public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
880  $msg = new RawMessage( $text );
881  $text = $msg->params( $params )->plain();
882 
883  # Parser (re)initialisation
884  $magicScopeVariable = $this->lock();
885  $this->startParse( $title, $options, self::OT_PLAIN, true );
886 
888  $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
889  $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
890  $text = $this->mStripState->unstripBoth( $text );
891  return $text;
892  }
893 
900  public function setUser( $user ) {
901  $this->mUser = $user;
902  }
903 
909  public function setTitle( Title $t = null ) {
910  if ( !$t ) {
911  $t = Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );
912  }
913 
914  if ( $t->hasFragment() ) {
915  # Strip the fragment to avoid various odd effects
916  $this->mTitle = $t->createFragmentTarget( '' );
917  } else {
918  $this->mTitle = $t;
919  }
920  }
921 
927  public function getTitle() : Title {
928  return $this->mTitle;
929  }
930 
937  public function Title( Title $x = null ) : Title {
938  return wfSetVar( $this->mTitle, $x );
939  }
940 
946  public function setOutputType( $ot ) {
947  $this->mOutputType = $ot;
948  # Shortcut alias
949  $this->ot = [
950  'html' => $ot == self::OT_HTML,
951  'wiki' => $ot == self::OT_WIKI,
952  'pre' => $ot == self::OT_PREPROCESS,
953  'plain' => $ot == self::OT_PLAIN,
954  ];
955  }
956 
963  public function OutputType( $x = null ) {
964  return wfSetVar( $this->mOutputType, $x );
965  }
966 
972  public function getOutput() {
973  return $this->mOutput;
974  }
975 
981  public function getOptions() {
982  return $this->mOptions;
983  }
984 
991  public function Options( $x = null ) {
992  return wfSetVar( $this->mOptions, $x );
993  }
994 
998  public function nextLinkID() {
999  return $this->mLinkID++;
1000  }
1001 
1005  public function setLinkID( $id ) {
1006  $this->mLinkID = $id;
1007  }
1008 
1013  public function getFunctionLang() {
1014  return $this->getTargetLanguage();
1015  }
1016 
1025  public function getTargetLanguage() {
1026  $target = $this->mOptions->getTargetLanguage();
1027 
1028  if ( $target !== null ) {
1029  return $target;
1030  } elseif ( $this->mOptions->getInterfaceMessage() ) {
1031  return $this->mOptions->getUserLangObj();
1032  }
1033 
1034  return $this->getTitle()->getPageLanguage();
1035  }
1036 
1042  public function getConverterLanguage() {
1043  return $this->getTargetLanguage();
1044  }
1045 
1052  public function getUser() {
1053  if ( !is_null( $this->mUser ) ) {
1054  return $this->mUser;
1055  }
1056  return $this->mOptions->getUser();
1057  }
1058 
1064  public function getPreprocessor() {
1065  if ( !isset( $this->mPreprocessor ) ) {
1066  $class = $this->svcOptions->get( 'preprocessorClass' );
1067  $this->mPreprocessor = new $class( $this );
1068  }
1069  return $this->mPreprocessor;
1070  }
1071 
1078  public function getLinkRenderer() {
1079  // XXX We make the LinkRenderer with current options and then cache it forever
1080  if ( !$this->mLinkRenderer ) {
1081  $this->mLinkRenderer = $this->linkRendererFactory->create();
1082  $this->mLinkRenderer->setStubThreshold(
1083  $this->getOptions()->getStubThreshold()
1084  );
1085  }
1086 
1087  return $this->mLinkRenderer;
1088  }
1089 
1096  public function getMagicWordFactory() {
1097  return $this->magicWordFactory;
1098  }
1099 
1106  public function getContentLanguage() {
1107  return $this->contLang;
1108  }
1109 
1129  public static function extractTagsAndParams( $elements, $text, &$matches ) {
1130  static $n = 1;
1131  $stripped = '';
1132  $matches = [];
1133 
1134  $taglist = implode( '|', $elements );
1135  $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
1136 
1137  while ( $text != '' ) {
1138  $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
1139  $stripped .= $p[0];
1140  if ( count( $p ) < 5 ) {
1141  break;
1142  }
1143  if ( count( $p ) > 5 ) {
1144  # comment
1145  $element = $p[4];
1146  $attributes = '';
1147  $close = '';
1148  $inside = $p[5];
1149  } else {
1150  # tag
1151  list( , $element, $attributes, $close, $inside ) = $p;
1152  }
1153 
1154  $marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
1155  $stripped .= $marker;
1156 
1157  if ( $close === '/>' ) {
1158  # Empty element tag, <tag />
1159  $content = null;
1160  $text = $inside;
1161  $tail = null;
1162  } else {
1163  if ( $element === '!--' ) {
1164  $end = '/(-->)/';
1165  } else {
1166  $end = "/(<\\/$element\\s*>)/i";
1167  }
1168  $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
1169  $content = $q[0];
1170  if ( count( $q ) < 3 ) {
1171  # No end tag -- let it run out to the end of the text.
1172  $tail = '';
1173  $text = '';
1174  } else {
1175  list( , $tail, $text ) = $q;
1176  }
1177  }
1178 
1179  $matches[$marker] = [ $element,
1180  $content,
1181  Sanitizer::decodeTagAttributes( $attributes ),
1182  "<$element$attributes$close$content$tail" ];
1183  }
1184  return $stripped;
1185  }
1186 
1192  public function getStripList() {
1193  return $this->mStripList;
1194  }
1195 
1201  public function getStripState() {
1202  return $this->mStripState;
1203  }
1204 
1214  public function insertStripItem( $text ) {
1215  $marker = self::MARKER_PREFIX . "-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
1216  $this->mMarkerIndex++;
1217  $this->mStripState->addGeneral( $marker, $text );
1218  return $marker;
1219  }
1220 
1228  public function doTableStuff( $text ) {
      # Convert wikitext table markup ({| ... |}, |-, |, !, |+) in $text to HTML
      # table markup and return the resulting string.
      #
      # The text is processed line by line. Five parallel stacks hold the
      # state of each currently open table: an entry is pushed on every stack
      # when a table starts and popped when it ends, which is what makes
      # nested tables work. Lines outside any table are passed through as-is.
1229  $lines = StringUtils::explode( "\n", $text );
1230  $out = '';
1231  $td_history = []; # Per open table: is a cell (td/th/caption) currently open?
1232  $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1233  $tr_history = []; # Per open table: is a tr tag currently open?
1234  $tr_attributes = []; # Per open table: attributes for the next <tr> to be emitted
1235  $has_opened_tr = []; # Did this table open a <tr> element?
      # NOTE(review): $indent_level is a single scalar, not a per-table stack,
      # so the start of a nested table overwrites the outer table's value.
1236  $indent_level = 0; # indent level of the table
1237 
1238  foreach ( $lines as $outLine ) {
1239  $line = trim( $outLine );
1240 
1241  if ( $line === '' ) { # empty line, go to next line
1242  $out .= $outLine . "\n";
1243  continue;
1244  }
1245 
1246  $first_character = $line[0];
1247  $first_two = substr( $line, 0, 2 );
1248  $matches = [];
1249 
1250  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1251  # First check if we are starting a new table
      # The number of leading colons becomes the indent level
1252  $indent_level = strlen( $matches[1] );
1253 
1254  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1255  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1256 
1257  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
1258  array_push( $td_history, false );
1259  array_push( $last_tag_history, '' );
1260  array_push( $tr_history, false );
1261  array_push( $tr_attributes, '' );
1262  array_push( $has_opened_tr, false );
1263  } elseif ( count( $td_history ) == 0 ) {
1264  # Don't do any of the following
      # (no table is open, so the line is copied through unchanged)
1265  $out .= $outLine . "\n";
1266  continue;
1267  } elseif ( $first_two === '|}' ) {
1268  # We are ending a table
      # The closing markup is built back to front by prepending to $line
1269  $line = '</table>' . substr( $line, 2 );
1270  $last_tag = array_pop( $last_tag_history );
1271 
      # A table that never opened a row gets one empty row with one empty cell
1272  if ( !array_pop( $has_opened_tr ) ) {
1273  $line = "<tr><td></td></tr>{$line}";
1274  }
1275 
1276  if ( array_pop( $tr_history ) ) {
1277  $line = "</tr>{$line}";
1278  }
1279 
1280  if ( array_pop( $td_history ) ) {
1281  $line = "</{$last_tag}>{$line}";
1282  }
1283  array_pop( $tr_attributes );
1284  if ( $indent_level > 0 ) {
1285  $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1286  } else {
1287  $outLine = $line;
1288  }
1289  } elseif ( $first_two === '|-' ) {
1290  # Now we have a table row
1291  $line = preg_replace( '#^\|-+#', '', $line );
1292 
1293  # What's after the tag is now only attributes
      # They are stored and only emitted when the first cell of the row appears
1294  $attributes = $this->mStripState->unstripBoth( $line );
1295  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
1296  array_pop( $tr_attributes );
1297  array_push( $tr_attributes, $attributes );
1298 
1299  $line = '';
1300  $last_tag = array_pop( $last_tag_history );
1301  array_pop( $has_opened_tr );
1302  array_push( $has_opened_tr, true );
1303 
1304  if ( array_pop( $tr_history ) ) {
1305  $line = '</tr>';
1306  }
1307 
1308  if ( array_pop( $td_history ) ) {
1309  $line = "</{$last_tag}>{$line}";
1310  }
1311 
1312  $outLine = $line;
1313  array_push( $tr_history, false );
1314  array_push( $td_history, false );
1315  array_push( $last_tag_history, '' );
1316  } elseif ( $first_character === '|'
1317  || $first_character === '!'
1318  || $first_two === '|+'
1319  ) {
1320  # This might be cell elements, td, th or captions
      # From here on $first_character is '|' (td), '!' (th) or '+' (caption)
1321  if ( $first_two === '|+' ) {
1322  $first_character = '+';
1323  $line = substr( $line, 2 );
1324  } else {
1325  $line = substr( $line, 1 );
1326  }
1327 
1328  // Implies both are valid for table headings.
1329  if ( $first_character === '!' ) {
1330  $line = StringUtils::replaceMarkup( '!!', '||', $line );
1331  }
1332 
1333  # Split up multiple cells on the same line.
1334  # FIXME : This can result in improper nesting of tags processed
1335  # by earlier parser steps.
1336  $cells = explode( '||', $line );
1337 
1338  $outLine = '';
1339 
1340  # Loop through each table cell
1341  foreach ( $cells as $cell ) {
1342  $previous = '';
      # Captions do not open a row; td and th cells open one if none is open
1343  if ( $first_character !== '+' ) {
1344  $tr_after = array_pop( $tr_attributes );
1345  if ( !array_pop( $tr_history ) ) {
1346  $previous = "<tr{$tr_after}>\n";
1347  }
1348  array_push( $tr_history, true );
1349  array_push( $tr_attributes, '' );
1350  array_pop( $has_opened_tr );
1351  array_push( $has_opened_tr, true );
1352  }
1353 
1354  $last_tag = array_pop( $last_tag_history );
1355 
      # Close the previous cell, if any, before any newly opened row
1356  if ( array_pop( $td_history ) ) {
1357  $previous = "</{$last_tag}>\n{$previous}";
1358  }
1359 
1360  if ( $first_character === '|' ) {
1361  $last_tag = 'td';
1362  } elseif ( $first_character === '!' ) {
1363  $last_tag = 'th';
1364  } elseif ( $first_character === '+' ) {
1365  $last_tag = 'caption';
1366  } else {
1367  $last_tag = '';
1368  }
1369 
1370  array_push( $last_tag_history, $last_tag );
1371 
1372  # A cell could contain both parameters and data
1373  $cell_data = explode( '|', $cell, 2 );
1374 
1375  # T2553: Note that a '|' inside an invalid link should not
1376  # be mistaken as delimiting cell parameters
1377  # Bug T153140: Neither should language converter markup.
1378  if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1379  $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1380  } elseif ( count( $cell_data ) == 1 ) {
1381  // Whitespace in cells is trimmed
1382  $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1383  } else {
1384  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1385  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1386  // Whitespace in cells is trimmed
1387  $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1388  }
1389 
1390  $outLine .= $cell;
1391  array_push( $td_history, true );
1392  }
1393  }
1394  $out .= $outLine . "\n";
1395  }
1396 
1397  # Closing open td, tr && table
      # NOTE(review): this always emits </td>; $last_tag_history is not
      # consulted here, so an unterminated th or caption is closed as a td.
1398  while ( count( $td_history ) > 0 ) {
1399  if ( array_pop( $td_history ) ) {
1400  $out .= "</td>\n";
1401  }
1402  if ( array_pop( $tr_history ) ) {
1403  $out .= "</tr>\n";
1404  }
1405  if ( !array_pop( $has_opened_tr ) ) {
1406  $out .= "<tr><td></td></tr>\n";
1407  }
1408 
1409  $out .= "</table>\n";
1410  }
1411 
1412  # Remove trailing line-ending (b/c)
1413  if ( substr( $out, -1 ) === "\n" ) {
1414  $out = substr( $out, 0, -1 );
1415  }
1416 
1417  # special case: don't return empty table
1418  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1419  $out = '';
1420  }
1421 
1422  return $out;
1423  }
1424 
/**
 * Run the main sequence of wikitext transformation passes over $text.
 *
 * In order: the ParserBeforeInternalParse hook (which may abort the parse),
 * variable/template expansion, HTML sanitization, tables, horizontal rules,
 * double-underscore handling, headings, internal links, quotes, external
 * links, magic links and finally heading formatting.
 *
 * @param string $text Text to transform
 * @param bool $isMain Passed through to formatHeadings()
 * @param object|bool $frame When truthy, the preprocessed DOM is expanded with
 *   $frame->expand(); when falsy, replaceVariables() is used instead
 * @return string The transformed text, or the (possibly hook-modified) input
 *   text if ParserBeforeInternalParse returned false
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	// formatHeadings() needs the text exactly as it was handed to us
	$origText = $text;

	// Hooks receive the parser by reference; a local alias avoids the
	// PHP 7.1 warning about passing $this by reference
	$parser = $this;

	# Give hooks the chance to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( $frame ) {
		# The frame depth tells us how include/noinclude tags are treated:
		# depth 0 is the top-level document, anything deeper is an inclusion
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$text = $frame->expand( $this->preprocessToDom( $text, $flag ) );
	} else {
		# No frame given: fall back to old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		$transparentTags,
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables must run after variable replacement to work properly; running
	# them ahead of the other transformations keeps things like link
	# expansions from turning up in surprising places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may leave absolute URLs behind; those were masked
	# to hide them from replaceExternalLinks, so drop the mask now
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $origText, $isMain );
}
1500 
/**
 * Finish a half-parsed text: unstrip, run block-level formatting, replace
 * link holders, apply language conversion, restore nowiki content and
 * transparent tags, normalize character references and tidy the result.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain When true, the ParserAfterUnstrip, ParserBeforeTidy and
 *   ParserAfterTidy hooks are run
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Hooks receive the parser by reference; a local alias avoids the
	// PHP 7.1 warning about passing $this by reference
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters; run once only, as the next-to-last step
	# before doBlockLevels
	$text = Sanitizer::armorFrenchSpaces( $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	// Content conversion is skipped when it is disabled by option, switched
	// off through the 'nocontentconvert' double underscore, or when parsing
	// an interface message.
	$skipConversion = $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| $this->mOptions->getInterfaceMessage();
	if ( !$skipConversion ) {
		# The position of this convert() call must not change: it assumes all
		# links have been replaced and only the <nowiki> marks remain.
		$text = $this->getTargetLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( !MWTidy::isEnabled() ) {
		# Attempt to sanitize at least some nesting problems
		# (T4702 and quite a few others)
		# This code path is buggy and deprecated!
		wfDeprecated( 'disabling tidy', '1.33' );
		$tidyregs = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# Fix up an anchor nested inside another anchor, at least for
			# a single level of nesting (T5695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			# Fix div inside inline elements: doBlockLevels won't wrap a line
			# which contains a div, so replace the div with escaped text here
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# Remove empty italic or bold tag pairs, some of which are
			# introduced by the rules above
			'/<([bi])><\/\\1>/' => '',
		];
		$patterns = array_keys( $tidyregs );
		$replacements = array_values( $tidyregs );

		$text = preg_replace( $patterns, $replacements, $text );
	} elseif ( $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1595 
/**
 * Find free external links and RFC, PMID and ISBN references in $text and
 * hand each match to magicLinkCallback() for replacement.
 *
 * Existing anchors and the insides of HTML tags are matched first so the
 * callback can leave them untouched.
 *
 * @param string $text
 * @return string
 */
public function doMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	# A non-newline space
	$space = self::SPACE_NOT_NL;
	# A dash or a non-newline space
	$spdash = '(?:-|' . $space . ')';
	# Possessive match of one or more spaces
	$spaces = $space . '++';

	return preg_replace_callback(
		'!(?: # Start cases
			(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
			(<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
			(\b # m[3]: Free external links
			(?i:$prots)
			($addr$urlChar*) # m[4]: Post-protocol path
			) |
			\b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
			([0-9]+)\b |
			\bISBN $spaces ( # m[6]: ISBN, capture number
			(?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
			(?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
			[0-9Xx] # check digit
			)\b
			)!xu",
		[ $this, 'magicLinkCallback' ],
		$text
	);
}
1632 
/**
 * Callback for the regex in doMagicLinks(): build the replacement for one match.
 *
 * @param array $m Match array: [1] anchor, [2] HTML element, [3] free external
 *   link (with [4] its post-protocol part), [5] RFC/PMID number, [6] ISBN
 * @return string HTML for the link, or the unchanged match when it must be
 *   skipped or the corresponding magic link type is disabled
 * @throws MWException If an RFC/PMID match starts with neither keyword
 */
public function magicLinkCallback( $m ) {
	# Anchors and other HTML elements pass through untouched
	if ( isset( $m[1] ) && $m[1] !== '' ) {
		return $m[0];
	}
	if ( isset( $m[2] ) && $m[2] !== '' ) {
		return $m[0];
	}

	# Free external link
	if ( isset( $m[3] ) && $m[3] !== '' ) {
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	# RFC or PMID
	if ( isset( $m[5] ) && $m[5] !== '' ) {
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			$keyword = 'RFC';
			$enabled = $this->mOptions->getMagicRFCLinks();
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			$keyword = 'PMID';
			$enabled = $this->mOptions->getMagicPMIDLinks();
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		if ( !$enabled ) {
			return $m[0];
		}

		// Per keyword: URL message key, CSS class, tracking category
		$details = [
			'RFC' => [ 'rfcurl', 'mw-magiclink-rfc', 'magiclink-tracking-rfc' ],
			'PMID' => [ 'pubmedurl', 'mw-magiclink-pmid', 'magiclink-tracking-pmid' ],
		];
		list( $urlmsg, $cssClass, $trackingCat ) = $details[$keyword];
		$id = $m[5];

		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink(
			$url,
			"{$keyword} {$id}",
			true,
			$cssClass,
			[],
			$this->getTitle()
		);
	}

	# Anything that is not an enabled ISBN match is returned unchanged
	if ( !isset( $m[6] ) || $m[6] === ''
		|| !$this->mOptions->getMagicISBNLinks()
	) {
		return $m[0];
	}

	# ISBN: show non-newline spaces as plain spaces, and strip all
	# delimiters to get the number used for the Booksources subpage
	$spacePattern = '/' . self::SPACE_NOT_NL . '/';
	$isbn = preg_replace( $spacePattern, ' ', $m[6] );
	$num = strtr( $isbn, [
		'-' => '',
		' ' => '',
		'x' => 'X',
	] );
	$this->addTrackingCategory( 'magiclink-tracking-isbn' );
	return $this->getLinkRenderer()->makeKnownLink(
		SpecialPage::getTitleFor( 'Booksources', $num ),
		"ISBN $isbn",
		[
			'class' => 'internal mw-magiclink-isbn',
			'title' => false // suppress title attribute
		]
	);
}
1707 
/**
 * Turn a free (unbracketed) URL into a link or an external image.
 *
 * Escaped angle brackets / non-breaking spaces and trailing punctuation are
 * split off the URL and appended after the generated HTML.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Length of the part of the match after the protocol
 *   (magicLinkCallback() passes the byte length of its m[4] capture)
 * @return string HTML, or the original text when nothing but the protocol
 *   would be left after removing the trail
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by removeHTMLtags())
	# should not be included in URLs, per RFC 2396.
	# &nbsp; terminates a URL as well (bug T84937)
	$entity = [];
	$hasEntity = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entity,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasEntity ) {
		$cut = $entity[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Trailing punctuation moves to $trail. Without a left bracket in the
	# URL, right brackets are fair game too.
	$sep = strpos( $url, '(' ) === false ? ',;\.:!?)' : ',;\.:!?';

	$reversed = strrev( $url );
	$numSepChars = strspn( $reversed, $sep );
	# Don't break a trailing HTML entity by moving its ';' into $trail.
	# This is hot code: substr_compare avoids creating a new string for the
	# comparison, and instead of a slow $-anchored preg_match the match runs
	# on the reversed string from the desired offset.
	# The un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $numSepChars
		&& substr_compare( $url, ";", -$numSepChars, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $entity, 0, $numSepChars )
	) {
		$numSepChars--;
	}
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	# After trail removal there must still be a real URL left, not just a
	# lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
		$html = Linker::makeExternalLink(
			$url,
			$label,
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->getTitle()
		);
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}
	return $html . $trail;
}
1782 
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that longer runs of '=' are
 * consumed before shorter ones can match them.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$marker = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings.
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = '/^(?:' . $marker . ')[ \\t]*(.+?)[ \\t]*(?:' . $marker . ')\\s*$/m';
		$replacement = '<h' . $level . '>\\1</h' . $level . '>';
		$text = preg_replace( $pattern, $replacement, $text );
	}
	return $text;
}
1801 
/**
 * Apply doQuotes() to every line of $text and join the results with newlines.
 *
 * @param string $text
 * @return string
 */
public function doAllQuotes( $text ) {
	$result = '';
	$needsSeparator = false;
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		// Put the newline in front of every line but the first, so no
		// trailing separator has to be trimmed afterwards
		if ( $needsSeparator ) {
			$result .= "\n";
		}
		$result .= $this->doQuotes( $line );
		$needsSeparator = true;
	}
	return $result;
}
1819 
/**
 * Convert runs of apostrophes on a single line into <i> and <b> markup.
 *
 * Two apostrophes toggle italics, three toggle bold and five toggle both.
 * Runs of four, or of more than five, are reduced to three or five with the
 * surplus apostrophes kept as literal text. When the line has an odd number
 * of both bold and italic marks, one bold mark is reinterpreted as an
 * apostrophe followed by an italic mark.
 *
 * @param string $text A single line of text
 * @return string The line with quote mark-up replaced by HTML tags; the
 *   input is returned unchanged if it contains no run of two or more
 *   apostrophes
 */
public function doQuotes( $text ) {
	// Odd indexes of $arr hold the apostrophe runs, even indexes the text between them
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr === 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen === 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen === 2 ) {
			$numitalics++;
		} elseif ( $thislen === 3 ) {
			$numbold++;
		} elseif ( $thislen === 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 === 1 ) && ( $numitalics % 2 === 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) === 3 ) {
				// Last and second-to-last characters of the text before the mark
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace === -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword === -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one quintuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state records the open tags in opening order ('', 'i', 'b', 'ib', 'bi'),
	// or 'both' after a ''''' whose nesting order is not yet known; while in
	// 'both', text is collected in $buffer instead of $output.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) === 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen === 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen === 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen === 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness check would treat
	// the buffered text "0" as empty and silently drop it.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1999 
/**
 * Replace bracketed external links in $text with HTML links.
 *
 * The text is split with $this->mExtLinkBracketedRegex; after the leading
 * plain text, the pieces come in groups of four: URL, protocol, link text
 * and the text that follows the link.
 *
 * @param string $text
 * @return string
 * @throws MWException If the split fails
 */
public function replaceExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}
	$s = array_shift( $bits );

	$total = count( $bits );
	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $bits[$pos];
		// $bits[$pos + 1] is the protocol, which is not needed here
		$label = $bits[$pos + 2];
		$trail = $bits[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$entity = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cut = $entity[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
			# Link type for CSS
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			$linktype = 'text';
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		$protocolPattern = '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/';
		if ( preg_match( $protocolPattern, $label ) ) {
			$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$link = Linker::makeExternalLink(
			$url,
			$label,
			false,
			$linktype,
			$this->getExternalLinkAttribs( $url ),
			$this->getTitle()
		);
		$s .= $link . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $s;
}
2082 
/**
 * Get the rel attribute value for an external link, based on the nofollow
 * configuration.
 *
 * @param string|bool $url URL checked against $wgNoFollowDomainExceptions
 * @param object|null $title Title whose namespace is checked against
 *   $wgNoFollowNsExceptions; when not given, false is used as the namespace
 * @return string|null 'nofollow' when nofollow applies, otherwise null
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	// The configuration variables must be imported into function scope;
	// without this declaration they are undefined locals and the nofollow
	// branch below can never be taken.
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	$ns = $title ? $title->getNamespace() : false;
	if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
2102 
/**
 * Get an associative array of additional HTML attributes for an external
 * link: 'rel', plus 'target' when an external link target is configured.
 *
 * @param string $url
 * @return array Attribute name => value; 'rel' is null when no rel value applies
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->getTitle() );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// getExternalLinkRel() returns null when nofollow does not
			// apply; only add the separator when there is a value to
			// separate from, otherwise rel would start with a stray space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2133 
/**
 * Normalize the percent-encoding of a URL.
 *
 * Unsafe characters are percent-encoded first. Then, separately for the
 * fragment, the query and the remaining scheme-and-path part, escapes of
 * printable ASCII characters that are not in that part's unsafe list are
 * decoded and all remaining escapes are upper-cased. The brackets of a
 * valid IPv6 host are restored at the end.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	$isIPv6 = false;
	$hostMatch = [];
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $hostMatch ) ) {
		$candidate = rawurldecode( $hostMatch[1] );
		if ( IP::isValid( $candidate ) ) {
			$isIPv6 = $candidate;
		}
	}

	# Make sure unsafe characters are encoded
	$encodeMatch = function ( $m ) {
		return rawurlencode( $m[0] );
	};
	$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/', $encodeMatch, $url );

	$ret = '';
	$end = strlen( $url );

	# Work backwards from the end of the URL:
	# fragment part - 'fragment'; query part - 'query' minus &=+;
	$trailingParts = [
		'#' => '"#%<>[\]^`{|}',
		'?' => '"#%<>[\]^`{|}&=+;',
	];
	foreach ( $trailingParts as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start !== false && $start < $end ) {
			$part = substr( $url, $start, $end - $start );
			$ret = self::normalizeUrlComponent( $part, $unsafe ) . $ret;
			$end = $start;
		}
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$head = substr( $url, 0, $end );
	$ret = self::normalizeUrlComponent( $head, '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax: the host brackets were percent-encoded above
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2200 
/**
 * Normalize the percent-escapes within one URL component.
 *
 * An escape is decoded when it stands for a printable ASCII character (code
 * 33 to 126) that is not listed in $unsafe; every other escape is kept but
 * upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must stay escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$printable = $code > 32 && $code < 127;
			# Unescape safe printable characters; otherwise leave the
			# escape in place, but use uppercase for a-f
			return ( $printable && strpos( $unsafe, $decoded ) === false )
				? $decoded
				: strtoupper( $escape[0] );
		},
		$component
	);
}
2215 
/**
 * Make an image tag if $url is an allowed external image URL.
 *
 * A URL qualifies when it matches EXT_IMAGE_REGEX and either external images
 * are allowed in general, it starts with one of the allowed source prefixes,
 * or (with the whitelist enabled) it matches a line of the
 * 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|bool The image HTML, or false when no image was made
 */
private function maybeMakeExternalImage( $url ) {
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
	$imagesexception = !empty( $imagesfrom );

	# $imagesfrom is either a single prefix string or an array of them;
	# treat both as a list of prefixes
	$imagematch = false;
	if ( $imagesexception ) {
		$prefixes = is_array( $imagesfrom ) ? $imagesfrom : [ $imagesfrom ];
		foreach ( $prefixes as $allowedPrefix ) {
			if ( strpos( $url, $allowedPrefix ) === 0 ) {
				$imagematch = true;
				break;
			}
		}
	}

	$text = false;
	$allowed = $this->mOptions->getAllowExternalImages()
		|| ( $imagesexception && $imagematch );
	if ( $allowed && preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
		# Image found
		$text = Linker::makeExternalImage( $url );
	}

	if ( $text || !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $text;
	}

	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $whitelist as $entry ) {
		# Ignore blank entries and comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		# Sanitize the regex fragment and make it case-insensitive
		$entryPattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
		if ( preg_match( $entryPattern, $url ) ) {
			# Image matches a whitelist entry
			$text = Linker::makeExternalImage( $url );
			break;
		}
	}
	return $text;
}
2273 
/**
 * Process [[ ]] wikilinks in $s and merge the resulting link holders into
 * $this->mLinkHolders.
 *
 * @param string $s
 * @return string The text as rewritten by replaceInternalLinks2()
 */
public function replaceInternalLinks( $s ) {
	$target = $this->mLinkHolders;
	// replaceInternalLinks2() takes $s by reference and rewrites it in place
	$newHolders = $this->replaceInternalLinks2( $s );
	$target->merge( $newHolders );
	return $s;
}
2287 
2295  public function replaceInternalLinks2( &$s ) {
2296  static $tc = false, $e1, $e1_img;
2297  # the % is needed to support urlencoded titles as well
2298  if ( !$tc ) {
2299  $tc = Title::legalChars() . '#%';
2300  # Match a link having the form [[namespace:link|alternate]]trail
2301  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2302  # Match cases where there is no "]]", which might still be images
2303  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2304  }
2305 
2306  $holders = new LinkHolderArray( $this );
2307 
2308  # split the entire text string on occurrences of [[
2309  $a = StringUtils::explode( '[[', ' ' . $s );
2310  # get the first element (all text up to first [[), and remove the space we added
2311  $s = $a->current();
2312  $a->next();
2313  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2314  $s = substr( $s, 1 );
2315 
2316  $nottalk = !$this->getTitle()->isTalkPage();
2317 
2318  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2319  $e2 = null;
2320  if ( $useLinkPrefixExtension ) {
2321  # Match the end of a line for a word that's not followed by whitespace,
2322  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2323  $charset = $this->contLang->linkPrefixCharset();
2324  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2325  $m = [];
2326  if ( preg_match( $e2, $s, $m ) ) {
2327  $first_prefix = $m[2];
2328  } else {
2329  $first_prefix = false;
2330  }
2331  } else {
2332  $prefix = '';
2333  }
2334 
2335  $useSubpages = $this->areSubpagesAllowed();
2336 
2337  # Loop for each link
2338  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2339  # Check for excessive memory usage
2340  if ( $holders->isBig() ) {
2341  # Too big
2342  # Do the existence check, replace the link holders and clear the array
2343  $holders->replace( $s );
2344  $holders->clear();
2345  }
2346 
2347  if ( $useLinkPrefixExtension ) {
2348  if ( preg_match( $e2, $s, $m ) ) {
2349  list( , $s, $prefix ) = $m;
2350  } else {
2351  $prefix = '';
2352  }
2353  # first link
2354  if ( $first_prefix ) {
2355  $prefix = $first_prefix;
2356  $first_prefix = false;
2357  }
2358  }
2359 
2360  $might_be_img = false;
2361 
2362  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2363  $text = $m[2];
2364  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2365  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2366  # the real problem is with the $e1 regex
2367  # See T1500.
2368  # Still some problems for cases where the ] is meant to be outside punctuation,
2369  # and no image is in sight. See T4095.
2370  if ( $text !== ''
2371  && substr( $m[3], 0, 1 ) === ']'
2372  && strpos( $text, '[' ) !== false
2373  ) {
2374  $text .= ']'; # so that replaceExternalLinks($text) works later
2375  $m[3] = substr( $m[3], 1 );
2376  }
2377  # fix up urlencoded title texts
2378  if ( strpos( $m[1], '%' ) !== false ) {
2379  # Should anchors '#' also be rejected?
2380  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2381  }
2382  $trail = $m[3];
2383  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2384  # Invalid, but might be an image with a link in its caption
2385  $might_be_img = true;
2386  $text = $m[2];
2387  if ( strpos( $m[1], '%' ) !== false ) {
2388  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2389  }
2390  $trail = "";
2391  } else { # Invalid form; output directly
2392  $s .= $prefix . '[[' . $line;
2393  continue;
2394  }
2395 
2396  $origLink = ltrim( $m[1], ' ' );
2397 
2398  # Don't allow internal links to pages containing
2399  # PROTO: where PROTO is a valid URL protocol; these
2400  # should be external links.
2401  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2402  $s .= $prefix . '[[' . $line;
2403  continue;
2404  }
2405 
2406  # Make subpage if necessary
2407  if ( $useSubpages ) {
2408  $link = $this->maybeDoSubpageLink( $origLink, $text );
2409  } else {
2410  $link = $origLink;
2411  }
2412 
2413  // \x7f isn't a default legal title char, so most likely strip
2414  // markers will force us into the "invalid form" path above. But,
2415  // just in case, let's assert that xmlish tags aren't valid in
2416  // the title position.
2417  $unstrip = $this->mStripState->killMarkers( $link );
2418  $noMarkers = ( $unstrip === $link );
2419 
2420  $nt = $noMarkers ? Title::newFromText( $link ) : null;
2421  if ( $nt === null ) {
2422  $s .= $prefix . '[[' . $line;
2423  continue;
2424  }
2425 
2426  $ns = $nt->getNamespace();
2427  $iw = $nt->getInterwiki();
2428 
2429  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2430 
2431  if ( $might_be_img ) { # if this is actually an invalid link
2432  if ( $ns == NS_FILE && $noforce ) { # but might be an image
2433  $found = false;
2434  while ( true ) {
2435  # look at the next 'line' to see if we can close it there
2436  $a->next();
2437  $next_line = $a->current();
2438  if ( $next_line === false || $next_line === null ) {
2439  break;
2440  }
2441  $m = explode( ']]', $next_line, 3 );
2442  if ( count( $m ) == 3 ) {
2443  # the first ]] closes the inner link, the second the image
2444  $found = true;
2445  $text .= "[[{$m[0]}]]{$m[1]}";
2446  $trail = $m[2];
2447  break;
2448  } elseif ( count( $m ) == 2 ) {
2449  # if there's exactly one ]] that's fine, we'll keep looking
2450  $text .= "[[{$m[0]}]]{$m[1]}";
2451  } else {
2452  # if $next_line is invalid too, we need look no further
2453  $text .= '[[' . $next_line;
2454  break;
2455  }
2456  }
2457  if ( !$found ) {
2458  # we couldn't find the end of this imageLink, so output it raw
2459  # but don't ignore what might be perfectly normal links in the text we've examined
2460  $holders->merge( $this->replaceInternalLinks2( $text ) );
2461  $s .= "{$prefix}[[$link|$text";
2462  # note: no $trail, because without an end, there *is* no trail
2463  continue;
2464  }
2465  } else { # it's not an image, so output it raw
2466  $s .= "{$prefix}[[$link|$text";
2467  # note: no $trail, because without an end, there *is* no trail
2468  continue;
2469  }
2470  }
2471 
2472  $wasblank = ( $text == '' );
2473  if ( $wasblank ) {
2474  $text = $link;
2475  if ( !$noforce ) {
2476  # Strip off leading ':'
2477  $text = substr( $text, 1 );
2478  }
2479  } else {
2480  # T6598 madness. Handle the quotes only if they come from the alternate part
2481  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2482  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2483  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2484  $text = $this->doQuotes( $text );
2485  }
2486 
2487  # Link not escaped by : , create the various objects
2488  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2489  # Interwikis
2490  if (
2491  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2492  Language::fetchLanguageName( $iw, null, 'mw' ) ||
2493  in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2494  )
2495  ) {
2496  # T26502: filter duplicates
2497  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2498  $this->mLangLinkLanguages[$iw] = true;
2499  $this->mOutput->addLanguageLink( $nt->getFullText() );
2500  }
2501 
2505  $s = rtrim( $s . $prefix ) . $trail; # T175416
2506  continue;
2507  }
2508 
2509  if ( $ns == NS_FILE ) {
2510  if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->getTitle() ) ) {
2511  if ( $wasblank ) {
2512  # if no parameters were passed, $text
2513  # becomes something like "File:Foo.png",
2514  # which we don't want to pass on to the
2515  # image generator
2516  $text = '';
2517  } else {
2518  # recursively parse links inside the image caption
2519  # actually, this will parse them in any other parameters, too,
2520  # but it might be hard to fix that, and it doesn't matter ATM
2521  $text = $this->replaceExternalLinks( $text );
2522  $holders->merge( $this->replaceInternalLinks2( $text ) );
2523  }
2524  # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2525  $s .= $prefix . $this->armorLinks(
2526  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2527  continue;
2528  }
2529  } elseif ( $ns == NS_CATEGORY ) {
2533  $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2534 
2535  if ( $wasblank ) {
2536  $sortkey = $this->getDefaultSort();
2537  } else {
2538  $sortkey = $text;
2539  }
2540  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2541  $sortkey = str_replace( "\n", '', $sortkey );
2542  $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2543  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2544 
2545  continue;
2546  }
2547  }
2548 
2549  # Self-link checking. For some languages, variants of the title are checked in
2550  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2551  # for linking to a different variant.
2552  if ( $ns != NS_SPECIAL && $nt->equals( $this->getTitle() ) && !$nt->hasFragment() ) {
2553  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2554  continue;
2555  }
2556 
2557  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2558  # @todo FIXME: Should do batch file existence checks, see comment below
2559  if ( $ns == NS_MEDIA ) {
2560  # Give extensions a chance to select the file revision for us
2561  $options = [];
2562  $descQuery = false;
2563  Hooks::run( 'BeforeParserFetchFileAndTitle',
2564  [ $this, $nt, &$options, &$descQuery ] );
2565  # Fetch and register the file (file title may be different via hooks)
2566  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2567  # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2568  $s .= $prefix . $this->armorLinks(
2569  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2570  continue;
2571  }
2572 
2573  # Some titles, such as valid special pages or files in foreign repos, should
2574  # be shown as bluelinks even though they're not included in the page table
2575  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2576  # batch file existence checks for NS_FILE and NS_MEDIA
2577  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2578  $this->mOutput->addLink( $nt );
2579  $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2580  } else {
2581  # Links will be added to the output link list after checking
2582  $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2583  }
2584  }
2585  return $holders;
2586  }
2587 
/**
 * Render a link to a target that is treated as known, and cloak any URL
 * protocols in the generated markup via armorLinks().
 *
 * @param Title $nt Link target (must provide getPrefixedText())
 * @param string $text Link label; it is wrapped in HtmlArmor as-is, so it is
 *   presumably expected to be safe HTML already. When empty, the
 *   HTML-escaped prefixed title text is used instead.
 * @param string $trail Text following the link. Linker::splitTrail() splits
 *   it into a part placed inside the link and a remainder appended after it.
 * @param string $prefix Text placed inside the link, before the label
 * @return string Armoured link markup followed by the remaining trail
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	$trailParts = Linker::splitTrail( $trail );
	$inside = $trailParts[0];
	$trail = $trailParts[1];

	# Fall back to the (escaped) title when no label was supplied
	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$renderer = $this->getLinkRenderer();
	$html = $renderer->makeKnownLink( $nt, new HtmlArmor( "$prefix$label$inside" ) );

	return $this->armorLinks( $html ) . $trail;
}
2614 
/**
 * Cloak URL protocols in $text by inserting MARKER_PREFIX . "NOPARSE" in
 * front of each one, so that callers can keep replaceExternalLinks() from
 * touching absolute URLs inside already-generated markup.
 *
 * @param string $text Markup to protect
 * @return string $text with every protocol match (case-insensitive, at a
 *   word boundary) prefixed by the NOPARSE marker
 */
public function armorLinks( $text ) {
	# $this->mUrlProtocols is spliced in verbatim as a regex alternation
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$cloakedProtocol = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $cloakedProtocol, $text );
}
2629 
/**
 * Tell whether the namespace of the page being parsed has subpages enabled.
 *
 * @return bool Result of NamespaceInfo::hasSubpages() for the current title's namespace
 */
public function areSubpagesAllowed() {
	$currentNamespace = $this->getTitle()->getNamespace();

	# Not every namespace permits subpages
	return $this->nsInfo->hasSubpages( $currentNamespace );
}
2638 
/**
 * Normalize a possibly relative subpage link against the page being parsed.
 * Thin wrapper around Linker::normalizeSubpageLink().
 *
 * @param string $target Link target as written
 * @param string &$text Link text; handed on to the Linker helper, which may update it
 * @return string Whatever Linker::normalizeSubpageLink() returns for the target
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$contextTitle = $this->getTitle();

	return Linker::normalizeSubpageLink( $contextTitle, $target, $text );
}
2650 
/**
 * Run the block-level pass over $text. Delegates entirely to
 * BlockLevelPass::doBlockLevels().
 *
 * @param string $text Text to process
 * @param bool $linestart Passed through unchanged to BlockLevelPass
 * @return string Result of the block-level pass
 */
public function doBlockLevels( $text, $linestart ) {
	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $processed;
}
2662 
/**
 * Compute the value of a magic variable such as PAGENAME or CURRENTDAY.
 *
 * Values computed in the switch below are stored in $this->mVarCache and
 * served from there on later calls, except for the cases that return
 * directly (site configuration values, 'directionmark', and unknown
 * identifiers, which are delegated to the ParserGetVariableValueSwitch hook).
 *
 * @param string $index Magic variable identifier
 * @param bool|PPFrame $frame Only forwarded to the ParserGetVariableValueSwitch hook
 * @return mixed The variable's value; for unknown identifiers, whatever the
 *   hook stored in its $ret argument (null when no handler set it)
 */
public function getVariableValue( $index, $frame = false ) {
	// Hooks receive the parser by reference; hand them a local alias so that
	// $this itself is not passed by reference (PHP 7.1 warning)
	$parser = $this;

	// The hook always runs; a false result disables the cache lookup
	$cacheAllowed = Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] );
	if ( $cacheAllowed && isset( $this->mVarCache[$index] ) ) {
		return $this->mVarCache[$index];
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );

	$pageLang = $this->getFunctionLang();

	// Lazy formatters: a timestamp object is only built for the cases that need one
	$utcFormat = static function ( $format ) use ( $ts ) {
		return MWTimestamp::getInstance( $ts )->format( $format );
	};
	$localFormat = static function ( $format ) use ( $ts ) {
		return MWTimestamp::getLocalInstance( $ts )->format( $format );
	};

	switch ( $index ) {
		case '!':
			$value = '|';
			break;

		# Date parts for the parse timestamp (MWTimestamp::getInstance)
		case 'currentmonth':
			$value = $pageLang->formatNum( $utcFormat( 'm' ), true );
			break;
		case 'currentmonth1':
			$value = $pageLang->formatNum( $utcFormat( 'n' ), true );
			break;
		case 'currentmonthname':
			$value = $pageLang->getMonthName( $utcFormat( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$value = $pageLang->getMonthNameGen( $utcFormat( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( $utcFormat( 'n' ) );
			break;
		case 'currentday':
			$value = $pageLang->formatNum( $utcFormat( 'j' ), true );
			break;
		case 'currentday2':
			$value = $pageLang->formatNum( $utcFormat( 'd' ), true );
			break;

		# The same date parts, taken from MWTimestamp::getLocalInstance
		case 'localmonth':
			$value = $pageLang->formatNum( $localFormat( 'm' ), true );
			break;
		case 'localmonth1':
			$value = $pageLang->formatNum( $localFormat( 'n' ), true );
			break;
		case 'localmonthname':
			$value = $pageLang->getMonthName( $localFormat( 'n' ) );
			break;
		case 'localmonthnamegen':
			$value = $pageLang->getMonthNameGen( $localFormat( 'n' ) );
			break;
		case 'localmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( $localFormat( 'n' ) );
			break;
		case 'localday':
			$value = $pageLang->formatNum( $localFormat( 'j' ), true );
			break;
		case 'localday2':
			$value = $pageLang->formatNum( $localFormat( 'd' ), true );
			break;

		# Page name variants; the "...e" forms use the URL form of the name
		case 'pagename':
			$value = wfEscapeWikiText( $this->getTitle()->getText() );
			break;
		case 'pagenamee':
			$value = wfEscapeWikiText( $this->getTitle()->getPartialURL() );
			break;
		case 'fullpagename':
			$value = wfEscapeWikiText( $this->getTitle()->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$value = wfEscapeWikiText( $this->getTitle()->getPrefixedURL() );
			break;
		case 'subpagename':
			$value = wfEscapeWikiText( $this->getTitle()->getSubpageText() );
			break;
		case 'subpagenamee':
			$value = wfEscapeWikiText( $this->getTitle()->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$value = wfEscapeWikiText( $this->getTitle()->getRootText() );
			break;
		case 'rootpagenamee':
			$rootDbForm = str_replace( ' ', '_', $this->getTitle()->getRootText() );
			$value = wfEscapeWikiText( wfUrlencode( $rootDbForm ) );
			break;
		case 'basepagename':
			$value = wfEscapeWikiText( $this->getTitle()->getBaseText() );
			break;
		case 'basepagenamee':
			$baseDbForm = str_replace( ' ', '_', $this->getTitle()->getBaseText() );
			$value = wfEscapeWikiText( wfUrlencode( $baseDbForm ) );
			break;
		case 'talkpagename':
			# Empty when the title cannot have a talk page
			$value = $this->getTitle()->canHaveTalkPage()
				? wfEscapeWikiText( $this->getTitle()->getTalkPage()->getPrefixedText() )
				: '';
			break;
		case 'talkpagenamee':
			$value = $this->getTitle()->canHaveTalkPage()
				? wfEscapeWikiText( $this->getTitle()->getTalkPage()->getPrefixedURL() )
				: '';
			break;
		case 'subjectpagename':
			$value = wfEscapeWikiText( $this->getTitle()->getSubjectPage()->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$value = wfEscapeWikiText( $this->getTitle()->getSubjectPage()->getPrefixedURL() );
			break;

		case 'pageid': // requested in T25427
			# Tell the edit saving system that the canonical output after page
			# insertion needs a parse that used that exact page ID
			$this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
			$value = $this->getTitle()->getArticleID();
			if ( !$value ) {
				# No ID yet: fall back to the speculative one and record its use
				$value = $this->mOptions->getSpeculativePageId();
				if ( $value ) {
					$this->mOutput->setSpeculativePageIdUsed( $value );
				}
			}
			break;
		case 'revisionid':
			// @TODO: disallow this word on all namespaces
			$useStub = $this->svcOptions->get( 'MiserMode' ) &&
				!$this->mOptions->getInterfaceMessage() &&
				$this->nsInfo->isContent( $this->getTitle()->getNamespace() );
			if ( $useStub ) {
				// Use a stub result instead of the actual revision ID in order to avoid
				// double parses on page save but still allow preview detection (T137900)
				if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
					$value = '-';
				} else {
					$this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
					$value = '';
				}
			} else {
				# Tell the edit saving system that the canonical output after revision
				# insertion needs a parse that used that exact revision ID
				$this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
				$value = $this->getRevisionId();
				if ( $value === 0 ) {
					$rev = $this->getRevisionObject();
					$value = $rev ? $rev->getId() : $value;
				}
				if ( !$value ) {
					$value = $this->mOptions->getSpeculativeRevId();
					if ( $value ) {
						$this->mOutput->setSpeculativeRevIdUsed( $value );
					}
				}
			}
			break;

		# Slices of the revision timestamp; the (int) casts drop zero padding
		case 'revisionday':
			$value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
			break;
		case 'revisionday2':
			$value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
			break;
		case 'revisionmonth':
			$value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
			break;
		case 'revisionmonth1':
			$value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
			break;
		case 'revisionyear':
			$value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
			break;
		case 'revisiontimestamp':
			$value = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
			break;
		case 'revisionuser':
			# Tell the edit saving system that the canonical output after revision
			# insertion needs a parse that used the actual user ID
			$this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
			$value = $this->getRevisionUser();
			break;
		case 'revisionsize':
			$value = $this->getRevisionSize();
			break;

		# Namespace names; the "...e" forms are URL-encoded
		case 'namespace':
			$nsText = $this->contLang->getNsText( $this->getTitle()->getNamespace() );
			$value = str_replace( '_', ' ', $nsText );
			break;
		case 'namespacee':
			$value = wfUrlencode( $this->contLang->getNsText( $this->getTitle()->getNamespace() ) );
			break;
		case 'namespacenumber':
			$value = $this->getTitle()->getNamespace();
			break;
		case 'talkspace':
			$value = $this->getTitle()->canHaveTalkPage()
				? str_replace( '_', ' ', $this->getTitle()->getTalkNsText() )
				: '';
			break;
		case 'talkspacee':
			$value = $this->getTitle()->canHaveTalkPage()
				? wfUrlencode( $this->getTitle()->getTalkNsText() )
				: '';
			break;
		case 'subjectspace':
			$value = str_replace( '_', ' ', $this->getTitle()->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$value = wfUrlencode( $this->getTitle()->getSubjectNsText() );
			break;

		case 'currentdayname':
			# format( 'w' ) is 0-based; getWeekdayName() is called with a 1-based index
			$value = $pageLang->getWeekdayName( (int)$utcFormat( 'w' ) + 1 );
			break;
		case 'currentyear':
			$value = $pageLang->formatNum( $utcFormat( 'Y' ), true );
			break;
		case 'currenttime':
			$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$value = $pageLang->formatNum( $utcFormat( 'H' ), true );
			break;
		case 'currentweek':
			# @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)$utcFormat( 'W' ) );
			break;
		case 'currentdow':
			$value = $pageLang->formatNum( $utcFormat( 'w' ) );
			break;
		case 'localdayname':
			$value = $pageLang->getWeekdayName( (int)$localFormat( 'w' ) + 1 );
			break;
		case 'localyear':
			$value = $pageLang->formatNum( $localFormat( 'Y' ), true );
			break;
		case 'localtime':
			$value = $pageLang->time( $localFormat( 'YmdHis' ), false, false );
			break;
		case 'localhour':
			$value = $pageLang->formatNum( $localFormat( 'H' ), true );
			break;
		case 'localweek':
			# @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)$localFormat( 'W' ) );
			break;
		case 'localdow':
			$value = $pageLang->formatNum( $localFormat( 'w' ) );
			break;

		# Site statistics
		case 'numberofarticles':
			$value = $pageLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $pageLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $pageLang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$value = $pageLang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$value = $pageLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$value = $pageLang->formatNum( SiteStats::edits() );
			break;

		case 'currenttimestamp':
			$value = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$value = $localFormat( 'YmdHis' );
			break;
		case 'currentversion':
			$value = SpecialVersion::getVersion();
			break;

		# The following cases return directly and therefore bypass mVarCache
		case 'articlepath':
			return $this->svcOptions->get( 'ArticlePath' );
		case 'sitename':
			return $this->svcOptions->get( 'Sitename' );
		case 'server':
			return $this->svcOptions->get( 'Server' );
		case 'servername':
			return $this->svcOptions->get( 'ServerName' );
		case 'scriptpath':
			return $this->svcOptions->get( 'ScriptPath' );
		case 'stylepath':
			return $this->svcOptions->get( 'StylePath' );
		case 'directionmark':
			return $pageLang->getDirMark();
		case 'contentlanguage':
			return $this->svcOptions->get( 'LanguageCode' );

		case 'pagelanguage':
			$value = $pageLang->getCode();
			break;
		case 'cascadingsources':
			$value = CoreParserFunctions::cascadingsources( $this );
			break;
		default:
			# Unknown identifier: let hook handlers supply the value through $ret
			$ret = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				[ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
			);

			return $ret;
	}

	# Remember the computed value for subsequent lookups
	if ( $index ) {
		$this->mVarCache[$index] = $value;
	}

	return $value;
}
3012 
/**
 * Extract part of the revision timestamp and, when no revision object is
 * available yet, flag the output as varying on the revision timestamp if
 * that part would differ $mtts seconds from now.
 *
 * @param int $start Offset into the timestamp string
 * @param int $len Number of characters to take
 * @param int $mtts Number of seconds added to the current time for the comparison
 * @param string $variable Name of the magic variable, used in the flag's reason text
 * @return string The requested slice of the revision timestamp
 */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
	# Slice of the timestamp used for this revision
	$resNow = substr( $this->getRevisionTimestamp(), $start, $len );

	# With an associated revision there is nothing to vary on
	if ( $this->getRevisionObject() ) {
		return $resNow;
	}

	# Same slice, but $mtts seconds in the future. This future is relative to
	# the current time, not to the timestamp of the parser options, so the
	# rendered value can be compared with the one derived from those options.
	$futureTimestamp = wfTimestamp( TS_MW, time() + $mtts );
	$resThen = substr( $this->contLang->userAdjust( $futureTimestamp, '' ), $start, $len );

	if ( $resNow !== $resThen ) {
		# Tell the edit saving system that the canonical output after revision
		# insertion needs a parse that used an actual revision timestamp
		$this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
	}

	return $resNow;
}
3044 
/**
 * Build the magic word arrays for variables ($this->mVariables) and for the
 * subst prefixes ($this->mSubstWords) from the magic word factory.
 */
public function initialiseVariables() {
	$factory = $this->magicWordFactory;

	$idsOfVariables = $factory->getVariableIDs();
	$idsOfSubst = $factory->getSubstIDs();

	$this->mVariables = $factory->newArray( $idsOfVariables );
	$this->mSubstWords = $factory->newArray( $idsOfSubst );
}
3057 
/**
 * Preprocess wikitext into the object returned by the preprocessor's
 * preprocessToObj() method.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Flags handed to the preprocessor (e.g. self::PTD_FOR_INCLUSION)
 * @return mixed Result of Preprocessor::preprocessToObj()
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3084 
/**
 * Split a string into its leading whitespace, its trimmed content and its
 * trailing whitespace. "Whitespace" is whatever ltrim()/rtrim() strip by default.
 *
 * @param string $s String to split
 * @return array Three elements: [ leading whitespace, trimmed text, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	$withoutLeading = ltrim( $s );
	$leading = substr( $s, 0, strlen( $s ) - strlen( $withoutLeading ) );

	$core = rtrim( $withoutLeading );
	$trailingLength = strlen( $withoutLeading ) - strlen( $core );

	# substr( ..., -0 ) would return the whole string, hence the explicit check
	$trailing = ( $trailingLength > 0 ) ? substr( $withoutLeading, -$trailingLength ) : '';

	return [ $leading, $core, $trailing ];
}
3104 
/**
 * Preprocess $text and expand it in the given frame.
 *
 * Empty text, and text longer than the max include size from the parser
 * options, is returned unchanged.
 *
 * @param string $text Wikitext to expand
 * @param bool|PPFrame|array $frame Frame to expand in. False creates a fresh
 *   frame; any other non-PPFrame value is passed to newCustomFrame().
 * @param bool $argsOnly When true, expansion uses PPFrame::NO_TEMPLATES
 * @return string Expanded text
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Nothing to do for empty text
	$textSize = strlen( $text );
	if ( $textSize < 1 ) {
		return $text;
	}
	# Prevent too big inclusions
	if ( $textSize > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			$this->logger->debug(
				__METHOD__ . " called using plain parameters instead of " .
				"a PPFrame instance. Creating custom frame."
			);
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$dom = $this->preprocessToDom( $text );
	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $dom, $expandFlags );
}
3148 
/**
 * Turn a list of template-style argument strings into an associative array.
 *
 * An argument without "=" is stored under the next free positional index
 * (starting at 1). An argument containing "=" is split at the first "=";
 * both the name and the value are trimmed.
 *
 * @param string[] $args Raw argument strings
 * @return array Map of argument name or position to value
 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	$index = 1;
	foreach ( $args as $arg ) {
		# Split at the first "=" only; later "=" characters belong to the value
		$parts = explode( '=', $arg, 2 );
		if ( count( $parts ) === 1 ) {
			# Positional argument: stored untrimmed
			$assocArgs[$index++] = $arg;
		} else {
			# trim() always yields a string, so no "=== false" guards are needed
			$assocArgs[trim( $parts[0] )] = trim( $parts[1] );
		}
	}

	return $assocArgs;
}
3177 
/**
 * Record that a parser limitation was hit: add the "<type>-warning" message
 * as an output warning and add the "<type>-category" tracking category.
 *
 * @param string $limitationType Message key prefix of the limitation
 * @param string|int $current Current value, passed as a numeric message parameter
 * @param string|int $max Maximum value, passed as a numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max does no harm when the message does not use them.
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( "$limitationType-warning" )->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( "$limitationType-category" );
}
3213 
/**
 * Expand one double-brace construct: a subst'ed literal, a magic variable,
 * a parser function call, a special page inclusion, a template or an
 * interwiki transclusion. The possibilities are tried in that order.
 *
 * @param array $piece Parsed construct. This method reads the keys 'title'
 *   (title node), 'parts' (argument node list or null) and 'lineStart'.
 * @param PPFrame $frame Frame the construct appears in (presumably a
 *   PPFrame; it must provide expand(), virtualBracketedImplode(),
 *   loopCheck(), newChild() and a depth property)
 * @return array [ 'object' => ... ] when the result still needs expansion
 *   in the current frame (or nothing was found and the source is returned),
 *   otherwise [ 'text' => ... ]
 */
public function braceSubstitution( $piece, $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	# @todo FIXME: If piece['parts'] is null then the call to getLength()
	# below won't work b/c this $args isn't an object
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->getVariableValue( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = $this->maybeDoSubpageLink( $part1, $subpage );
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# preg_replace() returns its result instead of modifying the subject,
			# so assign it back; otherwise a leading ':' survives and the link
			# below comes out as "[[::Title]]".
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3558 
/**
 * Call a parser function registered via setFunctionHook() and normalise
 * its return value into an associative array.
 *
 * The function name is looked up first among the case-sensitive synonyms
 * and then, lower-cased with the content language, among the
 * case-insensitive ones.
 *
 * @param PPFrame $frame Frame used to expand PPNode arguments; it is also
 *   handed to hooks registered with SFH_OBJECT_ARGS
 * @param string $function Function name or synonym
 * @param array $args Arguments: PPNode objects or plain values
 * @return array [ 'found' => false ] when no such function is registered;
 *   otherwise the hook's result with 'found' => true and the returned
 *   string under 'text'. When the hook sets 'noparse' to a false value,
 *   'text' is converted to a DOM and 'isChildObj' => true is added.
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	# Resolve the synonym: case-sensitive names win over case-insensitive ones
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		$function = $this->contLang->lc( $function );
		if ( !isset( $this->mFunctionSynonyms[0][$function] ) ) {
			return [ 'found' => false ];
		}
		$function = $this->mFunctionSynonyms[0][$function];
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	$allArgs = [ &$parser ];

	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Object-style hook: receives ( $parser, $frame, $nodes )
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			$funcArgs[] = ( $v instanceof PPNode || $k === 0 )
				? $v
				: $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
		}
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Text-style hook: receives ( $parser, $arg1, $arg2, ... ) as trimmed strings
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$plain = $frame->expand( $v );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$plain = $v;
			} else {
				$plain = "$k=$v";
			}
			$allArgs[] = trim( $plain );
		}
	}

	$result = $callback( ...$allArgs );

	# Hooks may return either a plain wikitext string or an array holding
	# the string (at index 0 or under 'text') plus flags.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	$noparse = $result['noparse'] ?? true;
	$preprocessFlags = $result['preprocessFlags'] ?? 0;

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3660 
/**
 * Get the preprocessor DOM for a template, using the per-parser DOM and
 * redirect caches where possible.
 *
 * @param Title $title Template to load
 * @return array Two elements: the DOM (or false when no text could be
 *   fetched) and the title the text was finally taken from
 */
public function getTemplateDom( $title ) {
	$requestedTitle = $title;
	$domKey = $title->getPrefixedDBkey();

	# A previously recorded redirect lets us go straight to its target
	if ( isset( $this->mTplRedirCache[$domKey] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$domKey];
		$title = Title::makeTitle( $ns, $dbk );
		$domKey = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$domKey] ) ) {
		return [ $this->mTplDomCache[$domKey], $title ];
	}

	# Cache miss, go to the database
	list( $wikitext, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $wikitext === false ) {
		$this->mTplDomCache[$domKey] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $wikitext, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$domKey] = $dom;

	# Remember where the requested title ended up, if it moved
	if ( !$title->equals( $requestedTitle ) ) {
		$this->mTplRedirCache[$requestedTitle->getPrefixedDBkey()] = [
			$title->getNamespace(),
			$title->getDBkey(),
		];
	}

	return [ $dom, $title ];
}
3700 
3713  $cacheKey = $title->getPrefixedDBkey();
3714  if ( !$this->currentRevisionCache ) {
3715  $this->currentRevisionCache = new MapCacheLRU( 100 );
3716  }
3717  if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
3718  $this->currentRevisionCache->set( $cacheKey,
3719  // Defaults to Parser::statelessFetchRevision()
3720  call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
3721  );
3722  }
3723  return $this->currentRevisionCache->get( $cacheKey );
3724  }
3725 
// The revision cache is populated under $title->getPrefixedDBkey(), so the
// lookup has to use the same key form. Probing with getPrefixedText() (spaces
// instead of underscores) misses every title that contains a space.
return (
	$this->currentRevisionCache &&
	$this->currentRevisionCache->has( $title->getPrefixedDBkey() )
);
}
3737 
/**
 * Fetch the known-current revision of a title from a replica database.
 *
 * @param Title $title
 * @param Parser|bool $parser Not used by this implementation
 * @return Revision|bool Whatever Revision::newKnownCurrent() yields
 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	return Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );
}
3752 
/**
 * Fetch the unparsed text of a template through the configured template
 * callback and register every reported dependency on the parser output.
 *
 * @param Title $title
 * @return array Two elements: the template text (U+007F replaced by "?"
 *   when it is a string) and the final title reported by the callback,
 *   or $title when none was reported
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

	$rev = $fetched['revision'] ?? null;
	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}

	foreach ( ( $fetched['deps'] ?? [] ) as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
		if ( $dep['title']->equals( $this->getTitle() ) && $rev instanceof Revision ) {
			// Self-transclusion; final result may change based on the new page version
			$this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
			$this->getOutput()->setRevisionUsedSha1Base36( $rev->getSha1() );
		}
	}

	return [ $text, $fetched['finalTitle'] ?? $title ];
}
3780 
/**
 * Fetch only the text of a template; see fetchTemplateAndTitle().
 *
 * @param Title $title
 * @return string|bool First element of fetchTemplateAndTitle()'s result
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );
	return $text;
}
3789 
/**
 * Fetch the text of a template without relying on parser instance state,
 * following at most one redirect.
 *
 * @param Title $title
 * @param Parser|bool $parser Parser used to look up the current revision;
 *   when false, Revision::newFromTitle() is used instead
 * @return array Keys: 'revision' (last revision fetched, or null),
 *   'text' (wikitext, or false when unavailable), 'finalTitle', and
 *   'deps' (list of arrays with 'title', 'page_id' and 'rev_id')
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = false;
	$skip = false;
	$finalTitle = $title;
	$deps = [];
	$rev = null;

	# Loop to fetch the article, with up to 1 redirect
	for ( $pass = 0; $pass < 2 && is_object( $title ); $pass++ ) {
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run(
			'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ]
		);

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null,
			];
			break;
		}

		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			$rev = Revision::newFromTitle( $title );
		}
		$revId = $rev ? $rev->getId() : 0;

		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			MediaWikiServices::getInstance()->getLinkCache()->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $revId,
		];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $revId,
			];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			Hooks::run(
				'ParserFetchTemplate',
				[ $parser, $title, $rev, &$text, &$deps ]
			);

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but the page may still be backed by an interface message
			$msgKey = MediaWikiServices::getInstance()->getContentLanguage()
				->lcfirst( $title->getText() );
			$message = wfMessage( $msgKey )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}

		if ( !$content ) {
			break;
		}

		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	return [
		'revision' => $rev,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps,
	];
}
3887 
/**
 * Fetch a file and register it as an image dependency of the output.
 *
 * When the file that was found lives under a different title than the one
 * requested, that title is registered as well and returned in place of
 * the requested one.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (or a false value) and the title
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( $file && !$title->equals( $file->getTitle() ) ) {
		# Update fetched file title
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}

	return [ $file, $title ];
}
3909 
/**
 * Look up a file without registering it as a dependency of the output.
 *
 * Both lookup paths use the repo group provided by MediaWikiServices, so
 * the two branches cannot end up querying different RepoGroup instances.
 *
 * @param Title $title Used for the name-based lookup
 * @param array $options Recognised keys:
 *   - 'broken': when set, no lookup is made and false is returned
 *   - 'sha1': when set, the file is looked up by this key instead of by name
 *   The whole array is also forwarded to the repo group lookup.
 * @return File|bool Result of the lookup, or false for a forced-broken file
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $title, $options );
}
3930 
/**
 * Transclude a page from another wiki by fetching it over HTTP, with the
 * response cached in the main WAN object cache.
 *
 * @param Title $title Interwiki title to fetch
 * @param string $action Value of the 'action' query parameter in the URL
 * @return string The fetched text, or a content-language message when the
 *   feature is disabled, the URL exceeds 1024 bytes, or the fetch failed
 */
public function interwikiTransclude( $title, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$hasWikiId = ( $wikiId !== false );

	$fname = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$expiry = $this->svcOptions->get( 'TranscludeCacheExpiry' );

	$fetch = function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
		$req = MWHttpRequest::factory( $url, [], $fname );

		$status = $req->execute(); // Status object
		$ok = $status->isOK();
		if ( !$ok ) {
			# Failed requests must not be stored
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			# The remote side reported lag, so keep the copy only briefly
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $ok ? $req->getContent() : null,
			'code' => $req->getStatus(),
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG,
	];

	$data = $cache->getWithSetCallback( $cacheKey, $expiry, $fetch, $cacheOptions );

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// No content was fetched; report the HTTP status alongside the URL
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3997 
/**
 * Expand a triple-brace argument reference ({{{name|default}}}).
 *
 * @param array $piece Must provide 'title' (the argument name node) and
 *   'parts' (a node list; its first item holds the default value)
 * @param PPFrame $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] (the default value's children,
 *   or the re-assembled source when nothing matched) or
 *   [ 'text' => ... ] (the argument value, with a warning comment
 *   appended when the include-size limit was hit)
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$argName = trim( $nameWithSpaces );

	$text = $frame->getArgument( $argName );
	$object = false;
	$error = false;

	if (
		$text === false
		&& $parts->getLength() > 0
		&& (
			$this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		)
	) {
		# No match in frame, use the supplied default
		$object = $parts->item( 0 )->getChildren();
	}

	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$error = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere: hand back the original source
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $error !== false ) {
		$text .= $error;
	}

	return ( $object !== false )
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
4043 
/**
 * Replace an extension tag by its rendered output or by a strip marker.
 *
 * In HTML output mode (and for function tag hooks in HTML or preprocess
 * mode) the registered hook is called; in every other mode the source of
 * the tag is re-assembled from its parts.
 *
 * @param array $params Reads 'name', and optionally 'attr', 'inner' and
 *   'attributes'; 'close' is read when 'inner' is set and the tag source
 *   is being re-assembled
 * @param PPFrame $frame Frame used to expand the parts
 * @return string A strip marker, the raw output when the marker type is
 *   'none', or an expansion error string passed straight through
 * @throws MWException When a hook asks for an unknown marker type
 */
public function extensionSubstitution( $params, $frame ) {
	static $errorStr = '<span class="error">';
	static $errorLen = 20;

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : null;
	if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// Same situation as for the name
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] )
		&& ( $this->ot['html'] || $this->ot['pre'] );
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			$output = call_user_func_array(
				$this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ]
			);
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];

			// Avoid PHP 7.1 warning from passing $this by reference
			$parser = $this;
			$output = call_user_func_array(
				$callback,
				[ &$parser, $frame, $content, $attributes ]
			);
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// A hook may return [ 0 => output, 'markerType' => ... ]
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// Re-assemble the source text of the tag
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = ( $params['close'] === null ) ? '' : $frame->expand( $params['close'] );
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// Same situation as for the name
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	}

	if ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}

	return $marker;
}
4153 
/**
 * Add to one of the include-size counters unless that would exceed the
 * maximum include size from the parser options.
 *
 * @param string $type Key of the counter in mIncludeSizes
 * @param int $size Amount to add
 * @return bool False (counter left unchanged) when the limit would be
 *   exceeded, true once the counter has been increased
 */
public function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4169 
4176  $this->mExpensiveFunctionCount++;
4177  return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4178  }
4179 
/**
 * Strip double-underscore magic words (__TOC__, __NOTOC__, ...) from the
 * text and record their effects on the parser and its output.
 *
 * @param string $text
 * @return string Text with the magic words removed; the first __TOC__ is
 *   replaced by a placeholder comment that marks the TOC position
 */
public function doDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Put a placeholder where the first one was; it gets filled in
		# with the TOC at the end...
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );

		# ...and drop any further occurrences.
		$text = $tocWord->replace( '', $text );
	}

	# Now match and remove the rest of them
	$allWords = $this->magicWordFactory->getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $allWords->matchAndRemove( $text );
	$found = $this->mDoubleUnderscores;

	if ( isset( $found['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	if ( isset( $found['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $found['hiddencat'] ) && $this->getTitle()->getNamespace() == NS_CATEGORY ) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ always overrides __NOINDEX__, see T16899
	if ( isset( $found['noindex'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $found['index'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4235 
/**
 * Add a tracking category for the page being parsed; delegates to the
 * parser output object.
 *
 * @param string $msg Identifies the tracking category (e.g.
 *   'template-loop-category')
 * @return mixed Whatever the output object's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$title = $this->getTitle();

	return $this->mOutput->addTrackingCategory( $msg, $title );
}
4244 
/**
 * Rebuild every <h1>-<h6> heading found in rendered text and build the
 * table of contents from them.
 *
 * For each heading this computes the TOC nesting level and section
 * number, derives a unique anchor (plus an optional fallback anchor),
 * records a section-tree entry, optionally builds an edit-section marker,
 * and regenerates the heading through Linker::makeHeadline(). The text is
 * then split at the headings and re-assembled with the new headings.
 *
 * The parser's output type is switched to OT_WIKI while the headings are
 * processed and restored afterwards.
 *
 * @param string $text Rendered text containing the headings
 * @param string $origText Text that is preprocessed into a DOM in order to
 *   work out the offset of each section
 * @param bool $isMain When true, the section data is stored on the parser
 *   output and the TOC is appended to the first section (unless the TOC
 *   position is forced by __TOC__)
 * @return string The text with rebuilt headings and, where applicable,
 *   the TOC inserted
 */
4261  public function formatHeadings( $text, $origText, $isMain = true ) {
4262  # Inhibit editsection links if requested in the page
4263  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4264  $maybeShowEditLink = false;
4265  } else {
4266  $maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
4267  }
4268 
4269  # Get all headlines for numbering them and adding funky stuff like [edit]
4270  # links - this is for later, but we need the number of headlines right now
4271  # NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
4272  # be trimmed here since whitespace in HTML headings is significant.
4273  $matches = [];
4274  $numMatches = preg_match_all(
4275  '/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
4276  $text,
4277  $matches
4278  );
4279 
4280  # if there are fewer than 4 headlines in the article, do not show TOC
4281  # unless it's been explicitly enabled.
4282  $enoughToc = $this->mShowToc &&
4283  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4284 
4285  # Allow user to stipulate that a page should have a "new section"
4286  # link added via __NEWSECTIONLINK__
4287  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4288  $this->mOutput->setNewSection( true );
4289  }
4290 
4291  # Allow user to remove the "new section"
4292  # link via __NONEWSECTIONLINK__
4293  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4294  $this->mOutput->hideNewSection( true );
4295  }
4296 
4297  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4298  # override above conditions and always show TOC above first header
4299  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4300  $this->mShowToc = true;
4301  $enoughToc = true;
4302  }
4303 
4304  # headline counter
4305  $headlineCount = 0;
4306  $numVisible = 0;
4307 
4308  # Ugh .. the TOC should have neat indentation levels which can be
4309  # passed to the skin functions. These are determined here
4310  $toc = '';
4311  $full = '';
4312  $head = [];
4313  $sublevelCount = [];
4314  $levelCount = [];
4315  $level = 0;
4316  $prevlevel = 0;
4317  $toclevel = 0;
4318  $prevtoclevel = 0;
4319  $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4320  $baseTitleText = $this->getTitle()->getPrefixedDBkey();
4321  $oldType = $this->mOutputType;
4322  $this->setOutputType( self::OT_WIKI );
4323  $frame = $this->getPreprocessor()->newFrame();
4324  $root = $this->preprocessToDom( $origText );
4325  $node = $root->getFirstChild();
4326  $byteOffset = 0;
4327  $tocraw = [];
4328  $refers = [];
4329 
 # $matches[3] is the same capture group as $matches['header'] (third group
 # of the pattern above), i.e. the inner content of each heading.
4330  $headlines = $numMatches !== false ? $matches[3] : [];
4331 
4332  $maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
4333  foreach ( $headlines as $headline ) {
4334  $isTemplate = false;
4335  $titleText = false;
4336  $sectionIndex = false;
4337  $numbering = '';
4338  $markerMatches = [];
 # A leading heading marker carries the serial number under which the
 # originating title and section index were stored in $this->mHeadings.
4339  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4340  $serial = $markerMatches[1];
4341  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4342  $isTemplate = ( $titleText != $baseTitleText );
4343  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4344  }
4345 
4346  if ( $toclevel ) {
4347  $prevlevel = $level;
4348  }
4349  $level = $matches[1][$headlineCount];
4350 
4351  if ( $level > $prevlevel ) {
4352  # Increase TOC level
4353  $toclevel++;
4354  $sublevelCount[$toclevel] = 0;
4355  if ( $toclevel < $maxTocLevel ) {
4356  $prevtoclevel = $toclevel;
4357  $toc .= Linker::tocIndent();
4358  $numVisible++;
4359  }
4360  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4361  # Decrease TOC level, find level to jump to
4362 
4363  for ( $i = $toclevel; $i > 0; $i-- ) {
4364  if ( $levelCount[$i] == $level ) {
4365  # Found last matching level
4366  $toclevel = $i;
4367  break;
4368  } elseif ( $levelCount[$i] < $level ) {
4369  # Found first matching level below current level
4370  $toclevel = $i + 1;
4371  break;
4372  }
4373  }
4374  if ( $i == 0 ) {
4375  $toclevel = 1;
4376  }
4377  if ( $toclevel < $maxTocLevel ) {
4378  if ( $prevtoclevel < $maxTocLevel ) {
4379  # Unindent only if the previous toc level was shown :p
4380  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4381  $prevtoclevel = $toclevel;
4382  } else {
4383  $toc .= Linker::tocLineEnd();
4384  }
4385  }
4386  } else {
4387  # No change in level, end TOC line
4388  if ( $toclevel < $maxTocLevel ) {
4389  $toc .= Linker::tocLineEnd();
4390  }
4391  }
4392 
4393  $levelCount[$toclevel] = $level;
4394 
4395  # count number of headlines for each level
4396  $sublevelCount[$toclevel]++;
4397  $dot = 0;
4398  for ( $i = 1; $i <= $toclevel; $i++ ) {
4399  if ( !empty( $sublevelCount[$i] ) ) {
4400  if ( $dot ) {
4401  $numbering .= '.';
4402  }
4403  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4404  $dot = 1;
4405  }
4406  }
4407 
4408  # The safe header is a version of the header text safe to use for links
4409 
4410  # Remove link placeholders by the link text.
4411  # <!--LINK number-->
4412  # turns into
4413  # link text with suffix
4414  # Do this before unstrip since link text can contain strip markers
4415  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4416 
4417  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4418  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4419 
4420  # Remove any <style> or <script> tags (T198618)
4421  $safeHeadline = preg_replace(
4422  '#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
4423  '',
4424  $safeHeadline
4425  );
4426 
4427  # Strip out HTML (first regex removes any tag not allowed)
4428  # Allowed tags are:
4429  # * <sup> and <sub> (T10393)
4430  # * <i> (T28375)
4431  # * <b> (r105284)
4432  # * <bdi> (T74884)
4433  # * <span dir="rtl"> and <span dir="ltr"> (T37167)
4434  # * <s> and <strike> (T35715)
4435  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4436  # to allow setting directionality in toc items.
4437  $tocline = preg_replace(
4438  [
4439  '#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
4440  '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
4441  ],
4442  [ '', '<$1>' ],
4443  $safeHeadline
4444  );
4445 
4446  # Strip '<span></span>', which is the result from the above if
4447  # <span id="foo"></span> is used to produce an additional anchor
4448  # for a section.
4449  $tocline = str_replace( '<span></span>', '', $tocline );
4450 
4451  $tocline = trim( $tocline );
4452 
4453  # For the anchor, strip out HTML-y stuff period
4454  $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4455  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4456 
4457  # Save headline for section edit hint before it's escaped
4458  $headlineHint = $safeHeadline;
4459 
4460  # Decode HTML entities
4461  $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );
4462 
4463  $safeHeadline = self::normalizeSectionName( $safeHeadline );
4464 
4465  $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
4466  $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
4467  $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
4468  if ( $fallbackHeadline === $safeHeadline ) {
4469  # No reason to have both (in fact, we can't)
4470  $fallbackHeadline = false;
4471  }
4472 
4473  # HTML IDs must be case-insensitively unique for IE compatibility (T12721).
4474  # @todo FIXME: We may be changing them depending on the current locale.
4475  $arrayKey = strtolower( $safeHeadline );
4476  if ( $fallbackHeadline === false ) {
4477  $fallbackArrayKey = false;
4478  } else {
4479  $fallbackArrayKey = strtolower( $fallbackHeadline );
4480  }
4481 
4482  # Create the anchor for linking from the TOC to the section
4483  $anchor = $safeHeadline;
4484  $fallbackAnchor = $fallbackHeadline;
 # An anchor that was already used gets the first free "_2", "_3", ...
 # suffix; $refers records every key handed out so far.
4485  if ( isset( $refers[$arrayKey] ) ) {
4486  // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
4487  for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4488  $anchor .= "_$i";
4489  $linkAnchor .= "_$i";
4490  $refers["${arrayKey}_$i"] = true;
4491  } else {
4492  $refers[$arrayKey] = true;
4493  }
 # NOTE(review): when there is no fallback anchor, $fallbackArrayKey is
 # false and the else branch below still writes $refers[false] - confirm
 # that this is harmless.
4494  if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
4495  // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
4496  for ( $i = 2; isset( $refers["${fallbackArrayKey}_$i"] ); ++$i );
4497  $fallbackAnchor .= "_$i";
4498  $refers["${fallbackArrayKey}_$i"] = true;
4499  } else {
4500  $refers[$fallbackArrayKey] = true;
4501  }
4502 
4503  # Don't number the heading if it is the only one (looks silly)
4504  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4505  # the two are different if the line contains a link
4506  $headline = Html::element(
4507  'span',
4508  [ 'class' => 'mw-headline-number' ],
4509  $numbering
4510  ) . ' ' . $headline;
4511  }
4512 
4513  if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
4514  $toc .= Linker::tocLine( $linkAnchor, $tocline,
4515  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4516  }
4517 
4518  # Add the section to the section tree
4519  # Find the DOM node for this header
4520  $noOffset = ( $isTemplate || $sectionIndex === false );
 # Walk the sibling nodes up to the heading node of this section, adding up
 # the length of the original text of everything that is passed over.
 # NOTE(review): the length is measured with mb_strlen(), so despite its
 # name $byteOffset looks like a character count - confirm what consumers
 # of 'byteoffset' expect.
4521  while ( $node && !$noOffset ) {
4522  if ( $node->getName() === 'h' ) {
4523  $bits = $node->splitHeading();
4524  if ( $bits['i'] == $sectionIndex ) {
4525  break;
4526  }
4527  }
4528  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4529  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4530  $node = $node->getNextSibling();
4531  }
4532  $tocraw[] = [
4533  'toclevel' => $toclevel,
4534  'level' => $level,
4535  'line' => $tocline,
4536  'number' => $numbering,
4537  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4538  'fromtitle' => $titleText,
4539  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4540  'anchor' => $anchor,
4541  ];
4542 
4543  # give headline the correct <h#> tag
4544  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4545  // Output edit section links as markers with styles that can be customized by skins
4546  if ( $isTemplate ) {
4547  # Put a T flag in the section identifier, to indicate to extractSections()
4548  # that sections inside <includeonly> should be counted.
4549  $editsectionPage = $titleText;
4550  $editsectionSection = "T-$sectionIndex";
4551  $editsectionContent = null;
4552  } else {
4553  $editsectionPage = $this->getTitle()->getPrefixedText();
4554  $editsectionSection = $sectionIndex;
4555  $editsectionContent = $headlineHint;
4556  }
4557  // We use a bit of pesudo-xml for editsection markers. The
4558  // language converter is run later on. Using a UNIQ style marker
4559  // leads to the converter screwing up the tokens when it
4560  // converts stuff. And trying to insert strip tags fails too. At
4561  // this point all real inputted tags have already been escaped,
4562  // so we don't have to worry about a user trying to input one of
4563  // these markers directly. We use a page and section attribute
4564  // to stop the language converter from converting these
4565  // important bits of data, but put the headline hint inside a
4566  // content block because the language converter is supposed to
4567  // be able to convert that piece of data.
4568  // Gets replaced with html in ParserOutput::getText
4569  $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4570  $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4571  if ( $editsectionContent !== null ) {
4572  $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4573  } else {
4574  $editlink .= '/>';
4575  }
4576  } else {
4577  $editlink = '';
4578  }
4579  $head[$headlineCount] = Linker::makeHeadline( $level,
4580  $matches['attrib'][$headlineCount], $anchor, $headline,
4581  $editlink, $fallbackAnchor );
4582 
4583  $headlineCount++;
4584  }
4585 
4586  $this->setOutputType( $oldType );
4587 
4588  # Never ever show TOC if no headers
4589  if ( $numVisible < 1 ) {
4590  $enoughToc = false;
4591  }
4592 
4593  if ( $enoughToc ) {
4594  if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
4595  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4596  }
4597  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4598  $this->mOutput->setTOCHTML( $toc );
4599  $toc = self::TOC_START . $toc . self::TOC_END;
4600  }
4601 
4602  if ( $isMain ) {
4603  $this->mOutput->setSections( $tocraw );
4604  }
4605 
4606  # split up and insert constructed headlines
 # NOTE(review): unlike the matching pattern at the top of this method, this
 # split pattern does not allow spaces before the '>' of the closing tag -
 # confirm that headings never reach this point written as "</hN >".
4607  $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4608  $i = 0;
4609 
4610  // build an array of document sections
4611  $sections = [];
4612  foreach ( $blocks as $block ) {
4613  // $head is zero-based, sections aren't.
4614  if ( empty( $head[$i - 1] ) ) {
4615  $sections[$i] = $block;
4616  } else {
4617  $sections[$i] = $head[$i - 1] . $block;
4618  }
4619 
4630  Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );
4631 
4632  $i++;
4633  }
4634 
4635  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4636  // append the TOC at the beginning
4637  // Top anchor now in skin
4638  $sections[0] .= $toc . "\n";
4639  }
4640 
4641  $full .= implode( '', $sections );
4642 
 # With a forced TOC position the placeholder comment left by __TOC__ is
 # swapped for the TOC (an empty string when no TOC was built).
4643  if ( $this->mForceTocPosition ) {
4644  return str_replace( '<!--MWTOC\'"-->', $toc, $full );
4645  } else {
4646  return $full;
4647  }
4648  }
4649 
/**
 * Transform wikitext before it is saved: strip NUL characters, normalize
 * line endings, optionally run the pre-save pass and unstrip the result.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title handed to startParse()
 * @param User $user User set on the parser for the duration of the call
 * @param ParserOptions $options Options; getPreSaveTransform() decides whether pstPass2() runs
 * @param bool $clearState Whether to take the parser lock and clear the parser state
 * @return string The transformed wikitext
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	if ( $clearState ) {
		# Kept in a local so the value returned by lock() lives until we return
		$scopedLock = $this->lock();
	}
	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	# Strip U+0000 NULL (T159174), then normalize line endings. The latter is
	# kept for backwards-compatibility with code that just calls PST, although
	# it should already be handled in TextContent subclasses.
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$result = $this->mStripState->unstripBoth( $text );

	# Reset the user that was set above
	$this->setUser( null );

	return $result;
}
4687 
/**
 * Pre-save transform helper: runs variable replacement, expands signature
 * tildes and applies the "pipe trick" to internal links.
 *
 * @param string $text Wikitext to transform
 * @param User $user User whose signature replaces ~~~ and ~~~~
 * @return string The transformed wikitext
 */
private function pstPass2( $text, $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		# strtr() with an array tries the longest key first, so five tildes
		# are never consumed as "~~~" followed by "~~"
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( 'user-signature', 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257)
	// The brackets must be the fullwidth characters U+FF08/U+FF09: with ASCII
	// parentheses this would be a plain capturing group matching any link.
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，)$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		# Current title has a "(context)" suffix: reuse it
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		# Current title has a namespace-like prefix and/or a ", context" suffix
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4757 
/**
 * Build the signature wikitext for a user.
 *
 * A valid "fancy" signature is returned after cleanSig(); otherwise the
 * 'signature' / 'signature-anon' message is rendered with the escaped user
 * name and nickname.
 *
 * @param User &$user User to build the signature for
 * @param string|bool $nickname Nickname to use, or false to read the 'nickname' user option
 * @param bool|null $fancySig Whether the nickname is raw wikitext, or null to read
 *  the 'fancysig' user option
 * @return string Signature wikitext
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Fill in whatever the caller did not supply from the user's options
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( is_null( $fancySig ) ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# Loose comparison on purpose: an empty nickname falls back to the user name too
	if ( $nickname == null ) {
		$nickname = $username;
	}

	$tooLong = mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' );
	if ( $tooLong ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		if ( $this->validateSig( $nickname ) !== false ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}
		# Failed to validate; fall back to the default
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	if ( $user->isAnon() ) {
		$msgName = 'signature-anon';
	} else {
		$msgName = 'signature';
	}

	$msg = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();
	return $msg->title( $this->getTitle() )->text();
}
4812 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|bool The unchanged text when Xml::isWellFormedXmlFragment()
 *  accepts it, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4822 
/**
 * Clean up signature text: force substitution on every "{{" that is not
 * already followed by the subst magic word, drop nested signature tildes
 * and expand the result through the preprocessor.
 *
 * @param string $text Signature text
 * @param bool $parsing True when called from within a parse; when false the
 *  parser is locked and started on $wgTitle first, and the result is unstripped
 * @return string Cleaned signature, or the input unchanged when the
 *  clean-signatures option is off
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		global $wgTitle;
		# Kept in a local so the value returned by lock() lives until we return
		$scopedLock = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = $this->magicWordFactory->get( 'subst' );
	$pattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$replacement = '{{' . $subst->getSynonym( 0 );

	$cleaned = self::cleanSigInSig( preg_replace( $pattern, $replacement, $text ) );

	$dom = $this->preprocessToDom( $cleaned );
	$frame = $this->getPreprocessor()->newFrame();
	$expanded = $frame->expand( $dom );

	return $parsing ? $expanded : $this->mStripState->unstripBoth( $expanded );
}
4863 
/**
 * Remove every run of three to five tildes from the text, so that a
 * signature cannot itself contain signature markup.
 *
 * @param string $text
 * @return string Text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4874 
/**
 * Set up the parser for use from outside: same as startParse(), and
 * additionally records a revision ID when one is given.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to clear the parser state
 * @param int|null $revId Stored in mRevisionId unless null
 */
public function startExternalParse( ?Title $title, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $title, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4893 
/**
 * Store the title, options and output type on the parser and, if asked,
 * clear its state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to call clearState()
 */
private function startParse( ?Title $title, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4910 
/**
 * Run message text through preprocess().
 *
 * A static flag guards against re-entrant calls: while one call is in
 * progress, nested calls return their input unchanged.
 *
 * @param string $text Text to preprocess
 * @param ParserOptions $options Options passed to preprocess()
 * @param Title|null $title Title to use; falls back to $wgTitle when falsy
 * @return string Preprocessed text, or $text unchanged for a nested call
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}

	if ( !$title ) {
		global $wgTitle;
		$title = $wgTitle;
	}

	$executing = true;
	try {
		return $this->preprocess( $text, $title, $options );
	} finally {
		# Always release the guard: if preprocess() threw while the flag was
		# still set, every later call would silently return its input.
		$executing = false;
	}
}
4938 
/**
 * Register a callback for a tag and add the tag to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}

	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;

	# Strict comparison: a loose in_array() treats numeric-looking tag names
	# such as '1e3' and '1000' as equal and would skip the registration.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4976 
/**
 * Register a callback for a transparent tag. Unlike setHook(), the tag is
 * not added to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setTransparentTagHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# Name the method that was actually called
		throw new MWException(
			"Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call"
		);
	}

	$oldVal = $this->mTransparentTagHooks[$tag] ?? null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
5004 
/**
 * Drop all tag hooks and function tag hooks, and reset the strip list to
 * the default strip list.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
5013 
/**
 * Register a parser function callback for a magic word and index every
 * synonym of that magic word in the function synonym table.
 *
 * @param string $id Magic word identifier
 * @param callable $callback Callback stored for the function
 * @param int $flags Bit field of self::SFH_* constants; with SFH_NO_HASH the
 *  synonyms are registered without a leading '#'
 * @throws MWException If no magic word is found for $id
 * @return callable|null The callback previously registered for $id, if any
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	# Resolve and validate the magic word before touching any state, so a
	# bad identifier cannot leave a half-registered hook behind.
	$mw = $this->magicWordFactory->get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$synonyms = $mw->getSynonyms();
	# 0 or 1, used as the first-level key of mFunctionSynonyms
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $this->contLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}

	return $oldVal;
}
5087 
/**
 * List the identifiers of all registered function hooks.
 *
 * @return array Keys of mFunctionHooks, read after firstCallInit() has run
 */
public function getFunctionHooks() {
	$this->firstCallInit();
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5097 
/**
 * Register a function tag hook and add the tag to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @param int $flags Flags stored alongside the callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return array|null The [ callback, flags ] pair previously registered for the tag, if any
 */
public function setFunctionTagHook( $tag, callable $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}

	$old = $this->mFunctionTagHooks[$tag] ?? null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	# Strict comparison: a loose in_array() treats numeric-looking tag names
	# such as '1e3' and '1000' as equal and would skip the registration.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
5122 
/**
 * Replace link placeholders in the text, in place, by delegating to the
 * link holder array.
 *
 * @param string &$text Text passed to mLinkHolders->replace()
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5133 
/**
 * Delegate to the link holder array's replaceText() and return its result.
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;

	return $holders->replaceText( $text );
}
5144 
/**
 * Build an image gallery from gallery source text and render it to HTML.
 *
 * Each non-empty line of $text has the form "File name|parameters and caption".
 * Lines whose file name does not yield a valid title are skipped.
 *
 * @param string $text Gallery body, one entry per line
 * @param array $params Gallery attributes. The keys read here are 'mode',
 *  'showfilename', 'caption', 'perrow', 'widths' and 'heights'; the whole
 *  array is also passed on to the gallery object.
 * @return string Gallery HTML, after the AfterParserFetchFileAndTitle hook has run
 */
public function renderImageGallery( $text, $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->getTitle() );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	$ig->setShowFilename( isset( $params['showfilename'] ) );
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# Resolve the file name; NS_FILE is the default namespace for names
		# given without a prefix.
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = $this->stripAltText( $match, false );
							// The pipe and braces are escaped so that this only
							// matches a value fully wrapped as "-{R|...}-"; an
							// unescaped "|" would be a regex alternation.
							if ( preg_match( '/^-\{R\|(.*)\}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								$linkValue = substr( $linkValue, 4, -2 );
							}
							list( $type, $target ) = $this->parseLinkParameter( $linkValue );
							if ( $type === 'link-url' ) {
								$link = $target;
								$this->mOutput->addExternalLink( $target );
							} elseif ( $type === 'link-title' ) {
								$link = $target->getLinkURL();
								$this->mOutput->addLink( $target );
							}
							break;
						default:
							// Must be a handler specific parameter.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								$this->logger->debug(
									"$parameterMatch failed parameter validation" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5317 
/**
 * Get the image parameter map and the matching magic word array for a
 * media handler. Results are cached per handler class name.
 *
 * @param object|bool $handler Media handler, or a falsy value for none
 * @return array Two elements: the map of magic word ID to [ type, name ], and
 *  the magic word array built from that map's keys
 */
public function getImageParams( $handler ) {
	# Static lists shared by all calls; the map is built on first use
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;

	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$handlerClass] ) ) {
		return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
	}

	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $type => $names ) {
			foreach ( $names as $name ) {
				// For grep: img_left, img_right, img_center, img_none,
				// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
				// img_bottom, img_text_bottom,
				// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
				// img_border, img_link, img_alt, img_class
				$internalParamMap[ str_replace( '-', '_', "img_$name" ) ] = [ $type, $name ];
			}
		}
	}

	# Start from the internal parameters, then add the handler's own
	$paramMap = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magic => $paramName ) {
			$paramMap[$magic] = [ 'handler', $paramName ];
		}
	}

	$this->mImageParams[$handlerClass] = $paramMap;
	$this->mImageParamsMagicArray[$handlerClass] =
		$this->magicWordFactory->newArray( array_keys( $paramMap ) );

	return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
}
5367 
/**
 * Parse the option string of an image link and render the image through
 * Linker::makeImageLink().
 *
 * @param Title $title Title of the file; may be replaced by fetchFileAndTitle()
 * @param string $options Pipe-separated image options, ending with the caption
 * @param object|bool $holders Link holder object used when stripping alt text,
 *  or false to use the parser's own link holders
 * @return string Return value of Linker::makeImageLink()
 */
public function makeImage( $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Protect LanguageConverter markup when splitting into parts
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us.
	# From here on $options is the fetch-options array, not the input string.
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							# parseLinkParameter() returns the frame key to
							# store under, so $paramName is replaced here
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		# Anything that is not a valid parameter becomes the caption; the last one wins
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5577 
/**
 * Classify the value of a link parameter and register the link target
 * with the parser output.
 *
 * @param string $value Value of the link parameter
 * @return array Two elements, [ type, target ]:
 *  - [ 'no-link', false ] for an empty value
 *  - [ 'link-url', string ] for a value that is a valid URL in a known protocol
 *  - [ 'link-title', Title ] for a value that parses as a title
 *  - [ null, false ] otherwise
 */
public function parseLinkParameter( $value ) {
	$urlChar = self::EXT_LINK_URL_CLASS;
	$hostStart = self::EXT_LINK_ADDR;
	$protocols = $this->mUrlProtocols;

	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	if ( preg_match( "/^((?i)$protocols)/", $value ) ) {
		# Starts with a known protocol, so it is either a valid URL or nothing
		if ( !preg_match( "/^((?i)$protocols)$hostStart$urlChar*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );

	return [ 'link-title', $linkTitle ];
}
5620 
/**
 * Turn caption wikitext into plain text suitable for an attribute value:
 * link placeholders and strip markers are expanded, ampersands that start
 * a legacy semicolon-less entity are encoded, and all tags are removed.
 *
 * @param string $caption Caption text
 * @param object|bool $holders Link holder object to expand placeholders
 *  with, or a falsy value to use the parser's own link holders
 * @return string Plain text
 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
	$tooltip = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$tooltip = $this->mStripState->unstripBoth( $tooltip );

	# Compatibility hack!  In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$legacyEntityAmp = "/
		&                    # 1. entity prefix
		(?=                  # 2. followed by:
		(?:                  #  a. one of the legacy semicolon-less named entities
			A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
			C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
			GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
			O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
			U(?:acute|circ|grave|uml)|Yacute|
			a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
			c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
			divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
			frac(?:1(?:2|4)|34)|
			gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
			i(?:acute|circ|excl|grave|quest|uml)|laquo|
			lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
			m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
			not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
			o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
			p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
			s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
			u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$))          #  b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx";
	$tooltip = preg_replace( $legacyEntityAmp, '&amp;', $tooltip );

	return Sanitizer::stripAllTags( $tooltip );
}
5678 
/**
 * Mark the current parser output as uncacheable by setting its cache
 * expiry to zero.
 *
 * @throws MWException If there is no parser output object
 */
public function disableCache() {
	$this->logger->debug( "Parser output marked as uncacheable." );

	if ( $this->mOutput ) {
		$this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
		return;
	}

	throw new MWException( __METHOD__ .
		" can only be called when actually parsing something" );
}
5692 
/**
 * Replace variables in the text and then expand its strip markers. The
 * text is modified in place and also returned.
 *
 * @param string &$text Text to process
 * @param object|bool $frame Frame passed on to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	$text = $this->replaceVariables( $text, $frame );

	return $text = $this->mStripState->unstripBoth( $text );
}
5706 
/**
 * List the names of all registered tags: transparent tag hooks first,
 * then tag hooks, then function tag hooks.
 *
 * @return array
 */
public function getTags() {
	$this->firstCallInit();

	$transparent = array_keys( $this->mTransparentTagHooks );
	$regular = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparent, $regular, $functionTags );
}
5720 
/**
 * Get the function synonym table, after firstCallInit() has run.
 *
 * @return array The mFunctionSynonyms property
 */
public function getFunctionSynonyms() {
	$this->firstCallInit();
	$synonyms = $this->mFunctionSynonyms;

	return $synonyms;
}
5729 
/**
 * Get the URL protocols value stored on this parser.
 *
 * @return string The mUrlProtocols property
 */
public function getUrlProtocols() {
	$protocols = $this->mUrlProtocols;

	return $protocols;
}
5737 
/**
 * Extract every tag that has a transparent tag hook from the text and
 * substitute the output of its hook.
 *
 * @param string $text
 * @return string Text with each extracted tag replaced by its hook output,
 *  or by the original tag text when no hook is registered for it
 */
public function replaceTransparentTags( $text ) {
	$extracted = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $extracted );

	$replacements = [];
	foreach ( $extracted as $marker => $data ) {
		[ $element, $content, $params, $tag ] = $data;
		$hook = $this->mTransparentTagHooks[ strtolower( $element ) ] ?? null;

		# isset() semantics: a missing or null hook leaves the tag as it was
		$replacements[$marker] = $hook !== null
			? call_user_func_array( $hook, [ $content, $params, $this ] )
			: $tag;
	}

	return strtr( $text, $replacements );
}
5769 
/**
 * Break wikitext input into sections, and either pull or replace
 * some particular section's text.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Dash-separated identifier; the last component is
 *  the section index, and a preceding "T" component sets PTD_FOR_INCLUSION
 * @param string $mode Either 'get' or 'replace'
 * @param string $newText Replacement text in 'replace' mode; in 'get' mode it is
 *  the value returned when the section is not found
 * @return string In 'get' mode the section text; in 'replace' mode the full text
 *  with the section replaced (or $text unchanged if the section was not found)
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	// Keep the object returned by lock() in a local for the duration of the call
	$lockGuard = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$frame = $this->getPreprocessor()->newFrame();
	$isGet = ( $mode === 'get' );
	$isReplace = ( $mode === 'replace' );
	$result = '';

	# Process section extraction flags: everything before the last dash is a flag
	$idParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $idParts );
	$flags = in_array( 'T', $idParts, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Check for empty input: only sections 0 and T-0 exist in an empty document
	if ( strval( $text ) === '' ) {
		if ( $sectionIndex == 0 ) {
			return $isGet ? '' : $newText;
		}
		return $isGet ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			# In replace mode everything before the target is copied through unchanged
			if ( $isReplace ) {
				$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	# Not found
	if ( !$node ) {
		return $isGet ? $newText : $text;
	}

	# Find the end of the section, including nested sections
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			if ( $bits['i'] != $sectionIndex && $bits['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $isGet ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Write out the remainder (in replace mode only)
	if ( $isReplace ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$result .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $result ) ) {
		# Re-insert stripped tags
		$result = rtrim( $this->mStripState->unstripBoth( $result ) );
	}

	return $result;
}
5909 
/**
 * Return the text of a section, specified by its section identifier.
 *
 * Delegates to extractSections() in 'get' mode.
 *
 * @param string $text Text to look in
 * @param string|int $sectionId Section identifier as accepted by extractSections()
 * @param string $defaultText Returned when the section is not found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';
	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5927 
/**
 * Return $oldText after the content of the section specified by $sectionId
 * has been replaced with $newText.
 *
 * Delegates to extractSections() in 'replace' mode.
 *
 * @param string $oldText Former text of the article
 * @param string|int $sectionId Section identifier as accepted by extractSections()
 * @param string $newText Replacement text for the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';
	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
5943 
/**
 * Get the ID of the revision we are parsing.
 *
 * @return mixed Current value of $this->mRevisionId
 */
public function getRevisionId() {
	$revId = $this->mRevisionId;
	return $revId;
}
5957 
/**
 * Get the revision object for $this->mRevisionId.
 *
 * The result is cached in $this->mRevisionObject once resolved (a null
 * returned for the preview case below is not cached).
 *
 * @return Revision|null
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject ) {
		return $this->mRevisionObject;
	}

	// NOTE: try to get the RevisionObject even if mRevisionId is null.
	// This is useful when parsing a revision that has not yet been saved.
	// However, if we get back a saved revision even though we are in
	// preview mode, we'll have to ignore it, see below.
	// NOTE: This callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. We can't just
	// skip it if mRevisionId is set.
	$callback = $this->mOptions->getCurrentRevisionCallback();
	$rev = call_user_func( $callback, $this->getTitle(), $this );

	if ( $rev ) {
		if ( $this->mRevisionId === null ) {
			// We are in preview mode (mRevisionId is null), and the current revision
			// callback returned an existing revision. Ignore it and return null, it's
			// probably the page's current revision, which is not what we want here.
			// Note that we do want to call the callback to allow the unsaved revision
			// to be injected here, e.g. for self-transclusion previews.
			if ( $rev->getId() ) {
				return null;
			}
		} elseif ( $this->mRevisionId && $rev->getId() != $this->mRevisionId ) {
			// If the parse is for a new revision, then the callback should have
			// already been set to force the object and should match mRevisionId.
			// If not, try to fetch by mRevisionId for sanity.
			$rev = Revision::newFromId( $this->mRevisionId );
		}
	}

	$this->mRevisionObject = $rev;

	return $this->mRevisionObject;
}
6002 
/**
 * Get the timestamp associated with the current revision, adjusted for
 * the site-default timezone offset.
 *
 * The adjusted value is computed once and cached in $this->mRevisionTimestamp.
 * Computing it also records the unadjusted timestamp on the parser output via
 * setRevisionTimestampUsed().
 *
 * @return string The adjusted timestamp
 */
public function getRevisionTimestamp() {
	// Cache hit: return the previously computed value instead of recomputing
	if ( $this->mRevisionTimestamp !== null ) {
		return $this->mRevisionTimestamp;
	}

	# Use specified revision timestamp, falling back to the current timestamp
	$revObject = $this->getRevisionObject();
	$timestamp = $revObject ? $revObject->getTimestamp() : $this->mOptions->getTimestamp();
	$this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone

	# The cryptic '' timezone parameter tells to use the site-default
	# timezone offset instead of the user settings.
	# Since this value will be saved into the parser cache, served
	# to other users, and potentially even used inside links and such,
	# it needs to be consistent for all visitors.
	$this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );

	return $this->mRevisionTimestamp;
}
6027 
/**
 * Get the name of the user that edited the last revision.
 *
 * The value is cached in $this->mRevisionUser. When no revision object is
 * available, the current user's name is used, but only in 'wiki' output
 * mode or in preview; otherwise the cached value stays null.
 *
 * @return string|null
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revObject = $this->getRevisionObject();

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $revObject ) {
		$this->mRevisionUser = $revObject->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6047 
/**
 * Get the size of the revision.
 *
 * The value is cached in $this->mRevisionSize. Without a revision object the
 * parser input size ($this->mInputSize) is used instead.
 *
 * @return int|null
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize === null ) {
		$revObject = $this->getRevisionObject();

		# if this variable is subst: the revision id will be blank,
		# so just use the parser input size, because the own substitution
		# will change the size.
		$this->mRevisionSize = $revObject ? $revObject->getSize() : $this->mInputSize;
	}

	return $this->mRevisionSize;
}
6068 
/**
 * Mutator for $mDefaultSort.
 *
 * Also records the value as the 'defaultsort' property on the parser output.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$output = $this->mOutput;
	$output->setProperty( 'defaultsort', $sort );
}
6078 
/**
 * Accessor for $mDefaultSort that never returns false.
 *
 * @return string The stored default sort key, or an empty string when
 *  $this->mDefaultSort is false
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6096 
/**
 * Raw accessor for $mDefaultSort.
 *
 * Unlike getDefaultSort(), the stored value is returned as-is, including
 * false.
 *
 * @return string|bool Current value of $this->mDefaultSort
 */
public function getCustomDefaultSort() {
	$customSort = $this->mDefaultSort;
	return $customSort;
}
6106 
/**
 * Turn already-stripped heading text into a normalized section name.
 *
 * Character references are decoded and the result is passed through
 * normalizeSectionName().
 *
 * NOTE(review): the embedded listing numbers skip one line at the top of this
 * body, so a statement may have been lost in extraction - confirm against the
 * canonical Parser.php.
 *
 * @param string $text
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	$decoded = Sanitizer::decodeCharReferences( $text );
	return self::normalizeSectionName( $decoded );
}
6113 
/**
 * Build a "#fragment" anchor from a section name using
 * Sanitizer::escapeIdForLink().
 *
 * @param string $sectionName
 * @return string Anchor including the leading "#"
 */
private static function makeAnchor( $sectionName ) {
	$id = Sanitizer::escapeIdForLink( $sectionName );
	return "#$id";
}
6117 
/**
 * Build a "#fragment" anchor from a section name, using the fallback
 * (legacy) ID encoding when the 'FragmentMode' option lists 'legacy' as its
 * second entry, and the regular link encoding otherwise.
 *
 * @param string $sectionName
 * @return string Anchor including the leading "#"
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );
	if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
		// ForAttribute() and ForLink() are the same for legacy encoding
		// Without this assignment $id would be undefined in this branch.
		$id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
	} else {
		$id = Sanitizer::escapeIdForLink( $sectionName );
	}

	return "#$id";
}
6129 
/**
 * Guess the anchor for a section heading given as wikitext.
 *
 * The text is stripped with stripSectionName(), normalized, and turned into
 * a "#fragment" anchor.
 *
 * @param string $text Heading wikitext
 * @return string Anchor including the leading "#"
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );
	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6144 
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead, if possible.
 *
 * @param string $text Heading wikitext
 * @return string Anchor including the leading "#"
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );
	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6160 
/**
 * Like guessSectionNameFromWikiText(), but takes text that has already been
 * stripped of wikitext markup.
 *
 * @param string $text Stripped heading text
 * @return string Anchor including the leading "#"
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6170 
/**
 * Apply the same normalization as code making links to this section would.
 *
 * The text is parsed as the fragment of a title string ("#$text") and the
 * resulting fragment is returned.
 *
 * @param string $text
 * @return string The normalized fragment, or $text unchanged if the title
 *  parser throws MalformedTitleException
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links

	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';
	try {
		$parts = $titleParser->splitTitleString( "#$text" );
		return $parts['fragment'];
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}
}
6190 
/**
 * Strips a text string of wikitext for use in a section anchor.
 *
 * Internal and external link markup is reduced to its visible text, quotes
 * are processed by doQuotes(), and anything between "<" and ">" is removed.
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function stripSectionName( $text ) {
	# Strip internal link markup: piped links keep their label, plain links their target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$quoted = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $quoted );
}
6223 
/**
 * Test helper: start a parse, then run variable replacement, unstripping
 * and HTML tag sanitization on the text.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants; defaults to OT_HTML
 * @return string
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	// Keep the object returned by lock() in a local for the duration of the call
	$lockGuard = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );
	return Sanitizer::removeHTMLtags( $unstripped );
}
6245 
/**
 * Test helper: run preSaveTransform() on the text, using the user from the
 * given options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $title, $user, $options );
}
6255 
/**
 * Test helper: same as testSrvus(), with the output type fixed to
 * OT_PREPROCESS.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;
	return $this->testSrvus( $text, $title, $options, $outputType );
}
6265 
/**
 * Call a callback on every stretch of text that lies outside strip markers.
 *
 * Markers (MARKER_PREFIX ... MARKER_SUFFIX) are copied to the output
 * untouched; the text between them is replaced by the callback's return
 * value. A prefix without a matching suffix is copied through verbatim
 * together with everything after it.
 *
 * @param string $s Input text
 * @param callable $callback Receives a text chunk and returns its replacement
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$result = '';
	$pos = 0;

	while ( $pos < $length ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			// No more markers: the rest is plain text
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		// Plain text leading up to the marker
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$suffixAt = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $suffixAt === false ) {
			// Unterminated marker: copy the remainder as-is
			$result .= substr( $s, $start );
			break;
		}

		// Copy the whole marker and continue right after it
		$pos = $suffixAt + $suffixLength;
		$result .= substr( $s, $start, $pos - $start );
	}

	return $result;
}
6305 
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the strip state's killMarkers().
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
6315 
/**
 * Bundle half-parsed text with the strip state and link holder entries it
 * refers to.
 *
 * @deprecated since 1.31
 * @param string $text
 * @return array With keys 'text', 'version', 'stripState' and 'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
	wfDeprecated( __METHOD__, '1.31' );

	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
6343 
/**
 * Load data produced by serializeHalfParsedText() back into this parser:
 * the strip state is merged first, then the link holders.
 *
 * @deprecated since 1.31
 * @param array $data Array as returned by serializeHalfParsedText()
 * @return string The text after both merges
 * @throws MWException If $data has no 'version' or it differs from HALF_PARSED_VERSION
 */
public function unserializeHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	$versionMatches = isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# First, extract the strip state.
	$merged = [ $data['text'] ];
	$merged = $this->mStripState->merge( $data['stripState'], $merged );

	# Now renumber links
	$merged = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $merged );

	# Should be good to go.
	return $merged[0];
}
6376 
/**
 * Check whether $data carries a 'version' entry equal to HALF_PARSED_VERSION.
 *
 * @deprecated since 1.31
 * @param array $data
 * @return bool
 */
public function isValidHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	if ( !isset( $data['version'] ) ) {
		return false;
	}
	return $data['version'] == self::HALF_PARSED_VERSION;
}
6391 
/**
 * Parse a width param of an image link, like 300px or 200x300px.
 *
 * (T15500) In both cases (width/height and width only), a trailing "px" is
 * permitted for backward compatibility.
 *
 * @param string $value
 * @param bool $parseHeight Whether the WIDTHxHEIGHT form is accepted
 * @return array Empty if $value is '' or matches neither form; otherwise it has
 *  'width' and, for the WIDTHxHEIGHT form, 'height' (both integers)
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	$groups = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $groups ) ) {
		return [
			'width' => intval( $groups[1] ),
			'height' => intval( $groups[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6420 
/**
 * Recursive call protection: mark the parser as being in a parse.
 *
 * The backtrace of the caller is stored in $this->mInParse so that a second
 * lock attempt can report who holds the lock. The returned ScopedCallback
 * resets $this->mInParse to false when it is invoked.
 *
 * @return ScopedCallback The caller must keep this in a local variable
 * @throws MWException If the parser is already locked
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Save the backtrace when locking, so that if some code tries locking again,
	// we can print the lock owner's backtrace for easier debugging
	$this->mInParse = ( new Exception )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6447 
/**
 * Strip the outer <p></p> pair from HTML that consists of a single paragraph.
 *
 * The input is returned unchanged when it is not wrapped in exactly one
 * <p>...</p> (optionally followed by a newline), or when the wrapped content
 * itself contains a closing </p>.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	if ( !preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) ) {
		return $html;
	}

	// More than one paragraph inside: leave the markup alone
	return strpos( $m[1], '</p>' ) === false ? $m[1] : $html;
}
6466 
/**
 * Return this parser if it is not currently in a parse, and a newly created
 * parser from the factory otherwise.
 *
 * @return Parser
 */
public function getFreshParser() {
	return $this->mInParse ? $this->factory->create() : $this;
}
6484 
/**
 * Flag the parser output as needing OOUI.
 *
 * NOTE(review): the embedded listing numbers skip one line inside this body,
 * so a statement may have been lost in extraction (possibly an OOUI setup
 * call) - confirm against the canonical Parser.php.
 */
public function enableOOUI() {
	$output = $this->mOutput;
	$output->setEnableOOUI( true );
}
6495 
/**
 * Set a flag on the parser output and write a debug log entry naming the
 * flag, the page and the reason.
 *
 * @param string $flag Flag name passed to the output's setFlag()
 * @param string $reason Explanation included in the debug log message
 */
protected function setOutputFlag( $flag, $reason ) {
	$this->mOutput->setFlag( $flag );

	// Prefixed title text is only needed for the log message
	$title = $this->getTitle();
	$name = $title->getPrefixedText();
	$this->logger->debug( __METHOD__ . ": set $flag flag on '$name'; $reason" );
}
6505 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:5964
extensionSubstitution( $params, $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:4059
getFunctionSynonyms()
Definition: Parser.php:5725
static armorFrenchSpaces( $text, $space='&#160;')
Armor French spaces with a replacement character.
Definition: Sanitizer.php:1179
static register( $parser)
$mAutonumber
Definition: Parser.php:195
$mPPNodeCount
Definition: Parser.php:209
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2295
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
bool string $mInParse
Recursive call protection.
Definition: Parser.php:263
const MARKER_PREFIX
Definition: Parser.php:139
wfUrlProtocols( $includeProtocolRelative=true)
Returns a regular expression of url protocols.
setLinkID( $id)
Definition: Parser.php:1005
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1662
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4833
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
$mTplRedirCache
Definition: Parser.php:211
LinkRenderer $mLinkRenderer
Definition: Parser.php:271
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:6033
doMagicLinks( $text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1607
parseLinkParameter( $value)
Parse the value of 'link' parameter in image syntax ([[File:Foo.jpg|link=<value>]]).
Definition: Parser.php:5596
const OT_PREPROCESS
Definition: Defines.php:166
static element( $element, $attribs=[], $contents='')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:231
doHeadings( $text)
Parse headers and return html.
Definition: Parser.php:1792
static tidy( $text)
Interface with Remex tidy.
Definition: MWTidy.php:42
$mDoubleUnderscores
Definition: Parser.php:211
SpecialPageFactory $specialPageFactory
Definition: Parser.php:283
killMarkers( $text)
Remove any strip markers found in the given text.
Definition: Parser.php:6312
$context
Definition: load.php:45
static getExternalLinkRel( $url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:2092
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:5924
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:257
BadFileLookup $badFileLookup
Definition: Parser.php:304
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
bool $mFirstCall
Whether firstCallInit still needs to be called.
Definition: Parser.php:163
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition: Sanitizer.php:66
getRevisionTimestampSubstring( $start, $len, $mtts, $variable)
Definition: Parser.php:3020
nextLinkID()
Definition: Parser.php:998
getTemplateDom( $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3669
const SPACE_NOT_NL
Definition: Parser.php:108
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1450
const OT_PLAIN
Definition: Parser.php:119
getTags()
Accessor.
Definition: Parser.php:5712
const OT_WIKI
Definition: Parser.php:116
User $mUser
Definition: Parser.php:220
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:3050
static cleanUrl( $url)
Definition: Sanitizer.php:2079
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1770
static isEnabled()
Definition: MWTidy.php:54
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:5093
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:2113
callParserFunction( $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3577
magicLinkCallback( $m)
Definition: Parser.php:1638
wfHostname()
Fetch server name for use in error reporting etc.
braceSubstitution( $piece, $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:3226
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:1013
startParse(?Title $title, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4900
preprocessToDom( $text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:3080
Title(Title $x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:937
const TOC_START
Definition: Parser.php:142
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
SectionProfiler $mProfiler
Definition: Parser.php:266
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <...
$sort
There are three types of nodes:
Definition: PPNode.php:35
$mHeadings
Definition: Parser.php:211
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:5008
const NS_SPECIAL
Definition: Defines.php:49
clearState()
Clear Parser state.
Definition: Parser.php:478
const EXT_LINK_ADDR
Definition: Parser.php:101
replaceExternalLinks( $text)
Replace external links (REL)
Definition: Parser.php:2013
static factory( $mode=false, IContextSource $context=null)
Get a new image gallery.
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6205
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
static statelessFetchRevision(Title $title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3747
armorLinks( $text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2625
static activeUsers()
Definition: SiteStats.php:130
$mLinkID
Definition: Parser.php:208
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4963
static createAssocArgs( $args)
Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
Definition: Parser.php:3156
LinkRendererFactory $linkRendererFactory
Definition: Parser.php:295
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:781
$mGeneratedPPNodeCount
Definition: Parser.php:209
$mRevisionId
Definition: Parser.php:237
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4870
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:1078
const NS_TEMPLATE
Definition: Defines.php:70
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:138
const NO_ARGS
Definition: PPFrame.php:29
fetchFileNoRegister( $title, $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3920
MagicWordArray $mVariables
Definition: Parser.php:170
const SFH_NO_HASH
Definition: Parser.php:89
static setupOOUI( $skinName='default', $dir='ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
setTransparentTagHook( $tag, callable $callback)
As setHook(), but letting the contents be parsed.
Definition: Parser.php:4994
$mForceTocPosition
Definition: Parser.php:213
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5954
const OT_PREPROCESS
Definition: Parser.php:117
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:3204
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition: Parser.php:6154
$mFunctionSynonyms
Definition: Parser.php:152
getPreSaveTransform()
Transform wiki markup when saving the page?
$mOutputType
Definition: Parser.php:234
interwikiTransclude( $title, $action)
Transclude an interwiki link.
Definition: Parser.php:3939
$mDefaultStripList
Definition: Parser.php:155
$mExtLinkBracketedRegex
Definition: Parser.php:184
makeKnownLinkHolder( $nt, $text='', $trail='', $prefix='')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2601
if( $line===false) $args
Definition: cdb.php:64
static stripOuterParagraph( $html)
Strip outer.
Definition: Parser.php:6458
A class for passing options to services.
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition: Sanitizer.php:74
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).
Definition: User.php:51
static getInstance( $ts=false)
Get a timestamp instance in GMT.
Definition: MWTimestamp.php:39
static numberingroup( $group)
Find the number of users in a given user group.
Definition: SiteStats.php:150
stripAltText( $caption, $holders)
Definition: Parser.php:5626
setDefaultSort( $sort)
Mutator for $mDefaultSort.
Definition: Parser.php:6074
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage()...
Definition: Linker.php:247
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:94
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
fetchFileAndTitle( $title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3895
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:791
const NO_TEMPLATES
Definition: PPFrame.php:30
array $mTplDomCache
Definition: Parser.php:215
static parseWidthParam( $value, $parseHeight=true)
Parsed a width param of imagelink like 300px or 200x300px.
Definition: Parser.php:6401
$mVarCache
Definition: Parser.php:156
disableCache()
Set a flag in the output object indicating that the content is dynamic and shouldn't be cached...
Definition: Parser.php:5684
$mRevisionObject
Definition: Parser.php:236
Title $mTitle
Definition: Parser.php:233
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:2041
getPreloadText( $text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:879
preprocess( $text, ?Title $title, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:834
makeImage( $title, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5376
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:410
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:6008
wfUrlencode( $s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
getImageParams( $handler)
Definition: Parser.php:5322
fetchCurrentRevisionOfTitle( $title)
Fetch the current revision of a given title.
Definition: Parser.php:3712
Factory for handling the special page list and generating SpecialPage objects.
static extractTagsAndParams( $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:1129
$mRevIdForTs
Definition: Parser.php:241
setUser( $user)
Set the current user.
Definition: Parser.php:900
$mStripList
Definition: Parser.php:154
$mFunctionTagHooks
Definition: Parser.php:153
const OT_PLAIN
Definition: Defines.php:168
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:163
$mRevisionTimestamp
Definition: Parser.php:238
$mImageParams
Definition: Parser.php:157
makeLimitReport()
Set the limit report data in the current ParserOutput, and return the limit report HTML comment...
Definition: Parser.php:655
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
Definition: Sanitizer.php:1295
static newKnownCurrent(IDatabase $db, $pageIdOrTitle, $revId=0)
Load a revision based on a known page ID and current revision ID from the DB.
Definition: Revision.php:1123
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:80
const OT_WIKI
Definition: Defines.php:165
Preprocessor $mPreprocessor
Definition: Parser.php:188
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:1064
const NS_MEDIA
Definition: Defines.php:48
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:5940
static getVersion( $flags='', $lang=null)
Return a string of the MediaWiki version with Git revision if available.
static singleton()
Definition: RepoGroup.php:60
static normalizeSectionName( $text)
Apply the same normalization as code making links to this section would.
Definition: Parser.php:6177
replaceTransparentTags( $text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5748
argSubstitution( $piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:4007
const RECOVER_ORIG
Definition: PPFrame.php:36
static normalizeUrlComponent( $component, $unsafe)
Definition: Parser.php:2201
static isValid( $ip)
Validate an IP address.
Definition: IP.php:111
StripState $mStripState
Definition: Parser.php:200
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3125
$mDefaultSort
Definition: Parser.php:210
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:1052
setOutputFlag( $flag, $reason)
Definition: Parser.php:6500
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1192
const EXT_IMAGE_REGEX
Definition: Parser.php:104
$cache
Definition: mcc.php:33
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1638
replaceInternalLinks( $s)
Process [[ ]] wikilinks.
Definition: Parser.php:2283
const NS_CATEGORY
Definition: Defines.php:74
setFunctionHook( $id, callable $callback, $flags=0)
Create a function, e.g.
Definition: Parser.php:5057
doQuotes( $text)
Helper function for doAllQuotes()
Definition: Parser.php:1827
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:848
extractSections( $text, $sectionId, $mode, $newText='')
Break wikitext input into sections, and either pull or replace some particular section's text...
Definition: Parser.php:5799
setOutputType( $ot)
Set the output type.
Definition: Parser.php:946
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:6053
$mImageParamsMagicArray
Definition: Parser.php:158
LinkHolderArray $mLinkHolders
Definition: Parser.php:206
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition: Parser.php:1096
static splitWhitespace( $s)
Return a three-element array: leading whitespace, string contents, trailing whitespace.
Definition: Parser.php:3092
setTitle(Title $t=null)
Set the context title.
Definition: Parser.php:909
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:1025
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1450
internalParseHalfParsed( $text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1510
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:1042
$mInputSize
Definition: Parser.php:242
formatHeadings( $text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4261
getUserSig(&$user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext...
Definition: Parser.php:4772
const HALF_PARSED_VERSION
Update this version number when the output of serializeHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:86
const NS_FILE
Definition: Defines.php:66
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:458
static makeAnchor( $sectionName)
Definition: Parser.php:6114
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name. If you don't need a full Title object...
Definition: SpecialPage.php:83
static normalizeCharReferences( $text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1569
const PTD_FOR_INCLUSION
Definition: Parser.php:111
isValidHalfParsedText( $data)
Returns true if the given array, presumed to be generated by serializeHalfParsedText(), is compatible with the current version of the parser.
Definition: Parser.php:6387
doDoubleUnderscore( $text)
Strip double-underscore items like NOGALLERY and NOTOC. Fills $this->mDoubleUnderscores, returns the modified text.
Definition: Parser.php:4188
renderImageGallery( $text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5158
$mTagHooks
Definition: Parser.php:149
NamespaceInfo $nsInfo
Definition: Parser.php:298
fetchTemplateAndTitle( $title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3758
const NS_MEDIAWIKI
Definition: Defines.php:68
MagicWordFactory $magicWordFactory
Definition: Parser.php:274
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6491
testSrvus( $text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
strip/replaceVariables/unstrip for preprocessor regression testing
Definition: Parser.php:6234
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2634
const OT_HTML
Definition: Defines.php:164
addTrackingCategory( $msg)
Definition: Parser.php:4241
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly matches $expectedKeys, without regard for order.
static images()
Definition: SiteStats.php:139
$mTransparentTagHooks
Definition: Parser.php:150
$mExpensiveFunctionCount
Definition: Parser.php:212
$mUrlProtocols
Definition: Parser.php:184
static isWellFormedXmlFragment( $text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:730
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:586
makeFreeExternalLink( $url, $numPostProto)
Make a free external link, given a user-supplied URL.
Definition: Parser.php:1717
markerSkipCallback( $s, $callback)
Call a callback function on all regions of the given text that are not inside strip markers...
Definition: Parser.php:6282
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:424
Options( $x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:991
maybeMakeExternalImage( $url)
Make an image if it's allowed, either through the global option, through the exception, or through the on-wiki whitelist.
Definition: Parser.php:2224
getUser()
Current user.
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:249
static makeImageLink(Parser $parser, LinkTarget $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:303
const OT_MSG
Definition: Parser.php:118
replaceLinkHoldersText( $text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:5141
$mFunctionHooks
Definition: Parser.php:151
$lines
Definition: router.php:61
static makeHeadline( $level, $attribs, $anchor, $html, $link, $fallbackAnchor=false)
Create a headline for content.
Definition: Linker.php:1749
MagicWordArray $mSubstWords
Definition: Parser.php:175
incrementIncludeSize( $type, $size)
Increment an include size counter.
Definition: Parser.php:4161
unserializeHalfParsedText( $data)
Load the parser state given in the $data array, which is assumed to have been generated by serializeH...
Definition: Parser.php:6360
static delimiterExplode( $startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...
Definition: StringUtils.php:59
const TOC_END
Definition: Parser.php:143
pstPass2( $text, $user)
Pre-save transform helper function.
Definition: Parser.php:4696
ServiceOptions $svcOptions
This is called $svcOptions instead of $options like elsewhere to avoid confusion with $mOptions...
Definition: Parser.php:292
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer. Placeholders created in Link...
Definition: Parser.php:5130
resetOutput()
Reset the ParserOutput.
Definition: Parser.php:526
static escapeIdForLink( $id)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid URL...
Definition: Sanitizer.php:1322
Variant of the Message class.
Definition: RawMessage.php:34
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:6477
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not...
getContentLanguage()
Get the content language that this Parser is using.
Definition: Parser.php:1106
static articles()
Definition: SiteStats.php:103
$mRevisionUser
Definition: Parser.php:239
lock()
Lock the current instance of the parser.
Definition: Parser.php:6430
testPst( $text, Title $title, ParserOptions $options)
Definition: Parser.php:6252
static pages()
Definition: SiteStats.php:112
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
$line
Definition: cdb.php:59
static tocList( $toc, Language $lang=null)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1674
static decodeCharReferences( $text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1669
getStripState()
Get the StripState.
Definition: Parser.php:1201
const SFH_OBJECT_ARGS
Definition: Parser.php:90
$mIncludeCount
Definition: Parser.php:202
$mMarkerIndex
Definition: Parser.php:159
transformMsg( $text, $options, $title=null)
Wrapper for preprocess()
Definition: Parser.php:4919
getTitle()
Accessor for the Title object.
Definition: Parser.php:927
ParserOutput $mOutput
Definition: Parser.php:194
recursiveTagParse( $text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:789
getOutput()
Get the ParserOutput object.
Definition: Parser.php:972
recursivePreprocess( $text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:860
fetchTemplate( $title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3786
doTableStuff( $text)
parse the wiki syntax used to render tables
Definition: Parser.php:1228
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1612
static register( $parser)
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:694
static fixTagAttributes( $text, $element, $sorted=false)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML...
Definition: Sanitizer.php:1136
isCurrentRevisionOfTitleCached( $title)
Definition: Parser.php:3731
doAllQuotes( $text)
Replace single quotes with HTML markup.
Definition: Parser.php:1810
makeLegacyAnchor( $sectionName)
Definition: Parser.php:6118
recursiveTagParseFully( $text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition: Parser.php:817
incrementExpensiveFunctionCount()
Increment the expensive function count.
Definition: Parser.php:4175
getDisableTitleConversion()
Whether title conversion should be disabled.
$mShowToc
Definition: Parser.php:213
const DB_REPLICA
Definition: defines.php:25
$content
Definition: router.php:78
static normalizeLinkUrl( $url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:2143
const EXT_LINK_URL_CLASS
Definition: Parser.php:98
Language $contLang
Definition: Parser.php:277
if(! $wgRequest->checkUrlExtension()) if(isset( $_SERVER['PATH_INFO']) && $_SERVER['PATH_INFO'] !='') $wgTitle
Definition: api.php:58
static removeHTMLtags( $text, $processCallback=null, $args=[], $extratags=[], $removetags=[], $warnCallback=null)
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:497
static getSectionNameFromStrippedText( $text)
Definition: Parser.php:6107
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
ParserOptions $mOptions
Definition: Parser.php:228
testPreprocess( $text, Title $title, ParserOptions $options)
Definition: Parser.php:6262
static newFromName( $name, $validate='valid')
Static factory method for creation from username.
Definition: User.php:515
getVariableValue( $index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2673
const STRIP_COMMENTS
Definition: PPFrame.php:31
static cascadingsources( $parser, $title='')
Returns the sources of any cascading protection acting on a specified page.
static newFromId( $id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:119
static normalizeLineEndings( $text)
Do a "\r\n" -> "\n" and "\r" -> "\n" transformation as well as trim trailing whitespace...
$mHighestExpansionDepth
Definition: Parser.php:209
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition: Parser.php:6166
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition: Parser.php:5701
serializeHalfParsedText( $text)
Save the parser state required to convert the given half-parsed text to HTML.
Definition: Parser.php:6333
ParserFactory $factory
Definition: Parser.php:280
__construct( $svcOptions=null, MagicWordFactory $magicWordFactory=null, Language $contLang=null, ParserFactory $factory=null, $urlProtocols=null, SpecialPageFactory $spFactory=null, $linkRendererFactory=null, $nsInfo=null, $logger=null, BadFileLookup $badFileLookup=null)
Constructing parsers directly is deprecated! Use a ParserFactory.
Definition: Parser.php:346
getCustomDefaultSort()
Accessor for $mDefaultSort. Unlike getDefaultSort(), will return false if none is set.
Definition: Parser.php:6103
static getDefaultPreprocessorClass()
Which class should we use for the preprocessor if not otherwise specified?
Definition: Parser.php:451
getUrlProtocols()
Definition: Parser.php:5734
insertStripItem( $text)
Add an item to the strip state. Returns the unique tag which must be inserted into the stripped text T...
Definition: Parser.php:1214
static tocUnindent( $level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1623
$mRevisionSize
Definition: Parser.php:240
static users()
Definition: SiteStats.php:121
validateSig( $text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4819
const SFH_OBJECT_ARGS
Definition: Defines.php:178
startExternalParse(?Title $title, ParserOptions $options, $outputType, $clearState=true, $revId=null)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4885
static normalizeSectionNameWhitespace( $section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the IDs that are used for section links.
Definition: Sanitizer.php:1550
doBlockLevels( $text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: Parser.php:2659
LoggerInterface $logger
Definition: Parser.php:301
array $mConf
Definition: Parser.php:181
const OT_HTML
Definition: Parser.php:115
$mIncludeSizes
Definition: Parser.php:209
internalParse( $text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1438
preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\r\n" -> "\n" conversion, substituting signatur...
Definition: Parser.php:4661
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
guessSectionNameFromWikiText( $text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition: Parser.php:6138
getOptions()
Get the ParserOptions object.
Definition: Parser.php:981
getDefaultSort()
Accessor for $mDefaultSort. Will use the empty string if none is set.
Definition: Parser.php:6089
static statelessFetchTemplate( $title, $parser=false)
Static function to get a template. Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3799
OutputType( $x=null)
Accessor/mutator for the output type.
Definition: Parser.php:963
setFunctionTagHook( $tag, callable $callback, $flags)
Create a tag function, e.g.
Definition: Parser.php:5108
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to preg_replace() with flags.
const SFH_NO_HASH
Definition: Defines.php:177
parse( $text, Title $title, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:548
$matches
maybeDoSubpageLink( $target, &$text)
Handle link to subpage if necessary.
Definition: Parser.php:2647
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:319