MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
32 
73 class Parser {
79  const VERSION = '1.6.4';
80 
86 
87  # Flags for Parser::setFunctionHook
88  const SFH_NO_HASH = 1;
89  const SFH_OBJECT_ARGS = 2;
90 
91  # Constants needed for external link processing
92  # Everything except bracket, space, or control characters
93  # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
94  # as well as U+3000 IDEOGRAPHIC SPACE (T21052).
95  # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
96  # uses to replace invalid HTML characters.
97  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
98  # Simplified expression to match an IPv4 or IPv6 address, or
99  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
100  const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
101  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
     # The pattern spans two source lines; the 'x' modifier makes PCRE ignore
     # the literal line break and indentation inside it.
102  // phpcs:ignore Generic.Files.LineLength
103  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
104  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
105 
106  # Regular expression for a non-newline space
107  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
108 
109  # Flags for preprocessToDom
110  const PTD_FOR_INCLUSION = 1;
111 
112  # Allowed values for $this->mOutputType
113  # Parameter to startExternalParse().
     # NOTE(review): setOutputType() and preprocess() reference self::OT_PREPROCESS,
     # which is not defined anywhere in this listing (the embedded numbering skips
     # 116 here) - confirm against the upstream file.
114  const OT_HTML = 1; # like parse()
115  const OT_WIKI = 2; # like preSaveTransform()
117  const OT_MSG = 3;
118  const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
119 
     # Delimiters placed around every strip marker (see insertStripItem() and
     # extractTagsAndParams()). Both contain U+007F DELETE, which parse()
     # replaces in its input so that markers cannot be forged by wikitext.
137  const MARKER_SUFFIX = "-QINU`\"'\x7f";
138  const MARKER_PREFIX = "\x7f'\"`UNIQ-";
139 
140  # Markers used for wrapping the table of contents
141  const TOC_START = '<mw:toc>';
142  const TOC_END = '</mw:toc>';
143 
145  const MAX_TTS = 900;
146 
147  # Persistent:
     # NOTE(review): several properties assigned elsewhere in this class
     # (e.g. mUrlProtocols, mLinkHolders, mPreprocessor, magicWordFactory) have no
     # visible declaration in this listing; the embedded numbering has gaps, so
     # declarations may simply be missing from the extract.
148  public $mTagHooks = [];
150  public $mFunctionHooks = [];
151  public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
152  public $mFunctionTagHooks = [];
153  public $mStripList = [];
154  public $mDefaultStripList = [];
     # Although listed under "Persistent", clearState() resets mVarCache to [].
155  public $mVarCache = [];
156  public $mImageParams = [];
     # Counter that insertStripItem() embeds in each marker and then increments.
158  public $mMarkerIndex = 0;
     # True until firstCallInit() has run once for this object.
162  public $mFirstCall = true;
163 
164  # Initialised by initialiseVariables()
165 
169  public $mVariables;
170 
174  public $mSubstWords;
175 
     # Legacy configuration array; the constructor fills in at least the
     # 'class' and 'preprocessorClass' keys.
180  public $mConf;
181 
182  # Initialised in constructor
184 
185  # Initialized in getPreprocessor()
186 
188 
189  # Cleared with clearState():
190 
     # Replaced with a fresh ParserOutput by resetOutput().
193  public $mOutput;
194  public $mAutonumber;
195 
     # Replaced with a new StripState in clearState().
199  public $mStripState;
200 
206 
     # Next value handed out by nextLinkID(); reset to 0 in clearState().
207  public $mLinkID;
211  public $mExpensiveFunctionCount; # number of expensive parser function calls
215 
219  public $mUser; # User object; only used when doing pre-save transform
220 
221  # Temporary
222  # These are variables reset at least once per parse regardless of $clearState
223 
227  public $mOptions;
228 
232  public $mTitle; # Title context, used for self-link rendering and similar things
233  public $mOutputType; # Output type, one of the OT_xxx constants
234  public $ot; # Shortcut alias, see setOutputType()
235  public $mRevisionObject; # The revision object of the specified revision ID
236  public $mRevisionId; # ID to display in {{REVISIONID}} tags
237  public $mRevisionTimestamp; # The timestamp of the specified revision ID
238  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
239  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
240  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
241  public $mInputSize = false; # For {{PAGESIZE}} on current page.
242 
249 
257 
     # Forced back to false on the copy in __clone().
262  public $mInParse = false;
263 
     # A new SectionProfiler is assigned on every clearState().
265  protected $mProfiler;
266 
     # Created lazily, then cached, by getLinkRenderer().
270  protected $mLinkRenderer;
271 
274 
     # The remaining private members are all assigned in the constructor.
276  private $contLang;
277 
279  private $factory;
280 
283 
291  private $svcOptions;
292 
295 
297  private $nsInfo;
298 
300  private $logger;
301 
/**
 * Names of the options the parser expects in its ServiceOptions object.
 *
 * The constructor passes this list to the ServiceOptions constructor
 * (legacy calling convention) or to assertRequiredOptions() (new calling
 * convention).
 */
308  public static $constructorOptions = [
309  // See $wgParserConf documentation
310  'class',
311  'preprocessorClass',
312  // See documentation for the corresponding config options
313  'ArticlePath',
314  'EnableScaryTranscluding',
315  'ExtraInterlanguageLinkPrefixes',
316  'FragmentMode',
317  'LanguageCode',
318  'MaxSigChars',
319  'MaxTocLevel',
320  'MiserMode',
321  'ScriptPath',
322  'Server',
323  'ServerName',
324  'ShowHostnames',
325  'Sitename',
326  'StylePath',
327  'TranscludeCacheExpiry',
328  ];
329 
/**
 * Build a parser, accepting either the legacy or the ServiceOptions-based
 * calling convention.
 *
 * If the first argument is empty or an array it is treated as a legacy
 * ParserConf array; otherwise it must offer assertRequiredOptions() and get().
 * Any dependency that is not supplied is fetched from MediaWikiServices.
 *
 * NOTE(review): the body reads $magicWordFactory, $contLang, $factory and
 * (in the new-convention path) $linkRendererFactory, none of which appear in
 * the parameter list shown here. The embedded listing numbers skip 345-347
 * and 350, so those parameter lines are probably missing from this extract -
 * confirm against the upstream file before changing the signature.
 */
343  public function __construct(
344  $svcOptions = null,
348  $urlProtocols = null,
349  SpecialPageFactory $spFactory = null,
351  $nsInfo = null,
352  $logger = null
353  ) {
354  $services = MediaWikiServices::getInstance();
355  if ( !$svcOptions || is_array( $svcOptions ) ) {
356  // Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
357  // Config, and the eighth is LinkRendererFactory.
358  $this->mConf = (array)$svcOptions;
359  if ( empty( $this->mConf['class'] ) ) {
360  $this->mConf['class'] = self::class;
361  }
362  if ( empty( $this->mConf['preprocessorClass'] ) ) {
363  $this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
364  }
365  $this->svcOptions = new ServiceOptions( self::$constructorOptions,
366  $this->mConf,
      // Positional arguments are read by index because, in the legacy
      // convention, slots 6-8 carry different things than the declared names.
367  func_num_args() > 6 ? func_get_arg( 6 ) : $services->getMainConfig()
368  );
369  $linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
370  $nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
371  } else {
372  // New calling convention
373  $svcOptions->assertRequiredOptions( self::$constructorOptions );
374  // $this->mConf is public, so we'll keep those two options there as well for
375  // compatibility until it's removed
376  $this->mConf = [
377  'class' => $svcOptions->get( 'class' ),
378  'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
379  ];
380  $this->svcOptions = $svcOptions;
381  }
382 
383  $this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
      // Regex for bracketed external links, assembled from the protocol list
      // and the EXT_LINK_* class constants.
384  $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
385  self::EXT_LINK_ADDR .
386  self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
387 
388  $this->magicWordFactory = $magicWordFactory ??
389  $services->getMagicWordFactory();
390 
391  $this->contLang = $contLang ?? $services->getContentLanguage();
392 
393  $this->factory = $factory ?? $services->getParserFactory();
394  $this->specialPageFactory = $spFactory ?? $services->getSpecialPageFactory();
395  $this->linkRendererFactory = $linkRendererFactory ?? $services->getLinkRendererFactory();
396  $this->nsInfo = $nsInfo ?? $services->getNamespaceInfo();
      // Uses ?: rather than ??, so any falsy $logger falls back to NullLogger.
397  $this->logger = $logger ?: new NullLogger();
398  }
399 
/**
 * Unset every property of this object when it is destroyed.
 *
 * The link holder array is released first (if it was ever created), then
 * each remaining property reachable from this scope is unset in turn.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}

	// Only the property names are needed; the values are discarded.
	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
411 
/**
 * Fix up a freshly cloned parser.
 *
 * Marks the copy as not being inside a parse, detaches the mStripState and
 * mVarCache fields from any PHP reference they share with the original
 * object, and then runs the 'ParserCloned' hook with the clone.
 */
415  public function __clone() {
416  $this->mInParse = false;
417 
418  // T58226: When you create a reference "to" an object field, that
419  // makes the object field itself be a reference too (until the other
420  // reference goes out of scope). When cloning, any field that's a
421  // reference is copied as a reference in the new object. Both of these
422  // are defined PHP5 behaviors, as inconvenient as it is for us when old
423  // hooks from PHP4 days are passing fields by reference.
424  foreach ( [ 'mStripState', 'mVarCache' ] as $k ) {
425  // Make a non-reference copy of the field, then rebind the field to
426  // reference the new copy.
427  $tmp = $this->$k;
428  $this->$k =& $tmp;
      // Unsetting $tmp drops only the local name; the field keeps the value.
429  unset( $tmp );
430  }
431 
432  Hooks::run( 'ParserCloned', [ $this ] );
433  }
434 
/**
 * Name of the preprocessor class used when the configuration does not
 * specify one (the constructor stores the result in
 * $this->mConf['preprocessorClass']).
 *
 * NOTE(review): no body is visible in this listing - the embedded numbering
 * jumps from 442 to 444, so the return statement appears to be missing from
 * the extract. Confirm against the upstream file.
 */
442  public static function getDefaultPreprocessorClass() {
444  }
445 
/**
 * One-time initialisation, performed the first time it is called on this
 * object; later calls return immediately.
 *
 * Registers the core tag hooks, initialises the variables and then runs the
 * 'ParserFirstCallInit' hook.
 *
 * NOTE(review): the embedded listing numbers skip 455, so one statement
 * between clearing mFirstCall and CoreTagHooks::register() may be missing
 * from this extract - confirm against the upstream file.
 */
449  public function firstCallInit() {
450  if ( !$this->mFirstCall ) {
451  return;
452  }
      // Clear the flag before doing the work so the method never re-enters.
453  $this->mFirstCall = false;
454 
456  CoreTagHooks::register( $this );
457  $this->initialiseVariables();
458 
459  // Avoid PHP 7.1 warning from passing $this by reference
460  $parser = $this;
461  Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
462  }
463 
/**
 * Reset the per-parse state of this object.
 *
 * Runs first-call initialisation if needed, installs a fresh output object,
 * zeroes the counters and caches, replaces the strip state, link holder
 * array and profiler, and finally runs the 'ParserClearState' hook.
 */
public function clearState() {
	$this->firstCallInit();
	$this->resetOutput();

	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	// Forget everything known about the revision being parsed
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [ 'post-expand' => 0, 'arg' => 0 ];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: drop a preprocessor that still points at another parser
	if ( isset( $this->mPreprocessor ) ) {
		if ( $this->mPreprocessor->parser !== $this ) {
			$this->mPreprocessor = null;
		}
	}

	$this->mProfiler = new SectionProfiler();

	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserClearState', [ &$self ] );
}
513 
/**
 * Replace $this->mOutput with a new ParserOutput and register that object's
 * recordOption() method as a watcher on the current parser options.
 */
public function resetOutput() {
	$output = new ParserOutput();
	$this->mOutput = $output;
	$this->mOptions->registerWatcher( [ $output, 'recordOption' ] );
}
521 
/**
 * Run a full parse with output type OT_HTML and return the output object.
 *
 * NOTE(review): the body uses $text, $title and $options, but they are not in
 * the parameter list shown here; the embedded listing numbers skip 537, so
 * that signature line is probably missing from this extract. Confirm against
 * the upstream file before touching the signature.
 *
 * @param bool $linestart Passed through to internalParseHalfParsed()
 * @param bool $clearState When true, U+007F is replaced in the input, the
 *   parser lock is taken, and the flag is forwarded to startParse()
 * @param int|null $revid If not null, used as the revision ID for the
 *   duration of this call; the previous revision fields are restored at the end
 * @return ParserOutput The value of $this->mOutput
 */
536  public function parse(
538  $linestart = true, $clearState = true, $revid = null
539  ) {
540  if ( $clearState ) {
541  // We use U+007F DELETE to construct strip markers, so we have to make
542  // sure that this character does not occur in the input text.
543  $text = strtr( $text, "\x7f", "?" );
      // The return value is only held, never read. NOTE(review): presumably a
      // scoped lock released when the variable goes out of scope - confirm.
544  $magicScopeVariable = $this->lock();
545  }
546  // Strip U+0000 NULL (T159174)
547  $text = str_replace( "\000", '', $text );
548 
549  $this->startParse( $title, $options, self::OT_HTML, $clearState );
550 
551  $this->currentRevisionCache = null;
552  $this->mInputSize = strlen( $text );
553  if ( $this->mOptions->getEnableLimitReport() ) {
554  $this->mOutput->resetParseStartTime();
555  }
556 
      // Remember the current revision fields so they can be restored on exit.
557  $oldRevisionId = $this->mRevisionId;
558  $oldRevisionObject = $this->mRevisionObject;
559  $oldRevisionTimestamp = $this->mRevisionTimestamp;
560  $oldRevisionUser = $this->mRevisionUser;
561  $oldRevisionSize = $this->mRevisionSize;
562  if ( $revid !== null ) {
563  $this->mRevisionId = $revid;
564  $this->mRevisionObject = null;
565  $this->mRevisionTimestamp = null;
566  $this->mRevisionUser = null;
567  $this->mRevisionSize = null;
568  }
569 
570  // Avoid PHP 7.1 warning from passing $this by reference
571  $parser = $this;
572  Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
573  # No more strip!
574  Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
575  $text = $this->internalParse( $text );
576  Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );
577 
578  $text = $this->internalParseHalfParsed( $text, true, $linestart );
579 
      // Set a converted title text unless conversion is disabled by the options,
      // by a __NOCONTENTCONVERT__/__NOTITLECONVERT__ double underscore, or a
      // display title has already been set on the output.
587  if ( !( $options->getDisableTitleConversion()
588  || isset( $this->mDoubleUnderscores['nocontentconvert'] )
589  || isset( $this->mDoubleUnderscores['notitleconvert'] )
590  || $this->mOutput->getDisplayTitle() !== false )
591  ) {
592  $convruletitle = $this->getTargetLanguage()->getConvRuleTitle();
593  if ( $convruletitle ) {
594  $this->mOutput->setTitleText( $convruletitle );
595  } else {
596  $titleText = $this->getTargetLanguage()->convertTitle( $title );
597  $this->mOutput->setTitleText( $titleText );
598  }
599  }
600 
601  # Compute runtime adaptive expiry if set
602  $this->mOutput->finalizeAdaptiveCacheExpiry();
603 
604  # Warn if too many heavyweight parser functions were used
605  if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
606  $this->limitationWarn( 'expensive-parserfunction',
607  $this->mExpensiveFunctionCount,
608  $this->mOptions->getExpensiveParserFunctionLimit()
609  );
610  }
611 
612  # Information on limits, for the benefit of users who try to skirt them
613  if ( $this->mOptions->getEnableLimitReport() ) {
614  $text .= $this->makeLimitReport();
615  }
616 
617  # Wrap non-interface parser output in a <div> so it can be targeted
618  # with CSS (T37247)
619  $class = $this->mOptions->getWrapOutputClass();
620  if ( $class !== false && !$this->mOptions->getInterfaceMessage() ) {
621  $this->mOutput->addWrapperDivClass( $class );
622  }
623 
624  $this->mOutput->setText( $text );
625 
      // Restore the revision fields saved above and clear per-call state.
626  $this->mRevisionId = $oldRevisionId;
627  $this->mRevisionObject = $oldRevisionObject;
628  $this->mRevisionTimestamp = $oldRevisionTimestamp;
629  $this->mRevisionUser = $oldRevisionUser;
630  $this->mRevisionSize = $oldRevisionSize;
631  $this->mInputSize = false;
632  $this->currentRevisionCache = null;
633 
634  return $this->mOutput;
635  }
636 
/**
 * Record limit-report data on the current output object and build the
 * HTML-comment text describing it (parse() appends the result to the page
 * text when limit reporting is enabled).
 *
 * The returned string consists of two HTML comments: the "NewPP limit
 * report" and a transclusion timing report for the ten slowest entries.
 *
 * @return string
 */
643  protected function makeLimitReport() {
644  $maxIncludeSize = $this->mOptions->getMaxIncludeSize();
645 
646  $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
647  if ( $cpuTime !== null ) {
648  $this->mOutput->setLimitReportData( 'limitreport-cputime',
649  sprintf( "%.3f", $cpuTime )
650  );
651  }
652 
      // Unlike the CPU time above, the wall time is recorded without a null check.
653  $wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
654  $this->mOutput->setLimitReportData( 'limitreport-walltime',
655  sprintf( "%.3f", $wallTime )
656  );
657 
      // Each of the following entries is stored as a [ used, limit ] pair.
658  $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
659  [ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
660  );
661  $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
662  [ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
663  );
664  $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
665  [ $this->mIncludeSizes['post-expand'], $maxIncludeSize ]
666  );
667  $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
668  [ $this->mIncludeSizes['arg'], $maxIncludeSize ]
669  );
670  $this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
671  [ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
672  );
673  $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
674  [ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
675  );
676 
677  foreach ( $this->mStripState->getLimitReport() as list( $key, $value ) ) {
678  $this->mOutput->setLimitReportData( $key, $value );
679  }
680 
681  Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );
682 
683  $limitReport = "NewPP limit report\n";
684  if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
685  $limitReport .= 'Parsed by ' . wfHostname() . "\n";
686  }
687  $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
688  $limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
689  $limitReport .= 'Dynamic content: ' .
690  ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
691  "\n";
692  $limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";
693 
694  foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
      // The default formatting below only runs when the hook returns true.
695  if ( Hooks::run( 'ParserLimitReportFormat',
696  [ $key, &$value, &$limitReport, false, false ]
697  ) ) {
698  $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
699  $valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
700  ->inLanguage( 'en' )->useDatabase( false );
701  if ( !$valueMsg->exists() ) {
702  $valueMsg = new RawMessage( '$1' );
703  }
704  if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
705  $valueMsg->params( $value );
706  $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
707  }
708  }
709  }
710  // Since we're not really outputting HTML, decode the entities and
711  // then re-encode the things that need hiding inside HTML comments.
712  $limitReport = htmlspecialchars_decode( $limitReport );
713 
714  // Sanitize for comment. Note '‐' in the replacement is U+2010,
715  // which looks much like the problematic '-'.
716  $limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
717  $text = "\n<!-- \n$limitReport-->\n";
718 
719  // Add on template profiling data in human/machine readable way
720  $dataByFunc = $this->mProfiler->getFunctionStats();
721  uasort( $dataByFunc, function ( $a, $b ) {
722  return $b['real'] <=> $a['real']; // descending order
723  } );
724  $profileReport = [];
      // Only the ten entries with the largest 'real' time are reported.
725  foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
726  $profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
727  $item['%real'], $item['real'], $item['calls'],
728  htmlspecialchars( $item['name'] ) );
729  }
730  $text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
731  $text .= implode( "\n", $profileReport ) . "\n-->\n";
732 
733  $this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );
734 
735  // Add other cache related metadata
736  if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
737  $this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
738  }
739  $this->mOutput->setLimitReportData( 'cachereport-timestamp',
740  $this->mOutput->getCacheTime() );
741  $this->mOutput->setLimitReportData( 'cachereport-ttl',
742  $this->mOutput->getCacheExpiry() );
743  $this->mOutput->setLimitReportData( 'cachereport-transientcontent',
744  $this->mOutput->hasDynamicContent() );
745 
      // Log pages whose generated node count exceeds a tenth of the limit.
746  if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
747  wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
748  $this->mTitle->getPrefixedDBkey() );
749  }
750  return $text;
751  }
752 
/**
 * Run the strip hooks and internalParse() on a piece of text, without the
 * later internalParseHalfParsed() stage.
 *
 * @param string $text Text to parse
 * @param bool|object $frame Passed through to internalParse(); false for none
 * @return string Result of internalParse( $text, false, $frame )
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	foreach ( [ 'ParserBeforeStrip', 'ParserAfterStrip' ] as $hookName ) {
		Hooks::run( $hookName, [ &$self, &$text, &$this->mStripState ] );
	}

	return $this->internalParse( $text, false, $frame );
}
785 
/**
 * Like recursiveTagParse(), but additionally passes the result through
 * internalParseHalfParsed() (with its second argument set to false).
 *
 * @param string $text Text to parse
 * @param bool|object $frame Passed through to recursiveTagParse()
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );

	return $this->internalParseHalfParsed( $halfParsed, false );
}
810 
/**
 * Expand variables in the text using output type OT_PREPROCESS and return
 * the result with all strip markers replaced.
 *
 * The parser lock is taken and the parser state is cleared (startParse() is
 * called with its $clearState argument set to true).
 *
 * The $title parameter is declared as an explicitly nullable, required
 * parameter. The previous `Title $title = null` form placed an optional
 * parameter before the required $options, which PHP 8 deprecates; because
 * $options was always required, callers already had to pass $title, so the
 * calling contract is unchanged.
 *
 * @param string $text Text to preprocess
 * @param Title|null $title Title handed to startParse()
 * @param ParserOptions $options Options handed to startParse()
 * @param int|null $revid If not null, stored as the current revision ID
 * @param bool|object $frame Passed through to replaceVariables()
 * @return string
 */
public function preprocess( $text, ?Title $title,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Held only for its lifetime; the value itself is never read.
	// NOTE(review): presumably a scoped lock released on return - confirm.
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
838 
/**
 * Expand variables in the text and replace all strip markers, without
 * taking the lock or resetting any parser state.
 *
 * @param string $text Text to preprocess
 * @param bool|object $frame Passed through to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );

	return $this->mStripState->unstripBoth( $expanded );
}
853 
/**
 * Substitute $params into the text as message parameters, then expand it
 * through the preprocessor with output type OT_PLAIN and unstrip the result.
 *
 * NOTE(review): $flags is passed to expand() but is never assigned in the
 * code shown here; the embedded listing numbers skip 875, so the assignment
 * is probably missing from this extract. Confirm against the upstream file.
 *
 * @param string $text Text to process
 * @param Title $title Title handed to startParse()
 * @param ParserOptions $options Options handed to startParse()
 * @param array $params Values substituted via RawMessage::params()
 * @return string
 */
867  public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
868  $msg = new RawMessage( $text );
869  $text = $msg->params( $params )->plain();
870 
871  # Parser (re)initialisation
872  $magicScopeVariable = $this->lock();
873  $this->startParse( $title, $options, self::OT_PLAIN, true );
874 
876  $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
877  $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
878  $text = $this->mStripState->unstripBoth( $text );
879  return $text;
880  }
881 
/**
 * Store the user that getUser() should return in preference to the user
 * from the parser options.
 *
 * @param mixed $user Value assigned to $this->mUser; null restores the
 *   fallback to the options' user
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
891 
/**
 * Set the title context for the parse.
 *
 * A falsy argument is replaced by a title created from the text 'NO TITLE'.
 * If the title carries a fragment, a fragment-less copy is stored instead.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	$title = $t ?: Title::newFromText( 'NO TITLE' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
909 
/**
 * Get the title context currently stored in $this->mTitle.
 *
 * @return Title|null Whatever setTitle() (or the Title() accessor) last stored
 */
public function getTitle() {
	return $this->mTitle;
}
918 
/**
 * Combined accessor/mutator for $this->mTitle, routed through wfSetVar().
 *
 * NOTE(review): wfSetVar() presumably returns the previous value and only
 * assigns $x when it is not null - confirm against its definition.
 *
 * @param Title|null $x
 * @return mixed Whatever wfSetVar() returns
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );

	return $result;
}
928 
/**
 * Set the output type and rebuild the $this->ot shortcut array.
 *
 * $this->ot maps 'html', 'wiki', 'pre' and 'plain' to booleans telling
 * whether $ot loosely equals the corresponding OT_* constant.
 *
 * @param int $ot One of the OT_* constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$typesByKey = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$shortcuts = [];
	foreach ( $typesByKey as $key => $type ) {
		$shortcuts[$key] = ( $ot == $type );
	}
	$this->ot = $shortcuts;
}
944 
/**
 * Combined accessor/mutator for $this->mOutputType, routed through
 * wfSetVar(). Unlike setOutputType(), this does not rebuild $this->ot.
 *
 * NOTE(review): wfSetVar() presumably returns the previous value and only
 * assigns $x when it is not null - confirm against its definition.
 *
 * @param int|null $x
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );

	return $result;
}
954 
/**
 * Get the output object currently being populated.
 *
 * @return ParserOutput|null The value of $this->mOutput, which resetOutput()
 *   replaces with a new ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
963 
/**
 * Get the parser options currently stored in $this->mOptions.
 *
 * @return mixed The stored options object, or null if none has been set
 */
public function getOptions() {
	return $this->mOptions;
}
972 
/**
 * Combined accessor/mutator for $this->mOptions, routed through wfSetVar().
 *
 * NOTE(review): wfSetVar() presumably returns the previous value and only
 * assigns $x when it is not null - confirm against its definition.
 *
 * @param mixed $x
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	$result = wfSetVar( $this->mOptions, $x );

	return $result;
}
982 
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The value of $this->mLinkID before it was incremented
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;

	return $current;
}
989 
/**
 * Overwrite the counter that nextLinkID() hands out.
 *
 * @param int $id Value the next nextLinkID() call will return
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
996 
/**
 * Alias of getTargetLanguage().
 *
 * @return mixed Whatever getTargetLanguage() returns
 */
public function getFunctionLang() {
	$language = $this->getTargetLanguage();

	return $language;
}
1004 
/**
 * Determine the language to parse into.
 *
 * Resolution order: the target language set on the parser options; for
 * interface messages, the options' user language; otherwise the page
 * language of the current title.
 *
 * @throws MWException If none of the first two apply and no title is set
 * @return mixed The language object chosen by the rules above
 */
public function getTargetLanguage() {
	$options = $this->mOptions;

	$explicit = $options->getTargetLanguage();
	if ( $explicit !== null ) {
		return $explicit;
	}

	if ( $options->getInterfaceMessage() ) {
		return $options->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
1027 
/**
 * Alias of getTargetLanguage().
 *
 * @return mixed Whatever getTargetLanguage() returns
 */
public function getConverterLanguage() {
	$language = $this->getTargetLanguage();

	return $language;
}
1036 
/**
 * Get the user for this parse: the one stored with setUser() if there is
 * one, otherwise the user from the parser options.
 *
 * @return mixed
 */
public function getUser() {
	if ( $this->mUser === null ) {
		return $this->mOptions->getUser();
	}

	return $this->mUser;
}
1049 
/**
 * Get the preprocessor, creating it on first use.
 *
 * The class to instantiate is read from the 'preprocessorClass' service
 * option, and this parser is passed to its constructor.
 *
 * @return object Instance of the configured preprocessor class
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->svcOptions->get( 'preprocessorClass' );
	$this->mPreprocessor = new $preprocessorClass( $this );

	return $this->mPreprocessor;
}
1062 
/**
 * Get the link renderer, creating it on first use.
 *
 * The renderer is configured with the stub threshold from the parser
 * options that are current at creation time, and is then cached on the
 * object without being refreshed.
 *
 * @return object The renderer produced by the link renderer factory
 */
public function getLinkRenderer() {
	// XXX We make the LinkRenderer with current options and then cache it forever
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	$renderer = $this->linkRendererFactory->create();
	$this->mLinkRenderer = $renderer;

	$threshold = $this->getOptions()->getStubThreshold();
	$renderer->setStubThreshold( $threshold );

	return $renderer;
}
1080 
/**
 * Get the magic word factory this parser was constructed with (or the one
 * the constructor fetched from the service container).
 *
 * @return mixed
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1090 
/**
 * Get the content language this parser was constructed with (or the one the
 * constructor fetched from the service container).
 *
 * @return mixed
 */
public function getContentLanguage() {
	return $this->contLang;
}
1100 
/**
 * Replace every occurrence of the given tags, and of HTML comments, in the
 * text with a unique marker, and report what was replaced.
 *
 * On return $matches maps each marker to a four-element array:
 *   [ element name, inner content, decoded attributes, full original text ]
 * The inner content is null for a self-closing tag. A tag or comment with no
 * closing counterpart swallows the rest of the text.
 *
 * Markers are numbered from a static counter, so numbering continues across
 * calls within one request.
 *
 * @param array $elements Tag names to extract. They are joined with '|' and
 *   inserted into the pattern as-is, without regex quoting.
 * @param string $text Source text
 * @param array &$matches Output parameter; emptied, then filled as above
 * @return string The text with tags and comments replaced by markers
 */
1120  public static function extractTagsAndParams( $elements, $text, &$matches ) {
1121  static $n = 1;
1122  $stripped = '';
1123  $matches = [];
1124 
1125  $taglist = implode( '|', $elements );
       // Groups: 1 = tag name, 2 = attributes, 3 = '>' or '/>', 4 = '!--'.
1126  $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
1127 
1128  while ( $text != '' ) {
1129  $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
1130  $stripped .= $p[0];
       // Fewer than five pieces means nothing matched in the remaining text.
1131  if ( count( $p ) < 5 ) {
1132  break;
1133  }
1134  if ( count( $p ) > 5 ) {
1135  # comment
1136  $element = $p[4];
1137  $attributes = '';
1138  $close = '';
1139  $inside = $p[5];
1140  } else {
1141  # tag
1142  list( , $element, $attributes, $close, $inside ) = $p;
1143  }
1144 
1145  $marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
1146  $stripped .= $marker;
1147 
1148  if ( $close === '/>' ) {
1149  # Empty element tag, <tag />
1150  $content = null;
1151  $text = $inside;
1152  $tail = null;
1153  } else {
1154  if ( $element === '!--' ) {
1155  $end = '/(-->)/';
1156  } else {
1157  $end = "/(<\\/$element\\s*>)/i";
1158  }
1159  $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
1160  $content = $q[0];
1161  if ( count( $q ) < 3 ) {
1162  # No end tag -- let it run out to the end of the text.
1163  $tail = '';
1164  $text = '';
1165  } else {
1166  list( , $tail, $text ) = $q;
1167  }
1168  }
1169 
1170  $matches[$marker] = [ $element,
1171  $content,
1172  Sanitizer::decodeTagAttributes( $attributes ),
1173  "<$element$attributes$close$content$tail" ];
1174  }
1175  return $stripped;
1176  }
1177 
/**
 * Get the current strip list.
 *
 * @return array The value of $this->mStripList
 */
public function getStripList() {
	return $this->mStripList;
}
1186 
/**
 * Get the strip state for the current parse.
 *
 * @return StripState|null The value of $this->mStripState, which clearState()
 *   replaces with a new StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1195 
/**
 * Register a piece of text in the strip state under a new marker.
 *
 * The marker embeds the current value of $this->mMarkerIndex, which is then
 * incremented so that each marker from this object is distinct.
 *
 * @param string $text Text to store as a "general" strip item
 * @return string The marker that now stands for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;

	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1211 
/**
 * Convert wikitext table markup ({| |- | ! |+ |}) into HTML table elements.
 *
 * The text is processed line by line. One stack entry per currently open
 * table is kept in each of the *_history arrays, so nested tables are
 * tracked independently. Lines outside any table are copied through
 * unchanged. Tables still open at the end of the text are closed.
 *
 * @param string $text Wikitext
 * @return string Text with table markup replaced by HTML; the empty string
 *   if the result would be a lone empty table
 */
1219  public function doTableStuff( $text ) {
1220  $lines = StringUtils::explode( "\n", $text );
1221  $out = '';
1222  $td_history = []; # Is currently a td tag open?
1223  $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1224  $tr_history = []; # Is currently a tr tag open?
1225  $tr_attributes = []; # history of tr attributes
1226  $has_opened_tr = []; # Did this table open a <tr> element?
       # NOTE(review): unlike the stacks above this is a single value, so a
       # nested table overwrites the indent level of its parent.
1227  $indent_level = 0; # indent level of the table
1228 
1229  foreach ( $lines as $outLine ) {
1230  $line = trim( $outLine );
1231 
1232  if ( $line === '' ) { # empty line, go to next line
1233  $out .= $outLine . "\n";
1234  continue;
1235  }
1236 
1237  $first_character = $line[0];
1238  $first_two = substr( $line, 0, 2 );
1239  $matches = [];
1240 
1241  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1242  # First check if we are starting a new table
1243  $indent_level = strlen( $matches[1] );
1244 
1245  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1246  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1247 
1248  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
1249  array_push( $td_history, false );
1250  array_push( $last_tag_history, '' );
1251  array_push( $tr_history, false );
1252  array_push( $tr_attributes, '' );
1253  array_push( $has_opened_tr, false );
1254  } elseif ( count( $td_history ) == 0 ) {
1255  # Don't do any of the following
1256  $out .= $outLine . "\n";
1257  continue;
1258  } elseif ( $first_two === '|}' ) {
1259  # We are ending a table
1260  $line = '</table>' . substr( $line, 2 );
1261  $last_tag = array_pop( $last_tag_history );
1262 
       # A table that never opened a row gets one empty row and cell.
1263  if ( !array_pop( $has_opened_tr ) ) {
1264  $line = "<tr><td></td></tr>{$line}";
1265  }
1266 
1267  if ( array_pop( $tr_history ) ) {
1268  $line = "</tr>{$line}";
1269  }
1270 
1271  if ( array_pop( $td_history ) ) {
1272  $line = "</{$last_tag}>{$line}";
1273  }
1274  array_pop( $tr_attributes );
1275  if ( $indent_level > 0 ) {
1276  $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1277  } else {
1278  $outLine = $line;
1279  }
1280  } elseif ( $first_two === '|-' ) {
1281  # Now we have a table row
1282  $line = preg_replace( '#^\|-+#', '', $line );
1283 
1284  # What's after the tag is now only attributes
1285  $attributes = $this->mStripState->unstripBoth( $line );
1286  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
       # The attributes are stored, not emitted: the <tr> itself is only
       # written when the first cell of the row is encountered.
1287  array_pop( $tr_attributes );
1288  array_push( $tr_attributes, $attributes );
1289 
1290  $line = '';
1291  $last_tag = array_pop( $last_tag_history );
1292  array_pop( $has_opened_tr );
1293  array_push( $has_opened_tr, true );
1294 
1295  if ( array_pop( $tr_history ) ) {
1296  $line = '</tr>';
1297  }
1298 
1299  if ( array_pop( $td_history ) ) {
1300  $line = "</{$last_tag}>{$line}";
1301  }
1302 
1303  $outLine = $line;
1304  array_push( $tr_history, false );
1305  array_push( $td_history, false );
1306  array_push( $last_tag_history, '' );
1307  } elseif ( $first_character === '|'
1308  || $first_character === '!'
1309  || $first_two === '|+'
1310  ) {
1311  # This might be cell elements, td, th or captions
1312  if ( $first_two === '|+' ) {
1313  $first_character = '+';
1314  $line = substr( $line, 2 );
1315  } else {
1316  $line = substr( $line, 1 );
1317  }
1318 
1319  // Implies both are valid for table headings.
1320  if ( $first_character === '!' ) {
1321  $line = StringUtils::replaceMarkup( '!!', '||', $line );
1322  }
1323 
1324  # Split up multiple cells on the same line.
1325  # FIXME : This can result in improper nesting of tags processed
1326  # by earlier parser steps.
1327  $cells = explode( '||', $line );
1328 
1329  $outLine = '';
1330 
1331  # Loop through each table cell
1332  foreach ( $cells as $cell ) {
1333  $previous = '';
       # Captions live outside rows, so they never open a <tr>.
1334  if ( $first_character !== '+' ) {
1335  $tr_after = array_pop( $tr_attributes );
1336  if ( !array_pop( $tr_history ) ) {
1337  $previous = "<tr{$tr_after}>\n";
1338  }
1339  array_push( $tr_history, true );
1340  array_push( $tr_attributes, '' );
1341  array_pop( $has_opened_tr );
1342  array_push( $has_opened_tr, true );
1343  }
1344 
1345  $last_tag = array_pop( $last_tag_history );
1346 
1347  if ( array_pop( $td_history ) ) {
1348  $previous = "</{$last_tag}>\n{$previous}";
1349  }
1350 
1351  if ( $first_character === '|' ) {
1352  $last_tag = 'td';
1353  } elseif ( $first_character === '!' ) {
1354  $last_tag = 'th';
1355  } elseif ( $first_character === '+' ) {
1356  $last_tag = 'caption';
1357  } else {
1358  $last_tag = '';
1359  }
1360 
1361  array_push( $last_tag_history, $last_tag );
1362 
1363  # A cell could contain both parameters and data
1364  $cell_data = explode( '|', $cell, 2 );
1365 
1366  # T2553: Note that a '|' inside an invalid link should not
1367  # be mistaken as delimiting cell parameters
1368  # Bug T153140: Neither should language converter markup.
1369  if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1370  $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1371  } elseif ( count( $cell_data ) == 1 ) {
1372  // Whitespace in cells is trimmed
1373  $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1374  } else {
1375  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1376  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1377  // Whitespace in cells is trimmed
1378  $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1379  }
1380 
1381  $outLine .= $cell;
1382  array_push( $td_history, true );
1383  }
1384  }
1385  $out .= $outLine . "\n";
1386  }
1387 
1388  # Closing open td, tr && table
       # NOTE(review): this always emits </td>, even when the cell left open was
       # a <th> or <caption>; $last_tag_history is not consulted here.
1389  while ( count( $td_history ) > 0 ) {
1390  if ( array_pop( $td_history ) ) {
1391  $out .= "</td>\n";
1392  }
1393  if ( array_pop( $tr_history ) ) {
1394  $out .= "</tr>\n";
1395  }
1396  if ( !array_pop( $has_opened_tr ) ) {
1397  $out .= "<tr><td></td></tr>\n";
1398  }
1399 
1400  $out .= "</table>\n";
1401  }
1402 
1403  # Remove trailing line-ending (b/c)
1404  if ( substr( $out, -1 ) === "\n" ) {
1405  $out = substr( $out, 0, -1 );
1406  }
1407 
1408  # special case: don't return empty table
1409  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1410  $out = '';
1411  }
1412 
1413  return $out;
1414  }
1415 
/**
 * Run the main wikitext transformation passes over $text.
 *
 * In order: fires the ParserBeforeInternalParse hook (which may abort),
 * expands variables (through $frame when one is given, otherwise through
 * replaceVariables()), strips disallowed HTML, then applies the table,
 * horizontal rule, double-underscore, heading, internal link, quote,
 * external link and magic link passes before calling formatHeadings().
 *
 * @param string $text Text to transform
 * @param bool $isMain Passed through to formatHeadings()
 * @param object|bool $frame Frame exposing ->depth and ->expand(), or a
 *   falsy value to use replaceVariables() instead
 * @return string The transformed text; if the ParserBeforeInternalParse
 *   hook returns false, $text as the hook left it
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	// Kept for formatHeadings(), which receives the text as it was on entry
	$origText = $text;

	// Hooks receive the parser by reference; passing $this directly
	// triggers a PHP 7.1 warning, so hand over a local alias instead.
	$parser = $this;

	# Hook to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( !$frame ) {
		# No frame supplied: fall back to old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# Frame depth decides how include/noinclude tags are handled:
		# depth 0 is the top-level document, anything else is an inclusion
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$text = $frame->expand( $this->preprocessToDom( $text, $flag ) );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		$transparentTags,
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables must run after variable replacement; running them before the
	# remaining passes keeps things like link expansions from turning up
	# in surprising places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	# The order of the following passes is significant
	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may leave absolute URLs behind, masked with a
	# NOPARSE marker to hide them from replaceExternalLinks; drop the
	# marker now that the external link pass has run.
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $origText, $isMain );
}
1491 
/**
 * Finish processing text that has already been through internalParse().
 *
 * Unstrips general markers, armors French spaces, applies block levels,
 * replaces link holders, optionally runs language conversion, unstrips
 * nowiki markers, replaces transparent tags, normalizes character
 * references and finally tidies the markup.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain When true, the ParserAfterUnstrip, ParserBeforeTidy
 *   and ParserAfterTidy hooks are fired
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Hooks receive the parser by reference; passing $this directly
	// triggers a PHP 7.1 warning, so hand over a local alias instead.
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$text = Sanitizer::armorFrenchSpaces( $text );
	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	$shouldConvert = !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage();
	if ( $shouldConvert ) {
		# Do not move this call: convert() assumes every link has been
		# replaced and that only the <nowiki> markers are left.
		$text = $this->getTargetLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( !MWTidy::isEnabled() ) {
		# attempt to sanitize at least some nesting problems
		# (T4702 and quite a few others)
		# This code path is buggy and deprecated!
		wfDeprecated( 'disabling tidy', '1.33' );

		# Each entry is [ pattern, replacement ]; they are applied in order.
		$fixups = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			[
				'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/',
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			],
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (T5695)
			[
				'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/',
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			],
			# fix div inside inline elements - doBlockLevels won't wrap a
			# line which contains a div, so replace the div with escaped text
			[
				'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/',
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			],
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			[
				'/<([bi])><\/\\1>/',
				'',
			],
		];

		$text = preg_replace(
			array_column( $fixups, 0 ),
			array_column( $fixups, 1 ),
			$text
		);
	} elseif ( $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1586 
/**
 * Find free external links and RFC, PMID and ISBN references in $text and
 * hand each match to magicLinkCallback() for replacement.
 *
 * Existing <a>...</a> elements and anything inside an HTML tag are matched
 * too (groups 1 and 2) so that the callback can leave them untouched.
 *
 * @param string $text
 * @return string Result of preg_replace_callback() over $text
 */
public function doMagicLinks( $text ) {
	$protocols = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$host = self::EXT_LINK_ADDR;
	$nbSpace = self::SPACE_NOT_NL; # non-newline space
	$dashOrSpace = "(?:-|$nbSpace)"; # a dash or a non-newline space
	$spaceRun = "$nbSpace++"; # possessive match of 1 or more spaces

	# The pattern is written in two string literals: the first part is
	# single-quoted so that \t, \r and \n reach PCRE verbatim, the second
	# part is double-quoted so that the variables above are interpolated.
	return preg_replace_callback(
		'!(?:                            # Start cases
			(<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
			(<.*?>) |                    # m[2]: Skip stuff inside HTML elements' . "
			(\b                          # m[3]: Free external links
				(?i:$protocols)
				($host$urlChar*)         # m[4]: Post-protocol path
			) |
			\b(?:RFC|PMID) $spaceRun     # m[5]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN $spaceRun (           # m[6]: ISBN, capture number
				(?: 97[89] $dashOrSpace? )? # optional 13-digit ISBN prefix
				(?: [0-9] $dashOrSpace? ){9} # 9 digits with opt. delimiters
				[0-9Xx]                  # check digit
			)\b
		)!xu",
		[ $this, 'magicLinkCallback' ],
		$text
	);
}
1623 
/**
 * Replacement callback for the pattern built in doMagicLinks().
 *
 * @param array $m Match groups: [1] existing anchor, [2] HTML tag,
 *   [3] free external link, [4] its post-protocol part, [5] RFC/PMID
 *   number, [6] ISBN
 * @return string Replacement for $m[0]; $m[0] itself when the match is
 *   to be left alone or the corresponding magic link type is disabled
 * @throws MWException If group 5 matched but the text starts with neither
 *   "RFC" nor "PMID"
 */
public function magicLinkCallback( $m ) {
	// True when capture group $idx took part in the match
	$matched = function ( $idx ) use ( $m ) {
		return isset( $m[$idx] ) && $m[$idx] !== '';
	};

	# Existing anchors and HTML elements are passed through untouched
	if ( $matched( 1 ) || $matched( 2 ) ) {
		return $m[0];
	}

	# Free external link
	if ( $matched( 3 ) ) {
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	# RFC or PMID
	if ( $matched( 5 ) ) {
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			$enabled = $this->mOptions->getMagicRFCLinks();
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$trackingCat = 'magiclink-tracking-rfc';
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			$enabled = $this->mOptions->getMagicPMIDLinks();
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$trackingCat = 'magiclink-tracking-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		if ( !$enabled ) {
			return $m[0];
		}

		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
	}

	# ISBN
	if ( $matched( 6 ) && $this->mOptions->getMagicISBNLinks() ) {
		# Display form: every kind of non-newline space becomes a plain space
		$isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
		# Lookup form: no separators, check digit in upper case
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $m[0];
}
1691 
/**
 * Turn a free (unbracketed) URL into an external link or external image.
 *
 * Anything from the first &lt;, &gt; or &nbsp; style entity onwards, and
 * any trailing punctuation, is split off and appended after the link.
 *
 * @param string $url Matched text, starting with the protocol
 * @param int $numPostProto Length of the part of the match that follows
 *   the protocol
 * @return string Link or image HTML followed by the split-off trail, or
 *   the plain text when too little would remain after removing the trail
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by removeHTMLtags())
	# should not be included in URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$entity = [];
	$terminators = '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/';
	if ( preg_match( $terminators, $url, $entity, PREG_OFFSET_CAPTURE ) ) {
		$cutAt = $entity[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Trailing punctuation moves to $trail. A right bracket only counts
	# as punctuation when the URL has no left bracket.
	$sep = strpos( $url, '(' ) === false ? ',;\.:!?)' : ',;\.:!?';

	$reversed = strrev( $url );
	$trailLen = strspn( $reversed, $sep );

	# Don't break a trailing HTML entity by moving its ; into $trail.
	# This is hot code: substr_compare avoids creating a new string, and
	# matching on the reversed string at an offset avoids a slow
	# $-anchored match. Un-reversed regexp: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLen
		&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $entity, 0, $trailLen )
	) {
		$trailLen--;
	}

	if ( $trailLen ) {
		$trail = substr( $url, -$trailLen ) . $trail;
		$url = substr( $url, 0, -$trailLen );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
		$html = Linker::makeExternalLink(
			$url,
			$label,
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}

	return $html . $trail;
}
1766 
/**
 * Convert "== heading ==" style lines into <h1> .. <h6> elements.
 *
 * Levels are processed from 6 down to 1 so that a longer run of equals
 * signs is consumed before a shorter one can match the same line.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$marker = str_repeat( '=', $level );
		// Only spaces and tabs are trimmed around the heading text.
		// Using \s* there would break for "==\n===\n", which would then
		// parse as <h2>=</h2>.
		$pattern = "/^(?:{$marker})[ \\t]*(.+?)[ \\t]*(?:{$marker})\\s*$/m";
		$text = preg_replace( $pattern, "<h{$level}>\\1</h{$level}>", $text );
	}

	return $text;
}
1785 
/**
 * Apply doQuotes() to every line of $text.
 *
 * @param string $text
 * @return string The processed lines joined with "\n"
 */
public function doAllQuotes( $text ) {
	$result = '';
	$isFirst = true;
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		// Put the separator before every line except the first, so
		// there is no trailing newline to strip afterwards.
		if ( $isFirst ) {
			$isFirst = false;
		} else {
			$result .= "\n";
		}
		$result .= $this->doQuotes( $line );
	}

	return $result;
}
1803 
/**
 * Convert the apostrophe markup in one line of text ('' for italics,
 * ''' for bold, ''''' for both) into <i> and <b> tags.
 *
 * @param string $text A single line of text
 * @return string The line with apostrophe runs replaced by tags. The
 *   input is returned unchanged when it contains no run of two or more
 *   apostrophes.
 */
public function doQuotes( $text ) {
	// Even indexes of $arr hold plain text, odd indexes hold apostrophe runs
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		if ( $thislen == 4 ) {
			// Four apostrophes: the first one is text, the remaining three
			// are bold mark-up.
			// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// More than five apostrophes: all but the last five are text.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) != 3 ) {
				continue;
			}
			// Last and second-to-last character of the preceding text
			$x1 = substr( $arr[$i - 1], -1 );
			$x2 = substr( $arr[$i - 1], -2, 1 );
			if ( $x1 === ' ' ) {
				if ( $firstspace == -1 ) {
					$firstspace = $i;
				}
			} elseif ( $x2 === ' ' ) {
				$firstsingleletterword = $i;
				// A single-letter word always wins, so there is no
				// point in looking at the remaining candidates.
				break;
			} elseif ( $firstmultiletterword == -1 ) {
				$firstmultiletterword = $i;
			}
		}

		// Preference order: single-letter word, then multi-letter word,
		// then a run preceded by a space. All three can be -1, for example
		// when the line holds a single quintuple apostrophe and nothing else.
		if ( $firstsingleletterword > -1 ) {
			$demote = $firstsingleletterword;
		} elseif ( $firstmultiletterword > -1 ) {
			$demote = $firstmultiletterword;
		} else {
			$demote = $firstspace;
		}
		if ( $demote > -1 ) {
			// Turn the chosen bold marker into an apostrophe plus italics
			$arr[$demote] = "''";
			$arr[$demote - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	//
	// $state names the tags currently open, outermost first ('', 'i', 'b',
	// 'bi', 'ib'), or is 'both' after a ''''' whose nesting order is not
	// yet known; while in 'both', text is collected in $buffer.
	//
	// $transitions[run length][state] = [ tags to emit, next state ]
	$transitions = [
		2 => [
			'' => [ '<i>', 'i' ],
			'b' => [ '<i>', 'bi' ],
			'i' => [ '</i>', '' ],
			'bi' => [ '</i>', 'b' ],
			'ib' => [ '</b></i><b>', 'b' ],
		],
		3 => [
			'' => [ '<b>', 'b' ],
			'i' => [ '<b>', 'ib' ],
			'b' => [ '</b>', '' ],
			'bi' => [ '</i></b><i>', 'i' ],
			'ib' => [ '</b>', 'i' ],
		],
		5 => [
			'b' => [ '</b><i>', 'i' ],
			'i' => [ '</i><b>', 'b' ],
			'bi' => [ '</i></b>', '' ],
			'ib' => [ '</b></i>', '' ],
		],
	];
	// Leaving 'both': [ tags before buffer, tags after buffer, next state ]
	$leaveBoth = [
		2 => [ '<b><i>', '</i>', 'b' ],
		3 => [ '<i><b>', '</b>', 'i' ],
		5 => [ '<i><b>', '</b></i>', '' ],
	];

	$output = '';
	$buffer = '';
	$state = '';
	foreach ( $arr as $i => $r ) {
		if ( $i % 2 == 0 ) {
			// Plain text
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		$thislen = strlen( $r );
		if ( $state === 'both' ) {
			if ( isset( $leaveBoth[$thislen] ) ) {
				$step = $leaveBoth[$thislen];
				$output .= $step[0] . $buffer . $step[1];
				$state = $step[2];
			}
		} elseif ( $thislen == 5 && $state === '' ) {
			// Nesting order unknown until the next marker shows up
			$buffer = '';
			$state = 'both';
		} elseif ( isset( $transitions[$thislen][$state] ) ) {
			$step = $transitions[$thislen][$state];
			$output .= $step[0];
			$state = $step[1];
		}
	}

	// Now close all remaining tags. Notice that the order is important.
	$closers = [
		'b' => '</b>',
		'i' => '</i>',
		'ib' => '</b></i>',
		'bi' => '</i></b>',
	];
	if ( isset( $closers[$state] ) ) {
		$output .= $closers[$state];
	}
	// There might be a lonely ''''', so only emit when text was buffered.
	// Compare against '' explicitly: a buffer of "0" is falsy in PHP and
	// would otherwise be dropped from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}

	return $output;
}
1983 
/**
 * Replace bracketed external links ([http://example.org text]) with HTML
 * links and register each target URL in the parser output.
 *
 * @param string $text
 * @return string
 * @throws MWException If splitting on $this->mExtLinkBracketedRegex fails
 */
public function replaceExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# Text in front of the first link
	$s = array_shift( $bits );

	# What is left comes in groups of four: URL, protocol, link text, and
	# the text that follows the link
	$total = count( $bits );
	for ( $i = 0; $i < $total; $i += 4 ) {
		$url = $bits[$i];
		$text = $bits[$i + 2];
		$trail = $bits[$i + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$entity = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $entity[0][1];
			$text = substr( $url, $cutAt ) . ' ' . $text;
			$url = substr( $url, 0, $cutAt );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $text );
		if ( $img !== false ) {
			$text = $img;
		}

		if ( $text == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$dtrail = '';
			$linktype = 'autonumber';
			$langObj = $this->getTargetLanguage();
			$text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			$linktype = 'text';
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
			$text = $this->getTargetLanguage()->getConverter()->markNoConversion( $text );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$link = Linker::makeExternalLink(
			$url,
			$text,
			false,
			$linktype,
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		$s .= $link . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $s;
}
2065 
/**
 * Get the rel attribute value to use for an external link.
 *
 * @param string|bool $url URL the link points to; checked against
 *   $wgNoFollowDomainExceptions
 * @param object|null $title Object exposing getNamespace() (presumably the
 *   Title of the page being parsed); its namespace is checked against
 *   $wgNoFollowNsExceptions
 * @return string|null 'nofollow', or null when no rel value applies
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	// Without this declaration the three settings below are undefined
	// local variables and the function can never return 'nofollow'.
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	$ns = $title ? $title->getNamespace() : false;
	// NOTE(review): in_array() is loose here, so a missing title
	// ($ns === false) matches a namespace exception of 0.
	if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
2085 
/**
 * Build the HTML attributes ('rel' and, when configured, 'target') for an
 * external link to $url.
 *
 * @param string $url
 * @return array Always has a 'rel' key (string or null); has a 'target'
 *   key when the parser options define an external link target
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			//
			// getExternalLinkRel() returns null when no nofollow applies,
			// so check for null as well as '' before adding the separator.
			// Checking only for '' produced a value with a leading space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2116 
/**
 * Normalize the percent-encoding of a URL.
 *
 * Unsafe characters are percent-encoded, and existing escapes are decoded
 * when they stand for a printable ASCII character that is safe in the URL
 * part in which it appears. Escapes that are kept get upper-case hex
 * digits. A bracketed IPv6 host, once validated, keeps its brackets.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	$isIPv6 = false;
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) ) {
		$candidate = rawurldecode( $m[1] );
		if ( IP::isValid( $candidate ) ) {
			$isIPv6 = $candidate;
		}
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		static function ( $hit ) {
			return rawurlencode( $hit[0] );
		},
		$url
	);

	$ret = '';
	$end = strlen( $url );

	# Work from the end of the URL towards the start: first the fragment
	# ('fragment'), then the query ('query' minus &=+;). Each entry maps
	# the delimiter that opens a part to the characters that must stay
	# escaped inside it.
	$parts = [
		'#' => '"#%<>[\]^`{|}',
		'?' => '"#%<>[\]^`{|}&=+;',
	];
	foreach ( $parts as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start === false || $start >= $end ) {
			continue;
		}
		$piece = substr( $url, $start, $end - $start );
		$ret = self::normalizeUrlComponent( $piece, $unsafe ) . $ret;
		$end = $start;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent( substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax: the brackets were percent-encoded above, restore them
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2183 
/**
 * Normalize the percent-escapes in one component of a URL.
 *
 * @param string $component Part of a URL
 * @param string $unsafe Characters that must remain percent-encoded
 * @return string $component with every %XX escape either decoded (when it
 *   stands for an ASCII character between 33 and 126 that is not listed
 *   in $unsafe) or rewritten with upper-case hex digits
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$mustStayEscaped = $code <= 32
				|| $code >= 127
				|| strpos( $unsafe, $decoded ) !== false;
			if ( $mustStayEscaped ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape[0] );
			}
			# Unescape it
			return $decoded;
		},
		$component
	);
}
2198 
/**
 * Make an external image tag for $url if the parser options allow it.
 *
 * An image is produced when $url matches EXT_IMAGE_REGEX and either all
 * external images are allowed, or $url starts with one of the configured
 * "allow external images from" prefixes, or the image whitelist is
 * enabled and one of its entries matches $url.
 *
 * @param string $url
 * @return string|bool Image HTML from Linker::makeExternalImage(), or
 *   false when $url is not to be shown as an image
 */
private function maybeMakeExternalImage( $url ) {
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();

	# The setting is either a single prefix or an array of prefixes
	$fromAllowedPrefix = false;
	if ( !empty( $allowedFrom ) ) {
		$prefixes = is_array( $allowedFrom ) ? $allowedFrom : [ $allowedFrom ];
		foreach ( $prefixes as $candidate ) {
			if ( strpos( $url, $candidate ) === 0 ) {
				$fromAllowedPrefix = true;
				break;
			}
		}
	}

	$text = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $fromAllowedPrefix )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$text = Linker::makeExternalImage( $url );
	}

	# Nothing more to do if we already have an image, the whitelist is
	# switched off, or the URL does not look like an image at all
	if ( $text
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $text;
	}

	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $whitelist as $entry ) {
		# Ignore blank entries and comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		# Each entry is a regex fragment: escape the delimiter and match
		# case-insensitively
		$entryRegex = '/' . str_replace( '/', '\\/', $entry ) . '/i';
		if ( preg_match( $entryRegex, $url ) ) {
			# Image matches a whitelist entry
			$text = Linker::makeExternalImage( $url );
			break;
		}
	}

	return $text;
}
2256 
/**
 * Process [[ ]] wikilinks in $s and merge the resulting link holders into
 * $this->mLinkHolders.
 *
 * @param string $s
 * @return string $s as rewritten by replaceInternalLinks2(), which takes
 *   it by reference
 */
public function replaceInternalLinks( $s ) {
	$linkHolders = $this->mLinkHolders;
	// replaceInternalLinks2() modifies $s in place and returns the holders
	$linkHolders->merge( $this->replaceInternalLinks2( $s ) );

	return $s;
}
2270 
2279  public function replaceInternalLinks2( &$s ) {
2280  static $tc = false, $e1, $e1_img;
2281  # the % is needed to support urlencoded titles as well
2282  if ( !$tc ) {
2283  $tc = Title::legalChars() . '#%';
2284  # Match a link having the form [[namespace:link|alternate]]trail
2285  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2286  # Match cases where there is no "]]", which might still be images
2287  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2288  }
2289 
2290  $holders = new LinkHolderArray( $this );
2291 
2292  # split the entire text string on occurrences of [[
2293  $a = StringUtils::explode( '[[', ' ' . $s );
2294  # get the first element (all text up to first [[), and remove the space we added
2295  $s = $a->current();
2296  $a->next();
2297  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2298  $s = substr( $s, 1 );
2299 
2300  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2301  $e2 = null;
2302  if ( $useLinkPrefixExtension ) {
2303  # Match the end of a line for a word that's not followed by whitespace,
2304  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2305  $charset = $this->contLang->linkPrefixCharset();
2306  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2307  }
2308 
2309  if ( is_null( $this->mTitle ) ) {
2310  throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2311  }
2312  $nottalk = !$this->mTitle->isTalkPage();
2313 
2314  if ( $useLinkPrefixExtension ) {
2315  $m = [];
2316  if ( preg_match( $e2, $s, $m ) ) {
2317  $first_prefix = $m[2];
2318  } else {
2319  $first_prefix = false;
2320  }
2321  } else {
2322  $prefix = '';
2323  }
2324 
2325  $useSubpages = $this->areSubpagesAllowed();
2326 
2327  # Loop for each link
2328  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2329  # Check for excessive memory usage
2330  if ( $holders->isBig() ) {
2331  # Too big
2332  # Do the existence check, replace the link holders and clear the array
2333  $holders->replace( $s );
2334  $holders->clear();
2335  }
2336 
2337  if ( $useLinkPrefixExtension ) {
2338  if ( preg_match( $e2, $s, $m ) ) {
2339  list( , $s, $prefix ) = $m;
2340  } else {
2341  $prefix = '';
2342  }
2343  # first link
2344  if ( $first_prefix ) {
2345  $prefix = $first_prefix;
2346  $first_prefix = false;
2347  }
2348  }
2349 
2350  $might_be_img = false;
2351 
2352  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2353  $text = $m[2];
2354  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2355  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2356  # the real problem is with the $e1 regex
2357  # See T1500.
2358  # Still some problems for cases where the ] is meant to be outside punctuation,
2359  # and no image is in sight. See T4095.
2360  if ( $text !== ''
2361  && substr( $m[3], 0, 1 ) === ']'
2362  && strpos( $text, '[' ) !== false
2363  ) {
2364  $text .= ']'; # so that replaceExternalLinks($text) works later
2365  $m[3] = substr( $m[3], 1 );
2366  }
2367  # fix up urlencoded title texts
2368  if ( strpos( $m[1], '%' ) !== false ) {
2369  # Should anchors '#' also be rejected?
2370  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2371  }
2372  $trail = $m[3];
2373  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2374  # Invalid, but might be an image with a link in its caption
2375  $might_be_img = true;
2376  $text = $m[2];
2377  if ( strpos( $m[1], '%' ) !== false ) {
2378  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2379  }
2380  $trail = "";
2381  } else { # Invalid form; output directly
2382  $s .= $prefix . '[[' . $line;
2383  continue;
2384  }
2385 
2386  $origLink = ltrim( $m[1], ' ' );
2387 
2388  # Don't allow internal links to pages containing
2389  # PROTO: where PROTO is a valid URL protocol; these
2390  # should be external links.
2391  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2392  $s .= $prefix . '[[' . $line;
2393  continue;
2394  }
2395 
2396  # Make subpage if necessary
2397  if ( $useSubpages ) {
2398  $link = $this->maybeDoSubpageLink( $origLink, $text );
2399  } else {
2400  $link = $origLink;
2401  }
2402 
2403  // \x7f isn't a default legal title char, so most likely strip
2404  // markers will force us into the "invalid form" path above. But,
2405  // just in case, let's assert that xmlish tags aren't valid in
2406  // the title position.
2407  $unstrip = $this->mStripState->killMarkers( $link );
2408  $noMarkers = ( $unstrip === $link );
2409 
2410  $nt = $noMarkers ? Title::newFromText( $link ) : null;
2411  if ( $nt === null ) {
2412  $s .= $prefix . '[[' . $line;
2413  continue;
2414  }
2415 
2416  $ns = $nt->getNamespace();
2417  $iw = $nt->getInterwiki();
2418 
2419  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2420 
2421  if ( $might_be_img ) { # if this is actually an invalid link
2422  if ( $ns == NS_FILE && $noforce ) { # but might be an image
2423  $found = false;
2424  while ( true ) {
2425  # look at the next 'line' to see if we can close it there
2426  $a->next();
2427  $next_line = $a->current();
2428  if ( $next_line === false || $next_line === null ) {
2429  break;
2430  }
2431  $m = explode( ']]', $next_line, 3 );
2432  if ( count( $m ) == 3 ) {
2433  # the first ]] closes the inner link, the second the image
2434  $found = true;
2435  $text .= "[[{$m[0]}]]{$m[1]}";
2436  $trail = $m[2];
2437  break;
2438  } elseif ( count( $m ) == 2 ) {
2439  # if there's exactly one ]] that's fine, we'll keep looking
2440  $text .= "[[{$m[0]}]]{$m[1]}";
2441  } else {
2442  # if $next_line is invalid too, we need look no further
2443  $text .= '[[' . $next_line;
2444  break;
2445  }
2446  }
2447  if ( !$found ) {
2448  # we couldn't find the end of this imageLink, so output it raw
2449  # but don't ignore what might be perfectly normal links in the text we've examined
2450  $holders->merge( $this->replaceInternalLinks2( $text ) );
2451  $s .= "{$prefix}[[$link|$text";
2452  # note: no $trail, because without an end, there *is* no trail
2453  continue;
2454  }
2455  } else { # it's not an image, so output it raw
2456  $s .= "{$prefix}[[$link|$text";
2457  # note: no $trail, because without an end, there *is* no trail
2458  continue;
2459  }
2460  }
2461 
2462  $wasblank = ( $text == '' );
2463  if ( $wasblank ) {
2464  $text = $link;
2465  if ( !$noforce ) {
2466  # Strip off leading ':'
2467  $text = substr( $text, 1 );
2468  }
2469  } else {
2470  # T6598 madness. Handle the quotes only if they come from the alternate part
2471  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2472  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2473  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2474  $text = $this->doQuotes( $text );
2475  }
2476 
2477  # Link not escaped by : , create the various objects
2478  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2479  # Interwikis
2480  if (
2481  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2482  Language::fetchLanguageName( $iw, null, 'mw' ) ||
2483  in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2484  )
2485  ) {
2486  # T26502: filter duplicates
2487  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2488  $this->mLangLinkLanguages[$iw] = true;
2489  $this->mOutput->addLanguageLink( $nt->getFullText() );
2490  }
2491 
2495  $s = rtrim( $s . $prefix ) . $trail; # T175416
2496  continue;
2497  }
2498 
2499  if ( $ns == NS_FILE ) {
2500  if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
2501  if ( $wasblank ) {
2502  # if no parameters were passed, $text
2503  # becomes something like "File:Foo.png",
2504  # which we don't want to pass on to the
2505  # image generator
2506  $text = '';
2507  } else {
2508  # recursively parse links inside the image caption
2509  # actually, this will parse them in any other parameters, too,
2510  # but it might be hard to fix that, and it doesn't matter ATM
2511  $text = $this->replaceExternalLinks( $text );
2512  $holders->merge( $this->replaceInternalLinks2( $text ) );
2513  }
2514  # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2515  $s .= $prefix . $this->armorLinks(
2516  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2517  continue;
2518  }
2519  } elseif ( $ns == NS_CATEGORY ) {
2523  $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2524 
2525  if ( $wasblank ) {
2526  $sortkey = $this->getDefaultSort();
2527  } else {
2528  $sortkey = $text;
2529  }
2530  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2531  $sortkey = str_replace( "\n", '', $sortkey );
2532  $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2533  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2534 
2535  continue;
2536  }
2537  }
2538 
2539  # Self-link checking. For some languages, variants of the title are checked in
2540  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2541  # for linking to a different variant.
2542  if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2543  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2544  continue;
2545  }
2546 
2547  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2548  # @todo FIXME: Should do batch file existence checks, see comment below
2549  if ( $ns == NS_MEDIA ) {
2550  # Give extensions a chance to select the file revision for us
2551  $options = [];
2552  $descQuery = false;
2553  Hooks::run( 'BeforeParserFetchFileAndTitle',
2554  [ $this, $nt, &$options, &$descQuery ] );
2555  # Fetch and register the file (file title may be different via hooks)
2556  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2557  # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2558  $s .= $prefix . $this->armorLinks(
2559  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2560  continue;
2561  }
2562 
2563  # Some titles, such as valid special pages or files in foreign repos, should
2564  # be shown as bluelinks even though they're not included in the page table
2565  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2566  # batch file existence checks for NS_FILE and NS_MEDIA
2567  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2568  $this->mOutput->addLink( $nt );
2569  $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2570  } else {
2571  # Links will be added to the output link list after checking
2572  $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2573  }
2574  }
2575  return $holders;
2576  }
2577 
/**
 * Build the HTML for a link to a title without going through the link
 * holder array, and armor the result against later external-link parsing.
 *
 * The trail is split with Linker::splitTrail(); the first part is folded
 * into the link label and the remainder is appended after the link.
 *
 * @param Title $nt Link target
 * @param string $text Link label (already HTML); when empty, the escaped
 *   prefixed title text is used instead
 * @param string $trail Text following the link
 * @param string $prefix Text placed in front of the label, inside the link
 * @return string Armored link HTML followed by the unused part of the trail
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	$trailParts = Linker::splitTrail( $trail );
	$inside = $trailParts[0];
	$trail = $trailParts[1];

	# Fall back to the (escaped) title when no label was supplied
	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	# HtmlArmor marks the label as ready-made HTML for the link renderer
	$labelHtml = new HtmlArmor( $prefix . $label . $inside );
	$anchor = $this->getLinkRenderer()->makeKnownLink( $nt, $labelHtml );

	return $this->armorLinks( $anchor ) . $trail;
}
2604 
/**
 * Insert a NOPARSE marker in front of every URL protocol found in $text,
 * so that the protocol no longer appears as plain text at that position.
 *
 * @param string $text
 * @return string $text with MARKER_PREFIX . "NOPARSE" inserted before each
 *   case-insensitive match of $this->mUrlProtocols at a word boundary
 */
public function armorLinks( $text ) {
	# (?i) makes only the protocol alternation case-insensitive
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	# $1 re-emits the matched protocol right after the marker
	$armored = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $armored, $text );
}
2619 
/**
 * Report whether the namespace of the title being parsed has subpages,
 * as answered by the namespace info service.
 *
 * @return bool Result of NamespaceInfo::hasSubpages() for the current title's namespace
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$currentNamespace = $this->mTitle->getNamespace();

	return $this->nsInfo->hasSubpages( $currentNamespace );
}
2628 
/**
 * Thin wrapper around Linker::normalizeSubpageLink() that supplies the
 * title currently being parsed as the context title.
 *
 * @param string $target Link target as written
 * @param string &$text Link text; handed on to the Linker by reference
 * @return string Whatever Linker::normalizeSubpageLink() returns for the target
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$normalizedTarget = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );

	return $normalizedTarget;
}
2640 
/**
 * Delegate block-level processing of $text to BlockLevelPass.
 *
 * @param string $text
 * @param bool $linestart Passed straight through to BlockLevelPass::doBlockLevels()
 * @return string Result of BlockLevelPass::doBlockLevels()
 */
public function doBlockLevels( $text, $linestart ) {
	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $processed;
}
2652 
/**
 * Return the value of a magic variable (such as 'pagename' or 'currentday').
 *
 * Values computed by a case that ends in `break` are stored in
 * $this->mVarCache and served from there on later calls. Cases that
 * `return` directly (site configuration values, the direction mark and
 * hook-provided variables) bypass that cache.
 *
 * @param string $index Magic variable identifier
 * @param bool|PPFrame $frame Only forwarded to the
 *   'ParserGetVariableValueSwitch' hook for unknown identifiers
 * @throws MWException When no title has been set on the parser
 * @return string|int|null Variable value; null when the identifier is unknown
 *   and no hook supplied a value
 */
public function getVariableValue( $index, $frame = false ) {
	if ( $this->mTitle === null ) {
		// If no title set, bad things are going to happen
		// later. Title should always be set since this
		// should only be called in the middle of a parse
		// operation (but the unit-tests do funky stuff)
		throw new MWException( __METHOD__ . ' Should only be '
			. ' called while parsing (no title set)' );
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	// Serve a previously computed value, unless a hook handler vetoes
	// use of the cache by returning false.
	if (
		Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) &&
		isset( $this->mVarCache[$index] )
	) {
		return $this->mVarCache[$index];
	}

	// Timestamp of the parse as given by the parser options; hooks may override it
	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );

	$pageLang = $this->getFunctionLang();

	switch ( $index ) {
		case '!':
			$value = '|';
			break;
		case 'currentmonth':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
			break;
		case 'currentmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
			break;
		case 'currentmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentday':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
			break;
		case 'currentday2':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
			break;
		case 'localmonth':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
			break;
		case 'localmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
			break;
		case 'localmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localday':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
			break;
		case 'localday2':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
			break;
		case 'pagename':
			$value = wfEscapeWikiText( $this->mTitle->getText() );
			break;
		case 'pagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
			break;
		case 'fullpagename':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
			break;
		case 'subpagename':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
			break;
		case 'subpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$value = wfEscapeWikiText( $this->mTitle->getRootText() );
			break;
		case 'rootpagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getRootText()
			) ) );
			break;
		case 'basepagename':
			$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
			break;
		case 'basepagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getBaseText()
			) ) );
			break;
		case 'talkpagename':
			if ( $this->mTitle->canHaveTalkPage() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
			} else {
				$value = '';
			}
			break;
		case 'talkpagenamee':
			if ( $this->mTitle->canHaveTalkPage() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
			} else {
				$value = '';
			}
			break;
		case 'subjectpagename':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
			break;
		case 'pageid': // requested in T25427
			# Inform the edit saving system that getting the canonical output
			# after page insertion requires a parse that used that exact page ID
			$this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
			$value = $this->mTitle->getArticleID();
			if ( !$value ) {
				# No page ID yet: fall back to the speculative one, and record its use
				$value = $this->mOptions->getSpeculativePageId();
				if ( $value ) {
					$this->mOutput->setSpeculativePageIdUsed( $value );
				}
			}
			break;
		case 'revisionid':
			if (
				$this->svcOptions->get( 'MiserMode' ) &&
				!$this->mOptions->getInterfaceMessage() &&
				// @TODO: disallow this word on all namespaces
				$this->nsInfo->isContent( $this->mTitle->getNamespace() )
			) {
				// Use a stub result instead of the actual revision ID in order to avoid
				// double parses on page save but still allow preview detection (T137900)
				if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
					$value = '-';
				} else {
					$this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
					$value = '';
				}
			} else {
				# Inform the edit saving system that getting the canonical output after
				# revision insertion requires a parse that used that exact revision ID
				$this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
				$value = $this->getRevisionId();
				if ( $value === 0 ) {
					$rev = $this->getRevisionObject();
					$value = $rev ? $rev->getId() : $value;
				}
				if ( !$value ) {
					$value = $this->mOptions->getSpeculativeRevId();
					if ( $value ) {
						$this->mOutput->setSpeculativeRevIdUsed( $value );
					}
				}
			}
			break;
		case 'revisionday':
			# The (int) cast drops the zero padding
			$value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
			break;
		case 'revisionday2':
			$value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
			break;
		case 'revisionmonth':
			$value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
			break;
		case 'revisionmonth1':
			# The (int) cast drops the zero padding
			$value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
			break;
		case 'revisionyear':
			$value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
			break;
		case 'revisiontimestamp':
			$value = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
			break;
		case 'revisionuser':
			# Inform the edit saving system that getting the canonical output after
			# revision insertion requires a parse that used the actual user ID
			$this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
			$value = $this->getRevisionUser();
			break;
		case 'revisionsize':
			$value = $this->getRevisionSize();
			break;
		case 'namespace':
			$value = str_replace( '_', ' ',
				$this->contLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacee':
			$value = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacenumber':
			$value = $this->mTitle->getNamespace();
			break;
		case 'talkspace':
			$value = $this->mTitle->canHaveTalkPage()
				? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
				: '';
			break;
		case 'talkspacee':
			$value = $this->mTitle->canHaveTalkPage() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
			break;
		case 'subjectspace':
			$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
			break;
		case 'currentdayname':
			$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
			break;
		case 'currentyear':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
			break;
		case 'currenttime':
			$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
			break;
		case 'currentweek':
			# @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
			break;
		case 'currentdow':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
			break;
		case 'localdayname':
			$value = $pageLang->getWeekdayName(
				(int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
			);
			break;
		case 'localyear':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
			break;
		case 'localtime':
			$value = $pageLang->time(
				MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
				false,
				false
			);
			break;
		case 'localhour':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
			break;
		case 'localweek':
			# @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
			break;
		case 'localdow':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
			break;
		case 'numberofarticles':
			$value = $pageLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $pageLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $pageLang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$value = $pageLang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$value = $pageLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$value = $pageLang->formatNum( SiteStats::edits() );
			break;
		case 'currenttimestamp':
			$value = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
			break;
		case 'currentversion':
			# NOTE(review): without this assignment $value is undefined when
			# the switch ends; confirm the call against the upstream source.
			$value = SpecialVersion::getVersion();
			break;
		case 'articlepath':
			return $this->svcOptions->get( 'ArticlePath' );
		case 'sitename':
			return $this->svcOptions->get( 'Sitename' );
		case 'server':
			return $this->svcOptions->get( 'Server' );
		case 'servername':
			return $this->svcOptions->get( 'ServerName' );
		case 'scriptpath':
			return $this->svcOptions->get( 'ScriptPath' );
		case 'stylepath':
			return $this->svcOptions->get( 'StylePath' );
		case 'directionmark':
			return $pageLang->getDirMark();
		case 'contentlanguage':
			return $this->svcOptions->get( 'LanguageCode' );
		case 'pagelanguage':
			$value = $pageLang->getCode();
			break;
		case 'cascadingsources':
			# NOTE(review): without this assignment $value is undefined when
			# the switch ends; confirm the call against the upstream source.
			$value = CoreParserFunctions::cascadingsources( $this );
			break;
		default:
			# Unknown identifier: let extensions provide the value. The
			# result is returned as-is and not written to the cache here.
			$ret = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				[ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
			);

			return $ret;
	}

	# Remember the computed value for later calls (skipped for a falsy $index)
	if ( $index ) {
		$this->mVarCache[$index] = $value;
	}

	return $value;
}
3010 
/**
 * Return a slice of the revision timestamp and, when no revision object
 * exists yet, flag the output as timestamp-dependent if that slice would
 * differ $mtts seconds from now.
 *
 * @param int $start Offset of the slice within the timestamp string
 * @param int $len Length of the slice
 * @param int $mtts Number of seconds to look ahead of the current time
 * @param string $variable Name of the variable, used in the flag's reason text
 * @return string Slice of the value returned by getRevisionTimestamp()
 */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
	# Slice of the timestamp to be used for this revision
	$current = substr( $this->getRevisionTimestamp(), $start, $len );

	# With an associated revision there is nothing to vary on
	if ( $this->getRevisionObject() ) {
		return $current;
	}

	# Compute the same slice for a moment $mtts seconds in the future.
	# This future is relative to the current time and not that of the
	# parser options.
	$futureTimestamp = wfTimestamp( TS_MW, time() + $mtts );
	$adjustedFuture = $this->contLang->userAdjust( $futureTimestamp, '' );
	$future = substr( $adjustedFuture, $start, $len );

	if ( $future !== $current ) {
		# Inform the edit saving system that getting the canonical output after
		# revision insertion requires a parse that used an actual revision timestamp
		$this->setOutputFlag( 'vary-revision-timestamp', $variable . ' used' );
	}

	return $current;
}
3042 
/**
 * Populate $this->mVariables and $this->mSubstWords with magic word arrays
 * built from the IDs the magic word factory reports for variables and for
 * subst words respectively.
 */
public function initialiseVariables() {
	$factory = $this->magicWordFactory;

	# Collect both ID lists first, then build the arrays
	$idsForVariables = $factory->getVariableIDs();
	$idsForSubst = $factory->getSubstIDs();

	$this->mVariables = $factory->newArray( $idsForVariables );
	$this->mSubstWords = $factory->newArray( $idsForSubst );
}
3055 
/**
 * Run the preprocessor over $text and return the resulting object.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Flags handed to the preprocessor (see the PTD_* constants)
 * @return mixed Whatever Preprocessor::preprocessToObj() returns
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3082 
/**
 * Split a string into its leading whitespace, its trimmed middle and its
 * trailing whitespace.
 *
 * "Whitespace" is whatever ltrim()/rtrim() strip by default.
 *
 * @param string $s
 * @return string[] [ leading whitespace, trimmed text, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	$withoutLeading = ltrim( $s );
	$leadingLength = strlen( $s ) - strlen( $withoutLeading );

	$core = rtrim( $withoutLeading );
	$trailingLength = strlen( $withoutLeading ) - strlen( $core );

	return [
		substr( $s, 0, $leadingLength ),
		$core,
		# substr( $x, -0 ) would return the whole string, hence the guard
		$trailingLength > 0 ? substr( $withoutLeading, -$trailingLength ) : '',
	];
}
3102 
/**
 * Preprocess $text and expand it in the given frame.
 *
 * Empty text, and text longer than the configured maximum include size,
 * is returned unchanged.
 *
 * @param string $text Text to expand
 * @param bool|PPFrame|array $frame Frame to expand in; false creates a fresh
 *   frame, and any other non-PPFrame value is turned into a custom frame
 * @param bool $argsOnly When true, expansion runs with PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text?
	$length = strlen( $text );
	if ( $length < 1 ) {
		return $text;
	}
	# Prevent too big inclusions!
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			$this->logger->debug(
				__METHOD__ . " called using plain parameters instead of " .
				"a PPFrame instance. Creating custom frame."
			);
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$expandFlags = 0;
	if ( $argsOnly ) {
		$expandFlags = PPFrame::NO_TEMPLATES;
	}

	return $frame->expand( $this->preprocessToDom( $text ), $expandFlags );
}
3146 
/**
 * Turn a list of raw template arguments into an associative array.
 *
 * Arguments without '=' become positional entries keyed 1, 2, 3, ...
 * Arguments containing '=' are split at the first '='; both the name and
 * the value are trimmed, and the name is used as the key. A later
 * argument with the same key overwrites an earlier one.
 *
 * @param array $args Flat list of argument strings
 * @return array Positional and named arguments combined
 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	$index = 1;
	foreach ( $args as $arg ) {
		# The limit of 2 splits at the first '=' only
		$pieces = explode( '=', $arg, 2 );
		if ( count( $pieces ) === 1 ) {
			# No '=' present: positional argument, stored untouched
			$assocArgs[$index++] = $arg;
		} else {
			# trim() always yields a string, so no false-checks are
			# needed on either half
			$assocArgs[trim( $pieces[0] )] = trim( $pieces[1] );
		}
	}

	return $assocArgs;
}
3175 
/**
 * Record that a parser limitation was hit: add a warning to the output
 * and put the page into the matching tracking category.
 *
 * The message key is "$limitationType-warning" and the tracking category
 * key is "$limitationType-category".
 *
 * @param string $limitationType Base name of the limitation
 * @param string|int $current Current value, passed as the first numeric message parameter
 * @param string|int $max Maximum value, passed as the second numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# does no harm if $current and $max are present but are unnecessary for the message
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( $limitationType . '-warning' );
	$warningText = $message->numParams( $current, $max )->text();

	$this->mOutput->addWarning( $warningText );
	$this->addTrackingCategory( $limitationType . '-category' );
}
3211 
/**
 * Resolve one double-brace expression ({{...}}) to its replacement.
 *
 * The expression is tried, in order, as: a subst/safesubst literal, a
 * magic variable, a parser function, and finally a transclusion of a
 * page, special page or interwiki page. If nothing matches, the original
 * wikitext is reconstructed and returned.
 *
 * @param array $piece The parts of the expression. The keys read here are
 *   'title' (node for the part before the first pipe), 'parts' (argument
 *   nodes, or null) and 'lineStart' (truthy when the expression starts a line).
 * @param PPFrame $frame The frame in which the expression is being expanded
 * @return array Either [ 'object' => ... ] when the result still needs
 *   expansion in the current frame (or is the recovered source), or
 *   [ 'text' => ... ] with the final text
 */
public function braceSubstitution( $piece, $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	# @todo FIXME: If piece['parts'] is null then the call to getLength()
	# below won't work b/c this $args isn't an object
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->getVariableValue( $id, $frame );
			# A cache TTL above -1 for this magic word limits the output's cache expiry
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			# Text before the colon is the function name; text after it is
			# the first argument, followed by the remaining argument nodes
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = $this->maybeDoSubpageLink( $part1, $subpage );
		if ( $part1 !== $relative ) {
			# The target was rewritten as a subpage link, so resolve it
			# in the namespace of the current title
			$part1 = $relative;
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					# Only arguments with an empty 'index' are forwarded
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is
			# preg_replace() returns the new string instead of modifying its
			# subject, so the result has to be assigned back; otherwise a
			# title that already starts with ':' would yield "[[::...]]".
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3556 
/**
 * Look up a parser function by name and invoke its registered hook.
 *
 * The name is first tried against the case-sensitive synonyms, then
 * (lower-cased with the content language) against the case-insensitive ones.
 *
 * @param PPFrame $frame Frame used to expand PPNode arguments
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments; element 0 and PPNode values are passed through
 *   untouched to SFH_OBJECT_ARGS hooks, everything else is converted
 * @return array [ 'found' => false ] when no such function is registered;
 *   otherwise the hook's flags plus 'found' => true and 'text'. When the hook
 *   sets 'noparse' to a false value, 'text' is a preprocessor DOM and
 *   'isChildObj' is true.
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	if ( !isset( $this->mFunctionSynonyms[1][$function] ) ) {
		# Not a case-sensitive name; retry as a case-insensitive one
		$function = $this->contLang->lc( $function );
		if ( !isset( $this->mFunctionSynonyms[0][$function] ) ) {
			return [ 'found' => false ];
		}
		$function = $this->mFunctionSynonyms[0][$function];
	} else {
		$function = $this->mFunctionSynonyms[1][$function];
	}

	[ $callback, $flags ] = $this->mFunctionHooks[$function];

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	$callArgs = [ &$parser ];

	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Hook wants ( $parser, $frame, $nodes ): turn every argument into a PPNode
		$nodes = [];
		foreach ( $args as $key => $value ) {
			$nodes[] = ( $value instanceof PPNode || $key === 0 )
				? $value
				: $this->mPreprocessor->newPartNodeArray( [ $key => $value ] )->item( 0 );
		}
		$callArgs[] = $frame;
		$callArgs[] = $nodes;
	} else {
		# Hook wants ( $parser, $arg1, $arg2, ... ) as trimmed plain text
		foreach ( $args as $key => $value ) {
			if ( $value instanceof PPNode ) {
				$plain = $frame->expand( $value );
			} elseif ( is_int( $key ) && $key >= 0 ) {
				$plain = $value;
			} else {
				$plain = "$key=$value";
			}
			$callArgs[] = trim( $plain );
		}
	}

	$result = $callback( ...$callArgs );

	# Hooks may return either a wikitext string or an array holding the
	# string (at index 0 or 'text') and any flags. Normalise to the array form.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [ 'found' => true ];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	# 'noparse' defaults to true; only an explicit false value triggers preprocessing
	if ( !( $result['noparse'] ?? true ) ) {
		$result['text'] = $this->preprocessToDom(
			$result['text'],
			$result['preprocessFlags'] ?? 0
		);
		$result['isChildObj'] = true;
	}

	return $result;
}
3659 
/**
 * Get the preprocessor DOM for a template, using the per-parser caches
 * ($mTplRedirCache for redirect resolution, $mTplDomCache for the DOM itself).
 *
 * @param Title $title
 * @return array Two elements: the DOM (or false when the template text could
 *   not be fetched) and the title that was finally used
 */
public function getTemplateDom( $title ) {
	$requestedTitle = $title;
	$domKey = $title->getPrefixedDBkey();

	# Follow a redirect we already know about
	if ( isset( $this->mTplRedirCache[$domKey] ) ) {
		[ $ns, $dbk ] = $this->mTplRedirCache[$domKey];
		$title = Title::makeTitle( $ns, $dbk );
		$domKey = $title->getPrefixedDBkey();
	}

	# Cache hit (a cached false counts as a hit too)
	if ( isset( $this->mTplDomCache[$domKey] ) ) {
		return [ $this->mTplDomCache[$domKey], $title ];
	}

	# Cache miss, go to the database
	[ $wikitext, $title ] = $this->fetchTemplateAndTitle( $title );

	if ( $wikitext === false ) {
		$this->mTplDomCache[$domKey] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $wikitext, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$domKey] = $dom;

	# Remember where the requested title ended up, if it moved
	if ( !$title->equals( $requestedTitle ) ) {
		$redirKey = $requestedTitle->getPrefixedDBkey();
		$this->mTplRedirCache[$redirKey] = [ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3699 
3712  $cacheKey = $title->getPrefixedDBkey();
3713  if ( !$this->currentRevisionCache ) {
3714  $this->currentRevisionCache = new MapCacheLRU( 100 );
3715  }
3716  if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
3717  $this->currentRevisionCache->set( $cacheKey,
3718  // Defaults to Parser::statelessFetchRevision()
3719  call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
3720  );
3721  }
3722  return $this->currentRevisionCache->get( $cacheKey );
3723  }
3724 
3731  return (
3732  $this->currentRevisionCache &&
3733  $this->currentRevisionCache->has( $title->getPrefixedText() )
3734  );
3735  }
3736 
/**
 * Fetch the current revision of a title without touching parser state.
 *
 * This is referred to elsewhere in this class as the default
 * "current revision" callback.
 *
 * NOTE(review): the statement assigning the revision was missing from this
 * copy of the file (the method returned an undefined variable). The lookup
 * below follows the Revision API; confirm it against upstream.
 *
 * @param Title $title
 * @param Parser|bool $parser Unused here; part of the callback signature
 * @return Revision|bool Whatever Revision::newKnownCurrent() returns
 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	$rev = Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );

	return $rev;
}
3751 
/**
 * Fetch the unparsed text of a template through the configured template
 * callback and register every dependency it reports on the parser output.
 *
 * @param Title $title
 * @return array Two elements: the template text (string values have U+007F
 *   replaced by "?") and the final title reported by the callback, or $title
 *   when it reports none
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

	$rev = $fetched['revision'] ?? null;
	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}

	$deps = $fetched['deps'] ?? [];
	foreach ( $deps as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		$isSelf = $dep['title']->equals( $this->getTitle() );
		if ( $isSelf && $rev instanceof Revision ) {
			// Self-transclusion; final result may change based on the new page version
			$this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
			$this->getOutput()->setRevisionUsedSha1Base36( $rev->getSha1() );
		}
	}

	return [ $text, $fetched['finalTitle'] ?? $title ];
}
3779 
/**
 * Fetch only the text of a template; see fetchTemplateAndTitle().
 *
 * @param Title $title
 * @return string|bool First element of fetchTemplateAndTitle()'s result
 */
public function fetchTemplate( $title ) {
	[ $text ] = $this->fetchTemplateAndTitle( $title );

	return $text;
}
3788 
/**
 * Static template fetcher: loads the wikitext of a template, following at
 * most one redirect, and collects the pages it depended on.
 *
 * @param Title $title
 * @param Parser|bool $parser When given, its fetchCurrentRevisionOfTitle()
 *   is used to load the current revision
 * @return array With keys:
 *   - 'revision': the last Revision loaded, or null
 *   - 'text': the wikitext, or false when it could not be obtained
 *   - 'finalTitle': the title the text was taken from
 *   - 'deps': list of [ 'title', 'page_id', 'rev_id' ] dependency records
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];
	$rev = null;

	# Loop to fetch the article, with up to 1 redirect
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			# No parser and no hook-selected id: load by title.
			# NOTE(review): this branch was empty in this copy of the file, which
			# left $rev unset (or stale from the previous iteration); confirm the
			# lookup against upstream.
			$rev = Revision::newFromTitle( $title );
		}
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = MediaWikiServices::getInstance()->getLinkCache();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id
		];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id
			];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			Hooks::run( 'ParserFetchTemplate',
				[ $parser, $title, $rev, &$text, &$deps ] );

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but interface messages may still have default text
			$message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
				lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		# A non-object target ends the loop via the is_object() check above
		$title = $content->getRedirectTarget();
	}
	return [
		'revision' => $rev,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
}
3886 
/**
 * Fetch a file and its title, and register both the requested name and
 * (when it differs) the file's own name as image dependencies of the output.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (falsy when none was found) and the
 *   title, updated to the file's own title when that differs
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	# $file was read without ever being assigned in this copy of the file;
	# the lookup is delegated to the non-registering sibling method.
	$file = $this->fetchFileNoRegister( $title, $options );

	$time = $file ? $file->getTimestamp() : false;
	$sha1 = $file ? $file->getSha1() : false;
	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	if ( $file && !$title->equals( $file->getTitle() ) ) {
		# Update fetched file title
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}
	return [ $file, $title ];
}
3908 
/**
 * Look up a file without registering it as a dependency of the output.
 *
 * Both lookups go through the RepoGroup service, instead of mixing it with
 * the static RepoGroup::singleton() accessor.
 *
 * @param Title $title
 * @param array $options Recognised keys here:
 *   - 'broken': when set, no lookup is done and false is returned
 *   - 'sha1': when set, the file is looked up by this key instead of by title
 *   The whole array is also forwarded to the repo lookup.
 * @return File|bool
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();
	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $title, $options );
}
3929 
/**
 * Transclude a page from another wiki by fetching it over HTTP, with the
 * response cached in the main WAN object cache.
 *
 * @param Title $title Interwiki title to fetch
 * @param string $action Value for the "action" URL parameter
 * @return string The fetched content, or a content-language error message
 *   when the feature is disabled, the URL is over 1024 bytes, or the fetch failed
 */
public function interwikiTransclude( $title, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$hasWikiId = ( $wikiId !== false );

	// __METHOD__ would name the closure inside the callback, so capture it here
	$fname = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$expiry = $this->svcOptions->get( 'TranscludeCacheExpiry' );

	$fetch = function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
		$req = MWHttpRequest::factory( $url, [], $fname );

		$ok = $req->execute()->isOK();
		if ( !$ok ) {
			// Do not cache failures
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $ok ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$checkKeys = $hasWikiId
		? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
		: [];

	$data = $cache->getWithSetCallback(
		$cacheKey,
		$expiry,
		$fetch,
		[
			'checkKeys' => $checkKeys,
			'pcGroup' => 'interwiki-transclude:5',
			'pcTTL' => $cache::TTL_PROC_LONG
		]
	);

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// No content; report the HTTP status we got instead
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3996 
/**
 * Expand a triple-brace argument reference ({{{name|default}}}).
 *
 * @param array $piece Preprocessor piece with 'title' and 'parts'
 * @param PPFrame $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] (the default value's nodes, or the
 *   re-assembled source when nothing matched) or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );

	$object = false;
	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultAllowed = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultAllowed ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	# Account for the argument's size against the include limit
	$error = false;
	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$error = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}
	if ( $error !== false ) {
		$text .= $error;
	}

	return ( $object !== false )
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
4042 
/**
 * Render an extension tag, or re-serialise it when it is not being rendered,
 * and normally hide the result behind a strip marker.
 *
 * @param array $params Keys read here: 'name', 'attr', 'inner', 'close'
 *   (preprocessor nodes) and optionally 'attributes' (extra attribute map)
 * @param PPFrame $frame
 * @return string A strip marker; the raw output when the marker type is
 *   'none'; or an expansion error string passed straight through
 * @throws MWException When a hook asks for an unknown marker type
 */
public function extensionSubstitution( $params, $frame ) {
	static $errorStr = '<span class="error">';
	static $errorLen = 20;

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : null;
	if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// Same as for the name
		return $attrText;
	}

	// The inner content is deliberately not checked for the error prefix:
	// it could legitimately be the error string itself (T149622).
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;

	$serial = sprintf( '%08X', $this->mMarkerIndex++ );
	$marker = self::MARKER_PREFIX . "-$name-" . $serial . self::MARKER_SUFFIX;

	$rendering = $this->ot['html'] || $this->ot['pre'];
	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) && $rendering;
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ( $this->ot['html'] || $isFunctionTag ) {
		# Run the registered hook for this tag
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			$output = call_user_func_array(
				$this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ]
			);
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			$callback = $this->mFunctionTagHooks[$name][0];

			// Avoid PHP 7.1 warning from passing $this by reference
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Array return: element 0 is the output, the rest are flags
			$flags = $output;
			$output = $flags[0];
			$markerType = $flags['markerType'] ?? $markerType;
		}
	} else {
		# Not rendering: rebuild the tag's source text
		$attrText = $attrText ?? '';
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}

		if ( $content !== null ) {
			$close = ( $params['close'] === null ) ? '' : $frame->expand( $params['close'] );
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// Same as for the name
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		} else {
			$output = "<$name$attrText/>";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	}

	if ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}

	return $marker;
}
4152 
/**
 * Add $size to the running include-size counter for $type, unless doing so
 * would push it over the configured maximum include size.
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Amount to add
 * @return bool True when the counter was incremented, false when the limit
 *   would be exceeded (the counter is then left untouched)
 */
public function incrementIncludeSize( $type, $size ) {
	$withinLimit =
		$this->mIncludeSizes[$type] + $size <= $this->mOptions->getMaxIncludeSize();

	if ( $withinLimit ) {
		$this->mIncludeSizes[$type] += $size;
	}

	return $withinLimit;
}
4168 
4175  $this->mExpensiveFunctionCount++;
4176  return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4177  }
4178 
/**
 * Strip double-underscore magic words (__TOC__, __NOTOC__, ...) from the
 * text and record their effects on the parser and its output.
 *
 * @param string $text
 * @return string The text with the magic words removed; the first __TOC__
 *   is replaced by a placeholder comment
 */
public function doDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Put a placeholder where the first one was (filled in with the TOC
		# at the end), then drop any others.
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );
		$text = $tocWord->replace( '', $text );
	}

	# Now match and remove the rest of them
	$this->mDoubleUnderscores =
		$this->magicWordFactory->getDoubleUnderscoreArray()->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# 'index' is handled last, so __INDEX__ always overrides __NOINDEX__ (T16899)
	$indexPolicies = [
		'noindex' => 'noindex-category',
		'index' => 'index-category',
	];
	foreach ( $indexPolicies as $policy => $trackingCategory ) {
		if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->mTitle->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( $trackingCategory );
		}
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4234 
/**
 * Add a tracking category to the parser output for the current title.
 *
 * @param string $msg Passed through to ParserOutput::addTrackingCategory()
 * @return mixed Whatever the output object's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$output = $this->mOutput;

	return $output->addTrackingCategory( $msg, $this->mTitle );
}
4243 
/**
 * Rebuild every <h1>..<h6> in the rendered text: assign unique anchors,
 * optional numbering and edit-section markers, and build the table of
 * contents and the section list.
 *
 * @param string $text Rendered HTML containing the headings
 * @param string $origText Source wikitext; preprocessed here to locate each
 *   section's offset
 * @param bool $isMain When true, the section list is stored on the parser
 *   output and the TOC is prepended to the first section (unless __TOC__
 *   fixed its position)
 * @return string The text with headings replaced and the TOC inserted
 */
public function formatHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page.
	# Actual presence will depend on post-cache transforms.
	$maybeShowEditLink = !isset( $this->mDoubleUnderscores['noeditsection'] );

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->mTitle->getPrefixedDBkey();
	# The source is re-preprocessed in OT_WIKI mode; the previous output type
	# is restored after the loop.
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Anchors already handed out, keyed by lower-cased id
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only track the fallback id when there is one. Recording a false key
		# would be stored under integer key 0 and make a later heading whose
		# id is "0" look like a duplicate.
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine( $linkAnchor, $tocline,
				$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			# NOTE(review): despite the name, this is accumulated with
			# mb_strlen(), i.e. in characters - confirm what consumers expect.
			$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
				$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->mTitle->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline( $level,
			$matches['attrib'][$headlineCount], $anchor, $headline,
			$editlink, $fallbackAnchor );

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		// Let extensions modify the section text (passed by reference)
		Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full = implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	} else {
		return $full;
	}
}
4648 
/**
 * Transform wikitext for storage: strip NUL bytes, normalise line endings,
 * optionally run the pre-save pass (signatures, subst:, pipe tricks) and
 * finally unstrip the result.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title handed to startParse()
 * @param User $user User set on the parser for the duration of the call
 * @param ParserOptions $options Options; getPreSaveTransform() decides whether
 *   the pstPass2() step runs
 * @param bool $clearState When true the parser is locked and its state cleared
 * @return string The transformed wikitext
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	# Hold the lock (if any) in a local so it lives until this method returns
	$scopedLock = $clearState ? $this->lock() : null;

	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalise line endings. The latter is
	// kept for backwards-compatibility with callers that invoke PST directly;
	// TextContent subclasses should already have done it.
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}

	$result = $this->mStripState->unstripBoth( $text );

	# Reset the user again
	$this->setUser( null );

	return $result;
}
4686 
/**
 * Pre-save transform helper: expands {{subst:}}-style constructs, replaces
 * signature tildes and resolves "pipe trick" links.
 *
 * @param string $text Wikitext being saved
 * @param User $user User whose signature replaces the tildes (passed on to
 *   getUserSig())
 * @return string The transformed wikitext
 */
private function pstPass2( $text, $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call.

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		# strtr() always prefers the longest matching key, so five tildes win
		# over four, and four over three.
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( 'user-signature', 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# Full-width punctuation, spelled as UTF-8 byte escapes so that the
	# characters cannot be silently folded to their ASCII look-alikes.
	# With ASCII parentheses $p4 would gain a bare capture group and match
	# every "[[xy|]]" link.
	$fwOpen = "\xEF\xBC\x88"; # U+FF08 FULLWIDTH LEFT PARENTHESIS
	$fwClose = "\xEF\xBC\x89"; # U+FF09 FULLWIDTH RIGHT PARENTHESIS
	$fwComma = "\xEF\xBC\x8C"; # U+FF0C FULLWIDTH COMMA

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page<fw-open>context<fw-close>|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?$fwOpen$tc+$fwClose)\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |$fwComma)$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	# Reverse pipe trick: borrow namespace and context from the current title
	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4756 
/**
 * Build the wikitext signature for a user.
 *
 * A "fancy" signature that passes validateSig() is returned after cleanSig();
 * otherwise a link built from the 'signature' / 'signature-anon' message is
 * returned.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use, or false to read the user's
 *   'nickname' option
 * @param bool|null $fancySig Whether the nickname is raw wikitext, or null to
 *   read the user's 'fancysig' option
 * @return string
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Anything the caller left unspecified comes from the user object
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# No usable nickname (loose comparison on purpose): use the user name
	if ( $nickname == null ) {
		$nickname = $username;
	}

	$tooLong = mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' );
	if ( $tooLong ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		if ( $this->validateSig( $nickname ) !== false ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}

		# Failed to validate; fall back to the default
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	if ( $user->isAnon() ) {
		$msgName = 'signature-anon';
	} else {
		$msgName = 'signature';
	}

	$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();

	return $message->title( $this->getTitle() )->text();
}
4811 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|bool The unchanged text when well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4821 
/**
 * Clean up signature text: force template calls to be substituted, remove
 * nested signature tildes and expand the result through the preprocessor.
 *
 * @param string $text Signature wikitext
 * @param bool $parsing True when called from inside a running parse; when
 *   false the parser is locked and started on $wgTitle first, and the result
 *   is unstripped before returning
 * @return string Signature text; returned untouched when the
 *   getCleanSignatures() option is off
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		global $wgTitle;
		# Keep the lock in a local so it lives until this method returns
		$scopedLock = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = $this->magicWordFactory->get( 'subst' );
	# Any "{{" that is not already followed by a subst: synonym ...
	$needsSubst = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	# ... is rewritten to "{{" plus the first subst: synonym
	$withSubst = '{{' . $subst->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $needsSubst, $withSubst, $text ) );

	$dom = $this->preprocessToDom( $text );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom );

	if ( $standalone ) {
		$text = $this->mStripState->unstripBoth( $text );
	}

	return $text;
}
4862 
/**
 * Remove runs of three to five tildes so that a signature cannot itself
 * contain a signature.
 *
 * @param string $text
 * @return string Text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4873 
/**
 * Set up the parser for use outside of parse(): delegates to startParse()
 * and optionally records a revision ID.
 *
 * NOTE(review): the declaration line was missing from this copy of the file.
 * The method name and the two leading parameters were reconstructed from the
 * in-file reference to startExternalParse() and from the body below; confirm
 * against upstream.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the Parser::OT_* constants
 * @param bool $clearState
 * @param int|null $revId Stored in mRevisionId when not null
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $title, $options, $outputType, $clearState );
	if ( $revId !== null ) {
		$this->mRevisionId = $revId;
	}
}
4892 
/**
 * Common set-up for every parse entry point: stores title, options and
 * output type, and clears the parser state on request.
 *
 * NOTE(review): the declaration line was missing from this copy of the file.
 * Name and leading parameters were reconstructed from the call sites in this
 * file (e.g. `$this->startParse( $title, $options, self::OT_WIKI, $clearState )`);
 * the visibility is an assumption, so confirm against upstream.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the Parser::OT_* constants
 * @param bool $clearState
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );
	if ( $clearState ) {
		$this->clearState();
	}
}
4909 
/**
 * Preprocess the text of an interface message.
 *
 * Re-entrant calls (a message being transformed while another one is still
 * in progress) return the text unchanged to avoid infinite recursion.
 *
 * @param string $text Message text
 * @param ParserOptions $options Passed through to preprocess()
 * @param Title|null $title Title to use; falls back to $wgTitle when empty
 * @return string The preprocessed text, or $text unchanged on re-entry
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Always release the guard. If preprocess() threw and the flag stayed
		# set, every later call would return its input untransformed.
		$executing = false;
	}
}
4937 
/**
 * Register a callback for an extension tag and add the tag to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback
 * @throws MWException If the tag name contains <, >, CR or LF
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );

	$bad = [];
	if ( preg_match( '/[<>\r\n]/', $tag, $bad ) ) {
		throw new MWException( "Invalid character {$bad[0]} in setHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mTagHooks[$tag] ) ) {
		$previous = $this->mTagHooks[$tag];
	}
	$this->mTagHooks[$tag] = $callback;

	# Make sure the tag is stripped, without listing it twice
	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
4975 
/**
 * Register a callback for a "transparent" tag. Unlike setHook(), the tag is
 * not added to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback
 * @throws MWException If the tag name contains <, >, CR or LF
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setTransparentTagHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# The message names this method; it used to say "setTransparentHook",
		# which does not exist.
		throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
	}
	$oldVal = $this->mTransparentTagHooks[$tag] ?? null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
5003 
/**
 * Drop all tag hooks and function tag hooks and restore the default strip
 * list. Transparent tag hooks are not touched here.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
5012 
/**
 * Register a parser function callback and index every synonym of its magic
 * word in the function synonym table.
 *
 * @param string $id Magic word identifier
 * @param callable $callback
 * @param int $flags Bit field of Parser::SFH_* flags; SFH_NO_HASH suppresses
 *   the leading '#' on the synonyms
 * @throws MWException If no magic word is returned for $id
 * @return callable|null The callback previously registered for $id, if any
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = null;
	if ( isset( $this->mFunctionHooks[$id] ) ) {
		$previous = $this->mFunctionHooks[$id][0];
	}
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $magicWord->getSynonyms();
	# 0 = case-insensitive bucket, 1 = case-sensitive bucket
	$sensitive = intval( $magicWord->isCaseSensitive() );
	$withHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $synonyms as $synonym ) {
		$key = $sensitive ? $synonym : $this->contLang->lc( $synonym );
		if ( $withHash ) {
			$key = '#' . $key;
		}
		# Remove trailing colon
		if ( substr( $key, -1, 1 ) === ':' ) {
			$key = substr( $key, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$key] = $id;
	}

	return $previous;
}
5086 
/**
 * List the identifiers of all registered parser function hooks.
 *
 * @return array Keys of the function hook table
 */
public function getFunctionHooks() {
	# Run first-call initialisation before reading the table
	$this->firstCallInit();

	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5096 
/**
 * Register a function-style tag hook and add the tag to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback
 * @param int $flags Stored alongside the callback
 * @throws MWException If the tag name contains <, >, CR or LF
 * @return array|null The [ callback, flags ] pair previously stored for the
 *   tag, if any
 */
public function setFunctionTagHook( $tag, callable $callback, $flags ) {
	$tag = strtolower( $tag );

	$bad = [];
	if ( preg_match( '/[<>\r\n]/', $tag, $bad ) ) {
		throw new MWException( "Invalid character {$bad[0]} in setFunctionTagHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mFunctionTagHooks[$tag] ) ) {
		$previous = $this->mFunctionTagHooks[$tag];
	}
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	# Make sure the tag is stripped, without listing it twice
	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
5121 
/**
 * Replace link placeholders in $text, in place, via the link holder array.
 *
 * @param string &$text Modified by reference
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5132 
/**
 * Run the link holder array's text-only replacement over $text.
 *
 * @param string $text
 * @return string Result of LinkHolderArray::replaceText()
 */
public function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );

	return $replaced;
}
5143 
/**
 * Render the contents of a gallery tag.
 *
 * Each non-empty line of $text has the form "File name|option|...|caption".
 * Options recognised are alt=, link= and whatever the file's media handler
 * accepts; any other segment is treated as the caption (last one wins).
 *
 * @param string $text Body of the gallery tag
 * @param array $params Tag attributes; 'mode', 'showfilename', 'caption',
 *   'perrow', 'widths' and 'heights' are interpreted here, and the full array
 *   is also handed to the gallery object
 * @return string Gallery HTML (after the AfterParserFetchFileAndTitle hook)
 */
public function renderImageGallery( $text, $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );
	# File names are shown only when the attribute is present at all
	$ig->setShowFilename( isset( $params['showfilename'] ) );

	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below.  Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# NOTE(review): this assignment was missing from this copy of the file
		# ($title was read without ever being set). Reconstructed; the NS_FILE
		# default namespace is an assumption to confirm against upstream.
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as TraditionalImageGallery does that later.
		# NOTE(review): this assignment was missing as well ($file was read
		# undefined); fetchFileNoRegister() is presumed from the comment
		# above. Confirm against upstream.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = $this->stripAltText( $match, false );
							// The pipe must be escaped: unescaped it is an
							// alternation, and any value merely starting with
							// "-{R" or ending with "}-" would be truncated.
							if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								// Drop the leading "-{R|" and the trailing "}-".
								$linkValue = substr( $linkValue, 4, -2 );
							}
							list( $type, $target ) = $this->parseLinkParameter( $linkValue );
							if ( $type === 'link-url' ) {
								$link = $target;
								$this->mOutput->addExternalLink( $target );
							} elseif ( $type === 'link-title' ) {
								$link = $target->getLinkURL();
								$this->mOutput->addLink( $target );
							}
							break;
						default:
							// Must be a handler specific parameter.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								$this->logger->debug(
									"$parameterMatch failed parameter validation" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5316 
/**
 * Get the image parameter map and matching magic word array for a media
 * handler, building and caching them per handler class on first use.
 *
 * @param MediaHandler|bool $handler Handler object, or a falsy value when
 *   there is none (cached under the empty class name)
 * @return array [ parameter map, magic word array ]; the map is keyed by
 *   magic word name and holds [ type, parameter name ] pairs, where type is
 *   'horizAlign', 'vertAlign', 'frame' or 'handler'
 */
public function getImageParams( $handler ) {
	$cacheKey = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$cacheKey] ) ) {
		return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
	}

	# Initialise static lists
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;
	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $group => $optionNames ) {
			foreach ( $optionNames as $optionName ) {
				// For grep: img_left, img_right, img_center, img_none,
				// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
				// img_bottom, img_text_bottom,
				// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
				// img_border, img_link, img_alt, img_class
				$magicName = str_replace( '-', '_', "img_$optionName" );
				$internalParamMap[$magicName] = [ $group, $optionName ];
			}
		}
	}

	# Add handler params on top of the built-in ones
	$combined = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magic => $paramName ) {
			$combined[$magic] = [ 'handler', $paramName ];
		}
	}

	$this->mImageParams[$cacheKey] = $combined;
	$this->mImageParamsMagicArray[$cacheKey] =
		$this->magicWordFactory->newArray( array_keys( $combined ) );

	return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
}
5366 
/**
 * Parse image options text and use it to make an image.
 *
 * @param Title $title Title of the file; may be replaced through hooks
 * @param string $options Pipe-separated option text ("thumb|left|caption")
 * @param LinkHolderArray|bool $holders Passed on to stripAltText()
 * @return string Whatever Linker::makeImageLink() returns for the image
 */
public function makeImage( $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Protect LanguageConverter markup when splitting into parts
	# NOTE(review): the line opening this call was missing from this copy of
	# the file, leaving a dangling argument list and $parts undefined. It is
	# restored by analogy with the identical call in renderImageGallery().
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							# parseLinkParameter() yields the real parameter
							# name ('no-link', 'link-url', 'link-title') or
							# null when the target is unusable.
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		# Anything that is not a valid option becomes the caption (last wins)
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5576 
/**
 * Classify the value of an image link= parameter and register the target
 * with the parser output.
 *
 * @param string $value
 * @return array [ type, target ] where the pair is one of:
 *   - [ 'no-link', false ] for an empty value
 *   - [ 'link-url', string ] for a valid external URL
 *   - [ 'link-title', Title ] for a valid page title
 *   - [ null, false ] when the value is neither
 */
public function parseLinkParameter( $value ) {
	$chars = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$prots = $this->mUrlProtocols;

	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	# Anything starting with a known protocol is only ever treated as a URL
	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );

		return [ 'link-url', $value ];
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );

	return [ 'link-title', $linkTitle ];
}
5619 
/**
 * Turn caption wikitext into plain text that is safe to use in an
 * alt/title attribute: link holders and strip markers are expanded, bare
 * legacy entities are neutralised and all tags are removed.
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Holder array to use, or a falsy value
 *   to fall back to replaceLinkHoldersText()
 * @return string
 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$plain = $this->mStripState->unstripBoth( $plain );

	# Compatibility hack!  In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$bareLegacyEntityAmp = "/
		& # 1. entity prefix
		(?= # 2. followed by:
		(?: # a. one of the legacy semicolon-less named entities
		A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
		C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
		GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
		O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
		U(?:acute|circ|grave|uml)|Yacute|
		a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
		c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
		divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
		frac(?:1(?:2|4)|34)|
		gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
		i(?:acute|circ|excl|grave|quest|uml)|laquo|
		lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
		m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
		not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
		o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
		p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
		s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
		u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$)) # b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx";
	$plain = preg_replace( $bareLegacyEntityAmp, '&amp;', $plain );

	return Sanitizer::stripAllTags( $plain );
}
5677 
/**
 * Mark the current parser output as uncacheable by setting its cache expiry
 * to zero.
 *
 * @throws MWException If there is no current ParserOutput
 */
public function disableCache() {
	$this->logger->debug( "Parser output marked as uncacheable." );

	$output = $this->mOutput;
	if ( !$output ) {
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}

	// new style, for consistency
	$output->updateCacheExpiry( 0 );
}
5691 
/**
 * Expand variables in the given text and then unstrip it. The text is
 * updated by reference as well as returned.
 *
 * @param string &$text
 * @param PPFrame|bool $frame Passed through to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	# Step 1: variable expansion (result is written back to the reference)
	$text = $this->replaceVariables( $text, $frame );

	# Step 2: put stripped content back
	$stripState = $this->mStripState;
	$text = $stripState->unstripBoth( $text );

	return $text;
}
5705 
/**
 * List every registered tag name: transparent tags first, then ordinary tag
 * hooks, then function tag hooks.
 *
 * @return array
 */
public function getTags() {
	$this->firstCallInit();

	$transparent = array_keys( $this->mTransparentTagHooks );
	$ordinary = array_keys( $this->mTagHooks );
	$functional = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparent, $ordinary, $functional );
}
5719 
/**
 * Get the parser function synonym table, as filled by setFunctionHook().
 *
 * @return array
 */
public function getFunctionSynonyms() {
	# Run first-call initialisation before reading the table
	$this->firstCallInit();

	$synonyms = $this->mFunctionSynonyms;

	return $synonyms;
}
5728 
/**
 * Get the URL protocols value held by this parser; parseLinkParameter()
 * embeds it in its regular expressions.
 *
 * @return string
 */
public function getUrlProtocols() {
	$protocols = $this->mUrlProtocols;

	return $protocols;
}
5736 
/**
 * Replace every transparent tag in $text with the output of its hook.
 *
 * Tags are first swapped for markers by extractTagsAndParams(); each marker
 * is then replaced by the hook's return value, or by the original tag text
 * when no hook is registered under the lower-cased tag name.
 *
 * @param string $text
 * @return string
 */
public function replaceTransparentTags( $text ) {
	$found = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $found );

	$substitutions = [];
	foreach ( $found as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;

		$hook = $this->mTransparentTagHooks[strtolower( $element )] ?? null;
		if ( $hook === null ) {
			# No hook: put the original tag text back
			$substitutions[$marker] = $tag;
			continue;
		}

		$substitutions[$marker] = call_user_func_array( $hook, [ $content, $params, $this ] );
	}

	return strtr( $text, $substitutions );
}
5768 
/**
 * Shared worker behind getSection() and replaceSection().
 *
 * $sectionId is split on '-': the last piece is the section index, and any
 * earlier piece equal to 'T' switches on PTD_FOR_INCLUSION preprocessing.
 * The text is preprocessed to a DOM and the root's <h> children are walked
 * to find the requested section together with any nested subsections.
 *
 * @param string $text Wikitext to operate on
 * @param string $sectionId Section identifier, e.g. "2" or "T-2"
 * @param string $mode 'get' to return the section text, 'replace' to return
 *   the whole text with the section swapped for $newText
 * @param string $newText Replacement text in 'replace' mode; in 'get' mode
 *   the value returned when the section cannot be found
 * @return string
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
    global $wgTitle; # not generally used but removes an ugly failure mode

    # Keep the ScopedCallback returned by lock() in a local for the whole call
    $magicScopeVariable = $this->lock();
    $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );

    $isGet = ( $mode === 'get' );
    $isReplace = ( $mode === 'replace' );
    $outText = '';
    $frame = $this->getPreprocessor()->newFrame();

    # Decode the section identifier: "<flag>-...-<index>"
    $sectionParts = explode( '-', $sectionId );
    $sectionIndex = array_pop( $sectionParts );
    $flags = in_array( 'T', $sectionParts, true ) ? self::PTD_FOR_INCLUSION : 0;

    # An empty document only has section 0 (and T-0)
    if ( strval( $text ) === '' ) {
        if ( $sectionIndex == 0 ) {
            return $isGet ? '' : $newText;
        }

        return $isGet ? $newText : $text;
    }

    # <h> nodes mark section breaks and only occur as children of the root,
    # so iterating the root's children is enough to find them
    $root = $this->preprocessToDom( $text, $flags );
    $node = $root->getFirstChild();

    # Locate the start of the target section
    if ( $sectionIndex == 0 ) {
        # Section zero doesn't nest, level=big
        $targetLevel = 1000;
    } else {
        for ( ; $node; $node = $node->getNextSibling() ) {
            if ( $node->getName() === 'h' ) {
                $bits = $node->splitHeading();
                if ( $bits['i'] == $sectionIndex ) {
                    $targetLevel = $bits['level'];
                    break;
                }
            }
            # In replace mode everything before the section is copied through
            if ( $isReplace ) {
                $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
            }
        }
    }

    if ( !$node ) {
        # Section not found
        return $isGet ? $newText : $text;
    }

    # Walk to the end of the section, nested sections included: stop at the
    # first other heading whose level is not deeper than the target's
    for ( ; $node; $node = $node->getNextSibling() ) {
        if ( $node->getName() === 'h' ) {
            $bits = $node->splitHeading();
            if ( $bits['i'] != $sectionIndex && $bits['level'] <= $targetLevel ) {
                break;
            }
        }
        if ( $isGet ) {
            $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
        }
    }

    if ( $isReplace ) {
        # Emit the replacement followed by two newlines -- trailing whitespace
        # in $newText is conventionally stripped by the editor, so both are
        # needed to restore the paragraph gap. Nothing is added when there is
        # no replacement text.
        if ( $newText != "" ) {
            $outText .= $newText . "\n\n";
        }

        # Copy the rest of the document through unchanged
        for ( ; $node; $node = $node->getNextSibling() ) {
            $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
        }
    }

    if ( is_string( $outText ) ) {
        # Re-insert stripped tags
        $outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
    }

    return $outText;
}
5908 
/**
 * Return the text of one section of $text.
 *
 * Delegates to extractSections() in 'get' mode.
 *
 * @param string $text Text to look in
 * @param string $sectionId Section identifier, passed through to extractSections()
 * @param string $defaultText Value to return when the section is not found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
    $mode = 'get';

    return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5926 
/**
 * Return $oldText with one section replaced by $newText.
 *
 * Delegates to extractSections() in 'replace' mode.
 *
 * @param string $oldText Text to operate on
 * @param string $sectionId Section identifier, passed through to extractSections()
 * @param string $newText Replacement text for the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
    $mode = 'replace';

    return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
5942 
/**
 * Accessor for the ID of the revision being parsed.
 *
 * @return int|null The current value of $this->mRevisionId
 */
public function getRevisionId() {
    $revisionId = $this->mRevisionId;

    return $revisionId;
}
5956 
/**
 * Get the revision object for $this->mRevisionId, caching it on the parser.
 *
 * The revision is requested from the current-revision callback of the
 * parser options. Null is returned, and nothing is cached, when
 * mRevisionId is null but the callback hands back a revision that already
 * has an ID.
 *
 * @return Revision|null
 */
public function getRevisionObject() {
    $cached = $this->mRevisionObject;
    if ( $cached ) {
        return $cached;
    }

    // NOTE: the callback is invoked even if mRevisionId is null, which is
    // useful when parsing a revision that has not yet been saved.
    // NOTE: the callback may also be used to inject an OLD revision that was
    // already loaded, so "current" is a bit of a misnomer; it cannot simply
    // be skipped when mRevisionId is set.
    $callback = $this->mOptions->getCurrentRevisionCallback();
    $rev = call_user_func( $callback, $this->getTitle(), $this );

    $wantedId = $this->mRevisionId;
    if ( $rev ) {
        if ( $wantedId === null ) {
            if ( $rev->getId() ) {
                // Preview mode (mRevisionId is null) and the callback
                // returned an existing revision: that is probably the
                // page's current revision, which is not what is wanted
                // here, so ignore it. The callback still had to be called
                // so that an unsaved revision could be injected, e.g. for
                // self-transclusion previews.
                return null;
            }
        } elseif ( $wantedId && $rev->getId() != $wantedId ) {
            // For a new revision the callback should already have been set
            // to force the object and should match mRevisionId. If it does
            // not, fetch by mRevisionId for sanity.
            $rev = Revision::newFromId( $wantedId );
        }
    }

    $this->mRevisionObject = $rev;

    return $rev;
}
6001 
/**
 * Get the timestamp associated with the current revision, adjusted to the
 * site-default timezone offset, and cache it on the parser.
 *
 * @return string The adjusted timestamp as returned by userAdjust()
 */
public function getRevisionTimestamp() {
    # Serve the cached value once it has been computed
    if ( $this->mRevisionTimestamp !== null ) {
        return $this->mRevisionTimestamp;
    }

    # Use specified revision timestamp, falling back to the current timestamp
    $revObject = $this->getRevisionObject();
    $timestamp = $revObject ? $revObject->getTimestamp() : $this->mOptions->getTimestamp();
    $this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone

    # The cryptic '' timezone parameter tells to use the site-default
    # timezone offset instead of the user settings.
    # Since this value will be saved into the parser cache, served
    # to other users, and potentially even used inside links and such,
    # it needs to be consistent for all visitors.
    $this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );

    return $this->mRevisionTimestamp;
}
6026 
/**
 * Get the name of the user that edited the revision being parsed.
 *
 * The value is cached in $this->mRevisionUser. It stays null when there is
 * no revision object and the parser is neither in 'wiki' output mode nor
 * parsing a preview.
 *
 * @return string|null User name
 */
public function getRevisionUser() {
    if ( $this->mRevisionUser !== null ) {
        return $this->mRevisionUser;
    }

    $rev = $this->getRevisionObject();
    if ( $rev ) {
        $this->mRevisionUser = $rev->getUserText();
    } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
        # If this template is subst: the revision id will be blank,
        # so just use the current user's name
        $this->mRevisionUser = $this->getUser()->getName();
    }

    return $this->mRevisionUser;
}
6046 
/**
 * Get the size of the revision being parsed.
 *
 * The value is cached in $this->mRevisionSize. Without a revision object
 * the parser input size is used instead.
 *
 * @return int|null Revision size
 */
public function getRevisionSize() {
    if ( $this->mRevisionSize === null ) {
        $rev = $this->getRevisionObject();

        # If this variable is subst: the revision id will be blank, so just
        # use the parser input size, because the substitution itself will
        # change the size.
        $this->mRevisionSize = $rev ? $rev->getSize() : $this->mInputSize;
    }

    return $this->mRevisionSize;
}
6067 
/**
 * Set the default sort key.
 *
 * The key is stored on the parser and also recorded on the parser output
 * as the 'defaultsort' property.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
    # Remember it locally ...
    $this->mDefaultSort = $sort;

    # ... and publish it on the output object
    $this->mOutput->setProperty( 'defaultsort', $sort );
}
6077 
/**
 * Get the default sort key, or an empty string when none has been set.
 *
 * "Not set" is represented internally by $this->mDefaultSort being false.
 *
 * @return string
 */
public function getDefaultSort() {
    return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6095 
/**
 * Accessor for the raw default sort key.
 *
 * Unlike getDefaultSort(), the stored value is returned as-is, so false
 * comes back when that is what $this->mDefaultSort holds.
 *
 * @return string|bool The current value of $this->mDefaultSort
 */
public function getCustomDefaultSort() {
    $sortKey = $this->mDefaultSort;

    return $sortKey;
}
6105 
/**
 * Turn already-stripped heading text into a section name.
 *
 * Whitespace is normalized, character references are decoded, and the
 * result is run through normalizeSectionName().
 *
 * @param string $text Heading text with wikitext markup already removed
 * @return string Section name
 */
private static function getSectionNameFromStrippedText( $text ) {
    # NOTE(review): this whitespace step was absent from the scraped listing
    # and is restored to match upstream MediaWiki -- confirm.
    $text = Sanitizer::normalizeSectionNameWhitespace( $text );
    $text = Sanitizer::decodeCharReferences( $text );
    $text = self::normalizeSectionName( $text );
    return $text;
}
6112 
/**
 * Build a "#fragment" anchor for a section name.
 *
 * @param string $sectionName Section name
 * @return string '#' followed by Sanitizer::escapeIdForLink( $sectionName )
 */
private static function makeAnchor( $sectionName ) {
    $escaped = Sanitizer::escapeIdForLink( $sectionName );

    return '#' . $escaped;
}
6116 
/**
 * Build a "#fragment" anchor for a section name, using the legacy encoding
 * when the second entry of the 'FragmentMode' option is 'legacy'.
 *
 * @param string $sectionName Section name
 * @return string Anchor including the leading '#'
 */
private function makeLegacyAnchor( $sectionName ) {
    $fragmentMode = $this->svcOptions->get( 'FragmentMode' );
    if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
        // ForAttribute() and ForLink() are the same for legacy encoding
        // NOTE(review): this assignment was absent from the scraped listing
        // (leaving $id undefined on this path) and is restored to match
        // upstream MediaWiki -- confirm.
        $id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
    } else {
        $id = Sanitizer::escapeIdForLink( $sectionName );
    }

    return "#$id";
}
6128 
/**
 * Derive the anchor for a heading given as wikitext.
 *
 * @param string $text Heading wikitext
 * @return string Anchor including the leading '#'
 */
public function guessSectionNameFromWikiText( $text ) {
    # Strip out wikitext links (they break the anchor)
    $stripped = $this->stripSectionName( $text );

    return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6143 
/**
 * Same as guessSectionNameFromWikiText(), except that the anchor is built
 * by makeLegacyAnchor().
 *
 * @param string $text Heading wikitext
 * @return string Anchor including the leading '#'
 */
public function guessLegacySectionNameFromWikiText( $text ) {
    # Strip out wikitext links (they break the anchor)
    $stripped = $this->stripSectionName( $text );

    return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6159 
/**
 * Derive the anchor for heading text whose wikitext markup has already
 * been stripped.
 *
 * @param string $text Stripped heading text
 * @return string Anchor including the leading '#'
 */
public static function guessSectionNameFromStrippedText( $text ) {
    return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6169 
/**
 * Apply the title parser's fragment normalization to a section name.
 *
 * The name is parsed as the title string "#<name>" and the resulting
 * 'fragment' part is returned. The input comes back unchanged when the
 * title parser rejects it with a MalformedTitleException.
 *
 * @param string $text Section name
 * @return string Normalized section name
 */
private static function normalizeSectionName( $text ) {
    # T90902: ensure the same normalization is applied for IDs as to links
    $titleParser = MediaWikiServices::getInstance()->getTitleParser();

    try {
        $parts = $titleParser->splitTitleString( "#$text" );
    } catch ( MalformedTitleException $ex ) {
        # Fall back to the untouched input
        $parts = [ 'fragment' => $text ];
    }

    return $parts['fragment'];
}
6187 
/**
 * Strip wikitext markup from heading text for use in a section anchor.
 *
 * Internal and external link markup is reduced to its visible text, quote
 * markup is processed by doQuotes(), and finally everything between '<'
 * and '>' is removed.
 *
 * @param string $text Heading wikitext
 * @return string Stripped text
 */
public function stripSectionName( $text ) {
    # Each rule is [ pattern, replacement ]; they are applied in order
    $linkRules = [
        # Internal link markup: piped links keep their label ...
        [ '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2' ],
        # ... unpiped links keep their target
        [ '/\[\[:?([^[]+)\|?\]\]/', '$1' ],
        # External link markup
        # @todo FIXME: Not tolerant to blank link text
        # I.E. [https://www.mediawiki.org] will render as [1] or something depending
        # on how many empty links there are on the page - need to figure that out.
        [ '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2' ],
    ];
    foreach ( $linkRules as $rule ) {
        $text = preg_replace( $rule[0], $rule[1], $text );
    }

    # Parse wikitext quotes (italics & bold)
    $text = $this->doQuotes( $text );

    # Strip HTML tags
    return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6220 
/**
 * Test helper: start a parse, then run variable expansion, unstripping and
 * HTML tag removal over $text.
 *
 * @param string $text Text to process
 * @param Title $title Title handed to startParse()
 * @param ParserOptions $options Options handed to startParse()
 * @param int $outputType Output type handed to startParse(); defaults to self::OT_HTML
 * @return string
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
    $outputType = self::OT_HTML
) {
    # Keep the ScopedCallback returned by lock() in a local for the whole call
    $magicScopeVariable = $this->lock();
    $this->startParse( $title, $options, $outputType, true );

    $expanded = $this->replaceVariables( $text );
    $unstripped = $this->mStripState->unstripBoth( $expanded );

    return Sanitizer::removeHTMLtags( $unstripped );
}
6242 
/**
 * Test helper: run preSaveTransform() using the user taken from $options.
 *
 * @param string $text Text to transform
 * @param Title $title
 * @param ParserOptions $options Also supplies the user via getUser()
 * @return string Result of preSaveTransform()
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
    $user = $options->getUser();

    return $this->preSaveTransform( $text, $title, $user, $options );
}
6252 
/**
 * Test helper: run testSrvus() with the output type fixed to
 * self::OT_PREPROCESS.
 *
 * @param string $text Text to process
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of testSrvus()
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
    $outputType = self::OT_PREPROCESS;

    return $this->testSrvus( $text, $title, $options, $outputType );
}
6262 
/**
 * Apply $callback to every stretch of $s that lies outside strip markers.
 *
 * Markers (MARKER_PREFIX ... MARKER_SUFFIX) are copied to the output
 * untouched. A marker prefix with no matching suffix causes the rest of
 * the string, from that prefix on, to be copied through verbatim.
 *
 * @param string $s Input text
 * @param callable $callback Receives one text segment and returns its replacement
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
    $out = '';
    $length = strlen( $s );
    $suffixLength = strlen( self::MARKER_SUFFIX );
    $pos = 0;

    while ( $pos < $length ) {
        $markerStart = strpos( $s, self::MARKER_PREFIX, $pos );
        if ( $markerStart === false ) {
            # No further markers: the tail is ordinary text
            $out .= call_user_func( $callback, substr( $s, $pos ) );
            break;
        }

        # Ordinary text in front of the marker
        $out .= call_user_func( $callback, substr( $s, $pos, $markerStart - $pos ) );

        $suffixStart = strpos( $s, self::MARKER_SUFFIX, $markerStart );
        if ( $suffixStart === false ) {
            # Unterminated marker: pass the remainder through untouched
            $out .= substr( $s, $markerStart );
            break;
        }

        # Copy the whole marker and continue right after it
        $markerEnd = $suffixStart + $suffixLength;
        $out .= substr( $s, $markerStart, $markerEnd - $markerStart );
        $pos = $markerEnd;
    }

    return $out;
}
6302 
/**
 * Remove strip markers from $text by delegating to the strip state.
 *
 * @param string $text
 * @return string Result of $this->mStripState->killMarkers()
 */
public function killMarkers( $text ) {
    $cleaned = $this->mStripState->killMarkers( $text );

    return $cleaned;
}
6312 
/**
 * Bundle half-parsed text with the strip state and link holder data that
 * belong to it.
 *
 * Calling this triggers a deprecation notice (deprecated in 1.31).
 *
 * @param string $text Half-parsed text
 * @return array With keys 'text', 'version', 'stripState' and 'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
    wfDeprecated( __METHOD__, '1.31' );

    return [
        'text' => $text,
        'version' => self::HALF_PARSED_VERSION,
        'stripState' => $this->mStripState->getSubState( $text ),
        'linkHolders' => $this->mLinkHolders->getSubArray( $text )
    ];
}
6340 
/**
 * Load data produced by serializeHalfParsedText() back into this parser
 * and return the text.
 *
 * Calling this triggers a deprecation notice (deprecated in 1.31).
 *
 * @param array $data Array with 'text', 'version', 'stripState' and 'linkHolders'
 * @throws MWException When 'version' is missing or differs from HALF_PARSED_VERSION
 * @return string
 */
public function unserializeHalfParsedText( $data ) {
    wfDeprecated( __METHOD__, '1.31' );

    $versionMatches = isset( $data['version'] )
        && $data['version'] == self::HALF_PARSED_VERSION;
    if ( !$versionMatches ) {
        throw new MWException( __METHOD__ . ': invalid version' );
    }

    $texts = [ $data['text'] ];

    # First merge in the strip state ...
    $texts = $this->mStripState->merge( $data['stripState'], $texts );

    # ... then merge in the link holders, which renumbers links
    $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );

    # Should be good to go.
    return $texts[0];
}
6373 
/**
 * Check whether $data carries the half-parsed-text version this parser
 * understands.
 *
 * Calling this triggers a deprecation notice (deprecated in 1.31).
 *
 * @param array $data Candidate array, as produced by serializeHalfParsedText()
 * @return bool
 */
public function isValidHalfParsedText( $data ) {
    wfDeprecated( __METHOD__, '1.31' );

    if ( !isset( $data['version'] ) ) {
        return false;
    }

    return $data['version'] == self::HALF_PARSED_VERSION;
}
6388 
/**
 * Parse a size specification such as "100x200px" or "300px".
 *
 * @param string $value Raw parameter value
 * @param bool $parseHeight Whether the "<width>x<height>" form is recognised
 * @return array Empty when $value is '' or matches neither form; otherwise
 *   contains 'width' and, for the "<width>x<height>" form, 'height'
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
    if ( $value === '' ) {
        return [];
    }

    # (T15500) In both cases (width/height and width only),
    # permit trailing "px" for backward compatibility.
    $m = [];
    if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
        return [
            'width' => intval( $m[1] ),
            'height' => intval( $m[2] ),
        ];
    }

    if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
        return [ 'width' => intval( $value ) ];
    }

    return [];
}
6417 
/**
 * Mark the parser as busy and return a handle that clears the mark.
 *
 * $this->mInParse is set to a backtrace string while the parser is in use.
 * The returned ScopedCallback wraps a closure that sets it back to false.
 *
 * @throws MWException When the parser is already marked as in use
 * @return ScopedCallback
 */
protected function lock() {
    if ( $this->mInParse ) {
        throw new MWException( "Parser state cleared while parsing. "
            . "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
    }

    // Record where the lock was taken, so that a second attempt to lock
    // can report the current owner's backtrace for easier debugging
    $this->mInParse = ( new Exception )->getTraceAsString();

    return new ScopedCallback( function () {
        $this->mInParse = false;
    } );
}
6444 
/**
 * Remove a single <p>...</p> wrapper enclosing the whole of $html.
 *
 * The input is returned unchanged when it is not wrapped in one paragraph,
 * or when the wrapped content itself contains a '</p>' (meaning there is
 * more than one paragraph).
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
    $m = [];
    if ( !preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) ) {
        return $html;
    }

    # A closing tag inside the capture means several paragraphs: leave as-is
    if ( strpos( $m[1], '</p>' ) !== false ) {
        return $html;
    }

    return $m[1];
}
6463 
/**
 * Return a parser that is not currently in the middle of a parse.
 *
 * This instance is returned when it is idle; otherwise a new parser is
 * created through $this->factory.
 *
 * @return Parser
 */
public function getFreshParser() {
    return $this->mInParse ? $this->factory->create() : $this;
}
6481 
/**
 * Set up OOUI for this request and flag the parser output as using it.
 */
public function enableOOUI() {
    # NOTE(review): this setup call was absent from the scraped listing and
    # is restored to match upstream MediaWiki -- confirm.
    OutputPage::setupOOUI();
    $this->mOutput->setEnableOOUI( true );
}
6492 
/**
 * Set a flag on the parser output and write a debug log entry about it.
 *
 * @param string $flag Flag name handed to ParserOutput::setFlag()
 * @param string $reason Explanation appended to the log entry
 */
protected function setOutputFlag( $flag, $reason ) {
    $this->mOutput->setFlag( $flag );

    # The log entry names the page by its prefixed title
    $name = $this->mTitle->getPrefixedText();
    $message = __METHOD__ . ": set $flag flag on '$name'; $reason";
    $this->logger->debug( $message );
}
6502 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:5963
extensionSubstitution( $params, $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:4058
getFunctionSynonyms()
Definition: Parser.php:5724
static armorFrenchSpaces( $text, $space='&#160;')
Armor French spaces with a replacement character.
Definition: Sanitizer.php:1179
static register( $parser)
$mAutonumber
Definition: Parser.php:194
$mPPNodeCount
Definition: Parser.php:208
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2279
bool string $mInParse
Recursive call protection.
Definition: Parser.php:262
return true to allow those checks to and false if checking is done remove or add to the links of a group of changes in EnhancedChangesList Hook subscribers can return false to omit this line from recentchanges use this to change the tables headers change it to an object instance and return false override the list derivative used $groups Array of ChangesListFilterGroup objects(added in 1.34) 'FileDeleteComplete' null for the local wiki Added in
Definition: hooks.txt:1529
const MARKER_PREFIX
Definition: Parser.php:138
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
Definition: deferred.txt:11
wfUrlProtocols( $includeProtocolRelative=true)
Returns a regular expression of url protocols.
null means default in associative array form
Definition: hooks.txt:1972
setLinkID( $id)
Definition: Parser.php:993
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1972
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1662
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4832
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
$mTplRedirCache
Definition: Parser.php:210
LinkRenderer $mLinkRenderer
Definition: Parser.php:270
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:6032
doMagicLinks( $text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1598
parseLinkParameter( $value)
Parse the value of the 'link' parameter in image syntax ([[File:Foo.jpg|link=<value>]]).
Definition: Parser.php:5595
const OT_PREPROCESS
Definition: Defines.php:166
static element( $element, $attribs=[], $contents='')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:231
doHeadings( $text)
Parse headers and return html.
Definition: Parser.php:1776
either a plain
Definition: hooks.txt:2033
__construct( $svcOptions=null, MagicWordFactory $magicWordFactory=null, Language $contLang=null, ParserFactory $factory=null, $urlProtocols=null, SpecialPageFactory $spFactory=null, $linkRendererFactory=null, $nsInfo=null, $logger=null)
Constructing parsers directly is deprecated! Use a ParserFactory.
Definition: Parser.php:343
static tidy( $text)
Interface with Remex tidy.
Definition: MWTidy.php:42
$mDoubleUnderscores
Definition: Parser.php:210
SpecialPageFactory $specialPageFactory
Definition: Parser.php:282
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
killMarkers( $text)
Remove any strip markers found in the given text.
Definition: Parser.php:6309
static getExternalLinkRel( $url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:2075
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:5923
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:256
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
bool $mFirstCall
Whether firstCallInit still needs to be called.
Definition: Parser.php:162
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition: Sanitizer.php:66
getRevisionTimestampSubstring( $start, $len, $mtts, $variable)
Definition: Parser.php:3018
nextLinkID()
Definition: Parser.php:986
getTemplateDom( $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3668
Title( $x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:925
const SPACE_NOT_NL
Definition: Parser.php:107
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1972
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1450
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
const OT_PLAIN
Definition: Parser.php:118
getTags()
Accessor.
Definition: Parser.php:5711
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Definition: router.php:42
const OT_WIKI
Definition: Parser.php:115
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2147
User $mUser
Definition: Parser.php:219
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:3048
static cleanUrl( $url)
Definition: Sanitizer.php:2079
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1770
static isEnabled()
Definition: MWTidy.php:54
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:5092
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:2096
callParserFunction( $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3576
magicLinkCallback( $m)
Definition: Parser.php:1629
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
database rows
Definition: globals.txt:10
wfHostname()
Fetch server name for use in error reporting etc.
braceSubstitution( $piece, $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:3224
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:1001
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
preprocessToDom( $text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:3078
const TOC_START
Definition: Parser.php:141
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
SectionProfiler $mProfiler
Definition: Parser.php:265
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <...
$sort
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:115
There are three types of nodes:
Definition: PPNode.php:35
$mHeadings
Definition: Parser.php:210
$value
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:5007
const NS_SPECIAL
Definition: Defines.php:49
clearState()
Clear Parser state.
Definition: Parser.php:469
const EXT_LINK_ADDR
Definition: Parser.php:100
replaceExternalLinks( $text)
Replace external links (REL)
Definition: Parser.php:1997
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message key
Definition: hooks.txt:2139
static factory( $mode=false, IContextSource $context=null)
Get a new image gallery.
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6202
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:5129
static statelessFetchRevision(Title $title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3746
armorLinks( $text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2615
static activeUsers()
Definition: SiteStats.php:130
$mLinkID
Definition: Parser.php:207
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4962
static createAssocArgs( $args)
Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
Definition: Parser.php:3154
LinkRendererFactory $linkRendererFactory
Definition: Parser.php:294
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:778
$mGeneratedPPNodeCount
Definition: Parser.php:208
$mRevisionId
Definition: Parser.php:236
target page
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4869
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:1069
const NS_TEMPLATE
Definition: Defines.php:70
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1781
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:137
setTitle( $t)
Set the context title.
Definition: Parser.php:897
const NO_ARGS
Definition: PPFrame.php:26
fetchFileNoRegister( $title, $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3919
see documentation in includes Linker php for Linker::makeImageLink or false for current used if you return false $parser
Definition: hooks.txt:1781
MagicWordArray $mVariables
Definition: Parser.php:169
This list may contain false positives That usually means there is additional text with links below the first Each row contains links to the first and second as well as the first line of the second redirect text
const SFH_NO_HASH
Definition: Parser.php:88
static setupOOUI( $skinName='default', $dir='ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
setTransparentTagHook( $tag, callable $callback)
As setHook(), but letting the contents be parsed.
Definition: Parser.php:4993
$mForceTocPosition
Definition: Parser.php:212
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5953
const OT_PREPROCESS
Definition: Parser.php:116
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached Will warn at most once the user per limitation type...
Definition: Parser.php:3202
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition: Parser.php:6153
$mFunctionSynonyms
Definition: Parser.php:151
If you want to remove the page from your watchlist later
getPreSaveTransform()
Transform wiki markup when saving the page?
$mOutputType
Definition: Parser.php:233
interwikiTransclude( $title, $action)
Transclude an interwiki link.
Definition: Parser.php:3938
$mDefaultStripList
Definition: Parser.php:154
$mExtLinkBracketedRegex
Definition: Parser.php:183
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place $output
Definition: hooks.txt:2205
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message. Please note the header message cannot receive/use parameters. 'ImgAuthModifyHeaders':Executed just before a file is streamed to a user via img_auth.php, allowing headers to be modified beforehand. $title:LinkTarget object & $headers:HTTP headers(name=> value, names are case insensitive). Two headers get special handling:If-Modified-Since(value must be a valid HTTP date) and Range(must be of the form "bytes=(\*-\*)") will be honored when streaming the file. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item. Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page. Return false to stop further processing of the tag $reader:XMLReader object & $pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision. Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag. Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUnknownUser':When a user doesn 't exist locally, this hook is called to give extensions an opportunity to auto-create it. If the auto-creation is successful, return false. $name:User name 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload. Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports. & $fullInterwikiPrefix:Interwiki prefix, may contain colons. & $pageTitle:String that contains page title. 
'ImportSources':Called when reading from the $wgImportSources configuration variable. Can be used to lazy-load the import sources list. & $importSources:The value of $wgImportSources. Modify as necessary. See the comment in DefaultSettings.php for the detail of how to structure this array. 'InfoAction':When building information to display on the action=info page. $context:IContextSource object & $pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect. & $title:Title object for the current page & $request:WebRequest & $ignoreRedirect:boolean to skip redirect check & $target:Title/string of redirect target & $article:Article object 'InternalParseBeforeLinks':during Parser 's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InternalParseBeforeSanitize':during Parser 's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings. Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not. Return true without providing an interwiki to continue interwiki search. $prefix:interwiki prefix we are looking for. & $iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 'InvalidateEmailComplete':Called after a user 's email has been invalidated successfully. $user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification. 
Callee may modify $url and $query, URL will be constructed as $url . $query & $url:URL to index.php & $query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) & $article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() & $ip:IP being check & $result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from & $allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn 't match your organization. $addr:The e-mail address entered by the user & $result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user & $result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we 're looking for a messages file for & $file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces. Do not use this hook to add namespaces. Use CanonicalNamespaces for that. & $namespaces:Array of namespaces indexed by their numbers 'LanguageGetTranslatedLanguageNames':Provide translated language names. & $names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page 's language links. This is called in various places to allow extensions to define the effective language links for a page. $title:The page 's Title. & $links:Array with elements of the form "language:title" in the order that they will be output. 
& $linkFlags:Associative array mapping prefixed links to arrays of flags. Currently unused, but planned to provide support for marking individual language links in the UI, e.g. for featured articles. 'LanguageSelector':Hook to change the language selector available on a page. $out:The output page. $cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED since 1.28! Use HtmlPageLinkRendererBegin instead. Used when generating internal and interwiki links in Linker::link(), before processing starts. Return false to skip default processing and return $ret. See documentation for Linker::link() for details on the expected meanings of parameters. $skin:the Skin object $target:the Title that the link is pointing to & $html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1970
This document provides an overview of the usage of PageUpdater and that is
Definition: pageupdater.txt:3
makeKnownLinkHolder( $nt, $text='', $trail='', $prefix='')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2591
if( $line===false) $args
Definition: cdb.php:64
static stripOuterParagraph( $html)
Strip the outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6455
A class for passing options to services.
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition: Sanitizer.php:74
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition: hooks.txt:3039
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).
Definition: User.php:51
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:767
static getInstance( $ts=false)
Get a timestamp instance in GMT.
Definition: MWTimestamp.php:39
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:767
static numberingroup( $group)
Find the number of users in a given user group.
Definition: SiteStats.php:150
stripAltText( $caption, $holders)
Definition: Parser.php:5625
setDefaultSort( $sort)
Mutator for $mDefaultSort.
Definition: Parser.php:6073
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage()...
Definition: Linker.php:247
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:94
Status::newGood()` to allow deletion, and then `return false` from the hook function. Ensure you consume the 'ChangeTagAfterDelete' hook to carry out custom deletion actions. $tag:name of the tag $user:user initiating the action & $status:Status object. See above. 'ChangeTagsListActive':Allows you to nominate which of the tags your extension uses are in active use. & $tags:list of all active tags. Append to this array. 'ChangeTagsAfterUpdateTags':Called after tags have been updated with the ChangeTags::updateTags function. Params:$addedTags:tags effectively added in the update $removedTags:tags effectively removed in the update $prevTags:tags that were present prior to the update $rc_id:recentchanges table id $rev_id:revision table id $log_id:logging table id $params:tag params $rc:RecentChange being tagged when the tagging accompanies the action, or null $user:User who performed the tagging when the tagging is subsequent to the action, or null 'ChangeTagsAllowedAdd':Called when checking if a user can add tags to a change. & $allowedTags:List of all the tags the user is allowed to add. Any tags the user wants to add( $addTags) that are not in this array will cause it to fail. You may add or remove tags to this array as required. $addTags:List of tags user intends to add. $user:User who is adding the tags. 'ChangeUserGroups':Called before user groups are changed. $performer:The User who will perform the change $user:The User whose groups will be changed & $add:The groups that will be added & $remove:The groups that will be removed 'Collation::factory':Called if $wgCategoryCollation is an unknown collation. $collationName:Name of the collation in question & $collationObject:Null. Replace with a subclass of the Collation class that implements the collation given in $collationName. 'ConfirmEmailComplete':Called after a user 's email has been confirmed successfully. 
$user:user(object) whose email is being confirmed 'ContentAlterParserOutput':Modify parser output for a given content object. Called by Content::getParserOutput after parsing has finished. Can be used for changes that depend on the result of the parsing but have to be done before LinksUpdate is called(such as adding tracking categories based on the rendered HTML). $content:The Content to render $title:Title of the page, as context $parserOutput:ParserOutput to manipulate 'ContentGetParserOutput':Customize parser output for a given content object, called by AbstractContent::getParserOutput. May be used to override the normal model-specific rendering of page content. $content:The Content to render $title:Title of the page, as context $revId:The revision ID, as context $options:ParserOptions for rendering. To avoid confusing the parser cache, the output can only depend on parameters provided to this hook function, not on global state. $generateHtml:boolean, indicating whether full HTML should be generated. If false, generation of HTML may be skipped, but other information should still be present in the ParserOutput object. & $output:ParserOutput, to manipulate or replace 'ContentHandlerDefaultModelFor':Called when the default content model is determined for a given title. May be used to assign a different model for that title. $title:the Title in question & $model:the model name. Use with CONTENT_MODEL_XXX constants. 'ContentHandlerForModelID':Called when a ContentHandler is requested for a given content model name, but no entry for that model exists in $wgContentHandlers. Note:if your extension implements additional models via this hook, please use GetContentModels hook to make them known to core. $modeName:the requested content model name & $handler:set this to a ContentHandler object, if desired. 'ContentModelCanBeUsedOn':Called to determine whether that content model can be used on a given page. 
This is especially useful to prevent some content models to be used in some special location. $contentModel:ID of the content model in question $title:the Title in question. & $ok:Output parameter, whether it is OK to use $contentModel on $title. Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok. 'ContribsPager::getQueryInfo':Before the contributions query is about to run & $pager:Pager object for contributions & $queryInfo:The query for the contribs Pager 'ContribsPager::reallyDoQuery':Called before really executing the query for My Contributions & $data:an array of results of all contribs queries $pager:The ContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'ContributionsLineEnding':Called before a contributions HTML line is finished $page:SpecialPage object for contributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'ContributionsToolLinks':Change tool links above Special:Contributions $id:User identifier $title:User page title & $tools:Array of tool links $specialPage:SpecialPage instance for context and services. Can be either SpecialContributions or DeletedContributionsPage. Extensions should type hint against a generic SpecialPage though. 'ConvertContent':Called by AbstractContent::convert when a conversion to another content model is requested. Handler functions that modify $result should generally return false to disable further attempts at conversion. $content:The Content object to be converted. $toModel:The ID of the content model to convert to. $lossy:boolean indicating whether lossy conversion is allowed. 
& $result:Output parameter, in case the handler function wants to provide a converted Content object. Note that $result->getContentModel() must return $toModel. 'ContentSecurityPolicyDefaultSource':Modify the allowed CSP load sources. This affects all directives except for the script directive. If you want to add a script source, see ContentSecurityPolicyScriptSource hook. & $defaultSrc:Array of Content-Security-Policy allowed sources $policyConfig:Current configuration for the Content-Security-Policy header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'ContentSecurityPolicyDirectives':Modify the content security policy directives. Use this only if ContentSecurityPolicyDefaultSource and ContentSecurityPolicyScriptSource do not meet your needs. & $directives:Array of CSP directives $policyConfig:Current configuration for the CSP header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'ContentSecurityPolicyScriptSource':Modify the allowed CSP script sources. Note that you also have to use ContentSecurityPolicyDefaultSource if you want non-script sources to be loaded from whatever you add. & $scriptSrc:Array of CSP directives $policyConfig:Current configuration for the CSP header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'CustomEditor':When invoking the page editor Return true to allow the normal editor to be used, or false if implementing a custom editor, e.g. for a special namespace, etc. 
$article:Article being edited $user:User performing the edit 'DeletedContribsPager::reallyDoQuery':Called before really executing the query for Special:DeletedContributions Similar to ContribsPager::reallyDoQuery & $data:an array of results of all contribs queries $pager:The DeletedContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'DeletedContributionsLineEnding':Called before a DeletedContributions HTML line is finished. Similar to ContributionsLineEnding $page:SpecialPage object for DeletedContributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'DeleteUnknownPreferences':Called by the cleanupPreferences.php maintenance script to build a WHERE clause with which to delete preferences that are not known about. This hook is used by extensions that have dynamically-named preferences that should not be deleted in the usual cleanup process. For example, the Gadgets extension creates preferences prefixed with 'gadget-', and so anything with that prefix is excluded from the deletion. &where:An array that will be passed as the $cond parameter to IDatabase::select() to determine what will be deleted from the user_properties table. $db:The IDatabase object, useful for accessing $db->buildLike() etc. 'DifferenceEngineAfterLoadNewText':called in DifferenceEngine::loadNewText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before returning true from this function. 
$differenceEngine:DifferenceEngine object 'DifferenceEngineLoadTextAfterNewContentIsLoaded':called in DifferenceEngine::loadText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before checking if the variable 's value is null. This hook can be used to inject content into said class member variable. $differenceEngine:DifferenceEngine object 'DifferenceEngineMarkPatrolledLink':Allows extensions to change the "mark as patrolled" link which is shown both on the diff header as well as on the bottom of a page, usually wrapped in a span element which has class="patrollink". $differenceEngine:DifferenceEngine object & $markAsPatrolledLink:The "mark as patrolled" link HTML(string) $rcid:Recent change ID(rc_id) for this change(int) 'DifferenceEngineMarkPatrolledRCID':Allows extensions to possibly change the rcid parameter. For example the rcid might be set to zero due to the user being the same as the performer of the change but an extension might still want to show it under certain conditions. & $rcid:rc_id(int) of the change or 0 $differenceEngine:DifferenceEngine object $change:RecentChange object $user:User object representing the current user 'DifferenceEngineNewHeader':Allows extensions to change the $newHeader variable, which contains information about the new revision, such as the revision 's author, whether the revision was marked as a minor edit or not, etc. $differenceEngine:DifferenceEngine object & $newHeader:The string containing the various #mw-diff-otitle[1-5] divs, which include things like revision author info, revision comment, RevisionDelete link and more $formattedRevisionTools:Array containing revision tools, some of which may have been injected with the DiffRevisionTools hook $nextlink:String containing the link to the next revision(if any) $status
Definition: hooks.txt:1244
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
fetchFileAndTitle( $title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3894
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:791
const NO_TEMPLATES
Definition: PPFrame.php:27
array $mTplDomCache
Definition: Parser.php:214
static parseWidthParam( $value, $parseHeight=true)
Parse a width parameter of an image link, such as 300px or 200x300px.
Definition: Parser.php:6398
$mVarCache
Definition: Parser.php:155
disableCache()
Set a flag in the output object indicating that the content is dynamic and shouldn't be cached...
Definition: Parser.php:5683
$mRevisionObject
Definition: Parser.php:235
Title $mTitle
Definition: Parser.php:232
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:2041
getPreloadText( $text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:867
makeImage( $title, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5375
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:403
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:6007
wfUrlencode( $s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
getImageParams( $handler)
Definition: Parser.php:5321
fetchCurrentRevisionOfTitle( $title)
Fetch the current revision of a given title.
Definition: Parser.php:3711
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
Factory for handling the special page list and generating SpecialPage objects.
static extractTagsAndParams( $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:1120
$mRevIdForTs
Definition: Parser.php:240
setUser( $user)
Set the current user.
Definition: Parser.php:888
$mStripList
Definition: Parser.php:153
$mFunctionTagHooks
Definition: Parser.php:152
const OT_PLAIN
Definition: Defines.php:168
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:163
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place or wrap services the preferred way to define a new service is the $wgServiceWiringFiles array $services
Definition: hooks.txt:2205
$mRevisionTimestamp
Definition: Parser.php:237
$mImageParams
Definition: Parser.php:156
makeLimitReport()
Set the limit report data in the current ParserOutput, and return the limit report HTML comment...
Definition: Parser.php:643
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
Definition: Sanitizer.php:1295
static newKnownCurrent(IDatabase $db, $pageIdOrTitle, $revId=0)
Load a revision based on a known page ID and current revision ID from the DB.
Definition: Revision.php:1325
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:79
const OT_WIKI
Definition: Defines.php:165
Preprocessor $mPreprocessor
Definition: Parser.php:187
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:1055
const NS_MEDIA
Definition: Defines.php:48
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:5939
static getVersion( $flags='', $lang=null)
Return a string of the MediaWiki version with Git revision if available.
static singleton()
Definition: RepoGroup.php:60
static normalizeSectionName( $text)
Apply the same normalization as code making links to this section would.
Definition: Parser.php:6176
replaceTransparentTags( $text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5747
argSubstitution( $piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:4006
const RECOVER_ORIG
Definition: PPFrame.php:33
static normalizeUrlComponent( $component, $unsafe)
Definition: Parser.php:2184
static isValid( $ip)
Validate an IP address.
Definition: IP.php:111
StripState $mStripState
Definition: Parser.php:199
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3123
$mDefaultSort
Definition: Parser.php:209
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:1043
setOutputFlag( $flag, $reason)
Definition: Parser.php:6497
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1183
const EXT_IMAGE_REGEX
Definition: Parser.php:103
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4899
$cache
Definition: mcc.php:33
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1638
$params
replaceInternalLinks( $s)
Process [[ ]] wikilinks.
Definition: Parser.php:2266
const NS_CATEGORY
Definition: Defines.php:74
setFunctionHook( $id, callable $callback, $flags=0)
Create a function, e.g.
Definition: Parser.php:5056
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1972
doQuotes( $text)
Helper function for doAllQuotes()
Definition: Parser.php:1811
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:845
extractSections( $text, $sectionId, $mode, $newText='')
Break wikitext input into sections, and either pull or replace some particular section's text...
Definition: Parser.php:5798
setOutputType( $ot)
Set the output type.
Definition: Parser.php:934
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:6052
$mImageParamsMagicArray
Definition: Parser.php:157
LinkHolderArray $mLinkHolders
Definition: Parser.php:205
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1972
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to and or sell copies of the and to permit persons to whom the Software is furnished to do so
Definition: LICENSE.txt:10
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition: Parser.php:1087
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
static splitWhitespace( $s)
Return a three-element array: leading whitespace, string contents, trailing whitespace.
Definition: Parser.php:3090
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:767
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:1014
$buffer
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1450
internalParseHalfParsed( $text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1501
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:912
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:1033
$mInputSize
Definition: Parser.php:241
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
formatHeadings( $text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4260
getUserSig(&$user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext...
Definition: Parser.php:4771
const HALF_PARSED_VERSION
Update this version number when the output of serialiseHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:85
const NS_FILE
Definition: Defines.php:66
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:449
static makeAnchor( $sectionName)
Definition: Parser.php:6113
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name. If you don't need a full Title object...
Definition: SpecialPage.php:83
static normalizeCharReferences( $text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1569
const PTD_FOR_INCLUSION
Definition: Parser.php:110
isValidHalfParsedText( $data)
Returns true if the given array, presumed to be generated by serializeHalfParsedText(), is compatible with the current version of the parser.
Definition: Parser.php:6384
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1972