MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
29 
70 class Parser {
76  const VERSION = '1.6.4';
77 
83 
84  # Flags for Parser::setFunctionHook
85  const SFH_NO_HASH = 1;
86  const SFH_OBJECT_ARGS = 2;
87 
88  # Constants needed for external link processing
89  # Everything except bracket, space, or control characters
90  # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
91  # as well as U+3000 (IDEOGRAPHIC SPACE), see T21052
92  # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
93  # uses to replace invalid HTML characters.
94  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
95  # Simplified expression to match an IPv4 or IPv6 address, or
96  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
97  const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
98  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
99  // phpcs:ignore Generic.Files.LineLength
100  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
101  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
102 
103  # Regular expression for a non-newline space
104  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
105 
106  # Flags for preprocessToDom
107  const PTD_FOR_INCLUSION = 1;
108 
109  # Allowed values for $this->mOutputType
110  # Parameter to startExternalParse().
const OT_HTML = 1; # like parse()
const OT_WIKI = 2; # like preSaveTransform()
# Referenced by preprocess() and setOutputType(); shares its value with OT_MSG.
const OT_PREPROCESS = 3; # like preprocess()
const OT_MSG = 3;
const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
116 
134  const MARKER_SUFFIX = "-QINU`\"'\x7f";
135  const MARKER_PREFIX = "\x7f'\"`UNIQ-";
136 
137  # Markers used for wrapping the table of contents
138  const TOC_START = '<mw:toc>';
139  const TOC_END = '</mw:toc>';
140 
142  const MAX_TTS = 900;
143 
144  # Persistent:
145  public $mTagHooks = [];
147  public $mFunctionHooks = [];
148  public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
149  public $mFunctionTagHooks = [];
150  public $mStripList = [];
151  public $mDefaultStripList = [];
152  public $mVarCache = [];
153  public $mImageParams = [];
155  public $mMarkerIndex = 0;
159  public $mFirstCall = true;
160 
161  # Initialised by initialiseVariables()
162 
166  public $mVariables;
167 
171  public $mSubstWords;
172  # Initialised in constructor
174 
175  # Initialized in getPreprocessor()
176 
178 
179  # Cleared with clearState():
180 
183  public $mOutput;
184  public $mAutonumber;
185 
189  public $mStripState;
190 
196 
197  public $mLinkID;
201  public $mExpensiveFunctionCount; # number of expensive parser function calls
203 
207  public $mUser; # User object; only used when doing pre-save transform
208 
209  # Temporary
210  # These are variables reset at least once per parse regardless of $clearState
211 
215  public $mOptions;
216 
220  public $mTitle; # Title context, used for self-link rendering and similar things
221  public $mOutputType; # Output type, one of the OT_xxx constants
222  public $ot; # Shortcut alias, see setOutputType()
223  public $mRevisionObject; # The revision object of the specified revision ID
224  public $mRevisionId; # ID to display in {{REVISIONID}} tags
225  public $mRevisionTimestamp; # The timestamp of the specified revision ID
226  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
227  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
228  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
229  public $mInputSize = false; # For {{PAGESIZE}} on current page.
230 
235  public $mUniqPrefix = self::MARKER_PREFIX;
236 
243 
251 
256  public $mInParse = false;
257 
259  protected $mProfiler;
260 
264  protected $mLinkRenderer;
265 
268 
270  private $contLang;
271 
273  private $factory;
274 
277 
279  private $siteConfig;
280 
283 
285  private $nsInfo;
286 
/**
 * Store the parser configuration, build the bracketed external-link regex,
 * choose the preprocessor class and resolve the service dependencies.
 *
 * Every service argument that is left null is fetched from
 * MediaWikiServices::getInstance().
 *
 * @param array $parserConf Parser configuration; the 'preprocessorClass' key,
 *   when set, overrides the automatic preprocessor selection
 * @param MagicWordFactory|null $magicWordFactory
 * @param Language|null $contLang Content language
 * @param ParserFactory|null $factory
 * @param string|null $urlProtocols Regex fragment embedded into the external
 *   link regex; defaults to wfUrlProtocols()
 * @param mixed|null $spFactory Special page factory (presumably a
 *   SpecialPageFactory - TODO confirm and add the type hint)
 * @param mixed|null $siteConfig Site configuration; must offer get(), as used
 *   by makeLimitReport()
 * @param mixed|null $linkRendererFactory Factory whose create() yields the
 *   link renderer, as used by getLinkRenderer()
 * @param mixed|null $nsInfo Namespace information service (presumably a
 *   NamespaceInfo - TODO confirm and add the type hint)
 */
public function __construct(
	array $parserConf = [], MagicWordFactory $magicWordFactory = null,
	Language $contLang = null, ParserFactory $factory = null, $urlProtocols = null,
	$spFactory = null, $siteConfig = null, $linkRendererFactory = null, $nsInfo = null
) {
	$this->mConf = $parserConf;
	$this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';

	// Pick the preprocessor implementation: explicit configuration wins,
	// otherwise decide from the runtime and the loaded extensions.
	if ( isset( $parserConf['preprocessorClass'] ) ) {
		$this->mPreprocessorClass = $parserConf['preprocessorClass'];
	} elseif ( wfIsHHVM() ) {
		# Under HHVM Preprocessor_Hash is much faster than Preprocessor_DOM
		$this->mPreprocessorClass = Preprocessor_Hash::class;
	} elseif ( extension_loaded( 'domxml' ) ) {
		# PECL extension that conflicts with the core DOM extension (T15770)
		wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
		$this->mPreprocessorClass = Preprocessor_Hash::class;
	} elseif ( extension_loaded( 'dom' ) ) {
		$this->mPreprocessorClass = Preprocessor_DOM::class;
	} else {
		$this->mPreprocessorClass = Preprocessor_Hash::class;
	}
	wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );

	$services = MediaWikiServices::getInstance();
	$this->magicWordFactory = $magicWordFactory ??
		$services->getMagicWordFactory();

	$this->contLang = $contLang ?? $services->getContentLanguage();

	$this->factory = $factory ?? $services->getParserFactory();
	$this->specialPageFactory = $spFactory ?? $services->getSpecialPageFactory();
	$this->siteConfig = $siteConfig ?? $services->getMainConfig();
	$this->linkRendererFactory =
		$linkRendererFactory ?? $services->getLinkRendererFactory();
	$this->nsInfo = $nsInfo ?? $services->getNamespaceInfo();
}
339 
/**
 * Drop every property of the parser when it is destroyed.
 *
 * The link holder array is released first, then all remaining properties.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}

	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
351 
/**
 * Prepare a freshly cloned parser: mark it as not parsing, detach the
 * fields that may be shared by reference with the original, and notify
 * the 'ParserCloned' hook.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: taking a reference "to" an object field turns the field
	// itself into a reference (until the other reference goes away), and
	// cloning copies reference fields as references. Old hooks still pass
	// these fields by reference, so the clone would otherwise keep sharing
	// them with the original object.
	foreach ( [ 'mStripState', 'mVarCache' ] as $field ) {
		// Take a plain (non-reference) copy and rebind the field to it.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
374 
/**
 * One-time initialisation: registers the core tag hooks, initialises the
 * variables and fires the 'ParserFirstCallInit' hook. Later calls are no-ops.
 */
public function firstCallInit() {
	if ( $this->mFirstCall ) {
		$this->mFirstCall = false;

		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		// Hand the hook a local alias so that $this itself is not passed
		// by reference (PHP 7.1 warns about that).
		$parser = $this;
		Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
	}
}
392 
/**
 * Reset all per-parse state to its initial values and fire the
 * 'ParserClearState' hook. Runs firstCallInit() first.
 */
public function clearState() {
	$this->firstCallInit();

	// Fresh output object; option reads are recorded on it.
	$this->mOutput = new ParserOutput;
	$this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );

	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	// Forget everything known about the revision being parsed.
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [ 'post-expand' => 0, 'arg' => 0 ];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor bound to another parser must not be reused
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Local alias avoids the PHP 7.1 warning about passing $this by reference
	$parser = $this;
	Hooks::run( 'ParserClearState', [ &$parser ] );
}
443 
/**
 * Convert wikitext to HTML and return the populated output object.
 *
 * @param string $text Wikitext to parse
 * @param Title $title Page the text belongs to; also used for title conversion
 * @param ParserOptions $options Options for this parse
 * @param bool $linestart Passed on to the block-level pass
 * @param bool $clearState When true the parser is locked, U+007F is replaced
 *   in the input, and the flag is handed to startParse()
 * @param int|null $revid When not null, used as the revision ID during this
 *   parse; the previous revision data is restored before returning
 * @return ParserOutput The output object created for this parse
 */
public function parse(
	$text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held only for its lifetime: the lock lasts until this variable
		// goes out of scope.
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the current revision data so it can be restored at the end.
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Title conversion is skipped when disabled through the options or a
	// double-underscore switch, or when a display title was already set.
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getTargetLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getTargetLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$text .= $this->makeLimitReport();
	}

	# Wrap non-interface parser output in a <div> so it can be targeted
	# with CSS (T37247)
	$class = $this->mOptions->getWrapOutputClass();
	if ( $class !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $class );
	}

	$this->mOutput->setText( $text );

	// Restore the revision data saved above and clear per-parse values.
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
558 
/**
 * Record the limit report data on the output object and render it as HTML
 * comments.
 *
 * Two comments are produced: the "NewPP limit report" and the transclusion
 * expansion time report listing the ten slowest profiled entries.
 *
 * @return string HTML comment blocks to append to the parser output
 */
protected function makeLimitReport() {
	$includeLimit = $this->mOptions->getMaxIncludeSize();

	// CPU time is only reported when available.
	$cpuSeconds = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuSeconds !== null ) {
		$this->mOutput->setLimitReportData(
			'limitreport-cputime',
			sprintf( "%.3f", $cpuSeconds )
		);
	}

	$wallSeconds = $this->mOutput->getTimeSinceStart( 'wall' );
	$this->mOutput->setLimitReportData(
		'limitreport-walltime',
		sprintf( "%.3f", $wallSeconds )
	);

	// Each entry is a [ used, limit ] pair.
	$this->mOutput->setLimitReportData(
		'limitreport-ppvisitednodes',
		[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-ppgeneratednodes',
		[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-postexpandincludesize',
		[ $this->mIncludeSizes['post-expand'], $includeLimit ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-templateargumentsize',
		[ $this->mIncludeSizes['arg'], $includeLimit ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-expansiondepth',
		[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-expensivefunctioncount',
		[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
	);

	foreach ( $this->mStripState->getLimitReport() as list( $reportKey, $reportValue ) ) {
		$this->mOutput->setLimitReportData( $reportKey, $reportValue );
	}

	Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

	// Plain-text header of the report.
	$limitReport = "NewPP limit report\n";
	if ( $this->siteConfig->get( 'ShowHostnames' ) ) {
		$limitReport .= 'Parsed by ' . wfHostname() . "\n";
	}
	$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
	$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
	$dynamicFlag = $this->mOutput->hasDynamicContent() ? 'true' : 'false';
	$limitReport .= 'Dynamic content: ' . $dynamicFlag . "\n";

	foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
		// A hook returning false has taken care of this entry itself.
		$useDefaultFormat = Hooks::run( 'ParserLimitReportFormat',
			[ $key, &$value, &$limitReport, false, false ]
		);
		if ( !$useDefaultFormat ) {
			continue;
		}

		$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			$valueMsg = new RawMessage( '$1' );
		}
		if ( $keyMsg->isDisabled() || $valueMsg->isDisabled() ) {
			continue;
		}
		$valueMsg->params( $value );
		$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
	}

	// Since we're not really outputting HTML, decode the entities and
	// then re-encode the things that need hiding inside HTML comments.
	$limitReport = htmlspecialchars_decode( $limitReport );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
	$text = "\n<!-- \n$limitReport-->\n";

	// Add on template profiling data in human/machine readable way
	$statsByFunc = $this->mProfiler->getFunctionStats();
	uasort( $statsByFunc, function ( $left, $right ) {
		// Largest 'real' value first
		return $right['real'] <=> $left['real'];
	} );
	$profileReport = [];
	foreach ( array_slice( $statsByFunc, 0, 10 ) as $stat ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$stat['%real'],
			$stat['real'],
			$stat['calls'],
			htmlspecialchars( $stat['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
	$text .= implode( "\n", $profileReport ) . "\n-->\n";

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->siteConfig->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData(
		'cachereport-timestamp',
		$this->mOutput->getCacheTime()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-ttl',
		$this->mOutput->getCacheExpiry()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasDynamicContent()
	);

	// Log pages that generate more than a tenth of the allowed node count.
	$logThreshold = $this->mOptions->getMaxGeneratedPPNodeCount() / 10;
	if ( $this->mGeneratedPPNodeCount > $logThreshold ) {
		wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
			$this->mTitle->getPrefixedDBkey() );
	}

	return $text;
}
673 
/**
 * Run the strip hooks and internalParse() (with $isMain = false) on a
 * piece of wikitext.
 *
 * @param string $text Wikitext to process
 * @param PPFrame|bool $frame Frame handed to internalParse(), or false
 * @return string Result of internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Local alias avoids the PHP 7.1 warning about passing $this by reference
	$parser = $this;
	foreach ( [ 'ParserBeforeStrip', 'ParserAfterStrip' ] as $hookName ) {
		Hooks::run( $hookName, [ &$parser, &$text, &$this->mStripState ] );
	}

	return $this->internalParse( $text, false, $frame );
}
706 
/**
 * Like recursiveTagParse(), but additionally runs the result through
 * internalParseHalfParsed() with $isMain = false.
 *
 * @param string $text Wikitext to process
 * @param PPFrame|bool $frame Frame handed to recursiveTagParse(), or false
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );

	return $this->internalParseHalfParsed( $halfParsed, false );
}
731 
/**
 * Expand variables in the text and unstrip the result, without doing a
 * full parse.
 *
 * @param string $text Text to preprocess
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid When not null, stored as the revision ID
 * @param PPFrame|bool $frame Frame handed to replaceVariables(), or false
 * @return string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Held only for its lifetime, until it goes out of scope.
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	// Local alias avoids the PHP 7.1 warning about passing $this by reference
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );

	$expanded = $this->replaceVariables( $text, $frame );

	return $this->mStripState->unstripBoth( $expanded );
}
759 
/**
 * Expand variables in the text and unstrip the result, without touching
 * the parser state (no lock, no startParse(), no hooks).
 *
 * @param string $text Text to preprocess
 * @param PPFrame|bool $frame Frame handed to replaceVariables(), or false
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );

	return $this->mStripState->unstripBoth( $expanded );
}
774 
/**
 * Process wikitext for use as preload text.
 *
 * $params are substituted into $text through RawMessage first; the result
 * is preprocessed for inclusion and expanded with argument and template
 * expansion switched off, then unstripped.
 *
 * @param string $text Wikitext
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Values substituted into $text via RawMessage
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	// Held only for its lifetime, until it goes out of scope.
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	// $flags was read below without ever being assigned.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
802 
/**
 * Set the user stored on the parser; getUser() prefers it over the user
 * from the parser options.
 *
 * @param User|null $user
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
812 
/**
 * Set the context title. A falsy value is replaced by the title
 * 'NO TITLE'; a fragment on the title is removed.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	if ( !$t ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $t->hasFragment() ? $t->createFragmentTarget( '' ) : $t;
}
830 
/**
 * Get the context title.
 *
 * @return Title|null Value of mTitle
 */
public function getTitle() {
	return $this->mTitle;
}
839 
/**
 * Combined accessor/mutator for the context title, implemented with
 * wfSetVar().
 *
 * @param Title|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );

	return $result;
}
849 
/**
 * Set the output type and rebuild the $ot shortcut array, which holds one
 * boolean per known output type.
 *
 * @param int $ot One of the OT_xxx constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$isHtml = $ot == self::OT_HTML;
	$isWiki = $ot == self::OT_WIKI;
	$isPre = $ot == self::OT_PREPROCESS;
	$isPlain = $ot == self::OT_PLAIN;
	$this->ot = [
		'html' => $isHtml,
		'wiki' => $isWiki,
		'pre' => $isPre,
		'plain' => $isPlain,
	];
}
865 
/**
 * Combined accessor/mutator for the output type, implemented with
 * wfSetVar(). Unlike setOutputType() it does not touch the $ot array.
 *
 * @param int|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );

	return $result;
}
875 
/**
 * Get the output object of the current parse.
 *
 * @return ParserOutput Value of mOutput
 */
public function getOutput() {
	return $this->mOutput;
}
884 
/**
 * Get the options of the current parse.
 *
 * @return ParserOptions Value of mOptions
 */
public function getOptions() {
	return $this->mOptions;
}
893 
/**
 * Combined accessor/mutator for the parser options, implemented with
 * wfSetVar().
 *
 * @param ParserOptions|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	$result = wfSetVar( $this->mOptions, $x );

	return $result;
}
903 
/**
 * Return the current link ID and advance the counter by one.
 *
 * @return int The ID before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;

	return $current;
}
910 
/**
 * Set the link ID counter.
 *
 * @param int $id Value the next call to nextLinkID() will return
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
917 
/**
 * Get the language object used by parser functions; an alias of
 * getTargetLanguage().
 *
 * @return Language
 * @throws MWException Propagated from getTargetLanguage()
 */
public function getFunctionLang() {
	$language = $this->getTargetLanguage();

	return $language;
}
925 
/**
 * Determine the target language of the parse.
 *
 * In order of precedence: the target language from the options, the user
 * language for interface messages, and finally the page language of the
 * context title.
 *
 * @return Language
 * @throws MWException If none of the options apply and no title is set
 */
public function getTargetLanguage() {
	$options = $this->mOptions;

	$target = $options->getTargetLanguage();
	if ( $target !== null ) {
		return $target;
	}

	if ( $options->getInterfaceMessage() ) {
		return $options->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
948 
/**
 * Get the language object used for language conversion; an alias of
 * getTargetLanguage().
 *
 * @return Language
 * @throws MWException Propagated from getTargetLanguage()
 */
public function getConverterLanguage() {
	$language = $this->getTargetLanguage();

	return $language;
}
957 
/**
 * Get the user for this parse: the one set through setUser() if any,
 * otherwise the user from the parser options.
 *
 * @return User
 */
public function getUser() {
	if ( is_null( $this->mUser ) ) {
		return $this->mOptions->getUser();
	}

	return $this->mUser;
}
970 
/**
 * Get the preprocessor, creating it from mPreprocessorClass on first use.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->mPreprocessorClass;
	$this->mPreprocessor = new $preprocessorClass( $this );

	return $this->mPreprocessor;
}
983 
/**
 * Get the link renderer, creating it on first use with the stub threshold
 * taken from the current options.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	// XXX We make the LinkRenderer with current options and then cache it forever
	$this->mLinkRenderer = $this->linkRendererFactory->create();
	$stubThreshold = $this->getOptions()->getStubThreshold();
	$this->mLinkRenderer->setStubThreshold( $stubThreshold );

	return $this->mLinkRenderer;
}
1001 
/**
 * Get the magic word factory set up by the constructor.
 *
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1011 
/**
 * Get the content language set up by the constructor.
 *
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1021 
/**
 * Replace the listed tags and HTML comments in the text with unique
 * markers and collect what was removed.
 *
 * Each entry of $matches is keyed by its marker and holds, in order: the
 * element name, the content (null for a self-closed tag), the decoded
 * attributes, and the complete original text of the element. A tag without
 * an end tag swallows the rest of the text.
 *
 * @param array $elements Tag names to extract (matched case-insensitively)
 * @param string $text Source text
 * @param array &$matches Filled with the extracted elements
 * @return string The text with every extracted element replaced by its marker
 */
public static function extractTagsAndParams( $elements, $text, &$matches ) {
	// Counter shared by all calls so that markers stay unique.
	static $n = 1;
	$matches = [];
	$stripped = '';

	$alternatives = implode( '|', $elements );
	$start = "/<($alternatives)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$parts = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $parts[0];
		$partCount = count( $parts );
		if ( $partCount < 5 ) {
			// Nothing matched: the remainder has been copied already.
			break;
		}

		if ( $partCount > 5 ) {
			# comment
			$element = $parts[4];
			$attributes = '';
			$close = '';
			$inside = $parts[5];
		} else {
			# tag
			list( , $element, $attributes, $close, $inside ) = $parts;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$tail = null;
			$text = $inside;
		} else {
			$end = $element === '!--' ? '/(-->)/' : "/(<\\/$element\\s*>)/i";
			$split = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $split[0];
			if ( count( $split ) >= 3 ) {
				list( , $tail, $text ) = $split;
			} else {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			}
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail",
		];
	}

	return $stripped;
}
1098 
/**
 * Get the strip list.
 *
 * @return array Value of mStripList
 */
public function getStripList() {
	return $this->mStripList;
}
1107 
/**
 * Store a piece of text in the strip state and return the marker that now
 * stands for it. Every call uses a new marker index.
 *
 * @param string $text Text to hide behind a marker
 * @return string The marker
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1123 
/**
 * Convert wikitext table markup into HTML table elements.
 *
 * The text is processed line by line. The state of each open table is kept
 * on parallel stacks, so tables can be nested: a table start pushes one
 * entry on every stack and a table end pops it again. Lines outside of any
 * table are passed through unchanged. Tables still open at the end of the
 * text are closed.
 *
 * @param string $text
 * @return string The text with table markup replaced by HTML
 */
1131  public function doTableStuff( $text ) {
1132  $lines = StringUtils::explode( "\n", $text );
1133  $out = '';
1134  $td_history = []; # Is currently a td tag open?
1135  $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1136  $tr_history = []; # Is currently a tr tag open?
1137  $tr_attributes = []; # history of tr attributes
1138  $has_opened_tr = []; # Did this table open a <tr> element?
1139  $indent_level = 0; # indent level of the table
1140 
1141  foreach ( $lines as $outLine ) {
1142  $line = trim( $outLine );
1143 
1144  if ( $line === '' ) { # empty line, go to next line
1145  $out .= $outLine . "\n";
1146  continue;
1147  }
1148 
1149  $first_character = $line[0];
1150  $first_two = substr( $line, 0, 2 );
1151  $matches = [];
1152 
1153  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1154  # First check if we are starting a new table
 # The number of leading colons gives the indent level
1155  $indent_level = strlen( $matches[1] );
1156 
1157  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1158  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1159 
1160  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
 # Push a fresh entry for this table on every state stack
1161  array_push( $td_history, false );
1162  array_push( $last_tag_history, '' );
1163  array_push( $tr_history, false );
1164  array_push( $tr_attributes, '' );
1165  array_push( $has_opened_tr, false );
1166  } elseif ( count( $td_history ) == 0 ) {
1167  # Don't do any of the following
 # (we are not inside a table, so the line is copied as it is)
1168  $out .= $outLine . "\n";
1169  continue;
1170  } elseif ( $first_two === '|}' ) {
1171  # We are ending a table
1172  $line = '</table>' . substr( $line, 2 );
1173  $last_tag = array_pop( $last_tag_history );
1174 
 # A table that never opened a row gets an empty one
1175  if ( !array_pop( $has_opened_tr ) ) {
1176  $line = "<tr><td></td></tr>{$line}";
1177  }
1178 
1179  if ( array_pop( $tr_history ) ) {
1180  $line = "</tr>{$line}";
1181  }
1182 
1183  if ( array_pop( $td_history ) ) {
1184  $line = "</{$last_tag}>{$line}";
1185  }
1186  array_pop( $tr_attributes );
1187  if ( $indent_level > 0 ) {
1188  $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1189  } else {
1190  $outLine = $line;
1191  }
1192  } elseif ( $first_two === '|-' ) {
1193  # Now we have a table row
1194  $line = preg_replace( '#^\|-+#', '', $line );
1195 
1196  # Whats after the tag is now only attributes
1197  $attributes = $this->mStripState->unstripBoth( $line );
1198  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
 # Keep the attributes until the first cell of the row emits the <tr>
1199  array_pop( $tr_attributes );
1200  array_push( $tr_attributes, $attributes );
1201 
1202  $line = '';
1203  $last_tag = array_pop( $last_tag_history );
1204  array_pop( $has_opened_tr );
1205  array_push( $has_opened_tr, true );
1206 
 # Close the previous row and cell if they are still open
1207  if ( array_pop( $tr_history ) ) {
1208  $line = '</tr>';
1209  }
1210 
1211  if ( array_pop( $td_history ) ) {
1212  $line = "</{$last_tag}>{$line}";
1213  }
1214 
1215  $outLine = $line;
1216  array_push( $tr_history, false );
1217  array_push( $td_history, false );
1218  array_push( $last_tag_history, '' );
1219  } elseif ( $first_character === '|'
1220  || $first_character === '!'
1221  || $first_two === '|+'
1222  ) {
1223  # This might be cell elements, td, th or captions
1224  if ( $first_two === '|+' ) {
1225  $first_character = '+';
1226  $line = substr( $line, 2 );
1227  } else {
1228  $line = substr( $line, 1 );
1229  }
1230 
1231  // Implies both are valid for table headings.
1232  if ( $first_character === '!' ) {
1233  $line = StringUtils::replaceMarkup( '!!', '||', $line );
1234  }
1235 
1236  # Split up multiple cells on the same line.
1237  # FIXME : This can result in improper nesting of tags processed
1238  # by earlier parser steps.
1239  $cells = explode( '||', $line );
1240 
1241  $outLine = '';
1242 
1243  # Loop through each table cell
1244  foreach ( $cells as $cell ) {
1245  $previous = '';
 # Captions do not open a row; every other cell opens one if needed
1246  if ( $first_character !== '+' ) {
1247  $tr_after = array_pop( $tr_attributes );
1248  if ( !array_pop( $tr_history ) ) {
1249  $previous = "<tr{$tr_after}>\n";
1250  }
1251  array_push( $tr_history, true );
1252  array_push( $tr_attributes, '' );
1253  array_pop( $has_opened_tr );
1254  array_push( $has_opened_tr, true );
1255  }
1256 
1257  $last_tag = array_pop( $last_tag_history );
1258 
 # Close the cell that is still open before starting the next one
1259  if ( array_pop( $td_history ) ) {
1260  $previous = "</{$last_tag}>\n{$previous}";
1261  }
1262 
1263  if ( $first_character === '|' ) {
1264  $last_tag = 'td';
1265  } elseif ( $first_character === '!' ) {
1266  $last_tag = 'th';
1267  } elseif ( $first_character === '+' ) {
1268  $last_tag = 'caption';
1269  } else {
1270  $last_tag = '';
1271  }
1272 
1273  array_push( $last_tag_history, $last_tag );
1274 
1275  # A cell could contain both parameters and data
1276  $cell_data = explode( '|', $cell, 2 );
1277 
1278  # T2553: Note that a '|' inside an invalid link should not
1279  # be mistaken as delimiting cell parameters
1280  # Bug T153140: Neither should language converter markup.
1281  if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1282  $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1283  } elseif ( count( $cell_data ) == 1 ) {
1284  // Whitespace in cells is trimmed
1285  $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1286  } else {
1287  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1288  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1289  // Whitespace in cells is trimmed
1290  $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1291  }
1292 
1293  $outLine .= $cell;
1294  array_push( $td_history, true );
1295  }
1296  }
1297  $out .= $outLine . "\n";
1298  }
1299 
1300  # Closing open td, tr && table
 # NOTE(review): an open cell is always closed with </td> here, even when
 # the last tag was th or caption, and indent wrappers are not closed.
1301  while ( count( $td_history ) > 0 ) {
1302  if ( array_pop( $td_history ) ) {
1303  $out .= "</td>\n";
1304  }
1305  if ( array_pop( $tr_history ) ) {
1306  $out .= "</tr>\n";
1307  }
1308  if ( !array_pop( $has_opened_tr ) ) {
1309  $out .= "<tr><td></td></tr>\n";
1310  }
1311 
1312  $out .= "</table>\n";
1313  }
1314 
1315  # Remove trailing line-ending (b/c)
1316  if ( substr( $out, -1 ) === "\n" ) {
1317  $out = substr( $out, 0, -1 );
1318  }
1319 
1320  # special case: don't return empty table
1321  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1322  $out = '';
1323  }
1324 
1325  return $out;
1326  }
1327 
/**
 * Run the main wikitext-to-HTML passes on a text: variable expansion,
 * HTML sanitising, tables, horizontal rules, double underscores, headings,
 * internal links, quotes, external links, magic links and heading
 * formatting. Strip markers are left in place.
 *
 * @param string $text Wikitext
 * @param bool $isMain Handed on to formatHeadings()
 * @param PPFrame|bool $frame Frame used to expand the text; when false,
 *   replaceVariables() is used instead
 * @return string The text after all passes, or as left by the
 *   'ParserBeforeInternalParse' hook when that hook returns false
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	// Local alias avoids the PHP 7.1 warning about passing $this by reference
	$parser = $this;

	# Hook to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( !$frame ) {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		$transparentTags,
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	// Four or more dashes at the start of a line become a horizontal rule.
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$noParseMarker = self::MARKER_PREFIX . 'NOPARSE';
	$text = str_replace( $noParseMarker, '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $origText, $isMain );
}
1403 
/**
 * Finish the conversion of half-parsed text: unstrip markers, build block
 * levels, replace link holders, optionally run the language converter and
 * clean up the resulting markup.
 *
 * The order of the steps matters (see the inline notes), and the three
 * hooks are only run when $isMain is true.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain Whether to run the ParserAfterUnstrip,
 *   ParserBeforeTidy and ParserAfterTidy hooks
 * @param bool $linestart Forwarded unchanged to doBlockLevels()
 * @return string
 */
1413  private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
1414  $text = $this->mStripState->unstripGeneral( $text );
1415 
1416  // Avoid PHP 7.1 warning from passing $this by reference
1417  $parser = $this;
1418 
1419  if ( $isMain ) {
1420  Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
1421  }
1422 
1423  # Clean up special characters, only run once, next-to-last before doBlockLevels
1424  $text = Sanitizer::armorFrenchSpaces( $text );
1425 
1426  $text = $this->doBlockLevels( $text, $linestart );
1427 
      # $text is updated by the call; the return value is not used here.
1428  $this->replaceLinkHolders( $text );
1429 
      # Content conversion is skipped when it is disabled in the options,
      # when the 'nocontentconvert' double underscore was recorded, or when
      # the text is an interface message.
1437  if ( !( $this->mOptions->getDisableContentConversion()
1438  || isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
1439  && !$this->mOptions->getInterfaceMessage()
1440  ) {
1441  # The position of the convert() call should not be changed. it
1442  # assumes that the links are all replaced and the only thing left
1443  # is the <nowiki> mark.
1444  $text = $this->getTargetLanguage()->convert( $text );
1445  }
1446 
1447  $text = $this->mStripState->unstripNoWiki( $text );
1448 
1449  if ( $isMain ) {
1450  Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
1451  }
1452 
1453  $text = $this->replaceTransparentTags( $text );
      # Second general unstrip pass, after transparent tags were replaced
1454  $text = $this->mStripState->unstripGeneral( $text );
1455 
1456  $text = Sanitizer::normalizeCharReferences( $text );
1457 
1458  if ( MWTidy::isEnabled() ) {
      # Tidy is available; it still only runs if the parser options ask for it
1459  if ( $this->mOptions->getTidy() ) {
1460  $text = MWTidy::tidy( $text );
1461  }
1462  } else {
1463  # attempt to sanitize at least some nesting problems
1464  # (T4702 and quite a few others)
1465  # This code path is buggy and deprecated!
1466  wfDeprecated( 'disabling tidy', '1.33' );
      # Map of regex => replacement, applied in this order by preg_replace() below
1467  $tidyregs = [
1468  # ''Something [http://www.cool.com cool''] -->
1469  # <i>Something</i><a href="http://www.cool.com"..><i>cool</i></a>
1470  '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
1471  '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
1472  # fix up an anchor inside another anchor, only
1473  # at least for a single nested link (T5695)
1474  '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
1475  '\\1\\2</a>\\3</a>\\1\\4</a>',
1476  # fix div inside inline elements- doBlockLevels won't wrap a line which
1477  # contains a div, so fix it up here; replace
1478  # div with escaped text
1479  '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
1480  '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
1481  # remove empty italic or bold tag pairs, some
1482  # introduced by rules above
1483  '/<([bi])><\/\\1>/' => '',
1484  ];
1485 
1486  $text = preg_replace(
1487  array_keys( $tidyregs ),
1488  array_values( $tidyregs ),
1489  $text );
1490  }
1491 
1492  if ( $isMain ) {
1493  Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
1494  }
1495 
1496  return $text;
1497  }
1498 
/**
 * Replace "magic" links in the text: free external links (a bare URL
 * starting with a known protocol) and RFC, PMID and ISBN references.
 *
 * A single extended-mode, UTF-8 pattern is used. Its first two
 * alternatives match existing <a>...</a> elements and other HTML tags;
 * magicLinkCallback() returns those unchanged, so nothing inside them is
 * linked again. Every match is handed to magicLinkCallback().
 *
 * @param string $text
 * @return string Result of preg_replace_callback()
 */
1510  public function doMagicLinks( $text ) {
1511  $prots = wfUrlProtocolsWithoutProtRel();
1512  $urlChar = self::EXT_LINK_URL_CLASS;
1513  $addr = self::EXT_LINK_ADDR;
1514  $space = self::SPACE_NOT_NL; # non-newline space
1515  $spdash = "(?:-|$space)"; # a dash or a non-newline space
1516  $spaces = "$space++"; # possessive match of 1 or more spaces
      # The pattern is built from a single-quoted part (no interpolation)
      # followed by a double-quoted part that interpolates the fragments above.
1517  $text = preg_replace_callback(
1518  '!(?: # Start cases
1519  (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
1520  (<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
1521  (\b # m[3]: Free external links
1522  (?i:$prots)
1523  ($addr$urlChar*) # m[4]: Post-protocol path
1524  ) |
1525  \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
1526  ([0-9]+)\b |
1527  \bISBN $spaces ( # m[6]: ISBN, capture number
1528  (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
1529  (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
1530  [0-9Xx] # check digit
1531  )\b
1532  )!xu", [ $this, 'magicLinkCallback' ], $text );
1533  return $text;
1534  }
1535 
/**
 * Callback for the pattern built in doMagicLinks().
 *
 * @param array $m Match groups: [1] an <a> element, [2] any other HTML tag,
 *   [3] a free external link with [4] its post-protocol part,
 *   [5] the number of an RFC/PMID reference, [6] an ISBN
 * @return string HTML for the link, or the matched text unchanged when the
 *   match must be skipped or the corresponding magic link type is disabled
 * @throws MWException If group 5 matched but the text starts with neither
 *   "RFC" nor "PMID"
 */
public function magicLinkCallback( $m ) {
    $whole = $m[0];

    # Anchors and other HTML elements are matched only to be skipped
    $isAnchor = isset( $m[1] ) && $m[1] !== '';
    if ( $isAnchor || ( isset( $m[2] ) && $m[2] !== '' ) ) {
        return $whole;
    }

    # Free external link
    if ( isset( $m[3] ) && $m[3] !== '' ) {
        return $this->makeFreeExternalLink( $whole, strlen( $m[4] ) );
    }

    # RFC or PMID
    if ( isset( $m[5] ) && $m[5] !== '' ) {
        if ( strncmp( $whole, 'RFC', 3 ) === 0 ) {
            if ( !$this->mOptions->getMagicRFCLinks() ) {
                return $whole;
            }
            $keyword = 'RFC';
            $urlmsg = 'rfcurl';
            $cssClass = 'mw-magiclink-rfc';
            $trackingCat = 'magiclink-tracking-rfc';
        } elseif ( strncmp( $whole, 'PMID', 4 ) === 0 ) {
            if ( !$this->mOptions->getMagicPMIDLinks() ) {
                return $whole;
            }
            $keyword = 'PMID';
            $urlmsg = 'pubmedurl';
            $cssClass = 'mw-magiclink-pmid';
            $trackingCat = 'magiclink-tracking-pmid';
        } else {
            throw new MWException( __METHOD__ . ': unrecognised match type "' .
                substr( $whole, 0, 20 ) . '"' );
        }

        $id = $m[5];
        $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
        $this->addTrackingCategory( $trackingCat );
        return Linker::makeExternalLink(
            $url,
            "{$keyword} {$id}",
            true,
            $cssClass,
            [],
            $this->mTitle
        );
    }

    # ISBN
    if ( isset( $m[6] ) && $m[6] !== '' && $this->mOptions->getMagicISBNLinks() ) {
        # Display form: every kind of non-newline space becomes a plain space
        $isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
        # Lookup form: no separators, upper-case check digit
        $num = str_replace( [ '-', ' ', 'x' ], [ '', '', 'X' ], $isbn );

        $this->addTrackingCategory( 'magiclink-tracking-isbn' );
        return $this->getLinkRenderer()->makeKnownLink(
            SpecialPage::getTitleFor( 'Booksources', $num ),
            "ISBN $isbn",
            [
                'class' => 'internal mw-magiclink-isbn',
                'title' => false // suppress title attribute
            ]
        );
    }

    # Nothing matched, or ISBN magic links are disabled
    return $whole;
}
1603 
/**
 * Turn a free (unbracketed) external URL into a link or an inline image.
 *
 * Escaped '<', '>' and non-breaking-space entities end the URL, and
 * trailing punctuation is moved out of it, before the link is built.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Length of the part of the URL after the
 *   protocol; used to detect that only the protocol would be left
 * @return string HTML followed by the text split off the URL, or the
 *   input unchanged when nothing but the protocol would remain
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
    $trail = '';

    # '<' and '>' (escaped earlier by removeHTMLtags()) must not be part of
    # a URL, per RFC 2396. &nbsp; terminates a URL as well (bug T84937).
    $found = [];
    $hasTerminator = preg_match(
        '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
        $url,
        $found,
        PREG_OFFSET_CAPTURE
    );
    if ( $hasTerminator ) {
        $cutAt = $found[0][1];
        $trail = substr( $url, $cutAt ) . $trail;
        $url = substr( $url, 0, $cutAt );
    }

    # Characters treated as trailing punctuation. A right bracket only
    # counts when the URL contains no left bracket.
    $punctuation = ',;\.:!?';
    if ( strpos( $url, '(' ) === false ) {
        $punctuation .= ')';
    }

    $reversed = strrev( $url );
    $trailLen = strspn( $reversed, $punctuation );

    # Don't break a trailing HTML entity by moving its ';' into the trail.
    # This is hot code: substr_compare avoids creating a new string, and
    # instead of a slow $-anchored match the test runs on the reversed
    # string from the desired offset.
    # Un-reversed, the pattern is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
    if ( $trailLen
        && substr_compare( $url, ";", -$trailLen, 1 ) === 0
        && preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $found, 0, $trailLen )
    ) {
        $trailLen--;
    }

    if ( $trailLen ) {
        $trail = substr( $url, -$trailLen ) . $trail;
        $url = substr( $url, 0, -$trailLen );
    }

    # Make sure a real URL is left after trail removal, not a lone protocol
    if ( strlen( $trail ) >= $numPostProto ) {
        return $url . $trail;
    }

    $url = Sanitizer::cleanUrl( $url );

    # An external image takes precedence over a plain link
    $html = $this->maybeMakeExternalImage( $url );
    if ( $html !== false ) {
        return $html . $trail;
    }

    $label = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
    $attribs = $this->getExternalLinkAttribs( $url );
    $html = Linker::makeExternalLink( $url, $label, true, 'free', $attribs, $this->mTitle );

    # Register it in the output object...
    $this->mOutput->addExternalLink( $url );

    return $html . $trail;
}
1678 
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that the longest run of '='
 * is tried first.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
    foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
        $marks = str_repeat( '=', $level );
        // Only non-newline whitespace is trimmed around the heading text.
        // Using \s* there would break "==\n===\n", parsing it as <h2>=</h2>.
        $pattern = '/^(?:' . $marks . ')[ \t]*(.+?)[ \t]*(?:' . $marks . ')\s*$/m';
        $text = preg_replace( $pattern, "<h{$level}>\\1</h{$level}>", $text );
    }
    return $text;
}
1697 
/**
 * Apply doQuotes() to every line of the text.
 *
 * @param string $text
 * @return string The converted lines, joined with newlines again
 */
public function doAllQuotes( $text ) {
    $converted = [];
    foreach ( StringUtils::explode( "\n", $text ) as $line ) {
        $converted[] = $this->doQuotes( $line );
    }
    return implode( "\n", $converted );
}
1715 
/**
 * Convert apostrophe markup ('' italics, ''' bold, ''''' both) in a
 * fragment of wikitext into <i> and <b> tags.
 *
 * doAllQuotes() calls this once per line. Tags still open at the end of
 * the fragment are closed.
 *
 * @param string $text
 * @return string
 */
public function doQuotes( $text ) {
    // Odd indexes of $arr hold the apostrophe runs, even indexes the text
    // between them.
    $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
    $countarr = count( $arr );
    if ( $countarr === 1 ) {
        // No apostrophe run at all
        return $text;
    }

    // First, do some preliminary work. This may shift some apostrophes from
    // being mark-up to being text. It also counts the number of occurrences
    // of bold and italics mark-ups.
    $numbold = 0;
    $numitalics = 0;
    for ( $i = 1; $i < $countarr; $i += 2 ) {
        $thislen = strlen( $arr[$i] );
        // If there are ever four apostrophes, assume the first is supposed to
        // be text, and the remaining three constitute mark-up for bold text.
        // (T15227: ''''foo'''' turns into ' ''' foo ' ''')
        if ( $thislen === 4 ) {
            $arr[$i - 1] .= "'";
            $arr[$i] = "'''";
            $thislen = 3;
        } elseif ( $thislen > 5 ) {
            // If there are more than 5 apostrophes in a row, assume they're all
            // text except for the last 5.
            // (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
            $arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
            $arr[$i] = "'''''";
            $thislen = 5;
        }
        // Count the number of occurrences of bold and italics mark-ups.
        if ( $thislen === 2 ) {
            $numitalics++;
        } elseif ( $thislen === 3 ) {
            $numbold++;
        } elseif ( $thislen === 5 ) {
            $numitalics++;
            $numbold++;
        }
    }

    // If there is an odd number of both bold and italics, it is likely
    // that one of the bold ones was meant to be an apostrophe followed
    // by italics. Which one we cannot know for certain, but it is more
    // likely to be one that has a single-letter word before it.
    if ( ( $numbold % 2 === 1 ) && ( $numitalics % 2 === 1 ) ) {
        $firstsingleletterword = -1;
        $firstmultiletterword = -1;
        $firstspace = -1;
        for ( $i = 1; $i < $countarr; $i += 2 ) {
            if ( strlen( $arr[$i] ) === 3 ) {
                // Last and second-to-last character of the preceding text
                $x1 = substr( $arr[$i - 1], -1 );
                $x2 = substr( $arr[$i - 1], -2, 1 );
                if ( $x1 === ' ' ) {
                    if ( $firstspace === -1 ) {
                        $firstspace = $i;
                    }
                } elseif ( $x2 === ' ' ) {
                    $firstsingleletterword = $i;
                    // if $firstsingleletterword is set, we don't
                    // look at the other options, so we can bail early.
                    break;
                } elseif ( $firstmultiletterword === -1 ) {
                    $firstmultiletterword = $i;
                }
            }
        }

        // If there is a single-letter word, use it!
        if ( $firstsingleletterword > -1 ) {
            $arr[$firstsingleletterword] = "''";
            $arr[$firstsingleletterword - 1] .= "'";
        } elseif ( $firstmultiletterword > -1 ) {
            // If not, but there's a multi-letter word, use that one.
            $arr[$firstmultiletterword] = "''";
            $arr[$firstmultiletterword - 1] .= "'";
        } elseif ( $firstspace > -1 ) {
            // ... otherwise use the first one that is preceded by a space.
            // (notice that it is possible for all three to be -1 if, for example,
            // there is only one pentuple-apostrophe in the line)
            $arr[$firstspace] = "''";
            $arr[$firstspace - 1] .= "'";
        }
    }

    // Now let's actually convert our apostrophic mush to HTML!
    // $state names the open tags in the order they were opened ('', 'i',
    // 'b', 'bi', 'ib'). 'both' means a ''''' was seen with nothing open yet:
    // the following text is held in $buffer until the next mark-up decides
    // which tag has to be the outer one.
    $output = '';
    $buffer = '';
    $state = '';
    foreach ( $arr as $i => $r ) {
        if ( ( $i % 2 ) === 0 ) {
            // Plain text between two apostrophe runs
            if ( $state === 'both' ) {
                $buffer .= $r;
            } else {
                $output .= $r;
            }
            continue;
        }

        $thislen = strlen( $r );
        if ( $thislen === 2 ) {
            if ( $state === 'i' ) {
                $output .= '</i>';
                $state = '';
            } elseif ( $state === 'bi' ) {
                $output .= '</i>';
                $state = 'b';
            } elseif ( $state === 'ib' ) {
                $output .= '</b></i><b>';
                $state = 'b';
            } elseif ( $state === 'both' ) {
                $output .= '<b><i>' . $buffer . '</i>';
                $state = 'b';
            } else { // $state can be 'b' or ''
                $output .= '<i>';
                $state .= 'i';
            }
        } elseif ( $thislen === 3 ) {
            if ( $state === 'b' ) {
                $output .= '</b>';
                $state = '';
            } elseif ( $state === 'bi' ) {
                $output .= '</i></b><i>';
                $state = 'i';
            } elseif ( $state === 'ib' ) {
                $output .= '</b>';
                $state = 'i';
            } elseif ( $state === 'both' ) {
                $output .= '<i><b>' . $buffer . '</b>';
                $state = 'i';
            } else { // $state can be 'i' or ''
                $output .= '<b>';
                $state .= 'b';
            }
        } elseif ( $thislen === 5 ) {
            if ( $state === 'b' ) {
                $output .= '</b><i>';
                $state = 'i';
            } elseif ( $state === 'i' ) {
                $output .= '</i><b>';
                $state = 'b';
            } elseif ( $state === 'bi' ) {
                $output .= '</i></b>';
                $state = '';
            } elseif ( $state === 'ib' ) {
                $output .= '</b></i>';
                $state = '';
            } elseif ( $state === 'both' ) {
                $output .= '<i><b>' . $buffer . '</b></i>';
                $state = '';
            } else { // ($state == '')
                $buffer = '';
                $state = 'both';
            }
        }
    }

    // Now close all remaining tags. Notice that the order is important.
    if ( $state === 'b' || $state === 'ib' ) {
        $output .= '</b>';
    }
    if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
        $output .= '</i>';
    }
    if ( $state === 'bi' ) {
        $output .= '</b>';
    }
    // There might be lonely ''''', so make sure we have a buffer.
    // Compare against '' explicitly: a plain truthiness test would treat
    // the buffer "0" as empty and silently drop that text.
    if ( $state === 'both' && $buffer !== '' ) {
        $output .= '<b><i>' . $buffer . '</i></b>';
    }
    return $output;
}
1895 
/**
 * Replace bracketed external links ("[url text]") with HTML links.
 *
 * The text is split with $this->mExtLinkBracketedRegex. After the leading
 * plain text, the pieces come in groups of four: URL, protocol, link text
 * and the text following the link.
 *
 * @param string $text
 * @return string
 * @throws MWException If preg_split() fails
 */
public function replaceExternalLinks( $text ) {
    $pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
    if ( $pieces === false ) {
        throw new MWException( "PCRE needs to be compiled with "
            . "--enable-unicode-properties in order for MediaWiki to function" );
    }

    // Everything before the first link
    $html = array_shift( $pieces );

    $total = count( $pieces );
    for ( $pos = 0; $pos < $total; $pos += 4 ) {
        $url = $pieces[$pos];
        // $pieces[$pos + 1] is the protocol, which is not needed here
        $label = $pieces[$pos + 2];
        $trail = $pieces[$pos + 3];

        # '<' and '>' (escaped earlier by removeHTMLtags()) must not be part
        # of a URL, per RFC 2396: move them and what follows into the label.
        $found = [];
        if ( preg_match( '/&(lt|gt);/', $url, $found, PREG_OFFSET_CAPTURE ) ) {
            $cutAt = $found[0][1];
            $label = substr( $url, $cutAt ) . ' ' . $label;
            $url = substr( $url, 0, $cutAt );
        }

        # If the link text is an image URL, replace it with an <img> tag.
        # This happened by accident in the original parser, but some people
        # used it extensively.
        $image = $this->maybeMakeExternalImage( $label );
        if ( $image !== false ) {
            $label = $image;
        }

        $linkTrail = '';
        # Link type, used for CSS
        $linktype = 'text';

        if ( $label == '' ) {
            # No link text, e.g. [http://domain.tld/some.link]: autonumber
            $number = $this->getTargetLanguage()->formatNum( ++$this->mAutonumber );
            $label = '[' . $number . ']';
            $linktype = 'autonumber';
        } else {
            # Have link text, e.g. [http://domain.tld/some.link text]s
            # Check for trail
            list( $linkTrail, $trail ) = Linker::splitTrail( $trail );
        }

        // Excluding protocol-relative URLs may avoid many false positives.
        if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
            $label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
        }

        $url = Sanitizer::cleanUrl( $url );

        # Use the encoded URL.
        # This means that users can paste URLs directly into the text.
        # Funny characters like ö aren't valid in URLs anyway.
        # This was changed in August 2004.
        $attribs = $this->getExternalLinkAttribs( $url );
        $link = Linker::makeExternalLink( $url, $label, false, $linktype, $attribs, $this->mTitle );
        $html .= $link . $linkTrail . $trail;

        # Register link in the output object.
        $this->mOutput->addExternalLink( $url );
    }

    return $html;
}
1977 
/**
 * Get the rel attribute value for an external link, based on the
 * nofollow configuration.
 *
 * @param string|bool $url URL checked against $wgNoFollowDomainExceptions
 * @param object|null $title Object exposing getNamespace() (presumably a
 *   Title); its namespace is checked against $wgNoFollowNsExceptions
 * @return string|null 'nofollow', or null when no rel value applies
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
    // The settings are globals and must be imported explicitly; otherwise
    // they would be undefined locals and this method would always return null.
    global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

    $ns = $title ? $title->getNamespace() : false;
    if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
        && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
    ) {
        return 'nofollow';
    }
    return null;
}
1997 
/**
 * Get the HTML attributes to add to an external link.
 *
 * @param string $url
 * @return array With key 'rel' (string, or null when no rel value applies)
 *   and, when the parser options define a link target, key 'target'
 */
public function getExternalLinkAttribs( $url ) {
    $attribs = [];
    $rel = self::getExternalLinkRel( $url, $this->mTitle );

    $target = $this->mOptions->getExternalLinkTarget();
    if ( $target ) {
        $attribs['target'] = $target;
        if ( !in_array( $target, [ '_self', '_parent', '_top' ], true ) ) {
            // T133507. New windows can navigate parent cross-origin.
            // Including noreferrer due to lacking browser
            // support of noopener. Eventually noreferrer should be removed.
            // getExternalLinkRel() returns null when there is nothing to
            // add, so test for null as well as '' to avoid a leading space.
            if ( $rel !== null && $rel !== '' ) {
                $rel .= ' noreferrer noopener';
            } else {
                $rel = 'noreferrer noopener';
            }
        }
    }
    $attribs['rel'] = $rel;
    return $attribs;
}
2028 
/**
 * Normalize the percent-encoding of a URL.
 *
 * Unsafe characters are encoded, escapes of characters that are safe in
 * their part of the URL (fragment, query, or scheme and path) are decoded,
 * the remaining escapes get upper-case hex digits, and the brackets of a
 * valid IPv6 host are restored.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
    # Test for RFC 3986 IPv6 syntax
    $scheme = '[a-z][a-z0-9+.-]*:';
    $userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
    $ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';

    # Decoded IPv6 address of the host, or false if the host is not one
    $ipv6Address = false;
    $hostMatch = [];
    if ( preg_match(
        "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i",
        $url,
        $hostMatch
    ) ) {
        $candidate = rawurldecode( $hostMatch[1] );
        if ( IP::isValid( $candidate ) ) {
            $ipv6Address = $candidate;
        }
    }

    # Make sure unsafe characters are encoded
    $url = preg_replace_callback(
        '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
        static function ( $hit ) {
            return rawurlencode( $hit[0] );
        },
        $url
    );

    # The URL is processed from its end: first the fragment, then the query.
    # Each delimiter maps to the characters that must stay escaped in the
    # part it introduces (for the query: 'query' minus &=+;).
    $normalized = '';
    $end = strlen( $url );
    $trailingParts = [
        '#' => '"#%<>[\]^`{|}',
        '?' => '"#%<>[\]^`{|}&=+;',
    ];
    foreach ( $trailingParts as $delimiter => $unsafe ) {
        $start = strpos( $url, $delimiter );
        if ( $start !== false && $start < $end ) {
            $part = substr( $url, $start, $end - $start );
            $normalized = self::normalizeUrlComponent( $part, $unsafe ) . $normalized;
            $end = $start;
        }
    }

    # Scheme and path part - 'pchar'
    # (we assume no userinfo or encoded colons in the host)
    $head = substr( $url, 0, $end );
    $normalized = self::normalizeUrlComponent( $head, '"#%<>[\]^`{|}/?' ) . $normalized;

    # Fix IPv6 syntax: the brackets around the host were encoded above
    if ( $ipv6Address !== false ) {
        $encodedHost = "%5B({$ipv6Address})%5D";
        $normalized = preg_replace(
            "<^((?:{$scheme})?//(?:{$userinfo}@)?){$encodedHost}(?=[:/?#]|$)>i",
            '$1[$2]',
            $normalized
        );
    }

    return $normalized;
}
2095 
/**
 * Normalize the percent-escapes within one part of a URL.
 *
 * An escape is decoded when it stands for a printable ASCII character
 * (code 33 to 126) that is not listed in $unsafe; every other escape is
 * kept, with its hex digits in upper case.
 *
 * @param string $component
 * @param string $unsafe Characters that must stay escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
    return preg_replace_callback(
        '/%[0-9A-Fa-f]{2}/',
        function ( $matches ) use ( $unsafe ) {
            $escape = $matches[0];
            $decoded = urldecode( $escape );
            $code = ord( $decoded );

            $mustStayEscaped = $code <= 32
                || $code >= 127
                || strpos( $unsafe, $decoded ) !== false;

            # Either leave it escaped (upper-case a-f) or unescape it
            return $mustStayEscaped ? strtoupper( $escape ) : $decoded;
        },
        $component
    );
}
2110 
/**
 * Make an inline image for an external URL if the parser options allow it.
 *
 * The URL must match EXT_IMAGE_REGEX. It is accepted when external images
 * are allowed in general, when it starts with one of the allowed prefixes,
 * or when the image whitelist is enabled and one of the entries of the
 * 'external_image_whitelist' message matches it.
 *
 * @param string $url
 * @return string|bool Image HTML, or false if no image was made
 */
private function maybeMakeExternalImage( $url ) {
    # A single prefix string or an array of prefix strings
    $allowedPrefixes = $this->mOptions->getAllowExternalImagesFrom();

    $prefixMatches = false;
    if ( !empty( $allowedPrefixes ) ) {
        if ( is_array( $allowedPrefixes ) ) {
            foreach ( $allowedPrefixes as $allowedPrefix ) {
                if ( strpos( $url, $allowedPrefix ) === 0 ) {
                    $prefixMatches = true;
                    break;
                }
            }
        } else {
            $prefixMatches = ( strpos( $url, $allowedPrefixes ) === 0 );
        }
    }

    $html = false;
    if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatches )
        && preg_match( self::EXT_IMAGE_REGEX, $url )
    ) {
        # Image found
        $html = Linker::makeExternalImage( $url );
    }

    if ( !$html && $this->mOptions->getEnableImageWhitelist()
        && preg_match( self::EXT_IMAGE_REGEX, $url )
    ) {
        $message = wfMessage( 'external_image_whitelist' )->inContentLanguage()->text();
        foreach ( explode( "\n", $message ) as $entry ) {
            # Ignore blank entries and comments
            if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
                continue;
            }
            # Each entry is a regex fragment: escape the delimiter and match
            # case-insensitively
            $pattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
            if ( preg_match( $pattern, $url ) ) {
                # Image matches a whitelist entry
                $html = Linker::makeExternalImage( $url );
                break;
            }
        }
    }

    return $html;
}
2168 
/**
 * Process [[ ]] wikilinks in the text and merge the resulting link holders
 * into $this->mLinkHolders.
 *
 * @param string $s
 * @return string Text as rewritten by replaceInternalLinks2()
 */
public function replaceInternalLinks( $s ) {
    // replaceInternalLinks2() rewrites $s in place and returns the holders
    $this->mLinkHolders->merge(
        $this->replaceInternalLinks2( $s )
    );

    return $s;
}
2182 
/**
 * Process [[ ]] wikilinks in $s, which is modified in place, and return
 * the holders of the links whose rendering is deferred.
 *
 * The text is split on "[[" and each piece is examined in turn. Pieces
 * that do not form a valid link are copied back unchanged. Valid links are
 * handled by kind: interlanguage links and categories are registered on
 * the output and removed from the text, file and media links are rendered
 * immediately, self-links get a self-link object, and all other links
 * become either a known-link holder or an entry in the returned holder
 * array.
 *
 * @param string &$s Text to process; replaced by the processed text
 * @return LinkHolderArray Holders created while processing $s
 * @throws MWException If $this->mTitle is null
 */
2191  public function replaceInternalLinks2( &$s ) {
      # The patterns are built on the first call and kept in static variables
2192  static $tc = false, $e1, $e1_img;
2193  # the % is needed to support urlencoded titles as well
2194  if ( !$tc ) {
2195  $tc = Title::legalChars() . '#%';
2196  # Match a link having the form [[namespace:link|alternate]]trail
2197  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2198  # Match cases where there is no "]]", which might still be images
2199  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2200  }
2201 
2202  $holders = new LinkHolderArray( $this );
2203 
2204  # split the entire text string on occurrences of [[
2205  $a = StringUtils::explode( '[[', ' ' . $s );
2206  # get the first element (all text up to first [[), and remove the space we added
2207  $s = $a->current();
2208  $a->next();
2209  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2210  $s = substr( $s, 1 );
2211 
2212  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2213  $e2 = null;
2214  if ( $useLinkPrefixExtension ) {
2215  # Match the end of a line for a word that's not followed by whitespace,
2216  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2217  $charset = $this->contLang->linkPrefixCharset();
2218  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2219  }
2220 
2221  if ( is_null( $this->mTitle ) ) {
2222  throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2223  }
2224  $nottalk = !$this->mTitle->isTalkPage();
2225 
      # With link prefixes, remember the prefix found in the text before the
      # first link; without them the prefix is always empty.
2226  if ( $useLinkPrefixExtension ) {
2227  $m = [];
2228  if ( preg_match( $e2, $s, $m ) ) {
2229  $first_prefix = $m[2];
2230  } else {
2231  $first_prefix = false;
2232  }
2233  } else {
2234  $prefix = '';
2235  }
2236 
2237  $useSubpages = $this->areSubpagesAllowed();
2238 
2239  # Loop for each link
2240  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2241  # Check for excessive memory usage
2242  if ( $holders->isBig() ) {
2243  # Too big
2244  # Do the existence check, replace the link holders and clear the array
2245  $holders->replace( $s );
2246  $holders->clear();
2247  }
2248 
2249  if ( $useLinkPrefixExtension ) {
      # Split the text accumulated so far into the part to keep and the
      # link prefix that directly precedes this link
2250  if ( preg_match( $e2, $s, $m ) ) {
2251  list( , $s, $prefix ) = $m;
2252  } else {
2253  $prefix = '';
2254  }
2255  # first link
2256  if ( $first_prefix ) {
2257  $prefix = $first_prefix;
2258  $first_prefix = false;
2259  }
2260  }
2261 
2262  $might_be_img = false;
2263 
2264  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2265  $text = $m[2];
2266  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2267  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
2268  # the real problem is with the $e1 regex
2269  # See T1500.
2270  # Still some problems for cases where the ] is meant to be outside punctuation,
2271  # and no image is in sight. See T4095.
2272  if ( $text !== ''
2273  && substr( $m[3], 0, 1 ) === ']'
2274  && strpos( $text, '[' ) !== false
2275  ) {
2276  $text .= ']'; # so that replaceExternalLinks($text) works later
2277  $m[3] = substr( $m[3], 1 );
2278  }
2279  # fix up urlencoded title texts
2280  if ( strpos( $m[1], '%' ) !== false ) {
2281  # Should anchors '#' also be rejected?
2282  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2283  }
2284  $trail = $m[3];
2285  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2286  # Invalid, but might be an image with a link in its caption
2287  $might_be_img = true;
2288  $text = $m[2];
2289  if ( strpos( $m[1], '%' ) !== false ) {
2290  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2291  }
2292  $trail = "";
2293  } else { # Invalid form; output directly
2294  $s .= $prefix . '[[' . $line;
2295  continue;
2296  }
2297 
2298  $origLink = ltrim( $m[1], ' ' );
2299 
2300  # Don't allow internal links to pages containing
2301  # PROTO: where PROTO is a valid URL protocol; these
2302  # should be external links.
2303  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2304  $s .= $prefix . '[[' . $line;
2305  continue;
2306  }
2307 
2308  # Make subpage if necessary
2309  if ( $useSubpages ) {
2310  $link = $this->maybeDoSubpageLink( $origLink, $text );
2311  } else {
2312  $link = $origLink;
2313  }
2314 
2315  // \x7f isn't a default legal title char, so most likely strip
2316  // markers will force us into the "invalid form" path above. But,
2317  // just in case, let's assert that xmlish tags aren't valid in
2318  // the title position.
2319  $unstrip = $this->mStripState->killMarkers( $link );
2320  $noMarkers = ( $unstrip === $link );
2321 
2322  $nt = $noMarkers ? Title::newFromText( $link ) : null;
2323  if ( $nt === null ) {
2324  $s .= $prefix . '[[' . $line;
2325  continue;
2326  }
2327 
2328  $ns = $nt->getNamespace();
2329  $iw = $nt->getInterwiki();
2330 
      # False when the link starts with ':'. The interlanguage, file and
      # category handling below is only done when this is true.
2331  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2332 
2333  if ( $might_be_img ) { # if this is actually an invalid link
2334  if ( $ns == NS_FILE && $noforce ) { # but might be an image
2335  $found = false;
      # This inner loop advances the same iterator as the outer loop, so
      # the pieces consumed here are not visited again.
2336  while ( true ) {
2337  # look at the next 'line' to see if we can close it there
2338  $a->next();
2339  $next_line = $a->current();
2340  if ( $next_line === false || $next_line === null ) {
2341  break;
2342  }
2343  $m = explode( ']]', $next_line, 3 );
2344  if ( count( $m ) == 3 ) {
2345  # the first ]] closes the inner link, the second the image
2346  $found = true;
2347  $text .= "[[{$m[0]}]]{$m[1]}";
2348  $trail = $m[2];
2349  break;
2350  } elseif ( count( $m ) == 2 ) {
2351  # if there's exactly one ]] that's fine, we'll keep looking
2352  $text .= "[[{$m[0]}]]{$m[1]}";
2353  } else {
2354  # if $next_line is invalid too, we need look no further
2355  $text .= '[[' . $next_line;
2356  break;
2357  }
2358  }
2359  if ( !$found ) {
2360  # we couldn't find the end of this imageLink, so output it raw
2361  # but don't ignore what might be perfectly normal links in the text we've examined
2362  $holders->merge( $this->replaceInternalLinks2( $text ) );
2363  $s .= "{$prefix}[[$link|$text";
2364  # note: no $trail, because without an end, there *is* no trail
2365  continue;
2366  }
2367  } else { # it's not an image, so output it raw
2368  $s .= "{$prefix}[[$link|$text";
2369  # note: no $trail, because without an end, there *is* no trail
2370  continue;
2371  }
2372  }
2373 
2374  $wasblank = ( $text == '' );
2375  if ( $wasblank ) {
      # No link text given: the link target is displayed
2376  $text = $link;
2377  if ( !$noforce ) {
2378  # Strip off leading ':'
2379  $text = substr( $text, 1 );
2380  }
2381  } else {
2382  # T6598 madness. Handle the quotes only if they come from the alternate part
2383  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2384  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2385  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2386  $text = $this->doQuotes( $text );
2387  }
2388 
2389  # Link not escaped by : , create the various objects
2390  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2391  # Interwikis
2392  if (
2393  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2394  Language::fetchLanguageName( $iw, null, 'mw' ) ||
2395  in_array( $iw, $this->siteConfig->get( 'ExtraInterlanguageLinkPrefixes' ) )
2396  )
2397  ) {
2398  # T26502: filter duplicates
2399  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2400  $this->mLangLinkLanguages[$iw] = true;
2401  $this->mOutput->addLanguageLink( $nt->getFullText() );
2402  }
2403 
      # The link itself is removed from the text: only the trail is kept,
      # and whitespace at the end of the preceding text is trimmed.
2407  $s = rtrim( $s . $prefix ) . $trail; # T175416
2408  continue;
2409  }
2410 
2411  if ( $ns == NS_FILE ) {
      # Bad images are not rendered here; they fall through to the
      # ordinary link handling below.
2412  if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
2413  if ( $wasblank ) {
2414  # if no parameters were passed, $text
2415  # becomes something like "File:Foo.png",
2416  # which we don't want to pass on to the
2417  # image generator
2418  $text = '';
2419  } else {
2420  # recursively parse links inside the image caption
2421  # actually, this will parse them in any other parameters, too,
2422  # but it might be hard to fix that, and it doesn't matter ATM
2423  $text = $this->replaceExternalLinks( $text );
2424  $holders->merge( $this->replaceInternalLinks2( $text ) );
2425  }
2426  # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2427  $s .= $prefix . $this->armorLinks(
2428  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2429  continue;
2430  }
2431  } elseif ( $ns == NS_CATEGORY ) {
      # The category link is removed from the text: only the trail is kept,
      # and whitespace at the end of the preceding text is trimmed.
2435  $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2436 
      # The sort key is the link text if one was given, else the default sort
2437  if ( $wasblank ) {
2438  $sortkey = $this->getDefaultSort();
2439  } else {
2440  $sortkey = $text;
2441  }
2442  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2443  $sortkey = str_replace( "\n", '', $sortkey );
2444  $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2445  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2446 
2447  continue;
2448  }
2449  }
2450 
2451  # Self-link checking. For some languages, variants of the title are checked in
2452  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2453  # for linking to a different variant.
2454  if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2455  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2456  continue;
2457  }
2458 
2459  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2460  # @todo FIXME: Should do batch file existence checks, see comment below
2461  if ( $ns == NS_MEDIA ) {
2462  # Give extensions a chance to select the file revision for us
2463  $options = [];
2464  $descQuery = false;
2465  Hooks::run( 'BeforeParserFetchFileAndTitle',
2466  [ $this, $nt, &$options, &$descQuery ] );
2467  # Fetch and register the file (file title may be different via hooks)
2468  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2469  # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2470  $s .= $prefix . $this->armorLinks(
2471  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2472  continue;
2473  }
2474 
2475  # Some titles, such as valid special pages or files in foreign repos, should
2476  # be shown as bluelinks even though they're not included in the page table
2477  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2478  # batch file existence checks for NS_FILE and NS_MEDIA
2479  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2480  $this->mOutput->addLink( $nt );
2481  $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2482  } else {
2483  # Links will be added to the output link list after checking
2484  $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2485  }
2486  }
2487  return $holders;
2488  }
2489 
/**
 * Render a link to a page that is known to exist.
 *
 * The trail is split with Linker::splitTrail(); the first part is placed
 * inside the link text and the second part is appended after the link.
 * URL protocols inside the generated HTML are cloaked via armorLinks().
 *
 * @param Title $nt Link target
 * @param string $text Link text (HTML); when empty, the escaped prefixed
 *   title text is used instead
 * @param string $trail Text following the link
 * @param string $prefix Text placed in front of the link text, inside the link
 * @return string Armored link HTML followed by the remaining trail
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	list( $linkedTrail, $remainingTrail ) = Linker::splitTrail( $trail );

	# Fall back to the escaped title when no explicit link text was given
	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$renderer = $this->getLinkRenderer();
	$anchor = $renderer->makeKnownLink(
		$nt,
		new HtmlArmor( "$prefix$label$linkedTrail" )
	);

	return $this->armorLinks( $anchor ) . $remainingTrail;
}
2516 
/**
 * Cloak URL protocols in a piece of text.
 *
 * Each match of $this->mUrlProtocols (case-insensitive, at a word boundary)
 * is prefixed with MARKER_PREFIX followed by "NOPARSE".
 *
 * @param string $text
 * @return string Result of the preg_replace() call
 */
public function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$cloaked = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $cloaked, $text );
}
2531 
/**
 * Report whether the namespace of the title being parsed supports subpages.
 *
 * @return bool Result of NamespaceInfo::hasSubpages() for the current namespace
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$currentNamespace = $this->mTitle->getNamespace();

	return $this->nsInfo->hasSubpages( $currentNamespace );
}
2540 
/**
 * Thin wrapper around Linker::normalizeSubpageLink() for the current title.
 *
 * @param string $target Link target as written
 * @param string &$text Passed by reference straight through to the Linker call
 * @return string Whatever Linker::normalizeSubpageLink() returns
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$contextTitle = $this->mTitle;
	$normalized = Linker::normalizeSubpageLink( $contextTitle, $target, $text );

	return $normalized;
}
2552 
/**
 * Delegate block-level processing to BlockLevelPass::doBlockLevels().
 *
 * Both arguments are forwarded unchanged.
 *
 * @param string $text
 * @param bool $linestart
 * @return string Whatever BlockLevelPass::doBlockLevels() returns
 */
public function doBlockLevels( $text, $linestart ) {
	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $processed;
}
2564 
/**
 * Return the value of a magic variable (e.g. PAGENAME, CURRENTDAY).
 *
 * Values computed in the switch below are stored in $this->mVarCache and
 * served from there on later calls, provided the
 * 'ParserGetVariableValueVarCache' hook returns true. The cases that read
 * site configuration (and 'directionmark') return directly and are not
 * cached here. Unknown IDs are delegated to the
 * 'ParserGetVariableValueSwitch' hook, whose $ret is returned as-is.
 *
 * @param string $index Magic variable identifier
 * @param bool|PPFrame $frame Passed by reference to the
 *   'ParserGetVariableValueSwitch' hook
 * @throws MWException When no title is set on the parser
 * @return mixed Variable value; null for unknown IDs that no hook handles
 */
public function getVariableValue( $index, $frame = false ) {
	if ( is_null( $this->mTitle ) ) {
		// If no title set, bad things are going to happen
		// later. Title should always be set since this
		// should only be called in the middle of a parse
		// operation (but the unit-tests do funky stuff)
		throw new MWException( __METHOD__ . ' Should only be '
			. ' called while parsing (no title set)' );
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	// Serve from the per-parse cache unless a hook handler vetoes it
	if (
		Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) &&
		isset( $this->mVarCache[$index] )
	) {
		return $this->mVarCache[$index];
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );

	$pageLang = $this->getFunctionLang();

	switch ( $index ) {
		case '!':
			$value = '|';
			break;
		case 'currentmonth':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
			break;
		case 'currentmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
			break;
		case 'currentmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentday':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
			break;
		case 'currentday2':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
			break;
		case 'localmonth':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
			break;
		case 'localmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
			break;
		case 'localmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localday':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
			break;
		case 'localday2':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
			break;
		case 'pagename':
			$value = wfEscapeWikiText( $this->mTitle->getText() );
			break;
		case 'pagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
			break;
		case 'fullpagename':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
			break;
		case 'subpagename':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
			break;
		case 'subpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$value = wfEscapeWikiText( $this->mTitle->getRootText() );
			break;
		case 'rootpagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getRootText()
			) ) );
			break;
		case 'basepagename':
			$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
			break;
		case 'basepagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getBaseText()
			) ) );
			break;
		case 'talkpagename':
			if ( $this->mTitle->canHaveTalkPage() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
			} else {
				$value = '';
			}
			break;
		case 'talkpagenamee':
			if ( $this->mTitle->canHaveTalkPage() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
			} else {
				$value = '';
			}
			break;
		case 'subjectpagename':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
			break;
		case 'pageid': // requested in T25427
			$pageid = $this->getTitle()->getArticleID();
			if ( $pageid == 0 ) {
				# 0 means the page doesn't exist in the database,
				# which means the user is previewing a new page.
				# The vary-revision flag must be set, because the magic word
				# will have a different value once the page is saved.
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
			}
			$value = $pageid ?: null;
			break;
		case 'revisionid':
			if (
				$this->siteConfig->get( 'MiserMode' ) &&
				!$this->mOptions->getInterfaceMessage() &&
				// @TODO: disallow this word on all namespaces
				$this->nsInfo->isContent( $this->mTitle->getNamespace() )
			) {
				// Use a stub result instead of the actual revision ID in order to avoid
				// double parses on page save but still allow preview detection (T137900)
				if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
					$value = '-';
				} else {
					$this->mOutput->setFlag( 'vary-revision-exists' );
					$value = '';
				}
			} else {
				# Inform the edit saving system that getting the canonical output after
				# revision insertion requires another parse using the actual revision ID
				$this->mOutput->setFlag( 'vary-revision-id' );
				wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision-id...\n" );
				$value = $this->getRevisionId();
				if ( $value === 0 ) {
					$rev = $this->getRevisionObject();
					$value = $rev ? $rev->getId() : $value;
				}
				if ( !$value ) {
					$value = $this->mOptions->getSpeculativeRevId();
					if ( $value ) {
						$this->mOutput->setSpeculativeRevIdUsed( $value );
					}
				}
			}
			break;
		case 'revisionday':
			$value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
			break;
		case 'revisionday2':
			$value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
			break;
		case 'revisionmonth':
			$value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
			break;
		case 'revisionmonth1':
			$value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
			break;
		case 'revisionyear':
			$value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
			break;
		case 'revisiontimestamp':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
			$value = $this->getRevisionTimestamp();
			break;
		case 'revisionuser':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned for null edits.
			$this->mOutput->setFlag( 'vary-user' );
			wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-user...\n" );
			$value = $this->getRevisionUser();
			break;
		case 'revisionsize':
			$value = $this->getRevisionSize();
			break;
		case 'namespace':
			$value = str_replace( '_', ' ',
				$this->contLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacee':
			$value = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacenumber':
			$value = $this->mTitle->getNamespace();
			break;
		case 'talkspace':
			$value = $this->mTitle->canHaveTalkPage()
				? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
				: '';
			break;
		case 'talkspacee':
			$value = $this->mTitle->canHaveTalkPage() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
			break;
		case 'subjectspace':
			$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
			break;
		case 'currentdayname':
			$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
			break;
		case 'currentyear':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
			break;
		case 'currenttime':
			$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
			break;
		case 'currentweek':
			# @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
			break;
		case 'currentdow':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
			break;
		case 'localdayname':
			$value = $pageLang->getWeekdayName(
				(int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
			);
			break;
		case 'localyear':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
			break;
		case 'localtime':
			$value = $pageLang->time(
				MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
				false,
				false
			);
			break;
		case 'localhour':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
			break;
		case 'localweek':
			# @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
			break;
		case 'localdow':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
			break;
		case 'numberofarticles':
			$value = $pageLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $pageLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $pageLang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$value = $pageLang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$value = $pageLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$value = $pageLang->formatNum( SiteStats::edits() );
			break;
		case 'currenttimestamp':
			$value = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
			break;
		case 'currentversion':
			# $value must be assigned here; otherwise the cache write and
			# return below would read an undefined variable
			$value = SpecialVersion::getVersion();
			break;
		case 'articlepath':
			return $this->siteConfig->get( 'ArticlePath' );
		case 'sitename':
			return $this->siteConfig->get( 'Sitename' );
		case 'server':
			return $this->siteConfig->get( 'Server' );
		case 'servername':
			return $this->siteConfig->get( 'ServerName' );
		case 'scriptpath':
			return $this->siteConfig->get( 'ScriptPath' );
		case 'stylepath':
			return $this->siteConfig->get( 'StylePath' );
		case 'directionmark':
			return $pageLang->getDirMark();
		case 'contentlanguage':
			return $this->siteConfig->get( 'LanguageCode' );
		case 'pagelanguage':
			$value = $pageLang->getCode();
			break;
		case 'cascadingsources':
			# Same as above: the value has to be assigned before break
			$value = CoreParserFunctions::cascadingsources( $this );
			break;
		default:
			# Unknown variable ID: let extensions supply the value
			$ret = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				[ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
			);

			return $ret;
	}

	# Only cache under a truthy key
	if ( $index ) {
		$this->mVarCache[$index] = $value;
	}

	return $value;
}
2928 
/**
 * Extract a slice of the revision timestamp, flagging the output as
 * revision-dependent when that slice could change shortly.
 *
 * When there is no revision object yet, the same slice is taken from the
 * adjusted timestamp $mtts seconds from now; if the two slices differ, the
 * 'vary-revision' flag is set on the parser output.
 *
 * @param int $start Offset into the timestamp string
 * @param int $len Number of characters to take
 * @param int $mtts Look-ahead window in seconds
 * @param string $variable Name used in the debug log message
 * @return string The slice of the current revision timestamp
 */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
	# Slice of the timestamp used for this revision
	$current = substr( $this->getRevisionTimestamp(), $start, $len );

	# With an associated revision there is nothing to vary on
	if ( $this->getRevisionObject() ) {
		return $current;
	}

	# Same slice, taken from the adjusted timestamp $mtts seconds in the future
	$futureTimestamp = $this->contLang->userAdjust( wfTimestamp( TS_MW, time() + $mtts ), '' );
	$upcoming = substr( $futureTimestamp, $start, $len );

	if ( $current !== $upcoming ) {
		# Let the edit saving system know we should parse the page
		# *after* a revision ID has been assigned. This is for null edits.
		$this->mOutput->setFlag( 'vary-revision' );
		wfDebug( __METHOD__ . ": $variable used, setting vary-revision...\n" );
	}

	return $current;
}
2958 
/**
 * Build the magic word arrays for variables and subst words.
 *
 * Populates $this->mVariables and $this->mSubstWords from the IDs reported
 * by the magic word factory.
 */
public function initialiseVariables() {
	$factory = $this->magicWordFactory;

	$idsForVariables = $factory->getVariableIDs();
	$idsForSubst = $factory->getSubstIDs();

	$this->mVariables = $factory->newArray( $idsForVariables );
	$this->mSubstWords = $factory->newArray( $idsForSubst );
}
2971 
/**
 * Run the preprocessor over wikitext and return the resulting object.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Forwarded to Preprocessor::preprocessToObj()
 *   (e.g. self::PTD_FOR_INCLUSION)
 * @return PPNode Whatever preprocessToObj() returns
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
2998 
/**
 * Split a string into its leading whitespace, trimmed core, and trailing
 * whitespace, using the default character set of ltrim()/rtrim().
 *
 * @param string $s
 * @return array [ leading whitespace, trimmed text, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	$withoutLeading = ltrim( $s );
	$leading = substr( $s, 0, strlen( $s ) - strlen( $withoutLeading ) );

	$core = rtrim( $withoutLeading );
	$trailingLength = strlen( $withoutLeading ) - strlen( $core );

	# substr() with a zero negative offset would return the whole string,
	# so the no-trailing-whitespace case is handled explicitly
	$trailing = ( $trailingLength > 0 )
		? substr( $withoutLeading, -$trailingLength )
		: '';

	return [ $leading, $core, $trailing ];
}
3018 
/**
 * Preprocess wikitext and expand it in the given frame.
 *
 * Empty text, and text longer than the configured maximum include size, is
 * returned unchanged.
 *
 * @param string $text Wikitext to expand
 * @param bool|PPFrame|array $frame False to use a fresh frame; a non-PPFrame
 *   value is handed to Preprocessor::newCustomFrame()
 * @param bool $argsOnly When true, expand with PPFrame::NO_TEMPLATES
 * @return string Expanded text
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text? Also, prevent too big inclusions!
	$byteLength = strlen( $text );
	if ( $byteLength < 1 || $byteLength > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( $frame === false ) {
		$frame = $this->getPreprocessor()->newFrame();
	} elseif ( !( $frame instanceof PPFrame ) ) {
		wfDebug( __METHOD__ . " called using plain parameters instead of "
			. "a PPFrame instance. Creating custom frame.\n" );
		$frame = $this->getPreprocessor()->newCustomFrame( $frame );
	}

	$tree = $this->preprocessToDom( $text );
	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $tree, $expandFlags );
}
3060 
/**
 * Turn a list of "value" / "name=value" strings into an associative array.
 *
 * Entries without "=" become positional arguments numbered from 1. Entries
 * with "=" are split at the first "=", and both sides are trimmed; an empty
 * name or value is stored as an empty string.
 *
 * @param array $args List of argument strings
 * @return array Map of argument name (or 1-based position) to value
 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	$index = 1;

	foreach ( $args as $arg ) {
		# Split on the first "=" only, so values may themselves contain "="
		$pieces = explode( '=', $arg, 2 );

		if ( count( $pieces ) < 2 ) {
			# Positional argument
			$assocArgs[$index++] = $arg;
		} else {
			# trim() always yields a string, so no false checks are needed
			$assocArgs[trim( $pieces[0] )] = trim( $pieces[1] );
		}
	}

	return $assocArgs;
}
3089 
/**
 * Record that a parser limit was hit.
 *
 * Adds the "$limitationType-warning" message to the output warnings and
 * adds the "$limitationType-category" tracking category.
 *
 * @param string $limitationType Message key prefix for the limit
 * @param string|int $current Passed as first numeric message parameter
 * @param string|int $max Passed as second numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# does no harm if $current and $max are present but are unnecessary for the message
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( "$limitationType-warning" )->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( "$limitationType-category" );
}
3125 
/**
 * Expand a double-brace construct ({{...}}): subst handling, variables,
 * parser functions, and template / special page / interwiki transclusion.
 *
 * The stages run in order and each is skipped once $found is set. If
 * nothing matches, the original wikitext is rebuilt and returned.
 *
 * @param array $piece Uses the keys 'title', 'parts' and 'lineStart'
 * @param PPFrame $frame Frame in which the construct is expanded
 * @return array [ 'object' => ... ] when the result still needs expansion
 *   in the current frame (or nothing was found), otherwise [ 'text' => ... ]
 */
public function braceSubstitution( $piece, $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	# @todo FIXME: If piece['parts'] is null then the call to getLength()
	# below won't work b/c this $args isn't an object
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->getVariableValue( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = $this->maybeDoSubpageLink( $part1, $subpage );
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				wfDebug( __METHOD__ . ": template inclusion denied for " .
					$title->getPrefixedDBkey() . "\n" );
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is
			# preg_replace() returns the result rather than modifying its
			# subject, so assign it back; otherwise a title that already
			# starts with ":" would produce "[[::Title]]"
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3468 
/**
 * Invoke a registered parser function hook by name.
 *
 * The name is looked up among the case-sensitive synonyms first; failing
 * that it is lower-cased with the content language and looked up among the
 * case-insensitive synonyms.
 *
 * @param PPFrame $frame Frame used to expand PPNode arguments; also handed to
 *  hooks registered with SFH_OBJECT_ARGS
 * @param string $function Function name as written
 * @param array $args Hook arguments; values may be PPNode objects or strings
 * @return array Always carries 'found'. When 'found' is false nothing else is
 *  set. Otherwise it is the hook result normalised to an array whose 'text'
 *  holds the output; if the hook returned a falsy 'noparse', 'text' is the
 *  preprocessed DOM and 'isChildObj' is set to true.
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	# Resolve the synonym: case-sensitive table first, then case-insensitive
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		$function = $this->contLang->lc( $function );
		if ( !isset( $this->mFunctionSynonyms[0][$function] ) ) {
			return [ 'found' => false ];
		}
		$function = $this->mFunctionSynonyms[0][$function];
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	$allArgs = [ &$parser ];

	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Hook wants nodes: wrap every plain value except the one keyed 0,
		# then pass the frame and the collected nodes as two arguments
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			$funcArgs[] = ( $v instanceof PPNode || $k === 0 )
				? $v
				: $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
		}
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Hook wants plain text: flatten each argument to a trimmed string
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$plain = $frame->expand( $v );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$plain = $v;
			} else {
				$plain = "$k=$v";
			}
			$allArgs[] = trim( $plain );
		}
	}

	$result = $callback( ...$allArgs );

	# Hooks may return either a wikitext string or an array holding the
	# string plus flags; fold both shapes into the array this method returns.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	$noparse = $result['noparse'] ?? true;
	$preprocessFlags = $result['preprocessFlags'] ?? 0;

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3571 
/**
 * Get the preprocessor DOM for a template, using the per-parser DOM cache
 * and the redirect cache.
 *
 * @param Title $title Requested template title
 * @return array Two elements: the DOM (or false when the template text could
 *  not be fetched) and the title that was actually used
 */
public function getTemplateDom( $title ) {
	$requested = $title;
	$key = $title->getPrefixedDBkey();

	# A previously recorded redirect replaces the title before the DOM lookup
	if ( isset( $this->mTplRedirCache[$key] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$key];
		$title = Title::makeTitle( $ns, $dbk );
		$key = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$key] ) ) {
		return [ $this->mTplDomCache[$key], $title ];
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		# Remember the failure under the key that was looked up
		$this->mTplDomCache[$key] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$key] = $dom;

	# The fetch ended on another title: record requested => final
	if ( !$title->equals( $requested ) ) {
		$this->mTplRedirCache[$requested->getPrefixedDBkey()] = [
			$title->getNamespace(),
			$title->getDBkey()
		];
	}

	return [ $dom, $title ];
}
3611 
/**
 * Fetch the current revision of a title through the callback configured in
 * the parser options, memoising the answer per prefixed DB key.
 *
 * The memo is a MapCacheLRU created on first use with a capacity of 100.
 *
 * @param Title $title
 * @return mixed Whatever the current-revision callback returned for this title
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$cacheKey = $title->getPrefixedDBkey();
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$this->currentRevisionCache->set( $cacheKey,
			// Defaults to Parser::statelessFetchRevision()
			call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
		);
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3636 
# Static revision fetcher taking a Title and an optional parser (defaults to false).
# NOTE(review): no statement assigning $rev is visible in this listing, so as
# shown the method returns an undefined variable. The original assignment
# presumably loads the current revision of $title - confirm against the
# upstream file before relying on this block.
3646  public static function statelessFetchRevision( Title $title, $parser = false ) {
3648 
3649  return $rev;
3650  }
3651 
/**
 * Fetch the unparsed text of a template through the template callback and
 * register every reported dependency with the parser output.
 *
 * @param Title $title
 * @return array Two elements: the text as returned by the callback (U+007F
 *  replaced by "?" when it is a string) and the final title, falling back to
 *  $title when the callback reports none
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text.
		$text = strtr( $text, "\x7f", "?" );
	}

	foreach ( $fetched['deps'] ?? [] as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
		if ( $dep['title']->equals( $this->getTitle() ) ) {
			// If we transclude ourselves, the final result
			// will change based on the new version of the page
			$this->mOutput->setFlag( 'vary-revision' );
		}
	}

	return [ $text, $fetched['finalTitle'] ?? $title ];
}
3679 
/**
 * Fetch the unparsed text of a template; the final title is discarded.
 *
 * @param Title $title
 * @return string|bool First element of fetchTemplateAndTitle()'s result
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );
	return $text;
}
3688 
/**
 * Static template fetcher: load the wikitext for a title, following at most
 * one redirect, and collect the pages the result depends on.
 *
 * @param Title $title
 * @param Parser|bool $parser Parser to fetch current revisions through, or
 *  false to load them directly
 * @return array With keys:
 *  - 'text': the wikitext, or false when nothing could be loaded
 *  - 'finalTitle': the last title the loop processed
 *  - 'deps': list of [ 'title', 'page_id', 'rev_id' ] arrays
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];

	# Loop to fetch the article, with up to 1 redirect
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			# No parser to go through: load the current revision directly.
			# Without this assignment $rev would be undefined below.
			$rev = Revision::newFromTitle( $title );
		}
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = MediaWikiServices::getInstance()->getLinkCache();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id ];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id ];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			Hooks::run( 'ParserFetchTemplate',
				[ $parser, $title, $rev, &$text, &$deps ] );

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but the title may still name an interface message
			$message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
				lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		# A non-object target ends the loop via the is_object() check above
		$title = $content->getRedirectTarget();
	}
	return [
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps ];
}
3781 
/**
 * Fetch a file, discarding the final title. Flagged as deprecated since 1.32
 * via wfDeprecated().
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed First element of fetchFileAndTitle()'s result
 */
public function fetchFile( $title, $options = [] ) {
	wfDeprecated( __METHOD__, '1.32' );
	list( $file ) = $this->fetchFileAndTitle( $title, $options );
	return $file;
}
3794 
/**
 * Fetch a file and the title it resolved to, registering the file as a
 * dependency of the parser output.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (falsy when not found) and the title,
 *  replaced by the file's own title when the two differ
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	# Look the file up first; without this $file would be undefined below
	$file = $this->fetchFileNoRegister( $title, $options );

	$time = $file ? $file->getTimestamp() : false;
	$sha1 = $file ? $file->getSha1() : false;
	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	if ( $file && !$title->equals( $file->getTitle() ) ) {
		# Update fetched file title
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}
	return [ $file, $title ];
}
3816 
/**
 * Look a file up without registering it as a dependency of the output.
 *
 * @param Title $title
 * @param array $options Recognised keys as used here:
 *  - 'broken': if set, no lookup is made and false is returned
 *  - 'sha1': if set, the file is looked up by this key instead of by title
 *  The whole array is also forwarded to the repo lookup.
 * @return File|bool Result of the repo lookup, or false for 'broken'
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		$file = false; // broken thumbnail forced by hook
	} elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
		$file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	} else { // get by (name,timestamp)
		// Without this assignment $file would be undefined on this path
		$file = RepoGroup::singleton()->findFile( $title, $options );
	}
	return $file;
}
3837 
/**
 * Transclude a page from another wiki over HTTP, with the response cached in
 * the main WAN object cache.
 *
 * @param Title $title Interwiki title to fetch
 * @param string $action Value for the 'action' URL parameter
 * @return string Fetched content, or a content-language message when the
 *  feature is disabled, the URL exceeds 1024 bytes, or the fetch failed
 */
public function interwikiTransclude( $title, $action ) {
	if ( !$this->siteConfig->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$hasWikiId = ( $wikiId !== false );

	// Captured here because __METHOD__ would name the closure inside it
	$fname = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);

	$fetch = function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
		$req = MWHttpRequest::factory( $url, [], $fname );

		$status = $req->execute(); // Status object
		$ok = $status->isOK();
		if ( !$ok ) {
			// Failed requests are not stored
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// Remote side reported lag: cap the lifetime
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $ok ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$cacheOpts = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback(
		$cacheKey,
		$this->siteConfig->get( 'TranscludeCacheExpiry' ),
		$fetch,
		$cacheOpts
	);

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}
	if ( $data['code'] != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}
	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3904 
/**
 * Expand a triple-brace argument reference.
 *
 * @param array $piece Uses 'title' (the argument name node) and 'parts'
 *  (the nodes following it; the first one supplies the default)
 * @param PPFrame $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] (the default's children, or the
 *  re-assembled literal when nothing matched) or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$argName = trim( $nameWithSpaces );
	$text = $frame->getArgument( $argName );
	$object = false;

	if ( $text === false && $parts->getLength() > 0
		&& ( $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		)
	) {
		# No match in frame, use the supplied default
		$object = $parts->item( 0 )->getChildren();
	}

	# Charge the argument text against the 'arg' include-size budget
	$tooLarge = !$this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( $tooLarge ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}
	if ( $tooLarge ) {
		# Appended only after the "no match" test above, which needs the raw value
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	return ( $object !== false )
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
3950 
/**
 * Produce the replacement for an extension tag: either the hook output
 * itself or a strip marker standing in for it.
 *
 * @param array $params Uses 'name', and optionally 'attr', 'inner', 'close'
 *  and 'attributes'
 * @param PPFrame $frame Frame used to expand the pieces of the tag
 * @return string An expansion error passed straight through, the raw output
 *  (marker type 'none'), or the strip marker the output was stored under
 * @throws MWException When the marker type is not 'none', 'nowiki' or 'general'
 */
public function extensionSubstitution( $params, $frame ) {
	$errorStr = '<span class="error">';
	$errorLen = strlen( $errorStr );

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : null;
	if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// Same situation as for the name
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ( $this->ot['html'] || $isFunctionTag ) {
		# Run the registered hook for this tag
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];

			// Avoid PHP 7.1 warning from passing $this by reference
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Array result: element 0 is the output, the rest are flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		# No hook is run: rebuild the tag source text
		if ( $attrText === null ) {
			$attrText = '';
		}
		foreach ( $params['attributes'] ?? [] as $attrName => $attrValue ) {
			$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
				htmlspecialchars( $attrValue ) . '"';
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = ( $params['close'] === null ) ? '' : $frame->expand( $params['close'] );
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// Same situation as for the name
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	}
	if ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4060 
/**
 * Add to one of the include-size counters unless that would push it past
 * the maximum include size from the parser options.
 *
 * @param string $type Counter to charge (key into mIncludeSizes)
 * @param int $size Amount to add
 * @return bool False, leaving the counter untouched, if the limit would be
 *  exceeded; true once the counter has been increased
 */
public function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}
	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4076 
# NOTE(review): the declaration line of this method is not present in this
# listing; only its body is shown - restore the header from the upstream file.
# Body: bump the expensive-function counter, then report whether the new
# count is still within the limit configured in the parser options.
4083  $this->mExpensiveFunctionCount++;
4084  return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4085  }
4086 
/**
 * Strip double-underscore magic words from the text and apply their effects
 * to the parser and its output.
 *
 * @param string $text
 * @return string Text with the first __TOC__ swapped for a placeholder and
 *  the other double-underscore words removed
 */
public function doDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder for the first occurrence (filled in with the TOC
		# at the end), then drop any further occurrences.
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );
		$text = $tocWord->replace( '', $text );
	}

	# Now match and remove the rest of them
	$underscoreWords = $this->magicWordFactory->getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $underscoreWords->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# 'index' is handled last, so __INDEX__ always overrides __NOINDEX__ (T16899)
	$policies = [
		'noindex' => 'noindex-category',
		'index' => 'index-category',
	];
	foreach ( $policies as $policy => $category ) {
		if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->mTitle->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( $category );
		}
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4142 
/**
 * Add a tracking category for the current title by delegating to the
 * parser output.
 *
 * @param string $msg Passed through to ParserOutput::addTrackingCategory()
 * @return mixed Whatever the parser output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$added = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );
	return $added;
}
4151 
# Rewrite every <h1>..<h6> element in $text and build the table of contents.
#
# For each heading this computes its TOC level and number, a unique anchor
# (plus an optional fallback anchor), an optional edit-section marker, and an
# entry of section data. Each heading is replaced by the output of
# Linker::makeHeadline(). The TOC HTML is stored on the parser output and
# either replaces the placeholder left for __TOC__ or, for the main text, is
# appended to the block before the first heading.
#
# $text      HTML containing the headings to process
# $origText  Original wikitext; preprocessed here to locate heading offsets
# $isMain    When true, section data is stored on the output and the TOC may
#            be inserted automatically
# Returns the text with headings (and possibly the TOC) replaced.
4168  public function formatHeadings( $text, $origText, $isMain = true ) {
4169  # Inhibit editsection links if requested in the page
4170  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4171  $maybeShowEditLink = false;
4172  } else {
4173  $maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
4174  }
4175 
4176  # Get all headlines for numbering them and adding funky stuff like [edit]
4177  # links - this is for later, but we need the number of headlines right now
4178  # NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
4179  # be trimmed here since whitespace in HTML headings is significant.
      # Named groups are also available by position: level=1, attrib=2, header=3.
4180  $matches = [];
4181  $numMatches = preg_match_all(
4182  '/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
4183  $text,
4184  $matches
4185  );
4186 
4187  # if there are fewer than 4 headlines in the article, do not show TOC
4188  # unless it's been explicitly enabled.
4189  $enoughToc = $this->mShowToc &&
4190  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4191 
4192  # Allow user to stipulate that a page should have a "new section"
4193  # link added via __NEWSECTIONLINK__
4194  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4195  $this->mOutput->setNewSection( true );
4196  }
4197 
4198  # Allow user to remove the "new section"
4199  # link via __NONEWSECTIONLINK__
4200  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4201  $this->mOutput->hideNewSection( true );
4202  }
4203 
4204  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4205  # override above conditions and always show TOC above first header
4206  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4207  $this->mShowToc = true;
4208  $enoughToc = true;
4209  }
4210 
4211  # headline counter
4212  $headlineCount = 0;
4213  $numVisible = 0;
4214 
4215  # Ugh .. the TOC should have neat indentation levels which can be
4216  # passed to the skin functions. These are determined here
4217  $toc = '';
4218  $full = '';
4219  $head = [];
4220  $sublevelCount = [];
4221  $levelCount = [];
4222  $level = 0;
4223  $prevlevel = 0;
4224  $toclevel = 0;
4225  $prevtoclevel = 0;
4226  $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4227  $baseTitleText = $this->mTitle->getPrefixedDBkey();
      # The output type is switched to OT_WIKI for the loop and restored afterwards.
4228  $oldType = $this->mOutputType;
4229  $this->setOutputType( self::OT_WIKI );
4230  $frame = $this->getPreprocessor()->newFrame();
4231  $root = $this->preprocessToDom( $origText );
4232  $node = $root->getFirstChild();
4233  $byteOffset = 0;
4234  $tocraw = [];
      # Lower-cased anchors already handed out, used to keep anchors unique.
4235  $refers = [];
4236 
4237  $headlines = $numMatches !== false ? $matches[3] : [];
4238 
4239  $maxTocLevel = $this->siteConfig->get( 'MaxTocLevel' );
4240  foreach ( $headlines as $headline ) {
4241  $isTemplate = false;
4242  $titleText = false;
4243  $sectionIndex = false;
4244  $numbering = '';
4245  $markerMatches = [];
      # A leading heading marker carries the serial number under which the
      # originating title and section index were stored in mHeadings.
4246  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4247  $serial = $markerMatches[1];
4248  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4249  $isTemplate = ( $titleText != $baseTitleText );
4250  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4251  }
4252 
4253  if ( $toclevel ) {
4254  $prevlevel = $level;
4255  }
4256  $level = $matches[1][$headlineCount];
4257 
4258  if ( $level > $prevlevel ) {
4259  # Increase TOC level
4260  $toclevel++;
4261  $sublevelCount[$toclevel] = 0;
4262  if ( $toclevel < $maxTocLevel ) {
4263  $prevtoclevel = $toclevel;
4264  $toc .= Linker::tocIndent();
4265  $numVisible++;
4266  }
4267  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4268  # Decrease TOC level, find level to jump to
4269 
4270  for ( $i = $toclevel; $i > 0; $i-- ) {
4271  if ( $levelCount[$i] == $level ) {
4272  # Found last matching level
4273  $toclevel = $i;
4274  break;
4275  } elseif ( $levelCount[$i] < $level ) {
4276  # Found first matching level below current level
4277  $toclevel = $i + 1;
4278  break;
4279  }
4280  }
4281  if ( $i == 0 ) {
4282  $toclevel = 1;
4283  }
4284  if ( $toclevel < $maxTocLevel ) {
4285  if ( $prevtoclevel < $maxTocLevel ) {
4286  # Unindent only if the previous toc level was shown :p
4287  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4288  $prevtoclevel = $toclevel;
4289  } else {
4290  $toc .= Linker::tocLineEnd();
4291  }
4292  }
4293  } else {
4294  # No change in level, end TOC line
4295  if ( $toclevel < $maxTocLevel ) {
4296  $toc .= Linker::tocLineEnd();
4297  }
4298  }
4299 
4300  $levelCount[$toclevel] = $level;
4301 
4302  # count number of headlines for each level
4303  $sublevelCount[$toclevel]++;
4304  $dot = 0;
4305  for ( $i = 1; $i <= $toclevel; $i++ ) {
4306  if ( !empty( $sublevelCount[$i] ) ) {
4307  if ( $dot ) {
4308  $numbering .= '.';
4309  }
4310  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4311  $dot = 1;
4312  }
4313  }
4314 
4315  # The safe header is a version of the header text safe to use for links
4316 
4317  # Remove link placeholders by the link text.
4318  # <!--LINK number-->
4319  # turns into
4320  # link text with suffix
4321  # Do this before unstrip since link text can contain strip markers
4322  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4323 
4324  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4325  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4326 
4327  # Remove any <style> or <script> tags (T198618)
4328  $safeHeadline = preg_replace(
4329  '#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
4330  '',
4331  $safeHeadline
4332  );
4333 
4334  # Strip out HTML (first regex removes any tag not allowed)
4335  # Allowed tags are:
4336  # * <sup> and <sub> (T10393)
4337  # * <i> (T28375)
4338  # * <b> (r105284)
4339  # * <bdi> (T74884)
4340  # * <span dir="rtl"> and <span dir="ltr"> (T37167)
4341  # * <s> and <strike> (T35715)
4342  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4343  # to allow setting directionality in toc items.
4344  $tocline = preg_replace(
4345  [
4346  '#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
4347  '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
4348  ],
4349  [ '', '<$1>' ],
4350  $safeHeadline
4351  );
4352 
4353  # Strip '<span></span>', which is the result from the above if
4354  # <span id="foo"></span> is used to produce an additional anchor
4355  # for a section.
4356  $tocline = str_replace( '<span></span>', '', $tocline );
4357 
4358  $tocline = trim( $tocline );
4359 
4360  # For the anchor, strip out HTML-y stuff period
4361  $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4362  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4363 
4364  # Save headline for section edit hint before it's escaped
4365  $headlineHint = $safeHeadline;
4366 
4367  # Decode HTML entities
4368  $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );
4369 
4370  $safeHeadline = self::normalizeSectionName( $safeHeadline );
4371 
4372  $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
4373  $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
4374  $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
4375  if ( $fallbackHeadline === $safeHeadline ) {
4376  # No reason to have both (in fact, we can't)
4377  $fallbackHeadline = false;
4378  }
4379 
4380  # HTML IDs must be case-insensitively unique for IE compatibility (T12721).
4381  # @todo FIXME: We may be changing them depending on the current locale.
4382  $arrayKey = strtolower( $safeHeadline );
4383  if ( $fallbackHeadline === false ) {
4384  $fallbackArrayKey = false;
4385  } else {
4386  $fallbackArrayKey = strtolower( $fallbackHeadline );
4387  }
4388 
4389  # Create the anchor for linking from the TOC to the section
      # A repeated anchor gets the first free "_N" suffix, starting at N = 2.
4390  $anchor = $safeHeadline;
4391  $fallbackAnchor = $fallbackHeadline;
4392  if ( isset( $refers[$arrayKey] ) ) {
4393  // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
4394  for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4395  $anchor .= "_$i";
4396  $linkAnchor .= "_$i";
4397  $refers["${arrayKey}_$i"] = true;
4398  } else {
4399  $refers[$arrayKey] = true;
4400  }
4401  if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
4402  // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
4403  for ( $i = 2; isset( $refers["${fallbackArrayKey}_$i"] ); ++$i );
4404  $fallbackAnchor .= "_$i";
4405  $refers["${fallbackArrayKey}_$i"] = true;
4406  } else {
4407  $refers[$fallbackArrayKey] = true;
4408  }
4409 
4410  # Don't number the heading if it is the only one (looks silly)
4411  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4412  # the two are different if the line contains a link
4413  $headline = Html::element(
4414  'span',
4415  [ 'class' => 'mw-headline-number' ],
4416  $numbering
4417  ) . ' ' . $headline;
4418  }
4419 
4420  if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
4421  $toc .= Linker::tocLine( $linkAnchor, $tocline,
4422  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4423  }
4424 
4425  # Add the section to the section tree
4426  # Find the DOM node for this header
      # NOTE(review): the running offset is accumulated with mb_strlen(), which
      # counts characters, yet it is stored under 'byteoffset' - confirm which
      # unit consumers expect.
4427  $noOffset = ( $isTemplate || $sectionIndex === false );
4428  while ( $node && !$noOffset ) {
4429  if ( $node->getName() === 'h' ) {
4430  $bits = $node->splitHeading();
4431  if ( $bits['i'] == $sectionIndex ) {
4432  break;
4433  }
4434  }
4435  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4436  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4437  $node = $node->getNextSibling();
4438  }
4439  $tocraw[] = [
4440  'toclevel' => $toclevel,
4441  'level' => $level,
4442  'line' => $tocline,
4443  'number' => $numbering,
4444  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4445  'fromtitle' => $titleText,
4446  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4447  'anchor' => $anchor,
4448  ];
4449 
4450  # give headline the correct <h#> tag
4451  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4452  // Output edit section links as markers with styles that can be customized by skins
4453  if ( $isTemplate ) {
4454  # Put a T flag in the section identifier, to indicate to extractSections()
4455  # that sections inside <includeonly> should be counted.
4456  $editsectionPage = $titleText;
4457  $editsectionSection = "T-$sectionIndex";
4458  $editsectionContent = null;
4459  } else {
4460  $editsectionPage = $this->mTitle->getPrefixedText();
4461  $editsectionSection = $sectionIndex;
4462  $editsectionContent = $headlineHint;
4463  }
4464  // We use a bit of pesudo-xml for editsection markers. The
4465  // language converter is run later on. Using a UNIQ style marker
4466  // leads to the converter screwing up the tokens when it
4467  // converts stuff. And trying to insert strip tags fails too. At
4468  // this point all real inputted tags have already been escaped,
4469  // so we don't have to worry about a user trying to input one of
4470  // these markers directly. We use a page and section attribute
4471  // to stop the language converter from converting these
4472  // important bits of data, but put the headline hint inside a
4473  // content block because the language converter is supposed to
4474  // be able to convert that piece of data.
4475  // Gets replaced with html in ParserOutput::getText
4476  $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4477  $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4478  if ( $editsectionContent !== null ) {
4479  $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4480  } else {
4481  $editlink .= '/>';
4482  }
4483  } else {
4484  $editlink = '';
4485  }
4486  $head[$headlineCount] = Linker::makeHeadline( $level,
4487  $matches['attrib'][$headlineCount], $anchor, $headline,
4488  $editlink, $fallbackAnchor );
4489 
4490  $headlineCount++;
4491  }
4492 
4493  $this->setOutputType( $oldType );
4494 
4495  # Never ever show TOC if no headers
4496  if ( $numVisible < 1 ) {
4497  $enoughToc = false;
4498  }
4499 
4500  if ( $enoughToc ) {
4501  if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
4502  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4503  }
4504  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4505  $this->mOutput->setTOCHTML( $toc );
4506  $toc = self::TOC_START . $toc . self::TOC_END;
4507  }
4508 
4509  if ( $isMain ) {
4510  $this->mOutput->setSections( $tocraw );
4511  }
4512 
4513  # split up and insert constructed headlines
      # NOTE(review): this pattern requires "</Hn>" exactly, while the matching
      # pattern at the top also accepts spaces before ">" - confirm the two
      # cannot disagree on real input.
4514  $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4515  $i = 0;
4516 
4517  // build an array of document sections
4518  $sections = [];
4519  foreach ( $blocks as $block ) {
4520  // $head is zero-based, sections aren't.
4521  if ( empty( $head[$i - 1] ) ) {
4522  $sections[$i] = $block;
4523  } else {
4524  $sections[$i] = $head[$i - 1] . $block;
4525  }
4526 
4537  Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );
4538 
4539  $i++;
4540  }
4541 
4542  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4543  // append the TOC at the beginning
4544  // Top anchor now in skin
4545  $sections[0] .= $toc . "\n";
4546  }
4547 
4548  $full .= implode( '', $sections );
4549 
      # With a forced position the TOC replaces the __TOC__ placeholder instead.
4550  if ( $this->mForceTocPosition ) {
4551  return str_replace( '<!--MWTOC\'"-->', $toc, $full );
4552  } else {
4553  return $full;
4554  }
4555  }
4556 
/**
 * Transform wikitext before it is saved: strip NUL characters, normalise
 * line endings and, if the options ask for it, run the pre-save pass.
 *
 * @param string $text Wikitext to transform
 * @param Title $title
 * @param User $user User the transform is run for
 * @param ParserOptions $options
 * @param bool $clearState Passed to startParse(); when true the parser is
 *  also locked for the duration of the call
 * @return string Transformed wikitext
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	if ( $clearState ) {
		// The return value is kept in a local so that it lives until this
		// method returns (presumably a scope-bound lock - verify in lock())
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings. The latter
	// is kept for backwards-compatibility with other code that just calls
	// PST, but this should already be handled in TextContent subclasses
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$transformed = $this->mStripState->unstripBoth( $text );

	$this->setUser( null ); # Reset

	return $transformed;
}
4594 
/**
 * Second pass of the pre-save transform: expand {{subst:...}}, replace
 * signature tildes and apply the "pipe trick" link shortcuts.
 *
 * @param string $text Wikitext being saved
 * @param User $user User whose signature replaces ~~~ / ~~~~
 * @return string Wikitext with substitutions applied
 */
private function pstPass2( $text, $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		// strtr() with an array tries the longest key first, so five tildes
		// win over four, and four over three.
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->mOutput->setFlag( 'user-signature' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257).
	// The brackets are U+FF08 / U+FF09, written as escapes so they survive
	// re-encoding. With ASCII parentheses this pattern would match every
	// [[page|]] link and truncate the label to the first character.
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\u{FF08}$tc+\u{FF09})\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma;
	// the double-width comma is U+FF0C)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |\u{FF0C})$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		// Current title has a "(context)" suffix: reuse its prefix and suffix
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		// Current title has a prefix and/or a ", context" suffix
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4664 
/**
 * Build the wikitext signature for a user.
 *
 * A "fancy" signature that validates is cleaned and returned as-is. In every
 * other case the result is the 'signature' (or 'signature-anon') message
 * rendered with the escaped user name and nickname.
 *
 * @param User &$user User to sign for
 * @param string|bool $nickname Nickname to use; false reads the 'nickname' user option
 * @param bool|null $fancySig Whether the nickname is raw wikitext; null reads
 *  the 'fancysig' user option
 * @return string Signature wikitext
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Fill in whatever the caller left out from the user's preferences
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# An unset/empty nickname means "sign with the user name"
	if ( $nickname == null ) {
		$nickname = $username;
	}

	if ( mb_strlen( $nickname ) > $this->siteConfig->get( 'MaxSigChars' ) ) {
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		if ( $this->validateSig( $nickname ) !== false ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}
		# Failed to validate; fall back to the default
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	$msgName = $user->isAnon() ? 'signature-anon' : 'signature';

	$message = wfMessage( $msgName, $userText, $nickText );
	return $message->inContentLanguage()->title( $this->getTitle() )->text();
}
4719 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|bool The unchanged text when well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4729 
/**
 * Clean up signature text: force every template call to be a subst: call,
 * drop nested signature tildes, and run the text through the preprocessor.
 *
 * @param string $text Signature wikitext
 * @param bool $parsing True when called from inside an ongoing parse. When
 *  false, the parser is locked and started against $wgTitle first, and strip
 *  markers are expanded before returning.
 * @return string Cleaned signature, or the input unchanged when the
 *  clean-signatures option is off
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		global $wgTitle;
		// Held in a local so it lives until this method returns
		$magicScopeVariable = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	# => Move this logic to braceSubstitution()
	$subst = $this->magicWordFactory->get( 'subst' );
	// Matches "{{" that is not already followed by the subst magic word
	$pattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$replacement = '{{' . $subst->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $pattern, $replacement, $text ) );

	$dom = $this->preprocessToDom( $text );
	$frame = $this->getPreprocessor()->newFrame();
	$text = $frame->expand( $dom );

	if ( $standalone ) {
		$text = $this->mStripState->unstripBoth( $text );
	}

	return $text;
}
4770 
/**
 * Remove signature markers (runs of three to five tildes) from text, so a
 * signature cannot itself contain a signature.
 *
 * @param string $text
 * @return string Text with each match of ~{3,5} removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4781 
4792  $outputType, $clearState = true
4793  ) {
4794  $this->startParse( $title, $options, $outputType, $clearState );
4795  }
4796 
4804  $outputType, $clearState = true
4805  ) {
4806  $this->setTitle( $title );
4807  $this->mOptions = $options;
4808  $this->setOutputType( $outputType );
4809  if ( $clearState ) {
4810  $this->clearState();
4811  }
4812  }
4813 
/**
 * Preprocess the text of an interface message.
 *
 * A static flag guards against re-entry: if this method is called again
 * while a call is already in progress, the text is returned untouched.
 *
 * @param string $text Message wikitext
 * @param ParserOptions $options Options passed on to preprocess()
 * @param Title|null $title Title context; falls back to $wgTitle when falsy
 * @return string Preprocessed text, or the original text on re-entry
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		// Always release the guard. If preprocess() threw and the flag
		// stayed set, every later call in this request would silently
		// return its input untransformed.
		$executing = false;
	}
}
4841 
/**
 * Register a callback for an extension tag and add the tag to the strip list.
 *
 * The tag name is lower-cased before it is stored.
 *
 * @param string $tag Tag name, without angle brackets
 * @param callable $callback Handler for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for this tag, if any
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	// Strict comparison: with loose in_array() numeric-looking tag names such
	// as "1e3" and "1000" compare equal, and the new tag would not be added.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4879 
/**
 * Register a callback for a "transparent" tag. Unlike setHook(), the tag is
 * not added to the strip list.
 *
 * The tag name is lower-cased before it is stored.
 *
 * @param string $tag Tag name, without angle brackets
 * @param callable $callback Handler for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for this tag, if any
 */
public function setTransparentTagHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		// The message names this method so the failing call is easy to find
		throw new MWException(
			"Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call"
		);
	}
	$oldVal = $this->mTransparentTagHooks[$tag] ?? null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
4907 
/**
 * Remove all registered tag hooks and function tag hooks, and restore the
 * strip list to its default. Transparent tag hooks are not touched here.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
4916 
/**
 * Register a parser function callback under a magic word ID and index all
 * of the magic word's synonyms for lookup.
 *
 * @param string $id Magic word ID
 * @param callable $callback Function implementing the parser function
 * @param int $flags Bitfield of SFH_* constants; SFH_NO_HASH suppresses the
 *  leading '#' on the registered synonyms
 * @throws MWException If the ID does not resolve to a magic word
 * @return callable|null The callback previously registered for this ID, if any
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	// 0 = case-insensitive bucket, 1 = case-sensitive bucket
	$caseBucket = (int)$magicWord->isCaseSensitive();
	$addHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $magicWord->getSynonyms() as $synonym ) {
		# Case-insensitive words are stored lower-cased
		if ( !$caseBucket ) {
			$synonym = $this->contLang->lc( $synonym );
		}
		# Add leading hash
		if ( $addHash ) {
			$synonym = '#' . $synonym;
		}
		# Remove trailing colon
		if ( substr( $synonym, -1 ) === ':' ) {
			$synonym = substr( $synonym, 0, -1 );
		}
		$this->mFunctionSynonyms[$caseBucket][$synonym] = $id;
	}

	return $previous;
}
4990 
/**
 * List the IDs of all registered parser function hooks.
 *
 * @return array Keys of the function hook table
 */
public function getFunctionHooks() {
	// Run first-call initialisation before reading the hook table
	$this->firstCallInit();
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5000 
/**
 * Register a function tag hook and add the tag to the strip list.
 *
 * The tag name is lower-cased before it is stored.
 *
 * @param string $tag Tag name, without angle brackets
 * @param callable $callback Handler for the tag
 * @param int $flags Flags stored alongside the callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return array|null The [ callback, flags ] pair previously registered, if any
 */
public function setFunctionTagHook( $tag, callable $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}
	$old = $this->mFunctionTagHooks[$tag] ?? null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	// Strict comparison: with loose in_array() numeric-looking tag names such
	// as "1e3" and "1000" compare equal, and the new tag would not be added.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
5025 
/**
 * Replace link placeholders in the text, in place, using the parser's
 * link holder collection.
 *
 * @param string &$text Text passed to the link holder collection
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5036 
/**
 * Replace link placeholders using the link holder collection's text-only
 * replacement.
 *
 * @param string $text
 * @return string Result of the collection's replaceText()
 */
public function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;

	return $holders->replaceText( $text );
}
5047 
/**
 * Render the content of a gallery tag as HTML.
 *
 * Each non-empty line of $text has the form "Title|param|param|...". The
 * parameters may set alt text, a link target, handler-specific options, or
 * (anything unrecognised) the caption; the last caption-like parameter wins.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Tag attributes. Recognised keys: mode, showfilename,
 *  caption, perrow, widths, heights
 * @return string Gallery HTML
 */
public function renderImageGallery( $text, $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	if ( isset( $params['showfilename'] ) ) {
		$ig->setShowFilename( true );
	} else {
		$ig->setShowFilename( false );
	}
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# NOTE(review): the statement defining $title was missing from this copy
		# of the file; assumed to default to the File namespace - confirm.
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can overide the file name, and chose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as TraditionalImageGallery does that later.
		# NOTE(review): the statement defining $file was missing from this copy
		# of the file; assumed to be the non-registering fetch - confirm.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = $this->stripAltText( $match, false );
							// The pipe must be escaped: unescaped it is a regex
							// alternation, and any value merely starting with
							// "-{R" or ending with "}-" would be truncated.
							if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								// Strip the 4-char "-{R|" prefix and 2-char "}-" suffix.
								$linkValue = substr( $linkValue, 4, -2 );
							}
							list( $type, $target ) = $this->parseLinkParameter( $linkValue );
							if ( $type === 'link-url' ) {
								$link = $target;
								$this->mOutput->addExternalLink( $target );
							} elseif ( $type === 'link-title' ) {
								$link = $target->getLinkURL();
								$this->mOutput->addLink( $target );
							}
							break;
						default:
							// Must be a handler specific parameter.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5219 
/**
 * Get the image parameter map and matching magic word array for a media
 * handler. Results are cached per handler class on the parser.
 *
 * @param object|bool $handler Media handler, or a falsy value for none
 * @return array Two elements: the map of magic word ID => [ type, name ],
 *  and the magic word array built from the map's keys
 */
public function getImageParams( $handler ) {
	# Built-in parameter names, grouped by the params bucket they land in
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;

	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$handlerClass] ) ) {
		return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
	}

	# Build the static magic-word map once per process
	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $type => $names ) {
			foreach ( $names as $name ) {
				// For grep: img_left, img_right, img_center, img_none,
				// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
				// img_bottom, img_text_bottom,
				// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
				// img_border, img_link, img_alt, img_class
				$magicName = str_replace( '-', '_', "img_$name" );
				$internalParamMap[$magicName] = [ $type, $name ];
			}
		}
	}

	# Add handler params on top of the built-in ones
	$paramMap = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magic => $paramName ) {
			$paramMap[$magic] = [ 'handler', $paramName ];
		}
	}

	$this->mImageParams[$handlerClass] = $paramMap;
	$this->mImageParamsMagicArray[$handlerClass] =
		$this->magicWordFactory->newArray( array_keys( $paramMap ) );

	return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
}
5269 
/**
 * Parse image options text and build the HTML for an image link.
 *
 * The options text has the form "options|alt text". Options are:
 *  * thumbnail   make a thumbnail with enlarge-icon and caption, alignment depends on lang
 *  * left        no resizing, just left align. label is used for alt= only
 *  * right       same, but right aligned
 *  * none        same, but not aligned
 *  * ___px       scale to ___ pixels width, no aligning. e.g. use in taxobox
 *  * center      center the image
 *  * frame       Keep original image size, no magnify-button.
 *  * framed      Same as "frame"
 *  * frameless   like 'thumb' but without a frame. Keeps user preferences for width
 *  * upright     reduce width for upright images, rounded to full __0 px
 *  * border      draw a 1px border around the image
 *  * alt         Text for HTML alt attribute (defaults to empty)
 *  * class       Set a class for img node
 *  * link        Set the target of the image link. Can be external, interwiki, or local
 * vertical-align values (no % or length right now):
 *  baseline, sub, super, top, text-top, middle, bottom, text-bottom
 *
 * @param Title $title Title of the image
 * @param string $options Pipe-separated options text
 * @param LinkHolderArray|bool $holders Link holders used when stripping alt
 *  text; false uses the parser's own
 * @return string HTML returned by Linker::makeImageLink()
 */
public function makeImage( $title, $options, $holders = false ) {
	# Protect LanguageConverter markup when splitting into parts.
	# The opening of this call was missing from this copy of the file, leaving
	# $parts undefined; it mirrors the equivalent split in renderImageGallery().
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							// parseLinkParameter() returns [ type, target ]; the
							// type becomes the key stored under $params['frame']
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		// Anything that did not validate as an option is treated as the
		// caption; the last such part wins.
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text. Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers. It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text. This is the framed/thumbnail case. If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5479 
/**
 * Classify the value of an image "link" parameter and register the link
 * with the parser output.
 *
 * @param string $value Link target text
 * @return array Two elements, [ type, target ]:
 *  - [ 'no-link', false ] for an empty value
 *  - [ 'link-url', string ] for a valid URL with a recognised protocol
 *  - [ 'link-title', Title ] for a valid page title
 *  - [ null, false ] when the value is neither a valid URL nor a valid title
 */
public function parseLinkParameter( $value ) {
	$chars = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$prots = $this->mUrlProtocols;

	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		// Starts like a URL: it must then be a complete valid URL.
		// It is never retried as a page title.
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );

		return [ 'link-url', $value ];
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );

	return [ 'link-title', $linkTitle ];
}
5522 
/**
 * Reduce caption wikitext to plain text suitable for an alt or title
 * attribute: link holders and strip markers are expanded, legacy
 * semicolon-less entities are neutralised, and all tags are removed.
 *
 * @param string $caption Caption text
 * @param LinkHolderArray|bool $holders Link holders to expand from; a falsy
 *  value uses the parser's own link holders
 * @return string Plain text
 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip). We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
	$tooltip = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$tooltip = $this->mStripState->unstripBoth( $tooltip );

	# Compatibility hack! In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$legacyEntityAmpersand = "/
		& # 1. entity prefix
		(?= # 2. followed by:
		(?: # a. one of the legacy semicolon-less named entities
		A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
		C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
		GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
		O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
		U(?:acute|circ|grave|uml)|Yacute|
		a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
		c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
		divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
		frac(?:1(?:2|4)|34)|
		gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
		i(?:acute|circ|excl|grave|quest|uml)|laquo|
		lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
		m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
		not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
		o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
		p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
		s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
		u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$)) # b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx";
	$tooltip = preg_replace( $legacyEntityAmpersand, '&amp;', $tooltip );

	return Sanitizer::stripAllTags( $tooltip );
}
5580 
/**
 * Mark the current parser output as uncacheable by setting its cache
 * expiry to zero.
 *
 * @throws MWException If there is no parser output, i.e. nothing is being parsed
 */
public function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	$output = $this->mOutput;
	if ( !$output ) {
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}

	// new style, for consistency
	$output->updateCacheExpiry( 0 );
}
5594 
/**
 * Expand variables and strip markers in a piece of text. The text is
 * updated in place through the reference and also returned.
 *
 * @param string &$text Text to expand; overwritten with the result
 * @param PPFrame|bool $frame Frame passed on to replaceVariables()
 * @return string The expanded text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	$text = $expanded;
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5608 
/**
 * List every registered tag name: transparent tag hooks first, then tag
 * hooks, then function tag hooks.
 *
 * @return array Tag names
 */
public function getTags() {
	$this->firstCallInit();

	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$plainTags = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparentTags, $plainTags, $functionTags );
}
5622 
/**
 * Get the parser function synonym table built by setFunctionHook().
 *
 * @return array Map of case bucket (0 = insensitive, 1 = sensitive) to
 *  [ synonym => magic word ID ]
 */
public function getFunctionSynonyms() {
	// Run first-call initialisation before reading the table
	$this->firstCallInit();
	$synonyms = $this->mFunctionSynonyms;

	return $synonyms;
}
5631 
/**
 * Get the URL protocols value held by this parser. Elsewhere in this class
 * it is interpolated into regular expressions as an alternation.
 *
 * @return string
 */
public function getUrlProtocols() {
	$protocols = $this->mUrlProtocols;

	return $protocols;
}
5639 
/**
 * Replace transparent tags in the text with the output of their hooks.
 *
 * Tags are first extracted and replaced by markers; each marker is then
 * substituted with the hook's return value, or with the original tag text
 * when no hook is registered under the lower-cased tag name.
 *
 * @param string $text
 * @return string Text with transparent tags replaced
 */
public function replaceTransparentTags( $text ) {
	$hooks = $this->mTransparentTagHooks;
	$extracted = [];
	$text = self::extractTagsAndParams( array_keys( $hooks ), $text, $extracted );

	$substitutions = [];
	foreach ( $extracted as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;
		$tagName = strtolower( $element );

		if ( !isset( $this->mTransparentTagHooks[$tagName] ) ) {
			// No hook for this tag: put the original tag text back
			$substitutions[$marker] = $tag;
			continue;
		}

		$substitutions[$marker] = call_user_func_array(
			$this->mTransparentTagHooks[$tagName],
			[ $content, $params, $this ]
		);
	}

	return strtr( $text, $substitutions );
}
5671 
/**
 * Shared implementation of getSection() and replaceSection(): walk the
 * top-level preprocessor nodes and either collect the requested section
 * ('get') or rebuild the page with that section replaced ('replace').
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier. It is split on '-'; the
 *  last part is the section index, and a preceding 'T' part sets the
 *  PTD_FOR_INCLUSION preprocessor flag.
 * @param string $mode Either 'get' or 'replace'
 * @param string $newText In 'replace' mode, the replacement section text.
 *  In 'get' mode, the value returned when the section is not found.
 * @return string The section text ('get') or the full updated text ('replace')
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	$getting = ( $mode === 'get' );
	$replacing = ( $mode === 'replace' );

	# Process section extraction flags
	$sectionParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $sectionParts );
	$flags = in_array( 'T', $sectionParts, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			return $getting ? '' : $newText;
		}

		return $getting ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			# In replace mode everything before the target is copied through
			if ( $replacing ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $getting ? $newText : $text;
	}

	# Find the end of the section, including nested sections.
	# $node is known to be set here, so the body runs at least once.
	while ( $node ) {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			$curLevel = $bits['level'];
			# A different heading at the same or a shallower level ends the section
			if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		if ( $getting ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	}

	# Write out the remainder (in replace mode only)
	if ( $replacing ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $outText ) ) {
		# Re-insert stripped tags
		$outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
	}

	return $outText;
}
5811 
/**
 * Fetch the wikitext of a single section.
 *
 * Thin wrapper that runs extractSections() in 'get' mode.
 *
 * @param string $text Text to look in
 * @param string|int $sectionId Section identifier as understood by
 *   extractSections() (optional flags plus the section index)
 * @param string $defaultText Value returned when the section is not found
 * @return string Text of the requested section, or $defaultText
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';

	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5829 
/**
 * Replace one section of a page's wikitext and return the resulting text.
 *
 * Thin wrapper that runs extractSections() in 'replace' mode.
 *
 * @param string $oldText Former text of the page
 * @param string|int $sectionId Section identifier as understood by
 *   extractSections() (optional flags plus the section index)
 * @param string $newText Text that replaces the section
 * @return string Modified text, or $oldText unchanged if the section
 *   does not exist
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';

	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
5845 
/**
 * Accessor for the ID of the revision being parsed.
 *
 * @return int|null Value of $this->mRevisionId; getRevisionObject() treats
 *   null as "preview mode"
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;

	return $revisionId;
}
5859 
/**
 * Get the revision object for $this->mRevisionId, caching it on first use.
 *
 * The revision comes from the "current revision" callback in the parser
 * options. If that disagrees with mRevisionId, the revision is fetched by ID
 * instead.
 *
 * @return Revision|null Null in preview mode when the callback returned an
 *   already-saved revision; otherwise whatever was resolved (which may itself
 *   be a falsy callback result)
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject !== null ) {
		return $this->mRevisionObject;
	}

	// NOTE: the callback is consulted even when mRevisionId is null, so that
	// a not-yet-saved revision can be injected (useful when parsing unsaved
	// content). It may also inject an OLD revision that was already loaded,
	// so "current" is a bit of a misnomer and the call cannot be skipped just
	// because mRevisionId is set.
	$revisionCallback = $this->mOptions->getCurrentRevisionCallback();
	$rev = call_user_func( $revisionCallback, $this->getTitle(), $this );

	$isPreview = ( $this->mRevisionId === null );
	if ( $isPreview && $rev && $rev->getId() ) {
		// Preview mode, yet the callback handed back an existing (saved)
		// revision: most likely the page's current revision, which is not
		// what we want here. Ignore it, without caching, and report null.
		// The callback is still called above so that an unsaved revision can
		// be injected, e.g. for self-transclusion previews.
		return null;
	}

	// For a new revision the callback should already have been set up to
	// return the matching object. If the IDs differ, fall back to loading
	// the revision by mRevisionId for sanity.
	$idMismatch = $this->mRevisionId && $rev && $rev->getId() != $this->mRevisionId;
	if ( $idMismatch ) {
		$rev = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $rev;

	return $rev;
}
5902 
/**
 * Get the timestamp associated with the current revision, adjusted to the
 * site-default timezone offset.
 *
 * The value is computed once and cached in $this->mRevisionTimestamp. When
 * no revision object is available, the current time is used instead.
 *
 * @return string The adjusted timestamp, as produced by the content
 *   language's userAdjust() (NOTE(review): exact format not visible here)
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
	}

	# Return the cached value; without this the method always yields null
	return $this->mRevisionTimestamp;
}
5922 
/**
 * Get the name of the user that edited the revision being parsed.
 *
 * The result is cached in $this->mRevisionUser once it has been resolved.
 *
 * @return string|null User name, or null when there is no revision object
 *   and we are neither in wiki output mode nor in a preview
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revObject = $this->getRevisionObject();
	if ( $revObject ) {
		$this->mRevisionUser = $revObject->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this template is subst:'ed the revision id will be blank,
		# so fall back to the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
5942 
/**
 * Get the size of the revision being parsed.
 *
 * The result is cached in $this->mRevisionSize.
 *
 * @return int|null Size reported by the revision object, or the parser
 *   input size when no revision object is available
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revObject = $this->getRevisionObject();

	# If this variable is subst:'ed the revision id will be blank, so use the
	# parser input size instead, because the substitution itself will change
	# the size.
	$this->mRevisionSize = $revObject ? $revObject->getSize() : $this->mInputSize;

	return $this->mRevisionSize;
}
5963 
/**
 * Set the default sort key and record it on the parser output.
 *
 * Stores the key on the parser and mirrors it into the output object as the
 * 'defaultsort' property.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$propertyName = 'defaultsort';
	$this->mOutput->setProperty( $propertyName, $sort );
}
5973 
/**
 * Get the default sort key, normalising "not set" to an empty string.
 *
 * Use getCustomDefaultSort() to tell an unset key (false) apart from an
 * empty one.
 *
 * @return string The stored key, or '' when $this->mDefaultSort is false
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
5991 
/**
 * Get the raw stored default sort key.
 *
 * Unlike getDefaultSort(), the stored value is returned untouched, so the
 * "no key set" sentinel (false) is visible to the caller.
 *
 * @return string|bool Value of $this->mDefaultSort
 */
public function getCustomDefaultSort() {
	$sortKey = $this->mDefaultSort;

	return $sortKey;
}
6001 
/**
 * Turn already-stripped heading text into a normalized section name.
 *
 * Steps, in order: normalize whitespace, decode character references, then
 * apply the same fragment normalization that links go through (see
 * normalizeSectionName()).
 *
 * @param string $text Section name with wikitext markup already stripped
 * @return string Normalized section name, not yet escaped for use as an anchor
 */
private static function getSectionNameFromStrippedText( $text ) {
	# Normalize whitespace first, so that the guessed anchor matches the ID
	# generated for the heading itself
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );
	$text = Sanitizer::decodeCharReferences( $text );
	$text = self::normalizeSectionName( $text );
	return $text;
}
6008 
/**
 * Build a "#fragment" anchor from a section name, escaped for use in links.
 *
 * @param string $sectionName Normalized section name
 * @return string Anchor including the leading "#"
 */
private static function makeAnchor( $sectionName ) {
	$escapedId = Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $escapedId;
}
6012 
/**
 * Build a "#fragment" anchor for a section name, using the legacy ID
 * encoding when the wiki has it configured as its fallback fragment mode.
 *
 * @param string $sectionName Normalized section name
 * @return string Anchor including the leading "#"
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->siteConfig->get( 'FragmentMode' );
	if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
		// ForAttribute() and ForLink() are the same for legacy encoding.
		// Without this assignment $id is undefined and the result is a bare "#".
		$id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
	} else {
		$id = Sanitizer::escapeIdForLink( $sectionName );
	}

	return "#$id";
}
6024 
/**
 * Guess the anchor for a section from its heading wikitext.
 *
 * The heading is stripped of markup, normalized, and turned into a link
 * anchor.
 *
 * @param string $text Heading wikitext
 * @return string Anchor including the leading "#"
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip markup first
	$stripped = $this->stripSectionName( $text );

	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6039 
/**
 * Same as guessSectionNameFromWikiText(), except that the anchor is built
 * by makeLegacyAnchor().
 *
 * @param string $text Heading wikitext
 * @return string Anchor including the leading "#"
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip markup first
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6055 
/**
 * Like guessSectionNameFromWikiText(), but for text whose wikitext markup
 * has already been stripped.
 *
 * @param string $text Section name, plain text
 * @return string Anchor including the leading "#"
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6065 
/**
 * Apply the same normalization to a section name as link fragments get.
 *
 * The name is parsed as the fragment of a title string ("#<name>") and the
 * resulting fragment is returned.
 *
 * @param string $text Section name
 * @return string Normalized fragment, or $text unchanged when the title
 *   parser rejects the string
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();

	try {
		$split = $titleParser->splitTitleString( '#' . $text );

		return $split['fragment'];
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}
}
6083 
/**
 * Strip wikitext and HTML markup from heading text, for use in a section
 * anchor.
 *
 * Removes internal and external link markup (keeping the link text), runs
 * the quote handling for italics/bold, and finally deletes everything
 * between "<" and ">".
 *
 * @param string $text Heading text
 * @return string Text with the markup removed
 */
public function stripSectionName( $text ) {
	# Internal links: piped links keep their label, plain links their target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# External links: keep only the link text
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Wikitext quotes (italics & bold) become HTML here...
	$text = $this->doQuotes( $text );

	# ...and all HTML tags are then removed
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6116 
/**
 * Test helper: expand variables in $text, unstrip, and remove disallowed
 * HTML tags.
 *
 * Takes the parser lock and starts a parse for the given title and options
 * before doing so.
 *
 * @param string $text Input wikitext
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @return string Processed text
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	# Must stay in scope until we return: dropping it releases the parser lock
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6138 
/**
 * Test helper: run the pre-save transform on $text, acting as the user
 * taken from $options.
 *
 * @param string $text Input wikitext
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of preSaveTransform()
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
6148 
/**
 * Test helper: run testSrvus() with the preprocess output type.
 *
 * @param string $text Input wikitext
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of testSrvus()
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;

	return $this->testSrvus( $text, $title, $options, $outputType );
}
6158 
/**
 * Apply a callback to every part of a string that is not a strip marker.
 *
 * The string is scanned for MARKER_PREFIX ... MARKER_SUFFIX spans. Text
 * between markers is passed through $callback, and the markers themselves
 * are copied to the output verbatim. A prefix with no matching suffix causes
 * the rest of the string to be copied unchanged.
 *
 * @param string $s Input text
 * @param callable $callback Receives a chunk of non-marker text and returns
 *   its replacement
 * @return string Reassembled text
 */
public function markerSkipCallback( $s, $callback ) {
	$out = '';
	$total = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	$pos = 0;
	while ( $pos < $total ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			# No more markers: transform the tail and stop
			$out .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Transform the text leading up to the marker
		$out .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			# Unterminated marker: copy the remainder untouched
			$out .= substr( $s, $start );
			break;
		}

		# Copy the complete marker verbatim and continue after it
		$end += $suffixLength;
		$out .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $out;
}
6198 
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the current strip state.
 *
 * @param string $text
 * @return string Result of StripState::killMarkers()
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6208 
/**
 * Bundle half-parsed text with the strip-state and link-holder data it
 * refers to, so that it can be restored by unserializeHalfParsedText().
 *
 * @deprecated since 1.31
 * @param string $text Half-parsed text
 * @return array With keys 'text', 'version', 'stripState' and 'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
	wfDeprecated( __METHOD__, '1.31' );

	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
6236 
/**
 * Load data produced by serializeHalfParsedText() back into this parser.
 *
 * The stored strip state and link holders are merged into the parser's own,
 * and the text comes back adjusted accordingly.
 *
 * @deprecated since 1.31
 * @param array $data Data from serializeHalfParsedText()
 * @return string The restored half-parsed text
 * @throws MWException If the data carries no version or a different one
 */
public function unserializeHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# Merge the strip state first...
	$texts = [ $data['text'] ];
	$texts = $this->mStripState->merge( $data['stripState'], $texts );

	# ...then renumber the links
	$texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );

	# Only one text went in, so only one comes out
	return $texts[0];
}
6269 
/**
 * Check whether data from serializeHalfParsedText() carries the version this
 * parser understands, i.e. whether unserializeHalfParsedText() would accept
 * it.
 *
 * @deprecated since 1.31
 * @param array $data
 * @return bool
 */
public function isValidHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
6284 
/**
 * Parse a width, or combined "<width>x<height>", parameter value.
 *
 * A trailing "px" is tolerated in both forms for backward compatibility
 * (T15500).
 *
 * @param string $value Parameter text, e.g. "200px" or "200x100px"
 * @param bool $parseHeight Whether the "<width>x<height>" form is accepted
 * @return array Empty when $value is '' or unparseable; otherwise has the
 *   key 'width' and, for the combined form, also 'height' (both integers)
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	$matches = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $matches ) ) {
		# Width and height given together
		return [
			'width' => intval( $matches[1] ),
			'height' => intval( $matches[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		# Width only; intval() ignores the optional "px" suffix
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6313 
/**
 * Mark the parser as busy and return a guard that clears the mark again.
 *
 * The returned ScopedCallback resets $this->mInParse to false when it runs,
 * so callers must keep it in a local variable for the duration of the parse.
 *
 * @return ScopedCallback Guard whose callback releases the lock
 * @throws MWException If the parser is already locked
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Remember the backtrace of whoever takes the lock, so that a second
	// lock attempt can report the owner for easier debugging
	$this->mInParse = ( new Exception )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6340 
/**
 * Remove the enclosing <p>...</p> from HTML that consists of exactly one
 * paragraph.
 *
 * Input with more than one paragraph, or with no outer <p> wrapper, comes
 * back unchanged.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$matches = [];
	$isSingleParagraph = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $matches )
		// A closing tag inside the capture means there are several paragraphs
		&& strpos( $matches[1], '</p>' ) === false;

	return $isSingleParagraph ? $matches[1] : $html;
}
6359 
/**
 * Return a parser that is safe to use right now.
 *
 * If this parser is in the middle of a parse (mInParse is set), a new
 * instance is created through the factory; otherwise this instance is
 * returned.
 *
 * @return Parser
 */
public function getFreshParser() {
	return $this->mInParse ? $this->factory->create() : $this;
}
6377 
/**
 * Set up PHP OOUI for this request and flag the parser output as using it.
 */
public function enableOOUI() {
	# Initialise the PHP implementation of OOUI before any widget is built;
	# flagging the output alone does not do that
	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
6388 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:5866
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
extensionSubstitution( $params, $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:3966
getFunctionSynonyms()
Definition: Parser.php:5627
static armorFrenchSpaces( $text, $space='&#160;')
Armor French spaces with a replacement character.
Definition: Sanitizer.php:1171
static register( $parser)
$mAutonumber
Definition: Parser.php:184
$mPPNodeCount
Definition: Parser.php:198
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2191
bool string $mInParse
Recursive call protection.
Definition: Parser.php:256
const MARKER_PREFIX
Definition: Parser.php:135
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
Definition: deferred.txt:11
wfUrlProtocols( $includeProtocolRelative=true)
Returns a regular expression of url protocols.
null means default in associative array form
Definition: hooks.txt:1982
setLinkID( $id)
Definition: Parser.php:914
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1982
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1612
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4740
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object Deprecated:
$mTplRedirCache
Definition: Parser.php:200
LinkRenderer $mLinkRenderer
Definition: Parser.php:264
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:5928
doMagicLinks( $text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1510
parseLinkParameter( $value)
Parse the value of the 'link' parameter in image syntax ([[File:Foo.jpg|link=<value>]]).
Definition: Parser.php:5498
const OT_PREPROCESS
Definition: Defines.php:186
$data
Utility to generate mapping file used in mw.Title (phpCharToUpper.json)
static element( $element, $attribs=[], $contents='')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:232
doHeadings( $text)
Parse headers and return html.
Definition: Parser.php:1688
either a plain
Definition: hooks.txt:2043
static tidy( $text)
Interface with Remex tidy.
Definition: MWTidy.php:42
$mDoubleUnderscores
Definition: Parser.php:200
SpecialPageFactory $specialPageFactory
Definition: Parser.php:276
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
killMarkers( $text)
Remove any strip markers found in the given text.
Definition: Parser.php:6205
static getExternalLinkRel( $url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:1987
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:5826
wfIsHHVM()
Check if we are running under HHVM.
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:250
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
bool $mFirstCall
Whether firstCallInit still needs to be called.
Definition: Parser.php:159
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition: Sanitizer.php:66
getRevisionTimestampSubstring( $start, $len, $mtts, $variable)
Definition: Parser.php:2936
nextLinkID()
Definition: Parser.php:907
getTemplateDom( $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3580
Title( $x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:846
const SPACE_NOT_NL
Definition: Parser.php:104
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1982
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1403
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
const OT_PLAIN
Definition: Parser.php:115
getTags()
Accessor.
Definition: Parser.php:5614
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Definition: router.php:42
const OT_WIKI
Definition: Parser.php:112
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2159
User $mUser
Definition: Parser.php:207
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:2964
static cleanUrl( $url)
Definition: Sanitizer.php:2030
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1726
static isEnabled()
Definition: MWTidy.php:54
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:4996
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:2008
callParserFunction( $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3488
magicLinkCallback( $m)
Definition: Parser.php:1541
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
database rows
Definition: globals.txt:10
wfHostname()
Fetch server name for use in error reporting etc.
braceSubstitution( $piece, $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:3138
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:922
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
preprocessToDom( $text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:2994
const TOC_START
Definition: Parser.php:138
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
SectionProfiler $mProfiler
Definition: Parser.php:259
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <...
$sort
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:115
null for the local wiki Added in
Definition: hooks.txt:1585
There are three types of nodes:
$mHeadings
Definition: Parser.php:200
$value
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:4911
const NS_SPECIAL
Definition: Defines.php:53
clearState()
Clear Parser state.
Definition: Parser.php:398
const EXT_LINK_ADDR
Definition: Parser.php:97
replaceExternalLinks( $text)
Replace external links (REL)
Definition: Parser.php:1909
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message key
Definition: hooks.txt:2151
static factory( $mode=false, IContextSource $context=null)
Get a new image gallery.
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6098
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:5033
static statelessFetchRevision(Title $title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3646
armorLinks( $text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2527
static activeUsers()
Definition: SiteStats.php:130
$mLinkID
Definition: Parser.php:197
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4866
static createAssocArgs( $args)
Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
Definition: Parser.php:3068
LinkRendererFactory $linkRendererFactory
Definition: Parser.php:282
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:779
$mGeneratedPPNodeCount
Definition: Parser.php:198
$mRevisionId
Definition: Parser.php:224
target page
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4777
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:990
const NS_TEMPLATE
Definition: Defines.php:74
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1799
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:137
setTitle( $t)
Set the context title.
Definition: Parser.php:818
const NO_ARGS
fetchFileNoRegister( $title, $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3827
see documentation in includes Linker php for Linker::makeImageLink or false for current used if you return false $parser
Definition: hooks.txt:1799
MagicWordArray $mVariables
Definition: Parser.php:166
This list may contain false positives That usually means there is additional text with links below the first Each row contains links to the first and second as well as the first line of the second redirect text
const SFH_NO_HASH
Definition: Parser.php:85
static setupOOUI( $skinName='default', $dir='ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
setTransparentTagHook( $tag, callable $callback)
As setHook(), but letting the contents be parsed.
Definition: Parser.php:4897
$mForceTocPosition
Definition: Parser.php:202
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5856
const OT_PREPROCESS
Definition: Parser.php:113
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached Will warn at most once the user per limitation type...
Definition: Parser.php:3116
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition: Parser.php:6049
$mFunctionSynonyms
Definition: Parser.php:148
If you want to remove the page from your watchlist later
getPreSaveTransform()
Transform wiki markup when saving the page?
$mOutputType
Definition: Parser.php:221
interwikiTransclude( $title, $action)
Transclude an interwiki link.
Definition: Parser.php:3846
$mDefaultStripList
Definition: Parser.php:151
$mExtLinkBracketedRegex
Definition: Parser.php:173
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place $output
Definition: hooks.txt:2217
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message. Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item. Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page. Return false to stop further processing of the tag $reader:XMLReader object & $pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision. Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag. Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUnknownUser':When a user doesn 't exist locally, this hook is called to give extensions an opportunity to auto-create it. If the auto-creation is successful, return false. $name:User name 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload. Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports. & $fullInterwikiPrefix:Interwiki prefix, may contain colons. & $pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable. Can be used to lazy-load the import sources list. & $importSources:The value of $wgImportSources. Modify as necessary. See the comment in DefaultSettings.php for the detail of how to structure this array. 'InfoAction':When building information to display on the action=info page. 
$context:IContextSource object & $pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect. & $title:Title object for the current page & $request:WebRequest & $ignoreRedirect:boolean to skip redirect check & $target:Title/string of redirect target & $article:Article object 'InternalParseBeforeLinks':during Parser 's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InternalParseBeforeSanitize':during Parser 's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings. Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not. Return true without providing an interwiki to continue interwiki search. $prefix:interwiki prefix we are looking for. & $iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 'InvalidateEmailComplete':Called after a user 's email has been invalidated successfully. $user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification. Callee may modify $url and $query, URL will be constructed as $url . 
$query & $url:URL to index.php & $query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) & $article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() & $ip:IP being check & $result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from & $allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn 't match your organization. $addr:The e-mail address entered by the user & $result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user & $result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we 're looking for a messages file for & $file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces. Do not use this hook to add namespaces. Use CanonicalNamespaces for that. & $namespaces:Array of namespaces indexed by their numbers 'LanguageGetTranslatedLanguageNames':Provide translated language names. & $names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page 's language links. This is called in various places to allow extensions to define the effective language links for a page. $title:The page 's Title. & $links:Array with elements of the form "language:title" in the order that they will be output. & $linkFlags:Associative array mapping prefixed links to arrays of flags. 
Currently unused, but planned to provide support for marking individual language links in the UI, e.g. for featured articles. 'LanguageSelector':Hook to change the language selector available on a page. $out:The output page. $cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED since 1.28! Use HtmlPageLinkRendererBegin instead. Used when generating internal and interwiki links in Linker::link(), before processing starts. Return false to skip default processing and return $ret. See documentation for Linker::link() for details on the expected meanings of parameters. $skin:the Skin object $target:the Title that the link is pointing to & $html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1980
This document provides an overview of the usage of PageUpdater and that is
Definition: pageupdater.txt:3
makeKnownLinkHolder( $nt, $text='', $trail='', $prefix='')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2503
if( $line===false) $args
Definition: cdb.php:64
static stripOuterParagraph( $html)
Strip the outer paragraph tag (&lt;p&gt;&lt;/p&gt;) from the given HTML.
Definition: Parser.php:6351
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition: Sanitizer.php:74
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition: hooks.txt:3050
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).
Definition: User.php:48
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:780
static getInstance( $ts=false)
Get a timestamp instance in GMT.
Definition: MWTimestamp.php:39
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:780
static numberingroup( $group)
Find the number of users in a given user group.
Definition: SiteStats.php:150
stripAltText( $caption, $holders)
Definition: Parser.php:5528
setDefaultSort( $sort)
Mutator for $mDefaultSort.
Definition: Parser.php:5969
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage()...
Definition: Linker.php:252
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:94
Status::newGood()` to allow deletion, and then `return false` from the hook function. Ensure you consume the 'ChangeTagAfterDelete' hook to carry out custom deletion actions. $tag:name of the tag $user:user initiating the action & $status:Status object. See above. 'ChangeTagsListActive':Allows you to nominate which of the tags your extension uses are in active use. & $tags:list of all active tags. Append to this array. 'ChangeTagsAfterUpdateTags':Called after tags have been updated with the ChangeTags::updateTags function. Params:$addedTags:tags effectively added in the update $removedTags:tags effectively removed in the update $prevTags:tags that were present prior to the update $rc_id:recentchanges table id $rev_id:revision table id $log_id:logging table id $params:tag params $rc:RecentChange being tagged when the tagging accompanies the action, or null $user:User who performed the tagging when the tagging is subsequent to the action, or null 'ChangeTagsAllowedAdd':Called when checking if a user can add tags to a change. & $allowedTags:List of all the tags the user is allowed to add. Any tags the user wants to add( $addTags) that are not in this array will cause it to fail. You may add or remove tags to this array as required. $addTags:List of tags user intends to add. $user:User who is adding the tags. 'ChangeUserGroups':Called before user groups are changed. $performer:The User who will perform the change $user:The User whose groups will be changed & $add:The groups that will be added & $remove:The groups that will be removed 'Collation::factory':Called if $wgCategoryCollation is an unknown collation. $collationName:Name of the collation in question & $collationObject:Null. Replace with a subclass of the Collation class that implements the collation given in $collationName. 'ConfirmEmailComplete':Called after a user 's email has been confirmed successfully. 
$user:user(object) whose email is being confirmed 'ContentAlterParserOutput':Modify parser output for a given content object. Called by Content::getParserOutput after parsing has finished. Can be used for changes that depend on the result of the parsing but have to be done before LinksUpdate is called(such as adding tracking categories based on the rendered HTML). $content:The Content to render $title:Title of the page, as context $parserOutput:ParserOutput to manipulate 'ContentGetParserOutput':Customize parser output for a given content object, called by AbstractContent::getParserOutput. May be used to override the normal model-specific rendering of page content. $content:The Content to render $title:Title of the page, as context $revId:The revision ID, as context $options:ParserOptions for rendering. To avoid confusing the parser cache, the output can only depend on parameters provided to this hook function, not on global state. $generateHtml:boolean, indicating whether full HTML should be generated. If false, generation of HTML may be skipped, but other information should still be present in the ParserOutput object. & $output:ParserOutput, to manipulate or replace 'ContentHandlerDefaultModelFor':Called when the default content model is determined for a given title. May be used to assign a different model for that title. $title:the Title in question & $model:the model name. Use with CONTENT_MODEL_XXX constants. 'ContentHandlerForModelID':Called when a ContentHandler is requested for a given content model name, but no entry for that model exists in $wgContentHandlers. Note:if your extension implements additional models via this hook, please use GetContentModels hook to make them known to core. $modeName:the requested content model name & $handler:set this to a ContentHandler object, if desired. 'ContentModelCanBeUsedOn':Called to determine whether that content model can be used on a given page. 
This is especially useful to prevent some content models to be used in some special location. $contentModel:ID of the content model in question $title:the Title in question. & $ok:Output parameter, whether it is OK to use $contentModel on $title. Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok. 'ContribsPager::getQueryInfo':Before the contributions query is about to run & $pager:Pager object for contributions & $queryInfo:The query for the contribs Pager 'ContribsPager::reallyDoQuery':Called before really executing the query for My Contributions & $data:an array of results of all contribs queries $pager:The ContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'ContributionsLineEnding':Called before a contributions HTML line is finished $page:SpecialPage object for contributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'ContributionsToolLinks':Change tool links above Special:Contributions $id:User identifier $title:User page title & $tools:Array of tool links $specialPage:SpecialPage instance for context and services. Can be either SpecialContributions or DeletedContributionsPage. Extensions should type hint against a generic SpecialPage though. 'ConvertContent':Called by AbstractContent::convert when a conversion to another content model is requested. Handler functions that modify $result should generally return false to disable further attempts at conversion. $content:The Content object to be converted. $toModel:The ID of the content model to convert to. $lossy:boolean indicating whether lossy conversion is allowed. 
& $result:Output parameter, in case the handler function wants to provide a converted Content object. Note that $result->getContentModel() must return $toModel. 'ContentSecurityPolicyDefaultSource':Modify the allowed CSP load sources. This affects all directives except for the script directive. If you want to add a script source, see ContentSecurityPolicyScriptSource hook. & $defaultSrc:Array of Content-Security-Policy allowed sources $policyConfig:Current configuration for the Content-Security-Policy header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'ContentSecurityPolicyDirectives':Modify the content security policy directives. Use this only if ContentSecurityPolicyDefaultSource and ContentSecurityPolicyScriptSource do not meet your needs. & $directives:Array of CSP directives $policyConfig:Current configuration for the CSP header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'ContentSecurityPolicyScriptSource':Modify the allowed CSP script sources. Note that you also have to use ContentSecurityPolicyDefaultSource if you want non-script sources to be loaded from whatever you add. & $scriptSrc:Array of CSP directives $policyConfig:Current configuration for the CSP header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'CustomEditor':When invoking the page editor Return true to allow the normal editor to be used, or false if implementing a custom editor, e.g. for a special namespace, etc. 
$article:Article being edited $user:User performing the edit 'DeletedContribsPager::reallyDoQuery':Called before really executing the query for Special:DeletedContributions Similar to ContribsPager::reallyDoQuery & $data:an array of results of all contribs queries $pager:The DeletedContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'DeletedContributionsLineEnding':Called before a DeletedContributions HTML line is finished. Similar to ContributionsLineEnding $page:SpecialPage object for DeletedContributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'DeleteUnknownPreferences':Called by the cleanupPreferences.php maintenance script to build a WHERE clause with which to delete preferences that are not known about. This hook is used by extensions that have dynamically-named preferences that should not be deleted in the usual cleanup process. For example, the Gadgets extension creates preferences prefixed with 'gadget-', and so anything with that prefix is excluded from the deletion. &where:An array that will be passed as the $cond parameter to IDatabase::select() to determine what will be deleted from the user_properties table. $db:The IDatabase object, useful for accessing $db->buildLike() etc. 'DifferenceEngineAfterLoadNewText':called in DifferenceEngine::loadNewText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before returning true from this function. 
$differenceEngine:DifferenceEngine object 'DifferenceEngineLoadTextAfterNewContentIsLoaded':called in DifferenceEngine::loadText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before checking if the variable 's value is null. This hook can be used to inject content into said class member variable. $differenceEngine:DifferenceEngine object 'DifferenceEngineMarkPatrolledLink':Allows extensions to change the "mark as patrolled" link which is shown both on the diff header as well as on the bottom of a page, usually wrapped in a span element which has class="patrollink". $differenceEngine:DifferenceEngine object & $markAsPatrolledLink:The "mark as patrolled" link HTML(string) $rcid:Recent change ID(rc_id) for this change(int) 'DifferenceEngineMarkPatrolledRCID':Allows extensions to possibly change the rcid parameter. For example the rcid might be set to zero due to the user being the same as the performer of the change but an extension might still want to show it under certain conditions. & $rcid:rc_id(int) of the change or 0 $differenceEngine:DifferenceEngine object $change:RecentChange object $user:User object representing the current user 'DifferenceEngineNewHeader':Allows extensions to change the $newHeader variable, which contains information about the new revision, such as the revision 's author, whether the revision was marked as a minor edit or not, etc. $differenceEngine:DifferenceEngine object & $newHeader:The string containing the various #mw-diff-otitle[1-5] divs, which include things like revision author info, revision comment, RevisionDelete link and more $formattedRevisionTools:Array containing revision tools, some of which may have been injected with the DiffRevisionTools hook $nextlink:String containing the link to the next revision(if any) $status
Definition: hooks.txt:1263
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
startExternalParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4791
fetchFileAndTitle( $title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3802
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:791
const NO_TEMPLATES
static parseWidthParam( $value, $parseHeight=true)
Parse a width parameter of an image link, such as 300px or 200x300px.
Definition: Parser.php:6294
$mVarCache
Definition: Parser.php:152
disableCache()
Set a flag in the output object indicating that the content is dynamic and shouldn't be cached...
Definition: Parser.php:5586
$mRevisionObject
Definition: Parser.php:223
Title $mTitle
Definition: Parser.php:220
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1992
getPreloadText( $text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:788
makeImage( $title, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5278
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:343
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:5908
wfUrlencode( $s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
getImageParams( $handler)
Definition: Parser.php:5224
fetchCurrentRevisionOfTitle( $title)
Fetch the current revision of a given title.
Definition: Parser.php:3623
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
Factory for handling the special page list and generating SpecialPage objects.
static extractTagsAndParams( $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:1041
$mRevIdForTs
Definition: Parser.php:228
Interface for configuration instances.
Definition: Config.php:28
setUser( $user)
Set the current user.
Definition: Parser.php:809
$mStripList
Definition: Parser.php:150
$mFunctionTagHooks
Definition: Parser.php:149
const OT_PLAIN
Definition: Defines.php:188
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:168
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place or wrap services the preferred way to define a new service is the $wgServiceWiringFiles array $services
Definition: hooks.txt:2217
$mRevisionTimestamp
Definition: Parser.php:225
$mImageParams
Definition: Parser.php:153
makeLimitReport()
Set the limit report data in the current ParserOutput, and return the limit report HTML comment...
Definition: Parser.php:565
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
Definition: Sanitizer.php:1287
static newKnownCurrent(IDatabase $db, $pageIdOrTitle, $revId=0)
Load a revision based on a known page ID and current revision ID from the DB.
Definition: Revision.php:1327
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:76
const OT_WIKI
Definition: Defines.php:185
Preprocessor $mPreprocessor
Definition: Parser.php:177
wfFindFile( $title, $options=[])
Find a file.
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:976
const NS_MEDIA
Definition: Defines.php:52
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:5842
static getVersion( $flags='', $lang=null)
Return a string of the MediaWiki version with Git revision if available.
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:61
static normalizeSectionName( $text)
Apply the same normalization as code making links to this section would.
Definition: Parser.php:6072
replaceTransparentTags( $text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5650
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
argSubstitution( $piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:3914
const RECOVER_ORIG
static normalizeUrlComponent( $component, $unsafe)
Definition: Parser.php:2096
static isValid( $ip)
Validate an IP address.
Definition: IP.php:111
StripState $mStripState
Definition: Parser.php:189
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3039
$mDefaultSort
Definition: Parser.php:199
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:964
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1104
const EXT_IMAGE_REGEX
Definition: Parser.php:100
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4803
$cache
Definition: mcc.php:33
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1588
$params
replaceInternalLinks( $s)
Process [[ ]] wikilinks.
Definition: Parser.php:2178
const NS_CATEGORY
Definition: Defines.php:78
setFunctionHook( $id, callable $callback, $flags=0)
Create a function, e.g.
Definition: Parser.php:4960
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1982
doQuotes( $text)
Helper function for doAllQuotes()
Definition: Parser.php:1723
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:846
extractSections( $text, $sectionId, $mode, $newText='')
Break wikitext input into sections, and either pull or replace some particular section's text...
Definition: Parser.php:5701
setOutputType( $ot)
Set the output type.
Definition: Parser.php:855
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:5948
$mImageParamsMagicArray
Definition: Parser.php:154
LinkHolderArray $mLinkHolders
Definition: Parser.php:195
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1982
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to and or sell copies of the and to permit persons to whom the Software is furnished to do so
Definition: LICENSE.txt:10
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition: Parser.php:1008
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
static splitWhitespace( $s)
Return a three-element array: leading whitespace, string contents, trailing whitespace.
Definition: Parser.php:3006
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:780
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:935
$buffer
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1442
internalParseHalfParsed( $text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1413
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:925
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:954
$mInputSize
Definition: Parser.php:229
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
formatHeadings( $text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4168
getUserSig(&$user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext...
Definition: Parser.php:4679
const HALF_PARSED_VERSION
Update this version number when the output of serialiseHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:82
const NS_FILE
Definition: Defines.php:70
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:378
static makeAnchor( $sectionName)
Definition: Parser.php:6009
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name. If you don't need a full Title object...
Definition: SpecialPage.php:82
static normalizeCharReferences( $text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1561
const PTD_FOR_INCLUSION
Definition: Parser.php:107
isValidHalfParsedText( $data)
Returns true if the given array, presumed to be generated by serializeHalfParsedText(), is compatible with the current version of the parser.
Definition: Parser.php:6280
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1982
doDoubleUnderscore( $text)
Strip double-underscore items like NOGALLERY and NOTOC. Fills $this->mDoubleUnderscores, returns the modified text.
Definition: Parser.php:4095
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1766
renderImageGallery( $text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5061
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users. It's targeted particularly at maintainers for Linux since it's been observed that distribution packages of MediaWiki often break. We've consistently had to recommend that users seeking support use official tarballs instead of their distribution's and this often solves whatever problem the user is having. It would be nice if this could such as
Definition: distributors.txt:9
$mTagHooks
Definition: Parser.php:145
NamespaceInfo $nsInfo
Definition: Parser.php:285
fetchTemplateAndTitle( $title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3657
const NS_MEDIAWIKI
Definition: Defines.php:72
MagicWordFactory $magicWordFactory
Definition: Parser.php:267
if(defined( 'MW_SETUP_CALLBACK')) $fname
Customization point after all loading (constants, functions, classes, DefaultSettings, LocalSettings).
Definition: Setup.php:123
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6384
testSrvus( $text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
strip/replaceVariables/unstrip for preprocessor regression testing
Definition: Parser.php:6127
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2536
const OT_HTML
Definition: Defines.php:184
addTrackingCategory( $msg)
Definition: Parser.php:4148
static images()
Definition: SiteStats.php:139
$mTransparentTagHooks
Definition: Parser.php:146
$mExpensiveFunctionCount
Definition: Parser.php:201
$mUrlProtocols
Definition: Parser.php:173
static isWellFormedXmlFragment( $text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:730
$mConf
Definition: Parser.php:173
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:589
makeFreeExternalLink( $url, $numPostProto)
Make a free external link, given a user-supplied URL.
Definition: Parser.php:1613
markerSkipCallback( $s, $callback)
Call a callback function on all regions of the given text that are not inside strip markers...
Definition: Parser.php:6175