MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
30 use Psr\Log\LoggerInterface;
31 use Psr\Log\NullLogger;
32 use Wikimedia\IPUtils;
33 use Wikimedia\ScopedCallback;
34 
75 class Parser {
# Version string of this class, exposed as Parser::VERSION.
81  const VERSION = '1.6.4';
82 
83  # Flags for Parser::setFunctionHook
84  const SFH_NO_HASH = 1;
85  const SFH_OBJECT_ARGS = 2;
86 
87  # Constants needed for external link processing
88  # Everything except bracket, space, or control characters
89  # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
90  # as well as U+3000 (IDEOGRAPHIC SPACE), for T21052
91  # \x{FFFD} is the Unicode replacement character, which the HTML5 spec
92  # uses to replace invalid HTML characters.
93  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
94  # Simplified expression to match an IPv4 or IPv6 address, or
95  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
96  const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
97  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
 # Only http/https URLs ending in .gif, .png, .jpg or .jpeg match.
98  // phpcs:ignore Generic.Files.LineLength
99  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
100  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
101 
102  # Regular expression for a non-newline space
103  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
104 
105  # Flags for preprocessToDom
106  const PTD_FOR_INCLUSION = 1;
107 
108  # Allowed values for $this->mOutputType
109  # Parameter to startExternalParse().
110  const OT_HTML = 1; # like parse()
111  const OT_WIKI = 2; # like preSaveTransform()
112  const OT_PREPROCESS = 3; # like preprocess()
 # NOTE: OT_MSG has the same value as OT_PREPROCESS, so the two cannot be
 # told apart at runtime.
113  const OT_MSG = 3;
114  const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
115 
 # Strip markers are built as MARKER_PREFIX . <payload> . MARKER_SUFFIX
 # (see extractTagsAndParams() and insertStripItem()). Both ends contain
 # U+007F, which parse() replaces in incoming text.
133  const MARKER_SUFFIX = "-QINU`\"'\x7f";
134  const MARKER_PREFIX = "\x7f'\"`UNIQ-";
135 
136  # Markers used for wrapping the table of contents
137  const TOC_START = '<mw:toc>';
138  const TOC_END = '</mw:toc>';
139 
 # NOTE(review): presumably a time limit in seconds; no use of this constant
 # is visible in this part of the file -- confirm before relying on it.
141  const MAX_TTS = 900;
142 
143  # Persistent:
 # Registries and caches declared with their initial (empty) values.
144  public $mTagHooks = [];
145  public $mTransparentTagHooks = [];
146  public $mFunctionHooks = [];
 # NOTE(review): the two sub-arrays are presumably keyed by case sensitivity -- confirm.
147  public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
148  public $mFunctionTagHooks = [];
149  public $mStripList = [];
150  public $mDefaultStripList = [];
 # Emptied again by clearState().
151  public $mVarCache = [];
152  public $mImageParams = [];
153  public $mImageParamsMagicArray = [];
 # Counter used by insertStripItem() to number the markers it creates.
154  public $mMarkerIndex = 0;
 # True until firstCallInit() has done its one-time setup.
158  public $mFirstCall = true;
159 
160  # Initialised by initializeVariables()
161 
165  public $mVariables;
166 
170  public $mSubstWords;
171 
 # Set by the constructor: the legacy configuration array, or
 # [ 'class' => ... ] when the new calling convention is used.
176  public $mConf;
177 
178  # Initialised in constructor
179  public $mExtLinkBracketedRegex, $mUrlProtocols;
180 
181  # Initialized in getPreprocessor()
182 
183  public $mPreprocessor;
184 
185  # Cleared with clearState():
186 
 # ParserOutput instance, replaced by resetOutput().
189  public $mOutput;
190  public $mAutonumber;
191 
 # StripState instance, recreated by clearState().
195  public $mStripState;
196 
197  public $mIncludeCount;
 # LinkHolderArray instance, recreated by clearState().
201  public $mLinkHolders;
202 
 # Counter handed out by nextLinkID().
203  public $mLinkID;
 # $mIncludeSizes is reset to [ 'post-expand' => 0, 'arg' => 0 ].
204  public $mIncludeSizes, $mPPNodeCount;
209  public $mGeneratedPPNodeCount;
210  public $mHighestExpansionDepth;
211  public $mDefaultSort;
212  public $mTplRedirCache, $mHeadings, $mDoubleUnderscores;
213  public $mExpensiveFunctionCount; # number of expensive parser function calls
214  public $mShowToc, $mForceTocPosition;
216  public $mTplDomCache;
217 
221  public $mUser; # User object; only used when doing pre-save transform
222 
223  # Temporary
224  # These are variables reset at least once per parse regardless of $clearState
225 
229  public $mOptions;
230 
238  public $mTitle; # Title context, used for self-link rendering and similar things
239  public $mOutputType; # Output type, one of the OT_xxx constants
240  public $ot; # Shortcut alias, see setOutputType()
241  public $mRevisionObject; # The revision object of the specified revision ID
242  public $mRevisionId; # ID to display in {{REVISIONID}} tags
243  public $mRevisionTimestamp; # The timestamp of the specified revision ID
244  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
245  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
246  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
247  public $mInputSize = false; # For {{PAGESIZE}} on current page.
248 
254  public $mLangLinkLanguages;
255 
262  public $currentRevisionCache;
263 
 # Forced to false on clones by __clone().
268  public $mInParse = false;
269 
 # SectionProfiler instance, recreated by clearState().
271  protected $mProfiler;
272 
 # Created on first use by getLinkRenderer() and then kept.
276  protected $mLinkRenderer;
277 
 # The remaining private members are collaborators assigned once in the constructor.
279  private $magicWordFactory;
280 
282  private $contLang;
283 
285  private $factory;
286 
288  private $specialPageFactory;
289 
297  private $svcOptions;
298 
300  private $linkRendererFactory;
301 
303  private $nsInfo;
304 
306  private $logger;
307 
309  private $badFileLookup;
310 
# Option names the constructor needs. In the new calling convention the
# constructor checks them with $svcOptions->assertRequiredOptions(); in the
# legacy convention they are passed to the ServiceOptions it builds itself.
315  public const CONSTRUCTOR_OPTIONS = [
316  // See $wgParserConf documentation
317  'class',
318  // See documentation for the corresponding config options
319  'ArticlePath',
320  'EnableScaryTranscluding',
321  'ExtraInterlanguageLinkPrefixes',
322  'FragmentMode',
323  'LanguageCode',
324  'MaxSigChars',
325  'MaxTocLevel',
326  'MiserMode',
327  'ScriptPath',
328  'Server',
329  'ServerName',
330  'ShowHostnames',
331  'Sitename',
332  'StylePath',
333  'TranscludeCacheExpiry',
334  ];
335 
/**
 * Set up a parser, accepting both the current and the pre-1.34 argument layout.
 *
 * Current layout: $svcOptions is an options object that must provide every
 * key listed in self::CONSTRUCTOR_OPTIONS.
 *
 * Legacy layout (selected when $svcOptions is empty or an array): argument 1
 * is the parser configuration array, argument 7 the site Config, argument 8
 * the LinkRendererFactory and argument 9 the NamespaceInfo.
 *
 * Collaborators left null are fetched from MediaWikiServices, except the
 * logger, which falls back to a NullLogger.
 *
 * @param ServiceOptions|array|null $svcOptions
 * @param MagicWordFactory|null $magicWordFactory
 * @param Language|null $contLang
 * @param ParserFactory|null $factory
 * @param string|null $urlProtocols Defaults to wfUrlProtocols()
 * @param SpecialPageFactory|null $spFactory
 * @param LinkRendererFactory|null $linkRendererFactory (Config in the legacy layout)
 * @param NamespaceInfo|null $nsInfo (LinkRendererFactory in the legacy layout)
 * @param LoggerInterface|null $logger (NamespaceInfo in the legacy layout)
 * @param BadFileLookup|null $badFileLookup
 */
public function __construct(
	$svcOptions = null,
	MagicWordFactory $magicWordFactory = null,
	Language $contLang = null,
	ParserFactory $factory = null,
	$urlProtocols = null,
	SpecialPageFactory $spFactory = null,
	$linkRendererFactory = null,
	$nsInfo = null,
	$logger = null,
	BadFileLookup $badFileLookup = null
) {
	if ( !$svcOptions || is_array( $svcOptions ) ) {
		// Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
		// Config, and the eighth is LinkRendererFactory.
		$this->mConf = (array)$svcOptions;
		if ( empty( $this->mConf['class'] ) ) {
			$this->mConf['class'] = self::class;
		}

		// Read the raw positional arguments before any parameter is reassigned.
		$argCount = func_num_args();
		$legacyConfig = $argCount > 6
			? func_get_arg( 6 )
			: MediaWikiServices::getInstance()->getMainConfig();
		$legacyLinkRendererFactory = $argCount > 7 ? func_get_arg( 7 ) : null;
		$legacyNsInfo = $argCount > 8 ? func_get_arg( 8 ) : null;

		$this->svcOptions = new ServiceOptions(
			self::CONSTRUCTOR_OPTIONS, $this->mConf, $legacyConfig
		);
		$linkRendererFactory = $legacyLinkRendererFactory;
		$nsInfo = $legacyNsInfo;
		// Positional argument 9 is bound to $logger but carries the
		// NamespaceInfo in this layout. Without this reset it would also be
		// stored as the logger below.
		$logger = null;
	} else {
		// New calling convention
		$svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		// $this->mConf is public, so we'll keep the option there for
		// compatibility until it's removed
		$this->mConf = [
			'class' => $svcOptions->get( 'class' ),
		];
		$this->svcOptions = $svcOptions;
	}

	$this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
	// Matches a bracketed external link: "[" protocol + address + URL
	// characters, optional spaces, then the link text up to "]".
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';

	$services = null;
	$this->magicWordFactory = $magicWordFactory ??
		MediaWikiServices::getInstance()->getMagicWordFactory();

	$this->contLang = $contLang ?? MediaWikiServices::getInstance()->getContentLanguage();

	$this->factory = $factory ?? MediaWikiServices::getInstance()->getParserFactory();
	$this->specialPageFactory = $spFactory ??
		MediaWikiServices::getInstance()->getSpecialPageFactory();
	$this->linkRendererFactory = $linkRendererFactory ??
		MediaWikiServices::getInstance()->getLinkRendererFactory();
	$this->nsInfo = $nsInfo ?? MediaWikiServices::getInstance()->getNamespaceInfo();
	$this->logger = $logger ?: new NullLogger();
	$this->badFileLookup = $badFileLookup ??
		MediaWikiServices::getInstance()->getBadFileLookup();
}
406 
/**
 * Release every property held by this object when it is destroyed.
 *
 * The link holder array is dropped first, then all remaining properties
 * visible from this class are unset one by one.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}

	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
420 
/**
 * Fix up a freshly cloned parser and announce it through the ParserCloned hook.
 *
 * The clone is marked as not being inside a parse. mStripState and mVarCache
 * are then rebound to fresh copies, because a field that was ever taken by
 * reference is copied as a reference by clone (T58226) and would otherwise
 * stay shared with the original.
 */
public function __clone() {
	$this->mInParse = false;

	$possiblyReferencedFields = [ 'mStripState', 'mVarCache' ];
	foreach ( $possiblyReferencedFields as $field ) {
		// Copy the value out, then bind the field to that private copy;
		// dropping the local name leaves the field as the only owner.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
443 
/**
 * Name of the preprocessor class; always Preprocessor_Hash.
 *
 * @deprecated since 1.34; calling this emits a deprecation notice.
 * @return string Class name
 */
public static function getDefaultPreprocessorClass() {
	wfDeprecated( __METHOD__, '1.34' );

	return Preprocessor_Hash::class;
}
455 
/**
 * One-time setup, run the first time the parser is actually used.
 *
 * Registers the core parser functions and core tag hooks, initialises the
 * variable tables and fires the ParserFirstCallInit hook. Later calls
 * return immediately.
 */
public function firstCallInit() {
	if ( !$this->mFirstCall ) {
		return;
	}
	$this->mFirstCall = false;

	// The scraped listing lost the statement that sat between the flag
	// reset and the tag-hook registration; upstream registers the core
	// parser functions here.
	CoreParserFunctions::register( $this );
	CoreTagHooks::register( $this );
	$this->initializeVariables();

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
}
473 
/**
 * Reset all per-parse state so the parser can start a fresh parse.
 *
 * Runs firstCallInit(), replaces the output object, zeroes counters and
 * caches, creates new LinkHolderArray, StripState and SectionProfiler
 * instances, and finally fires the ParserClearState hook.
 */
public function clearState() {
	$this->firstCallInit();
	$this->resetOutput();

	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	// Forget everything known about the revision being parsed.
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [ 'post-expand' => 0, 'arg' => 0 ];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor still bound to another parser is dropped
	$preprocessorIsForeign = isset( $this->mPreprocessor )
		&& $this->mPreprocessor->parser !== $this;
	if ( $preprocessorIsForeign ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserClearState', [ &$self ] );
}
523 
/**
 * Replace the output object with a new ParserOutput and register its
 * recordOption() method as a watcher on the current options.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput;
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
531 
/**
 * Convert wikitext to HTML and return the populated output object.
 *
 * Revision-related state is saved before the parse and restored afterwards,
 * so a $revid passed here only applies to this call.
 *
 * @param string $text Wikitext to parse
 * @param Title $title Page the text belongs to
 * @param ParserOptions $options
 * @param bool $linestart Passed through to internalParseHalfParsed()
 * @param bool $clearState When true, the parser is locked for this call and
 *   U+007F is replaced in $text; also passed to startParse()
 * @param int|null $revid Revision ID to assume while parsing, if any
 * @return ParserOutput The object held in $this->mOutput
 */
public function parse(
	$text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held in a local so the lock lasts until this method returns.
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the revision state so it can be put back at the end.
	$savedRevisionState = [
		'mRevisionId' => $this->mRevisionId,
		'mRevisionObject' => $this->mRevisionObject,
		'mRevisionTimestamp' => $this->mRevisionTimestamp,
		'mRevisionUser' => $this->mRevisionUser,
		'mRevisionSize' => $this->mRevisionSize,
	];
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$self, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Provide a converted title unless conversion is disabled or a display
	// title has already been set.
	$skipTitleConversion = $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false;
	if ( !$skipTitleConversion ) {
		$ruleTitle = $this->getTargetLanguage()->getConvRuleTitle();
		$this->mOutput->setTitleText(
			$ruleTitle ?: $this->getTargetLanguage()->convertTitle( $title )
		);
	}

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$text .= $this->makeLimitReport();
	}

	# Wrap non-interface parser output in a <div> so it can be targeted
	# with CSS (T37247)
	$wrapperClass = $this->mOptions->getWrapOutputClass();
	if ( $wrapperClass !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $wrapperClass );
	}

	$this->mOutput->setText( $text );

	foreach ( $savedRevisionState as $property => $previousValue ) {
		$this->$property = $previousValue;
	}
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
649 
/**
 * Record limit-report data on the output object and build the HTML comments
 * that carry the "NewPP limit report" and the transclusion timing report.
 *
 * Fires the ParserLimitReportPrepare hook once and ParserLimitReportFormat
 * once per report entry.
 *
 * @return string Two HTML comments, each followed by a newline
 */
protected function makeLimitReport() {
	$includeSizeLimit = $this->mOptions->getMaxIncludeSize();

	$cpuSeconds = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuSeconds !== null ) {
		$this->mOutput->setLimitReportData(
			'limitreport-cputime', sprintf( "%.3f", $cpuSeconds )
		);
	}

	$wallSeconds = $this->mOutput->getTimeSinceStart( 'wall' );
	$this->mOutput->setLimitReportData(
		'limitreport-walltime', sprintf( "%.3f", $wallSeconds )
	);

	// Each entry below is a [ used, limit ] pair.
	$this->mOutput->setLimitReportData(
		'limitreport-ppvisitednodes',
		[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-ppgeneratednodes',
		[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-postexpandincludesize',
		[ $this->mIncludeSizes['post-expand'], $includeSizeLimit ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-templateargumentsize',
		[ $this->mIncludeSizes['arg'], $includeSizeLimit ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-expansiondepth',
		[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-expensivefunctioncount',
		[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
	);

	foreach ( $this->mStripState->getLimitReport() as [ $stripKey, $stripValue ] ) {
		$this->mOutput->setLimitReportData( $stripKey, $stripValue );
	}

	Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

	$limitReport = "NewPP limit report\n";
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$limitReport .= 'Parsed by ' . wfHostname() . "\n";
	}
	$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
	$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
	$dynamicLabel = $this->mOutput->hasDynamicContent() ? 'true' : 'false';
	$limitReport .= 'Dynamic content: ' . $dynamicLabel . "\n";
	$limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";

	foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
		// A hook returning false has formatted (or suppressed) the entry itself.
		$useDefaultFormat = Hooks::run(
			'ParserLimitReportFormat',
			[ $key, &$value, &$limitReport, false, false ]
		);
		if ( !$useDefaultFormat ) {
			continue;
		}

		$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			$valueMsg = new RawMessage( '$1' );
		}
		if ( $keyMsg->isDisabled() || $valueMsg->isDisabled() ) {
			continue;
		}
		$valueMsg->params( $value );
		$limitReport .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
	}
	// Since we're not really outputting HTML, decode the entities and
	// then re-encode the things that need hiding inside HTML comments.
	$limitReport = htmlspecialchars_decode( $limitReport );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
	$text = "\n<!-- \n$limitReport-->\n";

	// Add on template profiling data in human/machine readable way
	$statsByFunction = $this->mProfiler->getFunctionStats();
	uasort( $statsByFunction, function ( $left, $right ) {
		// Largest 'real' time first
		return $right['real'] <=> $left['real'];
	} );
	$profileReport = [];
	$slowestTen = array_slice( $statsByFunction, 0, 10 );
	foreach ( $slowestTen as $entry ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$entry['%real'],
			$entry['real'],
			$entry['calls'],
			htmlspecialchars( $entry['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
	$text .= implode( "\n", $profileReport ) . "\n-->\n";

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData(
		'cachereport-timestamp', $this->mOutput->getCacheTime()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-ttl', $this->mOutput->getCacheExpiry()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent', $this->mOutput->hasDynamicContent()
	);

	// Log pages that used more than a tenth of the generated-node budget.
	if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
		wfDebugLog(
			'generated-pp-node-count',
			$this->mGeneratedPPNodeCount . ' ' . $this->getTitle()->getPrefixedDBkey()
		);
	}

	return $text;
}
765 
/**
 * Half-parse wikitext from inside an ongoing parse.
 *
 * Fires ParserBeforeStrip and ParserAfterStrip, then runs internalParse()
 * with $isMain set to false.
 *
 * @param string $text
 * @param PPFrame|bool $frame Frame to expand in, or false
 * @return string Result of internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );

	return $this->internalParse( $text, false, $frame );
}
798 
/**
 * Like recursiveTagParse(), but also runs the result through
 * internalParseHalfParsed() with $isMain set to false.
 *
 * @param string $text
 * @param PPFrame|bool $frame Frame to expand in, or false
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );

	return $this->internalParseHalfParsed( $halfParsed, false );
}
823 
/**
 * Parse extension-tag content as though it were a top-level document:
 * recursiveTagParse(), the ParserAfterParse hook, then
 * internalParseHalfParsed() with $isMain set to true.
 *
 * @param string $text
 * @return string
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$text = $this->recursiveTagParse( $text );

	$self = $this;
	Hooks::run( 'ParserAfterParse', [ &$self, &$text, &$this->mStripState ] );

	return $this->internalParseHalfParsed( $text, true );
}
850 
/**
 * Expand variables in wikitext without producing HTML.
 *
 * Locks the parser, starts a parse in OT_PREPROCESS mode with state cleared,
 * fires the strip hooks, replaces variables and finally unstrips the text.
 *
 * @param string $text
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to assume, if any
 * @param PPFrame|bool $frame Frame passed to replaceVariables(), or false
 * @return string
 */
public function preprocess( $text, ?Title $title,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Held in a local so the lock lasts until this method returns.
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );

	$expanded = $this->replaceVariables( $text, $frame );

	return $this->mStripState->unstripBoth( $expanded );
}
878 
/**
 * Replace variables in $text and unstrip the result, without locking the
 * parser or starting a new parse.
 *
 * @param string $text
 * @param PPFrame|bool $frame Frame passed to replaceVariables(), or false
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );

	return $this->mStripState->unstripBoth( $expanded );
}
893 
/**
 * Process wikitext that is about to be preloaded into an edit form.
 *
 * $params are first substituted into $text through a RawMessage. The text is
 * then preprocessed for inclusion and expanded with template and argument
 * expansion switched off, and finally unstripped.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Message-style parameters for $text
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	// Held in a local so the lock lasts until this method returns.
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	// $flags was passed to expand() without ever being assigned.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
921 
/**
 * Set the user returned by getUser(); null makes getUser() fall back to the
 * user held by the parser options.
 *
 * @param User|null $user
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
931 
/**
 * Set the title context for the parse.
 *
 * A missing title is replaced by Special:Badtitle/Parser, and any fragment
 * is stripped before the title is stored.
 *
 * @param Title|null $t
 */
public function setTitle( Title $t = null ) {
	$title = $t ?: Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
949 
/**
 * Title context of the current parse.
 *
 * @return Title
 */
public function getTitle() : Title {
	return $this->mTitle;
}
958 
/**
 * Combined accessor/mutator for the title, delegating to wfSetVar().
 *
 * @param Title|null $x New title, or null
 * @return Title|null Whatever wfSetVar() returns
 */
public function Title( Title $x = null ) : ?Title {
	return wfSetVar( $this->mTitle, $x );
}
968 
/**
 * Set the output type and rebuild the $this->ot shortcut map, which holds
 * one boolean per mode ('html', 'wiki', 'pre', 'plain').
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$modeFlags = [];
	$modeFlags['html'] = ( $ot == self::OT_HTML );
	$modeFlags['wiki'] = ( $ot == self::OT_WIKI );
	$modeFlags['pre'] = ( $ot == self::OT_PREPROCESS );
	$modeFlags['plain'] = ( $ot == self::OT_PLAIN );
	$this->ot = $modeFlags;
}
984 
/**
 * Combined accessor/mutator for mOutputType, delegating to wfSetVar().
 * Unlike setOutputType(), this does not touch the $this->ot shortcut map.
 *
 * @param int|null $x New value, or null
 * @return int Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	return wfSetVar( $this->mOutputType, $x );
}
994 
/**
 * The output object currently being filled.
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
1003 
/**
 * The options object in use for the current parse.
 *
 * @return ParserOptions
 */
public function getOptions() {
	return $this->mOptions;
}
1012 
/**
 * Combined accessor/mutator for mOptions, delegating to wfSetVar().
 *
 * @param ParserOptions|null $x New value, or null
 * @return ParserOptions Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	return wfSetVar( $this->mOptions, $x );
}
1022 
/**
 * Hand out the current link ID and advance the counter.
 *
 * @return int The ID before the increment
 */
public function nextLinkID() {
	$issued = $this->mLinkID;
	$this->mLinkID++;

	return $issued;
}
1029 
/**
 * Overwrite the link ID counter used by nextLinkID().
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1036 
/**
 * Language object for parser functions; an alias of getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	return $this->getTargetLanguage();
}
1044 
/**
 * Work out the language the output is meant for.
 *
 * Order of precedence: the target language set on the options, then the
 * user language when parsing an interface message, then the page language
 * of the current title.
 *
 * @return Language
 */
public function getTargetLanguage() {
	$language = $this->mOptions->getTargetLanguage();

	if ( $language === null ) {
		$language = $this->mOptions->getInterfaceMessage()
			? $this->mOptions->getUserLangObj()
			: $this->getTitle()->getPageLanguage();
	}

	return $language;
}
1064 
/**
 * The user set through setUser(), or the user held by the parser options
 * when none was set.
 *
 * @return User
 */
public function getUser() {
	return $this->mUser !== null
		? $this->mUser
		: $this->mOptions->getUser();
}
1077 
/**
 * The preprocessor bound to this parser, created on first use.
 *
 * @return Preprocessor_Hash
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$this->mPreprocessor = new Preprocessor_Hash( $this );

	return $this->mPreprocessor;
}
1089 
/**
 * The link renderer for this parser, created on first use.
 *
 * The stub threshold is taken from the options in force at creation time;
 * the renderer is then reused for the lifetime of the parser.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	// XXX We make the LinkRenderer with current options and then cache it forever
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	$this->mLinkRenderer = $this->linkRendererFactory->create();
	$stubThreshold = $this->getOptions()->getStubThreshold();
	$this->mLinkRenderer->setStubThreshold( $stubThreshold );

	return $this->mLinkRenderer;
}
1107 
/**
 * The MagicWordFactory this parser was constructed with.
 *
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1117 
/**
 * The content language this parser was constructed with.
 *
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1127 
/**
 * Replace the given tags (and HTML comments) in $text with unique markers.
 *
 * Every occurrence found is recorded in $matches under its marker as
 * [ element name, inner content, decoded attributes, original text ].
 * Inner content is null for self-closing tags. A tag without an end tag
 * runs to the end of the text.
 *
 * @param string[] $elements Tag names to look for; joined into a regex as-is
 * @param string $text
 * @param array &$matches Output; reset on every call
 * @return string $text with each match replaced by its marker
 */
public static function extractTagsAndParams( $elements, $text, &$matches ) {
	// Shared across calls so markers stay unique within the process.
	static $markerSerial = 1;

	$matches = [];
	$stripped = '';

	$tagAlternation = implode( '|', $elements );
	$openPattern = "/<($tagAlternation)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$parts = preg_split( $openPattern, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $parts[0];

		$partCount = count( $parts );
		if ( $partCount < 5 ) {
			// Nothing left to extract.
			break;
		}

		if ( $partCount > 5 ) {
			# comment: the fourth capture group matched
			$element = $parts[4];
			$attributes = '';
			$close = '';
			$inside = $parts[5];
		} else {
			# tag
			[ , $element, $attributes, $close, $inside ] = $parts;
		}

		$marker = self::MARKER_PREFIX . "-$element-"
			. sprintf( '%08X', $markerSerial++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$tail = null;
			$text = $inside;
		} else {
			$endPattern = $element === '!--'
				? '/(-->)/'
				: "/(<\\/$element\\s*>)/i";
			$pieces = preg_split( $endPattern, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $pieces[0];
			if ( count( $pieces ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				[ , $tail, $text ] = $pieces;
			}
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail",
		];
	}

	return $stripped;
}
1204 
/**
 * The current contents of mStripList.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1213 
/**
 * The strip state of the current parse.
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1222 
/**
 * Store $text in the strip state under a new marker and return the marker.
 *
 * Markers are numbered from mMarkerIndex, which is advanced on every call.
 *
 * @param string $text
 * @return string The marker that now stands for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1238 
/**
 * Convert wikitext table markup into HTML table markup, one line at a time.
 *
 * Recognised line prefixes (after trimming): "{|" opens a table (optionally
 * preceded by ":" characters that indent it), "|}" closes one, "|-" starts a
 * row, "|+" a caption, "|" data cells and "!" header cells. Lines outside
 * any table pass through unchanged. Tables still open at the end of the
 * text are closed automatically.
 *
 * Nesting is tracked with parallel stacks that hold one entry per open
 * table; they are always pushed and popped together.
 *
 * @param string $text
 * @return string
 */
1245  private function handleTables( $text ) {
1246  $lines = StringUtils::explode( "\n", $text );
1247  $out = '';
1248  $td_history = []; # Is currently a td tag open?
1249  $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1250  $tr_history = []; # Is currently a tr tag open?
1251  $tr_attributes = []; # history of tr attributes
1252  $has_opened_tr = []; # Did this table open a <tr> element?
1253  $indent_level = 0; # indent level of the table
1254 
1255  foreach ( $lines as $outLine ) {
1256  $line = trim( $outLine );
1257 
1258  if ( $line === '' ) { # empty line, go to next line
1259  $out .= $outLine . "\n";
1260  continue;
1261  }
1262 
1263  $first_character = $line[0];
1264  $first_two = substr( $line, 0, 2 );
1265  $matches = [];
1266 
1267  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1268  # First check if we are starting a new table
 # The number of leading colons becomes the indent level, rendered
 # as that many nested <dl><dd> wrappers.
1269  $indent_level = strlen( $matches[1] );
1270 
1271  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1272  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1273 
1274  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
 # Push one fresh entry per stack for the new table
1275  array_push( $td_history, false );
1276  array_push( $last_tag_history, '' );
1277  array_push( $tr_history, false );
1278  array_push( $tr_attributes, '' );
1279  array_push( $has_opened_tr, false );
1280  } elseif ( count( $td_history ) == 0 ) {
1281  # Don't do any of the following
 # (no table is open, so the line is copied through untouched)
1282  $out .= $outLine . "\n";
1283  continue;
1284  } elseif ( $first_two === '|}' ) {
1285  # We are ending a table
 # The closing markup is built right to left by prepending to $line.
1286  $line = '</table>' . substr( $line, 2 );
1287  $last_tag = array_pop( $last_tag_history );
1288 
 # A table that never opened a row gets an empty one
1289  if ( !array_pop( $has_opened_tr ) ) {
1290  $line = "<tr><td></td></tr>{$line}";
1291  }
1292 
1293  if ( array_pop( $tr_history ) ) {
1294  $line = "</tr>{$line}";
1295  }
1296 
1297  if ( array_pop( $td_history ) ) {
1298  $line = "</{$last_tag}>{$line}";
1299  }
1300  array_pop( $tr_attributes );
1301  if ( $indent_level > 0 ) {
1302  $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1303  } else {
1304  $outLine = $line;
1305  }
1306  } elseif ( $first_two === '|-' ) {
1307  # Now we have a table row
1308  $line = preg_replace( '#^\|-+#', '', $line );
1309 
1310  # Whats after the tag is now only attributes
 # They are stored and only emitted when the first cell of the row opens the <tr>.
1311  $attributes = $this->mStripState->unstripBoth( $line );
1312  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
1313  array_pop( $tr_attributes );
1314  array_push( $tr_attributes, $attributes );
1315 
1316  $line = '';
1317  $last_tag = array_pop( $last_tag_history );
1318  array_pop( $has_opened_tr );
1319  array_push( $has_opened_tr, true );
1320 
1321  if ( array_pop( $tr_history ) ) {
1322  $line = '</tr>';
1323  }
1324 
1325  if ( array_pop( $td_history ) ) {
1326  $line = "</{$last_tag}>{$line}";
1327  }
1328 
1329  $outLine = $line;
1330  array_push( $tr_history, false );
1331  array_push( $td_history, false );
1332  array_push( $last_tag_history, '' );
1333  } elseif ( $first_character === '|'
1334  || $first_character === '!'
1335  || $first_two === '|+'
1336  ) {
1337  # This might be cell elements, td, th or captions
1338  if ( $first_two === '|+' ) {
1339  $first_character = '+';
1340  $line = substr( $line, 2 );
1341  } else {
1342  $line = substr( $line, 1 );
1343  }
1344 
1345  // Implies both are valid for table headings.
1346  if ( $first_character === '!' ) {
1347  $line = StringUtils::replaceMarkup( '!!', '||', $line );
1348  }
1349 
1350  # Split up multiple cells on the same line.
1351  # FIXME : This can result in improper nesting of tags processed
1352  # by earlier parser steps.
1353  $cells = explode( '||', $line );
1354 
1355  $outLine = '';
1356 
1357  # Loop through each table cell
1358  foreach ( $cells as $cell ) {
1359  $previous = '';
 # Captions do not open a row; every other cell opens one if needed
1360  if ( $first_character !== '+' ) {
1361  $tr_after = array_pop( $tr_attributes );
1362  if ( !array_pop( $tr_history ) ) {
1363  $previous = "<tr{$tr_after}>\n";
1364  }
1365  array_push( $tr_history, true );
1366  array_push( $tr_attributes, '' );
1367  array_pop( $has_opened_tr );
1368  array_push( $has_opened_tr, true );
1369  }
1370 
1371  $last_tag = array_pop( $last_tag_history );
1372 
 # Close the previously open cell before starting this one
1373  if ( array_pop( $td_history ) ) {
1374  $previous = "</{$last_tag}>\n{$previous}";
1375  }
1376 
1377  if ( $first_character === '|' ) {
1378  $last_tag = 'td';
1379  } elseif ( $first_character === '!' ) {
1380  $last_tag = 'th';
1381  } elseif ( $first_character === '+' ) {
1382  $last_tag = 'caption';
1383  } else {
1384  $last_tag = '';
1385  }
1386 
1387  array_push( $last_tag_history, $last_tag );
1388 
1389  # A cell could contain both parameters and data
1390  $cell_data = explode( '|', $cell, 2 );
1391 
1392  # T2553: Note that a '|' inside an invalid link should not
1393  # be mistaken as delimiting cell parameters
1394  # Bug T153140: Neither should language converter markup.
1395  if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1396  $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1397  } elseif ( count( $cell_data ) == 1 ) {
1398  // Whitespace in cells is trimmed
1399  $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1400  } else {
1401  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1402  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1403  // Whitespace in cells is trimmed
1404  $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1405  }
1406 
1407  $outLine .= $cell;
1408  array_push( $td_history, true );
1409  }
1410  }
1411  $out .= $outLine . "\n";
1412  }
1413 
1414  # Closing open td, tr && table
 # NOTE: an open cell is always closed with </td> here, whatever tag
 # opened it, and no </dd></dl> indentation wrappers are emitted.
1415  while ( count( $td_history ) > 0 ) {
1416  if ( array_pop( $td_history ) ) {
1417  $out .= "</td>\n";
1418  }
1419  if ( array_pop( $tr_history ) ) {
1420  $out .= "</tr>\n";
1421  }
1422  if ( !array_pop( $has_opened_tr ) ) {
1423  $out .= "<tr><td></td></tr>\n";
1424  }
1425 
1426  $out .= "</table>\n";
1427  }
1428 
1429  # Remove trailing line-ending (b/c)
1430  if ( substr( $out, -1 ) === "\n" ) {
1431  $out = substr( $out, 0, -1 );
1432  }
1433 
1434  # special case: don't return empty table
1435  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1436  $out = '';
1437  }
1438 
1439  return $out;
1440  }
1441 
/**
 * Run the first stage of parsing: variable/template expansion, HTML
 * sanitizing, and the table, rule, heading, link, quote and magic-link passes.
 *
 * The passes run in a fixed order; later passes rely on the markup
 * produced by earlier ones, so the sequence must not be rearranged.
 *
 * @param string $text Wikitext to transform
 * @param bool $isMain Forwarded unchanged to finalizeHeadings()
 * @param object|bool $frame Preprocessor frame used for expansion (the code
 *   reads ->depth and calls ->expand() on it; presumably a PPFrame), or
 *   false to fall back to replaceVariables()
 * @return string The transformed text, or the text as left by the
 *   'ParserBeforeInternalParse' hook if that hook aborted
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	// finalizeHeadings() needs the untouched input as well as the result.
	$origText = $text;

	// Hooks receive the parser by reference; a local alias avoids the
	// PHP 7.1 warning about passing $this by reference.
	$parser = $this;

	# Let a hook suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( !$frame ) {
		# No frame given: use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# The frame depth tells us how include/noinclude tags are handled:
		# depth 0 is the top-level document, anything else is an inclusion.
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		$transparentTags,
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables must come after variable replacement to work properly, and
	# before the other transformations so that things like link expansions
	# do not show up in surprising places.
	$text = $this->handleTables( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->handleDoubleUnderscore( $text );

	$text = $this->handleHeadings( $text );
	$text = $this->handleInternalLinks( $text );
	$text = $this->handleAllQuotes( $text );
	$text = $this->handleExternalLinks( $text );

	# handleInternalLinks may leave behind absolute URLs that were masked
	# to hide them from handleExternalLinks; drop the masking marker now.
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->handleMagicLinks( $text );

	return $this->finalizeHeadings( $text, $origText, $isMain );
}
1517 
/**
 * Finish parsing text that has already been through internalParse():
 * block-level formatting, link holder replacement, language conversion,
 * unstripping and tidying.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain Whether to run the ParserBeforeTidy/ParserAfterTidy hooks
 * @param bool $linestart Forwarded unchanged to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Hooks receive the parser by reference; a local alias avoids the
	// PHP 7.1 warning about passing $this by reference.
	$parser = $this;

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$text = Sanitizer::armorFrenchSpaces( $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHoldersPrivate( $text );

	# Language conversion is skipped when content conversion is disabled
	# in the options, when the 'nocontentconvert' double underscore is
	# present, or when the text is an interface message.
	if ( !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage()
	) {
		# The position of the convert() call should not be changed: it
		# assumes that all links are replaced and that the only thing
		# left is the <nowiki> mark.
		$text = $this->getTargetLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( !MWTidy::isEnabled() ) {
		# Attempt to sanitize at least some nesting problems
		# (T4702 and quite a few others).
		# This code path is buggy and deprecated!
		wfDeprecated( 'disabling tidy', '1.33' );
		$fixups = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# Fix up an anchor inside another anchor, but only
			# for a single nested link (T5695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			# Fix div inside inline elements: doBlockLevels won't wrap a
			# line which contains a div, so replace the div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# Remove empty italic or bold tag pairs, some of which are
			# introduced by the rules above
			'/<([bi])><\/\\1>/' => '',
		];

		$text = preg_replace( array_keys( $fixups ), array_values( $fixups ), $text );
	} elseif ( $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1608 
/**
 * Find free external links and "magic" RFC / PMID / ISBN references in
 * the text and hand each match to magicLinkCallback().
 *
 * Existing anchors and the insides of HTML elements are matched as well
 * (groups 1 and 2) so that the callback can return them untouched.
 *
 * @param string $text
 * @return string Result of preg_replace_callback() on $text
 */
private function handleMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$space = self::SPACE_NOT_NL; # non-newline space
	$spdash = "(?:-|$space)"; # a dash or a non-newline space
	$spaces = "$space++"; # possessive match of 1 or more spaces

	// Extended-mode (x) pattern: whitespace and "#" comments inside it
	// are ignored by PCRE.
	$pattern = '!(?: # Start cases
		(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
		(<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
		(\b # m[3]: Free external links
		(?i:$prots)
		($addr$urlChar*) # m[4]: Post-protocol path
		) |
		\b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
		([0-9]+)\b |
		\bISBN $spaces ( # m[6]: ISBN, capture number
		(?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
		(?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
		[0-9Xx] # check digit
		)\b
		)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1644 
/**
 * Callback for the pattern built in handleMagicLinks().
 *
 * @param array $m Match array; groups 1 and 2 are anchors / HTML elements
 *   to leave alone, 3 and 4 a free external link and its post-protocol
 *   part, 5 an RFC or PMID number, 6 an ISBN
 * @return string Replacement text (HTML for a link, or the match unchanged)
 * @throws MWException If group 5 matched but the text starts with neither
 *   "RFC" nor "PMID"
 */
public function magicLinkCallback( $m ) {
	$matched = static function ( $group ) use ( $m ) {
		return isset( $m[$group] ) && $m[$group] !== '';
	};

	# Anchors and HTML elements are passed through untouched
	if ( $matched( 1 ) || $matched( 2 ) ) {
		return $m[0];
	}

	# Free external link
	if ( $matched( 3 ) ) {
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	# RFC or PMID
	if ( $matched( 5 ) ) {
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $m[0];
			}
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$trackingCat = 'magiclink-tracking-rfc';
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $m[0];
			}
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$trackingCat = 'magiclink-tracking-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink(
			$url,
			"{$keyword} {$id}",
			true,
			$cssClass,
			[],
			$this->getTitle()
		);
	}

	# ISBN
	if ( $matched( 6 ) && $this->mOptions->getMagicISBNLinks() ) {
		$space = self::SPACE_NOT_NL; # non-newline space
		// Display form: every kind of non-newline space becomes a plain space
		$isbn = preg_replace( "/$space/", ' ', $m[6] );
		// Lookup form: no separators, upper-case check digit
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	# Nothing applicable (e.g. ISBN links disabled): leave the text alone
	return $m[0];
}
1719 
/**
 * Turn a free (unbracketed) external URL into a link or an external
 * image, moving trailing entities and punctuation out of the URL.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Number of characters after the protocol; if the
 *   removed trail is at least this long, only the protocol would be left
 *   and the text is returned unlinked
 * @return string HTML, followed by whatever was moved to the trail
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by removeHTMLtags())
	# should not be included in URLs, per RFC 2396.
	# &nbsp; terminates a URL as well (bug T84937).
	$hit = [];
	$terminator = '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/';
	if ( preg_match( $terminator, $url, $hit, PREG_OFFSET_CAPTURE ) ) {
		$cutAt = $hit[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Trailing punctuation goes to $trail; a right bracket is fair game
	# too when the URL contains no left bracket.
	$sep = ',;\.:!?';
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$urlRev = strrev( $url );
	$numSepChars = strspn( $urlRev, $sep );
	# Don't break a trailing HTML entity by moving its ';' into $trail.
	# This is hot code: substr_compare avoids creating a new string, and
	# matching on the reversed string at an offset avoids a slow
	# $-anchored regex. Un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $numSepChars
		&& substr_compare( $url, ";", -$numSepChars, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $hit, 0, $numSepChars )
	) {
		$numSepChars--;
	}
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	# Make sure a real URL is left after trail removal, not just a lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$caption = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
		$html = Linker::makeExternalLink(
			$url,
			$caption,
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->getTitle()
		);
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}
	return $html . $trail;
}
1794 
/**
 * Convert "== heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that longer runs of '=' are
 * consumed before shorter ones can match them.
 *
 * @param string $text
 * @return string
 */
private function handleHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$marker = str_repeat( '=', $level );
		// Only non-newline whitespace is trimmed around the heading text:
		// using \s* there would break "==\n===\n" and parse it as <h2>=</h2>
		$pattern = '/^(?:' . $marker . ')[ \t]*(.+?)[ \t]*(?:' . $marker . ')\s*$/m';
		$replacement = '<h' . $level . '>\1</h' . $level . '>';
		$text = preg_replace( $pattern, $replacement, $text );
	}
	return $text;
}
1810 
/**
 * Apply doQuotes() to every line of the text separately and join the
 * results with newlines again.
 *
 * @param string $text
 * @return string
 */
private function handleAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1827 
/**
 * Convert runs of apostrophes on a single line into <i> and <b> markup.
 *
 * Two apostrophes toggle italics, three toggle bold, five toggle both.
 * Runs of four, or of more than five, keep their surplus apostrophes as
 * literal text. If the line has an odd number of both bold and italic
 * markers, one bold marker is reinterpreted as an apostrophe followed by
 * an italic marker. Tags still open at the end of the line are closed.
 *
 * @param string $text A single line of text
 * @return string The line with quote markup converted to HTML
 */
public function doQuotes( $text ) {
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $arr === false ) {
		// PCRE failure: nothing sensible can be done, leave the line alone
		return $text;
	}
	$countarr = count( $arr );
	if ( $countarr === 1 ) {
		// No run of two or more apostrophes on this line
		return $text;
	}

	// Even indexes of $arr hold plain text, odd indexes hold apostrophe runs.

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen === 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen === 2 ) {
			$numitalics++;
		} elseif ( $thislen === 3 ) {
			$numbold++;
		} elseif ( $thislen === 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 === 1 ) && ( $numitalics % 2 === 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) === 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace === -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword === -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state is one of '', 'i', 'b', 'bi', 'ib' (tags open, outermost first)
	// or 'both' (a ''''' was seen and the nesting order is not yet known;
	// text is collected in $buffer until the next marker decides it).
	$output = '';
	$buffer = '';
	$state = '';
	foreach ( $arr as $i => $r ) {
		if ( ( $i % 2 ) === 0 ) {
			// Plain text segment
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		// Apostrophe run
		$thislen = strlen( $r );
		if ( $thislen === 2 ) {
			if ( $state === 'i' ) {
				$output .= '</i>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i>';
				$state = 'b';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i><b>';
				$state = 'b';
			} elseif ( $state === 'both' ) {
				$output .= '<b><i>' . $buffer . '</i>';
				$state = 'b';
			} else { // $state can be 'b' or ''
				$output .= '<i>';
				$state .= 'i';
			}
		} elseif ( $thislen === 3 ) {
			if ( $state === 'b' ) {
				$output .= '</b>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b><i>';
				$state = 'i';
			} elseif ( $state === 'ib' ) {
				$output .= '</b>';
				$state = 'i';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b>';
				$state = 'i';
			} else { // $state can be 'i' or ''
				$output .= '<b>';
				$state .= 'b';
			}
		} elseif ( $thislen === 5 ) {
			if ( $state === 'b' ) {
				$output .= '</b><i>';
				$state = 'i';
			} elseif ( $state === 'i' ) {
				$output .= '</i><b>';
				$state = 'b';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b>';
				$state = '';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i>';
				$state = '';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b></i>';
				$state = '';
			} else { // ($state == '')
				$buffer = '';
				$state = 'both';
			}
		}
	}

	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would
	// silently drop a buffer consisting of the string "0".
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2008 
/**
 * Replace bracketed external links ([http://example.com text]) with HTML
 * links and register each target URL in the parser output.
 *
 * @param string $text
 * @return string
 * @throws MWException If splitting with the bracketed-link regex fails
 */
private function handleExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	// The first element is the text before the first link; after it the
	// pieces come in groups of four: URL, protocol, link text, trailing text.
	$s = array_shift( $bits );
	$numBits = count( $bits );

	for ( $pos = 0; $pos < $numBits; $pos += 4 ) {
		$url = $bits[$pos];
		// $bits[$pos + 1] is the protocol, which is not needed here
		$label = $bits[$pos + 2];
		$trail = $bits[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$hit = [];
		if ( preg_match( '/&(lt|gt);/', $url, $hit, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $hit[0][1];
			$label = substr( $url, $cutAt ) . ' ' . $label;
			$url = substr( $url, 0, $cutAt );
		}

		# If the link text is an image URL, replace it with an <img> tag.
		# This happened by accident in the original parser, but some
		# people used it extensively.
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$langObj = $this->getTargetLanguage();
			$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			$linktype = 'text';
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
			$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$link = Linker::makeExternalLink(
			$url,
			$label,
			false,
			$linktype,
			$this->getExternalLinkAttribs( $url ),
			$this->getTitle()
		);
		$s .= $link . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $s;
}
2088 
/**
 * Get the rel attribute value for an external link.
 *
 * Returns 'nofollow' when $wgNoFollowLinks is enabled and neither the
 * namespace of $title nor the domain of $url is listed as an exception.
 *
 * NOTE(review): the global declaration and the domain-exception check
 * were missing from this copy of the file (the configuration variables
 * were read without being in scope and $url was unused); they have been
 * restored here — confirm against the upstream source.
 *
 * @param string|bool $url Target URL, checked against $wgNoFollowDomainExceptions
 * @param object|null $title Title of the page being rendered (only
 *   getNamespace() is called on it); null when unknown
 * @return string|null 'nofollow', or null if no rel value applies
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
	$ns = $title ? $title->getNamespace() : false;
	if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
2109 
/**
 * Build the HTML attributes ('rel' and, if configured, 'target') to put
 * on an external link.
 *
 * @param string $url Target URL, passed to getExternalLinkRel()
 * @return array Attribute map; 'rel' is always present and is null when
 *   no rel value applies
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	// May be null when no rel value applies
	$rel = self::getExternalLinkRel( $url, $this->getTitle() );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// Only add a separator when there is something to separate
			// from: a null $rel must not produce a leading space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2141 
/**
 * Normalize the percent-encoding of a URL: encode unsafe characters,
 * decode escapes that are safe for the component they occur in, and
 * upper-case the remaining escapes. A valid bracketed IPv6 host keeps
 * its literal [..] syntax.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';

	// Decoded IPv6 address of the host, or false if the host is not one
	$ipv6Literal = false;
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $hostMatch ) ) {
		$decodedHost = rawurldecode( $hostMatch[1] );
		if ( IPUtils::isValid( $decodedHost ) ) {
			$ipv6Literal = $decodedHost;
		}
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		static function ( $hit ) {
			return rawurlencode( $hit[0] );
		},
		$url
	);

	// Work backwards through the URL: each component found is normalized
	// and prepended to $ret, and $end moves to where that component began.
	$ret = '';
	$end = strlen( $url );
	$trailingParts = [
		# Fragment part - 'fragment'
		'#' => '"#%<>[\]^`{|}',
		# Query part - 'query' minus &=+;
		'?' => '"#%<>[\]^`{|}&=+;',
	];
	foreach ( $trailingParts as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start === false || $start >= $end ) {
			continue;
		}
		$ret = self::normalizeUrlComponent( substr( $url, $start, $end - $start ), $unsafe ) . $ret;
		$end = $start;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent( substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax: the brackets were encoded above, put them back
	if ( $ipv6Literal !== false ) {
		$ipv6Host = "%5B({$ipv6Literal})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2209 
/**
 * Normalize the percent-escapes in one component of a URL.
 *
 * An escape is decoded when it stands for a printable ASCII character
 * (code 33 to 126) that is not listed in $unsafe; every other escape is
 * kept, with its hex digits upper-cased.
 *
 * @param string $component URL component to process
 * @param string $unsafe Characters that must stay escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		static function ( $hit ) use ( $unsafe ) {
			$escape = $hit[0];
			$decoded = urldecode( $escape );
			$code = ord( $decoded );
			$mustStayEscaped = $code <= 32
				|| $code >= 127
				|| strpos( $unsafe, $decoded ) !== false;
			# Leave it escaped (upper-case a-f), or unescape it
			return $mustStayEscaped ? strtoupper( $escape ) : $decoded;
		},
		$component
	);
}
2224 
/**
 * Build an external image tag for the URL if it looks like an image URL
 * and external images are permitted for it.
 *
 * A URL is permitted when external images are allowed in general, when
 * it starts with one of the configured "allow from" prefixes, or when
 * the image whitelist is enabled and one of its entries matches.
 *
 * @param string $url
 * @return string|bool Result of Linker::makeExternalImage(), or false if
 *   no image was made
 */
private function maybeMakeExternalImage( $url ) {
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
	$hasPrefixRule = !empty( $imagesfrom );

	# $imagesfrom may be a single string or an array of strings
	$prefixMatched = false;
	if ( $hasPrefixRule ) {
		if ( is_array( $imagesfrom ) ) {
			foreach ( $imagesfrom as $candidate ) {
				if ( strpos( $url, $candidate ) === 0 ) {
					$prefixMatched = true;
					break;
				}
			}
		} else {
			$prefixMatched = ( strpos( $url, $imagesfrom ) === 0 );
		}
	}

	$text = false;
	$permitted = $this->mOptions->getAllowExternalImages()
		|| ( $hasPrefixRule && $prefixMatched );
	if ( $permitted && preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
		# Image found
		$text = Linker::makeExternalImage( $url );
	}

	// Consult the whitelist only when no image was made above, the
	// whitelist is enabled, and the URL looks like an image at all.
	if ( $text
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $text;
	}

	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $whitelist as $entry ) {
		# Ignore blank entries and comments
		if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
			continue;
		}
		# Each entry is a regex fragment: escape the delimiter and
		# match case-insensitively
		if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
			# Image matches a whitelist entry
			$text = Linker::makeExternalImage( $url );
			break;
		}
	}
	return $text;
}
2282 
/**
 * Process [[ ]] wikilinks in the text and merge the link holders that
 * were created into this parser's holder array.
 *
 * @param string $text
 * @return string The text as rewritten by handleInternalLinks2()
 */
private function handleInternalLinks( $text ) {
	// handleInternalLinks2() takes $text by reference and rewrites it in place
	$newHolders = $this->handleInternalLinks2( $text );
	$this->mLinkHolders->merge( $newHolders );
	return $text;
}
2294 
2300  private function handleInternalLinks2( &$s ) {
2301  static $tc = false, $e1, $e1_img;
2302  # the % is needed to support urlencoded titles as well
2303  if ( !$tc ) {
2304  $tc = Title::legalChars() . '#%';
2305  # Match a link having the form [[namespace:link|alternate]]trail
2306  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2307  # Match cases where there is no "]]", which might still be images
2308  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2309  }
2310 
2311  $holders = new LinkHolderArray( $this );
2312 
2313  # split the entire text string on occurrences of [[
2314  $a = StringUtils::explode( '[[', ' ' . $s );
2315  # get the first element (all text up to first [[), and remove the space we added
2316  $s = $a->current();
2317  $a->next();
2318  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2319  $s = substr( $s, 1 );
2320 
2321  $nottalk = !$this->getTitle()->isTalkPage();
2322 
2323  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2324  $e2 = null;
2325  if ( $useLinkPrefixExtension ) {
2326  # Match the end of a line for a word that's not followed by whitespace,
2327  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2328  $charset = $this->contLang->linkPrefixCharset();
2329  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2330  $m = [];
2331  if ( preg_match( $e2, $s, $m ) ) {
2332  $first_prefix = $m[2];
2333  } else {
2334  $first_prefix = false;
2335  }
2336  } else {
2337  $prefix = '';
2338  }
2339 
2340  # Some namespaces don't allow subpages
2341  $useSubpages = $this->nsInfo->hasSubpages(
2342  $this->getTitle()->getNamespace()
2343  );
2344 
2345  # Loop for each link
2346  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2347  # Check for excessive memory usage
2348  if ( $holders->isBig() ) {
2349  # Too big
2350  # Do the existence check, replace the link holders and clear the array
2351  $holders->replace( $s );
2352  $holders->clear();
2353  }
2354 
2355  if ( $useLinkPrefixExtension ) {
2356  if ( preg_match( $e2, $s, $m ) ) {
2357  list( , $s, $prefix ) = $m;
2358  } else {
2359  $prefix = '';
2360  }
2361  # first link
2362  if ( $first_prefix ) {
2363  $prefix = $first_prefix;
2364  $first_prefix = false;
2365  }
2366  }
2367 
2368  $might_be_img = false;
2369 
2370  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2371  $text = $m[2];
2372  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2373  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2374  # the real problem is with the $e1 regex
2375  # See T1500.
2376  # Still some problems for cases where the ] is meant to be outside punctuation,
2377  # and no image is in sight. See T4095.
2378  if ( $text !== ''
2379  && substr( $m[3], 0, 1 ) === ']'
2380  && strpos( $text, '[' ) !== false
2381  ) {
2382  $text .= ']'; # so that handleExternalLinks($text) works later
2383  $m[3] = substr( $m[3], 1 );
2384  }
2385  # fix up urlencoded title texts
2386  if ( strpos( $m[1], '%' ) !== false ) {
2387  # Should anchors '#' also be rejected?
2388  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2389  }
2390  $trail = $m[3];
2391  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2392  # Invalid, but might be an image with a link in its caption
2393  $might_be_img = true;
2394  $text = $m[2];
2395  if ( strpos( $m[1], '%' ) !== false ) {
2396  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2397  }
2398  $trail = "";
2399  } else { # Invalid form; output directly
2400  $s .= $prefix . '[[' . $line;
2401  continue;
2402  }
2403 
2404  $origLink = ltrim( $m[1], ' ' );
2405 
2406  # Don't allow internal links to pages containing
2407  # PROTO: where PROTO is a valid URL protocol; these
2408  # should be external links.
2409  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2410  $s .= $prefix . '[[' . $line;
2411  continue;
2412  }
2413 
2414  # Make subpage if necessary
2415  if ( $useSubpages ) {
2417  $this->getTitle(), $origLink, $text
2418  );
2419  } else {
2420  $link = $origLink;
2421  }
2422 
2423  // \x7f isn't a default legal title char, so most likely strip
2424  // markers will force us into the "invalid form" path above. But,
2425  // just in case, let's assert that xmlish tags aren't valid in
2426  // the title position.
2427  $unstrip = $this->mStripState->killMarkers( $link );
2428  $noMarkers = ( $unstrip === $link );
2429 
2430  $nt = $noMarkers ? Title::newFromText( $link ) : null;
2431  if ( $nt === null ) {
2432  $s .= $prefix . '[[' . $line;
2433  continue;
2434  }
2435 
2436  $ns = $nt->getNamespace();
2437  $iw = $nt->getInterwiki();
2438 
2439  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2440 
2441  if ( $might_be_img ) { # if this is actually an invalid link
2442  if ( $ns == NS_FILE && $noforce ) { # but might be an image
2443  $found = false;
2444  while ( true ) {
2445  # look at the next 'line' to see if we can close it there
2446  $a->next();
2447  $next_line = $a->current();
2448  if ( $next_line === false || $next_line === null ) {
2449  break;
2450  }
2451  $m = explode( ']]', $next_line, 3 );
2452  if ( count( $m ) == 3 ) {
2453  # the first ]] closes the inner link, the second the image
2454  $found = true;
2455  $text .= "[[{$m[0]}]]{$m[1]}";
2456  $trail = $m[2];
2457  break;
2458  } elseif ( count( $m ) == 2 ) {
2459  # if there's exactly one ]] that's fine, we'll keep looking
2460  $text .= "[[{$m[0]}]]{$m[1]}";
2461  } else {
2462  # if $next_line is invalid too, we need look no further
2463  $text .= '[[' . $next_line;
2464  break;
2465  }
2466  }
2467  if ( !$found ) {
2468  # we couldn't find the end of this imageLink, so output it raw
2469  # but don't ignore what might be perfectly normal links in the text we've examined
2470  $holders->merge( $this->handleInternalLinks2( $text ) );
2471  $s .= "{$prefix}[[$link|$text";
2472  # note: no $trail, because without an end, there *is* no trail
2473  continue;
2474  }
2475  } else { # it's not an image, so output it raw
2476  $s .= "{$prefix}[[$link|$text";
2477  # note: no $trail, because without an end, there *is* no trail
2478  continue;
2479  }
2480  }
2481 
2482  $wasblank = ( $text == '' );
2483  if ( $wasblank ) {
2484  $text = $link;
2485  if ( !$noforce ) {
2486  # Strip off leading ':'
2487  $text = substr( $text, 1 );
2488  }
2489  } else {
2490  # T6598 madness. Handle the quotes only if they come from the alternate part
2491  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2492  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2493  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2494  $text = $this->doQuotes( $text );
2495  }
2496 
2497  # Link not escaped by : , create the various objects
2498  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2499  # Interwikis
2500  if (
2501  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2502  Language::fetchLanguageName( $iw, null, 'mw' ) ||
2503  in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2504  )
2505  ) {
2506  # T26502: filter duplicates
2507  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2508  $this->mLangLinkLanguages[$iw] = true;
2509  $this->mOutput->addLanguageLink( $nt->getFullText() );
2510  }
2511 
2515  $s = rtrim( $s . $prefix ) . $trail; # T175416
2516  continue;
2517  }
2518 
2519  if ( $ns == NS_FILE ) {
2520  if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->getTitle() ) ) {
2521  if ( $wasblank ) {
2522  # if no parameters were passed, $text
2523  # becomes something like "File:Foo.png",
2524  # which we don't want to pass on to the
2525  # image generator
2526  $text = '';
2527  } else {
2528  # recursively parse links inside the image caption
2529  # actually, this will parse them in any other parameters, too,
2530  # but it might be hard to fix that, and it doesn't matter ATM
2531  $text = $this->handleExternalLinks( $text );
2532  $holders->merge( $this->handleInternalLinks2( $text ) );
2533  }
2534  # cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
2535  $s .= $prefix . $this->armorLinks(
2536  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2537  continue;
2538  }
2539  } elseif ( $ns == NS_CATEGORY ) {
2543  $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2544 
2545  if ( $wasblank ) {
2546  $sortkey = $this->getDefaultSort();
2547  } else {
2548  $sortkey = $text;
2549  }
2550  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2551  $sortkey = str_replace( "\n", '', $sortkey );
2552  $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2553  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2554 
2555  continue;
2556  }
2557  }
2558 
2559  # Self-link checking. For some languages, variants of the title are checked in
2560  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2561  # for linking to a different variant.
2562  if ( $ns != NS_SPECIAL && $nt->equals( $this->getTitle() ) && !$nt->hasFragment() ) {
2563  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2564  continue;
2565  }
2566 
2567  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2568  # @todo FIXME: Should do batch file existence checks, see comment below
2569  if ( $ns == NS_MEDIA ) {
2570  # Give extensions a chance to select the file revision for us
2571  $options = [];
2572  $descQuery = false;
2573  Hooks::run( 'BeforeParserFetchFileAndTitle',
2574  [ $this, $nt, &$options, &$descQuery ] );
2575  # Fetch and register the file (file title may be different via hooks)
2576  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2577  # Cloak with NOPARSE to avoid replacement in handleExternalLinks
2578  $s .= $prefix . $this->armorLinks(
2579  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2580  continue;
2581  }
2582 
2583  # Some titles, such as valid special pages or files in foreign repos, should
2584  # be shown as bluelinks even though they're not included in the page table
2585  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2586  # batch file existence checks for NS_FILE and NS_MEDIA
2587  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2588  $this->mOutput->addLink( $nt );
2589  $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2590  } else {
2591  # Links will be added to the output link list after checking
2592  $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2593  }
2594  }
2595  return $holders;
2596  }
2597 
/**
 * Build the final HTML for a link to a title that is treated as known,
 * rendering it through LinkRenderer::makeKnownLink() and armoring the
 * result so that URLs inside it are not picked up again later.
 *
 * @param Title $nt Link target
 * @param string $text Link text (HTML); when empty, the escaped prefixed
 *   title text is used instead
 * @param string $trail Text following the link; the part that
 *   Linker::splitTrail() assigns to the link is moved inside it
 * @param string $prefix Text placed inside the link, before $text
 * @return string Armored link HTML followed by the remaining trail
 */
private function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	# First element belongs inside the link, second stays outside of it
	$trailParts = Linker::splitTrail( $trail );
	$linkTail = $trailParts[0];
	$remainder = $trailParts[1];

	# Fall back to the title itself when no explicit text was supplied
	$label = ( $text == '' )
		? htmlspecialchars( $nt->getPrefixedText() )
		: $text;

	$renderer = $this->getLinkRenderer();
	$anchor = $renderer->makeKnownLink(
		$nt,
		new HtmlArmor( $prefix . $label . $linkTail )
	);

	return $this->armorLinks( $anchor ) . $remainder;
}
2624 
/**
 * Cloak URL protocols in the given text by inserting the marker prefix
 * followed by "NOPARSE" in front of every match, so that a later
 * external-link pass does not treat them as links.
 *
 * @param string $text
 * @return string Text with each protocol occurrence prefixed
 */
private function armorLinks( $text ) {
	# Case-insensitive match of any configured protocol at a word boundary
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$cloaked = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $cloaked, $text );
}
2639 
/**
 * Run the block-level pass over the text. This is a thin wrapper that
 * hands both arguments unchanged to BlockLevelPass::doBlockLevels().
 *
 * @param string $text
 * @param bool $linestart Forwarded as-is (presumably whether $text begins
 *   at the start of a line -- confirm against BlockLevelPass)
 * @return string Result of the block-level pass
 */
public function doBlockLevels( $text, $linestart ) {
	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $processed;
}
2651 
/**
 * Compute the value of a magic variable such as PAGENAME or CURRENTDAY.
 *
 * Values computed by the switch are stored in $this->mVarCache and reused on
 * later calls, unless the ParserGetVariableValueVarCache hook vetoes the
 * cache. Site configuration variables (articlepath, sitename, server, ...),
 * the direction mark and unknown variables return directly and are not
 * written to the cache by this method.
 *
 * @param string $index Magic variable identifier, e.g. 'pagename'
 * @param bool|PPFrame $frame Only handed (by reference) to the
 *   ParserGetVariableValueSwitch hook for variables not known here
 * @return string|int|null Value of the variable; for unknown variables,
 *   whatever the ParserGetVariableValueSwitch hook left in its result slot
 */
private function expandMagicVariable( $index, $frame = false ) {
	// Hooks receive the parser by reference; use a local alias because
	// passing $this itself by reference triggers a PHP 7.1 warning
	$self = $this;

	$cacheAllowed = Hooks::run(
		'ParserGetVariableValueVarCache',
		[ &$self, &$this->mVarCache ]
	);
	if ( $cacheAllowed && isset( $this->mVarCache[$index] ) ) {
		return $this->mVarCache[$index];
	}

	# Timestamp of this parse; extensions may override it through the hook
	$now = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', [ &$self, &$now ] );

	$lang = $this->getFunctionLang();

	# Shorthands for formatting the parse timestamp through
	# MWTimestamp::getInstance() and MWTimestamp::getLocalInstance()
	$utc = static function ( $format ) use ( $now ) {
		return MWTimestamp::getInstance( $now )->format( $format );
	};
	$local = static function ( $format ) use ( $now ) {
		return MWTimestamp::getLocalInstance( $now )->format( $format );
	};

	switch ( $index ) {
		case '!':
			$out = '|';
			break;

		# --- Dates taken from MWTimestamp::getInstance() ---
		case 'currentmonth':
			$out = $lang->formatNum( $utc( 'm' ), true );
			break;
		case 'currentmonth1':
			$out = $lang->formatNum( $utc( 'n' ), true );
			break;
		case 'currentmonthname':
			$out = $lang->getMonthName( $utc( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$out = $lang->getMonthNameGen( $utc( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$out = $lang->getMonthAbbreviation( $utc( 'n' ) );
			break;
		case 'currentday':
			$out = $lang->formatNum( $utc( 'j' ), true );
			break;
		case 'currentday2':
			$out = $lang->formatNum( $utc( 'd' ), true );
			break;

		# --- Dates taken from MWTimestamp::getLocalInstance() ---
		case 'localmonth':
			$out = $lang->formatNum( $local( 'm' ), true );
			break;
		case 'localmonth1':
			$out = $lang->formatNum( $local( 'n' ), true );
			break;
		case 'localmonthname':
			$out = $lang->getMonthName( $local( 'n' ) );
			break;
		case 'localmonthnamegen':
			$out = $lang->getMonthNameGen( $local( 'n' ) );
			break;
		case 'localmonthabbrev':
			$out = $lang->getMonthAbbreviation( $local( 'n' ) );
			break;
		case 'localday':
			$out = $lang->formatNum( $local( 'j' ), true );
			break;
		case 'localday2':
			$out = $lang->formatNum( $local( 'd' ), true );
			break;

		# --- Names derived from the title being parsed ---
		case 'pagename':
			$pageTitle = $this->getTitle();
			$out = wfEscapeWikiText( $pageTitle->getText() );
			break;
		case 'pagenamee':
			$pageTitle = $this->getTitle();
			$out = wfEscapeWikiText( $pageTitle->getPartialURL() );
			break;
		case 'fullpagename':
			$pageTitle = $this->getTitle();
			$out = wfEscapeWikiText( $pageTitle->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$pageTitle = $this->getTitle();
			$out = wfEscapeWikiText( $pageTitle->getPrefixedURL() );
			break;
		case 'subpagename':
			$pageTitle = $this->getTitle();
			$out = wfEscapeWikiText( $pageTitle->getSubpageText() );
			break;
		case 'subpagenamee':
			$pageTitle = $this->getTitle();
			$out = wfEscapeWikiText( $pageTitle->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$pageTitle = $this->getTitle();
			$out = wfEscapeWikiText( $pageTitle->getRootText() );
			break;
		case 'rootpagenamee':
			$rootDbKey = str_replace( ' ', '_', $this->getTitle()->getRootText() );
			$out = wfEscapeWikiText( wfUrlencode( $rootDbKey ) );
			break;
		case 'basepagename':
			$pageTitle = $this->getTitle();
			$out = wfEscapeWikiText( $pageTitle->getBaseText() );
			break;
		case 'basepagenamee':
			$baseDbKey = str_replace( ' ', '_', $this->getTitle()->getBaseText() );
			$out = wfEscapeWikiText( wfUrlencode( $baseDbKey ) );
			break;
		case 'talkpagename':
			# Empty when the title cannot have a talk page
			$pageTitle = $this->getTitle();
			$out = $pageTitle->canHaveTalkPage()
				? wfEscapeWikiText( $pageTitle->getTalkPage()->getPrefixedText() )
				: '';
			break;
		case 'talkpagenamee':
			$pageTitle = $this->getTitle();
			$out = $pageTitle->canHaveTalkPage()
				? wfEscapeWikiText( $pageTitle->getTalkPage()->getPrefixedURL() )
				: '';
			break;
		case 'subjectpagename':
			$subject = $this->getTitle()->getSubjectPage();
			$out = wfEscapeWikiText( $subject->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$subject = $this->getTitle()->getSubjectPage();
			$out = wfEscapeWikiText( $subject->getPrefixedURL() );
			break;

		# --- Page and revision metadata ---
		case 'pageid': // requested in T25427
			# The canonical output after page insertion needs a parse that
			# used the exact page ID, so tell the edit saving system about it
			$this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
			$out = $this->getTitle()->getArticleID();
			if ( !$out ) {
				# No ID yet: fall back to the speculative one, if any
				$out = $this->mOptions->getSpeculativePageId();
				if ( $out ) {
					$this->mOutput->setSpeculativePageIdUsed( $out );
				}
			}
			break;
		case 'revisionid':
			$nsId = $this->getTitle()->getNamespace();
			// @TODO: disallow this word on all namespaces
			$useStub = $this->svcOptions->get( 'MiserMode' ) &&
				!$this->mOptions->getInterfaceMessage() &&
				$this->nsInfo->isSubject( $nsId ) &&
				!in_array( $nsId, [ NS_USER, NS_PROJECT ], true );
			if ( !$useStub ) {
				# The canonical output after revision insertion needs a parse
				# that used the exact revision ID
				$this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
				$out = $this->getRevisionId();
				if ( $out === 0 ) {
					$revision = $this->getRevisionObject();
					if ( $revision ) {
						$out = $revision->getId();
					}
				}
				if ( !$out ) {
					$out = $this->mOptions->getSpeculativeRevId();
					if ( $out ) {
						$this->mOutput->setSpeculativeRevIdUsed( $out );
					}
				}
			} elseif ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
				// Stub result instead of the actual revision ID, to avoid double
				// parses on page save while still allowing preview detection (T137900)
				$out = '-';
			} else {
				$this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
				$out = '';
			}
			break;
		case 'revisionday':
			$out = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
			break;
		case 'revisionday2':
			$out = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
			break;
		case 'revisionmonth':
			$out = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
			break;
		case 'revisionmonth1':
			$out = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
			break;
		case 'revisionyear':
			$out = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
			break;
		case 'revisiontimestamp':
			$out = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
			break;
		case 'revisionuser':
			# The canonical output after revision insertion needs a parse
			# that used the actual user
			$this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
			$out = $this->getRevisionUser();
			break;
		case 'revisionsize':
			$out = $this->getRevisionSize();
			break;

		# --- Namespaces ---
		case 'namespace':
			$nsText = $this->contLang->getNsText( $this->getTitle()->getNamespace() );
			$out = str_replace( '_', ' ', $nsText );
			break;
		case 'namespacee':
			$nsText = $this->contLang->getNsText( $this->getTitle()->getNamespace() );
			$out = wfUrlencode( $nsText );
			break;
		case 'namespacenumber':
			$out = $this->getTitle()->getNamespace();
			break;
		case 'talkspace':
			if ( $this->getTitle()->canHaveTalkPage() ) {
				$out = str_replace( '_', ' ', $this->getTitle()->getTalkNsText() );
			} else {
				$out = '';
			}
			break;
		case 'talkspacee':
			if ( $this->getTitle()->canHaveTalkPage() ) {
				$out = wfUrlencode( $this->getTitle()->getTalkNsText() );
			} else {
				$out = '';
			}
			break;
		case 'subjectspace':
			$out = str_replace( '_', ' ', $this->getTitle()->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$out = wfUrlencode( $this->getTitle()->getSubjectNsText() );
			break;

		# --- More date and time values ---
		case 'currentdayname':
			$out = $lang->getWeekdayName( (int)$utc( 'w' ) + 1 );
			break;
		case 'currentyear':
			$out = $lang->formatNum( $utc( 'Y' ), true );
			break;
		case 'currenttime':
			$out = $lang->time( wfTimestamp( TS_MW, $now ), false, false );
			break;
		case 'currenthour':
			$out = $lang->formatNum( $utc( 'H' ), true );
			break;
		case 'currentweek':
			# @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$out = $lang->formatNum( (int)$utc( 'W' ) );
			break;
		case 'currentdow':
			$out = $lang->formatNum( $utc( 'w' ) );
			break;
		case 'localdayname':
			$out = $lang->getWeekdayName( (int)$local( 'w' ) + 1 );
			break;
		case 'localyear':
			$out = $lang->formatNum( $local( 'Y' ), true );
			break;
		case 'localtime':
			$out = $lang->time( $local( 'YmdHis' ), false, false );
			break;
		case 'localhour':
			$out = $lang->formatNum( $local( 'H' ), true );
			break;
		case 'localweek':
			# @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$out = $lang->formatNum( (int)$local( 'W' ) );
			break;
		case 'localdow':
			$out = $lang->formatNum( $local( 'w' ) );
			break;

		# --- Site statistics ---
		case 'numberofarticles':
			$out = $lang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$out = $lang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$out = $lang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$out = $lang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$out = $lang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$out = $lang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$out = $lang->formatNum( SiteStats::edits() );
			break;

		case 'currenttimestamp':
			$out = wfTimestamp( TS_MW, $now );
			break;
		case 'localtimestamp':
			$out = $local( 'YmdHis' );
			break;
		case 'currentversion':
			$out = SpecialVersion::getVersion();
			break;

		# --- Returned directly, without touching the variable cache ---
		case 'articlepath':
			return $this->svcOptions->get( 'ArticlePath' );
		case 'sitename':
			return $this->svcOptions->get( 'Sitename' );
		case 'server':
			return $this->svcOptions->get( 'Server' );
		case 'servername':
			return $this->svcOptions->get( 'ServerName' );
		case 'scriptpath':
			return $this->svcOptions->get( 'ScriptPath' );
		case 'stylepath':
			return $this->svcOptions->get( 'StylePath' );
		case 'directionmark':
			return $lang->getDirMark();
		case 'contentlanguage':
			return $this->svcOptions->get( 'LanguageCode' );

		case 'pagelanguage':
			$out = $lang->getCode();
			break;
		case 'cascadingsources':
			$out = CoreParserFunctions::cascadingsources( $this );
			break;

		default:
			# Not a variable known here: let extensions supply the value
			$hookResult = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				[ &$self, &$this->mVarCache, &$index, &$hookResult, &$frame ]
			);

			return $hookResult;
	}

	# Remember the computed value for later lookups of the same variable
	if ( $index ) {
		$this->mVarCache[$index] = $out;
	}

	return $out;
}
3001 
/**
 * Extract part of the timezone-adjusted revision timestamp, flagging the
 * output as varying on the revision timestamp when the extracted part may
 * change by the time a revision is actually saved.
 *
 * @param int $start Offset of the wanted part within the timestamp
 * @param int $len Length of the wanted part
 * @param int $mtts Number of seconds to look ahead from the current time
 * @param string $variable Name of the magic variable, used in the flag reason
 * @return string The requested part of the revision timestamp
 */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
	# Part of the timezone-adjusted timestamp used for this revision
	$current = substr( $this->getRevisionTimestamp(), $start, $len );

	# With an associated revision there is nothing left to vary on
	if ( $this->getRevisionObject() ) {
		return $current;
	}

	# Same part of the timezone-adjusted timestamp $mtts seconds from now.
	# "Now" is the real current time here, not the time of the parser
	# options, so the rendered value can be compared against the one
	# derived from the parser options.
	$futureStamp = wfTimestamp( TS_MW, time() + $mtts );
	$future = substr( $this->contLang->userAdjust( $futureStamp, '' ), $start, $len );

	if ( $current !== $future ) {
		# Getting the canonical output after revision insertion requires a
		# parse that used an actual revision timestamp
		$this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
	}

	return $current;
}
3033 
/**
 * Populate $this->mVariables and $this->mSubstWords with magic word arrays
 * built from the variable IDs and the subst IDs known to the magic word
 * factory.
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$this->mVariables = $factory->newArray( $factory->getVariableIDs() );
	$this->mSubstWords = $factory->newArray( $factory->getSubstIDs() );
}
3045 
/**
 * Run the preprocessor over wikitext and return the resulting document
 * object, exactly as produced by the preprocessor's preprocessToObj().
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Bit field of PTD_* flags (e.g. self::PTD_FOR_INCLUSION),
 *   forwarded unchanged to the preprocessor
 * @return PPNode Whatever preprocessToObj() returns (presumably the root
 *   node of the parsed tree -- confirm against the Preprocessor class)
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3072 
/**
 * Preprocess the text and expand it in the given frame.
 *
 * Empty text, and text longer than the maximum include size of the parser
 * options, is returned unchanged.
 *
 * @param string $text Wikitext to expand
 * @param bool|PPFrame|array $frame Frame to expand in. False creates a new
 *   frame; anything that is not a PPFrame is handed to the preprocessor's
 *   newCustomFrame() to build one
 * @param bool $argsOnly When true, expansion runs with PPFrame::NO_TEMPLATES
 * @return string Expanded text
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Nothing to expand in empty text
	$byteCount = strlen( $text );
	if ( $byteCount < 1 ) {
		return $text;
	}
	# Prevent too big inclusions
	if ( $byteCount > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			$this->logger->debug(
				__METHOD__ . " called using plain parameters instead of " .
				"a PPFrame instance. Creating custom frame."
			);
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $this->preprocessToDom( $text ), $expandFlags );
}
3116 
/**
 * Record that a parser limitation was hit: add the message
 * "<limitationType>-warning" to the output warnings and put the page in
 * the tracking category "<limitationType>-category".
 *
 * @param string $limitationType Key prefix of the limitation
 * @param string|int $current Current value, passed as first numeric
 *   message parameter
 * @param string|int $max Maximum allowed value, passed as second numeric
 *   message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max is harmless even when the message does not
	# use them. The message is deliberately not rendered via
	# ->inLanguage( $this->mOptions->getUserLangObj() ): it is only shown
	# during preview, and that would split the parser cache unnecessarily.
	$warningText = wfMessage( $limitationType . '-warning' )
		->numParams( $current, $max )
		->text();

	$this->mOutput->addWarning( $warningText );
	$this->addTrackingCategory( $limitationType . '-category' );
}
3153 
/**
 * Expand a double-brace construct ({{...}}): a subst: marker, a magic
 * variable, a parser function call, or a template / special page /
 * interwiki transclusion, tried in that order.
 *
 * @param array $piece Parsed construct; the keys read here are 'title'
 *   (node holding the part before the first pipe), 'parts' (argument nodes,
 *   may be empty) and 'lineStart' (whether the construct starts a line)
 * @param PPFrame $frame Frame in which the construct is expanded
 * @return array Either [ 'text' => string ] for expanded text, or
 *   [ 'object' => mixed ] when the result is to be treated as an object
 *   in the current frame (including the case where nothing matched and the
 *   original source is recovered)
 */
public function braceSubstitution( $piece, $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];
	# Without parts, $args is a plain empty array rather than a node list
	# object, so it has no getLength() method. Work out the argument count
	# once, in a way that is valid for both shapes.
	$argsLength = is_array( $args ) ? count( $args ) : $args->getLength();

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $argsLength == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->expandMagicVariable( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			# First function argument is the text after the colon, the
			# remaining ones are the unexpanded argument nodes
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = Linker::normalizeSubpageLink(
			$this->getTitle(), $part1, $subpage
		);
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# preg_replace() returns the new string rather than modifying its
			# subject, so the result has to be assigned; otherwise a title
			# that already starts with ':' would end up as "[[::...]]".
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3501 
/**
 * Call a registered parser function and normalise what it returns.
 *
 * @param PPFrame $frame Frame used to expand node arguments
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments for the function
 * @return array [ 'found' => false ] when no synonym matches the name;
 *  otherwise the hook's result as an array with 'found' => true and,
 *  where the hook supplied one, 'text'. If the hook returned
 *  'noparse' => false, 'text' is a preprocessor DOM and 'isChildObj' is true.
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	# Resolve the name to a hook ID. Case-sensitive synonyms are tried
	# first, then the name lowercased in the content language.
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$hookId = $this->mFunctionSynonyms[1][$function];
	} else {
		$lowered = $this->contLang->lc( $function );
		if ( !isset( $this->mFunctionSynonyms[0][$lowered] ) ) {
			return [ 'found' => false ];
		}
		$hookId = $this->mFunctionSynonyms[0][$lowered];
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$hookId];

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	$callArgs = [ &$parser ];

	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Object-args hooks get ( $parser, $frame, $nodes ): anything that is
		# not already a node (except the entry at key 0) is wrapped in one.
		$nodes = [];
		foreach ( $args as $key => $value ) {
			$nodes[] = ( $value instanceof PPNode || $key === 0 )
				? $value
				: $this->mPreprocessor->newPartNodeArray( [ $key => $value ] )->item( 0 );
		}
		$callArgs[] = $frame;
		$callArgs[] = $nodes;
	} else {
		# Plain hooks get ( $parser, $arg1, $arg2, ... ) as trimmed strings
		foreach ( $args as $key => $value ) {
			if ( $value instanceof PPNode ) {
				$plain = $frame->expand( $value );
			} elseif ( is_int( $key ) && $key >= 0 ) {
				$plain = $value;
			} else {
				$plain = "$key=$value";
			}
			$callArgs[] = trim( $plain );
		}
	}

	$result = $callback( ...$callArgs );

	# Hooks may return either a wikitext string or an array holding the
	# string (at index 0 or 'text') plus flags; bring both into one shape.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [ 'found' => true ];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	$noparse = $result['noparse'] ?? true;
	if ( !$noparse ) {
		$preprocessFlags = $result['preprocessFlags'] ?? 0;
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3603 
/**
 * Get the preprocessor DOM of a template, using the per-parser DOM and
 * redirect caches where possible.
 *
 * @param Title $title
 * @return array [ DOM or false when no text could be fetched, final Title ]
 */
public function getTemplateDom( $title ) {
	$requestedTitle = $title;
	$domKey = $title->getPrefixedDBkey();

	# If this title is already known to redirect, look up the target instead
	if ( isset( $this->mTplRedirCache[$domKey] ) ) {
		$target = $this->mTplRedirCache[$domKey];
		$title = Title::makeTitle( $target[0], $target[1] );
		$domKey = $title->getPrefixedDBkey();
	}

	# A cached false (template has no text) also counts as a hit
	if ( isset( $this->mTplDomCache[$domKey] ) ) {
		return [ $this->mTplDomCache[$domKey], $title ];
	}

	# Cache miss, go to the database
	$fetched = $this->fetchTemplateAndTitle( $title );
	$text = $fetched[0];
	$title = $fetched[1];

	if ( $text === false ) {
		$this->mTplDomCache[$domKey] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$domKey] = $dom;

	# Remember that the requested title resolved to a different one
	if ( !$title->equals( $requestedTitle ) ) {
		$redirectTarget = [ $title->getNamespace(), $title->getDBkey() ];
		$this->mTplRedirCache[$requestedTitle->getPrefixedDBkey()] = $redirectTarget;
	}

	return [ $dom, $title ];
}
3643 
/**
 * Fetch the current revision of a title through the callback configured in
 * the parser options, memoising the answer in a 100-entry LRU cache keyed
 * by the title's prefixed DB key.
 *
 * @param Title $title
 * @return mixed Whatever the current-revision callback returned
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$key = $title->getPrefixedDBkey();

	# The cache is created lazily on first use
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	$revCache = $this->currentRevisionCache;

	if ( !$revCache->has( $key ) ) {
		// Defaults to Parser::statelessFetchRevision()
		$fetcher = $this->mOptions->getCurrentRevisionCallback();
		$revCache->set( $key, call_user_func( $fetcher, $title, $this ) );
	}

	return $revCache->get( $key );
}
3668 
/**
 * Check whether fetchCurrentRevisionOfTitle() already has a cached answer
 * for the given title.
 *
 * @param Title $title
 * @return bool
 */
public function isCurrentRevisionOfTitleCached( $title ) {
	# The lookup key must be the one fetchCurrentRevisionOfTitle() stores
	# under, i.e. the prefixed DB key. The prefixed text form uses spaces
	# instead of underscores and would never match multi-word titles.
	return (
		$this->currentRevisionCache &&
		$this->currentRevisionCache->has( $title->getPrefixedDBkey() )
	);
}
3681 
/**
 * Default current-revision callback (see fetchCurrentRevisionOfTitle()).
 *
 * @param Title $title
 * @param Parser|bool $parser Unused here
 * @return Revision|bool
 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	# NOTE(review): this listing returned $rev without ever assigning it.
	# The lookup below is restored on the assumption that this method wraps
	# Revision::newKnownCurrent() on a replica connection - confirm against
	# the upstream file before relying on it.
	$rev = Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );

	return $rev;
}
3696 
/**
 * Fetch the unparsed text of a template through the template callback and
 * register every dependency the callback reports with the parser output.
 *
 * @param Title $title
 * @return array [ text (string, or whatever non-string the callback gave), final Title ]
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetcher = $this->mOptions->getTemplateCallback();
	$fetched = call_user_func( $fetcher, $title, $this );

	$rev = $fetched['revision'] ?? null;
	$dependencies = $fetched['deps'] ?? [];
	$finalTitle = $fetched['finalTitle'] ?? $title;

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = str_replace( "\x7f", '?', $text );
	}

	foreach ( $dependencies as $dep ) {
		$depTitle = $dep['title'];
		$this->mOutput->addTemplate( $depTitle, $dep['page_id'], $dep['rev_id'] );
		if ( $depTitle->equals( $this->getTitle() ) && $rev instanceof Revision ) {
			// Self-transclusion; final result may change based on the new page version
			$this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
			$this->getOutput()->setRevisionUsedSha1Base36( $rev->getSha1() );
		}
	}

	return [ $text, $finalTitle ];
}
3724 
/**
 * Fetch only the text of a template; convenience wrapper around
 * fetchTemplateAndTitle() that drops the final title.
 *
 * @param Title $title
 * @return string|bool
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );
	return $text;
}
3733 
/**
 * Static template fetcher: loads the text for a title, following at most
 * one redirect, and collects the pages/revisions that were consulted.
 *
 * @param Title $title
 * @param Parser|bool $parser When given, revisions are fetched through
 *  $parser->fetchCurrentRevisionOfTitle()
 * @return array With keys 'revision', 'text' (false when nothing usable
 *  was found), 'finalTitle' and 'deps' (list of arrays with 'title',
 *  'page_id' and 'rev_id')
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = false;
	$skip = false;
	$finalTitle = $title;
	$deps = [];
	$rev = null;

	# Fetch the article, with up to 1 redirect: at most two rounds
	$round = 0;
	while ( $round < 2 && is_object( $title ) ) {
		$round++;

		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}

		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} else {
			$rev = $parser
				? $parser->fetchCurrentRevisionOfTitle( $title )
				: Revision::newFromTitle( $title );
		}
		$revId = $rev ? $rev->getId() : 0;

		# If there is no current revision, there is no page
		if ( !$rev && $id === false ) {
			MediaWikiServices::getInstance()->getLinkCache()->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $revId
		];

		if ( $rev ) {
			$revTitle = $rev->getTitle();
			if ( !$title->equals( $revTitle ) ) {
				# We fetched a rev from a different title; register it too...
				$deps[] = [
					'title' => $revTitle,
					'page_id' => $rev->getPage(),
					'rev_id' => $revId
				];
			}

			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			Hooks::run( 'ParserFetchTemplate',
				[ $parser, $title, $rev, &$text, &$deps ] );

			if ( in_array( $text, [ false, null ], true ) ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but the page may still be backed by an interface message
			$msgKey = MediaWikiServices::getInstance()->getContentLanguage()
				->lcfirst( $title->getText() );
			$message = wfMessage( $msgKey )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}

		if ( !$content ) {
			break;
		}

		# Redirect? The next round runs only if there is a target object
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	return [
		'revision' => $rev,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
}
3831 
/**
 * Fetch a file and register it (and, if the file lives under a different
 * title, that title as well) as an image dependency of the output.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array [ file or false, Title of the file that was actually found ]
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( !$file ) {
		return [ $file, $title ];
	}

	$actualTitle = $file->getTitle();
	if ( !$title->equals( $actualTitle ) ) {
		# Update fetched file title
		$title = $actualTitle;
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}

	return [ $file, $title ];
}
3853 
/**
 * Look up a file without registering it as a dependency of the output.
 *
 * Both lookups go through the repo group obtained from MediaWikiServices,
 * so the two branches share one access path instead of mixing the service
 * with the static RepoGroup::singleton() accessor.
 *
 * @param Title $title
 * @param array $options Recognised keys: 'broken' (forces a false result),
 *  'sha1' (look up by key instead of by name); the whole array is also
 *  passed on to the repo group
 * @return File|bool
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		return false; // broken thumbnail forced by hook
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();
	if ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $title, $options );
}
3874 
/**
 * Transclude a page from another wiki by fetching it over HTTP, with the
 * response cached in the main WAN object cache.
 *
 * @param Title $title
 * @param string $action Value of the 'action' URL parameter
 * @return string The fetched content, or a content-language error message
 */
public function interwikiTransclude( $title, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$isKnownWiki = ( $wikiId !== false );

	// Captured here because __METHOD__ inside the closure would name the closure
	$fname = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$isKnownWiki ? $wikiId : 'external',
		sha1( $url )
	);

	$fetch = function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
		$req = MWHttpRequest::factory( $url, [], $fname );

		$status = $req->execute(); // Status object
		$succeeded = $status->isOK();
		if ( !$succeeded ) {
			// Failed requests are not stored
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $succeeded ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $isKnownWiki
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback(
		$cacheKey,
		$this->svcOptions->get( 'TranscludeCacheExpiry' ),
		$fetch,
		$cacheOptions
	);

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3942 
/**
 * Substitute a triple-brace argument with its value from the frame, its
 * default, or (when neither applies) the reconstructed source.
 *
 * @param array $piece Must provide 'title' and 'parts'
 * @param PPFrame $frame
 * @return array Either [ 'object' => ... ] or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$defaults = $piece['parts'];
	$rawName = $frame->expand( $piece['title'] );
	$value = $frame->getArgument( trim( $rawName ) );
	$missing = ( $value === false );

	$node = false;
	if ( $missing && $defaults->getLength() > 0 ) {
		$defaultsApply = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultsApply ) {
			# No match in frame, use the supplied default
			$node = $defaults->item( 0 )->getChildren();
		}
	}

	# The size is counted even when the argument is missing (length 0 then)
	$withinLimit = $this->incrementIncludeSize( 'arg', strlen( $value ) );
	if ( !$withinLimit ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $missing && $node === false ) {
		# No match anywhere
		$node = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $rawName, $defaults );
	}

	if ( $node !== false ) {
		return [ 'object' => $node ];
	}

	# The warning comment only appears when text (not an object) is returned
	if ( !$withinLimit ) {
		$value .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	return [ 'text' => $value ];
}
3989 
/**
 * Return the text to be used for a given extension tag: either the output
 * of the tag hook, or the reconstructed tag source, usually hidden behind
 * a strip marker.
 *
 * @param array $params Uses 'name', and optionally 'attr', 'inner',
 *  'close' and 'attributes'
 * @param PPFrame $frame
 * @throws MWException If a hook reports an unknown 'markerType'
 * @return string The strip marker, the raw output (marker type 'none'),
 *  or an expansion error string punted up unchanged
 */
public function extensionSubstitution( $params, $frame ) {
	static $errorStr = '<span class="error">';
	static $errorLen = 20;

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	// Only inspect real strings: a null passed to substr() is deprecated
	// as of PHP 8.1 and can never equal the error prefix anyway.
	if ( $attrText !== null && substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	// Function tag output is returned as-is rather than behind a marker
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			// Array union: attributes parsed from the tag text take precedence
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];

			// Avoid PHP 7.1 warning from passing $this by reference
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// Not rendering: rebuild the tag's source text instead of running the hook
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			// isset() treats a missing 'close' entry like an explicit null
			// instead of raising an undefined-index notice.
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4100 
/**
 * Add $size to the running include-size counter for $type, unless doing so
 * would exceed the maximum include size from the parser options.
 *
 * @param string $type Key into the include-size counters
 * @param int $size Size to add
 * @return bool False (counter untouched) if the limit would be exceeded
 */
public function incrementIncludeSize( $type, $size ) {
	$projected = $this->mIncludeSizes[$type] + $size;
	if ( $projected > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $projected;
	return true;
}
4116 
/**
 * Count one more expensive parser function call. The counter is
 * incremented even when the limit has already been passed.
 *
 * @return bool False once the count exceeds the configured limit
 */
public function incrementExpensiveFunctionCount() {
	$count = ++$this->mExpensiveFunctionCount;
	return $count <= $this->mOptions->getExpensiveParserFunctionLimit();
}
4126 
/**
 * Strip double-underscore magic words such as __NOGALLERY__ from the text
 * and record what they ask for on the parser and its output.
 *
 * @param string $text
 * @return string Text with the magic words removed; the first __TOC__ is
 *  replaced by a placeholder comment
 */
private function handleDoubleUnderscore( $text ) {
	# __TOC__ is handled first because its position needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# The first occurrence becomes a placeholder that is filled in with
		# the TOC at the end; every further occurrence is dropped.
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );
		$text = $tocWord->replace( '', $text );
	}

	# Now match and remove the rest of them
	$this->mDoubleUnderscores = $this->magicWordFactory
		->getDoubleUnderscoreArray()
		->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}

	# An explicitly positioned TOC wins over __NOTOC__
	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->getTitle()->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ always overrides __NOINDEX__, see T16899: it is applied last.
	$indexPolicies = [
		'noindex' => 'noindex-category',
		'index' => 'index-category',
	];
	foreach ( $indexPolicies as $policy => $trackingCategory ) {
		if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->getTitle()->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( $trackingCategory );
		}
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4181 
/**
 * Add a tracking category for the page being parsed; delegates to the
 * parser output.
 *
 * @param string $msg Identifier of the tracking category
 * @return mixed Whatever the output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$page = $this->getTitle();
	return $this->mOutput->addTrackingCategory( $msg, $page );
}
4190 
/**
 * Post-process the headings of rendered text. In one pass over all
 * <h1>..<h6> elements this:
 *  - builds the table of contents and its numbering,
 *  - derives a unique anchor (plus optional fallback anchor) per heading,
 *  - optionally prefixes headings with their number,
 *  - emits <mw:editsection> markers for section edit links,
 *  - collects the section tree for the parser output,
 * then splits the text at the headings and re-inserts the rebuilt ones.
 *
 * @param string $text Rendered text containing the heading elements
 * @param string $origText Original wikitext, used to compute section offsets
 * @param bool $isMain When true, the section tree is stored on the output
 *  and the TOC may be placed in front of the first heading
 * @return string
 */
private function finalizeHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$full = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->getTitle()->getPrefixedDBkey();
	# The original wikitext is re-preprocessed in OT_WIKI mode so the DOM
	# can be walked alongside the headings; the old type is restored below.
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Lower-cased anchors already handed out, used to keep anchors unique
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				// @phan-suppress-next-line PhanTypeInvalidDimOffset
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					// @phan-suppress-previous-line PhanTypeInvalidDimOffset
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only track the fallback anchor when there is one. Recording a
		# false key would be cast to the array key 0, and a later heading
		# whose id is "0" would then wrongly be treated as a duplicate.
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine( $linkAnchor, $tocline,
				$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			# NOTE(review): despite the name, this adds mb_strlen() results,
			# i.e. character counts rather than byte counts - confirm which
			# unit consumers of 'byteoffset' expect.
			$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
				$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->getTitle()->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline( $level,
			$matches['attrib'][$headlineCount], $anchor, $headline,
			$editlink, $fallbackAnchor );

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		// The section text is handed to the hook by reference, so
		// handlers can modify it in place.
		Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full .= implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	} else {
		return $full;
	}
}
4596 
/**
 * Transform wikitext for saving: strips NUL bytes, normalizes line
 * endings, optionally runs the second pre-save pass (signatures, subst,
 * pipe tricks) and unstrips the result.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title the text belongs to
 * @param User $user User performing the save; used for signatures
 * @param ParserOptions $options
 * @param bool $clearState Whether to lock the parser and clear its state first
 * @return string Transformed wikitext
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	// Keep the lock handle in a local for the whole call
	// (presumably released when it goes out of scope -- see lock()).
	$scopedLock = $clearState ? $this->lock() : null;

	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174)
	$withoutNul = str_replace( "\000", '', $text );

	// Line endings are still normalized here for backwards-compatibility
	// with callers that invoke PST directly, although TextContent
	// subclasses should already have done so.
	$text = TextContent::normalizeLineEndings( $withoutNul );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$result = $this->mStripState->unstripBoth( $text );

	# Reset
	$this->setUser( null );

	return $result;
}
4634 
/**
 * Second pass of the pre-save transform: expands {{subst:...}}, replaces
 * signature tildes and applies the "pipe trick" link shortcuts.
 *
 * @param string $text Wikitext
 * @param User $user User whose signature replaces ~~~ / ~~~~
 * @return string Transformed wikitext
 */
private function pstPass2( $text, $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call.

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		# strtr() tries the longest key first, so five tildes win over four and three
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( 'user-signature', 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# Fullwidth ("double-width") punctuation, spelled as UTF-8 byte escapes so
	# that it cannot silently be normalized to its ASCII look-alike. With ASCII
	# parentheses $p4 would contain no delimiter at all and would rewrite
	# every plain "[[Foo|]]" into "[[Foo|F]]".
	$fwOpen = "\xEF\xBC\x88"; # U+FF08 FULLWIDTH LEFT PARENTHESIS
	$fwClose = "\xEF\xBC\x89"; # U+FF09 FULLWIDTH RIGHT PARENTHESIS
	$fwComma = "\xEF\xBC\x8C"; # U+FF0C FULLWIDTH COMMA

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page<fullwidth ( >context<fullwidth ) >|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?$fwOpen$tc+$fwClose)\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |$fwComma)$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4704 
/**
 * Build the wikitext signature for a user.
 *
 * A custom ("fancy") signature is returned after validation and cleaning;
 * otherwise the 'signature' / 'signature-anon' message is rendered with
 * the escaped user name and nickname.
 *
 * @param User &$user
 * @param string|false $nickname Nickname to use, or false to read the
 *   user's 'nickname' option
 * @param bool|null $fancySig Whether the nickname is raw signature markup,
 *   or null to read the user's 'fancysig' option
 * @return string Signature wikitext
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Fill in whatever the caller did not supply from the user's preferences
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	$hasNickname = ( $nickname !== null && $nickname !== '' );
	if ( !$hasNickname ) {
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' ) ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# The signature may contain markup, so it has to validate first
		$isValid = ( $this->validateSig( $nickname ) !== false );
		if ( $isValid ) {
			# Clean up (if needed) and hand it back as is
			return $this->cleanSig( $nickname, true );
		}
		# Validation failed: use the plain user name instead
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
	}

	# A nickname must not itself contain signature tildes
	$nickname = self::cleanSigInSig( $nickname );

	# Still here: produce a link to the user page via the signature message
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	if ( $user->isAnon() ) {
		$msgName = 'signature-anon';
	} else {
		$msgName = 'signature';
	}

	$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();
	return $message->title( $this->getTitle() )->text();
}
4759 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|false The unchanged text when well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}
	return false;
}
4769 
/**
 * Clean up signature text: every "{{" not already followed by the subst
 * magic word is turned into a subst call, nested signature tildes are
 * removed, and the result is expanded through the preprocessor.
 *
 * @param string $text Signature wikitext
 * @param bool $parsing True when called in the middle of a parse, in which
 *   case the current parser state is reused instead of starting a new one
 * @return string Cleaned signature wikitext
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;
	if ( $standalone ) {
		global $wgTitle;
		$scopedLock = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	$enabled = $this->mOptions->getCleanSignatures();
	if ( !$enabled ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$substWord = $this->magicWordFactory->get( 'subst' );
	$pattern = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
	$replacement = '{{' . $substWord->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $pattern, $replacement, $text ) );

	$dom = $this->preprocessToDom( $text );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom );

	# Only a standalone call owns the strip state and may unstrip here
	return $standalone ? $this->mStripState->unstripBoth( $text ) : $text;
}
4810 
/**
 * Remove runs of three to five tildes, so a signature cannot itself
 * contain another signature.
 *
 * @param string $text
 * @return string Text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4821 
/**
 * Set up the parser for use by code outside parse(): initializes title,
 * options and output type, and optionally records a revision ID.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the Parser::OT_* constants
 * @param bool $clearState
 * @param int|null $revId Stored as the current revision ID when not null
 */
public function startExternalParse( ?Title $title, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $title, $options, $outputType, $clearState );
	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4840 
/**
 * Common setup for every parse entry point: stores title, options and
 * output type, then optionally clears the parser state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the Parser::OT_* constants
 * @param bool $clearState
 */
private function startParse( ?Title $title, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4857 
/**
 * Preprocess the text of an interface message.
 *
 * Calls made while another transformMsg() call is still running return
 * the text unchanged, which guards against infinite recursion.
 *
 * @param string $text Message text
 * @param ParserOptions $options
 * @param Title|null $title Context title; falls back to $wgTitle
 * @return string Preprocessed text, or $text unchanged on a re-entrant call
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Always release the guard. If preprocess() threw without this, the
		# flag would stay set and every later call would silently return
		# its input untransformed.
		$executing = false;
	}
}
4885 
/**
 * Register a callback for an extension tag and add the tag to the strip
 * list.
 *
 * @param string $tag Tag name; lowercased before use
 * @param callable $callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for the tag
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	# Strict comparison: a loose one treats numeric-looking names such as
	# "10" and "1e1" as equal and would skip registering the second tag.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4923 
/**
 * Register a callback for a "transparent" tag. Unlike setHook(), the tag
 * is not added to the strip list.
 *
 * @param string $tag Tag name; lowercased before use
 * @param callable $callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for the tag
 */
public function setTransparentTagHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# Name the real method so the message points at the right call site
		throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
	}
	$oldVal = $this->mTransparentTagHooks[$tag] ?? null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
4951 
/**
 * Remove all tag hooks and function tag hooks, and reset the strip list
 * to its default. Transparent tag hooks are left untouched.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
4960 
/**
 * Register a parser function callback and index every synonym of its
 * magic word in the function synonym cache.
 *
 * @param string $id Magic word ID
 * @param callable $callback
 * @param int $flags Bit field of Parser::SFH_* flags
 * @throws MWException If no magic word is found for $id
 * @return callable|null The callback previously registered under $id
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = null;
	if ( isset( $this->mFunctionHooks[$id] ) ) {
		$previous = $this->mFunctionHooks[$id][0];
	}
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $magicWord->getSynonyms();
	# 0 = case-insensitive bucket, 1 = case-sensitive bucket
	$caseIndex = intval( $magicWord->isCaseSensitive() );
	$wantsHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $synonyms as $synonym ) {
		# Case
		if ( !$caseIndex ) {
			$synonym = $this->contLang->lc( $synonym );
		}
		# Add leading hash
		if ( $wantsHash ) {
			$synonym = '#' . $synonym;
		}
		# Remove trailing colon
		if ( substr( $synonym, -1 ) === ':' ) {
			$synonym = substr( $synonym, 0, -1 );
		}
		$this->mFunctionSynonyms[$caseIndex][$synonym] = $id;
	}

	return $previous;
}
5034 
/**
 * List the IDs of all registered parser function hooks.
 *
 * @return array Keys of the function hook registry
 */
public function getFunctionHooks() {
	$this->firstCallInit();
	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}
5044 
/**
 * Register a function tag hook and add the tag to the strip list.
 *
 * @param string $tag Tag name; lowercased before use
 * @param callable $callback
 * @param int $flags Stored alongside the callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return array|null The [ callback, flags ] pair previously registered
 */
public function setFunctionTagHook( $tag, callable $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}
	$old = $this->mFunctionTagHooks[$tag] ?? null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	# Strict comparison: a loose one treats numeric-looking names such as
	# "10" and "1e1" as equal and would skip registering the second tag.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
5069 
/**
 * Replace link placeholders in $text, in place.
 *
 * @param string &$text
 * @param int $options Unused
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$this->mLinkHolders->replace( $text );
}
5081 
/**
 * Replace link placeholders in $text, in place, using the current link
 * holder array.
 *
 * @param string &$text
 * @param int $options Unused
 */
private function replaceLinkHoldersPrivate( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5092 
/**
 * Run the link holder array's replaceText() over the given text.
 *
 * @param string $text
 * @return string
 */
private function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );
	return $replaced;
}
5103 
/**
 * Render the body of a <gallery> tag as HTML.
 *
 * Each non-empty line of $text has the form "File name|option|...|caption".
 * Recognized options are alt=, link= and any parameter supported by the
 * file's media handler; the last unrecognized part becomes the caption.
 *
 * @param string $text Content between the gallery tags
 * @param array $params Attributes of the gallery tag
 * @return string Gallery HTML
 */
public function renderImageGallery( $text, $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->getTitle() );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	$ig->setShowFilename( isset( $params['showfilename'] ) );
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below.  Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# Resolve the file name; without this assignment $title is undefined below
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( $title === null ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = $this->stripAltText( $match, false );
							// The pipe must be escaped: unescaped it is an
							// alternation, and the pattern would match any value
							// that merely starts with "-{R" or ends with "}-".
							if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								// Drop the leading "-{R|" and the trailing "}-".
								$linkValue = substr( $linkValue, 4, -2 );
							}
							list( $type, $target ) = $this->parseLinkParameter( $linkValue );
							if ( $type === 'link-url' ) {
								$link = $target;
								$this->mOutput->addExternalLink( $target );
							} elseif ( $type === 'link-title' ) {
								$link = $target->getLinkURL();
								$this->mOutput->addLink( $target );
							}
							break;
						default:
							// Must be a handler specific parameter.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								$this->logger->debug(
									"$parameterMatch failed parameter validation" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5277 
/**
 * Get the image parameter map and the matching magic word array for a
 * media handler. Results are cached per handler class.
 *
 * @param MediaHandler|false $handler Falsy when the file has no handler
 * @return array [ map of magic word ID => [ type, name ], magic word array ]
 */
private function getImageParams( $handler ) {
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$handlerClass] ) ) {
		# Already built for this handler class
		return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
	}

	# Initialise static lists
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;
	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $type => $names ) {
			foreach ( $names as $name ) {
				// For grep: img_left, img_right, img_center, img_none,
				// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
				// img_bottom, img_text_bottom,
				// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
				// img_border, img_link, img_alt, img_class
				$magicName = str_replace( '-', '_', "img_$name" );
				$internalParamMap[$magicName] = [ $type, $name ];
			}
		}
	}

	# Layer the handler-specific parameters on top of the internal ones
	$paramMap = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magic => $paramName ) {
			$paramMap[$magic] = [ 'handler', $paramName ];
		}
	}

	$this->mImageParams[$handlerClass] = $paramMap;
	$this->mImageParamsMagicArray[$handlerClass] =
		$this->magicWordFactory->newArray( array_keys( $paramMap ) );

	return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
}
5327 
/**
 * Parse image options text and use it to make an image link.
 *
 * @param Title $title Title of the file
 * @param string $options Pipe-separated options, the last unrecognized
 *   part being the caption
 * @param LinkHolderArray|bool $holders Link holder array used when
 *   stripping alt/title text, or false to use the parser's own
 * @return string HTML
 */
public function makeImage( $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Protect LanguageConverter markup when splitting into parts
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	# ($options is reused from here on as the file-fetching options array)
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							# parseLinkParameter() replaces $paramName with the
							# concrete link type (or null when it is unusable)
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		if ( !$validated ) {
			# Anything not recognized as an option is the caption; the last one wins
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5537 
/**
 * Parse the value of a link= image parameter and register the link
 * target with the parser output.
 *
 * @param string $value
 * @return array [ type, target ] where type is 'no-link' (empty value),
 *   'link-url' (target is the URL string), 'link-title' (target is a
 *   Title) or null when the value is not usable; target is false unless
 *   stated otherwise
 */
private function parseLinkParameter( $value ) {
	$chars = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$prots = $this->mUrlProtocols;

	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		# Starts like an external URL: accept it only if the whole value is one
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );
	return [ 'link-title', $linkTitle ];
}
5580 
/**
 * Turn a caption into plain text suitable for an alt or title attribute.
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Link holder array to resolve link
 *   placeholders with, or a falsy value to use the parser's own
 * @return string Text with placeholders expanded and all tags stripped
 */
private function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from handleInternalLinks2(), mLinkHolders won't be up-to-date.
	$tooltip = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$tooltip = $this->mStripState->unstripBoth( $tooltip );

	# Compatibility hack!  In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$legacyEntityAmpersand = "/
		& # 1. entity prefix
		(?= # 2. followed by:
		(?: # a. one of the legacy semicolon-less named entities
		A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
		C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
		GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
		O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
		U(?:acute|circ|grave|uml)|Yacute|
		a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
		c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
		divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
		frac(?:1(?:2|4)|34)|
		gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
		i(?:acute|circ|excl|grave|quest|uml)|laquo|
		lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
		m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
		not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
		o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
		p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
		s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
		u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$)) # b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx";
	$tooltip = preg_replace( $legacyEntityAmpersand, '&amp;', $tooltip );

	return Sanitizer::stripAllTags( $tooltip );
}
5638 
/**
 * Expand variables in $text and then unstrip it. The text is modified in
 * place and also returned.
 *
 * @param string &$text
 * @param PPFrame|bool $frame Passed through to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	$text = $this->replaceVariables( $text, $frame );
	return $text = $this->mStripState->unstripBoth( $text );
}
5652 
/**
 * List the names of all registered tags: transparent tag hooks first,
 * then tag hooks, then function tag hooks.
 *
 * @return array
 */
public function getTags() {
	$this->firstCallInit();

	$tagNames = array_keys( $this->mTransparentTagHooks );
	foreach ( [ $this->mTagHooks, $this->mFunctionTagHooks ] as $registry ) {
		$tagNames = array_merge( $tagNames, array_keys( $registry ) );
	}

	return $tagNames;
}
5666 
/**
 * Get the parser function synonym table, initializing the parser first
 * if needed.
 *
 * @return array
 */
public function getFunctionSynonyms() {
	$this->firstCallInit();
	$synonyms = $this->mFunctionSynonyms;
	return $synonyms;
}
5675 
/**
 * Get the URL protocols value stored on this parser (interpolated into
 * regexes elsewhere in this class).
 *
 * @return string
 */
public function getUrlProtocols() {
	$protocols = $this->mUrlProtocols;
	return $protocols;
}
5683 
/**
 * Replace every transparent tag in $text with the output of its hook.
 *
 * Each hook is called with ( $content, $params, $parser ). A tag without
 * a registered hook is put back as its original markup.
 *
 * @param string $text
 * @return string
 */
public function replaceTransparentTags( $text ) {
	$matches = [];
	$elements = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $elements, $text, $matches );

	$replacements = [];
	foreach ( $matches as $marker => list( $element, $content, $params, $tag ) ) {
		$hook = $this->mTransparentTagHooks[strtolower( $element )] ?? null;
		$replacements[$marker] = ( $hook !== null )
			? call_user_func( $hook, $content, $params, $this )
			: $tag;
	}

	return strtr( $text, $replacements );
}
5715 
/**
 * Fetch or replace one section of a wikitext document.
 *
 * Shared implementation behind getSection() ($mode 'get') and
 * replaceSection() ($mode 'replace'). The text is preprocessed to a DOM and
 * the top-level nodes are walked; nodes named 'h' mark section boundaries.
 * A section extends until the next heading with a different index whose
 * level is less than or equal to the target heading's level.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Hyphen-separated identifier. The last component is
 *   the section index; a 'T' component before it turns on PTD_FOR_INCLUSION
 *   for preprocessing.
 * @param string $mode 'get' or 'replace'
 * @param string $newText In 'replace' mode, the replacement section text; in
 *   'get' mode, the value returned when the section does not exist
 * @return string In 'get' mode the section text, or $newText if it was not found.
 *   In 'replace' mode the whole text with the section replaced, or $text
 *   unchanged if the section was not found.
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	$getting = ( $mode === 'get' );
	$replacing = ( $mode === 'replace' );

	# Process section extraction flags: every component but the last is a flag
	$sectionParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $sectionParts );
	$flags = in_array( 'T', $sectionParts, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			return $getting ? '' : $newText;
		}

		return $getting ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					# Leave $node pointing at the target heading
					break;
				}
			}
			# Everything in front of the target section is kept verbatim when replacing
			if ( $replacing ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $getting ? $newText : $text;
	}

	# Find the end of the section, including nested sections
	do {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			if ( $bits['i'] != $sectionIndex && $bits['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $getting ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	# Write out the remainder (in replace mode only)
	if ( $replacing ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $outText ) ) {
		# Re-insert stripped tags
		$outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
	}

	return $outText;
}
5855 
/**
 * Return the wikitext of one section of a page.
 *
 * Thin wrapper around extractSections() in 'get' mode.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier, as understood by extractSections()
 * @param string $defaultText Value returned when the section does not exist
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';

	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5873 
/**
 * Return the page wikitext with one section replaced.
 *
 * Thin wrapper around extractSections() in 'replace' mode.
 *
 * @param string $oldText Current page wikitext
 * @param string|int $sectionId Section identifier, as understood by extractSections()
 * @param string $newText Replacement text for the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';

	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
5889 
/**
 * Split wikitext into a flat list of sections.
 *
 * The text is preprocessed and its top-level nodes are walked in order.
 * Each node named 'h' starts a new section; all other nodes are appended
 * to the text of the section currently being built.
 *
 * @param string $text Wikitext to split
 * @return array[] One entry per section, each with the keys:
 *   - 'index': 0 for the leading section, otherwise the heading's 'i' value
 *   - 'level': 0 for the leading section, otherwise the heading's 'level' value
 *   - 'offset': byte offset of the section start, as summed from strlen() of
 *     the preceding nodes' recovered text
 *   - 'heading': recovered text of the heading node ('' for the leading section)
 *   - 'text': recovered text of the whole section, heading included
 */
public function getFlatSectionInfo( $text ) {
	$magicScopeVariable = $this->lock();
	$this->startParse( null, new ParserOptions, self::OT_PLAIN, true );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $text, 0 );

	$sections = [];
	$offset = 0;
	# The leading section (everything before the first heading) is always present
	$current = [
		'index' => 0,
		'level' => 0,
		'offset' => 0,
		'heading' => '',
		'text' => ''
	];

	for ( $node = $root->getFirstChild(); $node; $node = $node->getNextSibling() ) {
		$nodeText = $frame->expand( $node, PPFrame::RECOVER_ORIG );

		if ( $node->getName() !== 'h' ) {
			$current['text'] .= $nodeText;
		} else {
			# A heading closes the section in progress and opens the next one
			$bits = $node->splitHeading();
			$sections[] = $current;
			$current = [
				'index' => $bits['i'],
				'level' => $bits['level'],
				'offset' => $offset,
				'heading' => $nodeText,
				'text' => $nodeText
			];
		}

		$offset += strlen( $nodeText );
	}

	$sections[] = $current;

	return $sections;
}
5956 
/**
 * Return the revision ID stored in mRevisionId.
 *
 * getRevisionObject() treats a null value here as preview mode.
 *
 * @return int|null
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;

	return $revisionId;
}
5970 
/**
 * Return the revision object for the revision being parsed, loading and
 * caching it in mRevisionObject on first use.
 *
 * @return Revision|null Null when in preview mode (mRevisionId is null) and the
 *   current-revision callback returned an already saved revision; otherwise
 *   whatever the callback, or Revision::newFromId(), produced
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject ) {
		return $this->mRevisionObject;
	}

	// NOTE: try to get the RevisionObject even if mRevisionId is null.
	// This is useful when parsing a revision that has not yet been saved.
	// However, if we get back a saved revision even though we are in
	// preview mode, we'll have to ignore it, see below.
	// NOTE: This callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. We can't just
	// skip it if mRevisionId is set.
	$callback = $this->mOptions->getCurrentRevisionCallback();
	$rev = call_user_func( $callback, $this->getTitle(), $this );

	if ( $this->mRevisionId === null ) {
		if ( $rev && $rev->getId() ) {
			// We are in preview mode (mRevisionId is null), and the current revision callback
			// returned an existing revision. Ignore it and return null, it's probably the page's
			// current revision, which is not what we want here. Note that we do want to call the
			// callback to allow the unsaved revision to be injected here, e.g. for
			// self-transclusion previews.
			return null;
		}
	} elseif ( $this->mRevisionId && $rev && $rev->getId() != $this->mRevisionId ) {
		// If the parse is for a new revision, then the callback should have
		// already been set to force the object and should match mRevisionId.
		// If not, try to fetch by mRevisionId for sanity.
		$rev = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $rev;

	return $this->mRevisionObject;
}
6015 
/**
 * Return the timestamp of the revision being parsed, adjusted by the
 * content language's userAdjust() with an empty timezone argument.
 *
 * The result is cached in mRevisionTimestamp. On the first call the
 * unadjusted timestamp is also recorded on the parser output through
 * setRevisionTimestampUsed().
 *
 * @return string
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Use specified revision timestamp, falling back to the current timestamp
		$revObject = $this->getRevisionObject();
		if ( $revObject ) {
			$timestamp = $revObject->getTimestamp();
		} else {
			$timestamp = $this->mOptions->getTimestamp();
		}
		$this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6040 
/**
 * Return the user name associated with the revision being parsed.
 *
 * The value is cached in mRevisionUser once it has been determined.
 *
 * @return string|null The revision's user text; the parser user's name when
 *   there is no revision object and the output type is 'wiki' or the parse
 *   is a preview; null if neither applies
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revObject = $this->getRevisionObject();

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $revObject ) {
		$this->mRevisionUser = $revObject->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6060 
/**
 * Return the size of the revision being parsed.
 *
 * The value is cached in mRevisionSize. Without a revision object the
 * parser's input size (mInputSize) is used instead.
 *
 * @return int|null
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revObject = $this->getRevisionObject();

	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the own substitution
	# will change the size.
	$this->mRevisionSize = $revObject
		? $revObject->getSize()
		: $this->mInputSize;

	return $this->mRevisionSize;
}
6081 
/**
 * Set the default sort key.
 *
 * Stores the key in mDefaultSort and records it on the parser output as
 * the 'defaultsort' property.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$propertyName = 'defaultsort';

	$this->mDefaultSort = $sort;
	$this->mOutput->setProperty( $propertyName, $sort );
}
6091 
/**
 * Return the default sort key, or an empty string when mDefaultSort is false.
 *
 * Use getCustomDefaultSort() to get the raw stored value instead.
 *
 * @return string
 */
public function getDefaultSort() {
	return $this->mDefaultSort === false ? '' : $this->mDefaultSort;
}
6109 
/**
 * Return the raw value of mDefaultSort, without the empty-string
 * substitution that getDefaultSort() applies for false.
 *
 * @return string|bool
 */
public function getCustomDefaultSort() {
	$rawSortKey = $this->mDefaultSort;

	return $rawSortKey;
}
6119 
/**
 * Turn already-stripped heading text into a normalized section name.
 *
 * Applies, in order: Sanitizer::normalizeSectionNameWhitespace(),
 * Sanitizer::decodeCharReferences() and self::normalizeSectionName().
 *
 * @param string $text
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	return self::normalizeSectionName(
		Sanitizer::decodeCharReferences(
			Sanitizer::normalizeSectionNameWhitespace( $text )
		)
	);
}
6126 
/**
 * Build a '#'-prefixed anchor from a section name, escaping the name with
 * Sanitizer::escapeIdForLink().
 *
 * @param string $sectionName
 * @return string
 */
private static function makeAnchor( $sectionName ) {
	$escapedId = Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $escapedId;
}
6130 
/**
 * Build a '#'-prefixed anchor from a section name for the fallback
 * fragment mode.
 *
 * When the second entry of the 'FragmentMode' option is 'legacy', the name
 * is escaped with Sanitizer::escapeIdForAttribute() using ID_FALLBACK;
 * otherwise Sanitizer::escapeIdForLink() is used.
 *
 * @param string $sectionName
 * @return string
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );
	$legacyFallback = isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy';

	// ForAttribute() and ForLink() are the same for legacy encoding
	$id = $legacyFallback
		? Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK )
		: Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $id;
}
6142 
/**
 * Derive the anchor ('#'-prefixed fragment) for a heading given as wikitext.
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6157 
/**
 * Same as guessSectionNameFromWikiText(), but builds the anchor with
 * makeLegacyAnchor() instead of makeAnchor().
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6173 
/**
 * Derive the anchor ('#'-prefixed fragment) for heading text that has
 * already been stripped; unlike guessSectionNameFromWikiText(), this does
 * not call stripSectionName().
 *
 * @param string $text Stripped heading text
 * @return string
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6183 
/**
 * Normalize a section name the way the title parser normalizes fragments.
 *
 * The name is parsed as the title string "#$text" and the resulting
 * 'fragment' part is returned. If the title parser rejects the string
 * with a MalformedTitleException, the input is returned unchanged.
 *
 * @param string $text Section name
 * @return string
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links

	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';

	try {
		return $titleParser->splitTitleString( "#$text" )['fragment'];
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}
}
6203 
/**
 * Reduce heading wikitext to text suitable for building a section name.
 *
 * Internal and external link markup is replaced by the link's text (or
 * target, for unpiped internal links), wikitext quotes are run through
 * doQuotes(), and anything delimited by '<' and '>' is removed.
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function stripSectionName( $text ) {
	# Each rule is [ pattern, replacement ]; rules are applied strictly in order
	$rules = [
		# Strip internal link markup
		[ '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2' ],
		[ '/\[\[:?([^[]+)\|?\]\]/', '$1' ],

		# Strip external link markup
		# @todo FIXME: Not tolerant to blank link text
		# I.E. [https://www.mediawiki.org] will render as [1] or something depending
		# on how many empty links there are on the page - need to figure that out.
		[ '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2' ],
	];
	foreach ( $rules as $rule ) {
		$text = preg_replace( $rule[0], $rule[1], $text );
	}

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6236 
/**
 * Fuzz-test entry point: run replaceVariables(), unstripBoth() and
 * Sanitizer::removeHTMLtags() on the text, in that order, after starting a
 * parse with the given title, options and output type.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants; defaults to OT_HTML
 * @return string
 */
private function fuzzTestSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6258 
/**
 * Fuzz-test entry point for preSaveTransform(), run as the user taken
 * from the parser options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
private function fuzzTestPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
6268 
/**
 * Fuzz-test entry point: fuzzTestSrvus() with the output type fixed to
 * OT_PREPROCESS.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
private function fuzzTestPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;

	return $this->fuzzTestSrvus( $text, $title, $options, $outputType );
}
6278 
/**
 * Apply a callback to every stretch of text between strip markers.
 *
 * The string is scanned for MARKER_PREFIX ... MARKER_SUFFIX spans. Text
 * outside those spans is passed to $callback and replaced by its return
 * value; the spans themselves are copied through untouched. A prefix with
 * no following suffix causes the rest of the string to be copied verbatim.
 *
 * @param string $s Text to process
 * @param callable $callback Called with one string argument; its return value
 *   is appended to the output
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$out = '';
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	$i = 0;
	while ( $i < $length ) {
		$markerStart = strpos( $s, self::MARKER_PREFIX, $i );
		if ( $markerStart === false ) {
			# No more markers: the whole tail goes through the callback
			$out .= call_user_func( $callback, substr( $s, $i ) );
			break;
		}

		$out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) );

		$markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
		if ( $markerEnd === false ) {
			# Unterminated marker: copy the remainder as it is
			$out .= substr( $s, $markerStart );
			break;
		}

		$markerEnd += $suffixLength;
		$out .= substr( $s, $markerStart, $markerEnd - $markerStart );
		$i = $markerEnd;
	}

	return $out;
}
6319 
/**
 * Pass the text through the strip state's killMarkers() and return the result.
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6329 
/**
 * Parse a size parameter of the form "<width>", "<width>px",
 * "<width>x<height>" or "<width>x<height>px".
 *
 * @param string $value Raw parameter value
 * @param bool $parseHeight Whether the "<width>x<height>" form is recognised
 * @return array Empty if $value is '' or matches neither form; otherwise
 *   contains 'width' and, for the two-dimensional form, 'height'. Both are
 *   integers; a missing number yields 0.
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	$m = [];
	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
		return [
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6359 
/**
 * Mark the parser as busy and return a guard that clears the mark again.
 *
 * mInParse is set to a backtrace string of the locking call. The returned
 * ScopedCallback resets mInParse to false when it is triggered, so callers
 * keep it in a local variable for the duration of the parse.
 *
 * @throws MWException If the parser is already locked (mInParse is truthy)
 * @return ScopedCallback
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Save the backtrace when locking, so that if some code tries locking again,
	// we can print the lock owner's backtrace for easier debugging
	$this->mInParse = ( new Exception )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6386 
/**
 * Remove a single <p>...</p> wrapper from around an HTML fragment.
 *
 * The wrapper is removed only when the fragment as a whole is one
 * paragraph, i.e. the text between the outer tags contains no further
 * '</p>'. Anything else is returned unchanged.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );

	if ( !$isWrapped || strpos( $m[1], '</p>' ) !== false ) {
		return $html;
	}

	return $m[1];
}
6405 
/**
 * Return a parser that is not in the middle of a parse.
 *
 * @return Parser A new instance from the factory while this parser is
 *   locked (mInParse is truthy), otherwise this parser itself
 */
public function getFreshParser() {
	return $this->mInParse ? $this->factory->create() : $this;
}
6423 
/**
 * Set up OOUI through OutputPage::setupOOUI() and flag the parser output
 * as having OOUI enabled.
 */
public function enableOOUI() {
	OutputPage::setupOOUI();

	$output = $this->mOutput;
	$output->setEnableOOUI( true );
}
6434 
/**
 * Set a flag on the parser output and write a debug log entry naming the
 * flag, the prefixed title text and the reason.
 *
 * @param string $flag Flag to set on the output
 * @param string $reason Explanation included in the debug log entry
 */
protected function setOutputFlag( $flag, $reason ) {
	$this->mOutput->setFlag( $flag );

	$name = $this->getTitle()->getPrefixedText();
	$message = __METHOD__ . ": set $flag flag on '$name'; $reason";
	$this->logger->debug( $message );
}
6444 }
OT_MSG
const OT_MSG
Definition: Defines.php:167
SiteStats\articles
static articles()
Definition: SiteStats.php:103
ParserOptions
Set options of the Parser.
Definition: ParserOptions.php:42
MagicWordArray
Class for handling an array of magic words.
Definition: MagicWordArray.php:32
FauxRequest
WebRequest clone which takes values from a provided array.
Definition: FauxRequest.php:33
Revision\newKnownCurrent
static newKnownCurrent(IDatabase $db, $pageIdOrTitle, $revId=0)
Load a revision based on a known page ID and current revision ID from the DB.
Definition: Revision.php:1123
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:317
PPFrame\STRIP_COMMENTS
const STRIP_COMMENTS
Definition: PPFrame.php:31
HtmlArmor
Marks HTML that shouldn't be escaped.
Definition: HtmlArmor.php:28
RepoGroup\singleton
static singleton()
Definition: RepoGroup.php:60
ParserOutput
Definition: ParserOutput.php:25
Revision\newFromId
static newFromId( $id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:119
SiteStats\users
static users()
Definition: SiteStats.php:121
MagicWordFactory
A factory that stores information about MagicWords, and creates them on demand with caching.
Definition: MagicWordFactory.php:34
User\isAnon
isAnon()
Get whether the user is anonymous.
Definition: User.php:3589
SiteStats\activeUsers
static activeUsers()
Definition: SiteStats.php:130
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:130
Linker\makeSelfLinkObj
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:164
MediaWiki\BadFileLookup
Definition: BadFileLookup.php:12
PPFrame\NO_ARGS
const NO_ARGS
Definition: PPFrame.php:29
wfSetVar
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest If source is NULL, it just returns the val...
Definition: GlobalFunctions.php:1606
if
if(ini_get( 'mbstring.func_overload')) if(!defined('MW_ENTRY_POINT'))
Pre-config setup: Before loading LocalSettings.php.
Definition: Setup.php:58
Linker\tocIndent
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1614
MediaWiki\Linker\LinkRenderer
Class that generates HTML links for pages.
Definition: LinkRenderer.php:41
ParserOptions\getDisableTitleConversion
getDisableTitleConversion()
Whether title conversion should be disabled.
Definition: ParserOptions.php:535
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1871
getUser
getUser()
SiteStats\pages
static pages()
Definition: SiteStats.php:112
$wgNoFollowDomainExceptions
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
Definition: DefaultSettings.php:4295
wfUrlencode
wfUrlencode( $s)
We want some things to be included as literal characters in our title URLs for prettiness,...
Definition: GlobalFunctions.php:309
SiteStats\numberingroup
static numberingroup( $group)
Find the number of users in a given user group.
Definition: SiteStats.php:150
SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Defines.php:178
OT_PREPROCESS
const OT_PREPROCESS
Definition: Defines.php:166
NS_FILE
const NS_FILE
Definition: Defines.php:66
OT_PLAIN
const OT_PLAIN
Definition: Defines.php:168
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
wfHostname
wfHostname()
Fetch server name for use in error reporting etc.
Definition: GlobalFunctions.php:1325
NS_TEMPLATE
const NS_TEMPLATE
Definition: Defines.php:70
User\newFromName
static newFromName( $name, $validate='valid')
Static factory method for creation from username.
Definition: User.php:537
wfMessage
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Definition: GlobalFunctions.php:1263
MediaWiki\Linker\LinkRendererFactory
Factory to create LinkRender objects.
Definition: LinkRendererFactory.php:32
$s
$s
Definition: mergeMessageFileList.php:185
SpecialPage\getTitleFor
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name If you don't need a full Title object,...
Definition: SpecialPage.php:83
$wgTitle
if(isset( $_SERVER['PATH_INFO']) && $_SERVER['PATH_INFO'] !='') $wgTitle
Definition: api.php:53
MWTidy\isEnabled
static isEnabled()
Definition: MWTidy.php:54
Preprocessor_Hash
Differences from DOM schema:
Definition: Preprocessor_Hash.php:43
StripState
Definition: StripState.php:28
Linker\tocLine
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1640
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:1007
Linker\tocList
static tocList( $toc, Language $lang=null)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1676
PPFrame\NO_TEMPLATES
const NO_TEMPLATES
Definition: PPFrame.php:30
Preprocessor
Definition: Preprocessor.php:30
SiteStats\images
static images()
Definition: SiteStats.php:139
StringUtils\replaceMarkup
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <....
Definition: StringUtils.php:297
Revision
Definition: Revision.php:40
Revision\newFromTitle
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target.
Definition: Revision.php:138
NS_SPECIAL
const NS_SPECIAL
Definition: Defines.php:49
$mTitle
Title null $mTitle
Definition: RevisionSearchResultTrait.php:26
MWException
MediaWiki exception.
Definition: MWException.php:26
NS_PROJECT
const NS_PROJECT
Definition: Defines.php:64
MediaWiki\Config\ServiceOptions
A class for passing options to services.
Definition: ServiceOptions.php:25
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
Definition: GlobalFunctions.php:1044
BlockLevelPass\doBlockLevels
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: BlockLevelPass.php:50
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2562
wfUrlProtocolsWithoutProtRel
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Definition: GlobalFunctions.php:764
$matches
$matches
Definition: NoLocalSettings.php:24
CoreTagHooks\register
static register( $parser)
Definition: CoreTagHooks.php:33
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:356
PPNode
There are three types of nodes:
Definition: PPNode.php:35
LinkHolderArray
Definition: LinkHolderArray.php:29
PPFrame\RECOVER_ORIG
const RECOVER_ORIG
Definition: PPFrame.php:36
Linker\makeHeadline
static makeHeadline( $level, $attribs, $anchor, $html, $link, $fallbackAnchor=false)
Create a headline for content.
Definition: Linker.php:1751
Linker\tocLineEnd
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1664
$args
if( $line===false) $args
Definition: mcc.php:124
MapCacheLRU
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:37
$lines
$lines
Definition: router.php:61
MWTimestamp\getInstance
static getInstance( $ts=false)
Get a timestamp instance in GMT.
Definition: MWTimestamp.php:39
$title
$title
Definition: testCompression.php:36
Linker\makeExternalLink
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:849
OT_WIKI
const OT_WIKI
Definition: Defines.php:165
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:584
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
SectionProfiler
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
Definition: SectionProfiler.php:30
NS_CATEGORY
const NS_CATEGORY
Definition: Defines.php:74
RequestContext
Group all the pieces relevant to the context of a request into one instance.
Definition: RequestContext.php:34
ParserOptions\getPreSaveTransform
getPreSaveTransform()
Transform wiki markup when saving the page?
Definition: ParserOptions.php:637
Linker\splitTrail
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1772
MediaWiki\Special\SpecialPageFactory
Factory for handling the special page list and generating SpecialPage objects.
Definition: SpecialPageFactory.php:64
wfUrlProtocols
wfUrlProtocols( $includeProtocolRelative=true)
Returns a regular expression of url protocols.
Definition: GlobalFunctions.php:719
SpecialVersion\getVersion
static getVersion( $flags='', $lang=null)
Return a string of the MediaWiki version with Git revision if available.
Definition: SpecialVersion.php:289
ParserFactory
Definition: ParserFactory.php:33
$content
$content
Definition: router.php:78
CoreParserFunctions\register
static register( $parser)
Definition: CoreParserFunctions.php:34
$wgNoFollowNsExceptions
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
Definition: DefaultSettings.php:4280
$wgNoFollowLinks
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
Definition: DefaultSettings.php:4274
NS_MEDIA
const NS_MEDIA
Definition: Defines.php:48
User\getOption
getOption( $oname, $defaultOverride=null, $ignoreHidden=false)
Get the user's current setting for a given option.
Definition: User.php:2975
PPFrame
Definition: PPFrame.php:28
$line
$line
Definition: mcc.php:119
StringUtils\delimiterExplode
static delimiterExplode( $startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...
Definition: StringUtils.php:59
wfEscapeWikiText
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking,...
Definition: GlobalFunctions.php:1550
Linker\makeMediaLinkFile
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:782
$context
$context
Definition: load.php:40
SFH_NO_HASH
const SFH_NO_HASH
Definition: Defines.php:177
OT_HTML
const OT_HTML
Definition: Defines.php:164
Title
Represents a title within MediaWiki.
Definition: Title.php:42
CoreParserFunctions\cascadingsources
static cascadingsources( $parser, $title='')
Returns the sources of any cascading protection acting on a specified page.
Definition: CoreParserFunctions.php:1373
wfMatchesDomainList
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
Definition: GlobalFunctions.php:879
$cache
$cache
Definition: mcc.php:33
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:25
Xml\isWellFormedXmlFragment
static isWellFormedXmlFragment( $text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:730
ParserOptions\getUser
getUser()
Current user.
Definition: ParserOptions.php:981
Linker\tocUnindent
static tocUnindent( $level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1625
Linker\makeImageLink
static makeImageLink(Parser $parser, LinkTarget $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:304
getTitle
getTitle()
Definition: RevisionSearchResultTrait.php:74
StringUtils\delimiterReplace
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to preg_replace() with flags.
Definition: StringUtils.php:248
NS_USER
const NS_USER
Definition: Defines.php:62
TextContent\normalizeLineEndings
static normalizeLineEndings( $text)
Do a "\\r\\n" -> "\\n" and "\\r" -> "\\n" transformation as well as trim trailing whitespace.
Definition: TextContent.php:182
Linker\normalizeSubpageLink
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1451
MediaWiki\Config\ServiceOptions\get
get( $key)
Definition: ServiceOptions.php:84
ImageGalleryBase\factory
static factory( $mode=false, IContextSource $context=null)
Get a new image gallery.
Definition: ImageGalleryBase.php:113
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Definition: NamespaceInfo.php:33
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:68
$t
$t
Definition: testCompression.php:71
Title\legalChars
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:692
User\getBoolOption
getBoolOption( $oname)
Get the user's current setting for a given option, as a boolean value.
Definition: User.php:3034
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
RawMessage
Variant of the Message class.
Definition: RawMessage.php:34
User
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:52
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
MWTimestamp\getLocalInstance
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
Definition: MWTimestamp.php:204
User\getName
getName()
Get the user name, or the IP of an anonymous user.
Definition: User.php:2284
Linker\makeExternalImage
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage().
Definition: Linker.php:248
SiteStats\edits
static edits()
Definition: SiteStats.php:94
Language
Internationalisation code.
Definition: Language.php:39
MWHttpRequest\factory
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
Definition: MWHttpRequest.php:189
MediaWiki\Config\ServiceOptions\assertRequiredOptions
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
Definition: ServiceOptions.php:62
$type
$type
Definition: testCompression.php:50
MWTidy\tidy
static tidy( $text)
Interface with Remex tidy.
Definition: MWTidy.php:42