MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
39 use Psr\Log\LoggerInterface;
40 use Psr\Log\NullLogger;
41 use Wikimedia\IPUtils;
42 use Wikimedia\ScopedCallback;
43 
84 class Parser {
90  public const VERSION = '1.6.4';
91 
92  # Flags for Parser::setFunctionHook
93  public const SFH_NO_HASH = 1;
94  public const SFH_OBJECT_ARGS = 2;
95 
96  # Constants needed for external link processing
97  # Everything except bracket, space, or control characters
98  # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
99  # as well as U+3000 IDEOGRAPHIC SPACE (see T21052)
100  # \x{FFFD} is the Unicode replacement character, which the HTML5 spec
101  # uses to replace invalid HTML characters.
102  public const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
103  # Simplified expression to match an IPv4 or IPv6 address, or
104  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
105  // phpcs:ignore Generic.Files.LineLength
106  private const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
107  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
108  // phpcs:ignore Generic.Files.LineLength
109  private const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
110  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
111 
112  # Regular expression for a non-newline space
113  private const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
114 
119  public const PTD_FOR_INCLUSION = Preprocessor::DOM_FOR_INCLUSION;
120 
121  # Allowed values for $this->mOutputType
122  # Parameter to startExternalParse().
123  public const OT_HTML = 1; # like parse()
124  public const OT_WIKI = 2; # like preSaveTransform()
125  public const OT_PREPROCESS = 3; # like preprocess()
126  public const OT_MSG = 3;
127  # like extractSections() - portions of the original are returned unchanged.
128  public const OT_PLAIN = 4;
129 
147  public const MARKER_SUFFIX = "-QINU`\"'\x7f";
148  public const MARKER_PREFIX = "\x7f'\"`UNIQ-";
149 
150  # Markers used for wrapping the table of contents
151  public const TOC_START = '<mw:toc>';
152  public const TOC_END = '</mw:toc>';
153 
154  # Persistent:
155 
156  public $mTagHooks = [];
158  public $mFunctionHooks = [];
159  private $mFunctionSynonyms = [ 0 => [], 1 => [] ];
160  private $mStripList = [];
161  private $mVarCache = [];
162  private $mImageParams = [];
165  public $mMarkerIndex = 0;
170  public $mFirstCall = true;
171 
172  # Initialised by initializeVariables()
173 
177  private $mVariables;
178 
182  private $mSubstWords;
183 
188  private $mConf;
189 
190  # Initialised in constructor
192 
193  # Initialized in getPreprocessor()
194 
199 
200  # Cleared with clearState():
201 
205  public $mOutput;
206  private $mAutonumber;
207 
212  public $mStripState;
213 
217  private $mLinkHolders;
218 
223  public $mLinkID;
235  private $mDefaultSort;
238  public $mHeadings;
242  public $mExpensiveFunctionCount; # number of expensive parser function calls
244  public $mShowToc;
247  private $mTplDomCache;
248 
253  public $mUser; # User object; only used when doing pre-save transform
254 
255  # Temporary
256  # These are variables reset at least once per parse regardless of $clearState
257 
262  public $mOptions;
263 
271  public $mTitle; # Title context, used for self-link rendering and similar things
272  private $mOutputType; # Output type, one of the OT_xxx constants
274  public $ot; # Shortcut alias, see setOutputType()
276  public $mRevisionObject; # The revision object of the specified revision ID
277 
278  public $mRevisionId; # ID to display in {{REVISIONID}} tags
280  public $mRevisionTimestamp; # The timestamp of the specified revision ID
282  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
284  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
286  public $mInputSize = false; # For {{PAGESIZE}} on current page.
287 
290 
297 
305 
312  public $mInParse = false;
313 
315  private $mProfiler;
316 
320  private $mLinkRenderer;
321 
324 
326  private $contLang;
327 
330 
332  private $factory;
333 
336 
344  private $svcOptions;
345 
348 
350  private $nsInfo;
351 
353  private $logger;
354 
356  private $badFileLookup;
357 
359  private $hookContainer;
360 
362  private $hookRunner;
363 
367  public const CONSTRUCTOR_OPTIONS = [
368  // Deprecated and unused; from $wgParserConf
369  'class',
370  // See documentation for the corresponding config options
371  'ArticlePath',
372  'EnableScaryTranscluding',
373  'ExtraInterlanguageLinkPrefixes',
374  'FragmentMode',
375  'LanguageCode',
376  'MaxSigChars',
377  'MaxTocLevel',
378  'MiserMode',
379  'ScriptPath',
380  'Server',
381  'ServerName',
382  'ShowHostnames',
383  'SignatureValidation',
384  'Sitename',
385  'StylePath',
386  'TranscludeCacheExpiry',
387  'PreprocessorCacheThreshold',
388  'DisableLangConversion'
389  ];
390 
/**
 * Build a parser instance and run its one-time initialisation.
 *
 * Direct construction is deprecated (a deprecation warning is raised unless
 * the call originates from ParserFactory); use a ParserFactory instead.
 *
 * Two calling conventions are accepted:
 *  - New: $svcOptions is a ServiceOptions-like object that provides every key
 *    listed in self::CONSTRUCTOR_OPTIONS.
 *  - Pre-1.34 (deprecated): $svcOptions is null or a ParserConf array. In that
 *    convention the argument at index 6 is a Config, index 7 is the
 *    LinkRendererFactory and index 8 is the NamespaceInfo.
 *
 * Every dependency left as null falls back to the matching service from
 * MediaWikiServices (or, for the logger, a NullLogger).
 *
 * @param ServiceOptions|array|null $svcOptions Service options, or legacy ParserConf array
 * @param Language|null $contLang Content language
 * @param ParserFactory|null $factory
 * @param string|null $urlProtocols Regex fragment as returned by wfUrlProtocols()
 * @param SpecialPageFactory|null $spFactory
 * @param mixed|null $linkRendererFactory Must offer create(); see getLinkRenderer()
 * @param mixed|null $nsInfo Namespace information service
 * @param LoggerInterface|null $logger
 * @param mixed|null $badFileLookup Bad-file lookup service (new convention only)
 * @param mixed|null $languageConverterFactory Language converter factory
 * @param mixed|null $hookContainer Container handed to HookRunner
 */
public function __construct(
	$svcOptions = null,
	Language $contLang = null,
	ParserFactory $factory = null,
	$urlProtocols = null,
	SpecialPageFactory $spFactory = null,
	$linkRendererFactory = null,
	$nsInfo = null,
	$logger = null,
	$badFileLookup = null,
	$languageConverterFactory = null,
	$hookContainer = null
) {
	if ( ParserFactory::$inParserFactory === 0 ) {
		// Direct construction of Parser is deprecated; use a ParserFactory
		wfDeprecated( __METHOD__, '1.34' );
	}

	$services = MediaWikiServices::getInstance();

	if ( !$svcOptions || is_array( $svcOptions ) ) {
		wfDeprecated( 'old calling convention for ' . __METHOD__, '1.34' );
		// Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
		// Config, and the eighth is LinkRendererFactory.
		$argCount = func_num_args();
		$this->mConf = (array)$svcOptions;
		if ( empty( $this->mConf['class'] ) ) {
			$this->mConf['class'] = self::class;
		}
		$this->svcOptions = new ServiceOptions(
			self::CONSTRUCTOR_OPTIONS,
			$this->mConf,
			$argCount > 6 ? func_get_arg( 6 ) : $services->getMainConfig()
		);
		$linkRendererFactory = $argCount > 7 ? func_get_arg( 7 ) : null;
		$nsInfo = $argCount > 8 ? func_get_arg( 8 ) : null;
		// Index 8 carries the NamespaceInfo in this convention, so whatever was
		// bound to $badFileLookup positionally is not a bad-file lookup.
		$badFileLookup = null;
	} else {
		// New calling convention
		$svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		// $this->mConf is public, so we'll keep the option there for
		// compatibility until it's removed
		$this->mConf = [
			'class' => $svcOptions->get( 'class' ),
		];
		$this->svcOptions = $svcOptions;
	}

	$this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';

	// NOTE(review): $magicWordFactory is not a parameter in the signature as
	// shown here, so this always resolves to the service-container instance.
	// Confirm whether a parameter declaration is missing.
	$this->magicWordFactory = $magicWordFactory ?? $services->getMagicWordFactory();

	$this->contLang = $contLang ?? $services->getContentLanguage();
	$this->factory = $factory ?? $services->getParserFactory();
	$this->specialPageFactory = $spFactory ?? $services->getSpecialPageFactory();
	$this->linkRendererFactory = $linkRendererFactory ?? $services->getLinkRendererFactory();
	$this->nsInfo = $nsInfo ?? $services->getNamespaceInfo();
	$this->logger = $logger ?: new NullLogger();
	$this->badFileLookup = $badFileLookup ?? $services->getBadFileLookup();
	$this->languageConverterFactory = $languageConverterFactory ??
		$services->getLanguageConverterFactory();

	$this->hookContainer = $hookContainer ?? $services->getHookContainer();
	$this->hookRunner = new HookRunner( $this->hookContainer );

	// T250444: This will eventually be inlined here and the
	// standalone method removed.
	$this->firstCallInit();
}
482 
/**
 * Drop every property on destruction so that objects referenced by the
 * parser are released; the link holder array is released first.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}

	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
496 
/**
 * Prepare a freshly cloned parser: it is never "in parse", it must not share
 * by-reference fields with the instance it was cloned from, and the
 * ParserCloned hook is notified.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: When you create a reference "to" an object field, that
	// makes the object field itself be a reference too (until the other
	// reference goes out of scope). When cloning, any field that's a
	// reference is copied as a reference in the new object. Both of these
	// are defined PHP5 behaviors, as inconvenient as it is for us when old
	// hooks from PHP4 days are passing fields by reference.
	$fieldsToDetach = [ 'mStripState', 'mVarCache' ];
	foreach ( $fieldsToDetach as $field ) {
		// Take a plain (non-reference) copy of the value, then bind the
		// field to that copy so it no longer aliases the original's field.
		$copy = $this->$field;
		$this->$field =& $copy;
		unset( $copy );
	}

	$this->hookRunner->onParserCloned( $this );
}
519 
/**
 * Perform the one-time setup of this parser instance: register the core tag
 * hooks, initialise variables and fire the ParserFirstCallInit hook.
 * Subsequent calls do nothing.
 */
public function firstCallInit() {
	if ( $this->mFirstCall ) {
		// Flip the flag first so the work below happens only once.
		$this->mFirstCall = false;

		CoreTagHooks::register( $this );
		$this->initializeVariables();

		$this->hookRunner->onParserFirstCallInit( $this );
	}
}
537 
/**
 * Reset all per-parse state to its initial values, create fresh output,
 * link-holder, strip-state and profiler objects, and fire the
 * ParserClearState hook.
 */
public function clearState() {
	$this->firstCallInit();
	$this->resetOutput();
	$this->mAutonumber = 0;
	$this->mLinkHolders = new LinkHolderArray(
		$this,
		$this->getHookContainer()
	);
	$this->mLinkID = 0;

	# Forget everything known about the revision being parsed
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;
	$this->mRevisionRecordObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor that still points at another parser
	# must not be reused by this one
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	$this->hookRunner->onParserClearState( $this );
}
589 
/**
 * Replace the current ParserOutput with a new, empty one and register its
 * recordOption() method as the watcher on the current parser options.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput;
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
597 
/**
 * Convert wikitext to HTML and return the populated ParserOutput.
 *
 * @param string $text Wikitext to parse
 * @param Title $title Title used as the parse context
 * @param ParserOptions $options
 * @param bool $linestart Passed through to internalParseHalfParsed()
 * @param bool $clearState When true the input is scrubbed of U+007F, the
 *  parser is locked, and $clearState is forwarded to startParse()
 * @param int|null $revid When not null, used as the revision ID for the
 *  duration of this parse (the other cached revision fields are blanked)
 * @return ParserOutput The parser's output object
 */
public function parse(
	$text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held only for its lifetime: the lock lasts until this variable
		// goes out of scope.
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Snapshot the revision-related fields so they can be put back once the
	// parse is finished.
	$savedRevision = [
		'id' => $this->mRevisionId,
		'object' => $this->mRevisionObject,
		'record' => $this->mRevisionRecordObject,
		'timestamp' => $this->mRevisionTimestamp,
		'user' => $this->mRevisionUser,
		'size' => $this->mRevisionSize,
	];
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionRecordObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	$text = $this->internalParse( $text );
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Set the converted title text unless title conversion is disabled,
	// suppressed by a double-underscore switch, or a display title is
	// already present on the output.
	if ( !$options->getDisableTitleConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !isset( $this->mDoubleUnderscores['notitleconvert'] )
		&& $this->mOutput->getDisplayTitle() === false
	) {
		$convruletitle = $this->getTargetLanguageConverter()->getConvRuleTitle();
		if ( !$convruletitle ) {
			$convruletitle = $this->getTargetLanguageConverter()->convertTitle( $title );
		}
		$this->mOutput->setTitleText( $convruletitle );
	}

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$text .= $this->makeLimitReport();
	}

	# Wrap non-interface parser output in a <div> so it can be targeted
	# with CSS (T37247)
	$wrapperClass = $this->mOptions->getWrapOutputClass();
	if ( $wrapperClass !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $wrapperClass );
	}

	$this->mOutput->setText( $text );

	// Put the revision-related fields back the way they were found
	$this->mRevisionId = $savedRevision['id'];
	$this->mRevisionObject = $savedRevision['object'];
	$this->mRevisionRecordObject = $savedRevision['record'];
	$this->mRevisionTimestamp = $savedRevision['timestamp'];
	$this->mRevisionUser = $savedRevision['user'];
	$this->mRevisionSize = $savedRevision['size'];
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
713 
/**
 * Record limit-report data on the ParserOutput and build the HTML comments
 * ("NewPP limit report" and the transclusion timing report) that parse()
 * appends to the page text.
 *
 * @return string Two HTML comment blocks
 */
protected function makeLimitReport() {
	$maxIncludeSize = $this->mOptions->getMaxIncludeSize();

	$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuTime !== null ) {
		$this->mOutput->setLimitReportData(
			'limitreport-cputime',
			sprintf( "%.3f", $cpuTime )
		);
	}

	$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
	$this->mOutput->setLimitReportData(
		'limitreport-walltime',
		sprintf( "%.3f", $wallTime )
	);

	// Each entry is a [ current value, configured limit ] pair
	$usageByKey = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $maxIncludeSize ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $usageByKey as $reportKey => $usage ) {
		$this->mOutput->setLimitReportData( $reportKey, $usage );
	}

	foreach ( $this->mStripState->getLimitReport() as $entry ) {
		$this->mOutput->setLimitReportData( $entry[0], $entry[1] );
	}

	$this->hookRunner->onParserLimitReportPrepare( $this, $this->mOutput );

	$limitReport = "NewPP limit report\n";
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$limitReport .= 'Parsed by ' . wfHostname() . "\n";
	}
	$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
	$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
	$dynamic = $this->mOutput->hasDynamicContent() ? 'true' : 'false';
	$limitReport .= 'Dynamic content: ' . $dynamic . "\n";
	$limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";

	foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
		// Default formatting only happens when the hook returns a truthy value
		if ( !$this->hookRunner->onParserLimitReportFormat(
			$key, $value, $limitReport, false, false )
		) {
			continue;
		}

		$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			$valueMsg = new RawMessage( '$1' );
		}
		if ( $keyMsg->isDisabled() || $valueMsg->isDisabled() ) {
			continue;
		}
		$valueMsg->params( $value );
		$limitReport .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
	}
	// Since we're not really outputting HTML, decode the entities and
	// then re-encode the things that need hiding inside HTML comments.
	$limitReport = htmlspecialchars_decode( $limitReport );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
	$text = "\n<!-- \n$limitReport-->\n";

	// Add on template profiling data in human/machine readable way
	$dataByFunc = $this->mProfiler->getFunctionStats();
	uasort( $dataByFunc, function ( $left, $right ) {
		// Largest 'real' value first
		return $right['real'] <=> $left['real'];
	} );
	$profileReport = [];
	foreach ( array_slice( $dataByFunc, 0, 10 ) as $stats ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$stats['%real'],
			$stats['real'],
			$stats['calls'],
			htmlspecialchars( $stats['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
	$text .= implode( "\n", $profileReport ) . "\n-->\n";

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData(
		'cachereport-timestamp',
		$this->mOutput->getCacheTime()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-ttl',
		$this->mOutput->getCacheExpiry()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasDynamicContent()
	);

	return $text;
}
822 
/**
 * Run internalParse() on the given text as a non-main parse, optionally
 * within the supplied frame, and return the result.
 *
 * @param string $text
 * @param PPFrame|bool $frame Frame to expand in, or false for none
 * @return string
 */
public function recursiveTagParse( $text, $frame = false ) {
	return $this->internalParse( $text, false, $frame );
}
851 
/**
 * Like recursiveTagParse(), but additionally passes the result through
 * internalParseHalfParsed() with its second argument set to false.
 *
 * @param string $text
 * @param PPFrame|bool $frame Frame to expand in, or false for none
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
876 
/**
 * Parse extension-tag content the way parse() treats a whole document:
 * recursiveTagParse(), then the ParserAfterParse hook, then
 * internalParseHalfParsed() with its second argument set to true.
 *
 * @param string $text
 * @return string
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$parsed = $this->recursiveTagParse( $text );
	// The hook receives the text so that handlers get a chance to see it
	// before the final pass.
	$this->hookRunner->onParserAfterParse( $this, $parsed, $this->mStripState );
	return $this->internalParseHalfParsed( $parsed, true );
}
902 
/**
 * Expand variables in the given text and unstrip the result, after locking
 * the parser and starting a fresh OT_PREPROCESS parse.
 *
 * @param string $text
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to use for this run, if not null
 * @param PPFrame|bool $frame Passed on to replaceVariables()
 * @return string
 */
public function preprocess( $text, ?Title $title,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Held only for its lifetime: the lock lasts until this variable goes
	// out of scope.
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );

	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	$this->hookRunner->onParserBeforePreprocess( $this, $text, $this->mStripState );
	$expanded = $this->replaceVariables( $text, $frame );

	return $this->mStripState->unstripBoth( $expanded );
}
927 
/**
 * Expand variables in the text and unstrip the result, without touching
 * the parser's lock or starting a new parse.
 *
 * @param string $text
 * @param PPFrame|bool $frame Passed on to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
942 
/**
 * Process wikitext intended to be preloaded into an edit form: substitute
 * the message-style parameters, then expand the text as an included
 * document in a new frame and unstrip it.
 *
 * @param string $text Wikitext to process
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Values substituted into $text via RawMessage::params()
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	# The expansion flags were previously read from an undefined variable.
	# NOTE(review): NO_ARGS | NO_TEMPLATES (leave template arguments and
	# template calls unexpanded) is taken from upstream MediaWiki — confirm
	# against the PPFrame interface in this tree.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
970 
/**
 * Store the user on the parser; getUser() prefers this value over the user
 * from the parser options whenever it is not null.
 *
 * @param User|null $user
 */
public function setUser( ?User $user ) {
	$this->mUser = $user;
}
980 
/**
 * Set the context title. A missing title is replaced by
 * Special:Badtitle/Parser, and any fragment on the title is removed.
 *
 * @param Title|null $t
 */
public function setTitle( Title $t = null ) {
	$title = $t ?: Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
998 
/**
 * Get the context title stored on the parser.
 *
 * @return Title
 */
public function getTitle() : Title {
	$title = $this->mTitle;
	return $title;
}
1005 
/**
 * Combined accessor/mutator for the context title, delegating to
 * wfSetVar().
 *
 * @deprecated since 1.35
 * @param Title|null $x Value handed to wfSetVar()
 * @return Title|null Whatever wfSetVar() returns
 */
public function Title( Title $x = null ) : ?Title {
	wfDeprecated( __METHOD__, '1.35' );
	$result = wfSetVar( $this->mTitle, $x );
	return $result;
}
1017 
/**
 * Get the current output type.
 *
 * @return int One of the self::OT_* constants, as last passed to setOutputType()
 */
public function getOutputType(): int {
	$type = $this->mOutputType;
	return $type;
}
1026 
/**
 * Set the output type and rebuild the $ot shortcut array, which holds one
 * boolean per output mode.
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ): void {
	$this->mOutputType = $ot;

	# Shortcut alias: mode name => constant it is compared against
	$modes = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$flags = [];
	foreach ( $modes as $modeName => $constant ) {
		$flags[$modeName] = $ot == $constant;
	}
	$this->ot = $flags;
}
1041 
/**
 * Combined accessor/mutator for the output type, delegating to wfSetVar().
 * Unlike setOutputType(), this does not touch the $ot shortcut array.
 *
 * @deprecated since 1.35
 * @param int|null $x Value handed to wfSetVar()
 * @return int Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
1053 
/**
 * Get the output object currently held by the parser.
 *
 * @return ParserOutput
 */
public function getOutput() {
	$output = $this->mOutput;
	return $output;
}
1060 
/**
 * Get the options object currently held by the parser.
 *
 * @return ParserOptions
 */
public function getOptions() {
	$options = $this->mOptions;
	return $options;
}
1067 
/**
 * Replace the options object held by the parser.
 *
 * @param ParserOptions $options
 */
public function setOptions( ParserOptions $options ): void {
	$this->mOptions = $options;
}
1076 
/**
 * Combined accessor/mutator for the parser options, delegating to
 * wfSetVar().
 *
 * @deprecated since 1.35
 * @param ParserOptions|null $x Value handed to wfSetVar()
 * @return ParserOptions Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
1088 
/**
 * Return the current link ID and advance the counter by one.
 *
 * @return int The ID as it was before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
1095 
/**
 * Overwrite the link ID counter that nextLinkID() hands out.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1102 
/**
 * Alias of getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	$language = $this->getTargetLanguage();
	return $language;
}
1110 
/**
 * Work out the language the parse is targeting. In order of precedence:
 * the target language set on the options, the user language when parsing
 * an interface message, and otherwise the page language of the title.
 *
 * @return Language
 */
public function getTargetLanguage() {
	$explicitTarget = $this->mOptions->getTargetLanguage();
	if ( $explicitTarget !== null ) {
		return $explicitTarget;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	return $this->getTitle()->getPageLanguage();
}
1130 
/**
 * Get the user set via setUser() if there is one, otherwise the user from
 * the parser options.
 *
 * @return User
 */
public function getUser() {
	return $this->mUser ?? $this->mOptions->getUser();
}
1143 
/**
 * Get the preprocessor for this parser, creating a Preprocessor_Hash on
 * first use and reusing it afterwards.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorOptions = [
		'cacheThreshold' => $this->svcOptions->get( 'PreprocessorCacheThreshold' ),
		'disableLangConversion' => $this->svcOptions->get( 'DisableLangConversion' )
	];
	$this->mPreprocessor = new Preprocessor_Hash(
		$this,
		MediaWikiServices::getInstance()->getMainWANObjectCache(),
		$preprocessorOptions
	);

	return $this->mPreprocessor;
}
1163 
/**
 * Get the link renderer for this parser, creating it from the factory on
 * first use with the stub threshold taken from the current options.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	// XXX We make the LinkRenderer with current options and then cache it forever
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	$this->mLinkRenderer = $this->linkRendererFactory->create();
	$stubThreshold = $this->getOptions()->getStubThreshold();
	$this->mLinkRenderer->setStubThreshold( $stubThreshold );

	return $this->mLinkRenderer;
}
1181 
/**
 * Get the magic word factory chosen in the constructor.
 *
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	$magicWordFactory = $this->magicWordFactory;
	return $magicWordFactory;
}
1191 
/**
 * Get the content language chosen in the constructor.
 *
 * @return Language
 */
public function getContentLanguage() {
	$contentLanguage = $this->contLang;
	return $contentLanguage;
}
1201 
/**
 * Get the bad-file lookup service chosen in the constructor.
 *
 * @return BadFileLookup
 */
public function getBadFileLookup() {
	$lookup = $this->badFileLookup;
	return $lookup;
}
1211 
/**
 * Replace every occurrence of the listed tags (and every HTML comment) in
 * the text with a unique marker and report what was removed.
 *
 * Tag names are matched case-insensitively and literally: they are quoted
 * before being placed in the regular expressions, so names containing
 * characters such as "/", "." or "(" cannot break or widen the pattern.
 *
 * A tag with no closing counterpart runs to the end of the text.
 *
 * @param string[] $elements Tag names to extract
 * @param string $text Source text
 * @param array[] &$matches Filled with one entry per extracted item, keyed by
 *  marker: [ element name, content (null for a self-closing tag),
 *  decoded attributes, full original text of the item ]
 * @return string The text with each extracted item replaced by its marker
 */
public static function extractTagsAndParams( array $elements, $text, &$matches ) {
	// Shared across calls so that markers never repeat within a request
	static $n = 1;
	$stripped = '';
	$matches = [];

	$quotedNames = array_map( function ( $name ) {
		return preg_quote( (string)$name, '/' );
	}, $elements );
	$taglist = implode( '|', $quotedNames );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			// No further opening tag or comment
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			list( , $element, $attributes, $close, $inside ) = $p;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				// $element is text taken from the input; quote it so it is
				// matched literally in the closing-tag pattern
				$end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				list( , $tail, $text ) = $q;
			}
		}

		$matches[$marker] = [ $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" ];
	}
	return $stripped;
}
1288 
/**
 * Get the strip list held by the parser.
 *
 * @return array
 */
public function getStripList() {
	$list = $this->mStripList;
	return $list;
}
1297 
/**
 * Get the strip state object held by the parser.
 *
 * @return StripState
 */
public function getStripState() {
	$state = $this->mStripState;
	return $state;
}
1304 
/**
 * Register a piece of text with the strip state under a newly generated
 * marker and return that marker.
 *
 * @param string $text Text to store
 * @return string The marker under which the text was stored
 */
public function insertStripItem( $text ) {
	// Take the current index for this marker, then advance the counter
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1320 
/**
 * Translate wikitext table markup into HTML table markup, one line at a
 * time. Lines outside any table are passed through unchanged.
 *
 * Recognised line prefixes: "{|" opens a table (optionally preceded by
 * colons for indentation), "|}" closes one, "|-" starts a row, "|+" is a
 * caption, "|" starts data cells and "!" starts header cells. Tables left
 * open at the end of the text are closed.
 *
 * @param string $text
 * @return string
 */
private function handleTables( $text ) {
	$out = '';
	# The following are parallel stacks with one entry per open table
	$tdHistory = []; # Is currently a td tag open?
	$lastTagHistory = []; # Save history of last tag activated (td, th or caption)
	$trHistory = []; # Is currently a tr tag open?
	$trAttributes = []; # history of tr attributes
	$hasOpenedTr = []; # Did this table open a <tr> element?
	$indentLevel = 0; # indent level of the table
	# Cell marker => HTML element it produces
	$cellTags = [ '|' => 'td', '!' => 'th', '+' => 'caption' ];

	foreach ( StringUtils::explode( "\n", $text ) as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) { # empty line, go to next line
			$out .= $outLine . "\n";
			continue;
		}

		$firstCharacter = $line[0];
		$firstTwo = substr( $line, 0, 2 );
		$matches = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
			# First check if we are starting a new table
			$indentLevel = strlen( $matches[1] );

			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $matches[2] ),
				'table'
			);

			$outLine = str_repeat( '<dl><dd>', $indentLevel ) . "<table{$attributes}>";
			$tdHistory[] = false;
			$lastTagHistory[] = '';
			$trHistory[] = false;
			$trAttributes[] = '';
			$hasOpenedTr[] = false;
		} elseif ( count( $tdHistory ) === 0 ) {
			# Not inside a table: don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( $firstTwo === '|}' ) {
			# We are ending a table
			$line = '</table>' . substr( $line, 2 );
			$lastTag = array_pop( $lastTagHistory );

			if ( !array_pop( $hasOpenedTr ) ) {
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $trHistory ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $tdHistory ) ) {
				$line = "</{$lastTag}>{$line}";
			}
			array_pop( $trAttributes );

			$outLine = $indentLevel > 0
				? rtrim( $line ) . str_repeat( '</dd></dl>', $indentLevel )
				: $line;
		} elseif ( $firstTwo === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes
			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $line ),
				'tr'
			);
			array_pop( $trAttributes );
			$trAttributes[] = $attributes;

			$line = '';
			$lastTag = array_pop( $lastTagHistory );
			array_pop( $hasOpenedTr );
			$hasOpenedTr[] = true;

			if ( array_pop( $trHistory ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $tdHistory ) ) {
				$line = "</{$lastTag}>{$line}";
			}

			$outLine = $line;
			$trHistory[] = false;
			$tdHistory[] = false;
			$lastTagHistory[] = '';
		} elseif ( $firstCharacter === '|' || $firstCharacter === '!' ) {
			# This might be cell elements, td, th or captions
			# ("|+" is covered too, since it also starts with "|")
			if ( $firstTwo === '|+' ) {
				$firstCharacter = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $firstCharacter === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$outLine = '';

			# Loop through each table cell
			foreach ( explode( '||', $line ) as $cell ) {
				$previous = '';
				if ( $firstCharacter !== '+' ) {
					# Cells (but not captions) need an enclosing row
					$pendingTrAttributes = array_pop( $trAttributes );
					if ( !array_pop( $trHistory ) ) {
						$previous = "<tr{$pendingTrAttributes}>\n";
					}
					$trHistory[] = true;
					$trAttributes[] = '';
					array_pop( $hasOpenedTr );
					$hasOpenedTr[] = true;
				}

				$lastTag = array_pop( $lastTagHistory );

				if ( array_pop( $tdHistory ) ) {
					# Close the cell that is still open
					$previous = "</{$lastTag}>\n{$previous}";
				}

				$lastTag = $cellTags[$firstCharacter] ?? '';
				$lastTagHistory[] = $lastTag;

				# A cell could contain both parameters and data
				$cellData = explode( '|', $cell, 2 );

				# T2553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				# Bug T153140: Neither should language converter markup.
				if ( preg_match( '/\[\[|-\{/', $cellData[0] ) === 1 ) {
					$cell = "{$previous}<{$lastTag}>" . trim( $cell );
				} elseif ( count( $cellData ) === 1 ) {
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$lastTag}>" . trim( $cellData[0] );
				} else {
					$attributes = Sanitizer::fixTagAttributes(
						$this->mStripState->unstripBoth( $cellData[0] ),
						$lastTag
					);
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$lastTag}{$attributes}>" . trim( $cellData[1] );
				}

				$outLine .= $cell;
				$tdHistory[] = true;
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open td, tr && table
	while ( $tdHistory ) {
		if ( array_pop( $tdHistory ) ) {
			$out .= "</td>\n";
		}
		if ( array_pop( $trHistory ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $hasOpenedTr ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		return '';
	}

	return $out;
}
1523 
/**
 * Run the main wikitext-to-HTML transformation passes over a piece of text:
 * variable expansion, HTML sanitising, tables, horizontal rules, double
 * underscores, headings, internal links, quotes, external links, magic
 * links and heading finalisation, in that order.
 *
 * @param string $text Wikitext to transform
 * @param bool $isMain Passed through to finalizeHeadings()
 * @param object|bool $frame Frame used to expand the preprocessed DOM (must
 *   expose ->depth and ->expand()), or false to fall back to
 *   replaceVariables()
 * @return string The transformed text, or $text as left by the
 *   ParserBeforeInternalParse hook when that hook aborts
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	# Hook to suspend the parser in this state
	if ( !$this->hookRunner->onParserBeforeInternalParse( $this, $text, $this->mStripState ) ) {
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		if ( !$frame->depth ) {
			$flag = 0;
		} else {
			# Without this assignment $flag would be undefined for included documents
			$flag = self::PTD_FOR_INCLUSION;
		}
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	$this->hookRunner->onInternalParseBeforeSanitize( $this, $text, $this->mStripState );
	$text = Sanitizer::removeHTMLtags(
		$text,
		// Callback from the Sanitizer for expanding items found in
		// HTML attribute values, so they can be safely tested and escaped.
		function ( &$text, $frame = false ) {
			$text = $this->replaceVariables( $text, $frame );
			$text = $this->mStripState->unstripBoth( $text );
		},
		false,
		[],
		[]
	);
	$this->hookRunner->onInternalParseBeforeLinks( $this, $text, $this->mStripState );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->handleTables( $text );

	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->handleDoubleUnderscore( $text );

	$text = $this->handleHeadings( $text );
	$text = $this->handleInternalLinks( $text );
	$text = $this->handleAllQuotes( $text );
	$text = $this->handleExternalLinks( $text );

	# handleInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from handleExternalLinks.
	# Now that handleExternalLinks has run, drop the masking markers again.
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->handleMagicLinks( $text );
	$text = $this->finalizeHeadings( $text, $origText, $isMain );

	return $text;
}
1600 
/**
 * Shorthand for fetching the language converter of the target language.
 *
 * NOTE(review): the method header was missing from this copy of the file and
 * has been reconstructed from the call sites ($this->getTargetLanguageConverter());
 * the visibility is an assumption - narrow it if no outside caller needs it.
 *
 * @return mixed Whatever languageConverterFactory->getLanguageConverter()
 *   returns for $this->getTargetLanguage()
 */
public function getTargetLanguageConverter() {
	return $this->languageConverterFactory->getLanguageConverter(
		$this->getTargetLanguage()
	);
}
1611 
/**
 * Shorthand for fetching the language converter of the content language.
 *
 * NOTE(review): the method header was missing from this copy of the file and
 * has been reconstructed from the call sites ($this->getContentLanguageConverter());
 * the visibility is an assumption - narrow it if no outside caller needs it.
 *
 * @return mixed Whatever languageConverterFactory->getLanguageConverter()
 *   returns for $this->getContentLanguage()
 */
public function getContentLanguageConverter() {
	return $this->languageConverterFactory->getLanguageConverter(
		$this->getContentLanguage()
	);
}
1622 
/**
 * Accessor for the hook container held by this parser.
 *
 * @return mixed The value of $this->hookContainer
 */
1630  protected function getHookContainer() {
1631  return $this->hookContainer;
1632  }
1633 
/**
 * Accessor for the hook runner held by this parser (the object whose
 * on*() methods are invoked throughout this class).
 *
 * @return mixed The value of $this->hookRunner
 */
1642  protected function getHookRunner() {
1643  return $this->hookRunner;
1644  }
1645 
/**
 * Second half of the parse: turn the output of internalParse() into final
 * HTML by running block-level formatting, link holder replacement, optional
 * language conversion, strip-marker restoration, character clean-up and tidy.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain Whether to fire the ParserAfterTidy hook
 * @param bool $linestart Passed through to BlockLevelPass::doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = BlockLevelPass::doBlockLevels(
		$this->mStripState->unstripGeneral( $text ),
		$linestart
	);

	$this->replaceLinkHoldersPrivate( $text );

	# Conversion is skipped when it is disabled in the options, switched off
	# by the 'nocontentconvert' double underscore, or when parsing an
	# interface message.
	if ( !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage()
	) {
		# The position of the convert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $this->getTargetLanguageConverter()->convert( $text );
	}

	# Restore <nowiki> content first, then whatever general markers remain
	$text = $this->mStripState->unstripGeneral(
		$this->mStripState->unstripNoWiki( $text )
	);

	# Clean up special characters, only run once, after doBlockLevels
	$text = MWTidy::tidy(
		Sanitizer::normalizeCharReferences(
			Sanitizer::armorFrenchSpaces( $text )
		)
	);

	if ( $isMain ) {
		$this->hookRunner->onParserAfterTidy( $this, $text );
	}

	return $text;
}
1696 
/**
 * Find free external links and the magic words "RFC <n>", "PMID <n>" and
 * "ISBN <n>" in the text and hand every match to magicLinkCallback().
 *
 * Anchor elements and other HTML tags are matched as well (groups m[1] and
 * m[2]) purely so the callback can skip over them untouched.
 *
 * @param string $text
 * @return string The result of preg_replace_callback()
 */
1707  private function handleMagicLinks( $text ) {
# Building blocks interpolated into the extended-mode pattern below
1708  $prots = wfUrlProtocolsWithoutProtRel();
1709  $urlChar = self::EXT_LINK_URL_CLASS;
1710  $addr = self::EXT_LINK_ADDR;
1711  $space = self::SPACE_NOT_NL; # non-newline space
1712  $spdash = "(?:-|$space)"; # a dash or a non-newline space
1713  $spaces = "$space++"; # possessive match of 1 or more spaces
# The first part of the pattern is single-quoted (no interpolation), the
# remainder is double-quoted so that the variables above are substituted.
1714  $text = preg_replace_callback(
1715  '!(?: # Start cases
1716  (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
1717  (<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
1718  (\b # m[3]: Free external links
1719  (?i:$prots)
1720  ($addr$urlChar*) # m[4]: Post-protocol path
1721  ) |
1722  \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
1723  ([0-9]+)\b |
1724  \bISBN $spaces ( # m[6]: ISBN, capture number
1725  (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
1726  (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
1727  [0-9Xx] # check digit
1728  )\b
1729  )!xu", [ $this, 'magicLinkCallback' ], $text );
1730  return $text;
1731  }
1732 
/**
 * Callback for handleMagicLinks(): decide what a single regex match becomes.
 *
 * @param array $m Match array; m[1]/m[2] are HTML to skip, m[3]/m[4] a free
 *   external link and its post-protocol part, m[5] an RFC/PMID number and
 *   m[6] an ISBN
 * @return string Replacement HTML, or the matched text when nothing applies
 * @throws MWException If m[5] is set but the match starts with neither
 *   "RFC" nor "PMID"
 */
private function magicLinkCallback( array $m ) {
	$matched = $m[0];

	# Anchors and other HTML elements are passed through untouched
	if ( ( isset( $m[1] ) && $m[1] !== '' ) || ( isset( $m[2] ) && $m[2] !== '' ) ) {
		return $matched;
	}

	# Free external link
	if ( isset( $m[3] ) && $m[3] !== '' ) {
		return $this->makeFreeExternalLink( $matched, strlen( $m[4] ) );
	}

	# RFC or PMID
	if ( isset( $m[5] ) && $m[5] !== '' ) {
		if ( substr( $matched, 0, 3 ) === 'RFC' ) {
			$enabled = $this->mOptions->getMagicRFCLinks();
			$spec = [
				'keyword' => 'RFC',
				'urlmsg' => 'rfcurl',
				'class' => 'mw-magiclink-rfc',
				'category' => 'magiclink-tracking-rfc',
			];
		} elseif ( substr( $matched, 0, 4 ) === 'PMID' ) {
			$enabled = $this->mOptions->getMagicPMIDLinks();
			$spec = [
				'keyword' => 'PMID',
				'urlmsg' => 'pubmedurl',
				'class' => 'mw-magiclink-pmid',
				'category' => 'magiclink-tracking-pmid',
			];
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $matched, 0, 20 ) . '"' );
		}

		# Feature switched off in the parser options: leave the text alone
		if ( !$enabled ) {
			return $matched;
		}

		$number = $m[5];
		$target = wfMessage( $spec['urlmsg'], $number )->inContentLanguage()->text();
		$this->addTrackingCategory( $spec['category'] );
		return Linker::makeExternalLink(
			$target,
			"{$spec['keyword']} {$number}",
			true,
			$spec['class'],
			[],
			$this->getTitle()
		);
	}

	# Anything that is not an enabled ISBN match stays as it is
	if ( !isset( $m[6] ) || $m[6] === '' || !$this->mOptions->getMagicISBNLinks() ) {
		return $matched;
	}

	# ISBN: normalise every kind of non-newline space to a plain space for
	# display, then strip separators to obtain the lookup number
	$display = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
	$digits = str_replace( 'x', 'X', str_replace( [ '-', ' ' ], '', $display ) );
	$this->addTrackingCategory( 'magiclink-tracking-isbn' );
	return $this->getLinkRenderer()->makeKnownLink(
		SpecialPage::getTitleFor( 'Booksources', $digits ),
		"ISBN $display",
		[
			'class' => 'internal mw-magiclink-isbn',
			'title' => false // suppress title attribute
		]
	);
}
1807 
/**
 * Turn a free (unbracketed) URL found in the text into an external link or
 * an external image, moving trailing entities and punctuation out of the URL.
 *
 * @param string $url The matched text, protocol included
 * @param int $numPostProto Length of the part of $url after the protocol
 *   (the caller passes strlen() of the post-protocol capture group)
 * @return string HTML for the link or image followed by the removed trail,
 *   or the original text if nothing but the protocol would be left
 */
1817  private function makeFreeExternalLink( $url, $numPostProto ) {
1818  $trail = '';
1819 
1820  # The characters '<' and '>' (which were escaped by
1821  # removeHTMLtags()) should not be included in
1822  # URLs, per RFC 2396.
1823  # Make &nbsp; terminate a URL as well (bug T84937)
1824  $m2 = [];
1825  if ( preg_match(
1826  '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
1827  $url,
1828  $m2,
1829  PREG_OFFSET_CAPTURE
1830  ) ) {
# Everything from the first such entity onwards becomes trailing text
1831  $trail = substr( $url, $m2[0][1] ) . $trail;
1832  $url = substr( $url, 0, $m2[0][1] );
1833  }
1834 
1835  # Move trailing punctuation to $trail
# Note: $sep is used as a strspn() mask, so each character (including the
# backslash) counts as a separator.
1836  $sep = ',;\.:!?';
1837  # If there is no left bracket, then consider right brackets fair game too
1838  if ( strpos( $url, '(' ) === false ) {
1839  $sep .= ')';
1840  }
1841 
# Count separator characters at the end of the URL by scanning the reversed string
1842  $urlRev = strrev( $url );
1843  $numSepChars = strspn( $urlRev, $sep );
1844  # Don't break a trailing HTML entity by moving the ; into $trail
1845  # This is in hot code, so use substr_compare to avoid having to
1846  # create a new string object for the comparison
1847  if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0 ) {
1848  # more optimization: instead of running preg_match with a $
1849  # anchor, which can be slow, do the match on the reversed
1850  # string starting at the desired offset.
1851  # un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
1852  if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars ) ) {
# The first separator is the ';' that closes an entity: keep it in the URL
1853  $numSepChars--;
1854  }
1855  }
1856  if ( $numSepChars ) {
1857  $trail = substr( $url, -$numSepChars ) . $trail;
1858  $url = substr( $url, 0, -$numSepChars );
1859  }
1860 
1861  # Verify that we still have a real URL after trail removal, and
1862  # not just lone protocol
1863  if ( strlen( $trail ) >= $numPostProto ) {
1864  return $url . $trail;
1865  }
1866 
1867  $url = Sanitizer::cleanUrl( $url );
1868 
1869  # Is this an external image?
1870  $text = $this->maybeMakeExternalImage( $url );
1871  if ( $text === false ) {
1872  # Not an image, make a link
1873  $text = Linker::makeExternalLink( $url,
1874  $this->getTargetLanguageConverter()->markNoConversion( $url ),
1875  true, 'free',
1876  $this->getExternalLinkAttribs( $url ), $this->getTitle() );
1877  # Register it in the output object...
1878  $this->mOutput->addExternalLink( $url );
1879  }
1880  return $text . $trail;
1881  }
1882 
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 *
 * @param string $text
 * @return string
 */
private function handleHeadings( $text ) {
	# Longest marker first, so that "===" is never consumed as "==" plus "="
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$marker = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = "/^(?:$marker)[ \\t]*(.+?)[ \\t]*(?:$marker)\\s*$/m";
		$replacement = "<h$level>\\1</h$level>";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	return $text;
}
1898 
/**
 * Apply doQuotes() to every line of the text; bold/italic markup never
 * spans a line break.
 *
 * @param string $text
 * @return string The processed lines joined by "\n" again
 */
private function handleAllQuotes( $text ) {
	$result = '';
	$needsSeparator = false;
	foreach ( StringUtils::explode( "\n", $text ) as $singleLine ) {
		# Put the newline back between lines, never after the last one
		if ( $needsSeparator ) {
			$result .= "\n";
		}
		$result .= $this->doQuotes( $singleLine );
		$needsSeparator = true;
	}
	return $result;
}
1915 
/**
 * Convert the apostrophe markup ('' italics, ''' bold, ''''' both) of a
 * single line into <i> and <b> elements.
 *
 * Runs of four, or of more than five, apostrophes are reduced to three or
 * five markup apostrophes, with the surplus kept as literal text. When the
 * line has an odd number of both bold and italic marks, one bold mark is
 * reinterpreted as an apostrophe followed by italics. Tags still open at
 * the end of the line are closed.
 *
 * @param string $text One line of wikitext
 * @return string The line with quote markup replaced by HTML
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold apostrophe runs, even indexes the text between them
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				# Last and second-to-last character of the text before the mark
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state lists the open tags in opening order ('', 'i', 'b', 'ib', 'bi');
	// 'both' means ''''' was seen and the nesting order is not yet known, so
	// the following text is held in $buffer until the next mark decides it.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would drop a
	// buffer consisting of the single character "0".
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2096 
/**
 * Replace bracketed external links ([url text]) with HTML links and
 * register each linked URL in the parser output.
 *
 * @param string $text
 * @return string Text with the bracketed external links replaced
 * @throws MWException If preg_split() fails on the link regex
 */
2107  private function handleExternalLinks( $text ) {
# With PREG_SPLIT_DELIM_CAPTURE the result alternates between plain text and
# the captured groups of each match; the loop below consumes four entries
# per link: URL, protocol, link text, and the text following the link.
2108  $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
2109  // @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
2110  if ( $bits === false ) {
2111  throw new MWException( "PCRE needs to be compiled with "
2112  . "--enable-unicode-properties in order for MediaWiki to function" );
2113  }
# $s accumulates the output, starting with the text before the first link
2114  $s = array_shift( $bits );
2115 
2116  $i = 0;
2117  while ( $i < count( $bits ) ) {
2118  $url = $bits[$i++];
2119  $i++; // protocol
2120  $text = $bits[$i++];
2121  $trail = $bits[$i++];
2122 
2123  # The characters '<' and '>' (which were escaped by
2124  # removeHTMLtags()) should not be included in
2125  # URLs, per RFC 2396.
2126  $m2 = [];
2127  if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
# Cut the URL at the entity and move the remainder into the link text
2128  $text = substr( $url, $m2[0][1] ) . ' ' . $text;
2129  $url = substr( $url, 0, $m2[0][1] );
2130  }
2131 
2132  # If the link text is an image URL, replace it with an <img> tag
2133  # This happened by accident in the original parser, but some people used it extensively
2134  $img = $this->maybeMakeExternalImage( $text );
2135  if ( $img !== false ) {
2136  $text = $img;
2137  }
2138 
2139  $dtrail = '';
2140 
2141  # Set linktype for CSS
2142  $linktype = 'text';
2143 
2144  # No link text, e.g. [http://domain.tld/some.link]
2145  if ( $text == '' ) {
2146  # Autonumber
2147  $langObj = $this->getTargetLanguage();
2148  $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
2149  $linktype = 'autonumber';
2150  } else {
2151  # Have link text, e.g. [http://domain.tld/some.link text]s
2152  # Check for trail
2153  list( $dtrail, $trail ) = Linker::splitTrail( $trail );
2154  }
2155 
2156  // Excluding protocol-relative URLs may avoid many false positives.
2157  if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
2158  $text = $this->getTargetLanguageConverter()->markNoConversion( $text );
2159  }
2160 
2161  $url = Sanitizer::cleanUrl( $url );
2162 
2163  # Use the encoded URL
2164  # This means that users can paste URLs directly into the text
2165  # Funny characters like ö aren't valid in URLs anyway
2166  # This was changed in August 2004
2167  // @phan-suppress-next-line SecurityCheck-XSS using false for escape is valid here
2168  $s .= Linker::makeExternalLink( $url, $text, false, $linktype,
2169  $this->getExternalLinkAttribs( $url ), $this->getTitle() ) . $dtrail . $trail;
2170 
2171  # Register link in the output object.
2172  $this->mOutput->addExternalLink( $url );
2173  }
2174 
2175  return $s;
2176  }
2177 
/**
 * Get the rel attribute value for an external link, based on the nofollow
 * configuration.
 *
 * @param string|bool $url URL of the link; checked against the nofollow
 *   domain exceptions
 * @param LinkTarget|null $title Page the link appears on; its namespace is
 *   checked against the nofollow namespace exceptions
 * @return string|null 'nofollow', or null if the link should be followed
 */
public static function getExternalLinkRel( $url = false, LinkTarget $title = null ) {
	# The configuration lives in globals; without this declaration the
	# variables below would be undefined locals and nofollow would never apply.
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
	$ns = $title ? $title->getNamespace() : false;
	if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
2198 
/**
 * Build the HTML attributes (rel and, if configured, target) to use for an
 * external link to the given URL.
 *
 * @param string $url
 * @return array Attribute map; 'rel' is always present (it may be null),
 *   'target' only when an external link target is configured
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->getTitle() );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			$extra = 'noreferrer noopener';
			// getExternalLinkRel() returns null when no rel applies; only
			// insert a separating space when there is a value to separate.
			$rel = ( $rel === null || $rel === '' ) ? $extra : "$rel $extra";
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2230 
/**
 * Bring a URL into a normal form: percent-encode unsafe characters, decode
 * percent-escapes of characters that are safe in the component they occur
 * in, upper-case the hex digits of the remaining escapes, and restore the
 * brackets around an IPv6 host.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
		IPUtils::isValid( rawurldecode( $m[1] ) )
	) {
		$isIPv6 = rawurldecode( $m[1] );
	} else {
		$isIPv6 = false;
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $m ) {
			return rawurlencode( $m[0] );
		},
		$url
	);

	# The result is assembled back to front: fragment, then query, then the rest.
	# $end marks the end of the part of $url that has not been handled yet.
	$ret = '';
	$end = strlen( $url );

	# Fragment part - 'fragment'
	$start = strpos( $url, '#' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
		$end = $start;
	}

	# Query part - 'query' minus &=+;
	$start = strpos( $url, '?' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
		$end = $start;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent(
		substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax
	# (the brackets around the host were percent-encoded above; undo that)
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2298 
/**
 * Normalise the percent-escapes within one component of a URL.
 *
 * Escapes of printable ASCII characters (codes 33 to 126) that are not
 * listed in $unsafe are decoded; every other escape is kept, with its hex
 * digits upper-cased.
 *
 * @param string $component Part of a URL
 * @param string $unsafe Characters that must stay percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $hit ) use ( $unsafe ) {
			$escape = $hit[0];
			$decoded = urldecode( $escape );
			$code = ord( $decoded );
			$printable = $code > 32 && $code < 127;
			if ( !$printable || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape );
			}
			# Unescape it
			return $decoded;
		},
		$component
	);
}
2313 
/**
 * Make an external image from the given URL, if the parser options allow
 * it: either external images are enabled in general, the URL starts with
 * one of the allowed prefixes, or it matches an entry of the
 * 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|bool Image HTML, or false if no image is to be made
 */
private function maybeMakeExternalImage( $url ) {
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();

	# The setting is either a single prefix or a list of prefixes
	$prefixMatched = false;
	if ( !empty( $allowedFrom ) ) {
		$prefixes = is_array( $allowedFrom ) ? $allowedFrom : [ $allowedFrom ];
		foreach ( $prefixes as $prefix ) {
			if ( strpos( $url, $prefix ) === 0 ) {
				$prefixMatched = true;
				break;
			}
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	# Fall back to the on-wiki whitelist
	$tryWhitelist = !$html
		&& $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url );
	if ( !$tryWhitelist ) {
		return $html;
	}

	$entries = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $entries as $entry ) {
		# Sanitize the regex fragment, make it case-insensitive, ignore blank entries/comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		// @phan-suppress-next-line SecurityCheck-ReDoS preg_quote is not wanted here
		if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
			# Image matches a whitelist entry
			return Linker::makeExternalImage( $url );
		}
	}
	return $html;
}
2372 
/**
 * Process [[ ]] wikilinks in the text.
 *
 * handleInternalLinks2() rewrites $text in place (it takes it by reference)
 * and returns the link holders it created; those are merged into
 * $this->mLinkHolders.
 *
 * @param string $text
 * @return string The text as modified by handleInternalLinks2()
 */
2380  private function handleInternalLinks( $text ) {
2381  $this->mLinkHolders->merge( $this->handleInternalLinks2( $text ) );
2382  return $text;
2383  }
2384 
2390  private function handleInternalLinks2( &$s ) {
2391  static $tc = false, $e1, $e1_img;
2392  # the % is needed to support urlencoded titles as well
2393  if ( !$tc ) {
2394  $tc = Title::legalChars() . '#%';
2395  # Match a link having the form [[namespace:link|alternate]]trail
2396  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2397  # Match cases where there is no "]]", which might still be images
2398  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2399  }
2400 
2401  $holders = new LinkHolderArray(
2402  $this,
2403  $this->getContentLanguageConverter(),
2404  $this->getHookContainer() );
2405 
2406  # split the entire text string on occurrences of [[
2407  $a = StringUtils::explode( '[[', ' ' . $s );
2408  # get the first element (all text up to first [[), and remove the space we added
2409  $s = $a->current();
2410  $a->next();
2411  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2412  $s = substr( $s, 1 );
2413 
2414  $nottalk = !$this->getTitle()->isTalkPage();
2415 
2416  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2417  $e2 = null;
2418  if ( $useLinkPrefixExtension ) {
2419  # Match the end of a line for a word that's not followed by whitespace,
2420  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2421  $charset = $this->contLang->linkPrefixCharset();
2422  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2423  $m = [];
2424  if ( preg_match( $e2, $s, $m ) ) {
2425  $first_prefix = $m[2];
2426  } else {
2427  $first_prefix = false;
2428  }
2429  } else {
2430  $prefix = '';
2431  }
2432 
2433  # Some namespaces don't allow subpages
2434  $useSubpages = $this->nsInfo->hasSubpages(
2435  $this->getTitle()->getNamespace()
2436  );
2437 
2438  # Loop for each link
2439  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2440  # Check for excessive memory usage
2441  if ( $holders->isBig() ) {
2442  # Too big
2443  # Do the existence check, replace the link holders and clear the array
2444  $holders->replace( $s );
2445  $holders->clear();
2446  }
2447 
2448  if ( $useLinkPrefixExtension ) {
2449  if ( preg_match( $e2, $s, $m ) ) {
2450  list( , $s, $prefix ) = $m;
2451  } else {
2452  $prefix = '';
2453  }
2454  # first link
2455  if ( $first_prefix ) {
2456  $prefix = $first_prefix;
2457  $first_prefix = false;
2458  }
2459  }
2460 
2461  $might_be_img = false;
2462 
2463  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2464  $text = $m[2];
2465  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2466  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2467  # the real problem is with the $e1 regex
2468  # See T1500.
2469  # Still some problems for cases where the ] is meant to be outside punctuation,
2470  # and no image is in sight. See T4095.
2471  if ( $text !== ''
2472  && substr( $m[3], 0, 1 ) === ']'
2473  && strpos( $text, '[' ) !== false
2474  ) {
2475  $text .= ']'; # so that handleExternalLinks($text) works later
2476  $m[3] = substr( $m[3], 1 );
2477  }
2478  # fix up urlencoded title texts
2479  if ( strpos( $m[1], '%' ) !== false ) {
2480  # Should anchors '#' also be rejected?
2481  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2482  }
2483  $trail = $m[3];
2484  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2485  # Invalid, but might be an image with a link in its caption
2486  $might_be_img = true;
2487  $text = $m[2];
2488  if ( strpos( $m[1], '%' ) !== false ) {
2489  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2490  }
2491  $trail = "";
2492  } else { # Invalid form; output directly
2493  $s .= $prefix . '[[' . $line;
2494  continue;
2495  }
2496 
2497  $origLink = ltrim( $m[1], ' ' );
2498 
2499  # Don't allow internal links to pages containing
2500  # PROTO: where PROTO is a valid URL protocol; these
2501  # should be external links.
2502  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2503  $s .= $prefix . '[[' . $line;
2504  continue;
2505  }
2506 
2507  # Make subpage if necessary
2508  if ( $useSubpages ) {
2510  $this->getTitle(), $origLink, $text
2511  );
2512  } else {
2513  $link = $origLink;
2514  }
2515 
2516  // \x7f isn't a default legal title char, so most likely strip
2517  // markers will force us into the "invalid form" path above. But,
2518  // just in case, let's assert that xmlish tags aren't valid in
2519  // the title position.
2520  $unstrip = $this->mStripState->killMarkers( $link );
2521  $noMarkers = ( $unstrip === $link );
2522 
2523  $nt = $noMarkers ? Title::newFromText( $link ) : null;
2524  if ( $nt === null ) {
2525  $s .= $prefix . '[[' . $line;
2526  continue;
2527  }
2528 
2529  $ns = $nt->getNamespace();
2530  $iw = $nt->getInterwiki();
2531 
2532  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2533 
2534  if ( $might_be_img ) { # if this is actually an invalid link
2535  if ( $ns === NS_FILE && $noforce ) { # but might be an image
2536  $found = false;
2537  while ( true ) {
2538  # look at the next 'line' to see if we can close it there
2539  $a->next();
2540  $next_line = $a->current();
2541  if ( $next_line === false || $next_line === null ) {
2542  break;
2543  }
2544  $m = explode( ']]', $next_line, 3 );
2545  if ( count( $m ) == 3 ) {
2546  # the first ]] closes the inner link, the second the image
2547  $found = true;
2548  $text .= "[[{$m[0]}]]{$m[1]}";
2549  $trail = $m[2];
2550  break;
2551  } elseif ( count( $m ) == 2 ) {
2552  # if there's exactly one ]] that's fine, we'll keep looking
2553  $text .= "[[{$m[0]}]]{$m[1]}";
2554  } else {
2555  # if $next_line is invalid too, we need look no further
2556  $text .= '[[' . $next_line;
2557  break;
2558  }
2559  }
2560  if ( !$found ) {
2561  # we couldn't find the end of this imageLink, so output it raw
2562  # but don't ignore what might be perfectly normal links in the text we've examined
2563  $holders->merge( $this->handleInternalLinks2( $text ) );
2564  $s .= "{$prefix}[[$link|$text";
2565  # note: no $trail, because without an end, there *is* no trail
2566  continue;
2567  }
2568  } else { # it's not an image, so output it raw
2569  $s .= "{$prefix}[[$link|$text";
2570  # note: no $trail, because without an end, there *is* no trail
2571  continue;
2572  }
2573  }
2574 
2575  $wasblank = ( $text == '' );
2576  if ( $wasblank ) {
2577  $text = $link;
2578  if ( !$noforce ) {
2579  # Strip off leading ':'
2580  $text = substr( $text, 1 );
2581  }
2582  } else {
2583  # T6598 madness. Handle the quotes only if they come from the alternate part
2584  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2585  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2586  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2587  $text = $this->doQuotes( $text );
2588  }
2589 
2590  # Link not escaped by : , create the various objects
2591  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2592  # Interwikis
2593  if (
2594  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2595  MediaWikiServices::getInstance()->getLanguageNameUtils()
2596  ->getLanguageName(
2597  $iw,
2598  LanguageNameUtils::AUTONYMS,
2599  LanguageNameUtils::DEFINED
2600  )
2601  || in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2602  )
2603  ) {
2604  # T26502: filter duplicates
2605  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2606  $this->mLangLinkLanguages[$iw] = true;
2607  $this->mOutput->addLanguageLink( $nt->getFullText() );
2608  }
2609 
2613  $s = rtrim( $s . $prefix ) . $trail; # T175416
2614  continue;
2615  }
2616 
2617  if ( $ns === NS_FILE ) {
2618  if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->getTitle() ) ) {
2619  if ( $wasblank ) {
2620  # if no parameters were passed, $text
2621  # becomes something like "File:Foo.png",
2622  # which we don't want to pass on to the
2623  # image generator
2624  $text = '';
2625  } else {
2626  # recursively parse links inside the image caption
2627  # actually, this will parse them in any other parameters, too,
2628  # but it might be hard to fix that, and it doesn't matter ATM
2629  $text = $this->handleExternalLinks( $text );
2630  $holders->merge( $this->handleInternalLinks2( $text ) );
2631  }
2632  # cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
2633  $s .= $prefix . $this->armorLinks(
2634  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2635  continue;
2636  }
2637  } elseif ( $ns === NS_CATEGORY ) {
2641  $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2642 
2643  if ( $wasblank ) {
2644  $sortkey = $this->getDefaultSort();
2645  } else {
2646  $sortkey = $text;
2647  }
2648  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2649  $sortkey = str_replace( "\n", '', $sortkey );
2650  $sortkey = $this->getTargetLanguageConverter()->convertCategoryKey( $sortkey );
2651  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2652 
2653  continue;
2654  }
2655  }
2656 
2657  # Self-link checking. For some languages, variants of the title are checked in
2658  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2659  # for linking to a different variant.
2660  if ( $ns !== NS_SPECIAL && $nt->equals( $this->getTitle() ) && !$nt->hasFragment() ) {
2661  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2662  continue;
2663  }
2664 
2665  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2666  # @todo FIXME: Should do batch file existence checks, see comment below
2667  if ( $ns === NS_MEDIA ) {
2668  # Give extensions a chance to select the file revision for us
2669  $options = [];
2670  $descQuery = false;
2671  $this->hookRunner->onBeforeParserFetchFileAndTitle(
2672  $this, $nt, $options, $descQuery );
2673  # Fetch and register the file (file title may be different via hooks)
2674  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2675  # Cloak with NOPARSE to avoid replacement in handleExternalLinks
2676  $s .= $prefix . $this->armorLinks(
2677  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2678  continue;
2679  }
2680 
2681  # Some titles, such as valid special pages or files in foreign repos, should
2682  # be shown as bluelinks even though they're not included in the page table
2683  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2684  # batch file existence checks for NS_FILE and NS_MEDIA
2685  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2686  $this->mOutput->addLink( $nt );
2687  $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2688  } else {
2689  # Links will be added to the output link list after checking
2690  $s .= $holders->makeHolder( $nt, $text, $trail, $prefix );
2691  }
2692  }
2693  return $holders;
2694  }
2695 
/**
 * Build the HTML for a link whose target is treated as known, without
 * going through the link holder array.
 *
 * @param Title $nt Link target
 * @param string $text Link text (already HTML); when empty, the escaped
 *  prefixed title text is used instead
 * @param string $trail Text following the link; Linker::splitTrail() decides
 *  which part of it is pulled inside the anchor
 * @param string $prefix Text placed inside the anchor before the link text
 * @return string Armored link HTML followed by the rest of the trail
 */
private function makeKnownLinkHolder( Title $nt, $text = '', $trail = '', $prefix = '' ) {
	[ $linkTail, $remainder ] = Linker::splitTrail( $trail );

	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$anchor = $this->getLinkRenderer()->makeKnownLink(
		$nt,
		new HtmlArmor( $prefix . $label . $linkTail )
	);

	return $this->armorLinks( $anchor ) . $remainder;
}
2722 
/**
 * Insert MARKER_PREFIX . "NOPARSE" in front of every URL protocol that
 * starts at a word boundary in $text (matched case-insensitively), so the
 * protocol is cloaked from later external-link handling.
 *
 * @param string $text
 * @return string
 */
private function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	// $1 is the matched protocol, re-emitted right after the marker
	$cloaked = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $cloaked, $text );
}
2737 
/**
 * Deprecated pass-through to BlockLevelPass::doBlockLevels().
 *
 * @deprecated since 1.35
 * @param string $text Forwarded unchanged
 * @param bool $linestart Forwarded unchanged
 * @return string Whatever BlockLevelPass::doBlockLevels() returns
 */
public function doBlockLevels( $text, $linestart ) {
	wfDeprecated( __METHOD__, '1.35' );

	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $processed;
}
2751 
/**
 * Return the value of a magic variable.
 *
 * Core variables are computed by CoreMagicVariables::expand() and stored in
 * $this->mVarCache. Anything the core does not know about is delegated to
 * the ParserGetVariableValueSwitch hook, whose handlers are expected to
 * store their result in the cache themselves.
 *
 * @param string $index Magic variable identifier
 * @param bool|PPFrame $frame Passed through to the
 *  ParserGetVariableValueSwitch hook (presumably a PPFrame when set)
 * @return string|null Null when neither the core nor a hook handler
 *  produced a value
 */
private function expandMagicVariable( $index, $frame = false ) {
	// Hook handlers may veto use of the cache by returning false
	if (
		$this->hookRunner->onParserGetVariableValueVarCache( $this, $this->mVarCache ) &&
		isset( $this->mVarCache[$index] )
	) {
		return $this->mVarCache[$index];
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	$this->hookRunner->onParserGetVariableValueTs( $this, $ts );

	$value = CoreMagicVariables::expand(
		$this, $index, $ts, $this->nsInfo, $this->svcOptions, $this->logger
	);

	if ( $value === null ) {
		// Not a defined core magic word
		$ret = null;
		$originalIndex = $index;
		$this->hookRunner->onParserGetVariableValueSwitch( $this,
			$this->mVarCache, $index, $ret, $frame );
		if ( $index !== $originalIndex ) {
			wfDeprecatedMsg(
				'A ParserGetVariableValueSwitch hook handler modified $index, ' .
				'this is deprecated since MediaWiki 1.35',
				'1.35', false, false
			);
		}
		if ( !isset( $this->mVarCache[$originalIndex] ) ||
			$this->mVarCache[$originalIndex] !== $ret ) {
			wfDeprecatedMsg(
				'A ParserGetVariableValueSwitch hook handler bypassed the cache, ' .
				'this is deprecated since MediaWiki 1.35', '1.35', false, false
			);
		}
		// FIXME: in the future, don't give this hook unrestricted
		// access to mVarCache; we can cache it ourselves by falling
		// through here.
		return $ret;
	}

	$this->mVarCache[$index] = $value;

	return $value;
}
2808 
/**
 * Populate $this->mVariables and $this->mSubstWords with magic word arrays
 * built from the factory's variable IDs and subst IDs respectively.
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$this->mVariables = $factory->newArray( $factory->getVariableIDs() );
	$this->mSubstWords = $factory->newArray( $factory->getSubstIDs() );
}
2820 
/**
 * Run the preprocessor over $text and return the object it produces.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Passed to Preprocessor::preprocessToObj(); this file
 *  uses Preprocessor::DOM_FOR_INCLUSION when handling transcluded text
 * @return mixed Result of preprocessToObj() (presumably a PPNode)
 */
public function preprocessToDom( $text, $flags = 0 ) {
	$preprocessor = $this->getPreprocessor();

	return $preprocessor->preprocessToObj( $text, $flags );
}
2841 
/**
 * Preprocess $text and expand it in a frame.
 *
 * Empty text, and text longer than the configured maximum include size,
 * is returned untouched.
 *
 * @param string $text
 * @param bool|PPFrame|array $frame False to use a fresh frame, a PPFrame to
 *  expand in, or anything else to be turned into a custom frame
 * @param bool $argsOnly When true, expansion runs with PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	// Nothing to do for empty text; refuse oversized input
	$length = strlen( $text );
	if ( $length === 0 || $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			$this->logger->debug(
				__METHOD__ . " called using plain parameters instead of " .
				"a PPFrame instance. Creating custom frame."
			);
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$tree = $this->preprocessToDom( $text );
	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $tree, $expandFlags );
}
2885 
/**
 * Record that a parser limit was hit: adds the "<type>-warning" message as
 * an output warning and the "<type>-category" tracking category.
 *
 * @param string $limitationType Message/category key prefix
 * @param string|int $current Passed to the message as a numeric parameter
 * @param string|int $max Passed to the message as a numeric parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# $current and $max are harmless when the message does not use them.
	# The user language is deliberately not applied: the warning is only shown
	# during preview, and varying on it would split the parser cache needlessly.
	$message = wfMessage( "$limitationType-warning" )->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( "$limitationType-category" );
}
2922 
/**
 * Expand one double-brace construct found by the preprocessor.
 *
 * Candidates are tried in this order: subst:/safesubst: handling, magic
 * variables, the MSGNW:/MSG:/RAW: prefixes, parser functions, and finally
 * transclusion (special page, local template, or interwiki). When nothing
 * matches, the original wikitext is reconstructed and returned.
 *
 * @param array $piece Parts of the construct; this method reads the keys
 *  'title', 'parts' and 'lineStart'
 * @param PPFrame $frame Frame in which the construct is being expanded
 * @return array [ 'object' => ... ] when the result is recovered source or
 *  still needs expansion in the current frame, otherwise [ 'text' => ... ]
 */
public function braceSubstitution( array $piece, PPFrame $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	# @todo FIXME: If piece['parts'] is null then the call to getLength()
	# below won't work b/c this $args isn't an object
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	$sawDeprecatedTemplateEquals = false; // T91154

	# SUBST
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->expandMagicVariable( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = Linker::normalizeSubpageLink(
			$this->getTitle(), $part1, $subpage
		);
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguageConverter()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguageConverter()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
					if (
						$title->getNamespace() === NS_TEMPLATE &&
						$title->getDBkey() === '=' &&
						$originalTitle === '='
					) {
						// Note that we won't get here if `=` is evaluated
						// (in the future) as a parser function, nor if
						// the Template namespace is given explicitly,
						// ie `{{Template:=}}`. Only `{{=}}` triggers.
						$sawDeprecatedTemplateEquals = true; // T91154
					}
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}
	if (
		$sawDeprecatedTemplateEquals &&
		$this->mStripState->unstripBoth( $text ) !== '='
	) {
		// T91154: {{=}} is deprecated when it doesn't expand to `=`;
		// use {{Template:=}} if you must.
		$this->addTrackingCategory( 'template-equals-category' );
		$this->mOutput->addWarning( wfMessage( 'template-equals-warning' )->text() );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		// @phan-suppress-next-line SecurityCheck-XSS Mixed mode, here html and safe
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		// @phan-suppress-next-line SecurityCheck-DoubleEscaped Mixed mode, here html and safe
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# preg_replace() returns the new string and leaves its subject
			# untouched, so the result has to be assigned back; otherwise a
			# leading colon survives and the link below becomes "[[::...]]".
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3295 
/**
 * Call a registered parser function and normalise what it returns.
 *
 * The name is first looked up among the case-sensitive synonyms, then,
 * lower-cased with the content language, among the case-insensitive ones.
 *
 * @param PPFrame $frame Frame used to expand PPNode arguments, and handed to
 *  callbacks registered with SFH_OBJECT_ARGS
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments; index 0 is the text after the colon, the
 *  rest may be PPNodes or plain values
 * @return array Always has 'found'. When the function exists it normally
 *  also has 'text' plus any flags returned by the callback; if the callback
 *  set 'noparse' to false, 'text' is a preprocessed DOM and 'isChildObj'
 *  is true.
 */
public function callParserFunction( PPFrame $frame, $function, array $args = [] ) {
	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $this->contLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	$allArgs = [ $this ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				# Go through the accessor, like the rest of this class, instead
				# of reading the mPreprocessor property directly.
				$funcArgs[] = $this->getPreprocessor()->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = $callback( ...$allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3394 
/**
 * Get the preprocessed DOM of a template, using the per-parser DOM and
 * redirect caches where possible.
 *
 * @param Title $title Template to load
 * @return array Two elements: the DOM (or false when no text could be
 *  fetched) and the Title the content was finally taken from
 */
public function getTemplateDom( Title $title ) {
	$requested = $title;
	$key = $title->getPrefixedDBkey();

	// A redirect recorded earlier for this title: continue with its target
	if ( isset( $this->mTplRedirCache[$key] ) ) {
		[ $targetNs, $targetDbKey ] = $this->mTplRedirCache[$key];
		$title = Title::makeTitle( $targetNs, $targetDbKey );
		$key = $title->getPrefixedDBkey();
	}

	// Cached failures are stored as false and are returned from here as well
	if ( isset( $this->mTplDomCache[$key] ) ) {
		return [ $this->mTplDomCache[$key], $title ];
	}

	# Cache miss, go to the database
	[ $wikitext, $title ] = $this->fetchTemplateAndTitle( $title );

	if ( $wikitext === false ) {
		$this->mTplDomCache[$key] = false;
		return [ false, $title ];
	}

	$tree = $this->preprocessToDom( $wikitext, Preprocessor::DOM_FOR_INCLUSION );
	$this->mTplDomCache[$key] = $tree;

	// Record where the requested title ended up, for later lookups
	if ( !$title->equals( $requested ) ) {
		$this->mTplRedirCache[$requested->getPrefixedDBkey()] = [
			$title->getNamespace(),
			$title->getDBkey(),
		];
	}

	return [ $tree, $title ];
}
3434 
// NOTE(review): the signature line of this method is absent from this
// listing; the body reads $title and $this, so it is an instance method
// taking a title. It is deprecated since 1.35 and wraps
// fetchCurrentRevisionRecordOfTitle() for callers that still expect the
// legacy Revision class.
3450  wfDeprecated( __METHOD__, '1.35' );
3451  $revisionRecord = $this->fetchCurrentRevisionRecordOfTitle( $title );
3452  if ( $revisionRecord ) {
// Wrap the record in a legacy Revision object
3453  return new Revision( $revisionRecord );
3454  }
// Nothing found: hand back the falsy lookup result unchanged
3455  return $revisionRecord;
3456  }
3457 
// NOTE(review): the signature line of this method is absent from this
// listing; the body reads $title and $this. It returns the revision record
// produced by the ParserOptions current-revision callback, memoised per
// title in $this->currentRevisionCache.
// Cache entries are keyed by the prefixed DB key of the title
3472  $cacheKey = $title->getPrefixedDBkey();
// The cache is created lazily, holding at most 100 entries
3473  if ( !$this->currentRevisionCache ) {
3474  $this->currentRevisionCache = new MapCacheLRU( 100 );
3475  }
3476  if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
3477  $revisionRecord =
3478  // Defaults to Parser::statelessFetchRevisionRecord()
3479  call_user_func(
3480  $this->mOptions->getCurrentRevisionRecordCallback(),
3481  $title,
3482  $this
3483  );
3484  if ( !$revisionRecord ) {
3485  // Parser::statelessFetchRevisionRecord() can return false;
3486  // normalize it to null.
3487  $revisionRecord = null;
3488  }
// Misses are cached too (as null), so the callback runs once per key
// for as long as the entry stays in the cache
3489  $this->currentRevisionCache->set( $cacheKey, $revisionRecord );
3490  }
3491  return $this->currentRevisionCache->get( $cacheKey );
3492  }
3493 
// Entries are stored in currentRevisionCache under getPrefixedDBkey(), so
// the lookup has to use the same key form. getPrefixedText() differs from
// it for titles containing spaces, which would never be reported as cached.
3501  return (
3502  $this->currentRevisionCache &&
3503  $this->currentRevisionCache->has( $title->getPrefixedDBkey() )
3504  );
3505  }
3506 
/**
 * Deprecated: look up the known current revision of a title and wrap it in
 * a legacy Revision object.
 *
 * @deprecated since 1.35
 * @param Title $title
 * @param Parser|bool $parser Not used by this method
 * @return Revision|bool False when no revision record was found
 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	wfDeprecated( __METHOD__, '1.35' );

	$lookup = MediaWikiServices::getInstance()->getRevisionLookup();
	$record = $lookup->getKnownCurrentRevision( $title );
	if ( !$record ) {
		return false;
	}

	return new Revision( $record );
}
3524 
/**
 * Look up the known current revision of a title through the revision
 * lookup service.
 *
 * @param Title $title
 * @param Parser|null $parser Not used by this method
 * @return mixed Whatever RevisionLookup::getKnownCurrentRevision() returns
 *  (callers in this class treat a falsy value as "no revision")
 */
public static function statelessFetchRevisionRecord( Title $title, $parser = null ) {
	$lookup = MediaWikiServices::getInstance()->getRevisionLookup();

	return $lookup->getKnownCurrentRevision( $title );
}
3540 
/**
 * Fetch the text of a template through the ParserOptions template callback
 * and register every dependency it reports on the parser output.
 *
 * @param Title $title
 * @return array Two elements: the template text (string text has U+007F
 *  replaced by "?") and the final title reported by the callback, falling
 *  back to $title
 */
public function fetchTemplateAndTitle( Title $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetcher = $this->mOptions->getTemplateCallback();
	$fetched = call_user_func( $fetcher, $title, $this );

	if ( isset( $fetched['revision-record'] ) ) {
		$revRecord = $fetched['revision-record'];
	} else {
		// Triggers deprecation warnings via DeprecatablePropertyArray
		$legacy = $fetched['revision'] ?? null;
		$revRecord = ( $legacy instanceof Revision ) ? $legacy->getRevisionRecord() : null;
	}

	$text = $fetched['text'];
	if ( is_string( $fetched['text'] ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}
	$finalTitle = $fetched['finalTitle'] ?? $title;

	foreach ( ( $fetched['deps'] ?? [] ) as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		$isSelf = $dep['title']->equals( $this->getTitle() );
		if ( !$isSelf || !( $revRecord instanceof RevisionRecord ) ) {
			continue;
		}

		// Self-transclusion; final result may change based on the new page version
		try {
			$sha1 = $revRecord->getSha1();
		} catch ( RevisionAccessException $e ) {
			$sha1 = null;
		}
		$this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $sha1 );
	}

	return [ $text, $finalTitle ];
}
3584 
/**
 * Deprecated: return only the text element of fetchTemplateAndTitle().
 *
 * @deprecated since 1.35
 * @param Title $title
 * @return string|bool Template text as returned by fetchTemplateAndTitle()
 */
public function fetchTemplate( Title $title ) {
	wfDeprecated( __METHOD__, '1.35' );

	$textAndTitle = $this->fetchTemplateAndTitle( $title );

	return $textAndTitle[0];
}
3595 
/**
 * Static template fetcher: load the wikitext of $title, following at most
 * one redirect, and collect every title touched along the way as a
 * dependency.
 *
 * @param Title $title Template to fetch
 * @param Parser|bool $parser Parser used to look up the current revision;
 *  when falsy the revision lookup service is queried directly
 * @return DeprecatablePropertyArray With keys 'revision' (deprecated since
 *  1.35, built lazily), 'revision-record' (false when there is none),
 *  'text' (false when nothing could be fetched), 'finalTitle' and 'deps'
 */
3605  public static function statelessFetchTemplate( $title, $parser = false ) {
3606  $text = $skip = false;
3607  $finalTitle = $title;
3608  $deps = [];
3609  $revRecord = null;
3610 
3611  # Loop to fetch the article, with up to 1 redirect
3612  $revLookup = MediaWikiServices::getInstance()->getRevisionLookup();
// $title is replaced by the redirect target at the end of each pass; a
// non-object target (no redirect) ends the loop
3613  for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
3614  # Give extensions a chance to select the revision instead
3615  $id = false; # Assume current
3616  Hooks::runner()->onBeforeParserFetchTemplateAndtitle(
3617  $parser, $title, $skip, $id );
3618 
// A handler asked to skip this template: still record the dependency,
// but without a revision ID
3619  if ( $skip ) {
3620  $text = false;
3621  $deps[] = [
3622  'title' => $title,
3623  'page_id' => $title->getArticleID(),
3624  'rev_id' => null
3625  ];
3626  break;
3627  }
3628  # Get the revision
3629  # TODO rewrite using only RevisionRecord objects
3630  if ( $id ) {
3631  $revRecord = $revLookup->getRevisionById( $id );
3632  } elseif ( $parser ) {
3633  $revRecord = $parser->fetchCurrentRevisionRecordOfTitle( $title );
3634  } else {
3635  $revRecord = $revLookup->getRevisionByTitle( $title );
3636  }
3637  $rev_id = $revRecord ? $revRecord->getId() : 0;
3638  # If there is no current revision, there is no page
3639  if ( $id === false && !$revRecord ) {
3640  $linkCache = MediaWikiServices::getInstance()->getLinkCache();
3641  $linkCache->addBadLinkObj( $title );
3642  }
3643 
3644  $deps[] = [
3645  'title' => $title,
3646  'page_id' => $title->getArticleID(),
3647  'rev_id' => $rev_id
3648  ];
3649  if ( $revRecord ) {
3650  $revTitle = Title::newFromLinkTarget(
3651  $revRecord->getPageAsLinkTarget()
3652  );
3653  if ( !$title->equals( $revTitle ) ) {
3654  # We fetched a rev from a different title; register it too...
3655  $deps[] = [
3656  'title' => $revTitle,
3657  'page_id' => $revRecord->getPageId(),
3658  'rev_id' => $rev_id
3659  ];
3660  }
3661  }
3662 
3663  if ( $revRecord ) {
3664  $content = $revRecord->getContent( SlotRecord::MAIN );
3665  $text = $content ? $content->getWikitextForTransclusion() : null;
3666 
3667  // Hook is hard deprecated since 1.35
3668  if ( Hooks::isRegistered( 'ParserFetchTemplate' ) ) {
3669  // Only create the Revision object if needed
3670  $legacyRevision = new Revision( $revRecord );
3671  Hooks::runner()->onParserFetchTemplate(
3672  $parser,
3673  $title,
3674  $legacyRevision,
3675  $text,
3676  $deps
3677  );
3678  }
3679 
// No usable text (missing content, or nothing to transclude): report
// failure as false
3680  if ( $text === false || $text === null ) {
3681  $text = false;
3682  break;
3683  }
3684  } elseif ( $title->getNamespace() === NS_MEDIAWIKI ) {
// No revision, but the title is in the MediaWiki namespace: fall back to
// the interface message named after the title, in the content language
3685  $message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
3686  lcfirst( $title->getText() ) )->inContentLanguage();
3687  if ( !$message->exists() ) {
3688  $text = false;
3689  break;
3690  }
3691  $content = $message->content();
3692  $text = $message->plain();
3693  } else {
3694  break;
3695  }
3696  if ( !$content ) {
3697  break;
3698  }
3699  # Redirect?
3700  $finalTitle = $title;
3701  $title = $content->getRedirectTarget();
3702  }
3703 
// The legacy Revision object is only constructed if this closure is invoked
3704  $legacyRevision = function () use ( $revRecord ) {
3705  return $revRecord ? new Revision( $revRecord ) : null;
3706  };
3707  $retValues = [
3708  'revision' => $legacyRevision,
3709  'revision-record' => $revRecord ?: false, // So isset works
3710  'text' => $text,
3711  'finalTitle' => $finalTitle,
3712  'deps' => $deps
3713  ];
// The 'revision' key is registered as deprecated since 1.35
3714  $propertyArray = new DeprecatablePropertyArray(
3715  $retValues,
3716  [ 'revision' => '1.35' ],
3717  __METHOD__
3718  );
3719  return $propertyArray;
3720  }
3721 
/**
 * Fetch a file and register it as an image dependency of the output.
 *
 * When the file that was found carries a different title than the one
 * requested, that title is registered too and returned instead.
 *
 * @param Title $title
 * @param array $options Passed on to fetchFileNoRegister()
 * @return array Two elements: the file (falsy when not found) and the
 *  possibly updated Title
 */
public function fetchFileAndTitle( Title $title, array $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( $file && !$title->equals( $file->getTitle() ) ) {
		# Update fetched file title
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}

	return [ $file, $title ];
}
3743 
/**
 * Look up a file without registering it as a dependency.
 *
 * @param Title $title Title used for the by-name lookup
 * @param array $options Lookup options. Keys read here:
 *  - 'broken': when set, no lookup happens and false is returned
 *  - 'sha1': when set, the lookup goes through findFileFromKey()
 *  The whole array is also forwarded to the repo group.
 * @return mixed False when 'broken' is set, otherwise whatever the repo
 *  group lookup returned
 */
protected function fetchFileNoRegister( Title $title, array $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	// get by (sha1,timestamp) when a hash was supplied, else by (name,timestamp)
	return isset( $options['sha1'] )
		? $repoGroup->findFileFromKey( $options['sha1'], $options )
		: $repoGroup->findFile( $title, $options );
}
3767 
/**
 * Fetch the content of a page on another wiki over HTTP, going through the
 * main WAN object cache.
 *
 * Returns a content-language error message instead of page text when the
 * 'EnableScaryTranscluding' option is off, when the URL is longer than
 * 1024 bytes, or when the request did not yield a string body.
 *
 * @param Title $title Title whose full URL is requested
 * @param string $action Value of the 'action' query parameter
 * @return string Fetched text, or an error message
 */
public function interwikiTransclude( Title $title, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	// remote wiki ID or false
	$wikiId = $title->getTransWikiID();
	$hasWikiId = ( $wikiId !== false );

	// __METHOD__ inside the closure would name the closure, so capture it here
	$caller = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$expiry = $this->svcOptions->get( 'TranscludeCacheExpiry' );

	$fetch = function ( $oldValue, &$ttl ) use ( $url, $caller, $cache ) {
		$req = MWHttpRequest::factory( $url, [], $caller );

		// Status object
		$status = $req->execute();
		$succeeded = $status->isOK();
		if ( !$succeeded ) {
			// Never cache a failed fetch
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// The remote side reported lag: cap the cache lifetime
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $succeeded ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback( $cacheKey, $expiry, $fetch, $cacheOptions );

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3835 
/**
 * Resolve a triple-brace argument, {{{name|default}}}, against a frame.
 *
 * @param array $piece Keys read here: 'title' (the argument name, still
 *  to be expanded) and 'parts' (the pieces after the name; the first one
 *  is the default)
 * @param PPFrame $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] (default value, or the
 *  re-assembled {{{...}}} when nothing matched) or [ 'text' => ... ]
 */
public function argSubstitution( array $piece, PPFrame $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );

	$object = false;
	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultApplies = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultApplies ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	$warning = false;
	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$warning = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere: hand back the original {{{...}}} construct
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $warning !== false ) {
		$text .= $warning;
	}

	return ( $object !== false )
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
3882 
/**
 * Turn an extension tag into a strip marker (or into literal output).
 *
 * In HTML output mode the registered tag hook is called and its result is
 * stored in the strip state. In every other output mode the tag is
 * rebuilt as text from its name, attributes, inner content and close tag.
 *
 * @param array $params Keys read here:
 *  - 'name': tag name (expanded through $frame)
 *  - 'attr': attribute text, optional
 *  - 'inner': inner content, optional; absent/null for a self-closed tag
 *  - 'attributes': extra attributes as name => value, optional
 *  - 'close': closing tag, only read when there is inner content and the
 *    output mode is not HTML
 * @param PPFrame $frame Frame used to expand the pieces above
 * @return string A strip marker; or the raw output when the hook asked for
 *  marker type 'none'; or the expanded text itself when it begins with the
 *  error-span prefix
 * @throws MWException When a hook returns an unknown 'markerType'
 */
public function extensionSubstitution( array $params, PPFrame $frame ) {
	static $errorStr = '<span class="error">';
	static $errorLen = 20;

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	// Only inspect the prefix when there is attribute text at all; null must
	// not reach substr().
	if ( $attrText !== null && substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$markerType = 'general';
	if ( $this->ot['html'] ) {
		$name = strtolower( $name );
		// An absent attribute string is decoded as an empty one
		$attributes = Sanitizer::decodeTagAttributes( $attrText ?? '' );
		if ( isset( $params['attributes'] ) ) {
			// Array union: attributes parsed from the text win over the extras
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			// Note that $content may be null here, for example if the
			// tag is self-closed.
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags: element 0 is the output, named keys are options
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			// isset() covers both a null and a missing 'close' entry
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
3983 
/**
 * Add to one of the include-size counters, unless that would push it past
 * the limit reported by the parser options.
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Amount to add
 * @return bool False when the limit would be exceeded (the counter is then
 *  left untouched), true when the counter was increased
 */
private function incrementIncludeSize( $type, $size ) {
	$proposed = $this->mIncludeSizes[$type] + $size;
	if ( $proposed > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $proposed;
	return true;
}
3999 
/**
 * Count one more expensive parser function call and report whether the
 * running total is still within the limit from the parser options.
 *
 * The counter is incremented even when the limit has already been passed.
 *
 * @return bool False if the limit has been exceeded
 */
public function incrementExpensiveFunctionCount() {
	$this->mExpensiveFunctionCount++;
	return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
}
4007 
/**
 * Remove the double-underscore magic words from the text and act on them.
 *
 * The words that were found are kept in $this->mDoubleUnderscores and each
 * of them is stored as a property on the parser output.
 *
 * @param string $text
 * @return string Text with the magic words removed; the first __TOC__ is
 *  replaced by a placeholder comment
 */
private function handleDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Put a placeholder where the first one was; the TOC is filled in at
		# the end. Every further occurrence is simply dropped.
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );
		$text = $tocWord->replace( '', $text );
	}

	# Now match and remove the rest of them
	$found = $this->magicWordFactory->getDoubleUnderscoreArray()->matchAndRemove( $text );
	$this->mDoubleUnderscores = $found;

	if ( isset( $found['nogallery'] ) ) {
		$this->mOutput->setNoGallery( true );
	}

	# An explicitly positioned TOC is not hidden by __NOTOC__
	if ( isset( $found['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}

	if ( isset( $found['hiddencat'] )
		&& $this->getTitle()->getNamespace() === NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# 'index' is handled last, so __INDEX__ always overrides __NOINDEX__,
	# see T16899
	$indexPolicies = [
		'noindex' => 'noindex-category',
		'index' => 'index-category',
	];
	foreach ( $indexPolicies as $policy => $trackingCategory ) {
		if ( isset( $found[$policy] ) && $this->getTitle()->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( $trackingCategory );
		}
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $found ) as $wordId ) {
		$this->mOutput->setProperty( $wordId, '' );
	}

	return $text;
}
4062 
/**
 * Add a tracking category to the parser output, using the title currently
 * being parsed as context.
 *
 * @param string $msg Passed through to the output's addTrackingCategory()
 * @return mixed Whatever the output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$output = $this->mOutput;
	return $output->addTrackingCategory( $msg, $this->getTitle() );
}
4071 
/**
 * Finish the headings of a rendered page: give each one an anchor, an
 * optional number and an edit-section marker, build the table of contents,
 * and collect per-section data.
 *
 * The output type is switched to OT_WIKI while the headings are processed
 * and restored afterwards.
 *
 * NOTE(review): the pattern that finds headings tolerates spaces before
 * the closing '>' of </hN>, while the pattern later used to split the text
 * does not; confirm such closing tags cannot reach this method.
 *
 * @param string $text HTML in which the <hN> headings are searched
 * @param string $origText Text that is preprocessed to a DOM in order to
 *  compute the offset of each section
 * @param bool $isMain When true, the section data is stored on the parser
 *  output, and a TOC without a forced position is appended to the text
 *  before the first heading
 * @return string $text with rebuilt headings and, where applicable, the
 *  table of contents
 */
private function finalizeHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page.
	# Otherwise actual presence will depend on post-cache transforms.
	$maybeShowEditLink = !isset( $this->mDoubleUnderscores['noeditsection'] );

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->getTitle()->getPrefixedDBkey();
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Anchors handed out so far, keyed by their lower-cased form
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			# The marker's serial number leads to the title and section
			# index recorded for this heading
			$serial = $markerMatches[1];
			[ $titleText, $sectionIndex ] = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				// @phan-suppress-next-line PhanTypeInvalidDimOffset
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					// @phan-suppress-previous-line PhanTypeInvalidDimOffset
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			# Already taken: append the first free numeric suffix, from 2 up
			$i = 2;
			while ( isset( $refers["{$arrayKey}_$i"] ) ) {
				++$i;
			}
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only record a fallback when there is one. Writing $refers[false]
		# would land on key 0 and make a later heading whose id is "0" look
		# like a duplicate.
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				$i = 2;
				while ( isset( $refers["{$fallbackArrayKey}_$i"] ) ) {
					++$i;
				}
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine( $linkAnchor, $tocline,
				$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			# Note: despite the variable name, this adds up mb_strlen() results
			$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
				$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->getTitle()->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline( $level,
			$matches['attrib'][$headlineCount], $anchor, $headline,
			$editlink, $fallbackAnchor );

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		// Run the ParserSectionCreate hook once per section, passing the
		// section number, its content and the edit-link flag
		$this->hookRunner->onParserSectionCreate( $this, $i, $sections[$i], $maybeShowEditLink );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full = implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	} else {
		return $full;
	}
}
4477 
/**
 * Transform wikitext for saving: strip NUL characters, normalize line
 * endings, run the pre-save pass when the options ask for it, and unstrip
 * the result.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title passed to startParse()
 * @param User $user User set on the parser for the duration of the call
 * @param ParserOptions $options Options passed to startParse()
 * @param bool $clearState When true, the parser is locked and its state
 *  cleared before the transform starts
 * @return string The transformed text
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	if ( $clearState ) {
		// Never read; it only has to stay in scope until this method returns
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings. The latter
	// should already be handled in TextContent subclasses, but it is kept
	// for backwards-compatibility with other code that just calls PST.
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	$wantsTransform = $options->getPreSaveTransform();
	if ( $wantsTransform ) {
		$text = $this->pstPass2( $text, $user );
	}
	$text = $this->mStripState->unstripBoth( $text );

	$this->hookRunner->onParserPreSaveTransformComplete( $this, $text );

	# Reset
	$this->setUser( null );

	return $text;
}
4517 
/**
 * Pre-save pass: substitute variables, expand signatures and apply the
 * "pipe trick" to links.
 *
 * @param string $text Wikitext to transform
 * @param User $user User whose signature replaces ~~~ and ~~~~
 * @return string The transformed wikitext
 */
private function pstPass2( $text, User $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( 'user-signature', 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# Non-ASCII punctuation recognised by the pipe trick. Written as escapes
	# so the patterns survive tools that fold such characters to ASCII.
	$wideOpen = "\u{FF08}"; # fullwidth left parenthesis
	$wideClose = "\u{FF09}"; # fullwidth right parenthesis
	$wideComma = "\u{FF0C}"; # fullwidth comma
	$arabicComma = "\u{060C}"; # Arabic comma

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page(context)|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?{$wideOpen}$tc+{$wideClose})\\|]]/";
	// [[ns:page (context), context|]] (using single, double-width or Arabic comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |{$wideComma}|{$arabicComma} )$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		# The current title ends in " (context)": reuse that context
		$text = preg_replace( $p2, "[[{$m[1]}\\1{$m[2]}|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "{$m[1]}{$m[2]}" != '' ) {
		# The current title has a prefix and/or a ", context" suffix
		$text = preg_replace( $p2, "[[{$m[1]}\\1{$m[2]}|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4587 
/**
 * Build the signature wikitext for a user.
 *
 * A custom ("fancy") signature is returned through cleanSig() when it
 * passes validation. In every other case the result is the 'signature'
 * message (or 'signature-anon' for anonymous users), built from the user
 * name and the nickname.
 *
 * @param User $user
 * @param string|false $nickname Nickname to use; false reads the user's
 *  'nickname' option
 * @param bool|null $fancySig Whether the nickname is raw signature markup;
 *  null reads the user's 'fancysig' option
 * @return string Signature wikitext
 */
public function getUserSig( User $user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Anything not given is read from the user object
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	$hasNickname = ( $nickname !== null && $nickname !== '' );
	if ( !$hasNickname ) {
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' ) ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		$accepted = ( $this->validateSig( $nickname ) !== false );

		# New validator, consulted only in 'disallow' mode
		$validationMode = $this->svcOptions->get( 'SignatureValidation' );
		if ( $accepted && $validationMode === 'disallow' ) {
			$validator = new SignatureValidator( $user, null, $this->mOptions );
			$accepted = !$validator->validateSignature( $nickname );
		}

		if ( $accepted ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}

		# Failed to validate; fall back to the default
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has invalid signature." );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$escapedUser = wfEscapeWikiText( $username );
	$escapedNick = wfEscapeWikiText( $nickname );
	$messageKey = $user->isAnon() ? 'signature-anon' : 'signature';

	return wfMessage( $messageKey, $escapedUser, $escapedNick )->inContentLanguage()
		->title( $this->getTitle() )->text();
}
4655 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|false The text unchanged when it is well-formed, false
 *  otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}
	return false;
}
4665 
/**
 * Clean up signature text: force substitution on every template call that
 * is not already substituted, strip nested signature tildes, and expand
 * the result through the preprocessor.
 *
 * @param string $text Signature text
 * @param bool $parsing True when called from inside a running parse; the
 *  parser is then neither locked nor restarted, and the result is returned
 *  without unstripping
 * @return string Cleaned signature; the input unchanged when the
 *  clean-signatures option is off
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		global $wgTitle;
		// Never read; it only has to stay in scope until this method returns
		$magicScopeVariable = $this->lock();
		// startParse() takes ( title, options, output type, clear state ).
		// NOTE(review): assumes ParserOptions can be built without
		// arguments - confirm against its constructor.
		$this->startParse(
			$wgTitle,
			new ParserOptions,
			self::OT_PREPROCESS,
			true
		);
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$substWord = $this->magicWordFactory->get( 'subst' );
	# Matches every "{{" that is not already followed by the subst word
	$substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
	$substText = '{{' . $substWord->getSynonym( 0 );

	$text = preg_replace( $substRegex, $substText, $text );
	$text = self::cleanSigInSig( $text );
	$dom = $this->preprocessToDom( $text );
	$frame = $this->getPreprocessor()->newFrame();
	$text = $frame->expand( $dom );

	if ( !$parsing ) {
		$text = $this->mStripState->unstripBoth( $text );
	}

	return $text;
}
4711 
/**
 * Remove every run of three to five tildes from the text, so that a
 * signature cannot itself contain a signature.
 *
 * @param string $text
 * @return string The text without those tilde runs
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4722 
/**
 * Set up the parser for use from outside the usual entry points.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to clear the parser state
 * @param int|null $revId When not null, stored as the current revision ID
 */
public function startExternalParse( ?Title $title, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $title, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4741 
/**
 * Set the title, options and output type for a parse, optionally clearing
 * the parser state afterwards.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to call clearState()
 */
private function startParse( ?Title $title, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4758 
/**
 * Run message text through preprocess().
 *
 * A static flag guards against re-entry: a call made while another one is
 * still running returns its input unchanged.
 *
 * @param string $text Message text
 * @param ParserOptions $options
 * @param Title|null $title Context title; the global $wgTitle when omitted
 * @return string The preprocessed text, or $text unchanged on re-entry
 */
public function transformMsg( $text, ParserOptions $options, ?Title $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Release the guard even when preprocess() throws; otherwise every
		# later call would be mistaken for recursion and skip the transform.
		$executing = false;
	}
}
4786 
/**
 * Register a callback for an extension tag.
 *
 * The tag name is lower-cased before it is stored, and it is added to the
 * strip list if it is not there yet.
 *
 * @param string $tag Tag name; must not contain '<', '>', CR or LF
 * @param callable $callback Hook to call for this tag
 * @return callable|null The callback previously registered for the tag,
 *  or null when there was none
 * @throws MWException When the tag name contains a forbidden character
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	# Strict comparison: a loose one treats numeric-looking names such as
	# '1e1' and '10' as the same tag and would skip the registration.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4824 
/**
 * Forget all registered tag hooks and empty the strip list.
 */
public function clearTagHooks() {
	$this->mStripList = [];
	$this->mTagHooks = [];
}
4832 
/**
 * Register a parser function hook and index it under every synonym of its
 * magic word.
 *
 * Note that the hook is stored before the magic word is looked up, so it
 * stays registered even when the lookup fails and the exception is thrown.
 *
 * @param string $id Magic word identifier
 * @param callable $callback Hook to call
 * @param int $flags Bit field of self::SFH_* flags; SFH_NO_HASH suppresses
 *  the leading '#' on the synonyms
 * @return callable|null The callback previously registered under $id, or
 *  null when there was none
 * @throws MWException When the magic word factory returns nothing for $id
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $magicWord->getSynonyms();
	# 0 or 1; doubles as the first-level key of the synonym table
	$caseSlot = intval( $magicWord->isCaseSensitive() );
	$wantsHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $synonyms as $synonym ) {
		# Case-insensitive words are stored lower-cased
		if ( !$caseSlot ) {
			$synonym = $this->contLang->lc( $synonym );
		}
		# Add leading hash
		if ( $wantsHash ) {
			$synonym = '#' . $synonym;
		}
		# Remove trailing colon
		if ( substr( $synonym, -1, 1 ) === ':' ) {
			$synonym = substr( $synonym, 0, -1 );
		}
		$this->mFunctionSynonyms[$caseSlot][$synonym] = $id;
	}

	return $previous;
}
4906 
/**
 * List the IDs of all registered parser function hooks.
 *
 * @return array Keys of the function hook registry
 */
public function getFunctionHooks() {
	$this->firstCallInit();
	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}
4916 
/**
 * Public entry point that delegates to replaceLinkHoldersPrivate().
 *
 * @param string &$text Text to operate on, modified in place
 * @param int $options Forwarded unchanged
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	// Thin wrapper: all work happens in the private implementation.
	$this->replaceLinkHoldersPrivate( $text, $options );
}
4928 
/**
 * Hand the text to the link holder array's replace() method.
 *
 * @param string &$text Text to operate on, modified in place
 * @param int $options Not used by this method
 */
private function replaceLinkHoldersPrivate( &$text, $options = 0 ) {
	$linkHolders = $this->mLinkHolders;
	$linkHolders->replace( $text );
}
4939 
/**
 * Pass the text through the link holder array's replaceText() method.
 *
 * @param string $text
 * @return string Result of LinkHolderArray::replaceText()
 */
private function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );
	return $replaced;
}
4950 
/**
 * Render an image gallery from gallery tag content.
 *
 * Each non-empty line of $text has the form "Title|param|param|...".
 * Parameters matching a known magic word (alt, link, or a parameter of
 * the file's media handler) are interpreted; any other parameter becomes
 * the image label, with the last such parameter winning.
 *
 * @param string $text Gallery content, one image per line
 * @param array $params Tag attributes; the keys read here are 'mode',
 *   'showfilename', 'caption', 'perrow', 'widths' and 'heights'
 * @return string Gallery HTML as produced by the gallery object's
 *   toHTML(), possibly altered by the AfterParserFetchFileAndTitle hook
 */
public function renderImageGallery( $text, array $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->getTitle() );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	$ig->setShowFilename( isset( $params['showfilename'] ) );
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	$this->hookRunner->onBeforeParserrenderImageGallery( $this, $ig );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# Resolve the file title; names without a namespace prefix default
		# to the File namespace. Without this assignment $title would be
		# undefined for everything below.
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( $title === null ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		$this->hookRunner->onBeforeParserFetchFileAndTitle(
			$this, $title, $options, $descQuery );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = $this->stripAltText( $match, false );
							// The pipe must be escaped: unescaped it is a regex
							// alternation, which would match any value that merely
							// ends in "}-" and then truncate it below.
							if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								// Drop the leading "-{R|" and the trailing "}-".
								$linkValue = substr( $linkValue, 4, -2 );
							}
							list( $type, $target ) = $this->parseLinkParameter( $linkValue );
							if ( $type === 'link-url' ) {
								$link = $target;
								$this->mOutput->addExternalLink( $target );
							} elseif ( $type === 'link-title' ) {
								$link = $target->getLinkURL();
								$this->mOutput->addLink( $target );
							}
							break;
						default:
							// Must be a handler specific parameter.
							// ($paramMap only contains handler entries when
							// $handler is set, so $handler is usable here.)
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								$this->logger->debug(
									"$parameterMatch failed parameter validation" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	$this->hookRunner->onAfterParserFetchFileAndTitle( $this, $ig, $html );
	return $html;
}
5122 
/**
 * Build (and cache per handler class) the map of image parameter magic
 * words and the matching magic word array.
 *
 * @param MediaHandler|false $handler Media handler of the file, or a
 *   falsy value when there is none
 * @return array Two elements: the parameter map
 *   (magic word ID => [ type, name ]) and the magic word array built
 *   from the map's keys
 */
private function getImageParams( $handler ) {
	# Built-in parameters, grouped by the type they are filed under
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;

	# Cache key: handler class name, or '' when there is no handler
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$handlerClass] ) ) {
		if ( !$internalParamMap ) {
			# Initialise the static map once
			$internalParamMap = [];
			foreach ( $internalParamNames as $type => $names ) {
				foreach ( $names as $name ) {
					// For grep: img_left, img_right, img_center, img_none,
					// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
					// img_bottom, img_text_bottom,
					// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
					// img_border, img_link, img_alt, img_class
					$magicName = str_replace( '-', '_', "img_$name" );
					$internalParamMap[$magicName] = [ $type, $name ];
				}
			}
		}

		# Start from the built-in parameters, then add handler params
		$paramMap = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magic => $paramName ) {
				$paramMap[$magic] = [ 'handler', $paramName ];
			}
		}

		$this->mImageParams[$handlerClass] = $paramMap;
		$this->mImageParamsMagicArray[$handlerClass] =
			$this->magicWordFactory->newArray( array_keys( $paramMap ) );
	}

	return [
		$this->mImageParams[$handlerClass],
		$this->mImageParamsMagicArray[$handlerClass]
	];
}
5172 
/**
 * Parse image options text and hand the result to Linker::makeImageLink().
 *
 * @param Title $title Title of the file; may be replaced by the
 *   BeforeParserFetchFileAndTitle hook / fetchFileAndTitle()
 * @param string $options Pipe-separated image options
 * @param LinkHolderArray|bool $holders Link holder array used to expand
 *   placeholders in alt/link text; when falsy the parser's own link
 *   holders are used (see stripAltText())
 * @return string Return value of Linker::makeImageLink()
 */
public function makeImage( Title $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Protect LanguageConverter markup when splitting into parts.
	# This must happen before $options is reused as the hook options array.
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	$options = [];
	$descQuery = false;
	$this->hookRunner->onBeforeParserFetchFileAndTitle(
		$this, $title, $options, $descQuery );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							# parseLinkParameter() returns the effective parameter
							# name ('no-link', 'link-url', 'link-title' or null)
							# together with the link target.
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		# Anything that is not a valid parameter is treated as the caption;
		# the last such part wins.
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	$this->hookRunner->onParserMakeImageParams( $title, $file, $params, $this );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5384 
/**
 * Classify the value of an image "link" parameter.
 *
 * As a side effect, a recognised URL is registered with the parser
 * output as an external link and a recognised title as a link.
 *
 * @param string $value Link parameter value
 * @return array Two elements, type and target:
 *   - [ 'no-link', false ] for an empty value
 *   - [ 'link-url', string ] for a valid URL with a known protocol
 *   - [ 'link-title', Title ] when the value parses as a title
 *   - [ null, false ] otherwise
 */
private function parseLinkParameter( $value ) {
	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	$urlChars = self::EXT_LINK_URL_CLASS;
	$urlAddr = self::EXT_LINK_ADDR;
	$protocols = $this->mUrlProtocols;

	if ( preg_match( "/^((?i)$protocols)/", $value ) ) {
		# Starts with a known protocol: only accept it if the whole value
		# is a well-formed URL.
		if ( !preg_match( "/^((?i)$protocols)$urlAddr$urlChars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );
	return [ 'link-title', $linkTitle ];
}
5427 
/**
 * Turn caption wikitext/HTML into plain text suitable for an attribute
 * such as alt or title.
 *
 * Link placeholders and strip markers are expanded, ampersands that
 * start a legacy semicolon-less named entity are encoded as &amp;, and
 * all tags are stripped.
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Link holders to expand with; when
 *   falsy the parser's own link holders are used
 * @return string
 */
private function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from handleInternalLinks2(), mLinkHolders won't be up-to-date.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$plain = $this->mStripState->unstripBoth( $plain );

	# Compatibility hack!  In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$plain = preg_replace( "/
		& # 1. entity prefix
		(?= # 2. followed by:
		(?: # a. one of the legacy semicolon-less named entities
		A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
		C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
		GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
		O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
		U(?:acute|circ|grave|uml)|Yacute|
		a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
		c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
		divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
		frac(?:1(?:2|4)|34)|
		gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
		i(?:acute|circ|excl|grave|quest|uml)|laquo|
		lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
		m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
		not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
		o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
		p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
		s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
		u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$)) # b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx", '&amp;', $plain );

	return Sanitizer::stripAllTags( $plain );
}
5485 
/**
 * Expand variables in the text and then unstrip both kinds of strip
 * markers. Emits a deprecation notice (deprecated since 1.35).
 *
 * @param string &$text Modified in place with the result
 * @param PPFrame|bool $frame Forwarded to replaceVariables()
 * @return string The same value that is written back to $text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	wfDeprecated( __METHOD__, '1.35' );
	$expanded = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $expanded );
	return $text;
}
5501 
/**
 * List the names of all registered tag hooks.
 *
 * @return array Keys of the tag hook registry
 */
public function getTags() {
	$this->firstCallInit();
	$tagNames = array_keys( $this->mTagHooks );
	return $tagNames;
}
5511 
/**
 * Return the function synonym cache populated by setFunctionHook().
 *
 * @return array
 */
public function getFunctionSynonyms() {
	$this->firstCallInit();
	$synonyms = $this->mFunctionSynonyms;
	return $synonyms;
}
5520 
/**
 * Return the URL protocols value held by this parser. Within this file
 * it is embedded into regular expressions as an alternation fragment.
 *
 * @return string
 */
public function getUrlProtocols() {
	$protocols = $this->mUrlProtocols;
	return $protocols;
}
5528 
/**
 * Shared implementation of getSection() and replaceSection().
 *
 * The text is preprocessed to a DOM whose top-level <h> nodes mark
 * section boundaries. A section runs from its heading up to the next
 * heading of the same or a higher level (lower or equal level number).
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section index, optionally prefixed with
 *   dash-separated flags; the flag "T" preprocesses the text for
 *   inclusion (e.g. "T-2")
 * @param string $mode 'get' to return the section text, 'replace' to
 *   return the whole text with the section replaced by $newText
 * @param string $newText Replacement text in 'replace' mode; in 'get'
 *   mode the value returned when the section does not exist
 * @return string
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	$magicScopeVariable = $this->lock();
	# startParse() requires a ParserOptions object and an output type.
	# NOTE(review): anonymous-user options and OT_PLAIN are reconstructed
	# here -- confirm against the intended upstream arguments.
	$this->startParse(
		$wgTitle,
		ParserOptions::newFromAnon(),
		self::OT_PLAIN,
		true
	);
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	# Process section extraction flags
	$flags = 0;
	$sectionParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $sectionParts );
	foreach ( $sectionParts as $part ) {
		if ( $part === 'T' ) {
			# "T" sections are counted in the text as seen when transcluded
			$flags |= self::PTD_FOR_INCLUSION;
		}
	}

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			if ( $mode === 'get' ) {
				return '';
			}

			return $newText;
		} else {
			if ( $mode === 'get' ) {
				return $newText;
			}

			return $text;
		}
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		while ( $node ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			# In replace mode everything before the target is copied verbatim
			if ( $mode === 'replace' ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
			$node = $node->getNextSibling();
		}
	}

	if ( !$node ) {
		# Not found
		if ( $mode === 'get' ) {
			return $newText;
		} else {
			return $text;
		}
	}

	# Find the end of the section, including nested sections
	do {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			$curLevel = $bits['level'];
			if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		if ( $mode === 'get' ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	# Write out the remainder (in replace mode only)
	if ( $mode === 'replace' ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		while ( $node ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			$node = $node->getNextSibling();
		}
	}

	# Re-insert stripped tags
	$outText = rtrim( $this->mStripState->unstripBoth( $outText ) );

	return $outText;
}
5671 
/**
 * Return the wikitext of one section; delegates to extractSections()
 * in 'get' mode.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier
 * @param string $defaultText Value passed through as the fallback text
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$sectionText = $this->extractSections( $text, $sectionId, 'get', $defaultText );
	return $sectionText;
}
5689 
/**
 * Replace one section of the wikitext; delegates to extractSections()
 * in 'replace' mode.
 *
 * @param string $oldText Page wikitext
 * @param string|int $sectionId Section identifier
 * @param string $newText Replacement text for the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$updatedText = $this->extractSections( $oldText, $sectionId, 'replace', $newText );
	return $updatedText;
}
5705 
/**
 * Split wikitext into a flat list of sections.
 *
 * The text is preprocessed to a DOM; every top-level <h> node starts a
 * new section. The first entry is always the (possibly empty) lead
 * section with index 0 and level 0.
 *
 * @param string $text Page wikitext
 * @return array[] One array per section with the keys:
 *   - 'index': section index from the heading ('i' of splitHeading()), 0 for the lead
 *   - 'level': heading level, 0 for the lead
 *   - 'offset': byte offset of the section start within the expanded text
 *   - 'heading': original text of the heading node, '' for the lead
 *   - 'text': original text of the whole section, heading included
 */
public function getFlatSectionInfo( $text ) {
	$magicScopeVariable = $this->lock();
	# startParse() requires a ParserOptions object and an output type.
	# NOTE(review): anonymous-user options and OT_PLAIN are reconstructed
	# here -- confirm against the intended upstream arguments.
	$this->startParse(
		null,
		ParserOptions::newFromAnon(),
		self::OT_PLAIN,
		true
	);
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $text, 0 );
	$node = $root->getFirstChild();
	$offset = 0;
	$currentSection = [
		'index' => 0,
		'level' => 0,
		'offset' => 0,
		'heading' => '',
		'text' => ''
	];
	$sections = [];

	while ( $node ) {
		$nodeText = $frame->expand( $node, PPFrame::RECOVER_ORIG );
		if ( $node->getName() === 'h' ) {
			# A heading closes the current section and opens the next one
			$bits = $node->splitHeading();
			$sections[] = $currentSection;
			$currentSection = [
				'index' => $bits['i'],
				'level' => $bits['level'],
				'offset' => $offset,
				'heading' => $nodeText,
				'text' => $nodeText
			];
		} else {
			$currentSection['text'] .= $nodeText;
		}
		# Offsets are counted in bytes, hence strlen()
		$offset += strlen( $nodeText );
		$node = $node->getNextSibling();
	}
	$sections[] = $currentSection;
	return $sections;
}
5777 
/**
 * Return the revision ID stored on this parser; null unless one has
 * been set (e.g. via startExternalParse()).
 *
 * @return int|null
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;
	return $revisionId;
}
5791 
/**
 * Return a legacy Revision object wrapping the revision record of the
 * current parse, caching it on first use. Emits a deprecation notice
 * (deprecated since 1.35).
 *
 * @return Revision|null Null when no revision record is available
 */
public function getRevisionObject() {
	wfDeprecated( __METHOD__, '1.35' );

	if ( !$this->mRevisionObject ) {
		$this->mRevisionObject = null;

		$record = $this->getRevisionRecordObject();
		if ( $record ) {
			$this->mRevisionObject = new Revision( $record );
		}
	}

	return $this->mRevisionObject;
}
5815 
/**
 * Return the revision record for the revision being parsed, caching a
 * truthy result on the parser.
 *
 * @return RevisionRecord|null Null when the revision is missing, or when
 *   no revision ID is set and the callback returned an already-saved
 *   revision
 */
public function getRevisionRecordObject() {
	if ( $this->mRevisionRecordObject ) {
		return $this->mRevisionRecordObject;
	}

	// NOTE: try to get the RevisionObject even if mRevisionId is null.
	// This is useful when parsing a revision that has not yet been saved.
	// However, if we get back a saved revision even though we are in
	// preview mode, we'll have to ignore it, see below.
	// NOTE: This callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. We can't just
	// skip it if mRevisionId is set.
	$fetchRevision = $this->mOptions->getCurrentRevisionRecordCallback();
	$revRecord = call_user_func( $fetchRevision, $this->getTitle(), $this );

	// The revision record callback returns `false` (not null) to
	// indicate that the revision is missing.  (See for example
	// Parser::statelessFetchRevisionRecord(), the default callback.)
	// This API expects `null` instead. (T251952)
	if ( $revRecord === false ) {
		$revRecord = null;
	}

	if ( $this->mRevisionId === null && $revRecord && $revRecord->getId() ) {
		// We are in preview mode (mRevisionId is null), and the current revision callback
		// returned an existing revision. Ignore it and return null, it's probably the page's
		// current revision, which is not what we want here. Note that we do want to call the
		// callback to allow the unsaved revision to be injected here, e.g. for
		// self-transclusion previews.
		return null;
	}

	// If the parse is for a new revision, then the callback should have
	// already been set to force the object and should match mRevisionId.
	// If not, try to fetch by mRevisionId for sanity.
	if ( $this->mRevisionId && $revRecord && $revRecord->getId() != $this->mRevisionId ) {
		$revisionLookup = MediaWikiServices::getInstance()->getRevisionLookup();
		$revRecord = $revisionLookup->getRevisionById( $this->mRevisionId );
	}

	$this->mRevisionRecordObject = $revRecord;

	return $this->mRevisionRecordObject;
}
5870 
/**
 * Return the timestamp of the revision being parsed, adjusted by the
 * content language, and cache it. Falls back to the timestamp from the
 * parser options when no revision record is available.
 *
 * The unadjusted timestamp is recorded on the parser output.
 *
 * @return string
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Use specified revision timestamp, falling back to the current timestamp
		$revRecord = $this->getRevisionRecordObject();
		if ( $revRecord ) {
			$rawTimestamp = $revRecord->getTimestamp();
		} else {
			$rawTimestamp = $this->mOptions->getTimestamp();
		}
		$this->mOutput->setRevisionTimestampUsed( $rawTimestamp ); // unadjusted time zone

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $rawTimestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
5895 
/**
 * Return the name of the user who made the revision being parsed,
 * caching it once known.
 *
 * @return string|null Null when there is no revision user and the parse
 *   is neither a pre-save transform nor a preview
 */
public function getRevisionUser(): ?string {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revRecord = $this->getRevisionRecordObject();

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $revRecord && $revRecord->getUser() ) {
		$this->mRevisionUser = $revRecord->getUser()->getName();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUser()->getName();
	}
	# Otherwise $this->mRevisionUser is left as null

	return $this->mRevisionUser;
}
5918 
/**
 * Return the size of the revision being parsed, caching the result.
 *
 * @return int|null Size reported by the revision record, or the parser
 *   input size when no revision record is available
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revRecord = $this->getRevisionRecordObject();

	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the own substitution
	# will change the size.
	$this->mRevisionSize = $revRecord
		? $revRecord->getSize()
		: $this->mInputSize;

	return $this->mRevisionSize;
}
5939 
/**
 * Set the default sort key, both on the parser and as the
 * 'defaultsort' property of the parser output.
 *
 * @param string $sort
 */
public function setDefaultSort( $sort ) {
	$this->mOutput->setProperty( 'defaultsort', $sort );
	$this->mDefaultSort = $sort;
}
5949 
/**
 * Return the default sort key, or the empty string when the stored
 * value is false (i.e. none has been set).
 *
 * @return string
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
5967 
/**
 * Return the stored default sort key as-is. Unlike getDefaultSort(),
 * a stored false is returned unchanged rather than mapped to ''.
 *
 * @return string|bool
 */
public function getCustomDefaultSort() {
	$customSort = $this->mDefaultSort;
	return $customSort;
}
5977 
/**
 * Derive a section name from text that has already had its markup
 * stripped: decode character references, then apply title-fragment
 * normalization.
 *
 * @param string $text
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	# NOTE(review): the listing this code was taken from skips a line at
	# the start of this body -- confirm no earlier normalization step
	# was dropped.
	$decoded = Sanitizer::decodeCharReferences( $text );
	return self::normalizeSectionName( $decoded );
}
5984 
/**
 * Build a "#fragment" link anchor from a section name.
 *
 * @param string $sectionName
 * @return string '#' followed by Sanitizer::escapeIdForLink() of the name
 */
private static function makeAnchor( $sectionName ) {
	$escapedId = Sanitizer::escapeIdForLink( $sectionName );
	return '#' . $escapedId;
}
5988 
/**
 * Build a "#fragment" anchor using the fallback ID encoding when the
 * second entry of the FragmentMode setting is 'legacy'; otherwise use
 * the normal link encoding.
 *
 * @param string $sectionName
 * @return string Anchor including the leading '#'
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );
	if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
		// ForAttribute() and ForLink() are the same for legacy encoding.
		// Without this assignment $id would be undefined in this branch.
		$id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
	} else {
		$id = Sanitizer::escapeIdForLink( $sectionName );
	}

	return "#$id";
}
6000 
/**
 * Guess the link anchor for a section heading given as wikitext.
 *
 * @param string $text Heading wikitext
 * @return string Anchor including the leading '#'
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );
	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6015 
/**
 * Like guessSectionNameFromWikiText(), but builds the anchor with
 * makeLegacyAnchor().
 *
 * @param string $text Heading wikitext
 * @return string Anchor including the leading '#'
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );
	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6031 
/**
 * Guess the link anchor for a heading whose markup has already been
 * stripped.
 *
 * @param string $text Stripped heading text
 * @return string Anchor including the leading '#'
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6041 
/**
 * Normalize a section name the way the title parser normalizes a
 * fragment, by parsing "#$text" and returning the resulting fragment.
 *
 * @param string $text
 * @return string The normalized fragment, or $text unchanged when the
 *   title parser rejects the string
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links

	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';

	try {
		$titleParts = $titleParser->splitTitleString( "#$text" );
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}

	return $titleParts['fragment'];
}
6061 
/**
 * Reduce heading wikitext to text suitable for building a section name.
 *
 * In order: internal link markup is replaced by its visible text, external
 * link markup by its link text, quote markup is run through doQuotes(), and
 * everything between "<" and ">" is removed.
 *
 * @param string $text Wikitext of the heading
 * @return string Text with the markup above removed
 */
public function stripSectionName( $text ) {
	# Internal links: piped form keeps the label, plain form keeps the target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# External links: keep only the link text
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Drop HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6094 
/**
 * Strip/replaceVariables/unstrip pass for preprocessor regression testing.
 *
 * Takes the parser lock, starts a parse with the given title, options and
 * output type, then expands variables, unstrips the result and removes
 * HTML tags via the Sanitizer.
 *
 * @param string $text Input text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @return string Processed text
 */
private function fuzzTestSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	// The handle returned by lock() is held in a local for the duration of the call
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );
	return Sanitizer::removeHTMLtags( $unstripped );
}
6116 
/**
 * Run preSaveTransform() on the text, taking the user from the given
 * parser options.
 *
 * @param string $text Input text
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of preSaveTransform()
 */
private function fuzzTestPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $title, $user, $options );
}
6126 
/**
 * Run fuzzTestSrvus() with the output type fixed to OT_PREPROCESS.
 *
 * @param string $text Input text
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of fuzzTestSrvus()
 */
private function fuzzTestPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;
	return $this->fuzzTestSrvus( $text, $title, $options, $outputType );
}
6136 
/**
 * Apply a callback to every region of the text that lies outside strip
 * markers, and reassemble the result with the markers copied through
 * unchanged.
 *
 * A marker runs from MARKER_PREFIX up to and including the next
 * MARKER_SUFFIX. If a prefix has no matching suffix, the remainder of the
 * string is copied through without invoking the callback on it.
 *
 * @param string $s Text to process
 * @param callable $callback Receives one non-marker region (possibly the
 *  empty string) and returns its replacement
 * @return string Reassembled text
 */
public function markerSkipCallback( $s, callable $callback ) {
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$result = '';
	$offset = 0;

	while ( $offset < $length ) {
		$start = strpos( $s, self::MARKER_PREFIX, $offset );
		if ( $start === false ) {
			// No further markers: the rest of the text is one plain region
			$result .= $callback( substr( $s, $offset ) );
			break;
		}

		// Plain region in front of the marker
		$result .= $callback( substr( $s, $offset, $start - $offset ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			// Unterminated marker: pass the tail through as-is
			$result .= substr( $s, $start );
			break;
		}

		// Copy the complete marker verbatim and continue right after it
		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$offset = $end;
	}

	return $result;
}
6177 
/**
 * Remove strip markers from the text by delegating to the current
 * strip state's killMarkers().
 *
 * @param string $text Input text
 * @return string Result of StripState::killMarkers()
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
6187 
/**
 * Parse a size parameter such as "100", "100px", "100x200" or "100x200px".
 *
 * @param string $value Raw parameter value
 * @param bool $parseHeight Whether the "<width>x<height>" form is accepted
 * @return array Empty if $value is '' or matches neither form; otherwise
 *  contains 'width' and, for the "<width>x<height>" form, also 'height'.
 *  Missing digits yield 0 (intval of an empty match).
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$matches = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $matches ) ) {
		return [
			'width' => intval( $matches[1] ),
			'height' => intval( $matches[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6217 
/**
 * Lock this parser instance against re-entrant use.
 *
 * $mInParse is set to the backtrace of the locking call; the returned
 * ScopedCallback carries a closure that resets $mInParse to false.
 *
 * @throws MWException If the parser is already locked; the message includes
 *  the backtrace recorded by the lock owner
 * @return ScopedCallback Handle whose callback releases the lock
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Record where the lock was taken so that a later conflicting attempt
	// can report the owner's backtrace for easier debugging
	$this->mInParse = ( new Exception )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6244 
/**
 * Remove a single enclosing <p>...</p> from the HTML.
 *
 * The wrapper is removed only when the whole string is one "<p>" element
 * (optionally with a newline before and/or after the closing tag) and the
 * inner content contains no further "</p>"; otherwise the input is
 * returned unchanged.
 *
 * @param string $html HTML fragment
 * @return string Inner content of the paragraph, or $html unchanged
 */
public static function stripOuterParagraph( $html ) {
	$match = [];
	$isSingleParagraph = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $match )
		&& strpos( $match[1], '</p>' ) === false;

	return $isSingleParagraph ? $match[1] : $html;
}
6263 
/**
 * Return a parser that is free to use: this instance when it is not
 * currently locked ($mInParse is falsy), otherwise a new parser created
 * by the factory.
 *
 * @return Parser
 */
public function getFreshParser() {
	return $this->mInParse ? $this->factory->create() : $this;
}
6281 
/**
 * Set up the PHP implementation of OOUI for this request and flag the
 * current ParserOutput so that OOUI is enabled for it.
 *
 * @deprecated since 1.35; a deprecation warning is logged on every call
 */
public function enableOOUI() {
	wfDeprecated( __METHOD__, '1.35' );
	// The OOUI PHP setup has to happen here as well; flagging the output
	// alone does not set OOUI up for the current request.
	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
6294 
/**
 * Set a flag on the current ParserOutput and write a debug log entry
 * naming the flag, the prefixed title text and the reason.
 *
 * @param string $flag Flag to set on the output
 * @param string $reason Explanation included in the debug message
 */
private function setOutputFlag( string $flag, string $reason ): void {
	$this->mOutput->setFlag( $flag );

	$name = $this->getTitle()->getPrefixedText();
	$message = __METHOD__ . ": set $flag flag on '$name'; $reason";
	$this->logger->debug( $message );
}
6306 }
Parser\$badFileLookup
BadFileLookup $badFileLookup
Definition: Parser.php:356
Parser\getFunctionHooks
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:4912
Parser\$mLinkRenderer
LinkRenderer $mLinkRenderer
Definition: Parser.php:320
Parser\$mForceTocPosition
$mForceTocPosition
Definition: Parser.php:245
Parser\recursivePreprocess
recursivePreprocess( $text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:937
Parser\getContentLanguageConverter
getContentLanguageConverter()
Shorthand for getting a Language Converter for Content language.
Definition: Parser.php:1617
ParserOptions
Set options of the Parser.
Definition: ParserOptions.php:44
Parser\attributeStripCallback
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition: Parser.php:5495
PPFrame\loopCheck
loopCheck( $title)
Returns true if the infinite loop check is OK, false if a loop is detected.
Parser\$mSubstWords
MagicWordArray $mSubstWords
Definition: Parser.php:182
Parser\$linkRendererFactory
LinkRendererFactory $linkRendererFactory
Definition: Parser.php:347
Sanitizer\ID_FALLBACK
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition: Sanitizer.php:78
Parser\maybeMakeExternalImage
maybeMakeExternalImage( $url)
make an image if it's allowed, either through the global option, through the exception,...
Definition: Parser.php:2322
MagicWordArray
Class for handling an array of magic words.
Definition: MagicWordArray.php:32
Parser\EXT_LINK_ADDR
const EXT_LINK_ADDR
Definition: Parser.php:106
Revision\RevisionAccessException
Exception representing a failure to look up a revision.
Definition: RevisionAccessException.php:34
FauxRequest
WebRequest clone which takes values from a provided array.
Definition: FauxRequest.php:35
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:361
Parser\$mInputSize
$mInputSize
Definition: Parser.php:286
PPFrame\STRIP_COMMENTS
const STRIP_COMMENTS
Definition: PPFrame.php:31
Parser\SPACE_NOT_NL
const SPACE_NOT_NL
Definition: Parser.php:113
HtmlArmor
Marks HTML that shouldn't be escaped.
Definition: HtmlArmor.php:30
Revision\RevisionRecord
Page revision base class.
Definition: RevisionRecord.php:45
Parser\__destruct
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:486
Preprocessor\DOM_FOR_INCLUSION
const DOM_FOR_INCLUSION
Transclusion mode flag for Preprocessor::preprocessToObj()
Definition: Preprocessor.php:29
ParserOutput
Definition: ParserOutput.php:31
Parser\$mLinkHolders
LinkHolderArray $mLinkHolders
Definition: Parser.php:217
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:71
Parser\braceSubstitution
braceSubstitution(array $piece, PPFrame $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:2936
Parser\makeLimitReport
makeLimitReport()
Set the limit report data in the current ParserOutput, and return the limit report HTML comment.
Definition: Parser.php:720
MagicWordFactory
A factory that stores information about MagicWords, and creates them on demand with caching.
Definition: MagicWordFactory.php:37
Parser\internalParseHalfParsed
internalParseHalfParsed( $text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1655
Parser\stripAltText
stripAltText( $caption, $holders)
Definition: Parser.php:5433
Parser\killMarkers
killMarkers( $text)
Remove any strip markers found in the given text.
Definition: Parser.php:6184
User\isAnon
isAnon()
Get whether the user is anonymous.
Definition: User.php:2993
Sanitizer\stripAllTags
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed,...
Definition: Sanitizer.php:1574
Parser\$mTagHooks
$mTagHooks
Definition: Parser.php:156
Parser\OutputType
OutputType( $x=null)
Accessor/mutator for the output type.
Definition: Parser.php:1049
Parser\$currentRevisionCache
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:304
Parser\setOutputFlag
setOutputFlag(string $flag, string $reason)
Sets the flag on the parser output but also does some debug logging.
Definition: Parser.php:6301
Parser\Title
Title(Title $x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:1013
Parser\enableOOUI
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6289
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:166
Linker\makeSelfLinkObj
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:164
Parser\$mTplDomCache
array $mTplDomCache
Definition: Parser.php:247
MediaWiki\BadFileLookup
Definition: BadFileLookup.php:13
PPFrame\NO_ARGS
const NO_ARGS
Definition: PPFrame.php:29
wfSetVar
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest If source is NULL, it just returns the val...
Definition: GlobalFunctions.php:1560
Parser\parseExtensionTagAsTopLevelDoc
parseExtensionTagAsTopLevelDoc( $text)
Needed by Parsoid/PHP to ensure all the hooks for extensions are run in the right order.
Definition: Parser.php:896
Parser\$mDoubleUnderscores
$mDoubleUnderscores
Definition: Parser.php:240
Linker\tocIndent
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1664
Parser\getRevisionSize
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:5924
Sanitizer\escapeIdForAttribute
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
Definition: Sanitizer.php:815
Sanitizer\removeHTMLtags
static removeHTMLtags( $text, $processCallback=null, $args=[], $extratags=[], $removetags=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments.
Definition: Sanitizer.php:239
Parser\handleExternalLinks
handleExternalLinks( $text)
Replace external links (REL)
Definition: Parser.php:2107
Parser\$mOutputType
$mOutputType
Definition: Parser.php:272
Parser\setUser
setUser(?User $user)
Set the current user.
Definition: Parser.php:977
MediaWiki\Linker\LinkRenderer
Class that generates HTML links for pages.
Definition: LinkRenderer.php:41
ParserOptions\getDisableTitleConversion
getDisableTitleConversion()
Whether title conversion should be disabled.
Definition: ParserOptions.php:523
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1831
Parser\handleHeadings
handleHeadings( $text)
Parse headers and return html.
Definition: Parser.php:1889
getUser
getUser()
MediaWiki\SpecialPage\SpecialPageFactory
Factory for handling the special page list and generating SpecialPage objects.
Definition: SpecialPageFactory.php:61
$wgNoFollowDomainExceptions
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
Definition: DefaultSettings.php:4716
Parser\handleAllQuotes
handleAllQuotes( $text)
Replace single quotes with HTML markup.
Definition: Parser.php:1906
Parser\$mUrlProtocols
$mUrlProtocols
Definition: Parser.php:191
Parser\extractTagsAndParams
static extractTagsAndParams(array $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:1231
Parser\$mLinkID
int $mLinkID
Definition: Parser.php:223
OT_HTML
const OT_HTML
Definition: Defines.php:167
SFH_NO_HASH
const SFH_NO_HASH
Definition: Defines.php:180
Title\getPrefixedText
getPrefixedText()
Get the prefixed title with spaces.
Definition: Title.php:1884
Parser\handleDoubleUnderscore
handleDoubleUnderscore( $text)
Strip double-underscore items like NOGALLERY and NOTOC Fills $this->mDoubleUnderscores,...
Definition: Parser.php:4015
Parser\fetchCurrentRevisionRecordOfTitle
fetchCurrentRevisionRecordOfTitle(Title $title)
Fetch the current revision of a given title as a RevisionRecord.
Definition: Parser.php:3471
Sanitizer\normalizeSectionNameWhitespace
static normalizeSectionNameWhitespace( $section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(),...
Definition: Sanitizer.php:1108
OT_PREPROCESS
const OT_PREPROCESS
Definition: Defines.php:169
Parser\normalizeSectionName
static normalizeSectionName( $text)
Apply the same normalization as code making links to this section would.
Definition: Parser.php:6048
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
wfHostname
wfHostname()
Get host name of the current machine, for use in error reporting.
Definition: GlobalFunctions.php:1293
Parser\recursiveTagParseFully
recursiveTagParseFully( $text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition: Parser.php:871
Parser\$specialPageFactory
SpecialPageFactory $specialPageFactory
Definition: Parser.php:335
Parser\nextLinkID
nextLinkID()
Definition: Parser.php:1092
Parser\fuzzTestPreprocess
fuzzTestPreprocess( $text, Title $title, ParserOptions $options)
Definition: Parser.php:6133
Parser\fuzzTestPst
fuzzTestPst( $text, Title $title, ParserOptions $options)
Definition: Parser.php:6123
Parser\getTargetLanguage
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:1119
User\newFromName
static newFromName( $name, $validate='valid')
Definition: User.php:558
Parser\$mStripList
$mStripList
Definition: Parser.php:160
wfMessage
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Definition: GlobalFunctions.php:1230
MediaWiki\Linker\LinkRendererFactory
Factory to create LinkRender objects.
Definition: LinkRendererFactory.php:34
$s
$s
Definition: mergeMessageFileList.php:186
SpecialPage\getTitleFor
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name If you don't need a full Title object,...
Definition: SpecialPage.php:106
Parser\getRevisionObject
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:5799
Parser\guessSectionNameFromWikiText
guessSectionNameFromWikiText( $text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition: Parser.php:6009
Parser\setDefaultSort
setDefaultSort( $sort)
Mutator for $mDefaultSort.
Definition: Parser.php:5945
Preprocessor_Hash
Differences from DOM schema:
Definition: Preprocessor_Hash.php:43
Sanitizer\armorFrenchSpaces
static armorFrenchSpaces( $text, $space='&#160;')
Armor French spaces with a replacement character.
Definition: Sanitizer.php:746
Parser\VERSION
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way,...
Definition: Parser.php:90
StripState
Definition: StripState.php:29
Parser\getExternalLinkRel
static getExternalLinkRel( $url=false, LinkTarget $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:2188
Parser\replaceVariables
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:2862
Parser\MARKER_PREFIX
const MARKER_PREFIX
Definition: Parser.php:148
Parser\getFunctionSynonyms
getFunctionSynonyms()
Definition: Parser.php:5516
Parser\$mInParse
bool string $mInParse
Recursive call protection.
Definition: Parser.php:312
Parser\transformMsg
transformMsg( $text, ParserOptions $options, Title $title=null)
Wrapper for preprocess()
Definition: Parser.php:4767
Parser\doQuotes
doQuotes( $text)
Helper function for handleAllQuotes()
Definition: Parser.php:1924
Linker\tocLine
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1690
MediaWiki\Languages\LanguageConverterFactory
An interface for creating language converters.
Definition: LanguageConverterFactory.php:45
Parser\$svcOptions
ServiceOptions $svcOptions
This is called $svcOptions instead of $options like elsewhere to avoid confusion with $mOptions,...
Definition: Parser.php:344
Linker\tocList
static tocList( $toc, Language $lang=null)
Wraps the TOC in a div with ARIA navigation role and provides the hide/collapse JavaScript.
Definition: Linker.php:1726
Parser\SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Parser.php:94
Parser\OT_WIKI
const OT_WIKI
Definition: Parser.php:124
Parser\getTags
getTags()
Accessor.
Definition: Parser.php:5507
Parser\getStripList
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1294
Parser\initializeVariables
initializeVariables()
Initialize the magic variables (like CURRENTMONTHNAME) and substitution modifiers.
Definition: Parser.php:2813
PPFrame\NO_TEMPLATES
const NO_TEMPLATES
Definition: PPFrame.php:30
Preprocessor
Definition: Preprocessor.php:27
Parser\getOptions
getOptions()
Definition: Parser.php:1064
MediaWiki\Languages\LanguageNameUtils
A service that provides utilities to do with language names and codes.
Definition: LanguageNameUtils.php:42
PPFrame\newChild
newChild( $args=false, $title=false, $indexOffset=0)
Create a child frame.
Parser\getFunctionLang
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:1107
StringUtils\replaceMarkup
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <....
Definition: StringUtils.php:268
Parser\$mRevisionRecordObject
RevisionRecord null $mRevisionRecordObject
Definition: Parser.php:289
Parser\Options
Options( $x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:1084
NS_SPECIAL
const NS_SPECIAL
Definition: Defines.php:52
Parser\lock
lock()
Lock the current instance of the parser.
Definition: Parser.php:6227
Parser\statelessFetchRevision
static statelessFetchRevision(Title $title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3517
Revision
Definition: Revision.php:40
Parser\getDefaultSort
getDefaultSort()
Accessor for $mDefaultSort. Will use the empty string if none is set.
Definition: Parser.php:5960
Parser\$mFunctionSynonyms
$mFunctionSynonyms
Definition: Parser.php:159
Parser\$hookRunner
HookRunner $hookRunner
Definition: Parser.php:362
Parser\$nsInfo
NamespaceInfo $nsInfo
Definition: Parser.php:350
Parser\makeLegacyAnchor
makeLegacyAnchor( $sectionName)
Definition: Parser.php:5989
Parser\setHook
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4811
Parser\$mHeadings
$mHeadings
Definition: Parser.php:238
Parser\getTitle
getTitle()
Definition: Parser.php:1002
Parser\$mVariables
MagicWordArray $mVariables
Definition: Parser.php:177
wfDeprecatedMsg
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
Definition: GlobalFunctions.php:1065
MWException
MediaWiki exception.
Definition: MWException.php:29
Parser\TOC_START
const TOC_START
Definition: Parser.php:151
Parser\statelessFetchTemplate
static statelessFetchTemplate( $title, $parser=false)
Static function to get a template Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3605
Parser\$mConf
array $mConf
Definition: Parser.php:188
Parser\$ot
$ot
Definition: Parser.php:274
Parser\getRevisionRecordObject
getRevisionRecordObject()
Get the revision record object for $this->mRevisionId.
Definition: Parser.php:5822
Parser\getUserSig
getUserSig(User $user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext.
Definition: Parser.php:4602
MediaWiki\Config\ServiceOptions
A class for passing options to services.
Definition: ServiceOptions.php:27
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that $function is deprecated.
Definition: GlobalFunctions.php:1033
Parser\OT_MSG
const OT_MSG
Definition: Parser.php:126
Parser\makeKnownLinkHolder
makeKnownLinkHolder(Title $nt, $text='', $trail='', $prefix='')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2709
Parser\firstCallInit
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:525
Parser\$mProfiler
SectionProfiler $mProfiler
Definition: Parser.php:315
Parser\preprocess
preprocess( $text, ?Title $title, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:914
Parser\getFlatSectionInfo
getFlatSectionInfo( $text)
Get an array of preprocessor section information.
Definition: Parser.php:5735
Parser\$mMarkerIndex
$mMarkerIndex
Definition: Parser.php:165
BlockLevelPass\doBlockLevels
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: BlockLevelPass.php:52
Parser\getCustomDefaultSort
getCustomDefaultSort()
Accessor for $mDefaultSort. Unlike getDefaultSort(), will return false if none is set.
Definition: Parser.php:5974
wfUrlProtocolsWithoutProtRel
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Definition: GlobalFunctions.php:767
Parser\handleTables
handleTables( $text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1327
$matches
$matches
Definition: NoLocalSettings.php:24
CoreTagHooks\register
static register( $parser)
Definition: CoreTagHooks.php:36
Parser\$contLang
Language $contLang
Definition: Parser.php:326
Parser\makeAnchor
static makeAnchor( $sectionName)
Definition: Parser.php:5985
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:326
PPNode
There are three types of nodes:
Definition: PPNode.php:35
Parser\$factory
ParserFactory $factory
Definition: Parser.php:332
Parser\replaceLinkHoldersPrivate
replaceLinkHoldersPrivate(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4936
LinkHolderArray
Definition: LinkHolderArray.php:33
Parser\__clone
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:500
NS_TEMPLATE
const NS_TEMPLATE
Definition: Defines.php:73
PPFrame\RECOVER_ORIG
const RECOVER_ORIG
Definition: PPFrame.php:36
Linker\makeHeadline
static makeHeadline( $level, $attribs, $anchor, $html, $link, $fallbackAnchor=false)
Create a headline for content.
Definition: Linker.php:1801
Parser\getHookContainer
getHookContainer()
Get a HookContainer capable of returning metadata about hooks or running extension hooks.
Definition: Parser.php:1630
Parser\callParserFunction
callParserFunction(PPFrame $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3314
Parser\extensionSubstitution
extensionSubstitution(array $params, PPFrame $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:3899
Linker\tocLineEnd
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1714
$args
if( $line===false) $args
Definition: mcc.php:124
MapCacheLRU
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:37
Parser\$mLangLinkLanguages
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:296
Parser\markerSkipCallback
markerSkipCallback( $s, callable $callback)
Call a callback function on all regions of the given text that are not inside strip markers,...
Definition: Parser.php:6154
Parser\fetchFileNoRegister
fetchFileNoRegister(Title $title, array $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3754
Parser\fetchTemplate
fetchTemplate(Title $title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3591
Parser\limitationWarn
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached Will warn at most once the user per limitation type...
Definition: Parser.php:2913
Parser\TOC_END
const TOC_END
Definition: Parser.php:152
$title
$title
Definition: testCompression.php:38
Parser\recursiveTagParse
recursiveTagParse( $text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:847
Linker\makeExternalLink
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:845
Parser\fetchFileAndTitle
fetchFileAndTitle(Title $title, array $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3729
Parser\finalizeHeadings
finalizeHeadings( $text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4087
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:624
Parser\$mHighestExpansionDepth
$mHighestExpansionDepth
Definition: Parser.php:234
SectionProfiler
Arbitrary section name based PHP profiling.
Definition: SectionProfiler.php:33
Parser\cleanSig
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4676
RequestContext
Group all the pieces relevant to the context of a request into one instance @newable.
Definition: RequestContext.php:40
Parser\$mUser
User $mUser
Definition: Parser.php:253
Parser\$mImageParamsMagicArray
$mImageParamsMagicArray
Definition: Parser.php:163
SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Defines.php:181
Parser\handleInternalLinks
handleInternalLinks( $text)
Process [[ ]] wikilinks.
Definition: Parser.php:2380
Parser\$mTplRedirCache
$mTplRedirCache
Definition: Parser.php:236
Parser\$mFirstCall
bool $mFirstCall
Whether firstCallInit still needs to be called.
Definition: Parser.php:170
ParserOptions\getPreSaveTransform
getPreSaveTransform()
Transform wiki markup when saving the page?
Definition: ParserOptions.php:625
Parser\getStripState
getStripState()
Definition: Parser.php:1301
Parser\getContentLanguage
getContentLanguage()
Get the content language that this Parser is using.
Definition: Parser.php:1198
Parser\OT_PLAIN
const OT_PLAIN
Definition: Parser.php:128
$wgTitle
$wgTitle
Definition: Setup.php:794
Parser\handleMagicLinks
handleMagicLinks( $text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1707
Linker\splitTrail
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1822
Parser\__construct
__construct( $svcOptions=null, MagicWordFactory $magicWordFactory=null, Language $contLang=null, ParserFactory $factory=null, $urlProtocols=null, SpecialPageFactory $spFactory=null, $linkRendererFactory=null, $nsInfo=null, $logger=null, BadFileLookup $badFileLookup=null, LanguageConverterFactory $languageConverterFactory=null, HookContainer $hookContainer=null)
Constructing parsers directly is deprecated! Use a ParserFactory.
Definition: Parser.php:408
Parser\insertStripItem
insertStripItem( $text)
Add an item to the strip state Returns the unique tag which must be inserted into the stripped text T...
Definition: Parser.php:1314
Parser\fuzzTestSrvus
fuzzTestSrvus( $text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
Strip/replaceVariables/unstrip for preprocessor regression testing.
Definition: Parser.php:6105
Parser\isCurrentRevisionOfTitleCached
isCurrentRevisionOfTitleCached(Title $title)
Definition: Parser.php:3500
Parser\getFreshParser
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:6274
Parser\getRevisionUser
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:5901
Parser\setOptions
setOptions(ParserOptions $options)
Mutator for the ParserOptions object.
Definition: Parser.php:1073
Parser\getImageParams
getImageParams( $handler)
Definition: Parser.php:5127
wfUrlProtocols
wfUrlProtocols( $includeProtocolRelative=true)
Returns a regular expression of url protocols.
Definition: GlobalFunctions.php:722
Parser\$mAutonumber
$mAutonumber
Definition: Parser.php:206
Parser\replaceLinkHolders
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4925
Parser\addTrackingCategory
addTrackingCategory( $msg)
Definition: Parser.php:4068
Parser\getUrlProtocols
getUrlProtocols()
Definition: Parser.php:5525
Parser\incrementIncludeSize
incrementIncludeSize( $type, $size)
Increment an include size counter.
Definition: Parser.php:3991
Parser\getTargetLanguageConverter
getTargetLanguageConverter()
Shorthand for getting a Language Converter for Target language.
Definition: Parser.php:1606
ParserFactory
Definition: ParserFactory.php:33
Parser\startExternalParse
startExternalParse(?Title $title, ParserOptions $options, $outputType, $clearState=true, $revId=null)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4733
$content
$content
Definition: router.php:76
CoreParserFunctions\register
static register( $parser)
Definition: CoreParserFunctions.php:37
Parser\makeFreeExternalLink
makeFreeExternalLink( $url, $numPostProto)
Make a free external link, given a user-supplied URL.
Definition: Parser.php:1817
Parser\CONSTRUCTOR_OPTIONS
const CONSTRUCTOR_OPTIONS
Definition: Parser.php:367
NS_MEDIA
const NS_MEDIA
Definition: Defines.php:51
PPFrame\expand
expand( $root, $flags=0)
Expand a document tree node.
ILanguageConverter
The shared interface for all language converters.
Definition: ILanguageConverter.php:28
$wgNoFollowNsExceptions
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
Definition: DefaultSettings.php:4701
$wgNoFollowLinks
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
Definition: DefaultSettings.php:4695
Parser\$mOutput
ParserOutput $mOutput
Definition: Parser.php:205
Parser\$mFunctionHooks
$mFunctionHooks
Definition: Parser.php:158
ParserFactory\$inParserFactory
static int $inParserFactory
Track calls to Parser constructor to aid in deprecation of direct Parser invocation.
Definition: ParserFactory.php:72
Parser\$mOptions
ParserOptions null $mOptions
Definition: Parser.php:262
Parser\$mRevisionUser
$mRevisionUser
Definition: Parser.php:282
User\getOption
getOption( $oname, $defaultOverride=null, $ignoreHidden=false)
Get the user's current setting for a given option.
Definition: User.php:2572
Sanitizer\validateTagAttributes
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:390
Parser\extractSections
extractSections( $text, $sectionId, $mode, $newText='')
Break wikitext input into sections, and either pull or replace some particular section's text.
Definition: Parser.php:5558
Hooks\runner
static runner()
Get a HookRunner instance for calling hooks using the new interfaces.
Definition: Hooks.php:172
Parser\OT_HTML
const OT_HTML
Definition: Parser.php:123
PPFrame
Definition: PPFrame.php:28
$line
$line
Definition: mcc.php:119
Parser\EXT_LINK_URL_CLASS
const EXT_LINK_URL_CLASS
Definition: Parser.php:102
Parser\renderImageGallery
renderImageGallery( $text, array $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:4965
StringUtils\delimiterExplode
static delimiterExplode( $startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...
Definition: StringUtils.php:59
OutputPage\setupOOUI
static setupOOUI( $skinName='default', $dir='ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
Definition: OutputPage.php:4131
Parser\magicLinkCallback
magicLinkCallback(array $m)
Definition: Parser.php:1738
Parser\getUser
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise.
Definition: Parser.php:1137
wfEscapeWikiText
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking,...
Definition: GlobalFunctions.php:1504
Parser\incrementExpensiveFunctionCount
incrementExpensiveFunctionCount()
Definition: Parser.php:4003
Parser\$mImageParams
$mImageParams
Definition: Parser.php:162
Parser\setFunctionHook
setFunctionHook( $id, callable $callback, $flags=0)
Create a function, e.g. {{sum:1|2|3}}.
Definition: Parser.php:4876
Parser\setLinkID
setLinkID( $id)
Definition: Parser.php:1099
Sanitizer\cleanUrl
static cleanUrl( $url)
Definition: Sanitizer.php:1625
Parser\$magicWordFactory
MagicWordFactory $magicWordFactory
Definition: Parser.php:323
Parser\preprocessToDom
preprocessToDom( $text, $flags=0)
Get the document object model for the given wikitext.
Definition: Parser.php:2838
Parser
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition: Parser.php:84
RequestContext\getMain
static getMain()
Get the RequestContext object associated with the main request.
Definition: RequestContext.php:476
Title\newFromLinkTarget
static newFromLinkTarget(LinkTarget $linkTarget, $forceClone='')
Returns a Title given a LinkTarget.
Definition: Title.php:289
Parser\getMagicWordFactory
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition: Parser.php:1188
Parser\argSubstitution
argSubstitution(array $piece, PPFrame $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:3846
Linker\makeMediaLinkFile
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:777
Sanitizer\fixTagAttributes
static fixTagAttributes( $text, $element, $sorted=false)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML,...
Definition: Sanitizer.php:703
Parser\getPreloadText
getPreloadText( $text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:956
Parser\setOutputType
setOutputType( $ot)
Mutator for the output type.
Definition: Parser.php:1031
Parser\getTemplateDom
getTemplateDom(Title $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3403
$lines
if(!file_exists( $CREDITS)) $lines
Definition: updateCredits.php:45
Parser\OT_PREPROCESS
const OT_PREPROCESS
Definition: Parser.php:125
Parser\getExternalLinkAttribs
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link.
Definition: Parser.php:2210
Hooks\isRegistered
static isRegistered( $name)
Returns true if a hook has a function registered to it.
Definition: Hooks.php:88
Parser\$mStripState
StripState $mStripState
Definition: Parser.php:212
Parser\internalParse
internalParse( $text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1537
Parser\validateSig
validateSig( $text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4662
Parser\$mPPNodeCount
$mPPNodeCount
Definition: Parser.php:227
Title
Represents a title within MediaWiki.
Definition: Title.php:46
Parser\resetOutput
resetOutput()
Reset the ParserOutput.
Definition: Parser.php:593
Parser\stripOuterParagraph
static stripOuterParagraph( $html)
Strip outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6255
Parser\$mVarCache
$mVarCache
Definition: Parser.php:161
Parser\$mDefaultSort
$mDefaultSort
Definition: Parser.php:235
Parser\$mExpensiveFunctionCount
$mExpensiveFunctionCount
Definition: Parser.php:242
Parser\normalizeLinkUrl
static normalizeLinkUrl( $url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:2241
MediaWiki\Preferences\SignatureValidator
Definition: SignatureValidator.php:37
Parser\interwikiTransclude
interwikiTransclude(Title $title, $action)
Transclude an interwiki link.
Definition: Parser.php:3777
Parser\$mExtLinkBracketedRegex
$mExtLinkBracketedRegex
Definition: Parser.php:191
wfMatchesDomainList
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
Definition: GlobalFunctions.php:880
Parser\$mIncludeSizes
$mIncludeSizes
Definition: Parser.php:225
$cache
$cache
Definition: mcc.php:33
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:26
Parser\getSection
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($sectionId).
Definition: Parser.php:5686
Xml\isWellFormedXmlFragment
static isWellFormedXmlFragment( $text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:739
Parser\$mRevisionTimestamp
$mRevisionTimestamp
Definition: Parser.php:280
Parser\replaceSection
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:5702
Sanitizer\ID_PRIMARY
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition: Sanitizer.php:70
Parser\$logger
LoggerInterface $logger
Definition: Parser.php:353
ParserOptions\getUser
getUser()
Current user.
Definition: ParserOptions.php:1037
PPFrame\virtualBracketedImplode
virtualBracketedImplode( $start, $sep, $end,... $params)
Virtual implode with brackets.
Parser\armorLinks
armorLinks( $text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2733
Linker\tocUnindent
static tocUnindent( $level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1675
Linker\makeImageLink
static makeImageLink(Parser $parser, LinkTarget $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:299
getTitle
getTitle()
Definition: RevisionSearchResultTrait.php:81
Parser\getBadFileLookup
getBadFileLookup()
Get the BadFileLookup instance that this Parser is using.
Definition: Parser.php:1208
NS_CATEGORY
const NS_CATEGORY
Definition: Defines.php:77
Parser\getOutput
getOutput()
Definition: Parser.php:1057
StringUtils\delimiterReplace
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to preg_replace() with flags.
Definition: StringUtils.php:248
Parser\handleInternalLinks2
handleInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2390
Parser\preSaveTransform
preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\r\n" -> "\n" conversion, substituting signatur...
Definition: Parser.php:4489
Parser\getOutputType
getOutputType()
Accessor for the output type.
Definition: Parser.php:1023
Parser\$mGeneratedPPNodeCount
$mGeneratedPPNodeCount
Definition: Parser.php:232
Parser\statelessFetchRevisionRecord
static statelessFetchRevisionRecord(Title $title, $parser=null)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3534
Parser\getHookRunner
getHookRunner()
Get a HookRunner for calling core hooks.
Definition: Parser.php:1642
PPFrame\getArgument
getArgument( $name)
Get an argument to this frame by name.
Sanitizer\normalizeCharReferences
static normalizeCharReferences( $text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1127
TextContent\normalizeLineEndings
static normalizeLineEndings( $text)
Do a "\r\n" -> "\n" and "\r" -> "\n" transformation as well as trim trailing whitespace.
Definition: TextContent.php:203
Parser\getSectionNameFromStrippedText
static getSectionNameFromStrippedText( $text)
Definition: Parser.php:5978
Sanitizer\escapeIdForLink
static escapeIdForLink( $id)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid URL...
Definition: Sanitizer.php:842
Linker\normalizeSubpageLink
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1480
MediaWiki\Config\ServiceOptions\get
get( $key)
Definition: ServiceOptions.php:88
Parser\startParse
startParse(?Title $title, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4748
MediaWiki\HookContainer\HookContainer
HookContainer class.
Definition: HookContainer.php:45
Parser\SFH_NO_HASH
const SFH_NO_HASH
Definition: Parser.php:93
CoreMagicVariables\expand
static expand(Parser $parser, string $id, int $ts, NamespaceInfo $nsInfo, ServiceOptions $svcOptions, LoggerInterface $logger)
Expand the magic variable given by $index.
Definition: CoreMagicVariables.php:48
Parser\$mShowToc
$mShowToc
Definition: Parser.php:244
ImageGalleryBase\factory
static factory( $mode=false, IContextSource $context=null)
Get a new image gallery.
Definition: ImageGalleryBase.php:116
Sanitizer\decodeTagAttributes
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1008
Parser\guessSectionNameFromStrippedText
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition: Parser.php:6037
Parser\fetchTemplateAndTitle
fetchTemplateAndTitle(Title $title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3546
Parser\$languageConverterFactory
LanguageConverterFactory $languageConverterFactory
Definition: Parser.php:329
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Definition: NamespaceInfo.php:35
PPFrame\isTemplate
isTemplate()
Return true if the frame is a template frame.
MediaWiki\HookContainer\HookRunner
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:571
Parser\parseLinkParameter
parseLinkParameter( $value)
Parse the value of 'link' parameter in image syntax ([[File:Foo.jpg|link=<value>]]).
Definition: Parser.php:5403
$t
$t
Definition: testCompression.php:74
Title\legalChars
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:735
Parser\$mRevisionObject
$mRevisionObject
Definition: Parser.php:276
Parser\fetchCurrentRevisionOfTitle
fetchCurrentRevisionOfTitle(Title $title)
Fetch the current revision of a given title.
Definition: Parser.php:3449
Sanitizer\decodeCharReferences
static decodeCharReferences( $text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string.
Definition: Sanitizer.php:1232
Parser\getRevisionTimestamp
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:5876
Html\element
static element( $element, $attribs=[], $contents='')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:234
Parser\expandMagicVariable
expandMagicVariable( $index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2760
NS_FILE
const NS_FILE
Definition: Defines.php:69
User\getBoolOption
getBoolOption( $oname)
Get the user's current setting for a given option, as a boolean value.
Definition: User.php:2604
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
Parser\$mPreprocessor
Preprocessor $mPreprocessor
Definition: Parser.php:198
Parser\parseWidthParam
static parseWidthParam( $value, $parseHeight=true)
Parse a width parameter of an image link, such as 300px or 200x300px.
Definition: Parser.php:6198
RawMessage
Variant of the Message class.
Definition: RawMessage.php:35
Parser\cleanSigInSig
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4718
Parser\setTitle
setTitle(Title $t=null)
Set the context title.
Definition: Parser.php:986
User
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:63
Parser\replaceLinkHoldersText
replaceLinkHoldersText( $text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:4947
Parser\normalizeUrlComponent
static normalizeUrlComponent( $component, $unsafe)
Definition: Parser.php:2299
Parser\clearTagHooks
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:4828
MWTimestamp\getLocalInstance
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
Definition: MWTimestamp.php:205
User\getName
getName()
Get the user name, or the IP of an anonymous user.
Definition: User.php:2057
OT_WIKI
const OT_WIKI
Definition: Defines.php:168
Linker\makeExternalImage
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage().
Definition: Linker.php:243
Parser\$mTitle
Title null $mTitle
Since 1.34, leaving mTitle uninitialized or setting mTitle to null is deprecated.
Definition: Parser.php:271
Parser\getLinkRenderer
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:1170
Language
Internationalisation code. See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Definition: Language.php:42
MediaWiki\Debug\DeprecatablePropertyArray
ArrayAccess implementation that supports deprecating access to certain properties.
Definition: DeprecatablePropertyArray.php:16
Parser\parse
parse( $text, Title $title, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:615
Parser\$mRevisionId
$mRevisionId
Definition: Parser.php:278
RequestContext\setTitle
setTitle(Title $title=null)
Definition: RequestContext.php:172
Parser\$mRevisionSize
$mRevisionSize
Definition: Parser.php:284
Parser\getRevisionId
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5788
Revision\SlotRecord
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:40
Parser\pstPass2
pstPass2( $text, User $user)
Pre-save transform helper function.
Definition: Parser.php:4526
OT_PLAIN
const OT_PLAIN
Definition: Defines.php:171
Parser\clearState
clearState()
Clear Parser state.
Definition: Parser.php:543
Parser\guessLegacySectionNameFromWikiText
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition: Parser.php:6025
MWHttpRequest\factory
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
Definition: MWHttpRequest.php:195
MediaWiki\Config\ServiceOptions\assertRequiredOptions
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
Definition: ServiceOptions.php:66
ParserOptions\newFromUser
static newFromUser( $user)
Get a ParserOptions object from a given user.
Definition: ParserOptions.php:1086
Parser\makeImage
makeImage(Title $title, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5181
Parser\stripSectionName
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6076
Parser\EXT_IMAGE_REGEX
const EXT_IMAGE_REGEX
Definition: Parser.php:109
Parser\getPreprocessor
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:1149
Parser\doBlockLevels
doBlockLevels( $text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: Parser.php:2747
Parser\$hookContainer
HookContainer $hookContainer
Definition: Parser.php:359
$type
$type
Definition: testCompression.php:52
MWTidy\tidy
static tidy( $text)
Interface with Remex tidy.
Definition: MWTidy.php:42