MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
43 use Psr\Log\LoggerInterface;
44 use Wikimedia\IPUtils;
45 use Wikimedia\ScopedCallback;
46 
87 class Parser {
88  # Flags for Parser::setFunctionHook
89  public const SFH_NO_HASH = 1;
90  public const SFH_OBJECT_ARGS = 2;
91 
92  # Constants needed for external link processing
93  # Everything except bracket, space, or control characters
94  # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
95  # as well as U+3000 IDEOGRAPHIC SPACE (T21052)
96  # \x{FFFD} is the Unicode replacement character, which the HTML5 spec
97  # uses to replace invalid HTML characters.
98  public const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
99  # Simplified expression to match an IPv4 or IPv6 address, or
100  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
101  // phpcs:ignore Generic.Files.LineLength
102  private const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
103  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
104  // phpcs:ignore Generic.Files.LineLength
105  private const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
106  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
107 
108  # Regular expression for a non-newline space
109  private const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
110 
115  public const PTD_FOR_INCLUSION = Preprocessor::DOM_FOR_INCLUSION;
116 
117  # Allowed values for $this->mOutputType
118  # Parameter to startExternalParse().
119  public const OT_HTML = 1; # like parse()
120  public const OT_WIKI = 2; # like preSaveTransform()
121  public const OT_PREPROCESS = 3; # like preprocess()
122  public const OT_MSG = 3;
123  # like extractSections() - portions of the original are returned unchanged.
124  public const OT_PLAIN = 4;
125 
143  public const MARKER_SUFFIX = "-QINU`\"'\x7f";
144  public const MARKER_PREFIX = "\x7f'\"`UNIQ-";
145 
146  # Markers used for wrapping the table of contents
147  public const TOC_START = '<mw:toc>';
148  public const TOC_END = '</mw:toc>';
149 
150  # Persistent:
151  private $mTagHooks = [];
152  private $mFunctionHooks = [];
153  private $mFunctionSynonyms = [ 0 => [], 1 => [] ];
154  private $mStripList = [];
155  private $mVarCache = [];
156  private $mImageParams = [];
159  public $mMarkerIndex = 0;
164  public $mFirstCall = false;
165 
166  # Initialised by initializeVariables()
167 
171  private $mVariables;
172 
176  private $mSubstWords;
177 
178  # Initialised in constructor
180 
181  # Initialized in constructor
182 
185  private $mPreprocessor;
186 
187  # Cleared with clearState():
188 
191  private $mOutput;
192  private $mAutonumber;
193 
198  public $mStripState;
199 
203  private $mLinkHolders;
204 
209  public $mLinkID;
221  private $mDefaultSort;
224  public $mHeadings;
228  public $mExpensiveFunctionCount; # number of expensive parser function calls
230  public $mShowToc;
233  private $mTplDomCache;
234 
239  public $mUser; # User object; only used when doing pre-save transform
240 
241  # Temporary
242  # These are variables reset at least once per parse regardless of $clearState
243 
248  public $mOptions;
249 
257  public $mTitle; # Title context, used for self-link rendering and similar things
258  private $mOutputType; # Output type, one of the OT_xxx constants
260  public $ot; # Shortcut alias, see setOutputType()
262  public $mRevisionObject; # The revision object of the specified revision ID
263 
264  public $mRevisionId; # ID to display in {{REVISIONID}} tags
266  public $mRevisionTimestamp; # The timestamp of the specified revision ID
268  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
270  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
272  public $mInputSize = false; # For {{PAGESIZE}} on current page.
273 
276 
283 
291 
298  public $mInParse = false;
299 
301  private $mProfiler;
302 
306  private $mLinkRenderer;
307 
310 
312  private $contLang;
313 
316 
318  private $factory;
319 
322 
330  private $svcOptions;
331 
334 
336  private $nsInfo;
337 
339  private $logger;
340 
342  private $badFileLookup;
343 
345  private $hookContainer;
346 
348  private $hookRunner;
349 
351  private $tidy;
352 
355 
357  private $userFactory;
358 
362  public const CONSTRUCTOR_OPTIONS = [
363  // See documentation for the corresponding config options
364  'ArticlePath',
365  'EnableScaryTranscluding',
366  'ExtraInterlanguageLinkPrefixes',
367  'FragmentMode',
368  'LanguageCode',
369  'MaxSigChars',
370  'MaxTocLevel',
371  'MiserMode',
372  'ScriptPath',
373  'Server',
374  'ServerName',
375  'ShowHostnames',
376  'SignatureValidation',
377  'Sitename',
378  'StylePath',
379  'TranscludeCacheExpiry',
380  'PreprocessorCacheThreshold',
381  'DisableLangConversion',
382  ];
383 
/**
 * Set up a Parser instance.
 *
 * Direct construction is refused: unless ParserFactory::$inParserFactory is
 * non-zero, an MWException is thrown. Otherwise the constructor stores the
 * injected services, builds the bracketed external-link regex from
 * $urlProtocols, EXT_LINK_ADDR and EXT_LINK_URL_CLASS, creates the
 * Preprocessor_Hash, registers the core tag hooks, initializes the variables
 * and finally runs the ParserFirstCallInit hook.
 *
 * NOTE(review): the body reads $svcOptions, $magicWordFactory, $contLang,
 * $factory, $linkRendererFactory, $nsInfo, $badFileLookup,
 * $languageConverterFactory, $hookContainer, $tidy, $userOptionsLookup and
 * $userFactory, none of which appear in the parameter list shown here. The
 * embedded line numbers skip, so this listing looks truncated -- confirm the
 * full signature against the real file.
 *
 * @param string $urlProtocols Regex fragment matching the allowed URL
 *   protocols; it is embedded in mExtLinkBracketedRegex
 * @param SpecialPageFactory $spFactory Stored as specialPageFactory
 * @param LoggerInterface $logger Stored as logger
 * @param WANObjectCache $wanCache Passed on to the Preprocessor_Hash
 * @throws MWException When constructed outside of ParserFactory
 */
405  public function __construct(
410  string $urlProtocols,
411  SpecialPageFactory $spFactory,
414  LoggerInterface $logger,
419  WANObjectCache $wanCache,
422  ) {
423  if ( ParserFactory::$inParserFactory === 0 ) {
424  // Direct construction of Parser was deprecated in 1.34 and
425  // removed in 1.36; use a ParserFactory instead.
426  throw new MWException( 'Direct construction of Parser not allowed' );
427  }
428  $svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
429  $this->svcOptions = $svcOptions;
430 
431  $this->mUrlProtocols = $urlProtocols;
432  $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
433  self::EXT_LINK_ADDR .
434  self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
435 
436  $this->magicWordFactory = $magicWordFactory;
437 
438  $this->contLang = $contLang;
439 
440  $this->factory = $factory;
441  $this->specialPageFactory = $spFactory;
442  $this->linkRendererFactory = $linkRendererFactory;
443  $this->nsInfo = $nsInfo;
444  $this->logger = $logger;
445  $this->badFileLookup = $badFileLookup;
446 
447  $this->languageConverterFactory = $languageConverterFactory;
448 
449  $this->hookContainer = $hookContainer;
450  $this->hookRunner = new HookRunner( $hookContainer );
451 
452  $this->tidy = $tidy;
453 
454  $this->mPreprocessor = new Preprocessor_Hash(
455  $this,
456  $wanCache,
457  [
458  'cacheThreshold' => $svcOptions->get( 'PreprocessorCacheThreshold' ),
459  'disableLangConversion' => $svcOptions->get( 'DisableLangConversion' ),
460  ]
461  );
462 
463  $this->userOptionsLookup = $userOptionsLookup;
464  $this->userFactory = $userFactory;
465 
466  // These steps used to be done in "::firstCallInit()"
467  // (if you're chasing a reference from some old code)
469  CoreTagHooks::register( $this );
470  $this->initializeVariables();
471 
472  $this->hookRunner->onParserFirstCallInit( $this );
473  }
474 
/**
 * Drop every property of this object on destruction, starting with the
 * link holder array when one is set.
 */
public function __destruct() {
	// The link holders go first, and only if they were ever created.
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}

	// Then unset whatever properties remain.
	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
488 
/**
 * Fix up a freshly cloned parser: reset the in-parse flag, detach the
 * fields that may have become references, give the clone its own
 * preprocessor and run the ParserCloned hook.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: taking a reference to an object field turns the field itself
	// into a reference (for as long as the other reference exists), and
	// cloning copies reference fields as references. Both behaviours are
	// defined by PHP 5, which is awkward for us because hooks dating from
	// PHP 4 still pass these fields by reference.
	foreach ( [ 'mStripState', 'mVarCache' ] as $field ) {
		// Copy the value out, then rebind the field to that new copy so it
		// no longer shares a reference with the original object.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	$this->mPreprocessor = clone $this->mPreprocessor;
	$this->mPreprocessor->resetParser( $this );

	$this->hookRunner->onParserCloned( $this );
}
514 
/**
 * No-op kept for callers that still invoke it.
 *
 * This method should be hard-deprecated once the remaining calls are
 * removed; it no longer does anything.
 */
public function firstCallInit() {
	// Intentionally empty.
}
527 
/**
 * Reset all per-parse state: output object, counters, link holders,
 * revision data, caches, strip state and the section profiler. Runs the
 * ParserClearState hook at the end.
 */
public function clearState() {
	$this->resetOutput();
	$this->mAutonumber = 0;
	// LinkHolderArray takes the parser, a language converter and the hook
	// container, in that order. The converter argument was missing, which
	// shifted the hook container into the converter slot.
	$this->mLinkHolders = new LinkHolderArray(
		$this,
		$this->languageConverterFactory->getLanguageConverter(
			$this->getContentLanguage()
		),
		$this->getHookContainer()
	);
	$this->mLinkID = 0;
	$this->mRevisionObject = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionId = null;
	$this->mRevisionUser = null;
	$this->mRevisionSize = null;
	$this->mRevisionRecordObject = null;
	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplRedirCache = [];
	$this->mTplDomCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	$this->mProfiler = new SectionProfiler();

	$this->hookRunner->onParserClearState( $this );
}
577 
/**
 * Replace the current output with a fresh ParserOutput and register its
 * recordOption() method as the option watcher on the parser options.
 */
public function resetOutput() {
	$output = new ParserOutput();
	$this->mOutput = $output;
	$this->mOptions->registerWatcher( [ $output, 'recordOption' ] );
}
585 
/**
 * Convert wikitext to HTML and return the populated output object.
 *
 * @param string $text Wikitext to parse
 * @param Title $title Title handed to startParse() and used for title conversion
 * @param ParserOptions $options Options handed to startParse()
 * @param bool $linestart Forwarded to internalParseHalfParsed()
 * @param bool $clearState Forwarded to startParse(); when true the parser is
 *   also locked and U+007F is replaced in the input
 * @param int|null $revid When not null, used as the revision ID for the
 *   duration of this parse
 * @return ParserOutput The parser's output object with the final text set
 */
public function parse(
	$text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// Strip markers are built around U+007F DELETE, so that character
		// must not appear in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held in a local so it stays alive until this method returns.
		$parseLock = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the revision state so it can be put back after the parse.
	$savedRevisionState = [
		'mRevisionId' => $this->mRevisionId,
		'mRevisionObject' => $this->mRevisionObject,
		'mRevisionRecordObject' => $this->mRevisionRecordObject,
		'mRevisionTimestamp' => $this->mRevisionTimestamp,
		'mRevisionUser' => $this->mRevisionUser,
		'mRevisionSize' => $this->mRevisionSize,
	];
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionRecordObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	$text = $this->internalParse( $text );
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// A converted title is only set when title conversion is enabled, no
	// suppressing double-underscore is present and no display title was set.
	$skipTitleConversion = $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false;
	if ( !$skipTitleConversion ) {
		$titleText = $this->getTargetLanguageConverter()->getConvRuleTitle();
		if ( !$titleText ) {
			$titleText = $this->getTargetLanguageConverter()->convertTitle( $title );
		}
		$this->mOutput->setTitleText( $titleText );
	}

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	$expensiveLimit = $this->mOptions->getExpensiveParserFunctionLimit();
	if ( $this->mExpensiveFunctionCount > $expensiveLimit ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$expensiveLimit
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$text .= $this->makeLimitReport();
	}

	# Wrap non-interface parser output in a <div> so it can be targeted
	# with CSS (T37247)
	$wrapperClass = $this->mOptions->getWrapOutputClass();
	if ( $wrapperClass !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $wrapperClass );
	}

	$this->mOutput->setText( $text );

	// Put the revision state back exactly as it was before this parse.
	foreach ( $savedRevisionState as $property => $savedValue ) {
		$this->$property = $savedValue;
	}
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
701 
/**
 * Record limit report data on the output object and build the HTML
 * comments (limit report plus transclusion timing report) that parse()
 * appends to the text.
 *
 * @return string The two HTML comment blocks
 */
protected function makeLimitReport() {
	$maxIncludeSize = $this->mOptions->getMaxIncludeSize();

	// Timing data; CPU time is only recorded when it is available.
	$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuTime !== null ) {
		$this->mOutput->setLimitReportData(
			'limitreport-cputime',
			sprintf( "%.3f", $cpuTime )
		);
	}
	$this->mOutput->setLimitReportData(
		'limitreport-walltime',
		sprintf( "%.3f", $this->mOutput->getTimeSinceStart( 'wall' ) )
	);

	// [ used, maximum ] pairs, recorded in this order.
	$usageAgainstLimit = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $maxIncludeSize ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $usageAgainstLimit as $reportKey => $pair ) {
		$this->mOutput->setLimitReportData( $reportKey, $pair );
	}

	foreach ( $this->mStripState->getLimitReport() as [ $reportKey, $reportValue ] ) {
		$this->mOutput->setLimitReportData( $reportKey, $reportValue );
	}

	$this->hookRunner->onParserLimitReportPrepare( $this, $this->mOutput );

	$limitReport = "NewPP limit report\n";
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$limitReport .= 'Parsed by ' . wfHostname() . "\n";
	}
	$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
	$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
	$dynamic = $this->mOutput->hasDynamicContent() ? 'true' : 'false';
	$limitReport .= 'Dynamic content: ' . $dynamic . "\n";
	$limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";

	foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
		// A hook may format the entry itself; a false return skips the default.
		if ( !$this->hookRunner->onParserLimitReportFormat(
			$key, $value, $limitReport, false, false )
		) {
			continue;
		}

		$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			$valueMsg = new RawMessage( '$1' );
		}
		if ( $keyMsg->isDisabled() || $valueMsg->isDisabled() ) {
			continue;
		}
		$valueMsg->params( $value );
		$limitReport .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
	}

	// This is not really HTML output, so decode the entities and then
	// re-encode only what has to be hidden inside an HTML comment.
	$limitReport = htmlspecialchars_decode( $limitReport );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
	$text = "\n<!-- \n$limitReport-->\n";

	// Template profiling data, slowest first, in a human/machine readable form.
	$statsByFunction = $this->mProfiler->getFunctionStats();
	uasort( $statsByFunction, static function ( $left, $right ) {
		return $right['real'] <=> $left['real']; // descending order
	} );
	$profileReport = [];
	foreach ( array_slice( $statsByFunction, 0, 10 ) as $entry ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$entry['%real'],
			$entry['real'],
			$entry['calls'],
			htmlspecialchars( $entry['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
	$text .= implode( "\n", $profileReport ) . "\n-->\n";

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Other cache related metadata.
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData(
		'cachereport-timestamp',
		$this->mOutput->getCacheTime()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-ttl',
		$this->mOutput->getCacheExpiry()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasDynamicContent()
	);

	return $text;
}
810 
/**
 * Run internalParse() on the text with $isMain set to false.
 *
 * @param string $text Text to parse
 * @param bool|object $frame Frame forwarded to internalParse(), or false
 * @return string Result of internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	return $this->internalParse( $text, false, $frame );
}
839 
/**
 * Like recursiveTagParse(), but also passes the result through
 * internalParseHalfParsed() with $isMain set to false.
 *
 * @param string $text Text to parse
 * @param bool|object $frame Frame forwarded to recursiveTagParse(), or false
 * @return string Result of internalParseHalfParsed()
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
864 
/**
 * Parse extension tag content the way parse() treats a whole document:
 * recursiveTagParse(), then the ParserAfterParse hook, then
 * internalParseHalfParsed() with $isMain set to true.
 *
 * @param string $text Text to parse
 * @return string Result of internalParseHalfParsed()
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$text = $this->recursiveTagParse( $text );
	// The hook receives the text variable itself, so it is kept in $text.
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );
	return $this->internalParseHalfParsed( $text, true );
}
890 
/**
 * Start a parse in OT_PREPROCESS mode, replace variables in the text and
 * unstrip the result.
 *
 * @param string $text Text to preprocess
 * @param Title|null $title Title handed to startParse()
 * @param ParserOptions $options Options handed to startParse()
 * @param int|null $revid When not null, used as the revision ID
 * @param bool|object $frame Frame forwarded to replaceVariables(), or false
 * @return string The text after replaceVariables() and unstripBoth()
 */
public function preprocess( $text, ?Title $title,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Held in a local so it stays alive until this method returns.
	$parseLock = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	$this->hookRunner->onParserBeforePreprocess( $this, $text, $this->mStripState );
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
915 
/**
 * Replace variables in the text and unstrip the result, without starting
 * a new parse.
 *
 * @param string $text Text to preprocess
 * @param bool|object $frame Frame forwarded to replaceVariables(), or false
 * @return string The text after replaceVariables() and unstripBoth()
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
930 
/**
 * Process text for preloading: substitute $params into the text as a raw
 * message, then expand it in OT_PLAIN mode using a DOM built for inclusion.
 *
 * The expansion is run with PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES.
 * NOTE(review): $flags was used without ever being defined in this method;
 * the value restored here follows MediaWiki core -- confirm against the
 * PPFrame documentation.
 *
 * @param string $text Text to process
 * @param Title $title Title handed to startParse()
 * @param ParserOptions $options Options handed to startParse()
 * @param array $params Message parameters substituted into $text
 * @return string The expanded and unstripped text
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	// Previously undefined: expand() was handed an unset variable.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
958 
/**
 * Set the user stored in mUser. A non-null identity is converted through
 * the user factory; null is stored as-is.
 *
 * @param UserIdentity|null $user
 */
public function setUser( ?UserIdentity $user ) {
	if ( !$user ) {
		$this->mUser = $user;
		return;
	}
	$this->mUser = $this->userFactory->newFromUserIdentity( $user );
}
972 
/**
 * Set the title stored in mTitle. A missing title is replaced by
 * Special:Badtitle/Parser, and any fragment is removed.
 *
 * @param Title|null $t
 */
public function setTitle( Title $t = null ) {
	$title = $t ?: Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
990 
/**
 * @return Title The title currently stored in mTitle
 */
public function getTitle() : Title {
	return $this->mTitle;
}
997 
/**
 * Combined getter/setter for mTitle, implemented with wfSetVar().
 *
 * @deprecated since 1.35
 * @param Title|null $x Value handed to wfSetVar()
 * @return Title|null Whatever wfSetVar() returns
 */
public function Title( Title $x = null ) : ?Title {
	wfDeprecated( __METHOD__, '1.35' );
	return wfSetVar( $this->mTitle, $x );
}
1009 
/**
 * @return int The current output type, one of the OT_xxx constants
 */
public function getOutputType(): int {
	return $this->mOutputType;
}
1018 
/**
 * Set the output type and rebuild the $ot shortcut array, which maps each
 * alias to whether it matches the chosen type.
 *
 * @param int $ot One of the OT_xxx constants
 */
public function setOutputType( $ot ): void {
	$this->mOutputType = $ot;

	# Shortcut alias
	$typeByAlias = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$shortcuts = [];
	foreach ( $typeByAlias as $alias => $type ) {
		$shortcuts[$alias] = ( $ot == $type );
	}
	$this->ot = $shortcuts;
}
1033 
/**
 * Combined getter/setter for mOutputType, implemented with wfSetVar().
 *
 * @deprecated since 1.35
 * @param int|null $x Value handed to wfSetVar()
 * @return int Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	return wfSetVar( $this->mOutputType, $x );
}
1045 
/**
 * @return ParserOutput The output object currently stored in mOutput
 */
public function getOutput() {
	return $this->mOutput;
}
1053 
/**
 * @return ParserOptions The options currently stored in mOptions
 */
public function getOptions() {
	return $this->mOptions;
}
1060 
/**
 * Store the given options in mOptions.
 *
 * @param ParserOptions $options
 */
public function setOptions( ParserOptions $options ): void {
	$this->mOptions = $options;
}
1069 
/**
 * Combined getter/setter for mOptions, implemented with wfSetVar().
 *
 * @deprecated since 1.35
 * @param ParserOptions|null $x Value handed to wfSetVar()
 * @return ParserOptions Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	return wfSetVar( $this->mOptions, $x );
}
1081 
/**
 * Return the current link ID and advance the counter by one.
 *
 * @return int The link ID before the increment
 */
public function nextLinkID() {
	return $this->mLinkID++;
}
1088 
/**
 * Overwrite the link ID counter.
 *
 * @param int $id New value for mLinkID
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1095 
/**
 * Alias of getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	return $this->getTargetLanguage();
}
1103 
/**
 * Work out the target language. In order of precedence: the target
 * language set on the options, the user language object when parsing an
 * interface message, and otherwise the page language of the title.
 *
 * @return Language
 */
public function getTargetLanguage() {
	$explicitTarget = $this->mOptions->getTargetLanguage();
	if ( $explicitTarget !== null ) {
		return $explicitTarget;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	return $this->getTitle()->getPageLanguage();
}
1123 
/**
 * Return the user stored on the parser, falling back to the user of the
 * parser options when none is set.
 *
 * @return User
 */
public function getUser() {
	return $this->mUser ?? $this->mOptions->getUser();
}
1136 
/**
 * Same value as getUser(), typed as a UserIdentity.
 *
 * @return UserIdentity
 */
public function getUserIdentity(): UserIdentity {
	return $this->getUser();
}
1144 
/**
 * @return Preprocessor The preprocessor owned by this parser
 */
public function getPreprocessor() {
	return $this->mPreprocessor;
}
1154 
/**
 * Return the link renderer, creating it on first use with the stub
 * threshold taken from the current options.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	// XXX The renderer is built with the options current at first use and
	// is then cached for the lifetime of this parser.
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	$this->mLinkRenderer = $this->linkRendererFactory->create();
	$stubThreshold = $this->getOptions()->getStubThreshold();
	$this->mLinkRenderer->setStubThreshold( $stubThreshold );

	return $this->mLinkRenderer;
}
1172 
/**
 * @return MagicWordFactory The magic word factory given to the constructor
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1182 
/**
 * @return Language The content language given to the constructor
 */
public function getContentLanguage() {
	return $this->contLang;
}
1192 
/**
 * @return BadFileLookup The bad file lookup given to the constructor
 */
public function getBadFileLookup() {
	return $this->badFileLookup;
}
1202 
/**
 * Replace each listed element (and each HTML comment) in the text with a
 * unique marker and report what was removed.
 *
 * Every entry of $matches is keyed by the marker and holds, in order: the
 * element name, the content (null for a self-closing tag), the decoded tag
 * attributes, and the complete original text of the tag.
 *
 * @param string[] $elements Tag names to extract; joined into a regex alternation
 * @param string $text Source text
 * @param array[] &$matches Filled with the extracted elements
 * @return string The text with the elements replaced by markers
 */
public static function extractTagsAndParams( array $elements, $text, &$matches ) {
	// Counter shared by all calls so that markers stay unique.
	static $markerCounter = 1;
	$stripped = '';
	$matches = [];

	$tagAlternation = implode( '|', $elements );
	$openPattern = "/<($tagAlternation)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$parts = preg_split( $openPattern, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $parts[0];

		$partCount = count( $parts );
		if ( $partCount < 5 ) {
			// Nothing more to extract.
			break;
		}

		if ( $partCount > 5 ) {
			# comment
			[ $element, $attributes, $close, $inside ] = [ $parts[4], '', '', $parts[5] ];
		} else {
			# tag
			[ , $element, $attributes, $close, $inside ] = $parts;
		}

		$marker = self::MARKER_PREFIX . "-$element-" .
			sprintf( '%08X', $markerCounter++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		$content = null;
		$tail = null;
		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$text = $inside;
		} else {
			$closePattern = $element === '!--'
				? '/(-->)/'
				: "/(<\\/$element\\s*>)/i";
			$rest = preg_split( $closePattern, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $rest[0];
			if ( count( $rest ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				[ , $tail, $text ] = $rest;
			}
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail",
		];
	}

	return $stripped;
}
1279 
/**
 * @return array The strip list stored in mStripList
 */
public function getStripList() {
	return $this->mStripList;
}
1288 
/**
 * @return StripState The strip state of the current parse
 */
public function getStripState() {
	return $this->mStripState;
}
1295 
/**
 * Register the text with the strip state under a new general marker and
 * return that marker.
 *
 * @param string $text Text to store
 * @return string The marker that now stands for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . '-item-' . $index . '-' . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1311 
/**
 * Convert wikitext table markup into HTML table markup, line by line.
 *
 * Nested tables are tracked with parallel stacks: one entry per open table
 * in each of the history arrays below.
 *
 * @param string $text Text to process
 * @return string The text with table markup converted
 */
private function handleTables( $text ) {
	$lines = StringUtils::explode( "\n", $text );
	$out = '';
	$tdHistory = []; # Is currently a td tag open?
	$lastTagHistory = []; # Save history of last tag activated (td, th or caption)
	$trHistory = []; # Is currently a tr tag open?
	$trAttributes = []; # history of tr attributes
	$hasOpenedTr = []; # Did this table open a <tr> element?
	$indentLevel = 0; # indent level of the table

	# Tag emitted for each cell marker
	$tagByMarker = [ '|' => 'td', '!' => 'th', '+' => 'caption' ];

	foreach ( $lines as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) { # empty line, go to next line
			$out .= $outLine . "\n";
			continue;
		}

		$firstChar = $line[0];
		$firstTwo = substr( $line, 0, 2 );
		$matches = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
			# First check if we are starting a new table
			$indentLevel = strlen( $matches[1] );

			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $matches[2] ),
				'table'
			);

			$outLine = str_repeat( '<dl><dd>', $indentLevel ) . "<table{$attributes}>";
			$tdHistory[] = false;
			$lastTagHistory[] = '';
			$trHistory[] = false;
			$trAttributes[] = '';
			$hasOpenedTr[] = false;
		} elseif ( !$tdHistory ) {
			# Not inside a table: don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( $firstTwo === '|}' ) {
			# We are ending a table
			$line = '</table>' . substr( $line, 2 );
			$lastTag = array_pop( $lastTagHistory );

			if ( !array_pop( $hasOpenedTr ) ) {
				$line = '<tr><td></td></tr>' . $line;
			}

			if ( array_pop( $trHistory ) ) {
				$line = '</tr>' . $line;
			}

			if ( array_pop( $tdHistory ) ) {
				$line = "</{$lastTag}>" . $line;
			}
			array_pop( $trAttributes );

			$outLine = $indentLevel > 0
				? rtrim( $line ) . str_repeat( '</dd></dl>', $indentLevel )
				: $line;
		} elseif ( $firstTwo === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes
			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $line ),
				'tr'
			);
			array_pop( $trAttributes );
			$trAttributes[] = $attributes;

			$line = '';
			$lastTag = array_pop( $lastTagHistory );
			array_pop( $hasOpenedTr );
			$hasOpenedTr[] = true;

			if ( array_pop( $trHistory ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $tdHistory ) ) {
				$line = "</{$lastTag}>" . $line;
			}

			$outLine = $line;
			$trHistory[] = false;
			$tdHistory[] = false;
			$lastTagHistory[] = '';
		} elseif ( $firstChar === '|'
			|| $firstChar === '!'
			|| $firstTwo === '|+'
		) {
			# This might be cell elements, td, th or captions
			if ( $firstTwo === '|+' ) {
				$firstChar = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $firstChar === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$cells = explode( '||', $line );

			$outLine = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$previous = '';
				if ( $firstChar !== '+' ) {
					$pendingRowAttributes = array_pop( $trAttributes );
					if ( !array_pop( $trHistory ) ) {
						$previous = "<tr{$pendingRowAttributes}>\n";
					}
					$trHistory[] = true;
					$trAttributes[] = '';
					array_pop( $hasOpenedTr );
					$hasOpenedTr[] = true;
				}

				$lastTag = array_pop( $lastTagHistory );

				if ( array_pop( $tdHistory ) ) {
					$previous = "</{$lastTag}>\n{$previous}";
				}

				$lastTag = $tagByMarker[$firstChar] ?? '';
				$lastTagHistory[] = $lastTag;

				# A cell could contain both parameters and data
				$cellParts = explode( '|', $cell, 2 );

				# T2553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				# Bug T153140: Neither should language converter markup.
				if ( preg_match( '/\[\[|-\{/', $cellParts[0] ) === 1 ) {
					$cell = "{$previous}<{$lastTag}>" . trim( $cell );
				} elseif ( count( $cellParts ) == 1 ) {
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$lastTag}>" . trim( $cellParts[0] );
				} else {
					$attributes = Sanitizer::fixTagAttributes(
						$this->mStripState->unstripBoth( $cellParts[0] ),
						$lastTag
					);
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$lastTag}{$attributes}>" . trim( $cellParts[1] );
				}

				$outLine .= $cell;
				$tdHistory[] = true;
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open td, tr && table
	while ( count( $tdHistory ) > 0 ) {
		if ( array_pop( $tdHistory ) ) {
			$out .= "</td>\n";
		}
		if ( array_pop( $trHistory ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $hasOpenedTr ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1514 
/**
 * Run the main wikitext-to-HTML pipeline on the text: variable expansion,
 * HTML sanitizing, tables, horizontal rules, double underscores, headings,
 * links, quotes and magic links.
 *
 * If the ParserBeforeInternalParse hook returns false, the text is returned
 * as the hook left it.
 *
 * @param string $text Text to parse
 * @param bool $isMain Forwarded to finalizeHeadings()
 * @param bool|object $frame Frame used to expand the text (it must expose
 *   depth and expand()), or false to use replaceVariables()
 * @return string The processed text
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	# Hook to suspend the parser in this state
	if ( !$this->hookRunner->onParserBeforeInternalParse( $this, $text, $this->mStripState ) ) {
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		if ( !$frame->depth ) {
			$flag = 0;
		} else {
			// Previously left unassigned, so included documents handed an
			// undefined variable to preprocessToDom().
			$flag = Preprocessor::DOM_FOR_INCLUSION;
		}
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	$this->hookRunner->onInternalParseBeforeSanitize( $this, $text, $this->mStripState );
	$text = Sanitizer::removeHTMLtags(
		$text,
		// Callback from the Sanitizer for expanding items found in
		// HTML attribute values, so they can be safely tested and escaped.
		function ( &$text, $frame = false ) {
			$text = $this->replaceVariables( $text, $frame );
			$text = $this->mStripState->unstripBoth( $text );
		},
		false,
		[],
		[]
	);
	$this->hookRunner->onInternalParseBeforeLinks( $this, $text, $this->mStripState );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->handleTables( $text );

	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->handleDoubleUnderscore( $text );

	$text = $this->handleHeadings( $text );
	$text = $this->handleInternalLinks( $text );
	$text = $this->handleAllQuotes( $text );
	$text = $this->handleExternalLinks( $text );

	# handleInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from handleExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->handleMagicLinks( $text );
	$text = $this->finalizeHeadings( $text, $origText, $isMain );

	return $text;
}
1591 
1598  return $this->languageConverterFactory->getLanguageConverter(
1599  $this->getTargetLanguage()
1600  );
1601  }
1602 
1609  return $this->languageConverterFactory->getLanguageConverter(
1610  $this->getContentLanguage()
1611  );
1612  }
1613 
/**
 * Accessor for the hook container held by this parser.
 *
 * @return mixed The value of $this->hookContainer (presumably a HookContainer)
 */
protected function getHookContainer() {
	return $this->hookContainer;
}
1624 
/**
 * Accessor for the hook runner held by this parser.
 *
 * @return mixed The value of $this->hookRunner (presumably a HookRunner)
 */
protected function getHookRunner() {
	return $this->hookRunner;
}
1636 
/**
 * Second processing stage: finish half-parsed text.
 *
 * Unstrips general markers, runs the block-level pass, replaces link
 * holders, optionally applies language conversion, unstrips the remaining
 * markers and finally tidies the result.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain When true the ParserAfterTidy hook is run on the result
 * @param bool $linestart Passed through to BlockLevelPass::doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	$text = BlockLevelPass::doBlockLevels( $text, $linestart );

	// Return value unused; presumably rewrites $text by reference.
	$this->replaceLinkHoldersPrivate( $text );

	// Language conversion is skipped when content conversion is disabled in
	// the options, when the 'nocontentconvert' double underscore was seen,
	// or when the text is an interface message.
	$skipConversion = $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| $this->mOptions->getInterfaceMessage();
	if ( !$skipConversion ) {
		# The position of the convert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $this->getTargetLanguageConverter()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	$text = $this->mStripState->unstripGeneral( $text );

	$text = $this->tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );

	if ( $isMain ) {
		$this->hookRunner->onParserAfterTidy( $this, $text );
	}

	return $text;
}
1682 
/**
 * Find free external links and RFC / PMID / ISBN references in $text and
 * hand each match to magicLinkCallback() for replacement.
 *
 * Anchor elements and other HTML tags are matched as well (groups 1 and 2)
 * so that the callback can leave them untouched.
 *
 * @param string $text
 * @return string Result of preg_replace_callback()
 */
private function handleMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$space = self::SPACE_NOT_NL; # non-newline space
	$spdash = "(?:-|$space)"; # a dash or a non-newline space
	$spaces = "$space++"; # possessive match of 1 or more spaces

	// Extended (x) syntax: whitespace and #-comments inside the pattern are ignored.
	$pattern = '!(?: # Start cases
		(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
		(<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
		(\b # m[3]: Free external links
			(?i:$prots)
			($addr$urlChar*) # m[4]: Post-protocol path
		) |
		\b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
			([0-9]+)\b |
		\bISBN $spaces ( # m[6]: ISBN, capture number
			(?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
			(?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
			[0-9Xx] # check digit
		)\b
	)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1721 
/**
 * Callback for handleMagicLinks(): build the replacement for one match.
 *
 * Group layout (see the pattern in handleMagicLinks()):
 *  - 1: anchor element, returned unchanged
 *  - 2: other HTML tag, returned unchanged
 *  - 3/4: free external link and its post-protocol part
 *  - 5: number of an RFC or PMID reference
 *  - 6: ISBN, possibly containing dashes and spaces
 *
 * @param array $m Match array from preg_replace_callback()
 * @return string Replacement HTML, or the unchanged match
 * @throws MWException If group 5 matched but the text starts with neither
 *   "RFC" nor "PMID"
 */
private function magicLinkCallback( array $m ) {
	if ( isset( $m[1] ) && $m[1] !== '' ) {
		# Skip anchor
		return $m[0];
	}

	if ( isset( $m[2] ) && $m[2] !== '' ) {
		# Skip HTML element
		return $m[0];
	}

	if ( isset( $m[3] ) && $m[3] !== '' ) {
		# Free external link
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	if ( isset( $m[5] ) && $m[5] !== '' ) {
		# RFC or PMID
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $m[0];
			}
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$trackingCat = 'magiclink-tracking-rfc';
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $m[0];
			}
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$trackingCat = 'magiclink-tracking-pmid';
		} else {
			// Should never happen
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink(
			$url,
			"{$keyword} {$id}",
			true,
			$cssClass,
			[],
			$this->getTitle()
		);
	}

	if ( isset( $m[6] ) && $m[6] !== ''
		&& $this->mOptions->getMagicISBNLinks()
	) {
		# ISBN
		$isbn = $m[6];
		$space = self::SPACE_NOT_NL; # non-newline space
		// The match was produced by a /u pattern, so the delimiters may be
		// multi-byte spaces. Normalise them in UTF-8 mode as well; without
		// /u the \p{Zs} class is applied byte-wise and can split a character.
		$isbn = preg_replace( "/$space/u", ' ', $isbn );
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	// Nothing to do (including ISBN matches while ISBN links are disabled)
	return $m[0];
}
1797 
/**
 * Turn a bare URL found in the text into an external link (or an external
 * image), keeping trailing punctuation and entities outside of the link.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Length of the part of the match that follows the
 *   protocol; if at least that many characters end up in the trail, nothing
 *   but the protocol is left and the text is returned unlinked
 * @return string HTML
 */
private function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$entityMatch = [];
	$terminatorRegex = '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/';
	if ( preg_match( $terminatorRegex, $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
		$cutAt = $entityMatch[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Move trailing punctuation to $trail
	$sep = ',;\.:!?';
	# If there is no left bracket, then consider right brackets fair game too
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$reversed = strrev( $url );
	$trailLen = strspn( $reversed, $sep );
	# Don't break a trailing HTML entity by moving the ; into $trail
	# This is in hot code, so use substr_compare to avoid having to
	# create a new string object for the comparison.
	# More optimization: instead of running preg_match with a $
	# anchor, which can be slow, do the match on the reversed
	# string starting at the desired offset.
	# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLen
		&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $entityMatch, 0, $trailLen )
	) {
		$trailLen--;
	}
	if ( $trailLen ) {
		$trail = substr( $url, -$trailLen ) . $trail;
		$url = substr( $url, 0, -$trailLen );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html !== false ) {
		return $html . $trail;
	}

	# Not an image, make a link
	$html = Linker::makeExternalLink(
		$url,
		$this->getTargetLanguageConverter()->markNoConversion( $url ),
		true,
		'free',
		$this->getExternalLinkAttribs( $url ),
		$this->getTitle()
	);
	# Register it in the output object...
	$this->mOutput->addExternalLink( $url );

	return $html . $trail;
}
1876 
/**
 * Replace "== heading ==" style lines with <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that longer runs of '=' are
 * consumed before shorter ones can match.
 *
 * @param string $text
 * @return string
 */
private function handleHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$marker = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = "/^(?:{$marker})[ \\t]*(.+?)[ \\t]*(?:{$marker})\\s*$/m";
		$replacement = "<h{$level}>\\1</h{$level}>";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	return $text;
}
1892 
/**
 * Apply doQuotes() to every line of $text.
 *
 * @param string $text
 * @return string The processed lines, joined with "\n"
 */
private function handleAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1909 
/**
 * Convert apostrophe markup on a single line into <i> and <b> tags.
 *
 * Two apostrophes toggle italics, three toggle bold and five toggle both.
 * Runs of four, or of more than five, apostrophes are reduced to three or
 * five markup apostrophes with the excess kept as literal text. Tags still
 * open at the end of the line are closed.
 *
 * @param string $text One line of text
 * @return string
 */
public function doQuotes( $text ) {
	// Odd indexes of $arr hold the apostrophe runs, even indexes the text between them
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the text before the run
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state is one of '', 'i', 'b', 'bi', 'ib' or 'both'. In the 'both'
	// state (after an opening ''''') text is collected in $buffer because
	// the nesting order of <b> and <i> is only known at the next run.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				// two quotes - open or close italics
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				// three quotes - open or close bold
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				// five quotes - open or close both separately
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a buffer of "0" is falsy in PHP and
	// would otherwise be dropped from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2093 
/**
 * Replace bracketed external links ([url text]) with HTML links and
 * register each URL with the parser output.
 *
 * @param string $text
 * @return string
 * @throws MWException If splitting on the bracketed-link regex fails
 */
private function handleExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	// Text before the first link; what follows comes in groups of four:
	// URL, protocol, link text, trailing text.
	$html = array_shift( $bits );
	$numBits = count( $bits );

	for ( $pos = 0; $pos < $numBits; $pos += 4 ) {
		$url = $bits[$pos];
		// $bits[$pos + 1] is the protocol, which is not needed here
		$label = $bits[$pos + 2];
		$trail = $bits[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$entityMatch = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $entityMatch[0][1];
			$label = substr( $url, $cutAt ) . ' ' . $label;
			$url = substr( $url, 0, $cutAt );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]
			# Autonumber
			$langObj = $this->getTargetLanguage();
			$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			# Set linktype for CSS
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			$linktype = 'text';
			# Check for trail
			[ $dtrail, $trail ] = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
			$label = $this->getTargetLanguageConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		// @phan-suppress-next-line SecurityCheck-XSS using false for escape is valid here
		$html .= Linker::makeExternalLink( $url, $label, false, $linktype,
			$this->getExternalLinkAttribs( $url ), $this->getTitle() ) . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $html;
}
2174 
/**
 * Get the rel attribute value for an external link.
 *
 * NOTE(review): in the listing this was taken from, the `global` declaration
 * and the second line of the condition were absent. Both were restored
 * from memory of the upstream source (global list and the
 * wfMatchesDomainList() check); confirm against the canonical source.
 *
 * @param string|false $url URL of the link, checked against
 *   $wgNoFollowDomainExceptions
 * @param LinkTarget|null $title Page the link appears on; its namespace is
 *   checked against $wgNoFollowNsExceptions
 * @return string|null 'nofollow' when the link should not be followed,
 *   otherwise null
 */
public static function getExternalLinkRel( $url = false, LinkTarget $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
	$ns = $title ? $title->getNamespace() : false;
	if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
2195 
/**
 * Build the HTML attributes ('rel' and, when configured, 'target') for an
 * external link.
 *
 * @param string $url URL of the link
 * @return array Associative array with key 'rel' (string or null) and, when
 *   the parser options define an external link target, key 'target'
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->getTitle() );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// getExternalLinkRel() returns null when no rel applies, so
			// only insert the separator when there is something before it.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2227 
/**
 * Normalise the percent-encoding of a URL.
 *
 * Unsafe raw characters are percent-encoded. Existing percent escapes are
 * then either replaced by the character they stand for (printable ASCII that
 * is not reserved in the URL part it appears in) or kept with upper-case hex
 * digits. Fragment, query and scheme/path are handled separately because
 * each has its own set of characters that must stay escaped. A valid
 * bracketed IPv6 host is restored to literal "[...]" form at the end.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
		IPUtils::isValid( rawurldecode( $m[1] ) )
	) {
		$isIPv6 = rawurldecode( $m[1] );
	} else {
		$isIPv6 = false;
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		static function ( $m ) {
			return rawurlencode( $m[0] );
		},
		$url
	);

	# The result is assembled back to front: fragment, then query, then the rest
	$ret = '';
	$end = strlen( $url );

	# Fragment part - 'fragment'
	$start = strpos( $url, '#' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
		$end = $start;
	}

	# Query part - 'query' minus &=+;
	$start = strpos( $url, '?' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
		$end = $start;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent(
		substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax
	if ( $isIPv6 !== false ) {
		# The brackets were percent-encoded above; put the literal form back
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2296 
/**
 * Normalise the percent escapes in one part of a URL.
 *
 * An escape is replaced by the character it encodes when that character has
 * a code between 33 and 126 and is not listed in $unsafe; any other escape
 * is kept, with its hex digits upper-cased.
 *
 * @param string $component Part of a URL
 * @param string $unsafe Characters that must remain percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		static function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$isPrintableAscii = $code > 32 && $code < 127;
			if ( $isPrintableAscii && strpos( $unsafe, $decoded ) === false ) {
				# Unescape it
				return $decoded;
			}
			# Leave it escaped, but use uppercase for a-f
			return strtoupper( $escape[0] );
		},
		$component
	);
}
2311 
/**
 * Build an external image for $url if the parser options allow it.
 *
 * An image is produced when the URL matches EXT_IMAGE_REGEX and either
 * external images are allowed in general, the URL starts with one of the
 * configured allowed prefixes, or (with the image whitelist enabled) the URL
 * matches an entry of the 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|false Image HTML, or false if no image was made
 */
private function maybeMakeExternalImage( $url ) {
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();

	# The option may be a single prefix or an array of prefixes
	$fromAllowedPrefix = false;
	if ( !empty( $allowedFrom ) ) {
		$prefixes = is_array( $allowedFrom ) ? $allowedFrom : [ $allowedFrom ];
		foreach ( $prefixes as $prefix ) {
			if ( strpos( $url, $prefix ) === 0 ) {
				$fromAllowedPrefix = true;
				break;
			}
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $fromAllowedPrefix )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( !$html && $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$whitelist = explode(
			"\n",
			wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
		);

		foreach ( $whitelist as $entry ) {
			# Ignore blank entries and comments
			if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
				continue;
			}
			# Entries are regex fragments: escape the delimiter and match case-insensitively
			// @phan-suppress-next-line SecurityCheck-ReDoS preg_quote is not wanted here
			if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
				# Image matches a whitelist entry
				$html = Linker::makeExternalImage( $url );
				break;
			}
		}
	}

	return $html;
}
2370 
/**
 * Process [[internal links]] in $text and merge the link holders created
 * along the way into $this->mLinkHolders.
 *
 * @param string $text
 * @return string $text as rewritten by handleInternalLinks2(), which takes
 *   it by reference
 */
private function handleInternalLinks( $text ) {
	$newHolders = $this->handleInternalLinks2( $text );
	$this->mLinkHolders->merge( $newHolders );
	return $text;
}
2382 
2388  private function handleInternalLinks2( &$s ) {
2389  static $tc = false, $e1, $e1_img;
2390  # the % is needed to support urlencoded titles as well
2391  if ( !$tc ) {
2392  $tc = Title::legalChars() . '#%';
2393  # Match a link having the form [[namespace:link|alternate]]trail
2394  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2395  # Match cases where there is no "]]", which might still be images
2396  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2397  }
2398 
2399  $holders = new LinkHolderArray(
2400  $this,
2401  $this->getContentLanguageConverter(),
2402  $this->getHookContainer() );
2403 
2404  # split the entire text string on occurrences of [[
2405  $a = StringUtils::explode( '[[', ' ' . $s );
2406  # get the first element (all text up to first [[), and remove the space we added
2407  $s = $a->current();
2408  $a->next();
2409  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2410  $s = substr( $s, 1 );
2411 
2412  $nottalk = !$this->getTitle()->isTalkPage();
2413 
2414  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2415  $e2 = null;
2416  if ( $useLinkPrefixExtension ) {
2417  # Match the end of a line for a word that's not followed by whitespace,
2418  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2419  $charset = $this->contLang->linkPrefixCharset();
2420  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2421  $m = [];
2422  if ( preg_match( $e2, $s, $m ) ) {
2423  $first_prefix = $m[2];
2424  } else {
2425  $first_prefix = false;
2426  }
2427  } else {
2428  $prefix = '';
2429  }
2430 
2431  # Some namespaces don't allow subpages
2432  $useSubpages = $this->nsInfo->hasSubpages(
2433  $this->getTitle()->getNamespace()
2434  );
2435 
2436  # Loop for each link
2437  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2438  # Check for excessive memory usage
2439  if ( $holders->isBig() ) {
2440  # Too big
2441  # Do the existence check, replace the link holders and clear the array
2442  $holders->replace( $s );
2443  $holders->clear();
2444  }
2445 
2446  if ( $useLinkPrefixExtension ) {
2447  if ( preg_match( $e2, $s, $m ) ) {
2448  list( , $s, $prefix ) = $m;
2449  } else {
2450  $prefix = '';
2451  }
2452  # first link
2453  if ( $first_prefix ) {
2454  $prefix = $first_prefix;
2455  $first_prefix = false;
2456  }
2457  }
2458 
2459  $might_be_img = false;
2460 
2461  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2462  $text = $m[2];
2463  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2464  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2465  # the real problem is with the $e1 regex
2466  # See T1500.
2467  # Still some problems for cases where the ] is meant to be outside punctuation,
2468  # and no image is in sight. See T4095.
2469  if ( $text !== ''
2470  && substr( $m[3], 0, 1 ) === ']'
2471  && strpos( $text, '[' ) !== false
2472  ) {
2473  $text .= ']'; # so that handleExternalLinks($text) works later
2474  $m[3] = substr( $m[3], 1 );
2475  }
2476  # fix up urlencoded title texts
2477  if ( strpos( $m[1], '%' ) !== false ) {
2478  # Should anchors '#' also be rejected?
2479  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2480  }
2481  $trail = $m[3];
2482  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2483  # Invalid, but might be an image with a link in its caption
2484  $might_be_img = true;
2485  $text = $m[2];
2486  if ( strpos( $m[1], '%' ) !== false ) {
2487  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2488  }
2489  $trail = "";
2490  } else { # Invalid form; output directly
2491  $s .= $prefix . '[[' . $line;
2492  continue;
2493  }
2494 
2495  $origLink = ltrim( $m[1], ' ' );
2496 
2497  # Don't allow internal links to pages containing
2498  # PROTO: where PROTO is a valid URL protocol; these
2499  # should be external links.
2500  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2501  $s .= $prefix . '[[' . $line;
2502  continue;
2503  }
2504 
2505  # Make subpage if necessary
2506  if ( $useSubpages ) {
2508  $this->getTitle(), $origLink, $text
2509  );
2510  } else {
2511  $link = $origLink;
2512  }
2513 
2514  // \x7f isn't a default legal title char, so most likely strip
2515  // markers will force us into the "invalid form" path above. But,
2516  // just in case, let's assert that xmlish tags aren't valid in
2517  // the title position.
2518  $unstrip = $this->mStripState->killMarkers( $link );
2519  $noMarkers = ( $unstrip === $link );
2520 
2521  $nt = $noMarkers ? Title::newFromText( $link ) : null;
2522  if ( $nt === null ) {
2523  $s .= $prefix . '[[' . $line;
2524  continue;
2525  }
2526 
2527  $ns = $nt->getNamespace();
2528  $iw = $nt->getInterwiki();
2529 
2530  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2531 
2532  if ( $might_be_img ) { # if this is actually an invalid link
2533  if ( $ns === NS_FILE && $noforce ) { # but might be an image
2534  $found = false;
2535  while ( true ) {
2536  # look at the next 'line' to see if we can close it there
2537  $a->next();
2538  $next_line = $a->current();
2539  if ( $next_line === false || $next_line === null ) {
2540  break;
2541  }
2542  $m = explode( ']]', $next_line, 3 );
2543  if ( count( $m ) == 3 ) {
2544  # the first ]] closes the inner link, the second the image
2545  $found = true;
2546  $text .= "[[{$m[0]}]]{$m[1]}";
2547  $trail = $m[2];
2548  break;
2549  } elseif ( count( $m ) == 2 ) {
2550  # if there's exactly one ]] that's fine, we'll keep looking
2551  $text .= "[[{$m[0]}]]{$m[1]}";
2552  } else {
2553  # if $next_line is invalid too, we need look no further
2554  $text .= '[[' . $next_line;
2555  break;
2556  }
2557  }
2558  if ( !$found ) {
2559  # we couldn't find the end of this imageLink, so output it raw
2560  # but don't ignore what might be perfectly normal links in the text we've examined
2561  $holders->merge( $this->handleInternalLinks2( $text ) );
2562  $s .= "{$prefix}[[$link|$text";
2563  # note: no $trail, because without an end, there *is* no trail
2564  continue;
2565  }
2566  } else { # it's not an image, so output it raw
2567  $s .= "{$prefix}[[$link|$text";
2568  # note: no $trail, because without an end, there *is* no trail
2569  continue;
2570  }
2571  }
2572 
2573  $wasblank = ( $text == '' );
2574  if ( $wasblank ) {
2575  $text = $link;
2576  if ( !$noforce ) {
2577  # Strip off leading ':'
2578  $text = substr( $text, 1 );
2579  }
2580  } else {
2581  # T6598 madness. Handle the quotes only if they come from the alternate part
2582  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2583  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2584  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2585  $text = $this->doQuotes( $text );
2586  }
2587 
2588  # Link not escaped by : , create the various objects
2589  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2590  # Interwikis
2591  if (
2592  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2593  MediaWikiServices::getInstance()->getLanguageNameUtils()
2594  ->getLanguageName(
2595  $iw,
2596  LanguageNameUtils::AUTONYMS,
2597  LanguageNameUtils::DEFINED
2598  )
2599  || in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2600  )
2601  ) {
2602  # T26502: filter duplicates
2603  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2604  $this->mLangLinkLanguages[$iw] = true;
2605  $this->mOutput->addLanguageLink( $nt->getFullText() );
2606  }
2607 
2611  $s = rtrim( $s . $prefix ) . $trail; # T175416
2612  continue;
2613  }
2614 
2615  if ( $ns === NS_FILE ) {
2616  if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->getTitle() ) ) {
2617  if ( $wasblank ) {
2618  # if no parameters were passed, $text
2619  # becomes something like "File:Foo.png",
2620  # which we don't want to pass on to the
2621  # image generator
2622  $text = '';
2623  } else {
2624  # recursively parse links inside the image caption
2625  # actually, this will parse them in any other parameters, too,
2626  # but it might be hard to fix that, and it doesn't matter ATM
2627  $text = $this->handleExternalLinks( $text );
2628  $holders->merge( $this->handleInternalLinks2( $text ) );
2629  }
2630  # cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
2631  $s .= $prefix . $this->armorLinks(
2632  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2633  continue;
2634  }
2635  } elseif ( $ns === NS_CATEGORY ) {
2639  $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2640 
2641  if ( $wasblank ) {
2642  $sortkey = $this->getDefaultSort();
2643  } else {
2644  $sortkey = $text;
2645  }
2646  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2647  $sortkey = str_replace( "\n", '', $sortkey );
2648  $sortkey = $this->getTargetLanguageConverter()->convertCategoryKey( $sortkey );
2649  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2650 
2651  continue;
2652  }
2653  }
2654 
2655  # Self-link checking. For some languages, variants of the title are checked in
2656  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2657  # for linking to a different variant.
2658  if ( $ns !== NS_SPECIAL && $nt->equals( $this->getTitle() ) && !$nt->hasFragment() ) {
2659  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2660  continue;
2661  }
2662 
2663  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2664  # @todo FIXME: Should do batch file existence checks, see comment below
2665  if ( $ns === NS_MEDIA ) {
2666  # Give extensions a chance to select the file revision for us
2667  $options = [];
2668  $descQuery = false;
2669  $this->hookRunner->onBeforeParserFetchFileAndTitle(
2670  $this, $nt, $options, $descQuery );
2671  # Fetch and register the file (file title may be different via hooks)
2672  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2673  # Cloak with NOPARSE to avoid replacement in handleExternalLinks
2674  $s .= $prefix . $this->armorLinks(
2675  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2676  continue;
2677  }
2678 
2679  # Some titles, such as valid special pages or files in foreign repos, should
2680  # be shown as bluelinks even though they're not included in the page table
2681  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2682  # batch file existence checks for NS_FILE and NS_MEDIA
2683  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2684  $this->mOutput->addLink( $nt );
2685  $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2686  } else {
2687  # Links will be added to the output link list after checking
2688  $s .= $holders->makeHolder( $nt, $text, $trail, $prefix );
2689  }
2690  }
2691  return $holders;
2692  }
2693 
/**
 * Render a link to a page that is treated as existing, and armor the
 * resulting HTML so URL protocols inside it are not picked up later.
 *
 * @param Title $nt Link target
 * @param string $text Link text (already HTML); when empty, the escaped
 *   prefixed title text is used instead
 * @param string $trail Text following the link; Linker::splitTrail() decides
 *   which leading part of it is pulled inside the link
 * @param string $prefix Text placed inside the link before $text
 * @return string Armored link HTML followed by the remaining trail
 */
private function makeKnownLinkHolder( Title $nt, $text = '', $trail = '', $prefix = '' ) {
	$trailParts = Linker::splitTrail( $trail );
	$linkTrail = $trailParts[0];
	$remainder = $trailParts[1];

	# Fall back to the page name when no explicit link text was given
	$label = ( $text == '' )
		? htmlspecialchars( $nt->getPrefixedText() )
		: $text;

	$caption = new HtmlArmor( $prefix . $label . $linkTrail );
	$html = $this->getLinkRenderer()->makeKnownLink( $nt, $caption );

	return $this->armorLinks( $html ) . $remainder;
}
2720 
/**
 * Insert a NOPARSE marker in front of every URL protocol found at a word
 * boundary, so that later external-link handling does not touch URLs
 * embedded in already-rendered markup.
 *
 * @param string $text
 * @return string Result of the preg_replace() call
 */
private function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$cloaked = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $cloaked, $text );
}
2735 
/**
 * Deprecated pass-through to BlockLevelPass::doBlockLevels().
 *
 * @deprecated since 1.35
 * @param string $text
 * @param bool $linestart Forwarded unchanged to BlockLevelPass
 * @return string Whatever BlockLevelPass::doBlockLevels() returns
 */
public function doBlockLevels( $text, $linestart ) {
	wfDeprecated( __METHOD__, '1.35' );

	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );
	return $processed;
}
2749 
/**
 * Return the value of a magic variable, consulting the per-parse variable
 * cache first, then the core magic variables, then hook handlers.
 *
 * @param string $index Magic variable identifier
 * @param bool|PPFrame $frame Passed through to the
 *   ParserGetVariableValueSwitch hook
 * @return string|null The variable's value; null when neither core nor a
 *   hook handler supplied one
 */
private function expandMagicVariable( $index, $frame = false ) {
	# Some of these values require message or data lookups, so give hook
	# handlers a look at the cache and reuse a cached value when present.
	if (
		$this->hookRunner->onParserGetVariableValueVarCache( $this, $this->mVarCache ) &&
		isset( $this->mVarCache[$index] )
	) {
		return $this->mVarCache[$index];
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	$this->hookRunner->onParserGetVariableValueTs( $this, $ts );

	$value = CoreMagicVariables::expand(
		$this, $index, $ts, $this->nsInfo, $this->svcOptions, $this->logger
	);

	if ( $value === null ) {
		// Not a defined core magic word
		$ret = null;
		$originalIndex = $index;
		$this->hookRunner->onParserGetVariableValueSwitch( $this,
			$this->mVarCache, $index, $ret, $frame );
		if ( $index !== $originalIndex ) {
			wfDeprecated(
				'A ParserGetVariableValueSwitch hook handler modified $index, ' .
				'this is deprecated since MediaWiki 1.35',
				'1.35', false, false
			);
		}
		if ( !isset( $this->mVarCache[$originalIndex] ) ||
			$this->mVarCache[$originalIndex] !== $ret ) {
			wfDeprecated(
				'A ParserGetVariableValueSwitch hook handler bypassed the cache, ' .
				'this is deprecated since MediaWiki 1.35', '1.35', false, false
			);
		}
		// FIXME: in the future, don't give this hook unrestricted
		// access to mVarCache; we can cache it ourselves by falling
		// through here.
		return $ret;
	}

	$this->mVarCache[$index] = $value;

	return $value;
}
2806 
/**
 * Build the magic word arrays used to recognise variables and the
 * subst:/safesubst: prefixes.
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$variableWords = $factory->getVariableIDs();
	$substWords = $factory->getSubstIDs();

	$this->mVariables = $factory->newArray( $variableWords );
	$this->mSubstWords = $factory->newArray( $substWords );
}
2818 
/**
 * Run the preprocessor over wikitext and return its object representation.
 *
 * @param string $text Wikitext
 * @param int $flags Preprocessor flags, e.g. Preprocessor::DOM_FOR_INCLUSION
 * @return PPNode Result of Preprocessor::preprocessToObj()
 */
public function preprocessToDom( $text, $flags = 0 ) {
	$preprocessor = $this->getPreprocessor();

	return $preprocessor->preprocessToObj( $text, $flags );
}
2839 
/**
 * Preprocess $text and expand it in the given frame.
 *
 * Empty text, and text longer than the configured maximum include size,
 * is returned untouched.
 *
 * @param string $text Wikitext to expand
 * @param bool|PPFrame|array $frame Frame to expand in; false creates a fresh
 *   frame, any other non-PPFrame value is handed to newCustomFrame()
 * @param bool $argsOnly When true, expand with PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	$length = strlen( $text );

	# Nothing to do for empty text
	if ( $length < 1 ) {
		return $text;
	}
	# Prevent too big inclusions
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			$this->logger->debug(
				__METHOD__ . " called using plain parameters instead of " .
				"a PPFrame instance. Creating custom frame."
			);
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
	$tree = $this->preprocessToDom( $text );

	return $frame->expand( $tree, $expandFlags );
}
2883 
/**
 * Record that a parser limit was hit: add the "<type>-warning" message as
 * an output warning and the "<type>-category" tracking category.
 *
 * @param string $limitationType Base name of the warning message and
 *   tracking category
 * @param string|int $current Current value, passed as a numeric parameter
 * @param string|int $max Maximum allowed value, passed as a numeric parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max is harmless even when the message ignores them.
	# The user language is deliberately not applied here: the warning is only
	# shown during preview, and doing so would split the parser cache needlessly.
	$message = wfMessage( $limitationType . '-warning' )->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( $limitationType . '-category' );
}
2920 
/**
 * Expand a double-brace construct ({{...}}): subst handling, magic
 * variables, parser functions, and template / special page / interwiki
 * transclusion, in that order.
 *
 * @param array $piece Parsed brace construct. Keys read here:
 *   - 'title': node for the part before the first pipe
 *   - 'parts': argument nodes (may be null)
 *   - 'lineStart': whether the construct began at the start of a line
 * @param PPFrame $frame Frame the construct is being expanded in
 * @return array Either [ 'object' => ... ] (a node still to be expanded in
 *   the current frame) or [ 'text' => ... ] (final text)
 */
public function braceSubstitution( array $piece, PPFrame $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	# @todo FIXME: If piece['parts'] is null then the call to getLength()
	# below won't work b/c this $args isn't an object
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	$sawDeprecatedTemplateEquals = false; // T91154

	# SUBST
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->expandMagicVariable( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = Linker::normalizeSubpageLink(
			$this->getTitle(), $part1, $subpage
		);
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguageConverter()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguageConverter()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
					if (
						$title->getNamespace() === NS_TEMPLATE &&
						$title->getDBkey() === '=' &&
						$originalTitle === '='
					) {
						// Note that we won't get here if `=` is evaluated
						// (in the future) as a parser function, nor if
						// the Template namespace is given explicitly,
						// ie `{{Template:=}}`. Only `{{=}}` triggers.
						$sawDeprecatedTemplateEquals = true; // T91154
					}
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}
	if (
		$sawDeprecatedTemplateEquals &&
		$this->mStripState->unstripBoth( $text ) !== '='
	) {
		// T91154: {{=}} is deprecated when it doesn't expand to `=`;
		// use {{Template:=}} if you must.
		$this->addTrackingCategory( 'template-equals-category' );
		$this->mOutput->addWarning( wfMessage( 'template-equals-warning' )->text() );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		// @phan-suppress-next-line SecurityCheck-XSS Mixed mode, here html and safe
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		// @phan-suppress-next-line SecurityCheck-DoubleEscaped Mixed mode, here html and safe
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# Strip a leading colon so the link below does not start with "[[::".
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3293 
/**
 * Look up a parser function by name and call its registered hook.
 *
 * @param PPFrame $frame Frame used to expand node arguments
 * @param string $function Function name as written in wikitext; matched
 *   case-sensitively first, then in lower case
 * @param array $args Arguments: PPNode instances and/or plain strings;
 *   index 0 is the text after the colon
 * @return array Always contains 'found'. When the function is unknown this
 *   is [ 'found' => false ]. Otherwise it holds 'text' plus any flags the
 *   hook returned; when the hook sets 'noparse' to false, 'text' is
 *   preprocessed into a node and 'isChildObj' is set.
 */
public function callParserFunction( PPFrame $frame, $function, array $args = [] ) {
	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $this->contLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	$allArgs = [ $this ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				# Go through the accessor rather than the raw property, as the
				# rest of this class does, so the preprocessor is available
				# even if nothing has requested it yet.
				$funcArgs[] = $this->getPreprocessor()->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = $callback( ...$allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3392 
/**
 * Get the preprocessed DOM for a template, using and filling the
 * per-parser redirect and DOM caches.
 *
 * @param Title $title Template to load
 * @return array Two elements: the DOM (false when no text could be
 *   fetched) and the Title the text was finally loaded from
 */
public function getTemplateDom( Title $title ) {
	$requestedTitle = $title;
	$key = $title->getPrefixedDBkey();

	# Follow a redirect we already know about
	if ( isset( $this->mTplRedirCache[$key] ) ) {
		$target = $this->mTplRedirCache[$key];
		$title = Title::makeTitle( $target[0], $target[1] );
		$key = $title->getPrefixedDBkey();
	}

	# A cached entry may also be false (an earlier fetch returned no text)
	if ( isset( $this->mTplDomCache[$key] ) ) {
		return [ $this->mTplDomCache[$key], $title ];
	}

	# Cache miss, go to the database
	$fetched = $this->fetchTemplateAndTitle( $title );
	$text = $fetched[0];
	$title = $fetched[1];

	$dom = ( $text === false )
		? false
		: $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$this->mTplDomCache[$key] = $dom;

	# Remember the redirect only when the fetch actually produced text
	if ( $text !== false && !$title->equals( $requestedTitle ) ) {
		$this->mTplRedirCache[$requestedTitle->getPrefixedDBkey()] =
			[ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3432 
/**
 * Fetch the current revision of a title as a legacy Revision object.
 *
 * @deprecated since 1.35, use fetchCurrentRevisionRecordOfTitle() instead
 * @param Title $title
 * @return Revision|null|false A Revision wrapping the record; otherwise the
 *   falsy value from fetchCurrentRevisionRecordOfTitle() is returned as-is
 */
public function fetchCurrentRevisionOfTitle( Title $title ) {
	wfDeprecated( __METHOD__, '1.35' );
	$revisionRecord = $this->fetchCurrentRevisionRecordOfTitle( $title );
	if ( $revisionRecord ) {
		return new Revision( $revisionRecord );
	}
	return $revisionRecord;
}
3455 
/**
 * Fetch the current revision record of a title, going through a small
 * per-parser LRU cache (100 entries) keyed by prefixed DB key.
 *
 * @param Title $title
 * @return RevisionRecord|null Null when the configured callback returned a
 *   falsy value
 */
public function fetchCurrentRevisionRecordOfTitle( Title $title ) {
	$cacheKey = $title->getPrefixedDBkey();
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$revisionRecord =
			// Defaults to Parser::statelessFetchRevisionRecord()
			call_user_func(
				$this->mOptions->getCurrentRevisionRecordCallback(),
				$title,
				$this
			);
		if ( !$revisionRecord ) {
			// Parser::statelessFetchRevisionRecord() can return false;
			// normalize it to null.
			$revisionRecord = null;
		}
		$this->currentRevisionCache->set( $cacheKey, $revisionRecord );
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3491 
/**
 * Check whether the current revision of a title is already in the
 * per-parser revision cache.
 *
 * @param Title $title
 * @return bool Falsy when the cache has not been created yet
 */
public function isCurrentRevisionOfTitleCached( $title ) {
	# Entries are stored under the prefixed DB key, so probe with the same
	# key; the prefixed text form differs whenever the title contains spaces.
	return (
		$this->currentRevisionCache &&
		$this->currentRevisionCache->has( $title->getPrefixedDBkey() )
	);
}
3504 
/**
 * Look up the known current revision of a title and wrap it in a legacy
 * Revision object.
 *
 * @deprecated since 1.35
 * @param Title $title
 * @param Parser|bool $parser Not used by this implementation
 * @return Revision|false False when no revision record was found
 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	wfDeprecated( __METHOD__, '1.35' );

	$lookup = MediaWikiServices::getInstance()->getRevisionLookup();
	$record = $lookup->getKnownCurrentRevision( $title );
	if ( !$record ) {
		return false;
	}

	return new Revision( $record );
}
3522 
/**
 * Look up the known current revision record of a title via the
 * RevisionLookup service.
 *
 * @param Title $title
 * @param Parser|null $parser Not used by this implementation
 * @return RevisionRecord|bool Whatever RevisionLookup::getKnownCurrentRevision()
 *   returns; callers in this class treat a falsy result as "no revision"
 */
public static function statelessFetchRevisionRecord( Title $title, $parser = null ) {
	$lookup = MediaWikiServices::getInstance()->getRevisionLookup();

	return $lookup->getKnownCurrentRevision( $title );
}
3538 
/**
 * Fetch the unparsed text of a template through the configured template
 * callback, and register every dependency it reports with the output.
 *
 * @param Title $title
 * @return array Two elements: the template text (as returned by the
 *   callback, with U+007F replaced by "?" when it is a string) and the
 *   final Title
 */
public function fetchTemplateAndTitle( Title $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

	$revRecord = null;
	if ( isset( $fetched['revision-record'] ) ) {
		$revRecord = $fetched['revision-record'];
	} else {
		// Triggers deprecation warnings via DeprecatablePropertyArray
		$legacyRev = $fetched['revision'] ?? null;
		if ( $legacyRev instanceof Revision ) {
			$revRecord = $legacyRev->getRevisionRecord();
		}
	}

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}

	$dependencies = $fetched['deps'] ?? [];
	foreach ( $dependencies as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		if ( !$dep['title']->equals( $this->getTitle() ) || !( $revRecord instanceof RevisionRecord ) ) {
			continue;
		}

		// Self-transclusion; final result may change based on the new page version
		try {
			$sha1 = $revRecord->getSha1();
		} catch ( RevisionAccessException $e ) {
			$sha1 = null;
		}
		$this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $sha1 );
	}

	return [ $text, $fetched['finalTitle'] ?? $title ];
}
3582 
/**
 * Fetch only the text of a template, discarding the final title.
 *
 * @deprecated since 1.35, use fetchTemplateAndTitle()
 * @param Title $title
 * @return string|bool First element of fetchTemplateAndTitle()'s result
 */
public function fetchTemplate( Title $title ) {
	wfDeprecated( __METHOD__, '1.35' );

	$textAndTitle = $this->fetchTemplateAndTitle( $title );
	return $textAndTitle[0];
}
3593 
/**
 * Static function to get a template.
 * Can be overridden via ParserOptions::setTemplateCallback().
 *
 * Runs at most three fetch iterations, following redirects, and records
 * every title touched as a dependency.
 *
 * @param Title $title
 * @param Parser|bool $parser Parser providing the context title and the
 *   revision cache, or false
 * @return DeprecatablePropertyArray Keys: 'revision' (deprecated since
 *   1.35), 'revision-record' (RevisionRecord or false), 'text' (string or
 *   false), 'finalTitle' and 'deps' (list of arrays with 'title',
 *   'page_id' and 'rev_id')
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];
	$revRecord = null;
	$contextTitle = $parser ? $parser->getTitle() : null;

	# Loop to fetch the article, with up to 2 redirects
	$revLookup = MediaWikiServices::getInstance()->getRevisionLookup();
	for ( $i = 0; $i < 3 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$revRecord = null; # Assume no hook
		$id = false; # Assume current
		$origTitle = $title;
		$titleChanged = false;
		# Reset per iteration so the redirect check below never reads an
		# undefined value or the content of a previous iteration.
		$content = null;
		Hooks::runner()->onBeforeParserFetchTemplateRevisionRecord(
			# The $title is a not a PageIdentity, as it may
			# contain fragments or even represent an attempt to transclude
			# a broken or otherwise-missing Title, which the hook may
			# fix up. Similarly, the $contextTitle may represent a special
			# page or other page which "exists" as a parsing context but
			# is not in the DB.
			$contextTitle, $title,
			$skip, $revRecord
		);
		if ( !$skip && !$revRecord ) {
			# Deprecated legacy hook
			Hooks::runner()->onBeforeParserFetchTemplateAndtitle(
				$parser, $title, $skip, $id
			);
		}

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the revision
		if ( !$revRecord ) {
			if ( $id ) {
				# Handle $id returned by deprecated legacy hook
				$revRecord = $revLookup->getRevisionById( $id );
			} elseif ( $parser ) {
				$revRecord = $parser->fetchCurrentRevisionRecordOfTitle( $title );
			} else {
				$revRecord = $revLookup->getRevisionByTitle( $title );
			}
		}
		if ( $revRecord ) {
			# Update title, as $revRecord may have been changed by hook
			$title = Title::newFromLinkTarget(
				$revRecord->getPageAsLinkTarget()
			);
			$deps[] = [
				'title' => $title,
				'page_id' => $revRecord->getPageId(),
				'rev_id' => $revRecord->getId(),
			];
		} else {
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null,
			];
		}
		if ( !$title->equals( $origTitle ) ) {
			# If we fetched a rev from a different title, register
			# the original title too...
			$deps[] = [
				'title' => $origTitle,
				'page_id' => $origTitle->getArticleID(),
				'rev_id' => null,
			];
			$titleChanged = true;
		}
		# If there is no current revision, there is no page
		if ( $revRecord === null || $revRecord->getId() === null ) {
			$linkCache = MediaWikiServices::getInstance()->getLinkCache();
			$linkCache->addBadLinkObj( $title );
		}
		if ( $revRecord ) {
			if ( $titleChanged && !$revRecord->hasSlot( SlotRecord::MAIN ) ) {
				// We've added this (missing) title to the dependencies;
				// give the hook another chance to redirect it to an
				// actual page.
				$text = false;
				$finalTitle = $title;
				continue;
			}
			if ( $revRecord->hasSlot( SlotRecord::MAIN ) ) { // T276476
				$content = $revRecord->getContent( SlotRecord::MAIN );
				$text = $content ? $content->getWikitextForTransclusion() : null;
			} else {
				$text = false;
			}
			// Hook is hard deprecated since 1.35
			if ( Hooks::isRegistered( 'ParserFetchTemplate' ) ) {
				// Only create the Revision object if needed
				$legacyRevision = new Revision( $revRecord );
				Hooks::runner()->onParserFetchTemplate(
					$parser,
					$title,
					$legacyRevision,
					$text,
					$deps
				);
			}

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() === NS_MEDIAWIKI ) {
			$message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
				lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	$legacyRevision = static function () use ( $revRecord ) {
		return $revRecord ? new Revision( $revRecord ) : null;
	};
	$retValues = [
		'revision' => $legacyRevision,
		'revision-record' => $revRecord ?: false, // So isset works
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
	$propertyArray = new DeprecatablePropertyArray(
		$retValues,
		[ 'revision' => '1.35' ],
		__METHOD__
	);
	return $propertyArray;
}
3756 
/**
 * Fetch a file and register it, under the requested name and under the
 * file's own title if that differs, as an image dependency of the output.
 *
 * @param Title $title
 * @param array $options Passed on to fetchFileNoRegister()
 * @return array Two elements: the file (or a falsy value when none was
 *   found) and the possibly updated Title
 */
public function fetchFileAndTitle( Title $title, array $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$timestamp = $file->getTimestamp();
		$hash = $file->getSha1();
	} else {
		$timestamp = false;
		$hash = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );

	if ( $file ) {
		$actualTitle = $file->getTitle();
		if ( !$title->equals( $actualTitle ) ) {
			# Update fetched file title
			$title = $actualTitle;
			$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );
		}
	}

	return [ $file, $title ];
}
3778 
/**
 * Find a file in the repo group without registering it as a dependency.
 *
 * @param Title $title Used for the lookup by name
 * @param array $options Recognised keys visible here:
 *   - 'broken': when set, no lookup is done and false is returned
 *   - 'sha1': when set, the file is looked up by this key instead of by title
 *   The whole array is also forwarded to the repo group lookup.
 * @return mixed Whatever the repo group lookup returns, or false for 'broken'
 */
protected function fetchFileNoRegister( Title $title, array $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	return isset( $options['sha1'] )
		// get by (sha1,timestamp)
		? $repoGroup->findFileFromKey( $options['sha1'], $options )
		// get by (name,timestamp)
		: $repoGroup->findFile( $title, $options );
}
3802 
/**
 * Fetch the content of a page on another wiki over HTTP, going through the
 * main WAN object cache.
 *
 * Returns a localized message instead of content when the
 * 'EnableScaryTranscluding' option is off, when the request URL is longer
 * than 1024 bytes, or when the request did not yield a string body.
 *
 * @param Title $title Title used to build the request URL
 * @param string $action Value of the 'action' URL parameter
 * @return string Fetched text, or a message describing the failure
 */
public function interwikiTransclude( Title $title, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	// remote wiki ID or false
	$wikiId = $title->getTransWikiID();
	$hasWikiId = ( $wikiId !== false );

	$caller = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);

	$fetch = static function ( $oldValue, &$ttl ) use ( $url, $caller, $cache ) {
		$req = MWHttpRequest::factory( $url, [], $caller );

		// Status object
		$status = $req->execute();
		$ok = $status->isOK();
		if ( !$ok ) {
			// Failed requests are not cached
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// The remote side reported lag: cap the lifetime
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $ok ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$result = $cache->getWithSetCallback(
		$cacheKey,
		$this->svcOptions->get( 'TranscludeCacheExpiry' ),
		$fetch,
		[
			'checkKeys' => $hasWikiId
				? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
				: [],
			'pcGroup' => 'interwiki-transclude:5',
			'pcTTL' => $cache::TTL_PROC_LONG
		]
	);

	if ( is_string( $result['text'] ) ) {
		return $result['text'];
	}

	if ( $result['code'] != 200 ) {
		// No content and a non-200 status: report the status code
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $result['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3870 
/**
 * Resolve a triple-brace argument reference ({{{name|default}}}) against
 * a frame.
 *
 * @param array $piece Uses the 'title' and 'parts' entries
 * @param PPFrame $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] (default value node list, or the
 *   reconstructed literal when nothing matched) or [ 'text' => string ]
 */
public function argSubstitution( array $piece, PPFrame $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );

	$object = false;
	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultAllowed = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultAllowed ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	# Account for the argument's size; on overflow a warning comment is
	# appended to the text further down.
	$withinLimit = $this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( !$withinLimit ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere: rebuild the literal {{{...}}} construct
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( !$withinLimit ) {
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	return ( $object !== false )
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
3917 
/**
 * Produce the replacement for an extension tag.
 *
 * In HTML output mode the registered tag hook is called; in the other
 * modes the tag is re-serialized as text. Unless the hook asks for marker
 * type 'none', the output is stored in the strip state and a strip marker
 * is returned in its place.
 *
 * @param array $params Entries read here:
 *   - 'name': tag name node (required)
 *   - 'attr': attribute text node (optional)
 *   - 'inner': tag content node; absent for a self-closed tag
 *   - 'close': closing tag node (optional)
 *   - 'attributes': extra attributes as name => value (optional)
 * @param PPFrame $frame Frame used to expand the nodes
 * @return string A strip marker, the raw output (marker type 'none'), or
 *   an expansion error string passed through unchanged
 * @throws MWException When a hook returns an unknown 'markerType'
 */
public function extensionSubstitution( array $params, PPFrame $frame ) {
	$errorStr = '<span class="error">';
	$errorLen = strlen( $errorStr );

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	// $attrText is null when the tag has no attributes; do not hand null to
	// substr(), which is deprecated for internal functions as of PHP 8.1.
	if ( $attrText !== null && substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$markerType = 'general';
	if ( $this->ot['html'] ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			// Array union: attributes parsed from the tag text win over
			// the ones supplied in $params
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			// Note that $content may be null here, for example if the
			// tag is self-closed.
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			// A missing or null 'close' entry means there is no closing tag;
			// isset() covers both without an undefined-index notice.
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4018 
/**
 * Add to one of the include-size counters, unless that would push it past
 * the maximum include size from the parser options.
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Amount to add
 * @return bool True if the counter was increased, false if the limit would
 *   be exceeded (the counter is then left untouched)
 */
private function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal <= $this->mOptions->getMaxIncludeSize() ) {
		$this->mIncludeSizes[$type] = $newTotal;
		return true;
	}
	return false;
}
4034 
4039  $this->mExpensiveFunctionCount++;
4040  return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4041  }
4042 
/**
 * Strip double-underscore magic words from the text and apply their
 * effects to the parser state and output.
 *
 * The first TOC magic word is swapped for a placeholder comment so the
 * table of contents can be inserted at that position later; any further
 * ones are removed. All remaining double-underscore words are removed and
 * recorded in $this->mDoubleUnderscores and as output properties.
 *
 * @param string $text
 * @return string Text with the magic words removed
 */
private function handleDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );

		# Only keep the first one.
		$text = $tocWord->replace( '', $text );
	}

	# Now match and remove the rest of them
	$allWords = $this->magicWordFactory->getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $allWords->matchAndRemove( $text );
	$found = $this->mDoubleUnderscores;

	if ( isset( $found['nogallery'] ) ) {
		$this->mOutput->setNoGallery( true );
	}

	# An explicitly positioned TOC takes precedence over __NOTOC__
	if ( !$this->mForceTocPosition && isset( $found['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if ( isset( $found['hiddencat'] )
		&& $this->getTitle()->getNamespace() === NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ always overrides __NOINDEX__, see T16899
	# ('index' is applied second, so it wins when both are present)
	foreach ( [ 'noindex', 'index' ] as $policy ) {
		if ( isset( $found[$policy] ) && $this->getTitle()->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( $policy . '-category' );
		}
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4097 
/**
 * Add a tracking category to the parser output, using the parser's
 * current title.
 *
 * @param string $msg Passed through to the output's addTrackingCategory()
 * @return mixed Whatever the output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$output = $this->mOutput;
	$result = $output->addTrackingCategory( $msg, $this->getTitle() );
	return $result;
}
4106 
/**
 * Post-process the headings in rendered text.
 *
 * This does several things in one pass over all <h1>..<h6> elements:
 *  - builds the table of contents (nesting levels and numbering),
 *  - optionally prefixes each heading with its number,
 *  - generates a unique anchor (and optional fallback anchor) per heading,
 *  - adds an <mw:editsection> marker unless __NOEDITSECTION__ was used,
 *  - collects per-section data for the parser output.
 * The text is then split at the headings and reassembled with the newly
 * built heading HTML.
 *
 * @param string $text Rendered text containing the headings
 * @param string $origText Original wikitext; it is preprocessed to a DOM
 *   here in order to compute the offset of each section
 * @param bool $isMain When true, the section data is stored on the parser
 *   output and the TOC is inserted before the first heading (unless its
 *   position was forced by a TOC magic word)
 * @return string Text with the finished headings and TOC
 */
private function finalizeHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$full = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->getTitle()->getPrefixedDBkey();
	# The output type is switched while the original text is preprocessed
	# and walked; it is restored after the loop.
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Lower-cased ids already handed out, used to keep anchors unique
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				// @phan-suppress-next-line PhanTypeInvalidDimOffset
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					// @phan-suppress-previous-line PhanTypeInvalidDimOffset
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		# <!--LINK number-->
		# turns into
		# link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only record a fallback id when there is one. Writing
		# $refers[false] would be stored under integer key 0 and make a
		# later heading whose id is "0" look like a duplicate.
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine(
				$linkAnchor,
				$tocline,
				$numbering,
				$toclevel,
				( $isTemplate ? false : $sectionIndex )
			);
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			$byteOffset += mb_strlen(
				$this->mStripState->unstripBoth(
					$frame->expand( $node, PPFrame::RECOVER_ORIG )
				)
			);
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->getTitle()->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline(
			$level,
			$matches['attrib'][$headlineCount],
			$anchor,
			$headline,
			$editlink,
			$fallbackAnchor
		);

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		// Let hook handlers see each section as it is assembled
		$this->hookRunner->onParserSectionCreate( $this, $i, $sections[$i], $maybeShowEditLink );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full .= implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	} else {
		return $full;
	}
}
4525 
/**
 * Transform wikitext before it is saved.
 *
 * Removes NUL characters, normalizes line endings, runs pstPass2() when
 * the options ask for the pre-save transform, unstrips the result and
 * finally lets hook handlers see the text.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title the parse is started with
 * @param UserIdentity $user User set on the parser for the duration of the call
 * @param ParserOptions $options
 * @param bool $clearState When true the parser is locked and its state cleared
 * @return string The transformed wikitext
 */
public function preSaveTransform( $text, Title $title, UserIdentity $user,
	ParserOptions $options, $clearState = true
) {
	// The lock handle is not used directly; it only has to stay referenced
	// until this method returns.
	$magicScopeVariable = $clearState ? $this->lock() : null;

	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings. The latter
	// is kept for backwards-compatibility with other code that just calls
	// PST, but this should already be handled in TextContent subclasses.
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$text = $this->mStripState->unstripBoth( $text );

	$this->hookRunner->onParserPreSaveTransformComplete( $this, $text );

	# Reset
	$this->setUser( null );

	return $text;
}
4565 
/**
 * Pre-save transform helper: substitute variables, expand signatures and
 * apply the "pipe trick" to links.
 *
 * @param string $text Wikitext to transform
 * @param UserIdentity $user User whose signature replaces the tildes
 * @return string The transformed wikitext
 */
private function pstPass2( $text, UserIdentity $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( 'user-signature', 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257)
	// The brackets must be the literal full-width characters U+FF08 and
	// U+FF09. With ASCII parentheses they would form a capturing group and
	// the pattern would match any link text, truncating the label.
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
	// [[ns:page (context), context|]] (using single, double-width or Arabic comma)
	// The alternatives are: ASCII comma plus space, full-width comma
	// U+FF0C, and Arabic comma U+060C plus space.
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，|، )$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		# Current title has a "(context)" suffix: reuse it
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		# Current title has a namespace-like prefix and/or ", context" suffix
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4635 
/**
 * Build the signature wikitext for a user.
 *
 * A "fancy" signature that passes validation is cleaned and returned as
 * is. In every other case the result is the 'signature' (registered user)
 * or 'signature-anon' message, filled with the escaped user name and
 * nickname.
 *
 * @param UserIdentity $user
 * @param string|false $nickname Nickname to use; false reads the user's
 *   'nickname' option
 * @param bool|null $fancySig Whether the nickname is raw markup; null reads
 *   the user's 'fancysig' option
 * @return string
 */
public function getUserSig( UserIdentity $user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Fill in whatever the caller left unspecified from the user's options
	if ( $nickname === false ) {
		$nickname = $this->userOptionsLookup->getOption( $user, 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $this->userOptionsLookup->getBoolOption( $user, 'fancysig' );
	}

	if ( $nickname === null || $nickname === '' ) {
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' ) ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		$isValid = ( $this->validateSig( $nickname ) !== false );

		# New validator
		$validationMode = $this->svcOptions->get( 'SignatureValidation' );
		if ( $isValid && $validationMode === 'disallow' ) {
			$validator = new SignatureValidator( $user, null, $this->mOptions );
			# The result is negated: a falsy return is taken to mean "valid"
			$isValid = !$validator->validateSignature( $nickname );
		}

		if ( !$isValid ) {
			# Failed to validate; fall back to the default
			$nickname = $username;
			$this->logger->debug( __METHOD__ . ": $username has invalid signature." );
		} else {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	if ( $user->isRegistered() ) {
		$msgName = 'signature';
	} else {
		$msgName = 'signature-anon';
	}

	$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();
	return $message->title( $this->getTitle() )->text();
}
4703 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|false The unchanged text if it is well-formed, else false
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}
	return false;
}
4713 
/**
 * Clean up signature text.
 *
 * Rewrites every "{{" that is not already followed by the 'subst' magic
 * word so that it is, removes runs of 3-5 tildes via cleanSigInSig(), and
 * then runs the text through the preprocessor and expands it.
 *
 * @param string $text Signature text
 * @param bool $parsing When false, the parser is locked and a new parse is
 *   started here, and the result is unstripped before it is returned
 * @return string The cleaned signature, or $text unchanged when the
 *   clean-signatures option is off
 */
4724  public function cleanSig( $text, $parsing = false ) {
4725  if ( !$parsing ) {
4726  global $wgTitle;
4727  $magicScopeVariable = $this->lock();
 // NOTE(review): this call shows only two arguments, but startParse()
 // also requires a ParserOptions and an output type before $clearState.
 // The embedded line numbers jump from 4729 to 4732, so two argument
 // lines appear to be missing from this listing - confirm against the
 // upstream file before relying on this block.
4728  $this->startParse(
4729  $wgTitle,
4732  true
4733  );
4734  }
4735 
4736  # Option to disable this feature
4737  if ( !$this->mOptions->getCleanSignatures() ) {
4738  return $text;
4739  }
4740 
4741  # @todo FIXME: Regex doesn't respect extension tags or nowiki
4742  # => Move this logic to braceSubstitution()
4743  $substWord = $this->magicWordFactory->get( 'subst' );
 # Matches "{{" not followed by the subst word; the replacement is "{{"
 # plus the first synonym of that word.
4744  $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
4745  $substText = '{{' . $substWord->getSynonym( 0 );
4746 
4747  $text = preg_replace( $substRegex, $substText, $text );
4748  $text = self::cleanSigInSig( $text );
4749  $dom = $this->preprocessToDom( $text );
4750  $frame = $this->getPreprocessor()->newFrame();
4751  $text = $frame->expand( $dom );
4752 
 # Unstrip only when this method started the parse itself
4753  if ( !$parsing ) {
4754  $text = $this->mStripState->unstripBoth( $text );
4755  }
4756 
4757  return $text;
4758  }
4759 
/**
 * Remove every run of three to five tildes from the text, so that a
 * signature cannot itself contain a signature.
 *
 * @param string $text
 * @return string The text with those tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4770 
/**
 * Set up the parser for a parse driven from outside this class.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @param bool $clearState Whether to clear the parser state
 * @param int|null $revId When not null, stored as the current revision ID
 */
public function startExternalParse( ?Title $title, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $title, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4789 
/**
 * Store the title, options and output type for a parse, and optionally
 * clear the parser state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @param bool $clearState Whether to call clearState() afterwards
 */
private function startParse( ?Title $title, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4806 
/**
 * Run message text through preprocess().
 *
 * A static flag guards against re-entry: if this method is called again
 * while a transformation is in progress, the text is returned unchanged.
 *
 * @param string $text Message text
 * @param ParserOptions $options
 * @param Title|null $title Title to preprocess against; falls back to the
 *   global $wgTitle when not given
 * @return string The preprocessed text, or $text unchanged on re-entry
 */
public function transformMsg( $text, ParserOptions $options, ?Title $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Always release the guard. If preprocess() threw and the flag stayed
		# set, every later call in this process would return its input
		# untransformed.
		$executing = false;
	}
}
4834 
/**
 * Register a callback for an extension tag.
 *
 * The tag name is lower-cased, the callback replaces any previous one for
 * that tag, and the tag is added to the strip list if it is not on it yet.
 *
 * @param string $tag Tag name; must not contain '<', '>', CR or LF
 * @param callable $callback
 * @return callable|null The callback previously registered for the tag
 * @throws MWException If the tag name contains a forbidden character
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	// Strict comparison: with loose comparison numeric-looking names such
	// as "1e1" and "10" compare equal, and the second tag would never be
	// added to the strip list.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4872 
/**
 * Forget all registered tag hooks and empty the strip list.
 */
public function clearTagHooks() {
	$this->mStripList = [];
	$this->mTagHooks = [];
}
4880 
/**
 * Register a parser function callback under a magic word ID.
 *
 * Every synonym of the magic word is entered into the function synonym
 * table. Synonyms of case-insensitive magic words are lower-cased with
 * the content language, a leading '#' is added unless SFH_NO_HASH is
 * set, and one trailing ':' is removed.
 *
 * @param string $id Magic word ID
 * @param callable $callback
 * @param int $flags Bit field of self::SFH_* constants
 * @throws MWException If no magic word object is available for $id
 * @return callable|null The callback previously registered for $id, if any
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	# Validate the magic word before touching any state, so that a failed
	# registration does not leave a hook behind that has no synonyms.
	$mw = $this->magicWordFactory->get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$oldVal = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$synonyms = $mw->getSynonyms();
	# Used as array index: 0 = case-insensitive, 1 = case-sensitive
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $this->contLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
4954 
/**
 * List the IDs under which parser function hooks are registered.
 *
 * @return array Keys of the function hook table
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}
4964 
/**
 * Replace link placeholders in $text, in place, using the parser's
 * link holder array.
 *
 * @param string &$text Text to modify
 * @param int $options Accepted but not used by the replacement
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$this->mLinkHolders->replace( $text );
}
4976 
/**
 * Replace link placeholders in $text, in place, by delegating to the
 * link holder array's replace().
 *
 * @param string &$text Text to modify
 * @param int $options Not used
 */
private function replaceLinkHoldersPrivate( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
4987 
/**
 * Return $text as processed by the link holder array's replaceText().
 *
 * @param string $text
 * @return string
 */
private function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );
	return $replaced;
}
4998 
/**
 * Render the contents of a gallery tag to HTML.
 *
 * Each non-empty line of $text names one image, optionally followed by
 * '|' and pipe-separated parameters (alt text, link target, handler
 * parameters, caption). Gallery-wide settings are taken from $params.
 *
 * @param string $text Lines of the form "Image:someimage.jpg|This is some image"
 * @param array $params Tag attributes; 'mode', 'showfilename', 'caption',
 *   'perrow', 'widths' and 'heights' are read here, the full array is
 *   also passed to the gallery object
 * @return string HTML produced by the gallery object
 */
public function renderImageGallery( $text, array $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->getTitle() );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	$ig->setShowFilename( isset( $params['showfilename'] ) );
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	$this->hookRunner->onBeforeParserrenderImageGallery( $this, $ig );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# Resolve the image name; the File namespace is the default so
		# that names without a namespace prefix are accepted.
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( $title === null ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		$this->hookRunner->onBeforeParserFetchFileAndTitle(
			$this, $title, $options, $descQuery );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-',
				'|',
				$matches[3],
				true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( !$magicName ) {
					// Last pipe wins.
					$label = $parameterMatch;
					continue;
				}

				$paramName = $paramMap[$magicName];
				switch ( $paramName ) {
					case 'gallery-internal-alt':
						$alt = $this->stripAltText( $match, false );
						break;
					case 'gallery-internal-link':
						$linkValue = $this->stripAltText( $match, false );
						// The pipe must be escaped: unescaped it is a regex
						// alternation, and the pattern would match anything
						// starting with "-{R" or ending with "}-".
						if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
							// Result of LanguageConverter::markNoConversion
							// invoked on an external link.
							// Drop the 4-byte "-{R|" prefix and 2-byte "}-" suffix.
							$linkValue = substr( $linkValue, 4, -2 );
						}
						list( $type, $target ) = $this->parseLinkParameter( $linkValue );
						if ( $type === 'link-url' ) {
							$link = $target;
							$this->mOutput->addExternalLink( $target );
						} elseif ( $type === 'link-title' ) {
							$link = $target->getLinkURL();
							$this->mOutput->addLink( $target );
						}
						break;
					default:
						// Must be a handler specific parameter.
						if ( $handler->validateParam( $paramName, $match ) ) {
							$handlerOptions[$paramName] = $match;
						} else {
							// Guess not, consider it as caption.
							$this->logger->debug(
								"$parameterMatch failed parameter validation" );
							$label = $parameterMatch;
						}
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	$this->hookRunner->onAfterParserFetchFileAndTitle( $this, $ig, $html );
	return $html;
}
5172 
/**
 * Build (and cache per handler class) the image parameter map and the
 * matching magic word array.
 *
 * The map is keyed by magic word ID; each value is a pair
 * [ type, parameter name ] where type is 'horizAlign', 'vertAlign',
 * 'frame' or 'handler'.
 *
 * @param MediaHandler|false $handler Handler of the file, or a falsy
 *   value, in which case only the built-in parameters are used
 * @return array [ parameter map, magic word array ]
 */
private function getImageParams( $handler ) {
	$cacheKey = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$cacheKey] ) ) {
		return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
	}

	# Initialise static lists
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;
	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $group => $groupNames ) {
			foreach ( $groupNames as $optionName ) {
				// For grep: img_left, img_right, img_center, img_none,
				// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
				// img_bottom, img_text_bottom,
				// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
				// img_border, img_link, img_alt, img_class
				$magicId = str_replace( '-', '_', "img_$optionName" );
				$internalParamMap[$magicId] = [ $group, $optionName ];
			}
		}
	}

	# Add handler params
	$map = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
			$map[$magicId] = [ 'handler', $handlerParam ];
		}
	}

	$this->mImageParams[$cacheKey] = $map;
	$this->mImageParamsMagicArray[$cacheKey] =
		$this->magicWordFactory->newArray( array_keys( $map ) );

	return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
}
5222 
/**
 * Parse image options text and turn it into an image link.
 *
 * The option string is split on '|' (LanguageConverter markup is kept
 * intact), each part is matched against the image magic words and
 * validated, and the last part that does not validate becomes the
 * caption. The collected parameters are handed to Linker::makeImageLink().
 *
 * @param Title $title Title of the image
 * @param string $options Pipe-separated option text
 * @param LinkHolderArray|false $holders Passed on to stripAltText(); when
 *   falsy the parser's own link holders are used there
 * @return string Result of Linker::makeImageLink()
 */
public function makeImage( Title $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	# * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
	# * left no resizing, just left align. label is used for alt= only
	# * right same, but right aligned
	# * none same, but not aligned
	# * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
	# * center center the image
	# * frame Keep original image size, no magnify-button.
	# * framed Same as "frame"
	# * frameless like 'thumb' but without a frame. Keeps user preferences for width
	# * upright reduce width for upright images, rounded to full __0 px
	# * border draw a 1px border around the image
	# * alt Text for HTML alt attribute (defaults to empty)
	# * class Set a class for img node
	# * link Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	# * baseline
	# * sub
	# * super
	# * top
	# * text-top
	# * middle
	# * bottom
	# * text-bottom

	# Protect LanguageConverter markup when splitting into parts
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	# ($options is reused from here on as the fetch options array)
	$options = [];
	$descQuery = false;
	$this->hookRunner->onBeforeParserFetchFileAndTitle(
		$this, $title, $options, $descQuery );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							# From here on $paramName is the link type returned
							# by parseLinkParameter() (or null if invalid)
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		if ( !$validated ) {
			# Anything that is not a valid option is treated as the caption;
			# the last such part wins.
			$caption = $part;
		}
	}

	# Process alignment parameters
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text. Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers. It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text. This is the framed/thumbnail case. If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	$this->hookRunner->onParserMakeImageParams( $title, $file, $params, $this );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5434 
/**
 * Classify the value of an image link parameter.
 *
 * Valid external URLs and valid titles are also registered with the
 * parser output (addExternalLink() / addLink()).
 *
 * @param string $value
 * @return array Pair [ type, target ]:
 *   - [ 'no-link', false ] for the empty string
 *   - [ 'link-url', string ] for a valid URL with a known protocol
 *   - [ 'link-title', Title ] for a value that parses as a title
 *   - [ null, false ] otherwise
 */
private function parseLinkParameter( $value ) {
	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	$prots = $this->mUrlProtocols;
	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		# Starts with a known protocol: it is a URL or nothing at all
		$chars = self::EXT_LINK_URL_CLASS;
		$addr = self::EXT_LINK_ADDR;
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );
	return [ 'link-title', $linkTitle ];
}
5477 
/**
 * Turn caption wikitext into plain text suitable for an alt/title attribute.
 *
 * Steps, in order: link placeholders are replaced by text, strip markers
 * are expanded, '&' is encoded where it starts a legacy entity name that
 * is not terminated by a semicolon, and finally all tags are removed.
 *
 * @param string $caption
 * @param LinkHolderArray|false $holders If truthy, its replaceText() is
 *   used; otherwise the parser's own link holders are used
 * @return string
 */
5483  private function stripAltText( $caption, $holders ) {
5484  # Strip bad stuff out of the title (tooltip). We can't just use
5485  # replaceLinkHoldersText() here, because if this function is called
5486  # from handleInternalLinks2(), mLinkHolders won't be up-to-date.
5487  if ( $holders ) {
5488  $tooltip = $holders->replaceText( $caption );
5489  } else {
5490  $tooltip = $this->replaceLinkHoldersText( $caption );
5491  }
5492 
5493  # make sure there are no placeholders in thumbnail attributes
5494  # that are later expanded to html- so expand them now and
5495  # remove the tags
5496  $tooltip = $this->mStripState->unstripBoth( $tooltip );
5497  # Compatibility hack! In HTML certain entity references not terminated
5498  # by a semicolon are decoded (but not if we're in an attribute; that's
5499  # how link URLs get away without properly escaping & in queries).
5500  # But wikitext has always required semicolon-termination of entities,
5501  # so encode & where needed to avoid decode of semicolon-less entities.
5502  # See T209236 and
5503  # https://www.w3.org/TR/html5/syntax.html#named-character-references
5504  # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
 # The pattern below uses the x (extended) modifier, so whitespace and
 # "#" comments inside it are ignored. Only the "&" itself is consumed;
 # the entity name is checked in a lookahead and left in place.
5505  $tooltip = preg_replace( "/
5506  & # 1. entity prefix
5507  (?= # 2. followed by:
5508  (?: # a. one of the legacy semicolon-less named entities
5509  A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
5510  C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
5511  GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
5512  O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
5513  U(?:acute|circ|grave|uml)|Yacute|
5514  a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
5515  c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
5516  divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
5517  frac(?:1(?:2|4)|34)|
5518  gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
5519  i(?:acute|circ|excl|grave|quest|uml)|laquo|
5520  lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
5521  m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
5522  not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
5523  o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
5524  p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
5525  s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
5526  u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
5527  )
5528  (?:[^;]|$)) # b. and not followed by a semicolon
5529  # S = study, for efficiency
5530  /Sx", '&amp;', $tooltip );
5531  $tooltip = Sanitizer::stripAllTags( $tooltip );
5532 
5533  return $tooltip;
5534  }
5535 
/**
 * Expand variables and strip markers in a piece of text.
 *
 * The text is modified in place and also returned. Calling this method
 * emits a deprecation notice (deprecated since 1.35).
 *
 * @param string &$text
 * @param PPFrame|false $frame Passed on to replaceVariables()
 * @return string The modified text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	wfDeprecated( __METHOD__, '1.35' );

	# Two separate assignments on purpose: $text is a reference
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5551 
/**
 * List the tag names that have a hook registered.
 *
 * @return array Keys of the tag hook table
 */
public function getTags() {
	$tagNames = array_keys( $this->mTagHooks );
	return $tagNames;
}
5561 
/**
 * Get the parser function synonym table as filled by setFunctionHook().
 *
 * @return array
 */
public function getFunctionSynonyms() {
	return $this->mFunctionSynonyms;
}
5569 
/**
 * Get the URL protocols expression this parser embeds into its
 * link-matching regular expressions.
 *
 * @return string
 */
public function getUrlProtocols() {
	return $this->mUrlProtocols;
}
5577 
/**
 * Extract one section from wikitext, or replace it with new text.
 *
 * The text is preprocessed to a DOM and the top-level <h> nodes are
 * used as section boundaries. A section runs up to the next heading
 * whose level is less than or equal to its own, so nested subsections
 * belong to it.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section index, optionally preceded by
 *   '-'-separated flag parts (only 'T' is inspected here)
 * @param string $mode 'get' or 'replace'
 * @param string $newText In 'replace' mode the replacement text; in
 *   'get' mode the value returned when the section is not found
 * @return string In 'get' mode the section text; in 'replace' mode the
 *   whole text with the section replaced. Trailing whitespace is trimmed.
 */
5607  private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
5608  global $wgTitle; # not generally used but removes an ugly failure mode
5609 
5610  $magicScopeVariable = $this->lock();
 # NOTE(review): the embedded numbering jumps from 5612 to 5615, so the
 # second and third startParse() arguments are not visible in this listing.
5611  $this->startParse(
5612  $wgTitle,
5615  true
5616  );
5617  $outText = '';
5618  $frame = $this->getPreprocessor()->newFrame();
5619 
5620  # Process section extraction flags
5621  $flags = 0;
5622  $sectionParts = explode( '-', $sectionId );
 # The last '-'-separated part is the section index, the rest are flags
5623  $sectionIndex = array_pop( $sectionParts );
5624  foreach ( $sectionParts as $part ) {
 # NOTE(review): line 5626 (the body of this branch) is missing from
 # this listing; presumably it sets a bit in $flags -- confirm against
 # the real file.
5625  if ( $part === 'T' ) {
5627  }
5628  }
5629 
5630  # Check for empty input
5631  if ( strval( $text ) === '' ) {
5632  # Only sections 0 and T-0 exist in an empty document
5633  if ( $sectionIndex == 0 ) {
5634  if ( $mode === 'get' ) {
5635  return '';
5636  }
5637 
5638  return $newText;
5639  } else {
5640  if ( $mode === 'get' ) {
5641  return $newText;
5642  }
5643 
5644  return $text;
5645  }
5646  }
5647 
5648  # Preprocess the text
5649  $root = $this->preprocessToDom( $text, $flags );
5650 
5651  # <h> nodes indicate section breaks
5652  # They can only occur at the top level, so we can find them by iterating the root's children
5653  $node = $root->getFirstChild();
5654 
5655  # Find the target section
5656  if ( $sectionIndex == 0 ) {
5657  # Section zero doesn't nest, level=big
5658  $targetLevel = 1000;
5659  } else {
5660  while ( $node ) {
5661  if ( $node->getName() === 'h' ) {
5662  $bits = $node->splitHeading();
5663  if ( $bits['i'] == $sectionIndex ) {
5664  $targetLevel = $bits['level'];
5665  break;
5666  }
5667  }
 # In replace mode everything before the target section is copied
 # through unchanged
5668  if ( $mode === 'replace' ) {
5669  $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5670  }
5671  $node = $node->getNextSibling();
5672  }
5673  }
5674 
5675  if ( !$node ) {
5676  # Not found
5677  if ( $mode === 'get' ) {
5678  return $newText;
5679  } else {
5680  return $text;
5681  }
5682  }
5683 
5684  # Find the end of the section, including nested sections
5685  do {
5686  if ( $node->getName() === 'h' ) {
5687  $bits = $node->splitHeading();
5688  $curLevel = $bits['level'];
 # Stop at the first other heading of the same or a higher level
5689  if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
5690  break;
5691  }
5692  }
5693  if ( $mode === 'get' ) {
5694  $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5695  }
5696  $node = $node->getNextSibling();
5697  } while ( $node );
5698 
5699  # Write out the remainder (in replace mode only)
5700  if ( $mode === 'replace' ) {
5701  # Output the replacement text
5702  # Add two newlines on -- trailing whitespace in $newText is conventionally
5703  # stripped by the editor, so we need both newlines to restore the paragraph gap
5704  # Only add trailing whitespace if there is newText
5705  if ( $newText != "" ) {
5706  $outText .= $newText . "\n\n";
5707  }
5708 
5709  while ( $node ) {
5710  $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5711  $node = $node->getNextSibling();
5712  }
5713  }
5714 
5715  # Re-insert stripped tags
5716  $outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
5717 
5718  return $outText;
5719  }
5720 
/**
 * Get the wikitext of one section.
 *
 * Thin wrapper around extractSections() in 'get' mode.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier
 * @param string $defaultText Returned when the section does not exist
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$sectionText = $this->extractSections( $text, $sectionId, 'get', $defaultText );
	return $sectionText;
}
5738 
/**
 * Replace one section of a page with new wikitext.
 *
 * Thin wrapper around extractSections() in 'replace' mode.
 *
 * @param string $oldText Current page wikitext
 * @param string|int $sectionId Section identifier
 * @param string $newText Replacement text for the section
 * @return string The page text with the section replaced
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$merged = $this->extractSections( $oldText, $sectionId, 'replace', $newText );
	return $merged;
}
5754 
/**
 * Split wikitext into a flat list of sections.
 *
 * The text is preprocessed to a DOM; each top-level <h> node starts a
 * new section. The first entry (index 0) holds whatever precedes the
 * first heading.
 *
 * @param string $text
 * @return array[] List of sections, each an array with the keys
 *   'index' (heading index, 0 for the lead), 'level', 'offset' (byte
 *   offset of the section start within the recovered text), 'heading'
 *   (original heading wikitext, '' for the lead) and 'text' (original
 *   section wikitext including the heading)
 */
5784  public function getFlatSectionInfo( $text ) {
5785  $magicScopeVariable = $this->lock();
 # NOTE(review): the embedded numbering jumps from 5787 to 5790, so the
 # second and third startParse() arguments are not visible in this listing.
5786  $this->startParse(
5787  null,
5790  true
5791  );
5792  $frame = $this->getPreprocessor()->newFrame();
5793  $root = $this->preprocessToDom( $text, 0 );
5794  $node = $root->getFirstChild();
5795  $offset = 0;
 # The lead section collects everything before the first heading
5796  $currentSection = [
5797  'index' => 0,
5798  'level' => 0,
5799  'offset' => 0,
5800  'heading' => '',
5801  'text' => ''
5802  ];
5803  $sections = [];
5804 
5805  while ( $node ) {
5806  $nodeText = $frame->expand( $node, PPFrame::RECOVER_ORIG );
5807  if ( $node->getName() === 'h' ) {
 # A heading closes the current section and opens the next one
5808  $bits = $node->splitHeading();
5809  $sections[] = $currentSection;
5810  $currentSection = [
5811  'index' => $bits['i'],
5812  'level' => $bits['level'],
5813  'offset' => $offset,
5814  'heading' => $nodeText,
5815  'text' => $nodeText
5816  ];
5817  } else {
5818  $currentSection['text'] .= $nodeText;
5819  }
 # strlen() counts bytes, so offsets are byte offsets
5820  $offset += strlen( $nodeText );
5821  $node = $node->getNextSibling();
5822  }
5823  $sections[] = $currentSection;
5824  return $sections;
5825  }
5826 
/**
 * Get the revision ID recorded for the current parse.
 *
 * @return int|null Null when no revision ID has been set
 */
public function getRevisionId() {
	return $this->mRevisionId;
}
5840 
/**
 * Get the legacy Revision object for the current parse, creating and
 * caching it from the revision record on first use.
 *
 * Calling this method emits a deprecation notice (deprecated since 1.35).
 *
 * @return Revision|null Null when no revision record is available
 */
public function getRevisionObject() {
	wfDeprecated( __METHOD__, '1.35' );

	if ( !$this->mRevisionObject ) {
		$this->mRevisionObject = null;

		$record = $this->getRevisionRecordObject();
		if ( $record ) {
			$this->mRevisionObject = new Revision( $record );
		}
	}

	return $this->mRevisionObject;
}
5864 
/**
 * Get the revision record for the current parse, loading and caching it
 * on first use.
 *
 * The record is obtained from the "current revision record" callback of
 * the parser options. In preview mode (no revision ID set) a saved
 * revision returned by the callback is ignored and null is returned
 * without caching.
 *
 * @return RevisionRecord|null
 */
public function getRevisionRecordObject() {
	if ( $this->mRevisionRecordObject ) {
		return $this->mRevisionRecordObject;
	}

	// NOTE: try to get the RevisionObject even if mRevisionId is null.
	// This is useful when parsing a revision that has not yet been saved.
	// However, if we get back a saved revision even though we are in
	// preview mode, we'll have to ignore it, see below.
	// NOTE: This callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. We can't just
	// skip it if mRevisionId is set.
	$callback = $this->mOptions->getCurrentRevisionRecordCallback();
	$record = call_user_func( $callback, $this->getTitle(), $this );

	// The revision record callback returns `false` (not null) to
	// indicate that the revision is missing. (See for example
	// Parser::statelessFetchRevisionRecord(), the default callback.)
	// This API expects `null` instead. (T251952)
	if ( $record === false ) {
		$record = null;
	}

	// We are in preview mode (mRevisionId is null), and the current revision callback
	// returned an existing revision. Ignore it and return null, it's probably the page's
	// current revision, which is not what we want here. Note that we do want to call the
	// callback to allow the unsaved revision to be injected here, e.g. for
	// self-transclusion previews.
	if ( $this->mRevisionId === null && $record && $record->getId() ) {
		return null;
	}

	// If the parse is for a new revision, then the callback should have
	// already been set to force the object and should match mRevisionId.
	// If not, try to fetch by mRevisionId for sanity.
	if ( $this->mRevisionId && $record && $record->getId() != $this->mRevisionId ) {
		$lookup = MediaWikiServices::getInstance()->getRevisionLookup();
		$record = $lookup->getRevisionById( $this->mRevisionId );
	}

	$this->mRevisionRecordObject = $record;

	return $this->mRevisionRecordObject;
}
5919 
/**
 * Get the timestamp of the revision being parsed, adjusted to the
 * site-default timezone; computed once and cached.
 *
 * When no revision record is available the timestamp from the parser
 * options is used. The unadjusted timestamp is reported to the parser
 * output via setRevisionTimestampUsed().
 *
 * @return string
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Use specified revision timestamp, falling back to the current timestamp
		$record = $this->getRevisionRecordObject();
		if ( $record ) {
			$rawTimestamp = $record->getTimestamp();
		} else {
			$rawTimestamp = $this->mOptions->getTimestamp();
		}
		$this->mOutput->setRevisionTimestampUsed( $rawTimestamp ); // unadjusted time zone

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $rawTimestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
5944 
/**
 * Get the name of the user who made the revision being parsed; cached
 * once a name has been determined.
 *
 * Falls back to the name of the parser's current user when there is no
 * revision user and the parse is a wikitext transform or a preview.
 *
 * @return string|null Null when no name could be determined
 */
public function getRevisionUser(): ?string {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$record = $this->getRevisionRecordObject();

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $record && $record->getUser() ) {
		$this->mRevisionUser = $record->getUser()->getName();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUser()->getName();
	}
	# In every other case $this->mRevisionUser is still null here

	return $this->mRevisionUser;
}
5967 
/**
 * Get the size of the revision being parsed; computed once and cached.
 *
 * @return int|null Size reported by the revision record, or the parser
 *   input size when there is no revision record
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$record = $this->getRevisionRecordObject();

	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the substitution
	# itself will change the size.
	$this->mRevisionSize = $record ? $record->getSize() : $this->mInputSize;

	return $this->mRevisionSize;
}
5988 
/**
 * Set the default sort key and record it as the 'defaultsort' property
 * of the parser output.
 *
 * @param string $sort
 */
public function setDefaultSort( $sort ) {
	$this->mOutput->setProperty( 'defaultsort', $sort );
	$this->mDefaultSort = $sort;
}
5998 
/**
 * Get the default sort key, or the empty string when none has been set
 * (i.e. the stored value is false).
 *
 * @return string
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6016 
/**
 * Get the stored default sort key as is, without the empty-string
 * substitution done by getDefaultSort().
 *
 * @return string|bool
 */
public function getCustomDefaultSort() {
	return $this->mDefaultSort;
}
6026 
/**
 * Derive a normalized section name from heading text that has already
 * been stripped of markup: character references are decoded and the
 * result is passed through normalizeSectionName().
 *
 * @param string $text
 * @return string
 */
6027  private static function getSectionNameFromStrippedText( $text ) {
 # NOTE(review): line 6028 is missing from this listing; a further
 # processing step may take place here -- confirm against the real file.
6029  $text = Sanitizer::decodeCharReferences( $text );
6030  $text = self::normalizeSectionName( $text );
6031  return $text;
6032  }
6033 
/**
 * Build a '#'-prefixed anchor from a section name, escaping the name
 * with Sanitizer::escapeIdForLink().
 *
 * @param string $sectionName
 * @return string
 */
private static function makeAnchor( $sectionName ) {
	$escaped = Sanitizer::escapeIdForLink( $sectionName );
	return '#' . $escaped;
}
6037 
/**
 * Build a '#'-prefixed anchor from a section name, taking the second
 * entry of the 'FragmentMode' setting into account.
 *
 * @param string $sectionName
 * @return string
 */
6038  private function makeLegacyAnchor( $sectionName ) {
6039  $fragmentMode = $this->svcOptions->get( 'FragmentMode' );
6040  if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
6041  // ForAttribute() and ForLink() are the same for legacy encoding
 // NOTE(review): line 6042 is missing from this listing, so the
 // assignment of $id for the legacy case is not visible here.
6043  } else {
6044  $id = Sanitizer::escapeIdForLink( $sectionName );
6045  }
6046 
6047  return "#$id";
6048  }
6049 
/**
 * Guess the anchor ('#'-prefixed) that a heading with the given
 * wikitext would get.
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6064 
/**
 * Same as guessSectionNameFromWikiText(), but the anchor is built with
 * makeLegacyAnchor().
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6080 
/**
 * Guess the anchor ('#'-prefixed) for heading text that has already
 * been stripped of markup.
 *
 * @param string $text
 * @return string
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6090 
/**
 * Normalize a section name the way the title parser normalizes a link
 * fragment.
 *
 * The name is parsed as the title string "#<name>" and the resulting
 * fragment is returned; if that string is malformed the input is
 * returned unchanged.
 *
 * @param string $text
 * @return string
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links

	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';

	try {
		return $titleParser->splitTitleString( "#$text" )['fragment'];
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}
}
6110 
/**
 * Strip link, quote and HTML-tag markup from a section name, keeping the
 * visible text.
 *
 * Steps, in order: piped and plain internal links are reduced to their
 * label/target, external links with a label are reduced to the label,
 * wikitext quotes are run through doQuotes(), and finally everything
 * between '<' and '>' is removed.
 *
 * @param string $text Wikitext of the section name
 * @return string Section name with the markup removed
 */
public function stripSectionName( $text ) {
	# Ordered map of pattern => replacement; each is applied to the result
	# of the previous one.
	$linkPatterns = [
		# Strip internal link markup
		'/\[\[:?([^[|]+)\|([^[]+)\]\]/' => '$2',
		'/\[\[:?([^[]+)\|?\]\]/' => '$1',

		# Strip external link markup
		# @todo FIXME: Not tolerant to blank link text
		# I.E. [https://www.mediawiki.org] will render as [1] or something depending
		# on how many empty links there are on the page - need to figure that out.
		'/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/' => '$2',
	];
	foreach ( $linkPatterns as $pattern => $replacement ) {
		$text = preg_replace( $pattern, $replacement, $text );
	}

	# Parse wikitext quotes (italics & bold)
	$quoted = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $quoted );
}
6143 
/**
 * Strip/replaceVariables/unstrip for preprocessor regression testing.
 *
 * @param string $text Wikitext to process
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants; defaults to OT_HTML
 * @return string Expanded text with strip markers restored and HTML tags sanitized
 */
private function fuzzTestSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	// Hold the lock object in a local so it stays alive until this method returns.
	$lockGuard = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );
	return Sanitizer::removeHTMLtags( $unstripped );
}
6165 
/**
 * Fuzz-test helper: run preSaveTransform() on the text, using the user
 * taken from the given parser options.
 *
 * @param string $text Wikitext to transform
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of preSaveTransform()
 */
private function fuzzTestPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $title, $user, $options );
}
6175 
/**
 * Fuzz-test helper: same as fuzzTestSrvus(), but with the output type
 * fixed to OT_PREPROCESS.
 *
 * @param string $text Wikitext to process
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of fuzzTestSrvus()
 */
private function fuzzTestPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;
	return $this->fuzzTestSrvus( $text, $title, $options, $outputType );
}
6185 
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and concatenate the results with the markers,
 * which are copied through untouched.
 *
 * A marker prefix without a matching suffix is treated as running to the
 * end of the string: that tail is copied verbatim, without calling the
 * callback on it.
 *
 * @param string $s Text possibly containing strip markers
 * @param callable $callback Receives one text region, returns its replacement
 * @return string
 */
public function markerSkipCallback( $s, callable $callback ) {
	$totalLength = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$result = '';
	$offset = 0;

	while ( $offset < $totalLength ) {
		$prefixPos = strpos( $s, self::MARKER_PREFIX, $offset );
		if ( $prefixPos === false ) {
			// No more markers: everything left is ordinary text.
			$result .= $callback( substr( $s, $offset ) );
			break;
		}

		// Ordinary text in front of the marker.
		$result .= $callback( substr( $s, $offset, $prefixPos - $offset ) );

		$suffixPos = strpos( $s, self::MARKER_SUFFIX, $prefixPos );
		if ( $suffixPos === false ) {
			// Unterminated marker: pass the remainder through unchanged.
			$result .= substr( $s, $prefixPos );
			break;
		}

		// Copy the complete marker (prefix through suffix) and continue after it.
		$markerEnd = $suffixPos + $suffixLength;
		$result .= substr( $s, $prefixPos, $markerEnd - $prefixPos );
		$offset = $markerEnd;
	}

	return $result;
}
6226 
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the killMarkers() method of the current strip state.
 *
 * @param string $text
 * @return string Text as returned by the strip state
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
6236 
/**
 * Parse a width (and optionally height) parameter such as "200", "200px",
 * "200x100" or "200x100px".
 *
 * @param string $value Raw parameter text
 * @param bool $parseHeight Whether the "<width>x<height>" form is accepted
 * @return array Empty if $value is '' or matches neither form; otherwise it
 *   has an integer 'width' key, plus an integer 'height' key when the
 *   "<width>x<height>" form matched. Missing digits yield 0.
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$dims = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $dims ) ) {
		return [
			'width' => intval( $dims[1] ),
			'height' => intval( $dims[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		// intval() reads the leading digits and ignores the optional "px".
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6266 
/**
 * Lock the current instance of the parser.
 *
 * Marks the parser as "in parse" by storing the current backtrace in
 * $mInParse, and returns an object whose callback resets $mInParse to
 * false. Callers keep the returned object in a local variable for as long
 * as the lock should be held.
 *
 * @throws MWException If the parser is already locked
 * @return ScopedCallback Callback object that clears the lock
 */
protected function lock() {
	$currentOwner = $this->mInParse;
	if ( $currentOwner ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $currentOwner );
	}

	// Save the backtrace when locking, so that if some code tries locking again,
	// we can print the lock owner's backtrace for easier debugging
	$lockSite = new Exception;
	$this->mInParse = $lockSite->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6293 
/**
 * Strip the outer <p></p> wrapper from HTML that consists of exactly one
 * paragraph.
 *
 * The input is returned unchanged when it is not wrapped in a single
 * "<p>...</p>" (optionally followed by a newline), or when the wrapped
 * content itself contains a closing "</p>", i.e. there is more than one
 * paragraph.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$parts = [];
	if ( !preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $parts ) ) {
		return $html;
	}

	$inner = $parts[1];
	// A "</p>" inside the captured content means several paragraphs: leave as is.
	return strpos( $inner, '</p>' ) === false ? $inner : $html;
}
6312 
/**
 * Return this parser if it is not doing anything, otherwise get a fresh
 * parser from the parser factory.
 *
 * "Doing something" means $mInParse is truthy, i.e. the parser is locked.
 *
 * @return Parser
 */
public function getFreshParser() {
	return $this->mInParse ? $this->factory->create() : $this;
}
6330 
/**
 * Set up the PHP implementation of OOUI for use in this request and flag
 * the parser output so that OOUI is enabled for it.
 *
 * @deprecated since 1.35 (a deprecation warning is logged on every call)
 */
public function enableOOUI() {
	wfDeprecated( __METHOD__, '1.35' );
	// Restored: the listing dropped this call (line 6340), leaving only the
	// output flag and no actual OOUI setup.
	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
6343 
/**
 * Sets the flag on the parser output but also does some debug logging.
 *
 * @param string $flag Name of the flag to set on the parser output
 * @param string $reason Explanation that is appended to the debug log line
 */
private function setOutputFlag( string $flag, string $reason ): void {
	$this->mOutput->setFlag( $flag );

	$pageName = $this->getTitle()->getPrefixedText();
	$message = __METHOD__ . ": set {$flag} flag on '{$pageName}'; {$reason}";
	$this->logger->debug( $message );
}
6355 }
Parser\$badFileLookup
BadFileLookup $badFileLookup
Definition: Parser.php:342
Parser\getFunctionHooks
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:4961
Parser\$mLinkRenderer
LinkRenderer $mLinkRenderer
Definition: Parser.php:306
Parser\$mForceTocPosition
$mForceTocPosition
Definition: Parser.php:231
Parser\recursivePreprocess
recursivePreprocess( $text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:925
Parser\getContentLanguageConverter
getContentLanguageConverter()
Shorthand for getting a Language Converter for Content language.
Definition: Parser.php:1608
ParserOptions
Set options of the Parser.
Definition: ParserOptions.php:44
Parser\attributeStripCallback
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition: Parser.php:5545
PPFrame\loopCheck
loopCheck( $title)
Returns true if the infinite loop check is OK, false if a loop is detected.
Parser\$mSubstWords
MagicWordArray $mSubstWords
Definition: Parser.php:176
Parser\$linkRendererFactory
LinkRendererFactory $linkRendererFactory
Definition: Parser.php:333
Sanitizer\ID_FALLBACK
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition: Sanitizer.php:78
Parser\maybeMakeExternalImage
maybeMakeExternalImage( $url)
make an image if it's allowed, either through the global option, through the exception,...
Definition: Parser.php:2320
MagicWordArray
Class for handling an array of magic words.
Definition: MagicWordArray.php:32
Parser\EXT_LINK_ADDR
const EXT_LINK_ADDR
Definition: Parser.php:102
Revision\RevisionAccessException
Exception representing a failure to look up a revision.
Definition: RevisionAccessException.php:34
FauxRequest
WebRequest clone which takes values from a provided array.
Definition: FauxRequest.php:35
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:363
Parser\$mInputSize
$mInputSize
Definition: Parser.php:272
PPFrame\STRIP_COMMENTS
const STRIP_COMMENTS
Definition: PPFrame.php:31
Parser\SPACE_NOT_NL
const SPACE_NOT_NL
Definition: Parser.php:109
HtmlArmor
Marks HTML that shouldn't be escaped.
Definition: HtmlArmor.php:30
Revision\RevisionRecord
Page revision base class.
Definition: RevisionRecord.php:47
Parser\__destruct
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:478
Preprocessor\DOM_FOR_INCLUSION
const DOM_FOR_INCLUSION
Transclusion mode flag for Preprocessor::preprocessToObj()
Definition: Preprocessor.php:29
ParserOutput
Definition: ParserOutput.php:31
Parser\$mLinkHolders
LinkHolderArray $mLinkHolders
Definition: Parser.php:203
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:72
Parser\braceSubstitution
braceSubstitution(array $piece, PPFrame $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:2934
Parser\makeLimitReport
makeLimitReport()
Set the limit report data in the current ParserOutput, and return the limit report HTML comment.
Definition: Parser.php:708
MagicWordFactory
A factory that stores information about MagicWords, and creates them on demand with caching.
Definition: MagicWordFactory.php:37
Parser\internalParseHalfParsed
internalParseHalfParsed( $text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1646
Parser\$userFactory
UserFactory $userFactory
Definition: Parser.php:357
Parser\stripAltText
stripAltText( $caption, $holders)
Definition: Parser.php:5483
Parser\killMarkers
killMarkers( $text)
Remove any strip markers found in the given text.
Definition: Parser.php:6233
Sanitizer\stripAllTags
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed,...
Definition: Sanitizer.php:1570
Parser\$mTagHooks
$mTagHooks
Definition: Parser.php:151
Parser\OutputType
OutputType( $x=null)
Accessor/mutator for the output type.
Definition: Parser.php:1041
Parser\$currentRevisionCache
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:290
Parser\setOutputFlag
setOutputFlag(string $flag, string $reason)
Sets the flag on the parser output but also does some debug logging.
Definition: Parser.php:6350
Parser\Title
Title(Title $x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:1005
Parser\enableOOUI
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6338
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:173
Linker\makeSelfLinkObj
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:165
Parser\$mTplDomCache
array $mTplDomCache
Definition: Parser.php:233
MediaWiki\BadFileLookup
Definition: BadFileLookup.php:13
PPFrame\NO_ARGS
const NO_ARGS
Definition: PPFrame.php:29
wfSetVar
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest If source is NULL, it just returns the val...
Definition: GlobalFunctions.php:1561
Parser\parseExtensionTagAsTopLevelDoc
parseExtensionTagAsTopLevelDoc( $text)
Needed by Parsoid/PHP to ensure all the hooks for extensions are run in the right order.
Definition: Parser.php:884
Parser\$mDoubleUnderscores
$mDoubleUnderscores
Definition: Parser.php:226
Linker\tocIndent
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1653
Parser\getRevisionSize
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:5973
Sanitizer\escapeIdForAttribute
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
Definition: Sanitizer.php:811
Sanitizer\removeHTMLtags
static removeHTMLtags( $text, $processCallback=null, $args=[], $extratags=[], $removetags=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments.
Definition: Sanitizer.php:239
Parser\handleExternalLinks
handleExternalLinks( $text)
Replace external links (REL)
Definition: Parser.php:2104
Parser\$mOutputType
$mOutputType
Definition: Parser.php:258
MediaWiki\Linker\LinkRenderer
Class that generates HTML links for pages.
Definition: LinkRenderer.php:41
ParserOptions\getDisableTitleConversion
getDisableTitleConversion()
Whether title conversion should be disabled.
Definition: ParserOptions.php:568
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1832
Parser\handleHeadings
handleHeadings( $text)
Parse headers and return html.
Definition: Parser.php:1883
MediaWiki\SpecialPage\SpecialPageFactory
Factory for handling the special page list and generating SpecialPage objects.
Definition: SpecialPageFactory.php:61
$wgNoFollowDomainExceptions
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
Definition: DefaultSettings.php:4798
Parser\handleAllQuotes
handleAllQuotes( $text)
Replace single quotes with HTML markup.
Definition: Parser.php:1900
Parser\$mUrlProtocols
$mUrlProtocols
Definition: Parser.php:179
Parser\extractTagsAndParams
static extractTagsAndParams(array $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:1222
Parser\$mLinkID
int $mLinkID
Definition: Parser.php:209
OT_HTML
const OT_HTML
Definition: Defines.php:168
SFH_NO_HASH
const SFH_NO_HASH
Definition: Defines.php:181
Title\getPrefixedText
getPrefixedText()
Get the prefixed title with spaces.
Definition: Title.php:1886
Parser\handleDoubleUnderscore
handleDoubleUnderscore( $text)
Strip double-underscore items like NOGALLERY and NOTOC Fills $this->mDoubleUnderscores,...
Definition: Parser.php:4050
Parser\fetchCurrentRevisionRecordOfTitle
fetchCurrentRevisionRecordOfTitle(Title $title)
Fetch the current revision of a given title as a RevisionRecord.
Definition: Parser.php:3469
Sanitizer\normalizeSectionNameWhitespace
static normalizeSectionNameWhitespace( $section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(),...
Definition: Sanitizer.php:1104
OT_PREPROCESS
const OT_PREPROCESS
Definition: Defines.php:170
Parser\normalizeSectionName
static normalizeSectionName( $text)
Apply the same normalization as code making links to this section would.
Definition: Parser.php:6097
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
wfHostname
wfHostname()
Get host name of the current machine, for use in error reporting.
Definition: GlobalFunctions.php:1294
Parser\recursiveTagParseFully
recursiveTagParseFully( $text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition: Parser.php:859
Parser\$specialPageFactory
SpecialPageFactory $specialPageFactory
Definition: Parser.php:321
Parser\nextLinkID
nextLinkID()
Definition: Parser.php:1085
Parser\fuzzTestPreprocess
fuzzTestPreprocess( $text, Title $title, ParserOptions $options)
Definition: Parser.php:6182
Parser\fuzzTestPst
fuzzTestPst( $text, Title $title, ParserOptions $options)
Definition: Parser.php:6172
Parser\getTargetLanguage
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:1112
User\newFromName
static newFromName( $name, $validate='valid')
Definition: User.php:586
Parser\$mStripList
$mStripList
Definition: Parser.php:154
wfMessage
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Definition: GlobalFunctions.php:1231
MediaWiki\Linker\LinkRendererFactory
Factory to create LinkRender objects.
Definition: LinkRendererFactory.php:34
Parser\__construct
__construct(ServiceOptions $svcOptions, MagicWordFactory $magicWordFactory, Language $contLang, ParserFactory $factory, string $urlProtocols, SpecialPageFactory $spFactory, LinkRendererFactory $linkRendererFactory, NamespaceInfo $nsInfo, LoggerInterface $logger, BadFileLookup $badFileLookup, LanguageConverterFactory $languageConverterFactory, HookContainer $hookContainer, TidyDriverBase $tidy, WANObjectCache $wanCache, UserOptionsLookup $userOptionsLookup, UserFactory $userFactory)
Constructing parsers directly is not allowed! Use a ParserFactory.
Definition: Parser.php:405
SpecialPage\getTitleFor
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name If you don't need a full Title object,...
Definition: SpecialPage.php:107
Parser\getRevisionObject
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:5848
Parser\guessSectionNameFromWikiText
guessSectionNameFromWikiText( $text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition: Parser.php:6058
Parser\setDefaultSort
setDefaultSort( $sort)
Mutator for $mDefaultSort.
Definition: Parser.php:5994
Preprocessor_Hash
Differences from DOM schema:
Definition: Preprocessor_Hash.php:43
StripState
Definition: StripState.php:29
Parser\getExternalLinkRel
static getExternalLinkRel( $url=false, LinkTarget $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:2185
Parser\replaceVariables
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:2860
Parser\MARKER_PREFIX
const MARKER_PREFIX
Definition: Parser.php:144
Parser\getFunctionSynonyms
getFunctionSynonyms()
Definition: Parser.php:5566
Parser\$mInParse
bool string $mInParse
Recursive call protection.
Definition: Parser.php:298
Parser\transformMsg
transformMsg( $text, ParserOptions $options, Title $title=null)
Wrapper for preprocess()
Definition: Parser.php:4815
Parser\doQuotes
doQuotes( $text)
Helper function for handleAllQuotes()
Definition: Parser.php:1918
Linker\tocLine
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1679
MediaWiki\Languages\LanguageConverterFactory
An interface for creating language converters.
Definition: LanguageConverterFactory.php:46
Parser\$svcOptions
ServiceOptions $svcOptions
This is called $svcOptions instead of $options like elsewhere to avoid confusion with $mOptions,...
Definition: Parser.php:330
MediaWiki\User\UserIdentity
Interface for objects representing user identity.
Definition: UserIdentity.php:39
Linker\tocList
static tocList( $toc, Language $lang=null)
Wraps the TOC in a div with ARIA navigation role and provides the hide/collapse JavaScript.
Definition: Linker.php:1715
Parser\SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Parser.php:90
Parser\OT_WIKI
const OT_WIKI
Definition: Parser.php:120
Parser\getTags
getTags()
Accessor.
Definition: Parser.php:5558
Parser\getStripList
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1285
Parser\initializeVariables
initializeVariables()
Initialize the magic variables (like CURRENTMONTHNAME) and substitution modifiers.
Definition: Parser.php:2811
PPFrame\NO_TEMPLATES
const NO_TEMPLATES
Definition: PPFrame.php:30
Preprocessor
Definition: Preprocessor.php:27
Parser\getOptions
getOptions()
Definition: Parser.php:1057
MediaWiki\Languages\LanguageNameUtils
A service that provides utilities to do with language names and codes.
Definition: LanguageNameUtils.php:42
PPFrame\newChild
newChild( $args=false, $title=false, $indexOffset=0)
Create a child frame.
Parser\getFunctionLang
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:1100
StringUtils\replaceMarkup
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <....
Definition: StringUtils.php:268
Parser\$mRevisionRecordObject
RevisionRecord null $mRevisionRecordObject
Definition: Parser.php:275
Parser\Options
Options( $x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:1077
NS_SPECIAL
const NS_SPECIAL
Definition: Defines.php:53
Parser\lock
lock()
Lock the current instance of the parser.
Definition: Parser.php:6276
Parser\statelessFetchRevision
static statelessFetchRevision(Title $title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3515
Revision
Definition: Revision.php:40
Parser\getDefaultSort
getDefaultSort()
Accessor for $mDefaultSort Will use the empty string if none is set.
Definition: Parser.php:6009
Parser\$mFunctionSynonyms
$mFunctionSynonyms
Definition: Parser.php:153
Parser\$hookRunner
HookRunner $hookRunner
Definition: Parser.php:348
Parser\$nsInfo
NamespaceInfo $nsInfo
Definition: Parser.php:336
Parser\makeLegacyAnchor
makeLegacyAnchor( $sectionName)
Definition: Parser.php:6038
Parser\setHook
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4859
Parser\$mHeadings
$mHeadings
Definition: Parser.php:224
Parser\$userOptionsLookup
UserOptionsLookup $userOptionsLookup
Definition: Parser.php:354
Parser\getTitle
getTitle()
Definition: Parser.php:994
Parser\$mVariables
MagicWordArray $mVariables
Definition: Parser.php:171
wfDeprecatedMsg
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
Definition: GlobalFunctions.php:1066
MWException
MediaWiki exception.
Definition: MWException.php:29
Parser\TOC_START
const TOC_START
Definition: Parser.php:147
Parser\statelessFetchTemplate
static statelessFetchTemplate( $title, $parser=false)
Static function to get a template Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3603
Parser\$ot
$ot
Definition: Parser.php:260
Parser\getRevisionRecordObject
getRevisionRecordObject()
Get the revision record object for $this->mRevisionId.
Definition: Parser.php:5871
MediaWiki\Config\ServiceOptions
A class for passing options to services.
Definition: ServiceOptions.php:27
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that $function is deprecated.
Definition: GlobalFunctions.php:1034
Parser\OT_MSG
const OT_MSG
Definition: Parser.php:122
MediaWiki\User\UserIdentity\isRegistered
isRegistered()
Parser\makeKnownLinkHolder
makeKnownLinkHolder(Title $nt, $text='', $trail='', $prefix='')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2707
Parser\firstCallInit
firstCallInit()
Used to do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:521
Parser\$mProfiler
SectionProfiler $mProfiler
Definition: Parser.php:301
Parser\preprocess
preprocess( $text, ?Title $title, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:902
Parser\getFlatSectionInfo
getFlatSectionInfo( $text)
Get an array of preprocessor section information.
Definition: Parser.php:5784
Parser\$mMarkerIndex
$mMarkerIndex
Definition: Parser.php:159
BlockLevelPass\doBlockLevels
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: BlockLevelPass.php:52
Parser\getCustomDefaultSort
getCustomDefaultSort()
Accessor for $mDefaultSort Unlike getDefaultSort(), will return false if none is set.
Definition: Parser.php:6023
wfUrlProtocolsWithoutProtRel
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Definition: GlobalFunctions.php:768
Parser\handleTables
handleTables( $text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1318
$matches
$matches
Definition: NoLocalSettings.php:24
CoreTagHooks\register
static register( $parser)
Definition: CoreTagHooks.php:36
Parser\$contLang
Language $contLang
Definition: Parser.php:312
Parser\makeAnchor
static makeAnchor( $sectionName)
Definition: Parser.php:6034
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:326
PPNode
There are three types of nodes:
Definition: PPNode.php:35
Parser\$factory
ParserFactory $factory
Definition: Parser.php:318
Parser\replaceLinkHoldersPrivate
replaceLinkHoldersPrivate(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4984
LinkHolderArray
Definition: LinkHolderArray.php:33
Parser\__clone
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:492
NS_TEMPLATE
const NS_TEMPLATE
Definition: Defines.php:74
PPFrame\RECOVER_ORIG
const RECOVER_ORIG
Definition: PPFrame.php:36
Linker\makeHeadline
static makeHeadline( $level, $attribs, $anchor, $html, $link, $fallbackAnchor=false)
Create a headline for content.
Definition: Linker.php:1790
Parser\getHookContainer
getHookContainer()
Get a HookContainer capable of returning metadata about hooks or running extension hooks.
Definition: Parser.php:1621
Parser\callParserFunction
callParserFunction(PPFrame $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3312
Parser\extensionSubstitution
extensionSubstitution(array $params, PPFrame $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:3934
Linker\tocLineEnd
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1703
$args
if( $line===false) $args
Definition: mcc.php:124
MapCacheLRU
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:37
Parser\$mLangLinkLanguages
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:282
Parser\markerSkipCallback
markerSkipCallback( $s, callable $callback)
Call a callback function on all regions of the given text that are not inside strip markers,...
Definition: Parser.php:6203
Parser\fetchFileNoRegister
fetchFileNoRegister(Title $title, array $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3789
Parser\fetchTemplate
fetchTemplate(Title $title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3589
Parser\limitationWarn
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:2911
Parser\TOC_END
const TOC_END
Definition: Parser.php:148
MediaWiki\User\UserIdentity\getName
getName()
$title
$title
Definition: testCompression.php:38
Parser\recursiveTagParse
recursiveTagParse( $text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:835
Linker\makeExternalLink
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:847
Parser\fetchFileAndTitle
fetchFileAndTitle(Title $title, array $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3764
Parser\finalizeHeadings
finalizeHeadings( $text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4122
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:626
Parser\$mHighestExpansionDepth
$mHighestExpansionDepth
Definition: Parser.php:220
SectionProfiler
Arbitrary section name based PHP profiling.
Definition: SectionProfiler.php:33
Parser\cleanSig
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4724
RequestContext
Group all the pieces relevant to the context of a request into one instance @newable.
Definition: RequestContext.php:40
Parser\$mUser
User $mUser
Definition: Parser.php:239
Parser\$mImageParamsMagicArray
$mImageParamsMagicArray
Definition: Parser.php:157
SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Defines.php:182
Parser\handleInternalLinks
handleInternalLinks( $text)
Process [[ ]] wikilinks.
Definition: Parser.php:2378
Parser\$mTplRedirCache
$mTplRedirCache
Definition: Parser.php:222
Parser\$tidy
TidyDriverBase $tidy
Definition: Parser.php:351
Parser\$mFirstCall
bool $mFirstCall
Whether firstCallInit still needs to be called.
Definition: Parser.php:164
ParserOptions\getPreSaveTransform
getPreSaveTransform()
Transform wiki markup when saving the page?
Definition: ParserOptions.php:670
Parser\getStripState
getStripState()
Definition: Parser.php:1292
Parser\getContentLanguage
getContentLanguage()
Get the content language that this Parser is using.
Definition: Parser.php:1189
Parser\OT_PLAIN
const OT_PLAIN
Definition: Parser.php:124
$wgTitle
$wgTitle
Definition: Setup.php:800
Parser\handleMagicLinks
handleMagicLinks( $text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1693
Linker\splitTrail
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1811
MediaWiki\Tidy\TidyDriverBase
Base class for HTML cleanup utilities.
Definition: TidyDriverBase.php:8
Parser\insertStripItem
insertStripItem( $text)
Add an item to the strip state Returns the unique tag which must be inserted into the stripped text T...
Definition: Parser.php:1305
Parser\fuzzTestSrvus
fuzzTestSrvus( $text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
Strip/replaceVariables/unstrip for preprocessor regression testing.
Definition: Parser.php:6154
Parser\isCurrentRevisionOfTitleCached
isCurrentRevisionOfTitleCached(Title $title)
Definition: Parser.php:3498
Parser\getFreshParser
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:6323
Parser\getRevisionUser
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:5950
Parser\setOptions
setOptions(ParserOptions $options)
Mutator for the ParserOptions object.
Definition: Parser.php:1066
Parser\getImageParams
getImageParams( $handler)
Definition: Parser.php:5177
Parser\$mAutonumber
$mAutonumber
Definition: Parser.php:192
Parser\replaceLinkHolders
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4973
Parser\addTrackingCategory
addTrackingCategory( $msg)
Definition: Parser.php:4103
Parser\getUrlProtocols
getUrlProtocols()
Definition: Parser.php:5574
Parser\incrementIncludeSize
incrementIncludeSize( $type, $size)
Increment an include size counter.
Definition: Parser.php:4026
Parser\getTargetLanguageConverter
getTargetLanguageConverter()
Shorthand for getting a Language Converter for Target language.
Definition: Parser.php:1597
ParserFactory
Definition: ParserFactory.php:36
Parser\startExternalParse
startExternalParse(?Title $title, ParserOptions $options, $outputType, $clearState=true, $revId=null)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4781
$content
$content
Definition: router.php:76
CoreParserFunctions\register
static register( $parser)
Definition: CoreParserFunctions.php:37
Parser\makeFreeExternalLink
makeFreeExternalLink( $url, $numPostProto)
Make a free external link, given a user-supplied URL.
Definition: Parser.php:1807
Parser\CONSTRUCTOR_OPTIONS
const CONSTRUCTOR_OPTIONS
Definition: Parser.php:362
$s
foreach( $mmfl['setupFiles'] as $fileName) if( $queue) if(empty( $mmfl['quiet'])) $s
Definition: mergeMessageFileList.php:206
NS_MEDIA
const NS_MEDIA
Definition: Defines.php:52
PPFrame\expand
expand( $root, $flags=0)
Expand a document tree node.
ILanguageConverter
The shared interface for all language converters.
Definition: ILanguageConverter.php:28
$wgNoFollowNsExceptions
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
Definition: DefaultSettings.php:4783
$wgNoFollowLinks
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
Definition: DefaultSettings.php:4777
Parser\$mOutput
ParserOutput $mOutput
Definition: Parser.php:191
Parser\$mFunctionHooks
$mFunctionHooks
Definition: Parser.php:152
ParserFactory\$inParserFactory
static int $inParserFactory
Track calls to Parser constructor to aid in deprecation of direct Parser invocation.
Definition: ParserFactory.php:81
Parser\$mOptions
ParserOptions null $mOptions
Definition: Parser.php:248
Parser\$mRevisionUser
$mRevisionUser
Definition: Parser.php:268
Sanitizer\validateTagAttributes
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:390
Parser\extractSections
extractSections( $text, $sectionId, $mode, $newText='')
Break wikitext input into sections, and either pull or replace some particular section's text.
Definition: Parser.php:5607
Hooks\runner
static runner()
Get a HookRunner instance for calling hooks using the new interfaces.
Definition: Hooks.php:172
Parser\OT_HTML
const OT_HTML
Definition: Parser.php:119
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:125
PPFrame
Definition: PPFrame.php:28
$line
$line
Definition: mcc.php:119
Parser\EXT_LINK_URL_CLASS
const EXT_LINK_URL_CLASS
Definition: Parser.php:98
Parser\getUserIdentity
getUserIdentity()
Get an identity of the user for whom the parse is being made, if set.
Definition: Parser.php:1141
Parser\renderImageGallery
renderImageGallery( $text, array $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5013
StringUtils\delimiterExplode
static delimiterExplode( $startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...
Definition: StringUtils.php:59
OutputPage\setupOOUI
static setupOOUI( $skinName='default', $dir='ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
Definition: OutputPage.php:4147
Parser\magicLinkCallback
magicLinkCallback(array $m)
Definition: Parser.php:1727
Parser\getUser
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise.
Definition: Parser.php:1130
wfEscapeWikiText
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking,...
Definition: GlobalFunctions.php:1505
Parser\incrementExpensiveFunctionCount
incrementExpensiveFunctionCount()
Definition: Parser.php:4038
Parser\$mImageParams
$mImageParams
Definition: Parser.php:156
Parser\setFunctionHook
setFunctionHook( $id, callable $callback, $flags=0)
Create a function, e.g. {{sum:1|2|3}}.
Definition: Parser.php:4924
Parser\setLinkID
setLinkID( $id)
Definition: Parser.php:1092
Sanitizer\cleanUrl
static cleanUrl( $url)
Definition: Sanitizer.php:1621
Parser\$magicWordFactory
MagicWordFactory $magicWordFactory
Definition: Parser.php:309
Parser\preprocessToDom
preprocessToDom( $text, $flags=0)
Get the document object model for the given wikitext.
Definition: Parser.php:2836
Parser\setUser
setUser(?UserIdentity $user)
Set the current user.
Definition: Parser.php:965
Parser
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition: Parser.php:87
RequestContext\getMain
static getMain()
Get the RequestContext object associated with the main request.
Definition: RequestContext.php:476
MediaWiki\User\UserOptionsLookup
Provides access to user options.
Definition: UserOptionsLookup.php:29
Title\newFromLinkTarget
static newFromLinkTarget(LinkTarget $linkTarget, $forceClone='')
Returns a Title given a LinkTarget.
Definition: Title.php:291
Parser\getMagicWordFactory
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition: Parser.php:1179
Parser\argSubstitution
argSubstitution(array $piece, PPFrame $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:3881
Linker\makeMediaLinkFile
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:779
Sanitizer\fixTagAttributes
static fixTagAttributes( $text, $element, $sorted=false)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML,...
Definition: Sanitizer.php:703
Parser\getPreloadText
getPreloadText( $text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:944
Preprocessor\resetParser
resetParser(?Parser $parser)
Allows resetting the internal Parser reference after Preprocessor is cloned.
Definition: Preprocessor.php:95
Parser\setOutputType
setOutputType( $ot)
Mutator for the output type.
Definition: Parser.php:1023
Parser\getTemplateDom
getTemplateDom(Title $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3401
$lines
if(!file_exists( $CREDITS)) $lines
Definition: updateCredits.php:45
Parser\OT_PREPROCESS
const OT_PREPROCESS
Definition: Parser.php:121
Parser\getExternalLinkAttribs
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link.
Definition: Parser.php:2207
Hooks\isRegistered
static isRegistered( $name)
Returns true if a hook has a function registered to it.
Definition: Hooks.php:88
Parser\getUserSig
getUserSig(UserIdentity $user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext.
Definition: Parser.php:4650
Parser\$mStripState
StripState $mStripState
Definition: Parser.php:198
Parser\internalParse
internalParse( $text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1528
Parser\validateSig
validateSig( $text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4710
Parser\$mPPNodeCount
$mPPNodeCount
Definition: Parser.php:213
Title
Represents a title within MediaWiki.
Definition: Title.php:46
Parser\resetOutput
resetOutput()
Reset the ParserOutput.
Definition: Parser.php:581
Parser\stripOuterParagraph
static stripOuterParagraph( $html)
Strip outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6304
Parser\$mVarCache
$mVarCache
Definition: Parser.php:155
Parser\$mDefaultSort
$mDefaultSort
Definition: Parser.php:221
Parser\$mExpensiveFunctionCount
$mExpensiveFunctionCount
Definition: Parser.php:228
Parser\normalizeLinkUrl
static normalizeLinkUrl( $url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:2238
MediaWiki\Preferences\SignatureValidator
Definition: SignatureValidator.php:37
Parser\interwikiTransclude
interwikiTransclude(Title $title, $action)
Transclude an interwiki link.
Definition: Parser.php:3812
Parser\$mExtLinkBracketedRegex
$mExtLinkBracketedRegex
Definition: Parser.php:179
wfMatchesDomainList
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
Definition: GlobalFunctions.php:881
Parser\$mIncludeSizes
$mIncludeSizes
Definition: Parser.php:211
$cache
$cache
Definition: mcc.php:33
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:26
Parser\getSection
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($sectionId).
Definition: Parser.php:5735
Xml\isWellFormedXmlFragment
static isWellFormedXmlFragment( $text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:743
Parser\$mRevisionTimestamp
$mRevisionTimestamp
Definition: Parser.php:266
Parser\replaceSection
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldText after the content of the section specified by $sectionId has been replac...
Definition: Parser.php:5751
Sanitizer\ID_PRIMARY
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition: Sanitizer.php:70
Parser\$logger
LoggerInterface $logger
Definition: Parser.php:339
ParserOptions\getUser
getUser()
Current user.
Definition: ParserOptions.php:1082
PPFrame\virtualBracketedImplode
virtualBracketedImplode( $start, $sep, $end,... $params)
Virtual implode with brackets.
Parser\armorLinks
armorLinks( $text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2731
Linker\tocUnindent
static tocUnindent( $level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1664
Linker\makeImageLink
static makeImageLink(Parser $parser, LinkTarget $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:300
getTitle
getTitle()
Definition: RevisionSearchResultTrait.php:81
Parser\getBadFileLookup
getBadFileLookup()
Get the BadFileLookup instance that this Parser is using.
Definition: Parser.php:1199
NS_CATEGORY
const NS_CATEGORY
Definition: Defines.php:78
Parser\getOutput
getOutput()
Definition: Parser.php:1050
StringUtils\delimiterReplace
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to preg_replace() with flags.
Definition: StringUtils.php:248
Parser\handleInternalLinks2
handleInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2388
Parser\getOutputType
getOutputType()
Accessor for the output type.
Definition: Parser.php:1015
Parser\$mGeneratedPPNodeCount
$mGeneratedPPNodeCount
Definition: Parser.php:218
Parser\statelessFetchRevisionRecord
static statelessFetchRevisionRecord(Title $title, $parser=null)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3532
Parser\getHookRunner
getHookRunner()
Get a HookRunner for calling core hooks.
Definition: Parser.php:1633
PPFrame\getArgument
getArgument( $name)
Get an argument to this frame by name.
TextContent\normalizeLineEndings
static normalizeLineEndings( $text)
Do a "\r\n" -> "\n" and "\r" -> "\n" transformation, as well as trim trailing whitespace.
Definition: TextContent.php:203
Parser\getSectionNameFromStrippedText
static getSectionNameFromStrippedText( $text)
Definition: Parser.php:6027
Sanitizer\escapeIdForLink
static escapeIdForLink( $id)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid URL...
Definition: Sanitizer.php:838
Linker\normalizeSubpageLink
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1473
MediaWiki\Config\ServiceOptions\get
get( $key)
Definition: ServiceOptions.php:88
Parser\startParse
startParse(?Title $title, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4796
MediaWiki\HookContainer\HookContainer
HookContainer class.
Definition: HookContainer.php:45
Parser\SFH_NO_HASH
const SFH_NO_HASH
Definition: Parser.php:89
CoreMagicVariables\expand
static expand(Parser $parser, string $id, int $ts, NamespaceInfo $nsInfo, ServiceOptions $svcOptions, LoggerInterface $logger)
Expand the magic variable given by $index.
Definition: CoreMagicVariables.php:48
Parser\$mShowToc
$mShowToc
Definition: Parser.php:230
ImageGalleryBase\factory
static factory( $mode=false, IContextSource $context=null)
Get a new image gallery.
Definition: ImageGalleryBase.php:116
Sanitizer\decodeTagAttributes
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1004
Parser\guessSectionNameFromStrippedText
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition: Parser.php:6086
Parser\fetchTemplateAndTitle
fetchTemplateAndTitle(Title $title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3544
Parser\$languageConverterFactory
LanguageConverterFactory $languageConverterFactory
Definition: Parser.php:315
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Definition: NamespaceInfo.php:35
PPFrame\isTemplate
isTemplate()
Return true if the frame is a template frame.
MediaWiki\HookContainer\HookRunner
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:576
Parser\parseLinkParameter
parseLinkParameter( $value)
Parse the value of 'link' parameter in image syntax ([[File:Foo.jpg|link=<value>]]).
Definition: Parser.php:5453
$t
$t
Definition: testCompression.php:74
Title\legalChars
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:737
Parser\$mRevisionObject
$mRevisionObject
Definition: Parser.php:262
Parser\fetchCurrentRevisionOfTitle
fetchCurrentRevisionOfTitle(Title $title)
Fetch the current revision of a given title.
Definition: Parser.php:3447
Sanitizer\decodeCharReferences
static decodeCharReferences( $text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string.
Definition: Sanitizer.php:1228
Parser\getRevisionTimestamp
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:5925
Html\element
static element( $element, $attribs=[], $contents='')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:234
Parser\expandMagicVariable
expandMagicVariable( $index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2758
NS_FILE
const NS_FILE
Definition: Defines.php:70
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
Parser\pstPass2
pstPass2( $text, UserIdentity $user)
Pre-save transform helper function.
Definition: Parser.php:4574
Parser\preSaveTransform
preSaveTransform( $text, Title $title, UserIdentity $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\r\n" -> "\n" conversion, substituting signatur...
Definition: Parser.php:4537
Parser\$mPreprocessor
Preprocessor $mPreprocessor
Definition: Parser.php:185
Parser\parseWidthParam
static parseWidthParam( $value, $parseHeight=true)
Parse a width parameter of an image link, such as 300px or 200x300px.
Definition: Parser.php:6247
RawMessage
Variant of the Message class.
Definition: RawMessage.php:35
Parser\cleanSigInSig
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4766
Parser\setTitle
setTitle(Title $t=null)
Set the context title.
Definition: Parser.php:978
User
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:66
Parser\replaceLinkHoldersText
replaceLinkHoldersText( $text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:4995
Parser\normalizeUrlComponent
static normalizeUrlComponent( $component, $unsafe)
Definition: Parser.php:2297
Parser\clearTagHooks
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:4876
MWTimestamp\getLocalInstance
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
Definition: MWTimestamp.php:213
OT_WIKI
const OT_WIKI
Definition: Defines.php:169
MediaWiki\User\UserFactory
Creates User objects.
Definition: UserFactory.php:41
Linker\makeExternalImage
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage().
Definition: Linker.php:244
Parser\$mTitle
Title null $mTitle
Since 1.34, leaving mTitle uninitialized or setting mTitle to null is deprecated.
Definition: Parser.php:257
Parser\getLinkRenderer
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:1161
Language
Internationalisation code. See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Definition: Language.php:43
User\getUser
getUser()
Definition: User.php:4507
MediaWiki\Debug\DeprecatablePropertyArray
ArrayAccess implementation that supports deprecating access to certain properties.
Definition: DeprecatablePropertyArray.php:16
Parser\parse
parse( $text, Title $title, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:603
Parser\$mRevisionId
$mRevisionId
Definition: Parser.php:264
RequestContext\setTitle
setTitle(Title $title=null)
Definition: RequestContext.php:172
Parser\$mRevisionSize
$mRevisionSize
Definition: Parser.php:270
Parser\getRevisionId
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5837
Revision\SlotRecord
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:40
OT_PLAIN
const OT_PLAIN
Definition: Defines.php:172
Parser\clearState
clearState()
Clear Parser state.
Definition: Parser.php:533
Parser\guessLegacySectionNameFromWikiText
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition: Parser.php:6074
MWHttpRequest\factory
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
Definition: MWHttpRequest.php:195
MediaWiki\Config\ServiceOptions\assertRequiredOptions
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
Definition: ServiceOptions.php:66
ParserOptions\newFromUser
static newFromUser( $user)
Get a ParserOptions object from a given user.
Definition: ParserOptions.php:1131
Parser\makeImage
makeImage(Title $title, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5231
Parser\stripSectionName
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6125
Parser\EXT_IMAGE_REGEX
const EXT_IMAGE_REGEX
Definition: Parser.php:105
Parser\getPreprocessor
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:1151
Parser\doBlockLevels
doBlockLevels( $text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: Parser.php:2745
Parser\$hookContainer
HookContainer $hookContainer
Definition: Parser.php:345
$type
$type
Definition: testCompression.php:52