MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
45 use Psr\Log\LoggerInterface;
46 use Wikimedia\IPUtils;
47 use Wikimedia\ScopedCallback;
48 
89 class Parser {
90  # Flags for Parser::setFunctionHook
91  public const SFH_NO_HASH = 1;
92  public const SFH_OBJECT_ARGS = 2;
93 
94  # Constants needed for external link processing
95  # Everything except bracket, space, or control characters
96  # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
97  # as well as U+3000 (IDEOGRAPHIC SPACE), see T21052
98  # \x{FFFD} is the Unicode replacement character, which the HTML5 spec
99  # uses to replace invalid HTML characters.
100  public const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
101  # Simplified expression to match an IPv4 or IPv6 address, or
102  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
103  // phpcs:ignore Generic.Files.LineLength
104  private const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
105  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
106  // phpcs:ignore Generic.Files.LineLength
107  private const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
108  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
109 
110  # Regular expression for a non-newline space
111  private const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
112 
117  public const PTD_FOR_INCLUSION = Preprocessor::DOM_FOR_INCLUSION;
118 
119  # Allowed values for $this->mOutputType
120  # Parameter to startExternalParse().
121  public const OT_HTML = 1; # like parse()
122  public const OT_WIKI = 2; # like preSaveTransform()
123  public const OT_PREPROCESS = 3; # like preprocess()
124  public const OT_MSG = 3;
125  # like extractSections() - portions of the original are returned unchanged.
126  public const OT_PLAIN = 4;
127 
145  public const MARKER_SUFFIX = "-QINU`\"'\x7f";
146  public const MARKER_PREFIX = "\x7f'\"`UNIQ-";
147 
148  # Markers used for wrapping the table of contents
149  public const TOC_START = '<mw:toc>';
150  public const TOC_END = '</mw:toc>';
151 
152  # Persistent:
153  private $mTagHooks = [];
154  private $mFunctionHooks = [];
155  private $mFunctionSynonyms = [ 0 => [], 1 => [] ];
156  private $mStripList = [];
157  private $mVarCache = [];
158  private $mImageParams = [];
161  public $mMarkerIndex = 0;
166  public $mFirstCall = false;
167 
168  # Initialised by initializeVariables()
169 
173  private $mVariables;
174 
178  private $mSubstWords;
179 
180  # Initialised in constructor
182 
183  # Initialized in constructor
184 
187  private $mPreprocessor;
188 
189  # Cleared with clearState():
190 
193  private $mOutput;
194  private $mAutonumber;
195 
200  public $mStripState;
201 
205  private $mLinkHolders;
206 
211  public $mLinkID;
223  private $mDefaultSort;
226  public $mHeadings;
230  public $mExpensiveFunctionCount; # number of expensive parser function calls
232  public $mShowToc;
235  private $mTplDomCache;
236 
241  public $mUser; # User object; only used when doing pre-save transform
242 
243  # Temporary
244  # These are variables reset at least once per parse regardless of $clearState
245 
250  public $mOptions;
251 
259  public $mTitle; # Title context, used for self-link rendering and similar things
260  private $mOutputType; # Output type, one of the OT_xxx constants
262  public $ot; # Shortcut alias, see setOutputType()
264  public $mRevisionId; # ID to display in {{REVISIONID}} tags
265 
266  public $mRevisionTimestamp; # The timestamp of the specified revision ID
268  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
270  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
272  public $mInputSize = false; # For {{PAGESIZE}} on current page.
273 
276 
283 
291 
298  public $mInParse = false;
299 
301  private $mProfiler;
302 
306  private $mLinkRenderer;
307 
310 
312  private $contLang;
313 
316 
318  private $factory;
319 
322 
325 
333  private $svcOptions;
334 
337 
339  private $nsInfo;
340 
342  private $logger;
343 
345  private $badFileLookup;
346 
348  private $hookContainer;
349 
351  private $hookRunner;
352 
354  private $tidy;
355 
358 
360  private $userFactory;
361 
365  public const CONSTRUCTOR_OPTIONS = [
366  // See documentation for the corresponding config options
367  'ArticlePath',
368  'EnableScaryTranscluding',
369  'ExtraInterlanguageLinkPrefixes',
370  'FragmentMode',
371  'LanguageCode',
372  'MaxSigChars',
373  'MaxTocLevel',
374  'MiserMode',
375  'ScriptPath',
376  'Server',
377  'ServerName',
378  'ShowHostnames',
379  'SignatureValidation',
380  'Sitename',
381  'StylePath',
382  'TranscludeCacheExpiry',
383  'PreprocessorCacheThreshold',
384  'DisableLangConversion',
385  ];
386 
/**
 * Build a parser from injected services. Direct construction is rejected unless
 * ParserFactory::$inParserFactory is non-zero.
 *
 * The body stores the services on the instance, assembles the bracketed
 * external-link regex from $urlProtocols and the EXT_LINK_* constants, creates a
 * Preprocessor_Hash, registers the core tag hooks, initialises the magic
 * variables and finally runs the ParserFirstCallInit hook.
 *
 * NOTE(review): this listing shows only part of the parameter list (embedded
 * line numbers jump 409 -> 414 -> 415 -> 418 -> 423 -> 427), while the body reads
 * $svcOptions, $magicWordFactory, $contLang, $factory and others. Presumably they
 * are declared on the missing lines; confirm against the full file.
 *
 * @param string $urlProtocols Regex fragment spliced into the external-link pattern
 * @param SpecialPageFactory $spFactory
 * @param LoggerInterface $logger
 * @param WANObjectCache $wanCache Passed through to the preprocessor
 * @throws MWException When not constructed through a ParserFactory
 */
409  public function __construct(
414  string $urlProtocols,
415  SpecialPageFactory $spFactory,
418  LoggerInterface $logger,
423  WANObjectCache $wanCache,
427  ) {
428  if ( ParserFactory::$inParserFactory === 0 ) {
429  // Direct construction of Parser was deprecated in 1.34 and
430  // removed in 1.36; use a ParserFactory instead.
431  throw new MWException( 'Direct construction of Parser not allowed' );
432  }
     // Fail early if any option listed in CONSTRUCTOR_OPTIONS is missing.
433  $svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
434  $this->svcOptions = $svcOptions;
435 
436  $this->mUrlProtocols = $urlProtocols;
     // Pattern for "[url optional-label]" style links; compiled once here.
437  $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
438  self::EXT_LINK_ADDR .
439  self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
440 
441  $this->magicWordFactory = $magicWordFactory;
442 
443  $this->contLang = $contLang;
444 
445  $this->factory = $factory;
446  $this->specialPageFactory = $spFactory;
447  $this->linkRendererFactory = $linkRendererFactory;
448  $this->nsInfo = $nsInfo;
449  $this->logger = $logger;
450  $this->badFileLookup = $badFileLookup;
451 
452  $this->languageConverterFactory = $languageConverterFactory;
453 
454  $this->hookContainer = $hookContainer;
455  $this->hookRunner = new HookRunner( $hookContainer );
456 
457  $this->tidy = $tidy;
458 
459  $this->mPreprocessor = new Preprocessor_Hash(
460  $this,
461  $wanCache,
462  [
463  'cacheThreshold' => $svcOptions->get( 'PreprocessorCacheThreshold' ),
464  'disableLangConversion' => $svcOptions->get( 'DisableLangConversion' ),
465  ]
466  );
467 
468  $this->userOptionsLookup = $userOptionsLookup;
469  $this->userFactory = $userFactory;
470  $this->titleFormatter = $titleFormatter;
471 
472  // These steps used to be done in "::firstCallInit()"
473  // (if you're chasing a reference from some old code)
     // NOTE(review): the listing skips embedded line 474 here; a registration
     // call may have been dropped by the extraction - confirm against the full file.
475  CoreTagHooks::register( $this );
476  $this->initializeVariables();
477 
478  $this->hookRunner->onParserFirstCallInit( $this );
479  }
480 
/**
 * On destruction, drop the link holder array (if set) and then every
 * remaining property of this object.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}
	// Work from a snapshot of the property names so that unsetting does not
	// interfere with the iteration.
	foreach ( array_keys( get_object_vars( $this ) ) as $field ) {
		unset( $this->$field );
	}
}
494 
/**
 * Fix up a freshly cloned parser: mark it as not parsing, detach fields that
 * may be shared by reference with the source object, give the clone its own
 * preprocessor, and run the ParserCloned hook.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: taking a reference to an object field turns the field itself
	// into a reference, and clone copies reference fields as references.
	// Old hooks pass these two fields by reference, so each one is copied
	// into a fresh variable and the field is rebound to that copy.
	foreach ( [ 'mStripState', 'mVarCache' ] as $field ) {
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	// The cloned preprocessor must point at this parser, not the original.
	$this->mPreprocessor = clone $this->mPreprocessor;
	$this->mPreprocessor->resetParser( $this );

	$this->hookRunner->onParserCloned( $this );
}
520 
/**
 * No-op kept for existing callers.
 */
public function firstCallInit() {
	// Intentionally empty: the steps that used to live here now run in the
	// constructor. This method should be hard-deprecated once the remaining
	// calls are removed.
}
534 
/**
 * Reset per-parse state: a fresh ParserOutput, LinkHolderArray, StripState and
 * SectionProfiler, zeroed counters and emptied caches. Ends by running the
 * ParserClearState hook.
 */
540  public function clearState() {
541  $this->resetOutput();
542  $this->mAutonumber = 0;
     // NOTE(review): the listing jumps from embedded line 544 to 546 inside this
     // argument list, so an argument may have been dropped by the extraction.
     // Verify against the LinkHolderArray constructor.
543  $this->mLinkHolders = new LinkHolderArray(
544  $this,
546  $this->getHookContainer()
547  );
548  $this->mLinkID = 0;
549  $this->mRevisionTimestamp = null;
550  $this->mRevisionId = null;
551  $this->mRevisionUser = null;
552  $this->mRevisionSize = null;
553  $this->mRevisionRecordObject = null;
554  $this->mVarCache = [];
555  $this->mUser = null;
556  $this->mLangLinkLanguages = [];
557  $this->currentRevisionCache = null;
558 
559  $this->mStripState = new StripState( $this );
560 
561  # Clear these on every parse, T6549
562  $this->mTplRedirCache = [];
563  $this->mTplDomCache = [];
564 
565  $this->mShowToc = true;
566  $this->mForceTocPosition = false;
     // Running totals that makeLimitReport() compares against the include-size limit.
567  $this->mIncludeSizes = [
568  'post-expand' => 0,
569  'arg' => 0,
570  ];
571  $this->mPPNodeCount = 0;
572  $this->mGeneratedPPNodeCount = 0;
573  $this->mHighestExpansionDepth = 0;
574  $this->mDefaultSort = false;
575  $this->mHeadings = [];
576  $this->mDoubleUnderscores = [];
577  $this->mExpensiveFunctionCount = 0;
578 
579  $this->mProfiler = new SectionProfiler();
580 
581  $this->hookRunner->onParserClearState( $this );
582  }
583 
/**
 * Replace the current ParserOutput with a new one and register its
 * recordOption() method as a watcher on the current ParserOptions.
 */
public function resetOutput() {
	$output = new ParserOutput();
	$this->mOutput = $output;
	$this->mOptions->registerWatcher( [ $output, 'recordOption' ] );
}
592 
/**
 * Convert wikitext to HTML and return the populated ParserOutput.
 *
 * @param string $text Wikitext to parse
 * @param PageReference $page Page the text belongs to
 * @param ParserOptions $options
 * @param bool $linestart Passed on to internalParseHalfParsed()
 * @param bool $clearState Passed on to startParse(); when true the DELETE
 *   character is also replaced in the input and the parser lock is taken
 * @param int|null $revid Revision ID to use for the duration of this parse
 * @return ParserOutput
 */
public function parse(
	$text, PageReference $page, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// Strip markers are built around U+007F DELETE, so that character
		// must not be present in the incoming text.
		$text = strtr( $text, "\x7f", "?" );
		// Held in a local so the lock lasts until this method returns.
		$lockScope = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $page, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the revision state so it can be put back before returning.
	$savedRevisionState = [
		$this->mRevisionId,
		$this->mRevisionRecordObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize,
	];
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionRecordObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	$text = $this->internalParse( $text );
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Set a converted title unless conversion is disabled by the options, by
	// a double-underscore switch, or a display title has already been set.
	if ( !$options->getDisableTitleConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !isset( $this->mDoubleUnderscores['notitleconvert'] )
		&& $this->mOutput->getDisplayTitle() === false
	) {
		$titleText = $this->getTargetLanguageConverter()->getConvRuleTitle();
		if ( !$titleText ) {
			$titleText = $this->getTargetLanguageConverter()->convertTitle( $page );
		}
		$this->mOutput->setTitleText( $titleText );
	}

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$text .= $this->makeLimitReport();
	}

	# Wrap non-interface parser output in a <div> so it can be targeted
	# with CSS (T37247)
	$wrapperClass = $this->mOptions->getWrapOutputClass();
	if ( $wrapperClass !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $wrapperClass );
	}

	$this->mOutput->setText( $text );

	// Put the saved revision state back and clear per-parse scratch values.
	[
		$this->mRevisionId,
		$this->mRevisionRecordObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize,
	] = $savedRevisionState;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
706 
/**
 * Record limit-report data on the ParserOutput and build the HTML comments
 * (limit report plus transclusion timing report) appended to parsed text.
 *
 * @return string Two HTML comment blocks
 */
protected function makeLimitReport() {
	$maxIncludeSize = $this->mOptions->getMaxIncludeSize();

	$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuTime !== null ) {
		$this->mOutput->setLimitReportData( 'limitreport-cputime', sprintf( "%.3f", $cpuTime ) );
	}

	$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
	$this->mOutput->setLimitReportData( 'limitreport-walltime', sprintf( "%.3f", $wallTime ) );

	// Each entry is a [ used, limit ] pair.
	$usageByKey = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $maxIncludeSize ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $usageByKey as $reportKey => $usedAndLimit ) {
		$this->mOutput->setLimitReportData( $reportKey, $usedAndLimit );
	}

	foreach ( $this->mStripState->getLimitReport() as [ $reportKey, $reportValue ] ) {
		$this->mOutput->setLimitReportData( $reportKey, $reportValue );
	}

	$this->hookRunner->onParserLimitReportPrepare( $this, $this->mOutput );

	$report = "NewPP limit report\n";
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$report .= 'Parsed by ' . wfHostname() . "\n";
	}
	$report .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
	$report .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
	$reducedExpiry = $this->mOutput->hasReducedExpiry() ? 'true' : 'false';
	$report .= 'Reduced expiry: ' . $reducedExpiry . "\n";
	$report .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";

	foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
		// A hook handler returning false suppresses the default formatting below.
		if ( !$this->hookRunner->onParserLimitReportFormat( $key, $value, $report, false, false ) ) {
			continue;
		}
		$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			$valueMsg = new RawMessage( '$1' );
		}
		if ( $keyMsg->isDisabled() || $valueMsg->isDisabled() ) {
			continue;
		}
		$valueMsg->params( $value );
		$report .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
	}

	// The report goes into an HTML comment rather than real HTML: decode the
	// entities, then re-encode only what must be hidden inside a comment.
	$report = htmlspecialchars_decode( $report );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$report = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $report );
	$text = "\n<!-- \n" . $report . "-->\n";

	// Template profiling data: the ten entries with the highest 'real' time.
	$statsByFunction = $this->mProfiler->getFunctionStats();
	uasort( $statsByFunction, static function ( $left, $right ) {
		// Descending by 'real'
		return $right['real'] <=> $left['real'];
	} );
	$profileReport = [];
	foreach ( array_slice( $statsByFunction, 0, 10 ) as $entry ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$entry['%real'],
			$entry['real'],
			$entry['calls'],
			htmlspecialchars( $entry['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
	$text .= implode( "\n", $profileReport ) . "\n-->\n";

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Cache-related metadata
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData( 'cachereport-timestamp', $this->mOutput->getCacheTime() );
	$this->mOutput->setLimitReportData( 'cachereport-ttl', $this->mOutput->getCacheExpiry() );
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasDynamicContent()
	);

	return $text;
}
815 
/**
 * Run internalParse() on $text as a non-main document.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame to expand in, or false for none
 * @return string Result of internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	return $this->internalParse( $text, false, $frame );
}
845 
/**
 * Like recursiveTagParse(), but additionally runs the result through
 * internalParseHalfParsed() with $isMain = false.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame to expand in, or false for none
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
870 
/**
 * Parse extension tag content the way parse() treats a whole document:
 * recursiveTagParse(), the ParserAfterParse hook, then
 * internalParseHalfParsed() with $isMain = true.
 *
 * @param string $text
 * @return string
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$parsed = $this->recursiveTagParse( $text );
	// The hook receives the text along with the strip state and may alter it.
	$this->hookRunner->onParserAfterParse( $this, $parsed, $this->mStripState );
	return $this->internalParseHalfParsed( $parsed, true );
}
896 
/**
 * Expand variables in $text in OT_PREPROCESS mode and unstrip the result.
 *
 * @param string $text
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to set for this run, if any
 * @param PPFrame|false $frame Frame passed on to replaceVariables()
 * @return string
 */
public function preprocess( $text, ?PageReference $page,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Held in a local so the lock lasts until this method returns.
	$lockScope = $this->lock();
	$this->startParse( $page, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	$this->hookRunner->onParserBeforePreprocess( $this, $text, $this->mStripState );
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
922 
/**
 * Expand variables in $text and unstrip the result, without touching parser
 * state or taking the lock.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame passed on to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
937 
/**
 * Substitute $params into $text as message parameters, then preprocess the
 * result in inclusion mode (OT_PLAIN) and return the expanded, unstripped text.
 *
 * @param string $text
 * @param PageReference $page
 * @param ParserOptions $options
 * @param array $params Values for the $1, $2, ... placeholders in $text
 * @return string
 */
public function getPreloadText( $text, PageReference $page, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PLAIN, true );

	// $flags must be defined before it is handed to expand(). The flag names
	// suggest that template arguments and template calls are left unexpanded.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
966 
/**
 * Set the user stored on the parser. A non-null identity is converted
 * through the user factory; null clears the stored user.
 *
 * @param UserIdentity|null $user
 */
public function setUser( ?UserIdentity $user ) {
	$this->mUser = $user
		? $this->userFactory->newFromUserIdentity( $user )
		: null;
}
981 
/**
 * Set the context title. Thin wrapper around setPage().
 *
 * The parameter is declared explicitly nullable, matching setPage(); relying
 * on "= null" to make a typed parameter nullable is deprecated as of PHP 8.4.
 *
 * @param Title|null $t
 */
public function setTitle( ?Title $t = null ) {
	$this->setPage( $t );
}
992 
/**
 * Get the context title, lazily falling back to the placeholder title
 * Special:Badtitle/Parser when none has been set.
 *
 * @return Title
 */
public function getTitle(): Title {
	if ( $this->mTitle ) {
		return $this->mTitle;
	}
	$this->mTitle = Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );
	return $this->mTitle;
}
1004 
/**
 * Set the page used as context for parsing. Null selects the placeholder
 * title Special:Badtitle/Parser. Any fragment is stripped before storing.
 *
 * @param PageReference|null $t
 */
public function setPage( ?PageReference $t = null ) {
	if ( !$t ) {
		$t = Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );
	} else {
		// For now (early 1.37 alpha), always convert to Title, so we don't have to do it over
		// and over again in other methods. Eventually, we will no longer need to have a Title
		// instance internally.
		// Without this conversion a plain PageReference would reach the
		// fragment calls below and be stored where getTitle() promises a Title.
		$t = Title::castFromPageReference( $t );
	}

	if ( $t->hasFragment() ) {
		# Strip the fragment to avoid various odd effects
		$this->mTitle = $t->createFragmentTarget( '' );
	} else {
		$this->mTitle = $t;
	}
}
1028 
/**
 * Get the page currently used as parsing context.
 *
 * @return PageReference|null The value held in $mTitle
 */
public function getPage(): ?PageReference {
	return $this->mTitle;
}
1037 
/**
 * Get the current output type.
 *
 * @return int One of the OT_* constants, as stored by setOutputType()
 */
public function getOutputType(): int {
	return $this->mOutputType;
}
1046 
/**
 * Store the output type and rebuild the $ot shortcut array, which maps
 * 'html', 'wiki', 'pre' and 'plain' to whether that type is selected.
 *
 * @param int $ot One of the OT_* constants
 */
public function setOutputType( $ot ): void {
	$this->mOutputType = $ot;

	$typeByShortcut = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$selected = [];
	foreach ( $typeByShortcut as $shortcut => $type ) {
		// Loose comparison on purpose, so numeric strings keep matching.
		$selected[$shortcut] = ( $ot == $type );
	}
	# Shortcut alias
	$this->ot = $selected;
}
1062 
/**
 * Deprecated (1.35) combined accessor/mutator for the output type,
 * implemented with wfSetVar().
 *
 * @param int|null $x Value handed to wfSetVar(); presumably null means
 *   "leave unchanged" - confirm against wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
1074 
/**
 * Get the ParserOutput currently being populated (created by resetOutput()).
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
1082 
/**
 * Get the options object stored in $mOptions.
 *
 * @return ParserOptions|null Presumably null before any options were set - confirm
 */
public function getOptions() {
	return $this->mOptions;
}
1090 
/**
 * Replace the options object stored in $mOptions.
 *
 * @param ParserOptions $options
 */
public function setOptions( ParserOptions $options ): void {
	$this->mOptions = $options;
}
1099 
/**
 * Deprecated (1.35) combined accessor/mutator for the parser options,
 * implemented with wfSetVar().
 *
 * @param ParserOptions|null $x Value handed to wfSetVar(); presumably null
 *   means "leave unchanged" - confirm against wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
1111 
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The ID before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
1119 
/**
 * Overwrite the link ID counter.
 *
 * @param int $id Value that the next nextLinkID() call will return
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1127 
/**
 * Alias of getTargetLanguage().
 *
 * @return mixed Whatever getTargetLanguage() returns
 */
public function getFunctionLang() {
	return $this->getTargetLanguage();
}
1136 
/**
 * Work out the target language for this parse. Precedence: the target
 * language set on the options; else, for interface messages, the user
 * language object from the options; else the page language of the title.
 *
 * @return mixed The language object from whichever source applied
 */
public function getTargetLanguage() {
	$language = $this->mOptions->getTargetLanguage();
	if ( $language === null ) {
		$language = $this->mOptions->getInterfaceMessage()
			? $this->mOptions->getUserLangObj()
			: $this->getTitle()->getPageLanguage();
	}
	return $language;
}
1156 
/**
 * Get the user stored on the parser (see setUser()), falling back to the
 * user from the parser options when none is stored.
 *
 * @return mixed The stored user, or whatever ParserOptions::getUser() returns
 */
public function getUser() {
	return $this->mUser ?? $this->mOptions->getUser();
}
1170 
/**
 * Same value as getUser(), exposed under the UserIdentity return type.
 *
 * @return UserIdentity
 */
public function getUserIdentity(): UserIdentity {
	return $this->getUser();
}
1178 
/**
 * Get the preprocessor instance created in the constructor (or re-created
 * by __clone()).
 *
 * @return Preprocessor_Hash
 */
public function getPreprocessor() {
	return $this->mPreprocessor;
}
1188 
/**
 * Get the link renderer, creating it on first use with the stub threshold
 * taken from the parser options at that moment.
 *
 * @return mixed The renderer produced by the link renderer factory
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	// XXX The renderer is built from the options current at this point and
	// then cached forever.
	$this->mLinkRenderer = $this->linkRendererFactory->create();
	$threshold = $this->getOptions()->getStubThreshold();
	$this->mLinkRenderer->setStubThreshold( $threshold );

	return $this->mLinkRenderer;
}
1206 
/**
 * Get the magic word factory injected through the constructor.
 *
 * @return mixed The value stored in $magicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1216 
/**
 * Get the content language object injected through the constructor.
 *
 * @return mixed The value stored in $contLang
 */
public function getContentLanguage() {
	return $this->contLang;
}
1226 
/**
 * Get the bad-file lookup service injected through the constructor.
 *
 * @return mixed The value stored in $badFileLookup
 */
public function getBadFileLookup() {
	return $this->badFileLookup;
}
1236 
/**
 * Replace each of the given XML-style tags (and every HTML comment) in
 * $text with a unique marker and report what was removed through $matches.
 *
 * Each $matches entry is keyed by marker and holds
 * [ element name, inner content or null, decoded attributes, original text ].
 * An element without an end tag runs to the end of the text.
 *
 * @param string[] $elements Tag names, joined into a regex alternation as-is
 * @param string $text
 * @param array &$matches Overwritten with the extracted elements
 * @return string $text with the extracted pieces replaced by markers
 */
public static function extractTagsAndParams( array $elements, $text, &$matches ) {
	// Marker counter shared by all calls during the request
	static $n = 1;
	$matches = [];
	$result = '';

	$alternation = implode( '|', $elements );
	$openRegex = "/<($alternation)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$parts = preg_split( $openRegex, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$result .= $parts[0];
		$partCount = count( $parts );
		if ( $partCount < 5 ) {
			// No further tag or comment
			break;
		}
		if ( $partCount > 5 ) {
			# comment
			[ $element, $attributes, $close, $inside ] = [ $parts[4], '', '', $parts[5] ];
		} else {
			# tag
			[ , $element, $attributes, $close, $inside ] = $parts;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$result .= $marker;

		$content = null;
		$tail = null;
		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$text = $inside;
		} else {
			$closeRegex = $element === '!--'
				? '/(-->)/'
				: "/(<\\/$element\\s*>)/i";
			$remainder = preg_split( $closeRegex, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $remainder[0];
			if ( count( $remainder ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				[ , $tail, $text ] = $remainder;
			}
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail",
		];
	}
	return $result;
}
1313 
/**
 * Get the strip list held in $mStripList.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1322 
/**
 * Get the strip state created by the most recent clearState().
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1330 
/**
 * Register $text in the strip state as a general item under a new marker
 * and return that marker.
 *
 * @param string $text
 * @return string The generated strip marker
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1346 
/**
 * Convert wikitext table markup ("{|", "|-", "|", "!", "|+", "|}") into HTML
 * table elements, working line by line. Nested tables are tracked with a set
 * of parallel stacks, one entry per currently open table.
 *
 * @param string $text
 * @return string Text with table markup replaced by HTML
 */
1353  private function handleTables( $text ) {
1354  $lines = StringUtils::explode( "\n", $text );
1355  $out = '';
1356  $td_history = []; # Is currently a td tag open?
1357  $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1358  $tr_history = []; # Is currently a tr tag open?
1359  $tr_attributes = []; # history of tr attributes
1360  $has_opened_tr = []; # Did this table open a <tr> element?
1361  $indent_level = 0; # indent level of the table
1362 
1363  foreach ( $lines as $outLine ) {
1364  $line = trim( $outLine );
1365 
1366  if ( $line === '' ) { # empty line, go to next line
1367  $out .= $outLine . "\n";
1368  continue;
1369  }
1370 
1371  $first_character = $line[0];
1372  $first_two = substr( $line, 0, 2 );
1373  $matches = [];
1374 
1375  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1376  # First check if we are starting a new table
      # The number of leading colons gives the indent level
1377  $indent_level = strlen( $matches[1] );
1378 
1379  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1380  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1381 
1382  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
      # Push a fresh entry on every stack for the newly opened table
1383  array_push( $td_history, false );
1384  array_push( $last_tag_history, '' );
1385  array_push( $tr_history, false );
1386  array_push( $tr_attributes, '' );
1387  array_push( $has_opened_tr, false );
1388  } elseif ( count( $td_history ) == 0 ) {
1389  # Don't do any of the following
      # (no table is open, so the line passes through unchanged)
1390  $out .= $outLine . "\n";
1391  continue;
1392  } elseif ( $first_two === '|}' ) {
1393  # We are ending a table
1394  $line = '</table>' . substr( $line, 2 );
1395  $last_tag = array_pop( $last_tag_history );
1396 
      # A table that never opened a row gets one empty row
1397  if ( !array_pop( $has_opened_tr ) ) {
1398  $line = "<tr><td></td></tr>{$line}";
1399  }
1400 
1401  if ( array_pop( $tr_history ) ) {
1402  $line = "</tr>{$line}";
1403  }
1404 
1405  if ( array_pop( $td_history ) ) {
1406  $line = "</{$last_tag}>{$line}";
1407  }
1408  array_pop( $tr_attributes );
1409  if ( $indent_level > 0 ) {
1410  $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1411  } else {
1412  $outLine = $line;
1413  }
1414  } elseif ( $first_two === '|-' ) {
1415  # Now we have a table row
1416  $line = preg_replace( '#^\|-+#', '', $line );
1417 
1418  # Whats after the tag is now only attributes
1419  $attributes = $this->mStripState->unstripBoth( $line );
1420  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
      # The attributes are stored and only emitted when the first cell of the row arrives
1421  array_pop( $tr_attributes );
1422  array_push( $tr_attributes, $attributes );
1423 
1424  $line = '';
1425  $last_tag = array_pop( $last_tag_history );
1426  array_pop( $has_opened_tr );
1427  array_push( $has_opened_tr, true );
1428 
1429  if ( array_pop( $tr_history ) ) {
1430  $line = '</tr>';
1431  }
1432 
1433  if ( array_pop( $td_history ) ) {
1434  $line = "</{$last_tag}>{$line}";
1435  }
1436 
1437  $outLine = $line;
1438  array_push( $tr_history, false );
1439  array_push( $td_history, false );
1440  array_push( $last_tag_history, '' );
1441  } elseif ( $first_character === '|'
1442  || $first_character === '!'
1443  || $first_two === '|+'
1444  ) {
1445  # This might be cell elements, td, th or captions
1446  if ( $first_two === '|+' ) {
1447  $first_character = '+';
1448  $line = substr( $line, 2 );
1449  } else {
1450  $line = substr( $line, 1 );
1451  }
1452 
1453  // Implies both are valid for table headings.
1454  if ( $first_character === '!' ) {
1455  $line = StringUtils::replaceMarkup( '!!', '||', $line );
1456  }
1457 
1458  # Split up multiple cells on the same line.
1459  # FIXME : This can result in improper nesting of tags processed
1460  # by earlier parser steps.
1461  $cells = explode( '||', $line );
1462 
1463  $outLine = '';
1464 
1465  # Loop through each table cell
1466  foreach ( $cells as $cell ) {
1467  $previous = '';
      # Captions do not open a row; td and th cells do
1468  if ( $first_character !== '+' ) {
1469  $tr_after = array_pop( $tr_attributes );
1470  if ( !array_pop( $tr_history ) ) {
1471  $previous = "<tr{$tr_after}>\n";
1472  }
1473  array_push( $tr_history, true );
1474  array_push( $tr_attributes, '' );
1475  array_pop( $has_opened_tr );
1476  array_push( $has_opened_tr, true );
1477  }
1478 
1479  $last_tag = array_pop( $last_tag_history );
1480 
      # Close the previously open cell, if any, before starting this one
1481  if ( array_pop( $td_history ) ) {
1482  $previous = "</{$last_tag}>\n{$previous}";
1483  }
1484 
1485  if ( $first_character === '|' ) {
1486  $last_tag = 'td';
1487  } elseif ( $first_character === '!' ) {
1488  $last_tag = 'th';
1489  } elseif ( $first_character === '+' ) {
1490  $last_tag = 'caption';
1491  } else {
1492  $last_tag = '';
1493  }
1494 
1495  array_push( $last_tag_history, $last_tag );
1496 
1497  # A cell could contain both parameters and data
1498  $cell_data = explode( '|', $cell, 2 );
1499 
1500  # T2553: Note that a '|' inside an invalid link should not
1501  # be mistaken as delimiting cell parameters
1502  # Bug T153140: Neither should language converter markup.
1503  if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1504  $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1505  } elseif ( count( $cell_data ) == 1 ) {
1506  // Whitespace in cells is trimmed
1507  $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1508  } else {
1509  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1510  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1511  // Whitespace in cells is trimmed
1512  $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1513  }
1514 
1515  $outLine .= $cell;
1516  array_push( $td_history, true );
1517  }
1518  }
1519  $out .= $outLine . "\n";
1520  }
1521 
1522  # Closing open td, tr && table
      # NOTE(review): this emits "</td>" for any open cell without consulting
      # $last_tag_history, so an unterminated th or caption would also be closed
      # with </td> - confirm whether that is intended.
1523  while ( count( $td_history ) > 0 ) {
1524  if ( array_pop( $td_history ) ) {
1525  $out .= "</td>\n";
1526  }
1527  if ( array_pop( $tr_history ) ) {
1528  $out .= "</tr>\n";
1529  }
1530  if ( !array_pop( $has_opened_tr ) ) {
1531  $out .= "<tr><td></td></tr>\n";
1532  }
1533 
1534  $out .= "</table>\n";
1535  }
1536 
1537  # Remove trailing line-ending (b/c)
1538  if ( substr( $out, -1 ) === "\n" ) {
1539  $out = substr( $out, 0, -1 );
1540  }
1541 
1542  # special case: don't return empty table
1543  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1544  $out = '';
1545  }
1546 
1547  return $out;
1548  }
1549 
/**
 * Run the main wikitext transformation passes on $text: variable/template
 * expansion, HTML sanitising, tables, horizontal rules, double underscores,
 * headings, internal links, quotes, external links and magic links.
 * parse() feeds the result to internalParseHalfParsed().
 *
 * @param string $text
 * @param bool $isMain Passed on to finalizeHeadings()
 * @param PPFrame|false $frame Frame used for expansion; false to use
 *   replaceVariables() without a frame
 * @return string The transformed text, or the text as left by the
 *   ParserBeforeInternalParse hook when that hook aborts
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	# Hook to suspend the parser in this state
	if ( !$this->hookRunner->onParserBeforeInternalParse( $this, $text, $this->mStripState ) ) {
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		if ( !$frame->depth ) {
			$flag = 0;
		} else {
			# Included document: $flag must be set on this path as well,
			# otherwise it is undefined when passed to preprocessToDom()
			$flag = Preprocessor::DOM_FOR_INCLUSION;
		}
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	$this->hookRunner->onInternalParseBeforeSanitize( $this, $text, $this->mStripState );
	$text = Sanitizer::removeHTMLtags(
		$text,
		// Callback from the Sanitizer for expanding items found in
		// HTML attribute values, so they can be safely tested and escaped.
		function ( &$text, $frame = false ) {
			$text = $this->replaceVariables( $text, $frame );
			$text = $this->mStripState->unstripBoth( $text );
		},
		false,
		[],
		[]
	);
	$this->hookRunner->onInternalParseBeforeLinks( $this, $text, $this->mStripState );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->handleTables( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->handleDoubleUnderscore( $text );

	$text = $this->handleHeadings( $text );
	$text = $this->handleInternalLinks( $text );
	$text = $this->handleAllQuotes( $text );
	$text = $this->handleExternalLinks( $text );

	# handleInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from handleExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->handleMagicLinks( $text );
	$text = $this->finalizeHeadings( $text, $origText, $isMain );

	return $text;
}
1626 
1633  return $this->languageConverterFactory->getLanguageConverter(
1634  $this->getTargetLanguage()
1635  );
1636  }
1637 
1644  return $this->languageConverterFactory->getLanguageConverter(
1645  $this->getContentLanguage()
1646  );
1647  }
1648 
/**
 * Return the hook container stored on this parser instance.
 *
 * @return mixed The value of $this->hookContainer
 */
1656  protected function getHookContainer() {
1657  return $this->hookContainer;
1658  }
1659 
/**
 * Return the hook runner stored on this parser instance; the parsing
 * methods in this class use it to fire their on*() hooks.
 *
 * @return mixed The value of $this->hookRunner
 */
1668  protected function getHookRunner() {
1669  return $this->hookRunner;
1670  }
1671 
/**
 * Finish turning half-parsed text into HTML: unstrip general markers, run
 * the block-level pass, replace link holders, optionally apply language
 * conversion, unstrip the remaining markers and tidy the result.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain When true, the ParserAfterTidy hook is run
 * @param bool $linestart Forwarded to BlockLevelPass::doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = BlockLevelPass::doBlockLevels(
		$this->mStripState->unstripGeneral( $text ),
		$linestart
	);

	$this->replaceLinkHoldersPrivate( $text );

	// Conversion happens only when it is not disabled by the options or by
	// the 'nocontentconvert' double-underscore, and this is not an interface message.
	$wantsConversion = !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage();
	if ( $wantsConversion ) {
		# The position of the convert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $this->getTargetLanguageConverter()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = $this->tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );

	if ( $isMain ) {
		$this->hookRunner->onParserAfterTidy( $this, $text );
	}

	return $text;
}
1717 
/**
 * Find "magic" link candidates in the text and hand each one to
 * magicLinkCallback().
 *
 * The pattern matches, in this order of alternatives: whole <a>...</a>
 * elements (m[1]) and any other HTML tag (m[2]), which the callback leaves
 * untouched; free external links that start with one of the protocols from
 * wfUrlProtocolsWithoutProtRel() (m[3], with the post-protocol part in m[4]);
 * "RFC <number>" / "PMID <number>" (number in m[5]); and "ISBN <number>"
 * with optional dash or non-newline-space delimiters (number in m[6]).
 *
 * @param string $text
 * @return string
 */
1728  private function handleMagicLinks( $text ) {
1729  $prots = wfUrlProtocolsWithoutProtRel();
1730  $urlChar = self::EXT_LINK_URL_CLASS;
1731  $addr = self::EXT_LINK_ADDR;
1732  $space = self::SPACE_NOT_NL; # non-newline space
1733  $spdash = "(?:-|$space)"; # a dash or a non-newline space
1734  $spaces = "$space++"; # possessive match of 1 or more spaces
1735  $text = preg_replace_callback(
1736  '!(?: # Start cases
1737  (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
1738  (<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
1739  (\b # m[3]: Free external links
1740  (?i:$prots)
1741  ($addr$urlChar*) # m[4]: Post-protocol path
1742  ) |
1743  \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
1744  ([0-9]+)\b |
1745  \bISBN $spaces ( # m[6]: ISBN, capture number
1746  (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
1747  (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
1748  [0-9Xx] # check digit
1749  )\b
1750  )!xu",
1751  [ $this, 'magicLinkCallback' ],
1752  $text
1753  );
1754  return $text;
1755  }
1756 
/**
 * preg_replace_callback() handler for handleMagicLinks().
 *
 * Group meaning: 1 = anchor element, 2 = other HTML tag, 3/4 = free external
 * link and its post-protocol part, 5 = RFC/PMID number, 6 = ISBN number.
 *
 * @param array $m Match array
 * @return string HTML replacement, or the matched text unchanged
 * @throws MWException If group 5 matched but the text starts with neither
 *   "RFC" nor "PMID"
 */
private function magicLinkCallback( array $m ) {
	# Anchors and other HTML elements are passed through untouched
	if ( ( $m[1] ?? '' ) !== '' || ( $m[2] ?? '' ) !== '' ) {
		return $m[0];
	}

	# Free external link
	if ( ( $m[3] ?? '' ) !== '' ) {
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	# RFC or PMID
	if ( ( $m[5] ?? '' ) !== '' ) {
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $m[0];
			}
			[ $keyword, $urlmsg, $cssClass, $trackingCat ] =
				[ 'RFC', 'rfcurl', 'mw-magiclink-rfc', 'magiclink-tracking-rfc' ];
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $m[0];
			}
			[ $keyword, $urlmsg, $cssClass, $trackingCat ] =
				[ 'PMID', 'pubmedurl', 'mw-magiclink-pmid', 'magiclink-tracking-pmid' ];
		} else {
			// Should never happen
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink(
			$url,
			"{$keyword} {$id}",
			true,
			$cssClass,
			[],
			$this->getTitle()
		);
	}

	# ISBN: anything else, or ISBN links switched off, is returned as-is
	if ( ( $m[6] ?? '' ) === '' || !$this->mOptions->getMagicISBNLinks() ) {
		return $m[0];
	}

	# Display form: every kind of non-newline space becomes a plain space
	$isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
	# Lookup form: no delimiters, upper-case check digit
	$num = strtr( $isbn, [
		'-' => '',
		' ' => '',
		'x' => 'X',
	] );
	$this->addTrackingCategory( 'magiclink-tracking-isbn' );
	return $this->getLinkRenderer()->makeKnownLink(
		SpecialPage::getTitleFor( 'Booksources', $num ),
		"ISBN $isbn",
		[
			'class' => 'internal mw-magiclink-isbn',
			'title' => false // suppress title attribute
		]
	);
}
1832 
/**
 * Turn a free (unbracketed) external URL into a link or an external image.
 *
 * Anything from the first escaped '<', '>' or non-breaking space onwards,
 * and any trailing punctuation, is moved out of the URL and appended after
 * the generated HTML.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Number of bytes that followed the protocol in
 *   the match; used to detect that nothing but the protocol is left
 * @return string HTML
 */
private function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$found = [];
	if ( preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$found,
		PREG_OFFSET_CAPTURE
	) ) {
		$cut = $found[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Trailing punctuation goes to $trail. Without a left bracket in the
	# URL, right brackets are considered fair game too.
	$punctuation = strpos( $url, '(' ) === false ? ',;\.:!?)' : ',;\.:!?';

	$reversed = strrev( $url );
	$trailLen = strspn( $reversed, $punctuation );
	# Don't break a trailing HTML entity by moving the ; into $trail.
	# This is in hot code: substr_compare avoids creating a new string for
	# the comparison, and matching the reversed string from a fixed offset
	# avoids a slow $-anchored preg_match.
	# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLen
		&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $found, 0, $trailLen )
	) {
		$trailLen--;
	}
	if ( $trailLen ) {
		$trail = substr( $url, -$trailLen ) . $trail;
		$url = substr( $url, 0, -$trailLen );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$html = Linker::makeExternalLink(
			$url,
			$this->getTargetLanguageConverter()->markNoConversion( $url ),
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->getTitle()
		);
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}
	return $html . $trail;
}
1911 
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from six equals signs down to one, so longer
 * markers are consumed before shorter ones can match them.
 *
 * @param string $text
 * @return string
 */
private function handleHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$marker = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = "/^(?:{$marker})[ \\t]*(.+?)[ \\t]*(?:{$marker})\\s*$/m";
		$text = preg_replace( $pattern, "<h{$level}>\\1</h{$level}>", $text );
	}
	return $text;
}
1927 
/**
 * Apply doQuotes() to every line of the text and join the results with
 * newlines again.
 *
 * @param string $text
 * @return string
 */
private function handleAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1944 
/**
 * Convert apostrophe-based bold/italic wiki markup to <b>/<i> HTML.
 *
 * Runs of two, three and five apostrophes toggle italics, bold and both
 * respectively. A run of four keeps one apostrophe as text; a run longer
 * than five keeps everything but the last five as text. If the text has an
 * odd number of both bold and italic markers, one bold marker is turned
 * into a literal apostrophe followed by an italic marker. Tags still open
 * at the end are closed.
 *
 * Callers in this file pass a single line or a link caption.
 *
 * @param string $text
 * @return string
 */
public function doQuotes( $text ) {
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		// No quote markup at all
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups. Odd indexes of $arr hold the apostrophe
	// runs, even indexes hold the text between them.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state records the open tags in opening order ('i', 'b', 'bi', 'ib'),
	// or 'both' after a ''''' whose tag order is not yet known; in that state
	// text is collected in $buffer until the next marker decides the order.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				// two quotes - open or close italics
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				// three quotes - open or close bold
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				// five quotes - open or close both separately
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a truthiness test would silently drop
	// buffered text that is exactly "0".
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2128 
/**
 * Replace bracketed external links ("[url text]") with HTML anchors and
 * register each URL in the parser output.
 *
 * @param string $text
 * @return string
 * @throws MWException If splitting on the bracketed-link regex fails
 */
private function handleExternalLinks( $text ) {
	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $pieces === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# Text before the first link; the rest comes in groups of four:
	# URL, protocol (unused), link text, trailing text
	$html = array_shift( $pieces );
	$total = count( $pieces );

	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $pieces[$pos];
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$found = [];
		if ( preg_match( '/&(lt|gt);/', $url, $found, PREG_OFFSET_CAPTURE ) ) {
			$cut = $found[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';
		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber it
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			$linktype = 'text';
			[ $dtrail, $trail ] = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
			$label = $this->getTargetLanguageConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$attribs = $this->getExternalLinkAttribs( $url );
		// @phan-suppress-next-line SecurityCheck-XSS using false for escape is valid here
		$anchor = Linker::makeExternalLink( $url, $label, false, $linktype, $attribs, $this->getTitle() );
		$html .= $anchor . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $html;
}
2209 
/**
 * Get the rel attribute value for an external link.
 *
 * Returns 'nofollow' when $wgNoFollowLinks is enabled, the title's
 * namespace is not listed in $wgNoFollowNsExceptions and the URL does not
 * match $wgNoFollowDomainExceptions.
 *
 * @param string|bool $url URL whose domain is checked against the exceptions
 * @param LinkTarget|null $title Page the link appears on; its namespace is
 *   checked against the namespace exceptions
 * @return string|null 'nofollow', or null when no rel value applies
 */
public static function getExternalLinkRel( $url = false, LinkTarget $title = null ) {
	// The configuration lives in globals; without this declaration the
	// variables below would be undefined locals and the check could never pass.
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
	$ns = $title ? $title->getNamespace() : false;
	if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
2230 
/**
 * Build the attribute array for an external link to $url.
 *
 * 'rel' is always present in the result (it may be null). 'target' is added
 * when the parser options define an external link target; for targets other
 * than _self, _parent and _top, "noreferrer noopener" is appended to rel.
 *
 * @param string $url
 * @return array Attributes, with keys 'rel' and optionally 'target'
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->getTitle() );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// getExternalLinkRel() returns null when there is no rel value, so
			// check for null as well to avoid a leading space in the result.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2262 
/**
 * Normalize the percent-escapes in a URL.
 *
 * Unsafe characters are percent-encoded first. Then, separately for the
 * fragment, the query and the scheme/path part, escapes of printable ASCII
 * characters that are safe in that part are decoded and the remaining
 * escapes are upper-cased. A valid IPv6 literal host keeps its square
 * brackets.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
		IPUtils::isValid( rawurldecode( $m[1] ) )
	) {
		$isIPv6 = rawurldecode( $m[1] );
	} else {
		$isIPv6 = false;
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		static function ( $m ) {
			return rawurlencode( $m[0] );
		},
		$url
	);

	# $ret is assembled back to front; $end marks where the not yet
	# processed part of the URL stops.
	$ret = '';
	$end = strlen( $url );

	# Fragment part - 'fragment'
	$start = strpos( $url, '#' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
		$end = $start;
	}

	# Query part - 'query' minus &=+;
	$start = strpos( $url, '?' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
		$end = $start;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent(
		substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax: the brackets were percent-encoded above, restore them
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2331 
/**
 * Normalize the %XX escapes in one URL component.
 *
 * An escape is decoded when it stands for a printable ASCII character
 * (codes 33-126) that is not listed in $unsafe; every other escape is kept,
 * with its hex digits upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must stay escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		static function ( $hit ) use ( $unsafe ) {
			$escape = $hit[0];
			$decoded = urldecode( $escape );
			$code = ord( $decoded );
			$printable = $code > 32 && $code < 127;
			if ( !$printable || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape );
			}
			# Unescape it
			return $decoded;
		},
		$component
	);
}
2346 
/**
 * Produce external-image HTML for a URL if the parser options permit it.
 *
 * An image is produced when the URL matches EXT_IMAGE_REGEX and either
 * external images are allowed in general, the URL starts with one of the
 * configured allowed prefixes, or the image whitelist is enabled and one of
 * its entries matches the URL.
 *
 * @param string $url
 * @return string|false The result of Linker::makeExternalImage(), or false
 *   if no image was made
 */
private function maybeMakeExternalImage( $url ) {
	# The allowed sources may be a single string or an array of strings
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();
	$fromAllowedPrefix = false;
	if ( !empty( $allowedFrom ) ) {
		if ( is_array( $allowedFrom ) ) {
			foreach ( $allowedFrom as $prefix ) {
				if ( strpos( $url, $prefix ) === 0 ) {
					$fromAllowedPrefix = true;
					break;
				}
			}
		} else {
			$fromAllowedPrefix = ( strpos( $url, $allowedFrom ) === 0 );
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $fromAllowedPrefix )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}
	if ( $html ) {
		return $html;
	}

	# Fall back to the on-wiki whitelist, if it is enabled
	if ( !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $html;
	}

	$entries = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $entries as $entry ) {
		# Ignore blank entries and comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		# Each entry is a regex fragment: escape the delimiter and match case-insensitively
		// @phan-suppress-next-line SecurityCheck-ReDoS preg_quote is not wanted here
		if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
			# Image matches a whitelist entry
			$html = Linker::makeExternalImage( $url );
			break;
		}
	}
	return $html;
}
2405 
/**
 * Process [[ ]] wikilinks in the text and merge the resulting link holders
 * into $this->mLinkHolders.
 *
 * @param string $text
 * @return string The text as rewritten by handleInternalLinks2()
 */
private function handleInternalLinks( $text ) {
	# handleInternalLinks2() takes $text by reference and rewrites it in place
	$holders = $this->handleInternalLinks2( $text );
	$this->mLinkHolders->merge( $holders );
	return $text;
}
2417 
2423  private function handleInternalLinks2( &$s ) {
2424  static $tc = false, $e1, $e1_img;
2425  # the % is needed to support urlencoded titles as well
2426  if ( !$tc ) {
2427  $tc = Title::legalChars() . '#%';
2428  # Match a link having the form [[namespace:link|alternate]]trail
2429  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2430  # Match cases where there is no "]]", which might still be images
2431  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2432  }
2433 
2434  $holders = new LinkHolderArray(
2435  $this,
2436  $this->getContentLanguageConverter(),
2437  $this->getHookContainer() );
2438 
2439  # split the entire text string on occurrences of [[
2440  $a = StringUtils::explode( '[[', ' ' . $s );
2441  # get the first element (all text up to first [[), and remove the space we added
2442  $s = $a->current();
2443  $a->next();
2444  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2445  $s = substr( $s, 1 );
2446 
2447  $nottalk = !$this->getTitle()->isTalkPage();
2448 
2449  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2450  $e2 = null;
2451  if ( $useLinkPrefixExtension ) {
2452  # Match the end of a line for a word that's not followed by whitespace,
2453  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2454  $charset = $this->contLang->linkPrefixCharset();
2455  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2456  $m = [];
2457  if ( preg_match( $e2, $s, $m ) ) {
2458  $first_prefix = $m[2];
2459  } else {
2460  $first_prefix = false;
2461  }
2462  } else {
2463  $prefix = '';
2464  }
2465 
2466  # Some namespaces don't allow subpages
2467  $useSubpages = $this->nsInfo->hasSubpages(
2468  $this->getTitle()->getNamespace()
2469  );
2470 
2471  # Loop for each link
2472  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2473  # Check for excessive memory usage
2474  if ( $holders->isBig() ) {
2475  # Too big
2476  # Do the existence check, replace the link holders and clear the array
2477  $holders->replace( $s );
2478  $holders->clear();
2479  }
2480 
2481  if ( $useLinkPrefixExtension ) {
2482  if ( preg_match( $e2, $s, $m ) ) {
2483  list( , $s, $prefix ) = $m;
2484  } else {
2485  $prefix = '';
2486  }
2487  # first link
2488  if ( $first_prefix ) {
2489  $prefix = $first_prefix;
2490  $first_prefix = false;
2491  }
2492  }
2493 
2494  $might_be_img = false;
2495 
2496  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2497  $text = $m[2];
2498  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2499  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2500  # the real problem is with the $e1 regex
2501  # See T1500.
2502  # Still some problems for cases where the ] is meant to be outside punctuation,
2503  # and no image is in sight. See T4095.
2504  if ( $text !== ''
2505  && substr( $m[3], 0, 1 ) === ']'
2506  && strpos( $text, '[' ) !== false
2507  ) {
2508  $text .= ']'; # so that handleExternalLinks($text) works later
2509  $m[3] = substr( $m[3], 1 );
2510  }
2511  # fix up urlencoded title texts
2512  if ( strpos( $m[1], '%' ) !== false ) {
2513  # Should anchors '#' also be rejected?
2514  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2515  }
2516  $trail = $m[3];
2517  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2518  # Invalid, but might be an image with a link in its caption
2519  $might_be_img = true;
2520  $text = $m[2];
2521  if ( strpos( $m[1], '%' ) !== false ) {
2522  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2523  }
2524  $trail = "";
2525  } else { # Invalid form; output directly
2526  $s .= $prefix . '[[' . $line;
2527  continue;
2528  }
2529 
2530  $origLink = ltrim( $m[1], ' ' );
2531 
2532  # Don't allow internal links to pages containing
2533  # PROTO: where PROTO is a valid URL protocol; these
2534  # should be external links.
2535  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2536  $s .= $prefix . '[[' . $line;
2537  continue;
2538  }
2539 
2540  # Make subpage if necessary
2541  if ( $useSubpages ) {
2543  $this->getTitle(), $origLink, $text
2544  );
2545  } else {
2546  $link = $origLink;
2547  }
2548 
2549  // \x7f isn't a default legal title char, so most likely strip
2550  // markers will force us into the "invalid form" path above. But,
2551  // just in case, let's assert that xmlish tags aren't valid in
2552  // the title position.
2553  $unstrip = $this->mStripState->killMarkers( $link );
2554  $noMarkers = ( $unstrip === $link );
2555 
2556  $nt = $noMarkers ? Title::newFromText( $link ) : null;
2557  if ( $nt === null ) {
2558  $s .= $prefix . '[[' . $line;
2559  continue;
2560  }
2561 
2562  $ns = $nt->getNamespace();
2563  $iw = $nt->getInterwiki();
2564 
2565  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2566 
2567  if ( $might_be_img ) { # if this is actually an invalid link
2568  if ( $ns === NS_FILE && $noforce ) { # but might be an image
2569  $found = false;
2570  while ( true ) {
2571  # look at the next 'line' to see if we can close it there
2572  $a->next();
2573  $next_line = $a->current();
2574  if ( $next_line === false || $next_line === null ) {
2575  break;
2576  }
2577  $m = explode( ']]', $next_line, 3 );
2578  if ( count( $m ) == 3 ) {
2579  # the first ]] closes the inner link, the second the image
2580  $found = true;
2581  $text .= "[[{$m[0]}]]{$m[1]}";
2582  $trail = $m[2];
2583  break;
2584  } elseif ( count( $m ) == 2 ) {
2585  # if there's exactly one ]] that's fine, we'll keep looking
2586  $text .= "[[{$m[0]}]]{$m[1]}";
2587  } else {
2588  # if $next_line is invalid too, we need look no further
2589  $text .= '[[' . $next_line;
2590  break;
2591  }
2592  }
2593  if ( !$found ) {
2594  # we couldn't find the end of this imageLink, so output it raw
2595  # but don't ignore what might be perfectly normal links in the text we've examined
2596  $holders->merge( $this->handleInternalLinks2( $text ) );
2597  $s .= "{$prefix}[[$link|$text";
2598  # note: no $trail, because without an end, there *is* no trail
2599  continue;
2600  }
2601  } else { # it's not an image, so output it raw
2602  $s .= "{$prefix}[[$link|$text";
2603  # note: no $trail, because without an end, there *is* no trail
2604  continue;
2605  }
2606  }
2607 
2608  $wasblank = ( $text == '' );
2609  if ( $wasblank ) {
2610  $text = $link;
2611  if ( !$noforce ) {
2612  # Strip off leading ':'
2613  $text = substr( $text, 1 );
2614  }
2615  } else {
2616  # T6598 madness. Handle the quotes only if they come from the alternate part
2617  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2618  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2619  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2620  $text = $this->doQuotes( $text );
2621  }
2622 
2623  # Link not escaped by : , create the various objects
2624  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2625  # Interwikis
2626  if (
2627  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2628  MediaWikiServices::getInstance()->getLanguageNameUtils()
2629  ->getLanguageName(
2630  $iw,
2631  LanguageNameUtils::AUTONYMS,
2632  LanguageNameUtils::DEFINED
2633  )
2634  || in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2635  )
2636  ) {
2637  # T26502: filter duplicates
2638  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2639  $this->mLangLinkLanguages[$iw] = true;
2640  $this->mOutput->addLanguageLink( $nt->getFullText() );
2641  }
2642 
2646  $s = rtrim( $s . $prefix ) . $trail; # T175416
2647  continue;
2648  }
2649 
2650  if ( $ns === NS_FILE ) {
2651  if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->getTitle() ) ) {
2652  if ( $wasblank ) {
2653  # if no parameters were passed, $text
2654  # becomes something like "File:Foo.png",
2655  # which we don't want to pass on to the
2656  # image generator
2657  $text = '';
2658  } else {
2659  # recursively parse links inside the image caption
2660  # actually, this will parse them in any other parameters, too,
2661  # but it might be hard to fix that, and it doesn't matter ATM
2662  $text = $this->handleExternalLinks( $text );
2663  $holders->merge( $this->handleInternalLinks2( $text ) );
2664  }
2665  # cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
2666  $s .= $prefix . $this->armorLinks(
2667  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2668  continue;
2669  }
2670  } elseif ( $ns === NS_CATEGORY ) {
2674  $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2675 
2676  if ( $wasblank ) {
2677  $sortkey = $this->getDefaultSort();
2678  } else {
2679  $sortkey = $text;
2680  }
2681  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2682  $sortkey = str_replace( "\n", '', $sortkey );
2683  $sortkey = $this->getTargetLanguageConverter()->convertCategoryKey( $sortkey );
2684  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2685 
2686  continue;
2687  }
2688  }
2689 
2690  # Self-link checking. For some languages, variants of the title are checked in
2691  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2692  # for linking to a different variant.
2693  if ( $ns !== NS_SPECIAL && $nt->equals( $this->getTitle() ) && !$nt->hasFragment() ) {
2694  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2695  continue;
2696  }
2697 
2698  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2699  # @todo FIXME: Should do batch file existence checks, see comment below
2700  if ( $ns === NS_MEDIA ) {
2701  # Give extensions a chance to select the file revision for us
2702  $options = [];
2703  $descQuery = false;
2704  $this->hookRunner->onBeforeParserFetchFileAndTitle(
2705  $this, $nt, $options, $descQuery );
2706  # Fetch and register the file (file title may be different via hooks)
2707  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2708  # Cloak with NOPARSE to avoid replacement in handleExternalLinks
2709  $s .= $prefix . $this->armorLinks(
2710  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2711  continue;
2712  }
2713 
2714  # Some titles, such as valid special pages or files in foreign repos, should
2715  # be shown as bluelinks even though they're not included in the page table
2716  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2717  # batch file existence checks for NS_FILE and NS_MEDIA
2718  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2719  $this->mOutput->addLink( $nt );
2720  $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2721  } else {
2722  # Links will be added to the output link list after checking
2723  $s .= $holders->makeHolder( $nt, $text, $trail, $prefix );
2724  }
2725  }
2726  return $holders;
2727  }
2728 
/**
 * Build the HTML for a link whose target is already known, bypassing the
 * link holder mechanism, and armor it against later URL processing.
 *
 * @param LinkTarget $nt Target of the link
 * @param string $text Link text (HTML); when empty, the HTML-escaped
 *   prefixed title text of $nt is used instead
 * @param string $trail Text following the link; the first part returned by
 *   Linker::splitTrail() is placed inside the link
 * @param string $prefix Text placed inside the link, in front of $text
 * @return string Armored link HTML followed by the rest of the trail
 */
private function makeKnownLinkHolder( LinkTarget $nt, $text = '', $trail = '', $prefix = '' ) {
	$trailParts = Linker::splitTrail( $trail );
	$inside = $trailParts[0];
	$remainder = $trailParts[1];

	$label = $text;
	if ( $label == '' ) {
		# No explicit label: fall back to the escaped prefixed title
		$label = htmlspecialchars( $this->titleFormatter->getPrefixedText( $nt ) );
	}

	$renderer = $this->getLinkRenderer();
	$link = $renderer->makeKnownLink( $nt, new HtmlArmor( "$prefix$label$inside" ) );

	return $this->armorLinks( $link ) . $remainder;
}
2755 
/**
 * Prefix every URL protocol that starts at a word boundary with a
 * MARKER_PREFIX . "NOPARSE" marker. Callers in this class use this to stop
 * handleExternalLinks() from touching URLs inside already-rendered HTML.
 *
 * @param string $text Text to armor
 * @return string Result of the replacement
 */
private function armorLinks( $text ) {
	# Protocols are matched case-insensitively
	$pattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$replacement = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $pattern, $replacement, $text );
}
2770 
/**
 * Deprecated pass-through to BlockLevelPass::doBlockLevels().
 *
 * @deprecated since 1.35
 * @param string $text Forwarded unchanged
 * @param bool $linestart Forwarded unchanged
 * @return string Whatever BlockLevelPass::doBlockLevels() returns
 */
public function doBlockLevels( $text, $linestart ) {
	# Emit the deprecation notice before delegating
	wfDeprecated( __METHOD__, '1.35' );

	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $processed;
}
2784 
/**
 * Return the value of a magic variable (such as one handled by
 * CoreMagicVariables), consulting and filling $this->mVarCache.
 *
 * @param string $index Magic variable identifier
 * @param bool|PPFrame $frame Frame forwarded to the
 *   ParserGetVariableValueSwitch hook
 * @return mixed The cached value, the core value, or whatever the
 *   ParserGetVariableValueSwitch hook left in $ret (null if nothing did)
 */
private function expandMagicVariable( $index, $frame = false ) {
	# The cache is only trusted when the VarCache hook does not veto it
	if (
		$this->hookRunner->onParserGetVariableValueVarCache( $this, $this->mVarCache ) &&
		isset( $this->mVarCache[$index] )
	) {
		return $this->mVarCache[$index];
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	$this->hookRunner->onParserGetVariableValueTs( $this, $ts );

	$value = CoreMagicVariables::expand(
		$this, $index, $ts, $this->nsInfo, $this->svcOptions, $this->logger
	);

	if ( $value === null ) {
		// Not a defined core magic word
		$ret = null;
		$originalIndex = $index;
		$this->hookRunner->onParserGetVariableValueSwitch( $this,
			$this->mVarCache, $index, $ret, $frame );
		if ( $index !== $originalIndex ) {
			wfDeprecatedMsg(
				'A ParserGetVariableValueSwitch hook handler modified $index, ' .
				'this is deprecated since MediaWiki 1.35',
				'1.35', false, false
			);
		}
		if ( !isset( $this->mVarCache[$originalIndex] ) ||
			$this->mVarCache[$originalIndex] !== $ret ) {
			wfDeprecatedMsg(
				'A ParserGetVariableValueSwitch hook handler bypassed the cache, ' .
				'this is deprecated since MediaWiki 1.35', '1.35', false, false
			);
		}
		// FIXME: in the future, don't give this hook unrestricted
		// access to mVarCache; we can cache it ourselves by falling
		// through here.
		return $ret;
	}

	# Core magic word: remember the value for later lookups
	$this->mVarCache[$index] = $value;

	return $value;
}
2841 
/**
 * Populate $this->mVariables and $this->mSubstWords with magic word arrays
 * built from the IDs supplied by the magic word factory.
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$variableIDs = $factory->getVariableIDs();
	$substIDs = $factory->getSubstIDs();

	$this->mVariables = $factory->newArray( $variableIDs );
	$this->mSubstWords = $factory->newArray( $substIDs );
}
2853 
/**
 * Run the preprocessor over some wikitext and return its object
 * representation, by delegating to Preprocessor::preprocessToObj().
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Flags forwarded to the preprocessor (for example
 *   Preprocessor::DOM_FOR_INCLUSION); defaults to 0
 * @return mixed Whatever preprocessToObj() returns
 */
public function preprocessToDom( $text, $flags = 0 ) {
	$preprocessor = $this->getPreprocessor();

	return $preprocessor->preprocessToObj( $text, $flags );
}
2875 
/**
 * Preprocess $text and expand it in the given frame.
 *
 * @param string $text Text to transform
 * @param bool|PPFrame|array $frame A PPFrame to expand in, false to create
 *   a new frame, or anything else, which is handed to
 *   Preprocessor::newCustomFrame()
 * @param bool $argsOnly When true, expansion uses PPFrame::NO_TEMPLATES
 * @return string The expanded text; $text is returned untouched when it is
 *   empty or longer than the configured maximum include size
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Nothing to do for empty text
	$length = strlen( $text );
	if ( $length < 1 ) {
		return $text;
	}
	# Prevent too big inclusions
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			$this->logger->debug(
				__METHOD__ . " called using plain parameters instead of " .
				"a PPFrame instance. Creating custom frame."
			);
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$dom = $this->preprocessToDom( $text );
	$expandFlags = 0;
	if ( $argsOnly ) {
		$expandFlags = PPFrame::NO_TEMPLATES;
	}

	return $frame->expand( $dom, $expandFlags );
}
2920 
/**
 * Record that a parser limitation was hit: adds the "<type>-warning"
 * message as an output warning and the "<type>-category" tracking category.
 *
 * @param string $limitationType Prefix of the message and category names
 * @param string|int|null $current Passed as the first numeric message parameter
 * @param string|int|null $max Passed as the second numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max does no harm when the message does not use them.
	# ->inLanguage( $this->mOptions->getUserLangObj() ) is deliberately not
	# applied: the warning is only shown during preview, and doing so would
	# split the parser cache unnecessarily.
	$message = wfMessage( $limitationType . '-warning' )->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( $limitationType . '-category' );
}
2957 
/**
 * Expand one double-brace construct ({{...}}): a subst/literal form, a
 * magic variable, a parser function, or a template / special page /
 * interwiki transclusion.
 *
 * @param array $piece The parsed construct; this method reads
 *   $piece['title'] (the part before the first |), $piece['parts'] (the
 *   argument nodes, may be null) and $piece['lineStart']
 * @param PPFrame $frame The frame the construct is being expanded in
 * @return array Either [ 'object' => $text ] (to be expanded further by the
 *   caller's frame) or [ 'text' => $text ]
 */
public function braceSubstitution( array $piece, PPFrame $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];
	# When $piece['parts'] is null, $args is a plain empty array, which has no
	# getLength() method. Compute the argument count once in a way that works
	# for both shapes, and use it everywhere below.
	$argsLength = is_array( $args ) ? count( $args ) : $args->getLength();

	$profileSection = null; // profile templates

	$sawDeprecatedTemplateEquals = false; // T91154

	# SUBST
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $argsLength == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->expandMagicVariable( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			# The first function argument is the text after the colon
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = Linker::normalizeSubpageLink(
			$this->getTitle(), $part1, $subpage
		);
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguageConverter()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguageConverter()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					# Only arguments whose 'index' is empty are forwarded
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
					if (
						$title->getNamespace() === NS_TEMPLATE &&
						$title->getDBkey() === '=' &&
						$originalTitle === '='
					) {
						// Note that we won't get here if `=` is evaluated
						// (in the future) as a parser function, nor if
						// the Template namespace is given explicitly,
						// ie `{{Template:=}}`. Only `{{=}}` triggers.
						$sawDeprecatedTemplateEquals = true; // T91154
					}
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}
	if (
		$sawDeprecatedTemplateEquals &&
		$this->mStripState->unstripBoth( $text ) !== '='
	) {
		// T91154: {{=}} is deprecated when it doesn't expand to `=`;
		// use {{Template:=}} if you must.
		$this->addTrackingCategory( 'template-equals-category' );
		$this->mOutput->addWarning( wfMessage( 'template-equals-warning' )->text() );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		// @phan-suppress-next-line SecurityCheck-XSS Mixed mode, here html and safe
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		// @phan-suppress-next-line SecurityCheck-DoubleEscaped Mixed mode, here html and safe
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# Strip a leading colon so the link below does not start with "[[::".
			# preg_replace() returns the new string; it must be assigned back.
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3330 
/**
 * Look up a parser function by name and invoke its registered callback.
 *
 * @param PPFrame $frame Frame used to expand PPNode arguments, and passed
 *   to callbacks registered with SFH_OBJECT_ARGS
 * @param string $function Function name as written in the wikitext
 * @param array $args Function arguments; values may be PPNode objects or
 *   plain values
 * @return array Always contains 'found'. [ 'found' => false ] when no such
 *   function is registered; otherwise the callback's result normalised to
 *   an array with 'text' (when provided) plus any flags the callback set.
 *   When the callback sets 'noparse' to a false value, 'text' is replaced by
 *   its preprocessed form and 'isChildObj' is set to true.
 */
public function callParserFunction( PPFrame $frame, $function, array $args = [] ) {
	# Case sensitive functions take precedence
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $this->contLang->lc( $function );
		if ( !isset( $this->mFunctionSynonyms[0][$function] ) ) {
			return [ 'found' => false ];
		}
		$function = $this->mFunctionSynonyms[0][$function];
	}

	$hook = $this->mFunctionHooks[$function];
	$callback = $hook[0];
	$flags = $hook[1];

	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Object mode: every argument except index 0 is handed over as a PPNode
		$nodeArgs = [];
		foreach ( $args as $key => $arg ) {
			if ( $key !== 0 && !( $arg instanceof PPNode ) ) {
				$arg = $this->mPreprocessor->newPartNodeArray( [ $key => $arg ] )->item( 0 );
			}
			$nodeArgs[] = $arg;
		}

		# The callback receives the parser, the frame and the argument array
		$callArgs = [ $this, $frame, $nodeArgs ];
	} else {
		# Plain mode: every argument is flattened to trimmed text
		$callArgs = [ $this ];
		foreach ( $args as $key => $arg ) {
			if ( $arg instanceof PPNode ) {
				$plain = $frame->expand( $arg );
			} elseif ( is_int( $key ) && $key >= 0 ) {
				$plain = $arg;
			} else {
				$plain = "$key=$arg";
			}
			$callArgs[] = trim( $plain );
		}
	}

	$result = $callback( ...$callArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	$noparse = $result['noparse'] ?? true;
	$preprocessFlags = $result['preprocessFlags'] ?? 0;

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3429 
/**
 * Get the preprocessed form of a template, using and filling the
 * per-parser template caches (mTplDomCache and mTplRedirCache).
 *
 * @param LinkTarget $title Template to load
 * @return array Two elements: the preprocessed template (or false when no
 *   text could be fetched) and the title it was finally loaded from
 */
public function getTemplateDom( LinkTarget $title ) {
	$requested = $title;
	$key = CacheKeyHelper::getKeyForPage( $title );

	# Follow a previously recorded redirect to its target
	if ( isset( $this->mTplRedirCache[$key] ) ) {
		$target = $this->mTplRedirCache[$key];
		$title = Title::makeTitle( $target[0], $target[1] );
		$key = CacheKeyHelper::getKeyForPage( $title );
	}
	# Note that isset() is also true for a cached false (a recorded miss)
	if ( isset( $this->mTplDomCache[$key] ) ) {
		return [ $this->mTplDomCache[$key], $title ];
	}

	# Cache miss, go to the database
	$fetched = $this->fetchTemplateAndTitle( $title );
	$text = $fetched[0];
	$title = $fetched[1];

	if ( $text === false ) {
		# Remember the failure under the key that was looked up
		$this->mTplDomCache[$key] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$this->mTplDomCache[$key] = $dom;

	# The fetch ended on a different page: record where the request led
	if ( !$title->isSamePageAs( $requested ) ) {
		$redirectKey = CacheKeyHelper::getKeyForPage( $requested );
		$this->mTplRedirCache[$redirectKey] = [ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3470 
/**
 * Fetch the current revision of a page through the callback configured in
 * the parser options, caching the result per page in an LRU cache.
 *
 * NOTE(review): the method header was absent from this listing; the name is
 * taken from the call in statelessFetchTemplate() and the parameter from the
 * body's use of $link. Confirm visibility and type hint against upstream.
 *
 * @param LinkTarget $link Page to look up
 * @return mixed|null The value returned by the callback, or null when the
 *   callback returned a falsy value
 */
public function fetchCurrentRevisionRecordOfTitle( LinkTarget $link ) {
	$cacheKey = CacheKeyHelper::getKeyForPage( $link );
	if ( !$this->currentRevisionCache ) {
		# Create the cache lazily on first use
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$title = Title::castFromLinkTarget( $link ); // hook signature compat
		$revisionRecord =
			// Defaults to Parser::statelessFetchRevisionRecord()
			call_user_func(
				$this->mOptions->getCurrentRevisionRecordCallback(),
				$title,
				$this
			);
		if ( !$revisionRecord ) {
			// Parser::statelessFetchRevisionRecord() can return false;
			// normalize it to null.
			$revisionRecord = null;
		}
		$this->currentRevisionCache->set( $cacheKey, $revisionRecord );
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3507 
/**
 * Tell whether the current-revision cache already holds an entry for a page.
 *
 * @param LinkTarget $link Page to check
 * @return bool False when the cache has not been created yet or has no
 *   entry for the page
 */
public function isCurrentRevisionOfTitleCached( LinkTarget $link ) {
	$key = CacheKeyHelper::getKeyForPage( $link );

	if ( !$this->currentRevisionCache ) {
		return false;
	}

	return (bool)$this->currentRevisionCache->has( $key );
}
3521 
/**
 * Look up the known current revision of a page through the RevisionLookup
 * service, without touching any parser state.
 *
 * @param LinkTarget $link Page to look up
 * @param Parser|null $parser Not used by this implementation
 * @return mixed Whatever RevisionLookup::getKnownCurrentRevision() returns
 */
public static function statelessFetchRevisionRecord( LinkTarget $link, $parser = null ) {
	// A PageIdentity (probably a Title) is used as-is; anything else is
	// converted to a Title first.
	// XXX: use RevisionStore::getPageForLink()!
	// ...but get the info for the current revision at the same time?
	// Should RevisionStore::getKnownCurrentRevision accept a LinkTarget?
	$page = ( $link instanceof PageIdentity )
		? $link
		: Title::castFromLinkTarget( $link );

	$lookup = MediaWikiServices::getInstance()->getRevisionLookup();

	return $lookup->getKnownCurrentRevision( $page );
}
3546 
/**
 * Fetch the text of a template through the template callback configured
 * in the parser options, and register the reported dependencies with the
 * parser output.
 *
 * @param LinkTarget $link Template to fetch
 * @return array Two elements: the template text (with U+007F replaced by
 *   "?" when it is a string) and the final title
 */
public function fetchTemplateAndTitle( LinkTarget $link ) {
	// Use Title for compatibility with callbacks and return type
	$title = Title::castFromLinkTarget( $link );

	// Defaults to Parser::statelessFetchTemplate()
	$callback = $this->mOptions->getTemplateCallback();
	$stuff = call_user_func( $callback, $title, $this );

	$revRecord = $stuff['revision-record'] ?? null;

	$text = $stuff['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}

	$finalTitle = $stuff['finalTitle'] ?? $title;
	$deps = $stuff['deps'] ?? [];

	foreach ( $deps as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		if ( !$dep['title']->equals( $this->getTitle() ) || !( $revRecord instanceof RevisionRecord ) ) {
			continue;
		}

		// Self-transclusion; final result may change based on the new page version
		try {
			$sha1 = $revRecord->getSha1();
		} catch ( RevisionAccessException $e ) {
			$sha1 = null;
		}
		$this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $sha1 );
	}

	return [ $text, $finalTitle ];
}
3588 
/**
 * Static function to get a template's text and dependency information,
 * following up to two redirects. This is the default template callback
 * referenced by fetchTemplateAndTitle().
 *
 * @param LinkTarget $page Template to fetch (converted with
 *   Title::castFromLinkTarget())
 * @param Parser|bool $parser Parser used for the context title and the
 *   current-revision cache, or false for none
 * @return array With keys 'revision-record' (the revision or false),
 *   'text' (string or false), 'finalTitle' and 'deps' (a list of
 *   [ 'title', 'page_id', 'rev_id' ] arrays)
 */
public static function statelessFetchTemplate( $page, $parser = false ) {
	$title = Title::castFromLinkTarget( $page ); // for compatibility with return type
	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];
	$revRecord = null;
	$contextTitle = $parser ? $parser->getTitle() : null;

	# Loop to fetch the article, with up to 2 redirects
	$revLookup = MediaWikiServices::getInstance()->getRevisionLookup();
	for ( $i = 0; $i < 3 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$revRecord = null; # Assume no hook
		$id = false; # Assume current
		$origTitle = $title;
		$titleChanged = false;
		Hooks::runner()->onBeforeParserFetchTemplateRevisionRecord(
			# The $title is not a PageIdentity, as it may
			# contain fragments or even represent an attempt to transclude
			# a broken or otherwise-missing Title, which the hook may
			# fix up. Similarly, the $contextTitle may represent a special
			# page or other page which "exists" as a parsing context but
			# is not in the DB.
			$contextTitle, $title,
			$skip, $revRecord
		);
		if ( !$skip && !$revRecord ) {
			# Deprecated legacy hook
			Hooks::runner()->onBeforeParserFetchTemplateAndtitle(
				$parser, $title, $skip, $id
			);
		}

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the revision
		if ( !$revRecord ) {
			if ( $id ) {
				# Handle $id returned by deprecated legacy hook
				$revRecord = $revLookup->getRevisionById( $id );
			} elseif ( $parser ) {
				$revRecord = $parser->fetchCurrentRevisionRecordOfTitle( $title );
			} else {
				$revRecord = $revLookup->getRevisionByTitle( $title );
			}
		}
		if ( $revRecord ) {
			# Update title, as $revRecord may have been changed by hook
			$title = Title::newFromLinkTarget(
				$revRecord->getPageAsLinkTarget()
			);
			$deps[] = [
				'title' => $title,
				'page_id' => $revRecord->getPageId(),
				'rev_id' => $revRecord->getId(),
			];
		} else {
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null,
			];
		}
		if ( !$title->equals( $origTitle ) ) {
			# If we fetched a rev from a different title, register
			# the original title too...
			$deps[] = [
				'title' => $origTitle,
				'page_id' => $origTitle->getArticleID(),
				'rev_id' => null,
			];
			$titleChanged = true;
		}
		# If there is no current revision, there is no page
		if ( $revRecord === null || $revRecord->getId() === null ) {
			$linkCache = MediaWikiServices::getInstance()->getLinkCache();
			$linkCache->addBadLinkObj( $title );
		}
		if ( $revRecord ) {
			if ( $titleChanged && !$revRecord->hasSlot( SlotRecord::MAIN ) ) {
				// We've added this (missing) title to the dependencies;
				// give the hook another chance to redirect it to an
				// actual page.
				$text = false;
				$finalTitle = $title;
				continue;
			}
			if ( $revRecord->hasSlot( SlotRecord::MAIN ) ) { // T276476
				$content = $revRecord->getContent( SlotRecord::MAIN );
				$text = $content ? $content->getWikitextForTransclusion() : null;
			} else {
				$text = false;
			}

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() === NS_MEDIAWIKI ) {
			# No revision, but the page may be backed by an interface message
			$message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
				lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		# The loop condition ends the loop when this is not an object
		$title = $content->getRedirectTarget();
	}

	$retValues = [
		// previously, when this also returned a Revision object, we set
		// 'revision-record' to false instead of null if it was unavailable,
		// so that callers to use isset and then rely on the revision-record
		// key instead of the revision key, even if there was no corresponding
		// object - we continue to set to false here for backwards compatibility
		'revision-record' => $revRecord ?: false,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
	return $retValues;
}
3737 
/**
 * Fetch a file and register it as an image dependency of the parser output.
 *
 * @param LinkTarget $link File page to look up
 * @param array $options Options forwarded to fetchFileNoRegister()
 * @return array Two elements: the file (or a falsy value when none was
 *   found) and a Title built from $link
 */
public function fetchFileAndTitle( LinkTarget $link, array $options = [] ) {
	$file = $this->fetchFileNoRegister( $link, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $link->getDBkey(), $time, $sha1 );

	if ( $file ) {
		$fileTitle = $file->getTitle();
		if ( !$link->isSameLinkAs( $fileTitle ) ) {
			# The file lives under a different title: register that one too
			$this->mOutput->addImage( $fileTitle->getDBkey(), $time, $sha1 );
		}
	}

	// The title is cast for return type compat
	return [ $file, Title::castFromLinkTarget( $link ) ];
}
3762 
/**
 * Look up a file in the repo group without registering it as a dependency.
 *
 * @param LinkTarget $link File to find (used for lookup by name)
 * @param array $options Lookup options; the keys 'broken' and 'sha1' are
 *   inspected here and the whole array is forwarded to the repo group
 * @return mixed False when $options['broken'] is set, otherwise whatever
 *   the repo group lookup returns
 */
protected function fetchFileNoRegister( LinkTarget $link, array $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $link, $options );
}
3786 
/**
 * Fetch the content of a page on another wiki over HTTP, through the WAN
 * object cache.
 *
 * Returns a content-language error message instead of content when the
 * 'EnableScaryTranscluding' option is off, when the request URL is longer
 * than 1024 bytes, or when the fetch did not produce a string.
 *
 * @param LinkTarget $link Page to fetch
 * @param string $action Value for the 'action' query parameter of the URL
 * @return string Fetched content, or an error message
 */
public function interwikiTransclude( LinkTarget $link, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	// TODO: extract relevant functionality from Title
	$title = Title::castFromLinkTarget( $link );

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$hasWikiId = ( $wikiId !== false );

	$caller = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);

	// Cache-miss callback: performs the HTTP request and adjusts the TTL
	$fetch = static function ( $oldValue, &$ttl ) use ( $url, $caller, $cache ) {
		$req = MWHttpRequest::factory( $url, [], $caller );

		$status = $req->execute(); // Status object
		$ok = $status->isOK();
		if ( !$ok ) {
			// Do not cache failures
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// The remote side reported lag; cap the lifetime
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $ok ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback(
		$cacheKey,
		$this->svcOptions->get( 'TranscludeCacheExpiry' ),
		$fetch,
		$cacheOptions
	);

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3857 
/**
 * Resolve a triple-brace argument ({{{name|default}}}) against a frame.
 *
 * @param array $piece Reads 'title' (the argument name node) and 'parts'
 *  (the parts following the name; the first one is the default)
 * @param PPFrame $frame Frame whose arguments are consulted
 * @return array Either [ 'object' => ... ] (the default's children, or the
 *  re-assembled literal {{{...}}} when nothing matched) or [ 'text' => ... ]
 */
public function argSubstitution( array $piece, PPFrame $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );
	$object = false;
	$missing = ( $text === false );

	if ( $missing && $parts->getLength() > 0 ) {
		$defaultApplies = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultApplies ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	# Account for the argument's size against the include limit
	$withinLimit = $this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( !$withinLimit ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $missing && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $object !== false ) {
		return [ 'object' => $object ];
	}

	if ( !$withinLimit ) {
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	return [ 'text' => $text ];
}
3904 
/**
 * Produce the output for an extension tag found by the preprocessor.
 *
 * In HTML output mode the registered tag hook is called (or an error span
 * is produced for an unknown tag); in every other mode the tag is rebuilt
 * as text. Unless the hook asks for marker type 'none', the result is
 * stored in the strip state and a strip marker is returned in its place.
 *
 * @param array $params Reads:
 *  - 'name': tag name node
 *  - 'attr': attribute text node (optional)
 *  - 'inner': tag content node (optional; absent for a self-closed tag)
 *  - 'close': closing tag node (optional)
 *  - 'attributes': extra attributes as a name => value array (optional)
 * @param PPFrame $frame
 * @throws MWException If a tag hook returns an unknown 'markerType'
 * @return string A strip marker, the raw output (marker type 'none'), or
 *  an expansion error string passed straight through
 */
public function extensionSubstitution( array $params, PPFrame $frame ) {
	static $errorStr = '<span class="error">';
	static $errorLen = 20;

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : null;
	// Only inspect the prefix when there is attribute text at all: passing
	// null to substr() is deprecated as of PHP 8.1.
	if ( $attrText !== null && substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;

	// Note: the marker is built from the name as written, before it is
	// lower-cased for the hook lookup below.
	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$markerType = 'general';
	if ( $this->ot['html'] ) {
		$name = strtolower( $name );
		// Hand over an empty string rather than null when the tag had no
		// attribute text.
		$attributes = Sanitizer::decodeTagAttributes( $attrText ?? '' );
		if ( isset( $params['attributes'] ) ) {
			// Array union: attributes parsed from the text win over the extras
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			// Note that $content may be null here, for example if the
			// tag is self-closed.
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// Not rendering HTML: rebuild the tag as text
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			// Treat a missing 'close' entry like a null one, the same way
			// 'attr' and 'inner' are handled above.
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4006 
/**
 * Add to one of the include size counters, unless that would push the
 * counter past the maximum include size from the parser options.
 *
 * @param string $type Key into mIncludeSizes
 * @param int $size Amount to add
 * @return bool true if the counter was increased, false if the limit would
 *  be exceeded (the counter is then left unchanged)
 */
private function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4022 
4028  $this->mExpensiveFunctionCount++;
4029  return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4030  }
4031 
/**
 * Strip double-underscore magic words (__TOC__, __NOTOC__, ...) from the
 * text and apply their effects to the parser state and output.
 *
 * The first __TOC__ is replaced by a placeholder comment; all other
 * matched words are removed. Every word found is also stored as a
 * property (with an empty value) on the parser output.
 *
 * @param string $text
 * @return string The text with the magic words removed
 */
private function handleDoubleUnderscore( $text ) {
	# __TOC__ is special because its position has to be remembered
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# The first occurrence becomes a placeholder which is filled in
		# with the TOC at the end...
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );

		# ...and any further occurrences are dropped.
		$text = $tocWord->replace( '', $text );
	}

	# Match and remove all the other double underscores
	$this->mDoubleUnderscores =
		$this->magicWordFactory->getDoubleUnderscoreArray()->matchAndRemove( $text );
	$found = $this->mDoubleUnderscores;

	if ( isset( $found['nogallery'] ) ) {
		$this->mOutput->setNoGallery( true );
	}

	# An explicitly positioned TOC is not suppressed by __NOTOC__
	if ( !$this->mForceTocPosition && isset( $found['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if ( isset( $found['hiddencat'] )
		&& $this->getTitle()->getNamespace() === NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ always overrides __NOINDEX__, see T16899: it is applied
	# second, so its policy is the one that sticks.
	if ( isset( $found['noindex'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $found['index'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $found ) as $word ) {
		$this->mOutput->setProperty( $word, '' );
	}

	return $text;
}
4086 
/**
 * Add a tracking category to the parser output, for the page being parsed.
 *
 * @param string $msg Passed through to the output's addTrackingCategory()
 * @return mixed Whatever the output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$title = $this->getTitle();

	return $this->mOutput->addTrackingCategory( $msg, $title );
}
4096 
/**
 * Post-process the <h1>..<h6> headings of already-parsed text.
 *
 * For every heading this assigns a unique anchor (plus an optional
 * fallback anchor), optional numbering and an edit-section marker, and
 * rebuilds the heading via Linker::makeHeadline(). Along the way it builds
 * the table of contents HTML and the raw section list, and finally splices
 * the rebuilt headings back between the non-heading blocks of the text.
 *
 * @param string $text Parsed text containing the headings
 * @param string $origText Original wikitext; preprocessed here to locate
 *  the heading nodes needed for the per-section offsets
 * @param bool $isMain When true, the section list is stored on the parser
 *  output and an unpositioned TOC is added to the text
 * @return string The text with rebuilt headings; the TOC is substituted
 *  for the placeholder comment when its position was forced, otherwise it
 *  is appended to the block preceding the first heading
 */
private function finalizeHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page.
	# When true, actual presence will depend on post-cache transforms.
	$maybeShowEditLink = !isset( $this->mDoubleUnderscores['noeditsection'] );

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->getTitle()->getPrefixedDBkey();
	# The original text is preprocessed in OT_WIKI mode; the previous
	# output type is restored after the loop.
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Lower-cased anchors handed out so far, used to keep ids unique
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		# A leading heading marker carries the serial under which the
		# heading's source title and section index were recorded.
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			[ $titleText, $sectionIndex ] = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				// @phan-suppress-next-line PhanTypeInvalidDimOffset
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					// @phan-suppress-previous-line PhanTypeInvalidDimOffset
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			# Already taken: find the first free "_N" suffix, starting at 2
			$i = 2;
			while ( isset( $refers["{$arrayKey}_$i"] ) ) {
				++$i;
			}
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only track the fallback anchor when there is one. Recording the
		# "no fallback" value (false) would cast to array key 0 and make a
		# later heading whose id is "0" look like a duplicate.
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				$i = 2;
				while ( isset( $refers["{$fallbackArrayKey}_$i"] ) ) {
					++$i;
				}
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine(
				$linkAnchor,
				$tocline,
				$numbering,
				$toclevel,
				( $isTemplate ? false : $sectionIndex )
			);
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			# Accumulate the length (as measured by mb_strlen) of every
			# node that precedes the heading
			$byteOffset += mb_strlen(
				$this->mStripState->unstripBoth(
					$frame->expand( $node, PPFrame::RECOVER_ORIG )
				)
			);
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->getTitle()->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline(
			$level,
			$matches['attrib'][$headlineCount],
			$anchor,
			$headline,
			$editlink,
			$fallbackAnchor
		);

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	# This pattern must accept exactly the headings matched above (hence
	# the optional spaces before the final '>'), otherwise the rebuilt
	# headings in $head would no longer line up with the blocks.
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6] *>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		// Run the hook once per section, passing the section number, its
		// content and whether edit links may be shown.
		$this->hookRunner->onParserSectionCreate( $this, $i, $sections[$i], $maybeShowEditLink );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// The TOC goes at the end of the block preceding the first heading
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full = implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	}

	return $full;
}
4513 
/**
 * Transform wikitext for saving: strip NUL characters, normalize line
 * endings, optionally run the pre-save pass (pstPass2) and unstrip.
 *
 * @param string $text Wikitext to transform
 * @param PageReference $page Page the text belongs to
 * @param UserIdentity $user User on whose behalf the text is saved
 * @param ParserOptions $options The pre-save pass only runs when
 *  getPreSaveTransform() is true
 * @param bool $clearState When true, the parser lock is taken and the
 *  state is cleared by startParse()
 * @return string The transformed wikitext
 */
public function preSaveTransform( $text, PageReference $page, UserIdentity $user,
	ParserOptions $options, $clearState = true
) {
	if ( $clearState ) {
		// Held in a local so it lives until this method returns
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $page, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings.
	// We still normalize line endings (including trimming trailing whitespace) for
	// backwards-compatibility with other code that just calls PST, but this should already
	// be handled in TextContent subclasses
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}

	// Trim trailing whitespace again after unstripping, because the
	// previous steps can introduce it.
	$text = rtrim( $this->mStripState->unstripBoth( $text ) );

	$this->hookRunner->onParserPreSaveTransformComplete( $this, $text );

	# Reset
	$this->setUser( null );

	return $text;
}
4557 
/**
 * Pre-save transform helper: expands {{subst:}} constructs, replaces
 * signature tildes and applies the "pipe trick" link shortcuts.
 *
 * @param string $text
 * @param UserIdentity $user User whose signature replaces the tildes
 * @return string
 */
private function pstPass2( $text, UserIdentity $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		# strtr() tries the longest key first, so five tildes win over
		# four, and four over three.
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( 'user-signature', 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# The double-width characters below are written as \u{...} escapes so
	# that they cannot be folded into their ASCII look-alikes. With ASCII
	# parentheses $p4 would gain an extra capture group and match links
	# that have no context at all.
	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// Same, with double-width brackets U+FF08 and U+FF09 (added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\u{FF08}$tc+\u{FF09})\\|]]/";
	// [[ns:page (context), context|]] (using single, double-width (U+FF0C)
	// or Arabic (U+060C) comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |\u{FF0C}|\u{060C} )$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	# Reverse pipe trick: borrow the namespace and context of the current
	# page title, if it has any.
	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4627 
/**
 * Build the signature wikitext for a user.
 *
 * A non-empty nickname that is not too long is, when fancy signatures are
 * enabled, validated and returned after clean-up. In every other case the
 * result is the 'signature' (or, for unregistered users, 'signature-anon')
 * message built from the user name and the nickname, with the user name
 * standing in for an empty, overlong or invalid nickname.
 *
 * @param UserIdentity $user
 * @param string|false $nickname Nickname to use; false reads the
 *  'nickname' user option
 * @param bool|null $fancySig Whether the nickname is raw signature markup;
 *  null reads the 'fancysig' user option
 * @return string
 */
public function getUserSig( UserIdentity $user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Fall back to the user's stored preferences for anything not given
	if ( $nickname === false ) {
		$nickname = $this->userOptionsLookup->getOption( $user, 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $this->userOptionsLookup->getBoolOption( $user, 'fancysig' );
	}

	if ( $nickname === null || $nickname === '' ) {
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' ) ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		$valid = ( $this->validateSig( $nickname ) !== false );

		# New validator, only consulted in 'disallow' mode
		if ( $valid && $this->svcOptions->get( 'SignatureValidation' ) === 'disallow' ) {
			$validator = new SignatureValidator( $user, null, $this->mOptions );
			$valid = !$validator->validateSignature( $nickname );
		}

		if ( $valid ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}

		# Failed to validate; fall back to the default
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has invalid signature." );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$msgName = $user->isRegistered() ? 'signature' : 'signature-anon';

	return wfMessage( $msgName, wfEscapeWikiText( $username ), wfEscapeWikiText( $nickname ) )
		->inContentLanguage()
		->page( $this->getPage() )
		->text();
}
4696 
/**
 * Check that signature text is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|false The text unchanged if it is well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4707 
/**
 * Clean up signature text.
 *
 * Steps visible in this method: every "{{" that is not already followed by
 * the 'subst' magic word is replaced by "{{" plus that word's first
 * synonym; runs of 3 to 5 tildes are removed via cleanSigInSig(); the
 * result is preprocessed to a DOM and expanded with a fresh frame.
 *
 * @param string $text Signature text
 * @param bool $parsing When false, this method takes the parser lock,
 *  calls startParse() itself and unstrips the result before returning
 * @return string The cleaned text, or $text unchanged when the
 *  getCleanSignatures() parser option is off
 */
4719  public function cleanSig( $text, $parsing = false ) {
4720  if ( !$parsing ) {
4721  global $wgTitle;
 # Held in a local so it lives until this method returns
4722  $magicScopeVariable = $this->lock();
 # NOTE(review): only two arguments are visible in this call, while
 # startParse() is declared as ( $page, $options, $outputType, $clearState ).
 # The options and output type arguments appear to have been lost from this
 # copy of the file - TODO restore them from the upstream source.
4723  $this->startParse(
4724  $wgTitle,
4727  true
4728  );
4729  }
4730 
4731  # Option to disable this feature
4732  if ( !$this->mOptions->getCleanSignatures() ) {
4733  return $text;
4734  }
4735 
4736  # @todo FIXME: Regex doesn't respect extension tags or nowiki
4737  # => Move this logic to braceSubstitution()
4738  $substWord = $this->magicWordFactory->get( 'subst' );
 # Matches "{{" unless it is directly followed by the subst word (negative lookahead)
4739  $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
4740  $substText = '{{' . $substWord->getSynonym( 0 );
4741 
4742  $text = preg_replace( $substRegex, $substText, $text );
4743  $text = self::cleanSigInSig( $text );
4744  $dom = $this->preprocessToDom( $text );
4745  $frame = $this->getPreprocessor()->newFrame();
4746  $text = $frame->expand( $dom );
4747 
 # Unstrip only when this method started the parse itself
4748  if ( !$parsing ) {
4749  $text = $this->mStripState->unstripBoth( $text );
4750  }
4751 
4752  return $text;
4753  }
4754 
/**
 * Remove signature tildes from text, so that a signature cannot itself
 * contain a signature.
 *
 * @param string $text
 * @return string The text with every run of 3 to 5 tildes removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4766 
/**
 * Set up the parser for a parse driven from outside this class.
 *
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @param bool $clearState Whether to clear the parser state
 * @param int|null $revId When not null, stored as the current revision ID
 */
public function startExternalParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $page, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4786 
/**
 * Store the page, options and output type for a parse, and optionally
 * clear the parser state.
 *
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @param bool $clearState Whether to call clearState()
 */
private function startParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setPage( $page );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4803 
/**
 * Preprocess the text of a message.
 *
 * A call made while another call is still running (i.e. recursion) gets
 * its text back untouched.
 *
 * @param string $text Message text
 * @param ParserOptions $options
 * @param PageReference|null $page Context page; the global $wgTitle is
 *  used when this is not given
 * @return string The preprocessed text
 */
public function transformMsg( $text, ParserOptions $options, ?PageReference $page = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$page ) {
			global $wgTitle;
			$page = $wgTitle;
		}

		return $this->preprocess( $text, $page, $options );
	} finally {
		# Always release the guard. If it stayed set after an exception,
		# every later call would return its input unprocessed.
		$executing = false;
	}
}
4832 
/**
 * Register a callback for an extension tag.
 *
 * The tag name is lower-cased, the callback replaces any callback already
 * registered for it, and the tag is added to the strip list if it is not
 * on it yet.
 *
 * @param string $tag Tag name; must not contain '<', '>', CR or LF
 * @param callable $callback
 * @throws MWException If the tag name contains a forbidden character
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	// Strict comparison: a loose one would treat numeric-looking names
	// such as '1' and '01' as the same tag and skip the second.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4871 
/**
 * Forget all registered tag hooks and empty the strip list.
 */
public function clearTagHooks() {
	$this->mStripList = [];
	$this->mTagHooks = [];
}
4880 
/**
 * Register a parser function callback under a magic word ID and index all
 * of the magic word's synonyms for lookup.
 *
 * @param string $id Magic word identifier
 * @param callable $callback
 * @param int $flags Bit field of SFH_* constants; SFH_NO_HASH suppresses
 *   the leading '#' on the registered synonyms
 * @throws MWException If no magic word can be obtained for $id
 * @return callable|null The callback previously registered for $id, if any
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$aliases = $magicWord->getSynonyms();
	// 0 = case-insensitive bucket, 1 = case-sensitive bucket
	$bucket = intval( $magicWord->isCaseSensitive() );
	$withHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $aliases as $alias ) {
		if ( !$bucket ) {
			# Case-insensitive synonyms are stored lower-cased
			$alias = $this->contLang->lc( $alias );
		}
		if ( $withHash ) {
			$alias = '#' . $alias;
		}
		# Remove trailing colon
		if ( substr( $alias, -1 ) === ':' ) {
			$alias = substr( $alias, 0, -1 );
		}
		$this->mFunctionSynonyms[$bucket][$alias] = $id;
	}

	return $previous;
}
4955 
/**
 * List the IDs under which function hooks are currently registered.
 *
 * @return array Keys of the function hook registry
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}
4965 
/**
 * Replace link placeholders in $text, in place, using the parser's link
 * holder collection.
 *
 * @param string &$text Text to process; modified by reference
 * @param int $options Accepted for signature compatibility; not used
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$this->mLinkHolders->replace( $text );
}
4977 
/**
 * Internal counterpart of replaceLinkHolders(): hand $text to the link
 * holder collection, which rewrites it in place.
 *
 * @param string &$text Text to process; modified by reference
 * @param int $options Not used by this method
 */
private function replaceLinkHoldersPrivate( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
4988 
/**
 * Run the link holder collection's replaceText() over a string and return
 * the result.
 *
 * @param string $text
 * @return string
 */
private function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );
	return $replaced;
}
4999 
/**
 * Render the contents of a gallery tag into HTML.
 *
 * Each non-empty line of $text has the form "File name|options and
 * caption". Lines whose title cannot be parsed are skipped.
 *
 * @param string $text Newline-separated gallery lines
 * @param array $params Tag attributes; 'mode', 'showfilename', 'caption',
 *   'perrow', 'widths' and 'heights' are interpreted here, and the whole
 *   array is also passed on as additional options
 * @return string The gallery HTML as produced by the gallery object
 *   (hooks may modify it)
 */
public function renderImageGallery( $text, array $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->getTitle() );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	$ig->setShowFilename( isset( $params['showfilename'] ) );
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	$this->hookRunner->onBeforeParserrenderImageGallery( $this, $ig );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) === 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# Resolve the file title, defaulting to the File namespace. Without
		# this assignment $title would be undefined below.
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( $title === null ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		$this->hookRunner->onBeforeParserFetchFileAndTitle(
			$this, $title, $options, $descQuery );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-',
				'|',
				$matches[3],
				true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( !$magicName ) {
					// Last pipe wins.
					$label = $parameterMatch;
					continue;
				}

				$paramName = $paramMap[$magicName];
				switch ( $paramName ) {
					case 'gallery-internal-alt':
						$alt = $this->stripAltText( $match, false );
						break;
					case 'gallery-internal-link':
						$linkValue = $this->stripAltText( $match, false );
						// The pipe must be escaped: unescaped it turns the pattern
						// into an alternation that matches any value starting with
						// "-{R" or ending with "}-", which then gets truncated.
						if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
							// Result of LanguageConverter::markNoConversion
							// invoked on an external link.
							$linkValue = substr( $linkValue, 4, -2 );
						}
						list( $type, $target ) = $this->parseLinkParameter( $linkValue );
						if ( $type === 'link-url' ) {
							$link = $target;
							$this->mOutput->addExternalLink( $target );
						} elseif ( $type === 'link-title' ) {
							$link = $target->getLinkURL();
							$this->mOutput->addLink( $target );
						}
						break;
					default:
						// Must be a handler specific parameter.
						if ( $handler->validateParam( $paramName, $match ) ) {
							$handlerOptions[$paramName] = $match;
						} else {
							// Guess not, consider it as caption.
							$this->logger->debug(
								"$parameterMatch failed parameter validation" );
							$label = $parameterMatch;
						}
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	$this->hookRunner->onAfterParserFetchFileAndTitle( $this, $ig, $html );
	return $html;
}
5173 
/**
 * Build (and cache per handler class) the map of image parameter magic
 * words and the matching magic word array.
 *
 * @param object|false $handler Media handler, or a falsy value when there
 *   is none (e.g. the file does not exist)
 * @return array Two elements: the map magic word ID => [ type, name ], and
 *   the magic word array created from that map's keys
 */
private function getImageParams( $handler ) {
	# Initialise static lists
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;

	$cacheKey = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$cacheKey] ) ) {
		return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
	}

	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $group => $optionNames ) {
			foreach ( $optionNames as $optionName ) {
				// For grep: img_left, img_right, img_center, img_none,
				// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
				// img_bottom, img_text_bottom,
				// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
				// img_border, img_link, img_alt, img_class
				$magicId = str_replace( '-', '_', "img_$optionName" );
				$internalParamMap[$magicId] = [ $group, $optionName ];
			}
		}
	}

	# Start from the built-in parameters, then layer handler params on top
	$combined = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
			$combined[$magicId] = [ 'handler', $handlerParam ];
		}
	} else {
		// Parse the size for non-existent files. See T273013
		$combined[ 'img_width' ] = [ 'handler', 'width' ];
	}

	$this->mImageParams[$cacheKey] = $combined;
	$this->mImageParamsMagicArray[$cacheKey] =
		$this->magicWordFactory->newArray( array_keys( $combined ) );

	return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
}
5226 
/**
 * Parse image options text and hand the result to Linker::makeImageLink().
 *
 * The options text is of the form "options|alt text". Options are:
 *  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
 *  * left       no resizing, just left align. label is used for alt= only
 *  * right      same, but right aligned
 *  * none       same, but not aligned
 *  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
 *  * center     center the image
 *  * frame      Keep original image size, no magnify-button.
 *  * framed     Same as "frame"
 *  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
 *  * upright    reduce width for upright images, rounded to full __0 px
 *  * border     draw a 1px border around the image
 *  * alt        Text for HTML alt attribute (defaults to empty)
 *  * class      Set a class for img node
 *  * link       Set the target of the image link. Can be external, interwiki, or local
 * vertical-align values (no % or length right now):
 *  * baseline, sub, super, top, text-top, middle, bottom, text-bottom
 *
 * @param LinkTarget $link Target of the image; may be replaced by hooks
 * @param string $options Pipe-separated option text
 * @param object|false $holders When truthy, its replaceText() is used by
 *   stripAltText() instead of the parser's own link holders
 * @return string Whatever Linker::makeImageLink() returns for the image
 */
public function makeImage( LinkTarget $link, $options, $holders = false ) {
	# Protect LanguageConverter markup when splitting into parts
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	$options = [];
	$descQuery = false;
	$title = Title::castFromLinkTarget( $link ); // hook signature compat
	$this->hookRunner->onBeforeParserFetchFileAndTitle(
		$this, $title, $options, $descQuery );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $link ) = $this->fetchFileAndTitle( $link, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				// Parsoid applies data-(width|height) attributes to broken
				// media spans, for client use. See T273013
				$validateFunc = static function ( $name, $value ) use ( $handler ) {
					return $handler
						? $handler->validateParam( $name, $value )
						: $value > 0;
				};
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $validateFunc( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $validateFunc( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		if ( !$validated ) {
			# Anything that is not a recognised, valid option is the caption;
			# the last such part wins.
			$caption = $part;
		}
	}

	# Process alignment parameters
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text. Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers. It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text. This is the framed/thumbnail case. If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $link->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $link->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	// hook signature compat again, $link may have changed
	$title = Title::castFromLinkTarget( $link );
	$this->hookRunner->onParserMakeImageParams( $title, $file, $params, $this );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $link, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5449 
/**
 * Classify the value of an image "link" parameter and register the link
 * with the parser output.
 *
 * @param string $value
 * @return array Two elements, [ type, target ]:
 *   - [ 'no-link', false ] for the empty string
 *   - [ 'link-url', string ] for a value that is a valid external URL
 *   - [ 'link-title', Title ] for a value that parses as a title
 *   - [ null, false ] otherwise
 */
private function parseLinkParameter( $value ) {
	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	$prots = $this->mUrlProtocols;
	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		// Looks like a URL; it only counts if the whole value is one.
		$chars = self::EXT_LINK_URL_CLASS;
		$addr = self::EXT_LINK_ADDR;
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );
	return [ 'link-title', $linkTitle ];
}
5492 
/**
 * Turn caption wikitext output into plain text suitable for an attribute
 * value (alt text or tooltip).
 *
 * Steps: replace link holders, unstrip strip markers, encode '&' where it
 * starts a legacy semicolon-less entity, then remove all tags.
 *
 * @param string $caption
 * @param object|false $holders When truthy, its replaceText() is used
 *   instead of the parser's own link holders
 * @return string
 */
5498  private function stripAltText( $caption, $holders ) {
5499  # Strip bad stuff out of the title (tooltip). We can't just use
5500  # replaceLinkHoldersText() here, because if this function is called
5501  # from handleInternalLinks2(), mLinkHolders won't be up-to-date.
5502  if ( $holders ) {
5503  $tooltip = $holders->replaceText( $caption );
5504  } else {
5505  $tooltip = $this->replaceLinkHoldersText( $caption );
5506  }
5507 
5508  # make sure there are no placeholders in thumbnail attributes
5509  # that are later expanded to html- so expand them now and
5510  # remove the tags
5511  $tooltip = $this->mStripState->unstripBoth( $tooltip );
5512  # Compatibility hack! In HTML certain entity references not terminated
5513  # by a semicolon are decoded (but not if we're in an attribute; that's
5514  # how link URLs get away without properly escaping & in queries).
5515  # But wikitext has always required semicolon-termination of entities,
5516  # so encode & where needed to avoid decode of semicolon-less entities.
5517  # See T209236 and
5518  # https://www.w3.org/TR/html5/syntax.html#named-character-references
5519  # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
 # The pattern uses the x (extended) modifier, so the whitespace and
 # "# ..." annotations inside the literal are part of the regex source
 # but are ignored by the regex engine.
5520  $tooltip = preg_replace( "/
5521  & # 1. entity prefix
5522  (?= # 2. followed by:
5523  (?: # a. one of the legacy semicolon-less named entities
5524  A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
5525  C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
5526  GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
5527  O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
5528  U(?:acute|circ|grave|uml)|Yacute|
5529  a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
5530  c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
5531  divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
5532  frac(?:1(?:2|4)|34)|
5533  gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
5534  i(?:acute|circ|excl|grave|quest|uml)|laquo|
5535  lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
5536  m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
5537  not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
5538  o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
5539  p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
5540  s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
5541  u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
5542  )
5543  (?:[^;]|$)) # b. and not followed by a semicolon
5544  # S = study, for efficiency
5545  /Sx", '&amp;', $tooltip );
5546  $tooltip = Sanitizer::stripAllTags( $tooltip );
5547 
5548  return $tooltip;
5549  }
5550 
/**
 * Expand variables in $text and unstrip the result.
 *
 * Emits a deprecation notice (deprecated since 1.35). $text is updated in
 * place after each step and the final value is also returned.
 *
 * @param string &$text
 * @param PPFrame|false $frame Forwarded to replaceVariables()
 * @return string
 */
public function attributeStripCallback( &$text, $frame = false ) {
	wfDeprecated( __METHOD__, '1.35' );

	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5566 
/**
 * List the names of all tags that currently have a hook registered.
 *
 * @return array Keys of the tag hook registry
 */
public function getTags() {
	$tagNames = array_keys( $this->mTagHooks );
	return $tagNames;
}
5576 
/**
 * Return the function synonym lookup table filled by setFunctionHook().
 *
 * @return array Indexed first by case-sensitivity (0 or 1), then by
 *   synonym; values are magic word IDs
 */
public function getFunctionSynonyms() {
	return $this->mFunctionSynonyms;
}
5584 
/**
 * Return the URL protocols value held by this parser. Elsewhere in this
 * class it is interpolated into regular expressions.
 *
 * @return string
 */
public function getUrlProtocols() {
	return $this->mUrlProtocols;
}
5592 
/**
 * Shared implementation of getSection() and replaceSection(): walk the
 * top-level preprocessor nodes of $text and either collect the original
 * wikitext of one section (mode 'get') or rebuild the whole text with that
 * section replaced (mode 'replace').
 *
 * NOTE(review): this listing skips embedded lines 5628-5629 (two of the
 * arguments to startParse()) and 5641 (the body of the 'T' flag check);
 * confirm against the upstream file before relying on this block.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Hyphen-separated; the last part is the
 *   section index, earlier parts are flags ('T' is the only one checked)
 * @param string $mode 'get' or 'replace'
 * @param string $newText Replacement text in 'replace' mode; in 'get' mode
 *   the value returned when the section is not found
 * @return string
 */
5622  private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
5623  global $wgTitle; # not generally used but removes an ugly failure mode
5624 
 # Keep the lock's return value in a local so it lives until this method returns
5625  $magicScopeVariable = $this->lock();
5626  $this->startParse(
5627  $wgTitle,
5630  true
5631  );
5632  $outText = '';
5633  $frame = $this->getPreprocessor()->newFrame();
5634 
5635  # Process section extraction flags
5636  $flags = 0;
5637  $sectionParts = explode( '-', $sectionId );
5638  $sectionIndex = array_pop( $sectionParts );
5639  foreach ( $sectionParts as $part ) {
5640  if ( $part === 'T' ) {
5642  }
5643  }
5644 
5645  # Check for empty input
5646  if ( strval( $text ) === '' ) {
5647  # Only sections 0 and T-0 exist in an empty document
5648  if ( $sectionIndex == 0 ) {
5649  if ( $mode === 'get' ) {
5650  return '';
5651  }
5652 
5653  return $newText;
5654  } else {
5655  if ( $mode === 'get' ) {
5656  return $newText;
5657  }
5658 
5659  return $text;
5660  }
5661  }
5662 
5663  # Preprocess the text
5664  $root = $this->preprocessToDom( $text, $flags );
5665 
5666  # <h> nodes indicate section breaks
5667  # They can only occur at the top level, so we can find them by iterating the root's children
5668  $node = $root->getFirstChild();
5669 
5670  # Find the target section
5671  if ( $sectionIndex == 0 ) {
5672  # Section zero doesn't nest, level=big
5673  $targetLevel = 1000;
5674  } else {
5675  while ( $node ) {
5676  if ( $node->getName() === 'h' ) {
5677  $bits = $node->splitHeading();
5678  if ( $bits['i'] == $sectionIndex ) {
5679  $targetLevel = $bits['level'];
5680  break;
5681  }
5682  }
 # In replace mode, everything before the target section is copied through
5683  if ( $mode === 'replace' ) {
5684  $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5685  }
5686  $node = $node->getNextSibling();
5687  }
5688  }
5689 
5690  if ( !$node ) {
5691  # Not found
5692  if ( $mode === 'get' ) {
5693  return $newText;
5694  } else {
5695  return $text;
5696  }
5697  }
5698 
5699  # Find the end of the section, including nested sections
5700  do {
5701  if ( $node->getName() === 'h' ) {
5702  $bits = $node->splitHeading();
5703  $curLevel = $bits['level'];
 # Stop at the next heading of the same or a higher level (lower number)
5704  if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
5705  break;
5706  }
5707  }
5708  if ( $mode === 'get' ) {
5709  $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5710  }
5711  $node = $node->getNextSibling();
5712  } while ( $node );
5713 
5714  # Write out the remainder (in replace mode only)
5715  if ( $mode === 'replace' ) {
5716  # Output the replacement text
5717  # Add two newlines on -- trailing whitespace in $newText is conventionally
5718  # stripped by the editor, so we need both newlines to restore the paragraph gap
5719  # Only add trailing whitespace if there is newText
5720  if ( $newText != "" ) {
5721  $outText .= $newText . "\n\n";
5722  }
5723 
5724  while ( $node ) {
5725  $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5726  $node = $node->getNextSibling();
5727  }
5728  }
5729 
5730  # Re-insert stripped tags
5731  $outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
5732 
5733  return $outText;
5734  }
5735 
/**
 * Fetch the wikitext of one section of a page.
 *
 * Thin wrapper around extractSections() in 'get' mode.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier as understood by
 *   extractSections()
 * @param string $defaultText Returned when the section is not found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$sectionText = $this->extractSections( $text, $sectionId, 'get', $defaultText );
	return $sectionText;
}
5754 
/**
 * Return the page wikitext with one section replaced.
 *
 * Thin wrapper around extractSections() in 'replace' mode.
 *
 * @param string $oldText Current page wikitext
 * @param string|int $sectionId Section identifier as understood by
 *   extractSections()
 * @param string $newText Text to put in place of the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$merged = $this->extractSections( $oldText, $sectionId, 'replace', $newText );
	return $merged;
}
5771 
/**
 * Split wikitext into a flat list of sections.
 *
 * The first entry is always the lead section (index 0, level 0, empty
 * heading), even if it has no text. Every top-level heading node starts a
 * new entry.
 *
 * NOTE(review): this listing skips embedded lines 5805-5806 (two of the
 * arguments to startParse()); confirm against the upstream file.
 *
 * @param string $text Page wikitext
 * @return array[] One array per section with the keys:
 *   - index: section index taken from the heading node (0 for the lead)
 *   - level: heading level (0 for the lead)
 *   - offset: byte offset (strlen-based) of the section start in the
 *     recovered original text
 *   - heading: original wikitext of the heading node ('' for the lead)
 *   - text: original wikitext of the whole section, heading included
 */
5801  public function getFlatSectionInfo( $text ) {
 # Keep the lock's return value in a local so it lives until this method returns
5802  $magicScopeVariable = $this->lock();
5803  $this->startParse(
5804  null,
5807  true
5808  );
5809  $frame = $this->getPreprocessor()->newFrame();
5810  $root = $this->preprocessToDom( $text, 0 );
5811  $node = $root->getFirstChild();
5812  $offset = 0;
5813  $currentSection = [
5814  'index' => 0,
5815  'level' => 0,
5816  'offset' => 0,
5817  'heading' => '',
5818  'text' => ''
5819  ];
5820  $sections = [];
5821 
5822  while ( $node ) {
5823  $nodeText = $frame->expand( $node, PPFrame::RECOVER_ORIG );
5824  if ( $node->getName() === 'h' ) {
 # A heading closes the section collected so far and opens a new one
5825  $bits = $node->splitHeading();
5826  $sections[] = $currentSection;
5827  $currentSection = [
5828  'index' => $bits['i'],
5829  'level' => $bits['level'],
5830  'offset' => $offset,
5831  'heading' => $nodeText,
5832  'text' => $nodeText
5833  ];
5834  } else {
5835  $currentSection['text'] .= $nodeText;
5836  }
5837  $offset += strlen( $nodeText );
5838  $node = $node->getNextSibling();
5839  }
 # Flush the last (or only) section
5840  $sections[] = $currentSection;
5841  return $sections;
5842  }
5843 
/**
 * Return the revision ID this parse is associated with.
 *
 * @return int|null Null when no revision ID is set (other code in this
 *   class treats that as preview mode)
 */
public function getRevisionId() {
	return $this->mRevisionId;
}
5858 
/**
 * Get the revision record for the revision being parsed, caching it on
 * the parser once resolved.
 *
 * @return RevisionRecord|null Null when the revision is missing, or when
 *   parsing a preview and the callback only offered a saved revision
 */
public function getRevisionRecordObject() {
	if ( $this->mRevisionRecordObject ) {
		return $this->mRevisionRecordObject;
	}

	// NOTE: the callback is invoked even when mRevisionId is null, so that
	// an unsaved revision can be injected (useful when parsing a revision
	// that has not been saved yet). A saved revision returned in preview
	// mode is discarded further down.
	// NOTE: the callback may also inject an OLD revision that was already
	// loaded, so "current" is a bit of a misnomer and it cannot be skipped
	// just because mRevisionId is set.
	$callback = $this->mOptions->getCurrentRevisionRecordCallback();
	$rev = call_user_func( $callback, $this->getTitle(), $this );

	// The callback signals "missing" with `false` (see for example
	// Parser::statelessFetchRevisionRecord(), the default callback), while
	// this API promises `null`. (T251952)
	if ( $rev === false ) {
		$rev = null;
	}

	$previewing = ( $this->mRevisionId === null );
	if ( $previewing && $rev && $rev->getId() ) {
		// Preview mode, yet the callback returned an existing revision:
		// most likely the page's current revision, which is not what we
		// want. The callback still had to run so an unsaved revision could
		// be injected, e.g. for self-transclusion previews.
		return null;
	}

	// When parsing a new revision the callback should already yield the
	// object matching mRevisionId. If it does not, fetch by ID instead.
	if ( $this->mRevisionId && $rev && $rev->getId() != $this->mRevisionId ) {
		$lookup = MediaWikiServices::getInstance()->getRevisionLookup();
		$rev = $lookup->getRevisionById( $this->mRevisionId );
	}

	$this->mRevisionRecordObject = $rev;

	return $this->mRevisionRecordObject;
}
5913 
/**
 * Get the timestamp of the revision being parsed, adjusted by the content
 * language, computing and caching it on first use.
 *
 * The unadjusted timestamp is recorded on the parser output as "used".
 *
 * @return string
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Prefer the revision's own timestamp; without a revision fall back
		# to the timestamp from the parser options
		$revision = $this->getRevisionRecordObject();
		if ( $revision ) {
			$rawTimestamp = $revision->getTimestamp();
		} else {
			$rawTimestamp = $this->mOptions->getTimestamp();
		}
		$this->mOutput->setRevisionTimestampUsed( $rawTimestamp ); // unadjusted time zone

		# The '' timezone argument selects the site-default offset rather
		# than the user's setting. The value ends up in the parser cache and
		# is served to other users (possibly inside links), so it has to be
		# the same for every visitor.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $rawTimestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
5939 
/**
 * Get the name of the user who made the revision being parsed.
 *
 * @return string|null Null when there is no revision user and the parse
 *   is neither a pre-save transform nor a preview
 */
public function getRevisionUser(): ?string {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionRecordObject();

	if ( $revision && $revision->getUser() ) {
		$this->mRevisionUser = $revision->getUser()->getName();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# When the template is subst: there is no revision ID yet, so the
		# current user's name is used instead
		$this->mRevisionUser = $this->getUser()->getName();
	}
	# Otherwise mRevisionUser intentionally stays null

	return $this->mRevisionUser;
}
5963 
/**
 * Get the size of the revision being parsed, cached after the first call.
 *
 * @return int|null
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revision = $this->getRevisionRecordObject();

	# When the variable is subst: there is no revision yet. The parser
	# input size is used instead, since the substitution itself will
	# change the size.
	$this->mRevisionSize = $revision
		? $revision->getSize()
		: $this->mInputSize;

	return $this->mRevisionSize;
}
5985 
/**
 * Set the default sort key, both on the parser and as the 'defaultsort'
 * property of the parser output.
 *
 * @param string $sort
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;
	$output = $this->mOutput;
	$output->setProperty( 'defaultsort', $sort );
}
5996 
/**
 * Get the default sort key, or the empty string when the stored value is
 * exactly false (no key set).
 *
 * @return string
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6015 
/**
 * Return the stored default sort key as is, without the empty-string
 * substitution getDefaultSort() applies.
 *
 * @return string|false
 */
public function getCustomDefaultSort() {
	return $this->mDefaultSort;
}
6026 
/**
 * Derive a section name from heading text that has already had its markup
 * stripped: decode character references, then apply
 * normalizeSectionName().
 *
 * NOTE(review): this listing skips embedded line 6028, so one statement
 * that runs before the decoding step may be missing here; confirm against
 * the upstream file.
 *
 * @param string $text
 * @return string
 */
6027  private static function getSectionNameFromStrippedText( $text ) {
6029  $text = Sanitizer::decodeCharReferences( $text );
6030  $text = self::normalizeSectionName( $text );
6031  return $text;
6032  }
6033 
/**
 * Build a '#'-prefixed link fragment for a section name, escaped with
 * Sanitizer::escapeIdForLink().
 *
 * @param string $sectionName
 * @return string
 */
private static function makeAnchor( $sectionName ) {
	$escaped = Sanitizer::escapeIdForLink( $sectionName );
	return '#' . $escaped;
}
6037 
/**
 * Build a '#'-prefixed fragment for a section name, using the fallback
 * (legacy) ID encoding when the second entry of the FragmentMode setting
 * is 'legacy'.
 *
 * @param string $sectionName
 * @return string
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );
	if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
		// ForAttribute() and ForLink() are the same for legacy encoding.
		// This branch previously left $id unassigned, so the method
		// returned a bare "#" (plus an undefined-variable warning).
		$id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
	} else {
		$id = Sanitizer::escapeIdForLink( $sectionName );
	}

	return "#$id";
}
6049 
/**
 * Guess the link fragment ('#...') that a heading written as wikitext
 * will get.
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip them (and other markup) first
	$stripped = $this->stripSectionName( $text );
	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6065 
/**
 * Same as guessSectionNameFromWikiText(), but builds the fragment with
 * makeLegacyAnchor().
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip them (and other markup) first
	$stripped = $this->stripSectionName( $text );
	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6082 
/**
 * Build the link fragment ('#...') for heading text whose markup has
 * already been stripped.
 *
 * @param string $text
 * @return string
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6093 
/**
 * Apply the same normalization as code making links to this section would.
 *
 * The text is parsed as the fragment part of a title ("#text") and the
 * resulting fragment is returned. If the title parser rejects the string,
 * the input is returned unchanged.
 *
 * @param string $text Section name
 * @return string Normalized section name, or $text on a malformed title
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';
	try {
		return $titleParser->splitTitleString( "#$text" )['fragment'];
	} catch ( MalformedTitleException $ex ) {
		// Not parseable as a title fragment: fall back to the raw text
		return $text;
	}
}
6113 
/**
 * Strip markup from a heading's wikitext so it can be used as a section name.
 *
 * In order: internal links are reduced to their label (or target when there
 * is no label), external links with link text are reduced to that text,
 * quote markup is run through doQuotes(), and finally anything between
 * '<' and '>' is removed.
 *
 * @param string $text Wikitext of the heading
 * @return string Text with the markup above removed
 */
public function stripSectionName( $text ) {
	# Strip internal link markup
	$pipedLinkRegex = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLinkRegex = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLinkRegex, '$2', $text );
	$text = preg_replace( $plainLinkRegex, '$1', $text );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLinkRegex = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLinkRegex, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6147 
/**
 * Strip/replaceVariables/unstrip for preprocessor regression testing.
 *
 * @param string $text Wikitext to process
 * @param PageReference $page Page used as parsing context
 * @param ParserOptions $options Parser options for this run
 * @param int $outputType One of the OT_* constants
 * @return string Text after variable replacement, unstripping and HTML tag removal
 */
private function fuzzTestSrvus(
	$text,
	PageReference $page,
	ParserOptions $options,
	$outputType = self::OT_HTML
) {
	// Hold the lock in a local for the whole call; presumably it is released
	// when this variable goes out of scope (see lock()).
	$parseLock = $this->lock();
	$this->startParse( $page, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );
	return Sanitizer::removeHTMLtags( $unstripped );
}
6169 
/**
 * Fuzz-test helper: run preSaveTransform() using the user from the options.
 *
 * @param string $text Wikitext to transform
 * @param PageReference $page Page used as context
 * @param ParserOptions $options Parser options; also supplies the user
 * @return string Result of preSaveTransform()
 */
private function fuzzTestPst( $text, PageReference $page, ParserOptions $options ) {
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $page, $user, $options );
}
6179 
/**
 * Fuzz-test helper: run fuzzTestSrvus() with the preprocess output type.
 *
 * @param string $text Wikitext to process
 * @param PageReference $page Page used as context
 * @param ParserOptions $options Parser options for this run
 * @return string Result of fuzzTestSrvus() in OT_PREPROCESS mode
 */
private function fuzzTestPreprocess( $text, PageReference $page, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;
	return $this->fuzzTestSrvus( $text, $page, $options, $outputType );
}
6189 
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return value
 * of the callback. Strip markers themselves are copied through unchanged.
 *
 * A marker starts at MARKER_PREFIX and ends at the next MARKER_SUFFIX. If a
 * prefix has no matching suffix, the rest of the string from that prefix on
 * is copied verbatim without being passed to the callback.
 *
 * @param string $s Text to process
 * @param callable $callback Receives a text region, returns its replacement
 * @return string Processed text
 */
public function markerSkipCallback( $s, callable $callback ) {
	$result = '';
	$totalLength = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$pos = 0;

	while ( $pos < $totalLength ) {
		$prefixAt = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $prefixAt === false ) {
			// No further markers: the remainder is ordinary text
			$result .= $callback( substr( $s, $pos ) );
			break;
		}

		// Ordinary text in front of the marker (may be empty)
		$result .= $callback( substr( $s, $pos, $prefixAt - $pos ) );

		$suffixAt = strpos( $s, self::MARKER_SUFFIX, $prefixAt );
		if ( $suffixAt === false ) {
			// Unterminated marker: keep the tail as-is
			$result .= substr( $s, $prefixAt );
			break;
		}

		// Copy the complete marker and continue right after it
		$markerEnd = $suffixAt + $suffixLength;
		$result .= substr( $s, $prefixAt, $markerEnd - $prefixAt );
		$pos = $markerEnd;
	}

	return $result;
}
6231 
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the killMarkers() method of the current strip state.
 *
 * @param string $text Text possibly containing strip markers
 * @return string Result returned by the strip state
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
6242 
/**
 * Parse the value of a width[xheight] parameter.
 *
 * Accepted forms are "<width>x<height>" (only when $parseHeight is true) and
 * "<width>", each optionally followed by "px". Missing digits yield 0.
 *
 * @param string $value Raw parameter value
 * @param bool $parseHeight Whether the "WxH" form is recognised
 * @return array Empty when $value is '' or unparseable; otherwise contains
 *  'width' and, for the "WxH" form, 'height' (both integers)
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$match = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $match ) ) {
		return [
			'width' => intval( $match[1] ),
			'height' => intval( $match[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6272 
/**
 * Lock the current instance of the parser.
 *
 * While locked, $mInParse holds the backtrace of the lock owner. The
 * returned ScopedCallback resets $mInParse to false when its callback runs
 * (presumably when the returned object goes out of scope — confirm against
 * ScopedCallback).
 *
 * @throws MWException If the parser is already locked
 * @return ScopedCallback Holder whose callback releases the lock
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Save the backtrace when locking, so that if some code tries locking again,
	// we can print the lock owner's backtrace for easier debugging
	$this->mInParse = ( new Exception )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6299 
/**
 * Strip the outer <p></p> pair from HTML that consists of exactly one
 * paragraph.
 *
 * The input is returned unchanged when it is not wrapped in a single
 * <p>...</p> (optionally followed by a newline), or when the wrapped content
 * itself contains a closing </p>, i.e. there is more than one paragraph.
 *
 * @param string $html HTML fragment
 * @return string Inner HTML of the single paragraph, or $html unchanged
 */
public static function stripOuterParagraph( $html ) {
	$match = [];
	if ( !preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $match ) ) {
		return $html;
	}

	$inner = $match[1];
	// A nested closing tag means several paragraphs were matched as one
	return strpos( $inner, '</p>' ) === false ? $inner : $html;
}
6318 
/**
 * Return this parser if it is not doing anything, otherwise get a fresh
 * parser from the factory.
 *
 * "Doing anything" is decided by $mInParse, which lock() sets while a parse
 * is in progress.
 *
 * @return Parser This instance, or a newly created parser
 */
public function getFreshParser() {
	return $this->mInParse ? $this->factory->create() : $this;
}
6336 
/**
 * Set up the PHP implementation of OOUI for use in this request and instruct
 * the parser output to enable OOUI.
 *
 * @deprecated since 1.35; a deprecation warning is logged on every call
 */
public function enableOOUI() {
	wfDeprecated( __METHOD__, '1.35' );
	// Initialise OOUI itself; flagging the output alone is not enough for
	// widgets rendered during this request.
	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
6349 
/**
 * Sets the flag on the parser output but also does some debug logging.
 *
 * @param string $flag Name of the flag to set on the ParserOutput
 * @param string $reason Explanation appended to the debug log entry
 */
private function setOutputFlag( string $flag, string $reason ): void {
	$this->mOutput->setFlag( $flag );

	// Log which page the flag was set on, and why
	$name = $this->getTitle()->getPrefixedText();
	$message = __METHOD__ . ": set $flag flag on '$name'; $reason";
	$this->logger->debug( $message );
}
6361 }
Parser\$badFileLookup
BadFileLookup $badFileLookup
Definition: Parser.php:345
Page\PageIdentity
Interface for objects (potentially) representing an editable wiki page.
Definition: PageIdentity.php:64
Parser\getFunctionHooks
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:4962
Parser\$mLinkRenderer
LinkRenderer $mLinkRenderer
Definition: Parser.php:306
Parser\$mForceTocPosition
$mForceTocPosition
Definition: Parser.php:233
Parser\recursivePreprocess
recursivePreprocess( $text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:932
Parser\getContentLanguageConverter
getContentLanguageConverter()
Shorthand for getting a Language Converter for Content language.
Definition: Parser.php:1643
Parser\transformMsg
transformMsg( $text, ParserOptions $options, ?PageReference $page=null)
Wrapper for preprocess()
Definition: Parser.php:4813
ParserOptions
Set options of the Parser.
Definition: ParserOptions.php:45
Parser\attributeStripCallback
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition: Parser.php:5560
PPFrame\loopCheck
loopCheck( $title)
Returns true if the infinite loop check is OK, false if a loop is detected.
Parser\$mSubstWords
MagicWordArray $mSubstWords
Definition: Parser.php:178
Parser\$linkRendererFactory
LinkRendererFactory $linkRendererFactory
Definition: Parser.php:336
Sanitizer\ID_FALLBACK
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition: Sanitizer.php:78
Parser\maybeMakeExternalImage
maybeMakeExternalImage( $url)
make an image if it's allowed, either through the global option, through the exception,...
Definition: Parser.php:2355
MagicWordArray
Class for handling an array of magic words.
Definition: MagicWordArray.php:32
Parser\EXT_LINK_ADDR
const EXT_LINK_ADDR
Definition: Parser.php:104
MediaWiki\Revision\RevisionAccessException
Exception representing a failure to look up a revision.
Definition: RevisionAccessException.php:34
MediaWiki\Linker\LinkTarget\isSameLinkAs
isSameLinkAs(LinkTarget $other)
Checks whether the given LinkTarget refers to the same target as this LinkTarget.
FauxRequest
WebRequest clone which takes values from a provided array.
Definition: FauxRequest.php:35
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:415
Parser\$mInputSize
$mInputSize
Definition: Parser.php:272
PPFrame\STRIP_COMMENTS
const STRIP_COMMENTS
Definition: PPFrame.php:31
Parser\SPACE_NOT_NL
const SPACE_NOT_NL
Definition: Parser.php:111
Parser\$titleFormatter
TitleFormatter $titleFormatter
Definition: Parser.php:324
HtmlArmor
Marks HTML that shouldn't be escaped.
Definition: HtmlArmor.php:30
MediaWiki\Revision\RevisionRecord
Page revision base class.
Definition: RevisionRecord.php:47
MediaWiki\Linker\LinkTarget\getText
getText()
Returns the link in text form, without namespace prefix or fragment.
Parser\__destruct
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:484
Preprocessor\DOM_FOR_INCLUSION
const DOM_FOR_INCLUSION
Transclusion mode flag for Preprocessor::preprocessToObj()
Definition: Preprocessor.php:29
ParserOutput
Definition: ParserOutput.php:31
Parser\$mLinkHolders
LinkHolderArray $mLinkHolders
Definition: Parser.php:205
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:72
Parser\makeImage
makeImage(LinkTarget $link, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5236
Parser\braceSubstitution
braceSubstitution(array $piece, PPFrame $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:2971
Parser\makeLimitReport
makeLimitReport()
Set the limit report data in the current ParserOutput, and return the limit report HTML comment.
Definition: Parser.php:713
MagicWordFactory
A factory that stores information about MagicWords, and creates them on demand with caching.
Definition: MagicWordFactory.php:37
Parser\internalParseHalfParsed
internalParseHalfParsed( $text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1681
Parser\$userFactory
UserFactory $userFactory
Definition: Parser.php:360
Parser\stripAltText
stripAltText( $caption, $holders)
Definition: Parser.php:5498
Parser\killMarkers
killMarkers( $text)
Remove any strip markers found in the given text.
Definition: Parser.php:6239
Sanitizer\stripAllTags
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed,...
Definition: Sanitizer.php:1576
Parser\$mTagHooks
$mTagHooks
Definition: Parser.php:153
Parser\OutputType
OutputType( $x=null)
Accessor/mutator for the output type.
Definition: Parser.php:1070
Parser\$currentRevisionCache
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:290
Parser\setOutputFlag
setOutputFlag(string $flag, string $reason)
Sets the flag on the parser output but also does some debug logging.
Definition: Parser.php:6356
Parser\enableOOUI
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6344
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:186
Linker\makeSelfLinkObj
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:165
Parser\$mTplDomCache
array $mTplDomCache
Definition: Parser.php:235
MediaWiki\BadFileLookup
Definition: BadFileLookup.php:13
PPFrame\NO_ARGS
const NO_ARGS
Definition: PPFrame.php:29
Parser\statelessFetchRevisionRecord
static statelessFetchRevisionRecord(LinkTarget $link, $parser=null)
Wrapper around RevisionLookup::getKnownCurrentRevision.
Definition: Parser.php:3530
wfSetVar
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
Definition: GlobalFunctions.php:1512
Parser\parseExtensionTagAsTopLevelDoc
parseExtensionTagAsTopLevelDoc( $text)
Needed by Parsoid/PHP to ensure all the hooks for extensions are run in the right order.
Definition: Parser.php:890
Parser\$mDoubleUnderscores
$mDoubleUnderscores
Definition: Parser.php:228
Linker\tocIndent
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1837
Parser\getRevisionSize
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:5970
Sanitizer\escapeIdForAttribute
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
Definition: Sanitizer.php:811
Sanitizer\removeHTMLtags
static removeHTMLtags( $text, $processCallback=null, $args=[], $extratags=[], $removetags=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments.
Definition: Sanitizer.php:239
Parser\handleExternalLinks
handleExternalLinks( $text)
Replace external links (REL)
Definition: Parser.php:2139
Parser\$mOutputType
$mOutputType
Definition: Parser.php:260
MediaWiki\Linker\LinkRenderer
Class that generates HTML links for pages.
Definition: LinkRenderer.php:43
ParserOptions\getDisableTitleConversion
getDisableTitleConversion()
Whether title conversion should be disabled.
Definition: ParserOptions.php:568
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1692
Parser\handleHeadings
handleHeadings( $text)
Parse headers and return html.
Definition: Parser.php:1918
MediaWiki\SpecialPage\SpecialPageFactory
Factory for handling the special page list and generating SpecialPage objects.
Definition: SpecialPageFactory.php:63
$wgNoFollowDomainExceptions
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
Definition: DefaultSettings.php:5037
Parser\handleAllQuotes
handleAllQuotes( $text)
Replace single quotes with HTML markup.
Definition: Parser.php:1935
Parser\$mUrlProtocols
$mUrlProtocols
Definition: Parser.php:181
Parser\extractTagsAndParams
static extractTagsAndParams(array $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:1256
Parser\$mLinkID
int $mLinkID
Definition: Parser.php:211
OT_HTML
const OT_HTML
Definition: Defines.php:157
SFH_NO_HASH
const SFH_NO_HASH
Definition: Defines.php:170
Parser\handleDoubleUnderscore
handleDoubleUnderscore( $text)
Strip double-underscore items like NOGALLERY and NOTOC Fills $this->mDoubleUnderscores,...
Definition: Parser.php:4039
Parser\__construct
__construct(ServiceOptions $svcOptions, MagicWordFactory $magicWordFactory, Language $contLang, ParserFactory $factory, string $urlProtocols, SpecialPageFactory $spFactory, LinkRendererFactory $linkRendererFactory, NamespaceInfo $nsInfo, LoggerInterface $logger, BadFileLookup $badFileLookup, LanguageConverterFactory $languageConverterFactory, HookContainer $hookContainer, TidyDriverBase $tidy, WANObjectCache $wanCache, UserOptionsLookup $userOptionsLookup, UserFactory $userFactory, TitleFormatter $titleFormatter)
Constructing parsers directly is not allowed! Use a ParserFactory.
Definition: Parser.php:409
Sanitizer\normalizeSectionNameWhitespace
static normalizeSectionNameWhitespace( $section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(),...
Definition: Sanitizer.php:1104
OT_PREPROCESS
const OT_PREPROCESS
Definition: Defines.php:159
Parser\normalizeSectionName
static normalizeSectionName( $text)
Apply the same normalization as code making links to this section would.
Definition: Parser.php:6100
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
Parser\fetchFileNoRegister
fetchFileNoRegister(LinkTarget $link, array $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3773
wfHostname
wfHostname()
Get host name of the current machine, for use in error reporting.
Definition: GlobalFunctions.php:1245
Parser\recursiveTagParseFully
recursiveTagParseFully( $text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition: Parser.php:865
Parser\$specialPageFactory
SpecialPageFactory $specialPageFactory
Definition: Parser.php:321
Parser\nextLinkID
nextLinkID()
Definition: Parser.php:1116
Parser\getTargetLanguage
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:1145
User\newFromName
static newFromName( $name, $validate='valid')
Definition: User.php:602
Parser\$mStripList
$mStripList
Definition: Parser.php:156
wfMessage
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Definition: GlobalFunctions.php:1182
MediaWiki\Linker\LinkRendererFactory
Factory to create LinkRender objects.
Definition: LinkRendererFactory.php:34
SpecialPage\getTitleFor
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name If you don't need a full Title object,...
Definition: SpecialPage.php:107
Parser\startExternalParse
startExternalParse(?PageReference $page, ParserOptions $options, $outputType, $clearState=true, $revId=null)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4778
Parser\guessSectionNameFromWikiText
guessSectionNameFromWikiText( $text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition: Parser.php:6059
Parser\setDefaultSort
setDefaultSort( $sort)
Mutator for $mDefaultSort.
Definition: Parser.php:5992
Parser\getPage
getPage()
Returns the page used as context for parsing, e.g.
Definition: Parser.php:1034
Preprocessor_Hash
Differences from DOM schema:
Definition: Preprocessor_Hash.php:43
StripState
Definition: StripState.php:29
Parser\getExternalLinkRel
static getExternalLinkRel( $url=false, LinkTarget $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:2220
Parser\replaceVariables
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:2897
Parser\MARKER_PREFIX
const MARKER_PREFIX
Definition: Parser.php:146
Parser\getFunctionSynonyms
getFunctionSynonyms()
Definition: Parser.php:5581
Parser\$mInParse
bool string $mInParse
Recursive call protection.
Definition: Parser.php:298
Parser\doQuotes
doQuotes( $text)
Helper function for handleAllQuotes()
Definition: Parser.php:1953
Linker\tocLine
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1863
Parser\startParse
startParse(?PageReference $page, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4793
MediaWiki\Languages\LanguageConverterFactory
An interface for creating language converters.
Definition: LanguageConverterFactory.php:46
Page\PageReference
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
Definition: PageReference.php:49
Parser\$svcOptions
ServiceOptions $svcOptions
This is called $svcOptions instead of $options like elsewhere to avoid confusion with $mOptions,...
Definition: Parser.php:333
MediaWiki\User\UserIdentity
Interface for objects representing user identity.
Definition: UserIdentity.php:39
Linker\tocList
static tocList( $toc, Language $lang=null)
Wraps the TOC in a div with ARIA navigation role and provides the hide/collapse JavaScript.
Definition: Linker.php:1899
Parser\SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Parser.php:92
Parser\OT_WIKI
const OT_WIKI
Definition: Parser.php:122
Parser\getTags
getTags()
Accessor.
Definition: Parser.php:5573
Parser\getStripList
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1319
Parser\initializeVariables
initializeVariables()
Initialize the magic variables (like CURRENTMONTHNAME) and substitution modifiers.
Definition: Parser.php:2846
PPFrame\NO_TEMPLATES
const NO_TEMPLATES
Definition: PPFrame.php:30
Preprocessor
Definition: Preprocessor.php:27
Parser\getOptions
getOptions()
Definition: Parser.php:1087
MediaWiki\Languages\LanguageNameUtils
A service that provides utilities to do with language names and codes.
Definition: LanguageNameUtils.php:42
PPFrame\newChild
newChild( $args=false, $title=false, $indexOffset=0)
Create a child frame.
Parser\getFunctionLang
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:1133
StringUtils\replaceMarkup
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <....
Definition: StringUtils.php:268
Parser\$mRevisionRecordObject
RevisionRecord null $mRevisionRecordObject
Definition: Parser.php:275
Parser\Options
Options( $x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:1107
NS_SPECIAL
const NS_SPECIAL
Definition: Defines.php:53
Parser\preSaveTransform
preSaveTransform( $text, PageReference $page, UserIdentity $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\\r\\n" -> "\\n" conversion, substituting signatur...
Definition: Parser.php:4526
Parser\lock
lock()
Lock the current instance of the parser.
Definition: Parser.php:6282
Parser\getDefaultSort
getDefaultSort()
Accessor for $mDefaultSort. Will use the empty string if none is set.
Definition: Parser.php:6008
Parser\$mFunctionSynonyms
$mFunctionSynonyms
Definition: Parser.php:155
Parser\$hookRunner
HookRunner $hookRunner
Definition: Parser.php:351
Parser\$nsInfo
NamespaceInfo $nsInfo
Definition: Parser.php:339
Parser\makeKnownLinkHolder
makeKnownLinkHolder(LinkTarget $nt, $text='', $trail='', $prefix='')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2742
Parser\makeLegacyAnchor
makeLegacyAnchor( $sectionName)
Definition: Parser.php:6038
Parser\fuzzTestSrvus
fuzzTestSrvus( $text, PageReference $page, ParserOptions $options, $outputType=self::OT_HTML)
Strip/replaceVariables/unstrip for preprocessor regression testing.
Definition: Parser.php:6158
Parser\setHook
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4858
Parser\$mHeadings
$mHeadings
Definition: Parser.php:226
Parser\$userOptionsLookup
UserOptionsLookup $userOptionsLookup
Definition: Parser.php:357
Parser\interwikiTransclude
interwikiTransclude(LinkTarget $link, $action)
Transclude an interwiki link.
Definition: Parser.php:3796
Parser\getTitle
getTitle()
Definition: Parser.php:998
Parser\$mVariables
MagicWordArray $mVariables
Definition: Parser.php:173
wfDeprecatedMsg
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
Definition: GlobalFunctions.php:1028
MWException
MediaWiki exception.
Definition: MWException.php:29
Parser\TOC_START
const TOC_START
Definition: Parser.php:149
Parser\$ot
$ot
Definition: Parser.php:262
Parser\getRevisionRecordObject
getRevisionRecordObject()
Get the revision record object for $this->mRevisionId.
Definition: Parser.php:5865
MediaWiki\Config\ServiceOptions
A class for passing options to services.
Definition: ServiceOptions.php:27
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that $function is deprecated.
Definition: GlobalFunctions.php:996
Parser\OT_MSG
const OT_MSG
Definition: Parser.php:124
Parser\getPreloadText
getPreloadText( $text, PageReference $page, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:952
MediaWiki\User\UserIdentity\isRegistered
isRegistered()
Parser\firstCallInit
firstCallInit()
Used to do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:528
Parser\$mProfiler
SectionProfiler $mProfiler
Definition: Parser.php:301
Parser\getFlatSectionInfo
getFlatSectionInfo( $text)
Get an array of preprocessor section information.
Definition: Parser.php:5801
Parser\$mMarkerIndex
$mMarkerIndex
Definition: Parser.php:161
BlockLevelPass\doBlockLevels
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: BlockLevelPass.php:52
Parser\getCustomDefaultSort
getCustomDefaultSort()
Accessor for $mDefaultSort. Unlike getDefaultSort(), will return false if none is set.
Definition: Parser.php:6023
wfUrlProtocolsWithoutProtRel
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Definition: GlobalFunctions.php:747
Parser\handleTables
handleTables( $text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1353
$matches
$matches
Definition: NoLocalSettings.php:24
CoreTagHooks\register
static register( $parser)
Definition: CoreTagHooks.php:36
Parser\$contLang
Language $contLang
Definition: Parser.php:312
Parser\makeAnchor
static makeAnchor( $sectionName)
Definition: Parser.php:6034
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:326
PPNode
There are three types of nodes:
Definition: PPNode.php:35
Parser\$factory
ParserFactory $factory
Definition: Parser.php:318
Parser\replaceLinkHoldersPrivate
replaceLinkHoldersPrivate(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4985
LinkHolderArray
Definition: LinkHolderArray.php:33
Parser\__clone
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:498
NS_TEMPLATE
const NS_TEMPLATE
Definition: Defines.php:74
PPFrame\RECOVER_ORIG
const RECOVER_ORIG
Definition: PPFrame.php:36
Linker\makeHeadline
static makeHeadline( $level, $attribs, $anchor, $html, $link, $fallbackAnchor=false)
Create a headline for content.
Definition: Linker.php:1974
Parser\getHookContainer
getHookContainer()
Get a HookContainer capable of returning metadata about hooks or running extension hooks.
Definition: Parser.php:1656
Parser\callParserFunction
callParserFunction(PPFrame $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3349
Parser\extensionSubstitution
extensionSubstitution(array $params, PPFrame $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:3922
Linker\tocLineEnd
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1887
$args
if( $line===false) $args
Definition: mcc.php:124
MapCacheLRU
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:37
Parser\$mLangLinkLanguages
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:282
Parser\markerSkipCallback
markerSkipCallback( $s, callable $callback)
Call a callback function on all regions of the given text that are not inside strip markers,...
Definition: Parser.php:6208
Parser\limitationWarn
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:2948
Parser\TOC_END
const TOC_END
Definition: Parser.php:150
MediaWiki\User\UserIdentity\getName
getName()
$title
$title
Definition: testCompression.php:38
Parser\recursiveTagParse
recursiveTagParse( $text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:841
Linker\makeExternalLink
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:1015
Parser\finalizeHeadings
finalizeHeadings( $text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4112
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:680
Parser\$mHighestExpansionDepth
$mHighestExpansionDepth
Definition: Parser.php:222
SectionProfiler
Arbitrary section name based PHP profiling.
Definition: SectionProfiler.php:33
Parser\cleanSig
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4719
RequestContext
Group all the pieces relevant to the context of a request into one instance @newable.
Definition: RequestContext.php:41
Parser\$mUser
User $mUser
Definition: Parser.php:241
Parser\$mImageParamsMagicArray
$mImageParamsMagicArray
Definition: Parser.php:159
SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Defines.php:171
Parser\handleInternalLinks
handleInternalLinks( $text)
Process [[ ]] wikilinks.
Definition: Parser.php:2413
Parser\$mTplRedirCache
$mTplRedirCache
Definition: Parser.php:224
Parser\$tidy
TidyDriverBase $tidy
Definition: Parser.php:354
Parser\$mFirstCall
bool $mFirstCall
Whether firstCallInit still needs to be called.
Definition: Parser.php:166
ParserOptions\getPreSaveTransform
getPreSaveTransform()
Transform wiki markup when saving the page?
Definition: ParserOptions.php:670
Parser\getStripState
getStripState()
Definition: Parser.php:1327
Parser\getContentLanguage
getContentLanguage()
Get the content language that this Parser is using.
Definition: Parser.php:1223
Parser\OT_PLAIN
const OT_PLAIN
Definition: Parser.php:126
$wgTitle
$wgTitle
Definition: Setup.php:825
Parser\handleMagicLinks
handleMagicLinks( $text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1728
Linker\splitTrail
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1995
MediaWiki\Tidy\TidyDriverBase
Base class for HTML cleanup utilities.
Definition: TidyDriverBase.php:8
Parser\insertStripItem
insertStripItem( $text)
Add an item to the strip state. Returns the unique tag which must be inserted into the stripped text. T...
Definition: Parser.php:1340
Parser\getFreshParser
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:6329
Parser\getRevisionUser
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:5946
Parser\setOptions
setOptions(ParserOptions $options)
Mutator for the ParserOptions object.
Definition: Parser.php:1096
Parser\getImageParams
getImageParams( $handler)
Definition: Parser.php:5178
Parser\$mAutonumber
$mAutonumber
Definition: Parser.php:194
Parser\fuzzTestPst
fuzzTestPst( $text, PageReference $page, ParserOptions $options)
Definition: Parser.php:6176
Parser\replaceLinkHolders
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4974
Parser\addTrackingCategory
addTrackingCategory( $msg)
Definition: Parser.php:4093
Parser\getUrlProtocols
getUrlProtocols()
Definition: Parser.php:5589
Parser\incrementIncludeSize
incrementIncludeSize( $type, $size)
Increment an include size counter.
Definition: Parser.php:4014
Parser\getTargetLanguageConverter
getTargetLanguageConverter()
Shorthand for getting a Language Converter for Target language.
Definition: Parser.php:1632
ParserFactory
Definition: ParserFactory.php:36
$content
$content
Definition: router.php:76
CoreParserFunctions\register
static register( $parser)
Definition: CoreParserFunctions.php:37
Parser\makeFreeExternalLink
makeFreeExternalLink( $url, $numPostProto)
Make a free external link, given a user-supplied URL.
Definition: Parser.php:1842
Parser\CONSTRUCTOR_OPTIONS
const CONSTRUCTOR_OPTIONS
Definition: Parser.php:365
$s
foreach( $mmfl['setupFiles'] as $fileName) if( $queue) if(empty( $mmfl['quiet'])) $s
Definition: mergeMessageFileList.php:206
NS_MEDIA
const NS_MEDIA
Definition: Defines.php:52
PPFrame\expand
expand( $root, $flags=0)
Expand a document tree node.
ILanguageConverter
The shared interface for all language converters.
Definition: ILanguageConverter.php:29
$wgNoFollowNsExceptions
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
Definition: DefaultSettings.php:5022
$wgNoFollowLinks
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
Definition: DefaultSettings.php:5016
Parser\$mOutput
ParserOutput $mOutput
Definition: Parser.php:193
Parser\$mFunctionHooks
$mFunctionHooks
Definition: Parser.php:154
ParserFactory\$inParserFactory
static int $inParserFactory
Track calls to Parser constructor to aid in deprecation of direct Parser invocation.
Definition: ParserFactory.php:84
Parser\$mOptions
ParserOptions null $mOptions
Definition: Parser.php:250
Parser\$mRevisionUser
$mRevisionUser
Definition: Parser.php:268
Sanitizer\validateTagAttributes
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:390
Parser\extractSections
extractSections( $text, $sectionId, $mode, $newText='')
Break wikitext input into sections, and either pull or replace some particular section's text.
Definition: Parser.php:5622
Hooks\runner
static runner()
Get a HookRunner instance for calling hooks using the new interfaces.
Definition: Hooks.php:173
Parser\OT_HTML
const OT_HTML
Definition: Parser.php:121
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:128
Parser\fuzzTestPreprocess
fuzzTestPreprocess( $text, PageReference $page, ParserOptions $options)
Definition: Parser.php:6186
PPFrame
Definition: PPFrame.php:28
$line
$line
Definition: mcc.php:119
Parser\EXT_LINK_URL_CLASS
const EXT_LINK_URL_CLASS
Definition: Parser.php:100
MediaWiki\Linker\LinkTarget\getDBkey
getDBkey()
Get the main part with underscores.
Parser\getUserIdentity
getUserIdentity()
Get an identity of the user for whom the parse is being made, if set.
Definition: Parser.php:1175
Parser\renderImageGallery
renderImageGallery( $text, array $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5014
StringUtils\delimiterExplode
static delimiterExplode( $startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...
Definition: StringUtils.php:59
OutputPage\setupOOUI
static setupOOUI( $skinName='default', $dir='ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
Definition: OutputPage.php:4153
Parser\magicLinkCallback
magicLinkCallback(array $m)
Definition: Parser.php:1762
Parser\fetchTemplateAndTitle
fetchTemplateAndTitle(LinkTarget $link)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3553
Parser\getUser
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise.
Definition: Parser.php:1164
wfEscapeWikiText
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking,...
Definition: GlobalFunctions.php:1456
Parser\incrementExpensiveFunctionCount
incrementExpensiveFunctionCount()
Definition: Parser.php:4027
Parser\$mImageParams
$mImageParams
Definition: Parser.php:158
Parser\setFunctionHook
setFunctionHook( $id, callable $callback, $flags=0)
Create a parser function, e.g. {{sum:1|2|3}}.
Definition: Parser.php:4925
Parser\setLinkID
setLinkID( $id)
Definition: Parser.php:1124
Sanitizer\cleanUrl
static cleanUrl( $url)
Definition: Sanitizer.php:1627
Parser\$magicWordFactory
MagicWordFactory $magicWordFactory
Definition: Parser.php:309
Parser\preprocessToDom
preprocessToDom( $text, $flags=0)
Get the document object model for the given wikitext.
Definition: Parser.php:2872
Parser\setUser
setUser(?UserIdentity $user)
Set the current user.
Definition: Parser.php:974
Parser
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition: Parser.php:89
RequestContext\getMain
static getMain()
Get the RequestContext object associated with the main request.
Definition: RequestContext.php:484
MediaWiki\User\UserOptionsLookup
Provides access to user options.
Definition: UserOptionsLookup.php:29
Title\newFromLinkTarget
static newFromLinkTarget(LinkTarget $linkTarget, $forceClone='')
Returns a Title given a LinkTarget.
Definition: Title.php:327
Parser\getMagicWordFactory
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition: Parser.php:1213
Parser\argSubstitution
argSubstitution(array $piece, PPFrame $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:3868
Linker\makeMediaLinkFile
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:947
Sanitizer\fixTagAttributes
static fixTagAttributes( $text, $element, $sorted=false)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML,...
Definition: Sanitizer.php:703
Parser\fetchFileAndTitle
fetchFileAndTitle(LinkTarget $link, array $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3746
Preprocessor\resetParser
resetParser(?Parser $parser)
Allows resetting the internal Parser reference after Preprocessor is cloned.
Definition: Preprocessor.php:95
Parser\setOutputType
setOutputType( $ot)
Mutator for the output type.
Definition: Parser.php:1052
$lines
if(!file_exists( $CREDITS)) $lines
Definition: updateCredits.php:45
Parser\OT_PREPROCESS
const OT_PREPROCESS
Definition: Parser.php:123
Parser\getExternalLinkAttribs
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link.
Definition: Parser.php:2242
Parser\isCurrentRevisionOfTitleCached
isCurrentRevisionOfTitleCached(LinkTarget $link)
Definition: Parser.php:3514
Parser\statelessFetchTemplate
static statelessFetchTemplate( $page, $parser=false)
Static function to get a template. Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3599
Parser\getUserSig
getUserSig(UserIdentity $user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext.
Definition: Parser.php:4643
Parser\$mStripState
StripState $mStripState
Definition: Parser.php:200
Parser\internalParse
internalParse( $text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1563
Parser\validateSig
validateSig( $text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4704
Parser\$mPPNodeCount
$mPPNodeCount
Definition: Parser.php:215
Title
Represents a title within MediaWiki.
Definition: Title.php:49
Parser\resetOutput
resetOutput()
Reset the ParserOutput.
Definition: Parser.php:588
Parser\stripOuterParagraph
static stripOuterParagraph( $html)
Strip the outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6310
Parser\$mVarCache
$mVarCache
Definition: Parser.php:157
Parser\$mDefaultSort
$mDefaultSort
Definition: Parser.php:223
Parser\$mExpensiveFunctionCount
$mExpensiveFunctionCount
Definition: Parser.php:230
Parser\normalizeLinkUrl
static normalizeLinkUrl( $url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:2273
MediaWiki\Preferences\SignatureValidator
Definition: SignatureValidator.php:40
Parser\$mExtLinkBracketedRegex
$mExtLinkBracketedRegex
Definition: Parser.php:181
wfMatchesDomainList
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
Definition: GlobalFunctions.php:860
Parser\$mIncludeSizes
$mIncludeSizes
Definition: Parser.php:213
$cache
$cache
Definition: mcc.php:33
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:26
Parser\getSection
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($sectionId).
Definition: Parser.php:5751
Xml\isWellFormedXmlFragment
static isWellFormedXmlFragment( $text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:743
Parser\preprocess
preprocess( $text, ?PageReference $page, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:909
Parser\$mRevisionTimestamp
$mRevisionTimestamp
Definition: Parser.php:266
Parser\replaceSection
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldText after the content of the section specified by $sectionId has been replac...
Definition: Parser.php:5768
Sanitizer\ID_PRIMARY
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition: Sanitizer.php:70
Parser\$logger
LoggerInterface $logger
Definition: Parser.php:342
TitleFormatter
A title formatter service for MediaWiki.
Definition: TitleFormatter.php:35
ParserOptions\getUser
getUser()
Current user.
Definition: ParserOptions.php:1015
PPFrame\virtualBracketedImplode
virtualBracketedImplode( $start, $sep, $end,... $params)
Virtual implode with brackets.
Parser\armorLinks
armorLinks( $text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2766
Linker\tocUnindent
static tocUnindent( $level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1848
Linker\makeImageLink
static makeImageLink(Parser $parser, LinkTarget $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:300
getTitle
getTitle()
Definition: RevisionSearchResultTrait.php:81
Parser\parse
parse( $text, PageReference $page, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:611
MediaWiki\Cache\CacheKeyHelper
Helper class for mapping value objects representing basic entities to cache keys.
Definition: CacheKeyHelper.php:43
Parser\getBadFileLookup
getBadFileLookup()
Get the BadFileLookup instance that this Parser is using.
Definition: Parser.php:1233
NS_CATEGORY
const NS_CATEGORY
Definition: Defines.php:78
Parser\getOutput
getOutput()
Definition: Parser.php:1079
StringUtils\delimiterReplace
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to preg_replace() with flags.
Definition: StringUtils.php:248
Parser\handleInternalLinks2
handleInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2423
Parser\getOutputType
getOutputType()
Accessor for the output type.
Definition: Parser.php:1043
Parser\$mGeneratedPPNodeCount
$mGeneratedPPNodeCount
Definition: Parser.php:220
Parser\getHookRunner
getHookRunner()
Get a HookRunner for calling core hooks.
Definition: Parser.php:1668
PPFrame\getArgument
getArgument( $name)
Get an argument to this frame by name.
TextContent\normalizeLineEndings
static normalizeLineEndings( $text)
Do a "\r\n" -> "\n" and "\r" -> "\n" transformation as well as trim trailing whitespace.
Definition: TextContent.php:203
Parser\getSectionNameFromStrippedText
static getSectionNameFromStrippedText( $text)
Definition: Parser.php:6027
Sanitizer\escapeIdForLink
static escapeIdForLink( $id)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid URL...
Definition: Sanitizer.php:838
Linker\normalizeSubpageLink
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1658
MediaWiki\Config\ServiceOptions\get
get( $key)
Definition: ServiceOptions.php:93
MediaWiki\HookContainer\HookContainer
HookContainer class.
Definition: HookContainer.php:45
Title\castFromPageReference
static castFromPageReference(?PageReference $pageReference)
Return a Title for a given PageReference.
Definition: Title.php:377
Parser\SFH_NO_HASH
const SFH_NO_HASH
Definition: Parser.php:91
CoreMagicVariables\expand
static expand(Parser $parser, string $id, int $ts, NamespaceInfo $nsInfo, ServiceOptions $svcOptions, LoggerInterface $logger)
Expand the magic variable given by $id.
Definition: CoreMagicVariables.php:48
Parser\$mShowToc
$mShowToc
Definition: Parser.php:232
ImageGalleryBase\factory
static factory( $mode=false, IContextSource $context=null)
Get a new image gallery.
Definition: ImageGalleryBase.php:116
Sanitizer\decodeTagAttributes
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1004
Parser\guessSectionNameFromStrippedText
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition: Parser.php:6089
Parser\getTemplateDom
getTemplateDom(LinkTarget $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3439
Parser\$languageConverterFactory
LanguageConverterFactory $languageConverterFactory
Definition: Parser.php:315
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Definition: NamespaceInfo.php:35
PPFrame\isTemplate
isTemplate()
Return true if the frame is a template frame.
MediaWiki\HookContainer\HookRunner
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:554
Parser\parseLinkParameter
parseLinkParameter( $value)
Parse the value of 'link' parameter in image syntax ([[File:Foo.jpg|link=<value>]]).
Definition: Parser.php:5468
$t
$t
Definition: testCompression.php:74
Title\castFromLinkTarget
static castFromLinkTarget( $linkTarget)
Same as newFromLinkTarget, but if passed null, returns null.
Definition: Title.php:351
Title\legalChars
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:766
Sanitizer\decodeCharReferences
static decodeCharReferences( $text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string.
Definition: Sanitizer.php:1228
Parser\getRevisionTimestamp
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:5920
Html\element
static element( $element, $attribs=[], $contents='')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:234
Parser\expandMagicVariable
expandMagicVariable( $index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2793
NS_FILE
const NS_FILE
Definition: Defines.php:70
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
Parser\pstPass2
pstPass2( $text, UserIdentity $user)
Pre-save transform helper function.
Definition: Parser.php:4566
Parser\$mPreprocessor
Preprocessor $mPreprocessor
Definition: Parser.php:187
Parser\parseWidthParam
static parseWidthParam( $value, $parseHeight=true)
Parse a width param of an image link, like 300px or 200x300px.
Definition: Parser.php:6253
RawMessage
Variant of the Message class.
Definition: RawMessage.php:35
Parser\cleanSigInSig
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4762
Parser\setTitle
setTitle(Title $t=null)
Set the context title.
Definition: Parser.php:989
User
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:68
Parser\replaceLinkHoldersText
replaceLinkHoldersText( $text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:4996
Parser\normalizeUrlComponent
static normalizeUrlComponent( $component, $unsafe)
Definition: Parser.php:2332
Parser\clearTagHooks
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:4876
MWTimestamp\getLocalInstance
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
Definition: MWTimestamp.php:173
OT_WIKI
const OT_WIKI
Definition: Defines.php:158
MediaWiki\User\UserFactory
Creates User objects.
Definition: UserFactory.php:41
Linker\makeExternalImage
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage().
Definition: Linker.php:244
Parser\$mTitle
Title null $mTitle
Since 1.34, leaving mTitle uninitialized or setting mTitle to null is deprecated.
Definition: Parser.php:259
Parser\getLinkRenderer
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:1195
Language
Internationalisation code. See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Definition: Language.php:42
User\getUser
getUser()
Definition: User.php:4274
Parser\$mRevisionId
$mRevisionId
Definition: Parser.php:264
RequestContext\setTitle
setTitle(Title $title=null)
Definition: RequestContext.php:173
Parser\setPage
setPage(?PageReference $t=null)
Set the page used as context for parsing, e.g. when resolving relative subpage links.
Definition: Parser.php:1011
Parser\$mRevisionSize
$mRevisionSize
Definition: Parser.php:270
Parser\getRevisionId
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5855
Parser\fetchCurrentRevisionRecordOfTitle
fetchCurrentRevisionRecordOfTitle(LinkTarget $link)
Fetch the current revision of a given title as a RevisionRecord.
Definition: Parser.php:3484
MediaWiki\Revision\SlotRecord
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:40
OT_PLAIN
const OT_PLAIN
Definition: Defines.php:161
Parser\clearState
clearState()
Clear Parser state.
Definition: Parser.php:540
Parser\guessLegacySectionNameFromWikiText
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition: Parser.php:6076
MWHttpRequest\factory
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
Definition: MWHttpRequest.php:194
MediaWiki\Config\ServiceOptions\assertRequiredOptions
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
Definition: ServiceOptions.php:71
ParserOptions\newFromUser
static newFromUser( $user)
Get a ParserOptions object from a given user.
Definition: ParserOptions.php:1066
Parser\stripSectionName
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6129
Parser\EXT_IMAGE_REGEX
const EXT_IMAGE_REGEX
Definition: Parser.php:107
Parser\getPreprocessor
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:1185
Parser\doBlockLevels
doBlockLevels( $text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: Parser.php:2780
Parser\$hookContainer
HookContainer $hookContainer
Definition: Parser.php:348
$type
$type
Definition: testCompression.php:52