MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
48 use Psr\Log\LoggerInterface;
49 use Wikimedia\IPUtils;
50 use Wikimedia\ScopedCallback;
51 
92 class Parser {
93 
94  # Flags for Parser::setFunctionHook
95  public const SFH_NO_HASH = 1;
96  public const SFH_OBJECT_ARGS = 2;
97 
98  # Constants needed for external link processing
99  # Everything except bracket, space, or control characters.
100  # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
101  # as well as U+3000 IDEOGRAPHIC SPACE (T21052).
102  # \x{FFFD} is the Unicode replacement character, which the HTML5 spec
103  # uses to replace invalid HTML characters.
104  public const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
105  # Simplified expression to match an IPv4 or IPv6 address, or
106  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
107  // phpcs:ignore Generic.Files.LineLength
108  private const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
109  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
110  // phpcs:ignore Generic.Files.LineLength
111  private const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
112  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
113 
114  # Regular expression for a non-newline space
115  private const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
116 
# Alias that carries the same value as Preprocessor::DOM_FOR_INCLUSION.
121  public const PTD_FOR_INCLUSION = Preprocessor::DOM_FOR_INCLUSION;
122 
123  # Allowed values for $this->mOutputType
124  # Parameter to startExternalParse().
125  public const OT_HTML = 1; # like parse()
126  public const OT_WIKI = 2; # like preSaveTransform()
127  public const OT_PREPROCESS = 3; # like preprocess()
# NOTE: OT_MSG has the same value (3) as OT_PREPROCESS, so the two cannot be
# told apart by comparing against $this->mOutputType.
128  public const OT_MSG = 3;
129  # like extractSections() - portions of the original are returned unchanged.
130  public const OT_PLAIN = 4;
131 
# Delimiters of strip markers. Both contain the byte \x7f (U+007F DELETE);
# parse() replaces that byte in incoming text before markers are generated.
149  public const MARKER_SUFFIX = "-QINU`\"'\x7f";
150  public const MARKER_PREFIX = "\x7f'\"`UNIQ-";
151 
152  # Markers used for wrapping the table of contents
153  public const TOC_START = '<mw:toc>';
154  public const TOC_END = '</mw:toc>';
155 
156  # Persistent:
157  private $mTagHooks = [];
158  private $mFunctionHooks = [];
159  private $mFunctionSynonyms = [ 0 => [], 1 => [] ];
160  private $mStripList = [];
# Emptied by clearState(); copied (de-referenced) in __clone().
161  private $mVarCache = [];
162  private $mImageParams = [];
# Counter embedded in the markers produced by insertStripItem().
165  public $mMarkerIndex = 0;
170  public $mFirstCall = false;
171 
172  # Initialised by initializeVariables()
173 
177  private $mVariables;
178 
182  private $mSubstWords;
183 
184  # Initialised in constructor
186 
187  # Initialized in constructor
188 
# Preprocessor_Hash instance created in the constructor; __clone() clones it
# and re-binds the copy to the cloned parser.
191  private $mPreprocessor;
192 
193  # Cleared with clearState():
194 
# Replaced with a fresh ParserOutput by resetOutput().
197  private $mOutput;
# Reset to 0 by clearState().
198  private $mAutonumber;
199 
# Replaced with a new StripState by clearState().
203  private $mStripState;
204 
# Replaced with a new LinkHolderArray by clearState().
208  private $mLinkHolders;
209 
# Reset to 0 by clearState(); handed out and incremented by nextLinkID().
214  public $mLinkID;
# Reset to false by clearState().
226  private $mDefaultSort;
# Reset to an empty array by clearState().
229  public $mHeadings;
233  public $mExpensiveFunctionCount; # number of expensive parser function calls
# Reset to true by clearState().
235  public $mShowToc;
# Reset to an empty array by clearState() (cleared on every parse, T6549).
238  private $mTplDomCache;
239 
# Optional user override set through setUser(); reset to null by clearState().
243  private $mUser;
244 
245  # Temporary
246  # These are variables reset at least once per parse regardless of $clearState
247 
252  public $mOptions;
253 
261  public $mTitle; # Title context, used for self-link rendering and similar things
262  private $mOutputType; # Output type, one of the OT_xxx constants
264  public $ot; # Shortcut alias, see setOutputType()
266  public $mRevisionId; # ID to display in {{REVISIONID}} tags
267 
268  public $mRevisionTimestamp; # The timestamp of the specified revision ID
270  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
272  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
274  public $mInputSize = false; # For {{PAGESIZE}} on current page.
275 
278 
285 
293 
# Forced back to false on the copy in __clone().
300  public $mInParse = false;
301 
# Replaced with a new SectionProfiler by clearState().
303  private $mProfiler;
304 
# Lazily created and then cached by getLinkRenderer().
308  private $mLinkRenderer;
309 
312 
# Content language injected through the constructor; see getContentLanguage().
314  private $contLang;
315 
318 
320  private $factory;
321 
324 
327 
# Service options; the constructor asserts they contain CONSTRUCTOR_OPTIONS.
335  private $svcOptions;
336 
339 
341  private $nsInfo;
342 
344  private $logger;
345 
347  private $badFileLookup;
348 
350  private $hookContainer;
351 
# HookRunner built in the constructor around $hookContainer.
353  private $hookRunner;
354 
356  private $tidy;
357 
360 
362  private $userFactory;
363 
366 
369 
# Names of the service options this class requires. The constructor passes
# this list to $svcOptions->assertRequiredOptions().
373  public const CONSTRUCTOR_OPTIONS = [
374  // See documentation for the corresponding config options
375  'ArticlePath',
376  'EnableScaryTranscluding',
377  'ExtraInterlanguageLinkPrefixes',
378  'FragmentMode',
379  'LanguageCode',
380  'MaxSigChars',
381  'MaxTocLevel',
382  'MiserMode',
383  'ScriptPath',
384  'Server',
385  'ServerName',
386  'ShowHostnames',
387  'SignatureValidation',
388  'Sitename',
389  'StylePath',
390  'TranscludeCacheExpiry',
391  'PreprocessorCacheThreshold',
392  'DisableLangConversion',
393  ];
394 
/**
 * Set up a parser instance from injected services.
 *
 * Throws MWException when ParserFactory::$inParserFactory is 0, i.e. when the
 * parser is constructed directly instead of through a ParserFactory.
 *
 * Stores the injected services, builds the bracketed external-link regex from
 * $urlProtocols, creates the Preprocessor_Hash, registers the core tag hooks,
 * initialises variables and finally runs the ParserFirstCallInit hook.
 *
 * NOTE(review): only part of the parameter list is visible here. The body
 * reads $svcOptions, $magicWordFactory, $contLang, $factory and several other
 * names that are not among the parameters shown - confirm against the full
 * signature before changing anything.
 */
419  public function __construct(
424  string $urlProtocols,
425  SpecialPageFactory $spFactory,
428  LoggerInterface $logger,
433  WANObjectCache $wanCache,
439  ) {
440  if ( ParserFactory::$inParserFactory === 0 ) {
441  // Direct construction of Parser was deprecated in 1.34 and
442  // removed in 1.36; use a ParserFactory instead.
443  throw new MWException( 'Direct construction of Parser not allowed' );
444  }
445  $svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
446  $this->svcOptions = $svcOptions;
447 
448  $this->mUrlProtocols = $urlProtocols;
// Matches "[protocol + address + url-chars, optional spaces, label]".
449  $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
450  self::EXT_LINK_ADDR .
451  self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
452 
453  $this->magicWordFactory = $magicWordFactory;
454 
455  $this->contLang = $contLang;
456 
457  $this->factory = $factory;
458  $this->specialPageFactory = $spFactory;
459  $this->linkRendererFactory = $linkRendererFactory;
460  $this->nsInfo = $nsInfo;
461  $this->logger = $logger;
462  $this->badFileLookup = $badFileLookup;
463 
464  $this->languageConverterFactory = $languageConverterFactory;
465 
466  $this->hookContainer = $hookContainer;
467  $this->hookRunner = new HookRunner( $hookContainer );
468 
469  $this->tidy = $tidy;
470 
471  $this->mPreprocessor = new Preprocessor_Hash(
472  $this,
473  $wanCache,
474  [
475  'cacheThreshold' => $svcOptions->get( 'PreprocessorCacheThreshold' ),
476  'disableLangConversion' => $svcOptions->get( 'DisableLangConversion' ),
477  ]
478  );
479 
480  $this->userOptionsLookup = $userOptionsLookup;
481  $this->userFactory = $userFactory;
482  $this->titleFormatter = $titleFormatter;
483  $this->httpRequestFactory = $httpRequestFactory;
484  $this->trackingCategories = $trackingCategories;
485 
486  // These steps used to be done in "::firstCallInit()"
487  // (if you're chasing a reference from some old code)
489  CoreTagHooks::register( $this );
490  $this->initializeVariables();
491 
// Runs last, after the core tag hooks and variables are set up.
492  $this->hookRunner->onParserFirstCallInit( $this );
493  }
494 
/**
 * Release everything the parser holds when it is destroyed.
 *
 * The link holder array is dropped first, then every remaining property
 * visible from this scope is unset one by one.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}

	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $this as $propertyName => $unusedValue ) {
		unset( $this->$propertyName );
	}
}
508 
/**
 * Fix up a freshly cloned parser so it shares no mutable state with the
 * original: the copy is marked as not parsing, reference-bound fields are
 * detached, the preprocessor is cloned and re-bound, and the ParserCloned
 * hook is run.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: taking a reference to an object field turns the field itself
	// into a reference (for as long as the other reference lives), and clone
	// copies reference fields as references. Hooks dating back to PHP4 still
	// pass these fields by reference, so the clone has to be detached by hand.
	foreach ( [ 'mStripState', 'mVarCache' ] as $fieldName ) {
		// Take a plain (non-reference) copy of the value, then rebind the
		// field so it refers to that new copy.
		$detachedCopy = $this->$fieldName;
		$this->$fieldName =& $detachedCopy;
		unset( $detachedCopy );
	}

	$this->mPreprocessor = clone $this->mPreprocessor;
	$this->mPreprocessor->resetParser( $this );

	$this->hookRunner->onParserCloned( $this );
}
534 
/**
 * Intentionally empty. The constructor now performs the steps that used to
 * live here; the method only remains so that existing callers keep working.
 */
public function firstCallInit() {
	// Nothing to do. Hard-deprecate this method once the remaining calls
	// have been removed.
}
548 
/**
 * Reset all per-parse state to its initial value.
 *
 * Creates a fresh output object, link holder array, strip state and section
 * profiler, zeroes the counters and caches, and finally runs the
 * ParserClearState hook.
 */
554  public function clearState() {
555  $this->resetOutput();
556  $this->mAutonumber = 0;
// NOTE(review): the argument list below looks truncated in this listing
// (a line appears to be missing between the two arguments) - confirm it
// against the LinkHolderArray constructor signature.
557  $this->mLinkHolders = new LinkHolderArray(
558  $this,
560  $this->getHookContainer()
561  );
562  $this->mLinkID = 0;
563  $this->mRevisionTimestamp = null;
564  $this->mRevisionId = null;
565  $this->mRevisionUser = null;
566  $this->mRevisionSize = null;
567  $this->mRevisionRecordObject = null;
568  $this->mVarCache = [];
569  $this->mUser = null;
570  $this->mLangLinkLanguages = [];
571  $this->currentRevisionCache = null;
572 
573  $this->mStripState = new StripState( $this );
574 
575  # Clear these on every parse, T6549
576  $this->mTplRedirCache = [];
577  $this->mTplDomCache = [];
578 
579  $this->mShowToc = true;
580  $this->mForceTocPosition = false;
// Running totals reported by makeLimitReport().
581  $this->mIncludeSizes = [
582  'post-expand' => 0,
583  'arg' => 0,
584  ];
585  $this->mPPNodeCount = 0;
586  $this->mGeneratedPPNodeCount = 0;
587  $this->mHighestExpansionDepth = 0;
588  $this->mDefaultSort = false;
589  $this->mHeadings = [];
590  $this->mDoubleUnderscores = [];
591  $this->mExpensiveFunctionCount = 0;
592 
593  $this->mProfiler = new SectionProfiler();
594 
595  $this->hookRunner->onParserClearState( $this );
596  }
597 
/**
 * Start over with an empty ParserOutput and register its recordOption()
 * method as the watcher on the current parser options.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput();
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
606 
/**
 * Convert wikitext to HTML and return the populated ParserOutput.
 *
 * @param string $text Wikitext to parse
 * @param PageReference $page Page the text belongs to
 * @param ParserOptions $options
 * @param bool $linestart Passed on to internalParseHalfParsed()
 * @param bool $clearState When true the input is scrubbed of U+007F, the
 *   parser is locked and startParse() is asked to clear state
 * @param int|null $revid When not null, used as the revision ID for the
 *   duration of this call; the previous revision context is restored
 *   before returning
 * @return ParserOutput
 */
public function parse(
	$text, PageReference $page, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// Strip markers are built around U+007F DELETE, so that character
		// must not be present in the text we are about to parse.
		$text = strtr( $text, "\x7f", "?" );
		// Kept in a local purely so that it lives until this method returns.
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $page, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the current revision context; it is put back at the end.
	$savedRevisionId = $this->mRevisionId;
	$savedRevisionRecordObject = $this->mRevisionRecordObject;
	$savedRevisionTimestamp = $this->mRevisionTimestamp;
	$savedRevisionUser = $this->mRevisionUser;
	$savedRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		// A revision was requested explicitly: forget everything that was
		// derived from the previous one.
		$this->mRevisionId = $revid;
		$this->mRevisionRecordObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	$text = $this->internalParse( $text );
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Only derive a converted title when conversion is enabled, no magic
	// word forbids it and no display title has been set.
	$shouldConvertTitle = !$options->getDisableTitleConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !isset( $this->mDoubleUnderscores['notitleconvert'] )
		&& $this->mOutput->getDisplayTitle() === false;
	if ( $shouldConvertTitle ) {
		$titleText = $this->getTargetLanguageConverter()->getConvRuleTitle();
		if ( $titleText === false ) {
			$titleText = $this->getTargetLanguageConverter()->convertTitle( $page );
		}
		$this->mOutput->setTitleText( htmlspecialchars( $titleText, ENT_NOQUOTES ) );
	}

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	$expensiveLimit = $this->mOptions->getExpensiveParserFunctionLimit();
	if ( $this->mExpensiveFunctionCount > $expensiveLimit ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$expensiveLimit
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$text .= $this->makeLimitReport();
	}

	# Wrap non-interface parser output in a <div> so it can be targeted
	# with CSS (T37247)
	$wrapperClass = $this->mOptions->getWrapOutputClass();
	if ( $wrapperClass !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $wrapperClass );
	}

	$this->mOutput->setText( $text );

	// Put the caller's revision context back.
	$this->mRevisionId = $savedRevisionId;
	$this->mRevisionRecordObject = $savedRevisionRecordObject;
	$this->mRevisionTimestamp = $savedRevisionTimestamp;
	$this->mRevisionUser = $savedRevisionUser;
	$this->mRevisionSize = $savedRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
720 
/**
 * Record limit-report data on the ParserOutput and build the HTML comments
 * (limit report plus transclusion timing report) appended to parsed text.
 *
 * @return string Two HTML comments, each preceded/followed by newlines
 */
protected function makeLimitReport() {
	$output = $this->mOutput;
	$maxIncludeSize = $this->mOptions->getMaxIncludeSize();

	// CPU time may be unavailable (null); in that case it is not recorded.
	$cpuTime = $output->getTimeSinceStart( 'cpu' );
	if ( $cpuTime !== null ) {
		$output->setLimitReportData( 'limitreport-cputime', sprintf( "%.3f", $cpuTime ) );
	}

	$wallTime = $output->getTimeSinceStart( 'wall' );
	$output->setLimitReportData( 'limitreport-walltime', sprintf( "%.3f", $wallTime ) );

	// Each entry is a [ used, allowed ] pair.
	$usageByKey = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $maxIncludeSize ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $usageByKey as $reportKey => $usage ) {
		$output->setLimitReportData( $reportKey, $usage );
	}

	foreach ( $this->mStripState->getLimitReport() as [ $stripKey, $stripValue ] ) {
		$output->setLimitReportData( $stripKey, $stripValue );
	}

	$this->hookRunner->onParserLimitReportPrepare( $this, $output );

	// Plain-text header of the report.
	$limitReport = "NewPP limit report\n";
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$limitReport .= 'Parsed by ' . wfHostname() . "\n";
	}
	$limitReport .= 'Cached time: ' . $output->getCacheTime() . "\n";
	$limitReport .= 'Cache expiry: ' . $output->getCacheExpiry() . "\n";
	$reducedExpiry = $output->hasReducedExpiry() ? 'true' : 'false';
	$limitReport .= 'Reduced expiry: ' . $reducedExpiry . "\n";
	$limitReport .= 'Complications: [' . implode( ', ', $output->getAllFlags() ) . "]\n";

	foreach ( $output->getLimitReportData() as $key => $value ) {
		// A hook returning false has handled (or suppressed) this entry.
		if ( !$this->hookRunner->onParserLimitReportFormat(
			$key, $value, $limitReport, false, false )
		) {
			continue;
		}

		$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			$valueMsg = new RawMessage( '$1' );
		}
		if ( $keyMsg->isDisabled() || $valueMsg->isDisabled() ) {
			continue;
		}

		$valueMsg->params( $value );
		$limitReport .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
	}

	// The report is not really HTML, so decode the entities and then
	// re-encode only what must be hidden inside an HTML comment.
	$limitReport = htmlspecialchars_decode( $limitReport );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
	$text = "\n<!-- \n$limitReport-->\n";

	// Template profiling data, slowest first, in a human/machine readable way.
	$statsByFunction = $this->mProfiler->getFunctionStats();
	uasort( $statsByFunction, static function ( $left, $right ) {
		// Descending by real time.
		return $right['real'] <=> $left['real'];
	} );
	$profileReport = [];
	foreach ( array_slice( $statsByFunction, 0, 10 ) as $stat ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$stat['%real'],
			$stat['real'],
			$stat['calls'],
			htmlspecialchars( $stat['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
	$text .= implode( "\n", $profileReport ) . "\n-->\n";

	$output->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Other cache related metadata.
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$output->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$output->setLimitReportData( 'cachereport-timestamp', $output->getCacheTime() );
	$output->setLimitReportData( 'cachereport-ttl', $output->getCacheExpiry() );
	$output->setLimitReportData( 'cachereport-transientcontent', $output->hasDynamicContent() );

	return $text;
}
829 
/**
 * Run internalParse() on $text as a non-main parse (its $isMain argument
 * is false) and return the result.
 *
 * @param string $text
 * @param PPFrame|false $frame Passed straight through to internalParse()
 * @return string
 */
public function recursiveTagParse( $text, $frame = false ) {
	return $this->internalParse( $text, false, $frame );
}
859 
/**
 * Like recursiveTagParse(), but additionally runs the result through
 * internalParseHalfParsed() with its second argument set to false.
 *
 * @param string $text
 * @param PPFrame|false $frame
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
884 
/**
 * Parse the content of an extension tag as if it were a top-level document:
 * recursiveTagParse(), then the ParserAfterParse hook, then
 * internalParseHalfParsed() with its second argument set to true.
 *
 * @param string $text
 * @return string
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$text = $this->recursiveTagParse( $text );
	// The hook receives the text and may see or alter it before the final pass.
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );
	return $this->internalParseHalfParsed( $text, true );
}
910 
/**
 * Expand variables in $text and unstrip the result, without rendering it.
 * Locks the parser and starts a fresh OT_PREPROCESS parse first.
 *
 * @param string $text
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int|null $revid When not null, stored as the current revision ID
 * @param PPFrame|false $frame Passed on to replaceVariables()
 * @return string
 */
public function preprocess(
	$text,
	?PageReference $page,
	ParserOptions $options,
	$revid = null,
	$frame = false
) {
	// Kept in a local purely so that it lives until this method returns.
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	$this->hookRunner->onParserBeforePreprocess( $this, $text, $this->mStripState );

	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
940 
/**
 * Expand variables in $text and unstrip the result, using the parser state
 * that is already in place (no lock, no startParse()).
 *
 * @param string $text
 * @param PPFrame|false $frame Passed on to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
955 
/**
 * Process wikitext used as preload text: substitute $params into $text via
 * RawMessage, then expand it for inclusion without evaluating template
 * arguments or templates, and unstrip the result.
 *
 * @param string $text Raw preload wikitext
 * @param PageReference $page
 * @param ParserOptions $options
 * @param array $params Message parameters substituted into $text
 * @return string
 */
public function getPreloadText( $text, PageReference $page, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	// Kept in a local purely so that it lives until this method returns.
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PLAIN, true );

	// Fix: $flags was passed to expand() without ever being defined.
	// Template arguments and templates are left unexpanded in preload text.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
984 
/**
 * Set (or, with null, clear) the user override returned in preference to
 * the options' user by getUserIdentity().
 *
 * @param UserIdentity|null $user
 */
public function setUser( ?UserIdentity $user ) {
	$this->mUser = $user;
}
995 
/**
 * Set the context title. Thin wrapper that forwards to setPage().
 *
 * The parameter is now declared explicitly nullable (it was only implicitly
 * nullable through its null default), matching setPage() and setUser().
 *
 * @param Title|null $t
 */
public function setTitle( ?Title $t = null ) {
	$this->setPage( $t );
}
1006 
/**
 * Return the context title. When none is set, a placeholder
 * Special:Badtitle/Parser title is created, stored and returned.
 *
 * @return Title
 */
public function getTitle(): Title {
	if ( $this->mTitle ) {
		return $this->mTitle;
	}

	$this->mTitle = Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );
	return $this->mTitle;
}
1018 
/**
 * Set the page used as context for parsing.
 *
 * A null page is replaced by a placeholder Special:Badtitle/Parser title.
 * Any fragment on the title is stripped before it is stored.
 *
 * @param PageReference|null $t
 */
public function setPage( ?PageReference $t = null ) {
	if ( !$t ) {
		$t = Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );
	} else {
		// For now (early 1.37 alpha), always convert to Title, so we don't have to do it over
		// and over again in other methods. Eventually, we will no longer need to have a Title
		// instance internally.
		// Fix: the conversion announced above was missing, so the fragment
		// handling below and getTitle(): Title were handed a bare PageReference.
		$t = Title::castFromPageReference( $t );
	}

	if ( $t->hasFragment() ) {
		# Strip the fragment to avoid various odd effects
		$this->mTitle = $t->createFragmentTarget( '' );
	} else {
		$this->mTitle = $t;
	}
}
1042 
/**
 * Return the current context page as stored in $this->mTitle, or null when
 * none has been set.
 *
 * @return PageReference|null
 */
public function getPage(): ?PageReference {
	return $this->mTitle;
}
1051 
/**
 * Return the current output type, one of the self::OT_* constants.
 *
 * @return int
 */
public function getOutputType(): int {
	return $this->mOutputType;
}
1060 
/**
 * Set the output type and rebuild the $this->ot shortcut array, which maps
 * 'html', 'wiki', 'pre' and 'plain' to whether that type is the active one.
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ): void {
	$this->mOutputType = $ot;

	# Shortcut alias
	$isHtml = $ot == self::OT_HTML;
	$isWiki = $ot == self::OT_WIKI;
	$isPreprocess = $ot == self::OT_PREPROCESS;
	$isPlain = $ot == self::OT_PLAIN;
	$this->ot = [
		'html' => $isHtml,
		'wiki' => $isWiki,
		'pre' => $isPreprocess,
		'plain' => $isPlain,
	];
}
1076 
/**
 * Deprecated since 1.35 (emits a deprecation notice on every call).
 * Hands $this->mOutputType and $x to wfSetVar() and returns its result.
 *
 * @param int|null $x
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
1088 
/**
 * Return the output object currently held in $this->mOutput (created by
 * resetOutput()).
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
1096 
/**
 * Return the parser options currently held in $this->mOptions.
 *
 * @return ParserOptions|null
 */
public function getOptions() {
	return $this->mOptions;
}
1104 
/**
 * Replace the parser options used by this parser.
 *
 * @param ParserOptions $options
 */
public function setOptions( ParserOptions $options ): void {
	$this->mOptions = $options;
}
1113 
/**
 * Deprecated since 1.35 (emits a deprecation notice on every call).
 * Hands $this->mOptions and $x to wfSetVar() and returns its result.
 *
 * @param ParserOptions|null $x
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
1125 
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The ID as it was before the increment
 */
public function nextLinkID() {
	$currentId = $this->mLinkID;
	$this->mLinkID++;
	return $currentId;
}
1133 
/**
 * Overwrite the link ID counter that nextLinkID() hands out next.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1141 
/**
 * Return the language parser functions should use. This simply delegates
 * to getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	$language = $this->getTargetLanguage();
	return $language;
}
1150 
/**
 * Work out the language the output is produced for. In order of precedence:
 * the target language set in the options, the user language when parsing
 * an interface message, and finally the page language of the context title.
 *
 * @return Language
 */
public function getTargetLanguage() {
	$explicitTarget = $this->mOptions->getTargetLanguage();
	if ( $explicitTarget !== null ) {
		return $explicitTarget;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	return $this->getTitle()->getPageLanguage();
}
1170 
/**
 * Return the user set through setUser() when there is one, otherwise the
 * user identity carried by the parser options.
 *
 * @return UserIdentity
 */
public function getUserIdentity(): UserIdentity {
	if ( $this->mUser !== null ) {
		return $this->mUser;
	}

	return $this->getOptions()->getUserIdentity();
}
1181 
/**
 * Return the preprocessor instance created in the constructor.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	return $this->mPreprocessor;
}
1191 
/**
 * Return the link renderer, creating it from the link renderer factory on
 * first use.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	// XXX The LinkRenderer is made with the current options and then cached forever
	$this->mLinkRenderer = $this->linkRendererFactory->create();
	return $this->mLinkRenderer;
}
1206 
/**
 * Return the magic word factory injected through the constructor.
 *
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1216 
/**
 * Return the content language injected through the constructor.
 *
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1226 
/**
 * Return the bad-file lookup service injected through the constructor.
 *
 * @return BadFileLookup
 */
public function getBadFileLookup() {
	return $this->badFileLookup;
}
1236 
/**
 * Replace every occurrence of the given tags (and every HTML comment) in
 * $text with a unique marker, and report what each marker stands for.
 *
 * Tag names are now escaped with preg_quote() before being placed in the
 * regular expressions, so a name containing "/" or regex metacharacters is
 * matched literally instead of breaking or altering the pattern.
 *
 * @param string[] $elements Tag names to extract (matched case-insensitively)
 * @param string $text Source text
 * @param array[] &$matches Filled with marker => [ element, content or null
 *   for a self-closing tag, decoded attributes, complete matched text ]
 * @return string $text with each extracted tag replaced by its marker
 */
public static function extractTagsAndParams( array $elements, $text, &$matches ) {
	// Shared across calls so that markers stay unique within the request.
	static $n = 1;
	$stripped = '';
	$matches = [];

	$quotedNames = [];
	foreach ( $elements as $elementName ) {
		$quotedNames[] = preg_quote( $elementName, '/' );
	}
	$taglist = implode( '|', $quotedNames );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			// No further opening tag or comment in the remaining text.
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			[ , $element, $attributes, $close, $inside ] = $p;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				// $element is the text as matched in the source; quote it too.
				$end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				[ , $tail, $text ] = $q;
			}
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail",
		];
	}
	return $stripped;
}
1313 
/**
 * Return the list held in $this->mStripList.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1322 
/**
 * Return the current strip state object (recreated by clearState()).
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1330 
/**
 * Register $text in the general strip state under a newly generated marker
 * and return that marker. Each call consumes one value of the marker index.
 *
 * @param string $text
 * @return string The marker standing in for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1346 
/**
 * Translate wikitext table markup ({| ... |}, |-, |, !, |+) into HTML table
 * tags, working through $text one line at a time.
 *
 * State is kept in parallel stacks with one entry per currently open table,
 * pushed when a table starts and popped when it ends. Lines outside any
 * table are copied through unchanged. Tables still open at the end of the
 * text are closed automatically.
 *
 * @param string $text
 * @return string The text with tables converted; an empty string when the
 *   result would be nothing but a single empty table
 */
1353  private function handleTables( $text ) {
1354  $lines = StringUtils::explode( "\n", $text );
1355  $out = '';
1356  $td_history = []; # Is currently a td tag open?
1357  $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1358  $tr_history = []; # Is currently a tr tag open?
1359  $tr_attributes = []; # history of tr attributes
1360  $has_opened_tr = []; # Did this table open a <tr> element?
1361  $indent_level = 0; # indent level of the table
1362 
1363  foreach ( $lines as $outLine ) {
1364  $line = trim( $outLine );
1365 
1366  if ( $line === '' ) { # empty line, go to next line
1367  $out .= $outLine . "\n";
1368  continue;
1369  }
1370 
1371  $first_character = $line[0];
1372  $first_two = substr( $line, 0, 2 );
1373  $matches = [];
1374 
1375  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1376  # First check if we are starting a new table
// The number of leading colons becomes the number of <dl><dd> wrappers.
1377  $indent_level = strlen( $matches[1] );
1378 
1379  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1380  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1381 
1382  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
// Push a fresh entry for the new table onto every state stack.
1383  array_push( $td_history, false );
1384  array_push( $last_tag_history, '' );
1385  array_push( $tr_history, false );
1386  array_push( $tr_attributes, '' );
1387  array_push( $has_opened_tr, false );
1388  } elseif ( count( $td_history ) == 0 ) {
1389  # Don't do any of the following
1390  $out .= $outLine . "\n";
1391  continue;
1392  } elseif ( $first_two === '|}' ) {
1393  # We are ending a table
1394  $line = '</table>' . substr( $line, 2 );
1395  $last_tag = array_pop( $last_tag_history );
1396 
// A table that never opened a row gets a single empty row and cell.
1397  if ( !array_pop( $has_opened_tr ) ) {
1398  $line = "<tr><td></td></tr>{$line}";
1399  }
1400 
1401  if ( array_pop( $tr_history ) ) {
1402  $line = "</tr>{$line}";
1403  }
1404 
1405  if ( array_pop( $td_history ) ) {
1406  $line = "</{$last_tag}>{$line}";
1407  }
1408  array_pop( $tr_attributes );
1409  if ( $indent_level > 0 ) {
1410  $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1411  } else {
1412  $outLine = $line;
1413  }
1414  } elseif ( $first_two === '|-' ) {
1415  # Now we have a table row
1416  $line = preg_replace( '#^\|-+#', '', $line );
1417 
1418  # What's after the tag is now only attributes
1419  $attributes = $this->mStripState->unstripBoth( $line );
1420  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
// Replace the stored row attributes; the <tr> itself is only emitted
// once the first cell of the row is seen.
1421  array_pop( $tr_attributes );
1422  array_push( $tr_attributes, $attributes );
1423 
1424  $line = '';
1425  $last_tag = array_pop( $last_tag_history );
1426  array_pop( $has_opened_tr );
1427  array_push( $has_opened_tr, true );
1428 
1429  if ( array_pop( $tr_history ) ) {
1430  $line = '</tr>';
1431  }
1432 
1433  if ( array_pop( $td_history ) ) {
1434  $line = "</{$last_tag}>{$line}";
1435  }
1436 
1437  $outLine = $line;
1438  array_push( $tr_history, false );
1439  array_push( $td_history, false );
1440  array_push( $last_tag_history, '' );
1441  } elseif ( $first_character === '|'
1442  || $first_character === '!'
1443  || $first_two === '|+'
1444  ) {
1445  # This might be cell elements, td, th or captions
1446  if ( $first_two === '|+' ) {
1447  $first_character = '+';
1448  $line = substr( $line, 2 );
1449  } else {
1450  $line = substr( $line, 1 );
1451  }
1452 
1453  // Implies both are valid for table headings.
1454  if ( $first_character === '!' ) {
1455  $line = StringUtils::replaceMarkup( '!!', '||', $line );
1456  }
1457 
1458  # Split up multiple cells on the same line.
1459  # FIXME : This can result in improper nesting of tags processed
1460  # by earlier parser steps.
1461  $cells = explode( '||', $line );
1462 
1463  $outLine = '';
1464 
1465  # Loop through each table cell
1466  foreach ( $cells as $cell ) {
1467  $previous = '';
// Captions ('+') do not open a table row; cells and headings do.
1468  if ( $first_character !== '+' ) {
1469  $tr_after = array_pop( $tr_attributes );
1470  if ( !array_pop( $tr_history ) ) {
1471  $previous = "<tr{$tr_after}>\n";
1472  }
1473  array_push( $tr_history, true );
1474  array_push( $tr_attributes, '' );
1475  array_pop( $has_opened_tr );
1476  array_push( $has_opened_tr, true );
1477  }
1478 
1479  $last_tag = array_pop( $last_tag_history );
1480 
// Close the previous cell of this table, if one is still open.
1481  if ( array_pop( $td_history ) ) {
1482  $previous = "</{$last_tag}>\n{$previous}";
1483  }
1484 
1485  if ( $first_character === '|' ) {
1486  $last_tag = 'td';
1487  } elseif ( $first_character === '!' ) {
1488  $last_tag = 'th';
1489  } elseif ( $first_character === '+' ) {
1490  $last_tag = 'caption';
1491  } else {
1492  $last_tag = '';
1493  }
1494 
1495  array_push( $last_tag_history, $last_tag );
1496 
1497  # A cell could contain both parameters and data
1498  $cell_data = explode( '|', $cell, 2 );
1499 
1500  # T2553: Note that a '|' inside an invalid link should not
1501  # be mistaken as delimiting cell parameters
1502  # Bug T153140: Neither should language converter markup.
1503  if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1504  $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1505  } elseif ( count( $cell_data ) == 1 ) {
1506  // Whitespace in cells is trimmed
1507  $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1508  } else {
1509  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1510  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1511  // Whitespace in cells is trimmed
1512  $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1513  }
1514 
1515  $outLine .= $cell;
1516  array_push( $td_history, true );
1517  }
1518  }
1519  $out .= $outLine . "\n";
1520  }
1521 
1522  # Closing open td, tr && table
// NOTE: an open cell is always closed with </td> here, whatever tag
// ($last_tag_history) actually opened it.
1523  while ( count( $td_history ) > 0 ) {
1524  if ( array_pop( $td_history ) ) {
1525  $out .= "</td>\n";
1526  }
1527  if ( array_pop( $tr_history ) ) {
1528  $out .= "</tr>\n";
1529  }
1530  if ( !array_pop( $has_opened_tr ) ) {
1531  $out .= "<tr><td></td></tr>\n";
1532  }
1533 
1534  $out .= "</table>\n";
1535  }
1536 
1537  # Remove trailing line-ending (b/c)
1538  if ( substr( $out, -1 ) === "\n" ) {
1539  $out = substr( $out, 0, -1 );
1540  }
1541 
1542  # special case: don't return empty table
1543  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1544  $out = '';
1545  }
1546 
1547  return $out;
1548  }
1549 
/**
 * Core of the wikitext-to-HTML conversion: expands variables/templates,
 * sanitizes HTML, then applies the table, horizontal rule, double
 * underscore, heading, link, quote and magic-link passes in that order.
 *
 * @param string $text Wikitext to process
 * @param bool $isMain Passed on to finalizeHeadings()
 * @param PPFrame|false $frame When given, the text is preprocessed to a DOM
 *   and expanded in this frame; otherwise replaceVariables() is used
 * @return string The processed text. When the ParserBeforeInternalParse
 *   hook returns false, the text as left by that hook is returned at once.
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	# Hook to suspend the parser in this state
	if ( !$this->hookRunner->onParserBeforeInternalParse( $this, $text, $this->mStripState ) ) {
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		// Fix: the "included document" branch was empty, which left $flag
		// undefined for any frame with a non-zero depth.
		$flag = $frame->depth ? Preprocessor::DOM_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	$this->hookRunner->onInternalParseBeforeSanitize( $this, $text, $this->mStripState );
	$text = Sanitizer::removeHTMLtags(
		$text,
		// Callback from the Sanitizer for expanding items found in
		// HTML attribute values, so they can be safely tested and escaped.
		function ( &$text, $frame = false ) {
			$text = $this->replaceVariables( $text, $frame );
			$text = $this->mStripState->unstripBoth( $text );
		},
		false,
		[],
		[]
	);
	$this->hookRunner->onInternalParseBeforeLinks( $this, $text, $this->mStripState );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->handleTables( $text );

	// Four or more dashes at the start of a line become a horizontal rule.
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->handleDoubleUnderscore( $text );

	$text = $this->handleHeadings( $text );
	$text = $this->handleInternalLinks( $text );
	$text = $this->handleAllQuotes( $text );
	$text = $this->handleExternalLinks( $text );

	# handleInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from handleExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->handleMagicLinks( $text );
	$text = $this->finalizeHeadings( $text, $origText, $isMain );

	return $text;
}
1626 
1634  return $this->languageConverterFactory->getLanguageConverter(
1635  $this->getTargetLanguage()
1636  );
1637  }
1638 
1645  return $this->languageConverterFactory->getLanguageConverter(
1646  $this->getContentLanguage()
1647  );
1648  }
1649 
/**
 * Accessor for the hook container held by this parser.
 *
 * @return mixed The value of $this->hookContainer
 */
protected function getHookContainer() {
	return $this->hookContainer;
}
1660 
/**
 * Accessor for the hook runner held by this parser.
 *
 * @return mixed The value of $this->hookRunner
 */
protected function getHookRunner() {
	return $this->hookRunner;
}
1672 
/**
 * Finish turning half-parsed text into HTML: unstrip general markers, run
 * the block-level pass, replace link holders, optionally run language
 * conversion, unstrip the remaining markers and tidy the result.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain When true, the ParserAfterTidy hook is run
 * @param bool $linestart Passed through to BlockLevelPass::doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );
	$text = BlockLevelPass::doBlockLevels( $text, $linestart );
	$this->replaceLinkHoldersPrivate( $text );

	# Content conversion runs unless it is disabled by option, suppressed by
	# the 'nocontentconvert' double underscore, or this is an interface message.
	if ( !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage()
	) {
		# The position of the convert() call should not be changed. It
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$converter = $this->getTargetLanguageConverter();
		$text = $converter->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = $this->tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );

	if ( $isMain ) {
		$this->hookRunner->onParserAfterTidy( $this, $text );
	}

	return $text;
}
1718 
/**
 * Replace free external links and RFC / PMID / ISBN references in the text.
 *
 * Anchors and anything inside HTML tags are matched only so they can be
 * skipped; every match is handed to magicLinkCallback().
 *
 * @param string $text
 * @return string
 */
private function handleMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	# Non-newline space
	$space = self::SPACE_NOT_NL;
	# A dash or a non-newline space
	$spdash = "(?:-|$space)";
	# Possessive match of 1 or more spaces
	$spaces = "$space++";

	return preg_replace_callback(
		'!(?:                            # Start cases
			(<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
			(<.*?>) |                    # m[2]: Skip stuff inside HTML elements' . "
			(\b                          # m[3]: Free external links
				(?i:$prots)
				($addr$urlChar*)         # m[4]: Post-protocol path
			) |
			\b(?:RFC|PMID) $spaces       # m[5]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN $spaces (             # m[6]: ISBN, capture number
				(?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
				(?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
				[0-9Xx]                  #  check digit
			)\b
		)!xu",
		[ $this, 'magicLinkCallback' ],
		$text
	);
}
1757 
/**
 * Callback for the regex in handleMagicLinks().
 *
 * @param array $m Match array: [1] anchor, [2] HTML element, [3] free
 *  external link with [4] its post-protocol part, [5] RFC/PMID number,
 *  [6] ISBN number
 * @return string Replacement for the whole match
 * @throws MWException If an RFC/PMID match starts with neither keyword
 */
private function magicLinkCallback( array $m ) {
	$whole = $m[0];

	# Anchors and HTML elements are left untouched
	if ( ( $m[1] ?? '' ) !== '' || ( $m[2] ?? '' ) !== '' ) {
		return $whole;
	}

	# Free external link
	if ( ( $m[3] ?? '' ) !== '' ) {
		return $this->makeFreeExternalLink( $whole, strlen( $m[4] ) );
	}

	# RFC or PMID
	if ( ( $m[5] ?? '' ) !== '' ) {
		if ( substr( $whole, 0, 3 ) === 'RFC' ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $whole;
			}
			list( $keyword, $urlmsg, $cssClass, $trackingCat ) =
				[ 'RFC', 'rfcurl', 'mw-magiclink-rfc', 'magiclink-tracking-rfc' ];
		} elseif ( substr( $whole, 0, 4 ) === 'PMID' ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $whole;
			}
			list( $keyword, $urlmsg, $cssClass, $trackingCat ) =
				[ 'PMID', 'pubmedurl', 'mw-magiclink-pmid', 'magiclink-tracking-pmid' ];
		} else {
			// Should never happen
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $whole, 0, 20 ) . '"' );
		}
		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink(
			$url,
			"{$keyword} {$id}",
			true,
			$cssClass,
			[],
			$this->getTitle()
		);
	}

	# Anything that is not an enabled ISBN match is returned unchanged
	if ( ( $m[6] ?? '' ) === '' || !$this->mOptions->getMagicISBNLinks() ) {
		return $whole;
	}

	# ISBN: normalise every non-newline space variant to a plain space for
	# display, and drop separators (upper-casing the check digit) for the target
	$space = self::SPACE_NOT_NL;
	$isbn = preg_replace( "/$space/", ' ', $m[6] );
	$num = strtr( $isbn, [
		'-' => '',
		' ' => '',
		'x' => 'X',
	] );
	$this->addTrackingCategory( 'magiclink-tracking-isbn' );
	return $this->getLinkRenderer()->makeKnownLink(
		SpecialPage::getTitleFor( 'Booksources', $num ),
		"ISBN $isbn",
		[
			'class' => 'internal mw-magiclink-isbn',
			'title' => false // suppress title attribute
		]
	);
}
1833 
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Escaped angle brackets / non-breaking spaces and trailing punctuation are
 * split off the URL and appended after the generated link.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Number of characters after the protocol
 * @return string HTML, or the unchanged input text when nothing but the
 *  protocol would remain after trimming
 */
private function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$entityHit = [];
	$found = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entityHit,
		PREG_OFFSET_CAPTURE
	);
	if ( $found ) {
		$cut = $entityHit[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Move trailing punctuation to $trail.
	# If there is no left bracket, then consider right brackets fair game too
	$sep = ',;\.:!?';
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$reversed = strrev( $url );
	$trimCount = strspn( $reversed, $sep );
	# Don't break a trailing HTML entity by moving the ; into $trail
	# This is in hot code, so use substr_compare to avoid having to
	# create a new string object for the comparison
	if ( $trimCount && substr_compare( $url, ";", -$trimCount, 1 ) === 0 ) {
		# more optimization: instead of running preg_match with a $
		# anchor, which can be slow, do the match on the reversed
		# string starting at the desired offset.
		# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
		$pattern = '/\G([a-z]+|[\da-f]+x#|\d+#)&/i';
		if ( preg_match( $pattern, $reversed, $entityHit, 0, $trimCount ) ) {
			$trimCount--;
		}
	}
	if ( $trimCount ) {
		$trail = substr( $url, -$trimCount ) . $trail;
		$url = substr( $url, 0, -$trimCount );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$html = Linker::makeExternalLink(
			$url,
			$this->getTargetLanguageConverter()->markNoConversion( $url ),
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->getTitle()
		);
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}
	return $html . $trail;
}
1912 
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so the longest run of '=' wins.
 *
 * @param string $text
 * @return string
 */
private function handleHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $i ) {
		$h = str_repeat( '=', $i );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = "/^(?:$h)[ \\t]*(.+?)[ \\t]*(?:$h)\\s*$/m";
		$text = preg_replace( $pattern, "<h$i>\\1</h$i>", $text );
	}
	return $text;
}
1928 
/**
 * Replace single quotes with HTML markup, one line at a time.
 *
 * @param string $text
 * @return string The lines after doQuotes(), joined again with newlines
 */
private function handleAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1945 
/**
 * Convert runs of apostrophes on a single line into <i>/<b> markup.
 *
 * Two apostrophes toggle italics, three toggle bold, five toggle both.
 * Runs of four, or of more than five, have their leading apostrophes
 * treated as literal text. Tags still open at the end of the line are closed.
 *
 * @param string $text A single line of text
 * @return string The line with quote markup replaced by HTML
 */
public function doQuotes( $text ) {
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		# No apostrophe runs at all
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	// Odd indexes of $arr hold the apostrophe runs, even indexes the text.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state is one of '', 'i', 'b', 'bi', 'ib' (open tags, outermost first)
	// or 'both', meaning ''''' was seen and the nesting order is not known
	// yet; text is held in $buffer until it is.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				// two quotes - open or close italics
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				// three quotes - open or close bold
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				// five quotes - open or close both separately
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would treat the
	// buffered text "0" as empty and drop it from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2129 
/**
 * Replace bracketed external links ([url text]) with HTML links.
 *
 * The text is split with $this->mExtLinkBracketedRegex; after the leading
 * text, the pieces come in groups of four: URL, protocol, link text, trail.
 *
 * @param string $text
 * @return string
 * @throws MWException If preg_split() fails
 */
private function handleExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}
	$s = array_shift( $bits );

	for ( $i = 0; $i < count( $bits ); $i += 4 ) {
		$url = $bits[$i];
		# $bits[$i + 1] is the protocol, which is not used here
		$text = $bits[$i + 2];
		$trail = $bits[$i + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$hit = [];
		if ( preg_match( '/&(lt|gt);/', $url, $hit, PREG_OFFSET_CAPTURE ) ) {
			$cut = $hit[0][1];
			$text = substr( $url, $cut ) . ' ' . $text;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $text );
		if ( $img !== false ) {
			$text = $img;
		}

		$dtrail = '';
		# Link type for CSS
		$linktype = 'text';

		if ( $text == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber it
			$langObj = $this->getTargetLanguage();
			$text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
			$text = $this->getTargetLanguageConverter()->markNoConversion( $text );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		// @phan-suppress-next-line SecurityCheck-XSS,SecurityCheck-DoubleEscaped using false for escape is valid
		$link = Linker::makeExternalLink( $url, $text, false, $linktype,
			$this->getExternalLinkAttribs( $url ), $this->getTitle() );
		$s .= $link . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $s;
}
2210 
/**
 * Get the rel attribute for a particular external link.
 *
 * @param string|bool $url URL of the link, checked against the no-follow
 *  domain exceptions
 * @param LinkTarget|null $title Page the link appears on; its namespace is
 *  checked against the no-follow namespace exceptions
 * @return string|null 'nofollow', or null when no rel value applies
 */
public static function getExternalLinkRel( $url = false, LinkTarget $title = null ) {
	# NOTE(review): the global declaration and the domain-exception check were
	# missing from this listing and have been restored from upstream MediaWiki
	# - confirm against the canonical source.
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
	$ns = $title ? $title->getNamespace() : false;
	if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
2231 
/**
 * Get an associative array of additional HTML attributes appropriate for a
 * particular external link.
 *
 * @param string $url URL of the link
 * @return array Always has a 'rel' key (null when no rel value applies);
 *  has a 'target' key when an external link target is configured
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->getTitle() );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// getExternalLinkRel() returns null when there is no rel value,
			// so only add the separating space after a real, non-empty value.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2263 
/**
 * Replace unusual escape codes in a URL with their equivalent characters.
 *
 * Unsafe characters are percent-encoded first; the fragment, query and
 * scheme/path parts are then each normalized with their own set of
 * characters that must stay escaped. A valid bracketed IPv6 host is
 * restored to literal [..] form at the end.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
		IPUtils::isValid( rawurldecode( $m[1] ) )
	) {
		$isIPv6 = rawurldecode( $m[1] );
	} else {
		$isIPv6 = false;
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		static function ( $m ) {
			return rawurlencode( $m[0] );
		},
		$url
	);

	# The result is assembled back to front: each part is prepended to $ret
	# and $end moves to the start of the part just handled.
	$ret = '';
	$end = strlen( $url );

	# Fragment part - 'fragment'
	$start = strpos( $url, '#' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
		$end = $start;
	}

	# Query part - 'query' minus &=+;
	$start = strpos( $url, '?' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
		$end = $start;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent(
		substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax: the brackets were percent-encoded above, put them back
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2332 
/**
 * Normalize the percent-escapes in one component of a URL.
 *
 * An escape is decoded when it stands for a character with byte value
 * 33..126 that is not listed in $unsafe; every other escape is kept, with
 * its hex digits upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must stay escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		static function ( $matches ) use ( $unsafe ) {
			$escape = $matches[0];
			$char = urldecode( $escape );
			$ord = ord( $char );
			$decodable = $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false;
			# Either unescape it, or leave it escaped but use uppercase for a-f
			return $decodable ? $char : strtoupper( $escape );
		},
		$component
	);
}
2347 
/**
 * Make an image tag if the URL is an allowed external image.
 *
 * The URL qualifies when it matches EXT_IMAGE_REGEX and either external
 * images are allowed outright, it starts with one of the allowed prefixes,
 * or (when the whitelist is enabled) it matches an entry of the
 * 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|bool HTML for the image, or false when no image was made
 */
private function maybeMakeExternalImage( $url ) {
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();

	# $imagesfrom can be a single prefix or an array of prefixes
	$prefixMatched = false;
	if ( !empty( $imagesfrom ) ) {
		if ( is_array( $imagesfrom ) ) {
			foreach ( $imagesfrom as $candidate ) {
				if ( strpos( $url, $candidate ) === 0 ) {
					$prefixMatched = true;
					break;
				}
			}
		} else {
			$prefixMatched = ( strpos( $url, $imagesfrom ) === 0 );
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( !$html && $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$entries = explode(
			"\n",
			wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
		);

		foreach ( $entries as $entry ) {
			# Ignore blank entries and comments
			if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
				continue;
			}
			# Escape the delimiter in the regex fragment and match case-insensitively
			// @phan-suppress-next-line SecurityCheck-ReDoS preg_quote is not wanted here
			if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
				# Image matches a whitelist entry
				$html = Linker::makeExternalImage( $url );
				break;
			}
		}
	}
	return $html;
}
2406 
/**
 * Process [[ ]] wikilinks.
 *
 * handleInternalLinks2() rewrites $text by reference and returns the link
 * holders it created; those are merged into $this->mLinkHolders.
 *
 * @param string $text
 * @return string The processed text
 */
private function handleInternalLinks( $text ) {
	$this->mLinkHolders->merge( $this->handleInternalLinks2( $text ) );
	return $text;
}
2418 
2424  private function handleInternalLinks2( &$s ) {
2425  static $tc = false, $e1, $e1_img;
2426  # the % is needed to support urlencoded titles as well
2427  if ( !$tc ) {
2428  $tc = Title::legalChars() . '#%';
2429  # Match a link having the form [[namespace:link|alternate]]trail
2430  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2431  # Match cases where there is no "]]", which might still be images
2432  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2433  }
2434 
2435  $holders = new LinkHolderArray(
2436  $this,
2437  $this->getContentLanguageConverter(),
2438  $this->getHookContainer() );
2439 
2440  # split the entire text string on occurrences of [[
2441  $a = StringUtils::explode( '[[', ' ' . $s );
2442  # get the first element (all text up to first [[), and remove the space we added
2443  $s = $a->current();
2444  $a->next();
2445  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2446  $s = substr( $s, 1 );
2447 
2448  $nottalk = !$this->getTitle()->isTalkPage();
2449 
2450  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2451  $e2 = null;
2452  if ( $useLinkPrefixExtension ) {
2453  # Match the end of a line for a word that's not followed by whitespace,
2454  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2455  $charset = $this->contLang->linkPrefixCharset();
2456  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2457  $m = [];
2458  if ( preg_match( $e2, $s, $m ) ) {
2459  $first_prefix = $m[2];
2460  } else {
2461  $first_prefix = false;
2462  }
2463  } else {
2464  $prefix = '';
2465  }
2466 
2467  # Some namespaces don't allow subpages
2468  $useSubpages = $this->nsInfo->hasSubpages(
2469  $this->getTitle()->getNamespace()
2470  );
2471 
2472  # Loop for each link
2473  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2474  # Check for excessive memory usage
2475  if ( $holders->isBig() ) {
2476  # Too big
2477  # Do the existence check, replace the link holders and clear the array
2478  $holders->replace( $s );
2479  $holders->clear();
2480  }
2481 
2482  if ( $useLinkPrefixExtension ) {
2483  if ( preg_match( $e2, $s, $m ) ) {
2484  list( , $s, $prefix ) = $m;
2485  } else {
2486  $prefix = '';
2487  }
2488  # first link
2489  if ( $first_prefix ) {
2490  $prefix = $first_prefix;
2491  $first_prefix = false;
2492  }
2493  }
2494 
2495  $might_be_img = false;
2496 
2497  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2498  $text = $m[2];
2499  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2500  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2501  # the real problem is with the $e1 regex
2502  # See T1500.
2503  # Still some problems for cases where the ] is meant to be outside punctuation,
2504  # and no image is in sight. See T4095.
2505  if ( $text !== ''
2506  && substr( $m[3], 0, 1 ) === ']'
2507  && strpos( $text, '[' ) !== false
2508  ) {
2509  $text .= ']'; # so that handleExternalLinks($text) works later
2510  $m[3] = substr( $m[3], 1 );
2511  }
2512  # fix up urlencoded title texts
2513  if ( strpos( $m[1], '%' ) !== false ) {
2514  # Should anchors '#' also be rejected?
2515  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2516  }
2517  $trail = $m[3];
2518  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2519  # Invalid, but might be an image with a link in its caption
2520  $might_be_img = true;
2521  $text = $m[2];
2522  if ( strpos( $m[1], '%' ) !== false ) {
2523  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2524  }
2525  $trail = "";
2526  } else { # Invalid form; output directly
2527  $s .= $prefix . '[[' . $line;
2528  continue;
2529  }
2530 
2531  $origLink = ltrim( $m[1], ' ' );
2532 
2533  # Don't allow internal links to pages containing
2534  # PROTO: where PROTO is a valid URL protocol; these
2535  # should be external links.
2536  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2537  $s .= $prefix . '[[' . $line;
2538  continue;
2539  }
2540 
2541  # Make subpage if necessary
2542  if ( $useSubpages ) {
2544  $this->getTitle(), $origLink, $text
2545  );
2546  } else {
2547  $link = $origLink;
2548  }
2549 
2550  // \x7f isn't a default legal title char, so most likely strip
2551  // markers will force us into the "invalid form" path above. But,
2552  // just in case, let's assert that xmlish tags aren't valid in
2553  // the title position.
2554  $unstrip = $this->mStripState->killMarkers( $link );
2555  $noMarkers = ( $unstrip === $link );
2556 
2557  $nt = $noMarkers ? Title::newFromText( $link ) : null;
2558  if ( $nt === null ) {
2559  $s .= $prefix . '[[' . $line;
2560  continue;
2561  }
2562 
2563  $ns = $nt->getNamespace();
2564  $iw = $nt->getInterwiki();
2565 
2566  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2567 
2568  if ( $might_be_img ) { # if this is actually an invalid link
2569  if ( $ns === NS_FILE && $noforce ) { # but might be an image
2570  $found = false;
2571  while ( true ) {
2572  # look at the next 'line' to see if we can close it there
2573  $a->next();
2574  $next_line = $a->current();
2575  if ( $next_line === false || $next_line === null ) {
2576  break;
2577  }
2578  $m = explode( ']]', $next_line, 3 );
2579  if ( count( $m ) == 3 ) {
2580  # the first ]] closes the inner link, the second the image
2581  $found = true;
2582  $text .= "[[{$m[0]}]]{$m[1]}";
2583  $trail = $m[2];
2584  break;
2585  } elseif ( count( $m ) == 2 ) {
2586  # if there's exactly one ]] that's fine, we'll keep looking
2587  $text .= "[[{$m[0]}]]{$m[1]}";
2588  } else {
2589  # if $next_line is invalid too, we need look no further
2590  $text .= '[[' . $next_line;
2591  break;
2592  }
2593  }
2594  if ( !$found ) {
2595  # we couldn't find the end of this imageLink, so output it raw
2596  # but don't ignore what might be perfectly normal links in the text we've examined
2597  $holders->merge( $this->handleInternalLinks2( $text ) );
2598  $s .= "{$prefix}[[$link|$text";
2599  # note: no $trail, because without an end, there *is* no trail
2600  continue;
2601  }
2602  } else { # it's not an image, so output it raw
2603  $s .= "{$prefix}[[$link|$text";
2604  # note: no $trail, because without an end, there *is* no trail
2605  continue;
2606  }
2607  }
2608 
2609  $wasblank = ( $text == '' );
2610  if ( $wasblank ) {
2611  $text = $link;
2612  if ( !$noforce ) {
2613  # Strip off leading ':'
2614  $text = substr( $text, 1 );
2615  }
2616  } else {
2617  # T6598 madness. Handle the quotes only if they come from the alternate part
2618  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2619  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2620  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2621  $text = $this->doQuotes( $text );
2622  }
2623 
2624  # Link not escaped by : , create the various objects
2625  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2626  # Interwikis
2627  if (
2628  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2629  MediaWikiServices::getInstance()->getLanguageNameUtils()
2630  ->getLanguageName(
2631  $iw,
2632  LanguageNameUtils::AUTONYMS,
2633  LanguageNameUtils::DEFINED
2634  )
2635  || in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2636  )
2637  ) {
2638  # T26502: filter duplicates
2639  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2640  $this->mLangLinkLanguages[$iw] = true;
2641  $this->mOutput->addLanguageLink( $nt->getFullText() );
2642  }
2643 
2647  $s = rtrim( $s . $prefix ) . $trail; # T175416
2648  continue;
2649  }
2650 
2651  if ( $ns === NS_FILE ) {
2652  if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->getTitle() ) ) {
2653  if ( $wasblank ) {
2654  # if no parameters were passed, $text
2655  # becomes something like "File:Foo.png",
2656  # which we don't want to pass on to the
2657  # image generator
2658  $text = '';
2659  } else {
2660  # recursively parse links inside the image caption
2661  # actually, this will parse them in any other parameters, too,
2662  # but it might be hard to fix that, and it doesn't matter ATM
2663  $text = $this->handleExternalLinks( $text );
2664  $holders->merge( $this->handleInternalLinks2( $text ) );
2665  }
2666  # cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
2667  $s .= $prefix . $this->armorLinks(
2668  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2669  continue;
2670  }
2671  } elseif ( $ns === NS_CATEGORY ) {
2675  $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2676 
2677  if ( $wasblank ) {
2678  $sortkey = $this->getDefaultSort();
2679  } else {
2680  $sortkey = $text;
2681  }
2682  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2683  $sortkey = str_replace( "\n", '', $sortkey );
2684  $sortkey = $this->getTargetLanguageConverter()->convertCategoryKey( $sortkey );
2685  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2686 
2687  continue;
2688  }
2689  }
2690 
2691  # Self-link checking. For some languages, variants of the title are checked in
2692  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2693  # for linking to a different variant.
2694  if ( $ns !== NS_SPECIAL && $nt->equals( $this->getTitle() ) && !$nt->hasFragment() ) {
2695  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2696  continue;
2697  }
2698 
2699  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2700  # @todo FIXME: Should do batch file existence checks, see comment below
2701  if ( $ns === NS_MEDIA ) {
2702  # Give extensions a chance to select the file revision for us
2703  $options = [];
2704  $descQuery = false;
2705  $this->hookRunner->onBeforeParserFetchFileAndTitle(
2706  $this, $nt, $options, $descQuery );
2707  # Fetch and register the file (file title may be different via hooks)
2708  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2709  # Cloak with NOPARSE to avoid replacement in handleExternalLinks
2710  $s .= $prefix . $this->armorLinks(
2711  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2712  continue;
2713  }
2714 
2715  # Some titles, such as valid special pages or files in foreign repos, should
2716  # be shown as bluelinks even though they're not included in the page table
2717  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2718  # batch file existence checks for NS_FILE and NS_MEDIA
2719  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2720  $this->mOutput->addLink( $nt );
2721  $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2722  } else {
2723  # Links will be added to the output link list after checking
2724  $s .= $holders->makeHolder( $nt, $text, $trail, $prefix );
2725  }
2726  }
2727  return $holders;
2728  }
2729 
/**
 * Render a link to a page that is known to exist, armoured against
 * later external-link processing.
 *
 * @param LinkTarget $nt Link target
 * @param string $text Link text (HTML); when empty, the escaped prefixed title is used
 * @param string $trail Text following the link; the part returned first by
 *  Linker::splitTrail() is pulled inside the link
 * @param string $prefix Text placed at the start of the link label
 * @return string Armoured link HTML followed by the remaining trail
 */
private function makeKnownLinkHolder( LinkTarget $nt, $text = '', $trail = '', $prefix = '' ) {
	$trailParts = Linker::splitTrail( $trail );
	$inside = $trailParts[0];
	$remainder = $trailParts[1];

	$label = $text;
	if ( $label == '' ) {
		# No explicit label: fall back to the escaped prefixed title
		$label = htmlspecialchars( $this->titleFormatter->getPrefixedText( $nt ) );
	}

	$html = new HtmlArmor( $prefix . $label . $inside );
	$rendered = $this->getLinkRenderer()->makeKnownLink( $nt, $html );

	return $this->armorLinks( $rendered ) . $remainder;
}
2756 
/**
 * Insert a NOPARSE marker in front of every URL protocol found at a word
 * boundary in $text, so the protocol no longer reads as the start of a URL.
 *
 * @param string $text
 * @return string
 */
private function armorLinks( $text ) {
	# Protocols are matched case-insensitively
	$pattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$replacement = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $pattern, $replacement, $text );
}
2771 
/**
 * Deprecated pass-through to BlockLevelPass::doBlockLevels().
 *
 * @deprecated since 1.35
 * @param string $text
 * @param bool $linestart Forwarded unchanged to BlockLevelPass
 * @return string
 */
public function doBlockLevels( $text, $linestart ) {
	wfDeprecated( __METHOD__, '1.35' );

	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $processed;
}
2785 
/**
 * Return the value of a magic variable, using $this->mVarCache where possible.
 *
 * Core magic words are expanded by CoreMagicVariables::expand() and cached
 * here. Anything else is delegated to the ParserGetVariableValueSwitch hook,
 * which is expected to populate the cache itself.
 *
 * @param string $index Magic variable identifier
 * @param bool|PPFrame $frame Passed through to the ParserGetVariableValueSwitch hook
 * @return string|null Null when neither core nor a hook handler supplied a value
 */
private function expandMagicVariable( $index, $frame = false ) {
	# Cached values are only used if the VarCache hook does not veto it
	if (
		$this->hookRunner->onParserGetVariableValueVarCache( $this, $this->mVarCache ) &&
		isset( $this->mVarCache[$index] )
	) {
		return $this->mVarCache[$index];
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	# Hook handlers may adjust the timestamp used for date/time variables
	$this->hookRunner->onParserGetVariableValueTs( $this, $ts );

	$value = CoreMagicVariables::expand(
		$this, $index, $ts, $this->nsInfo, $this->svcOptions, $this->logger
	);

	if ( $value === null ) {
		// Not a defined core magic word
		$ret = null;
		$originalIndex = $index;
		$this->hookRunner->onParserGetVariableValueSwitch( $this,
			$this->mVarCache, $index, $ret, $frame );
		if ( $index !== $originalIndex ) {
			wfDeprecatedMsg(
				'A ParserGetVariableValueSwitch hook handler modified $index, ' .
				'this is deprecated since MediaWiki 1.35',
				'1.35', false, false
			);
		}
		if ( !isset( $this->mVarCache[$originalIndex] ) ||
			$this->mVarCache[$originalIndex] !== $ret ) {
			wfDeprecatedMsg(
				'A ParserGetVariableValueSwitch hook handler bypassed the cache, ' .
				'this is deprecated since MediaWiki 1.35', '1.35', false, false
			);
		}
		// FIXME: in the future, don't give this hook unrestricted
		// access to mVarCache; we can cache it ourselves by falling
		// through here.
		return $ret;
	}

	$this->mVarCache[$index] = $value;

	return $value;
}
2842 
/**
 * Build the magic word arrays used to match variables ($this->mVariables)
 * and subst prefixes ($this->mSubstWords).
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$idsForVariables = $factory->getVariableIDs();
	$idsForSubst = $factory->getSubstIDs();

	$this->mVariables = $factory->newArray( $idsForVariables );
	$this->mSubstWords = $factory->newArray( $idsForSubst );
}
2854 
/**
 * Run the preprocessor over wikitext and return the resulting document tree.
 *
 * @param string $text Wikitext
 * @param int $flags Preprocessor flags, e.g. Preprocessor::DOM_FOR_INCLUSION
 * @return PPNode NOTE(review): return type inferred from usage — confirm
 */
public function preprocessToDom( $text, $flags = 0 ) {
	$preprocessor = $this->getPreprocessor();

	return $preprocessor->preprocessToObj( $text, $flags );
}
2876 
/**
 * Expand templates, variables and arguments in a piece of wikitext.
 *
 * Empty text, and text longer than the configured maximum include size,
 * is returned untouched.
 *
 * @param string $text
 * @param bool|PPFrame|array $frame Frame to expand in; false creates a fresh
 *  frame, any other non-PPFrame value is wrapped in a custom frame
 * @param bool $argsOnly Expand with PPFrame::NO_TEMPLATES instead of no flags
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	$length = strlen( $text );
	# Nothing to do for empty text
	if ( $length < 1 ) {
		return $text;
	}
	# Prevent too big inclusions
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			$this->logger->debug(
				__METHOD__ . " called using plain parameters instead of " .
				"a PPFrame instance. Creating custom frame."
			);
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$tree = $this->preprocessToDom( $text );
	$expandFlags = 0;
	if ( $argsOnly ) {
		$expandFlags = PPFrame::NO_TEMPLATES;
	}

	return $frame->expand( $tree, $expandFlags );
}
2921 
/**
 * Record that a parser limit was hit: adds the "<type>-warning" message to
 * the output and the "<type>-category" tracking category.
 *
 * @param string $limitationType Message key prefix identifying the limit
 * @param string|int $current Current value, passed as a numeric message parameter
 * @param string|int $max Maximum value, passed as a numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	$warningKey = $limitationType . '-warning';
	$categoryKey = $limitationType . '-category';

	# Passing $current and $max is harmless when the message does not use them.
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$currentParam = Message::numParam( $current );
	$maxParam = Message::numParam( $max );
	$this->mOutput->addWarningMsg( $warningKey, $currentParam, $maxParam );

	$this->addTrackingCategory( $categoryKey );
}
2960 
/**
 * Expand a double-brace construct: a variable, parser function call,
 * subst/safesubst form, or template transclusion.
 *
 * @param array $piece Parsed construct; this method reads the keys
 *  'title' (node for the part before the first pipe), 'parts' (argument
 *  nodes, may be empty) and 'lineStart' (whether it began a line)
 * @param PPFrame $frame Frame in which the construct is being expanded
 * @return array Either [ 'object' => ... ] (still needs expansion in the
 *  current frame) or [ 'text' => ... ] (finished text)
 */
public function braceSubstitution( array $piece, PPFrame $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1.
	# When there are no parts, use an empty node array rather than a plain PHP
	# array: the code below calls $args->getLength() and $args->item(), which
	# would be a fatal error on an array.
	# NOTE(review): assumes newPartNodeArray( [] ) yields an empty node list that
	# newChild() and virtualBracketedImplode() accept like $piece['parts'] — confirm.
	$args = ( $piece['parts'] == null )
		? $this->getPreprocessor()->newPartNodeArray( [] )
		: $piece['parts'];

	$profileSection = null; // profile templates

	$sawDeprecatedTemplateEquals = false; // T91154

	# SUBST
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->expandMagicVariable( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = Linker::normalizeSubpageLink(
			$this->getTitle(), $part1, $subpage
		);
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguageConverter()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguageConverter()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->userFactory->newFromUserIdentity( $this->getUserIdentity() ) );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
					if (
						$title->getNamespace() === NS_TEMPLATE &&
						$title->getDBkey() === '=' &&
						$originalTitle === '='
					) {
						// Note that we won't get here if `=` is evaluated
						// (in the future) as a parser function, nor if
						// the Template namespace is given explicitly,
						// ie `{{Template:=}}`. Only `{{=}}` triggers.
						$sawDeprecatedTemplateEquals = true; // T91154
					}
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarningMsg(
				'template-loop-warning',
				Message::plaintextParam( $titleText )
			);
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}
	if (
		$sawDeprecatedTemplateEquals &&
		$this->mStripState->unstripBoth( $text ) !== '='
	) {
		// T91154: {{=}} is deprecated when it doesn't expand to `=`;
		// use {{Template:=}} if you must.
		$this->addTrackingCategory( 'template-equals-category' );
		$this->mOutput->addWarningMsg( 'template-equals-warning' );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		// @phan-suppress-next-line SecurityCheck-XSS Mixed mode, here html and safe
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		// @phan-suppress-next-line SecurityCheck-DoubleEscaped Mixed mode, here html and safe
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3335 
/**
 * Look up a parser function by name and invoke its registered callback.
 *
 * @param PPFrame $frame Frame used to expand node arguments, and handed to
 *  callbacks registered with SFH_OBJECT_ARGS
 * @param string $function Function name as written in wikitext
 * @param array $args Arguments; PPNode instances or plain values
 * @return array Always has a 'found' key. [ 'found' => false ] when no such
 *  function is registered; otherwise the callback's result normalised to an
 *  array with 'text' plus any flags the callback returned
 */
public function callParserFunction( PPFrame $frame, $function, array $args = [] ) {
	# Resolve the name: case sensitive synonyms first, then case insensitive ones
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$hookId = $this->mFunctionSynonyms[1][$function];
	} else {
		$lowered = $this->contLang->lc( $function );
		if ( !isset( $this->mFunctionSynonyms[0][$lowered] ) ) {
			return [ 'found' => false ];
		}
		$hookId = $this->mFunctionSynonyms[0][$lowered];
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$hookId];

	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Object-args callbacks get ( $parser, $frame, $nodes ); anything that
		# is not already a PPNode is wrapped, except the first argument
		$nodeArgs = [];
		foreach ( $args as $key => $arg ) {
			$nodeArgs[] = ( $arg instanceof PPNode || $key === 0 )
				? $arg
				: $this->mPreprocessor->newPartNodeArray( [ $key => $arg ] )->item( 0 );
		}
		$callArgs = [ $this, $frame, $nodeArgs ];
	} else {
		# Plain callbacks get ( $parser, ...$strings ), each argument trimmed
		$callArgs = [ $this ];
		foreach ( $args as $key => $arg ) {
			if ( $arg instanceof PPNode ) {
				$plain = $frame->expand( $arg );
			} elseif ( is_int( $key ) && $key >= 0 ) {
				$plain = $arg;
			} else {
				$plain = "$key=$arg";
			}
			$callArgs[] = trim( $plain );
		}
	}

	$result = $callback( ...$callArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. Normalise
	# both forms to the array this method returns.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	$noparse = $result['noparse'] ?? true;
	$preprocessFlags = $result['preprocessFlags'] ?? 0;

	if ( !$noparse ) {
		# The callback asked for its text to be preprocessed
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3434 
/**
 * Get the preprocessed tree of a template, with per-parser caching of both
 * the trees ($this->mTplDomCache) and title redirections ($this->mTplRedirCache).
 *
 * @param LinkTarget $title Template to load
 * @return array [ tree or false when no text could be fetched, final title ]
 */
public function getTemplateDom( LinkTarget $title ) {
	$requested = $title;
	$key = CacheKeyHelper::getKeyForPage( $title );

	# Follow a previously recorded redirection, if any
	$redirect = $this->mTplRedirCache[$key] ?? null;
	if ( $redirect !== null ) {
		$title = Title::makeTitle( $redirect[0], $redirect[1] );
		$key = CacheKeyHelper::getKeyForPage( $title );
	}

	if ( isset( $this->mTplDomCache[$key] ) ) {
		return [ $this->mTplDomCache[$key], $title ];
	}

	# Cache miss, go to the database
	$fetched = $this->fetchTemplateAndTitle( $title );
	$text = $fetched[0];
	$title = $fetched[1];

	if ( $text === false ) {
		# Remember the failure as well
		$this->mTplDomCache[$key] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$this->mTplDomCache[$key] = $dom;

	# Record where the requested title ended up if the fetch changed it
	if ( !$title->isSamePageAs( $requested ) ) {
		$requestedKey = CacheKeyHelper::getKeyForPage( $requested );
		$this->mTplRedirCache[$requestedKey] = [ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3475 
/**
 * Fetch the current revision of a page, caching the answer (including a
 * negative one) in a per-parser LRU cache of 100 entries.
 *
 * The lookup goes through the callback configured in ParserOptions.
 *
 * @param LinkTarget $link Page to look up
 * @return RevisionRecord|null Null when the callback returned nothing usable
 */
public function fetchCurrentRevisionRecordOfTitle( LinkTarget $link ) {
	$cacheKey = CacheKeyHelper::getKeyForPage( $link );
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$title = Title::castFromLinkTarget( $link ); // hook signature compat
		$revisionRecord =
			// Defaults to Parser::statelessFetchRevisionRecord()
			call_user_func(
				$this->mOptions->getCurrentRevisionRecordCallback(),
				$title,
				$this
			);
		if ( !$revisionRecord ) {
			// Parser::statelessFetchRevisionRecord() can return false;
			// normalize it to null.
			$revisionRecord = null;
		}
		$this->currentRevisionCache->set( $cacheKey, $revisionRecord );
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3512 
/**
 * Whether the current-revision cache already holds an entry for a page.
 *
 * @param LinkTarget $link
 * @return bool False when the cache has not been created yet
 */
public function isCurrentRevisionOfTitleCached( LinkTarget $link ) {
	$cacheKey = CacheKeyHelper::getKeyForPage( $link );
	$cache = $this->currentRevisionCache;

	if ( !$cache ) {
		return false;
	}

	return (bool)$cache->has( $cacheKey );
}
3526 
/**
 * Look up the known current revision of a page through the RevisionLookup
 * service, without touching any parser state.
 *
 * @param LinkTarget $link Page to look up
 * @param Parser|null $parser Not used by this implementation
 * @return RevisionRecord|bool NOTE(review): callers treat a falsy result as
 *  "no revision" — confirm the exact return type of getKnownCurrentRevision()
 */
public static function statelessFetchRevisionRecord( LinkTarget $link, $parser = null ) {
	// A PageIdentity (probably a Title) can be used as is; anything else is
	// converted to a Title first.
	// XXX: use RevisionStore::getPageForLink()!
	// ...but get the info for the current revision at the same time?
	// Should RevisionStore::getKnownCurrentRevision accept a LinkTarget?
	$page = ( $link instanceof PageIdentity )
		? $link
		: Title::castFromLinkTarget( $link );

	$lookup = MediaWikiServices::getInstance()->getRevisionLookup();

	return $lookup->getKnownCurrentRevision( $page );
}
3551 
/**
 * Fetch the text of a template through the callback configured in
 * ParserOptions, and register every reported dependency with the output.
 *
 * @param LinkTarget $link Template to fetch
 * @return array [ text (U+007F replaced by "?" when it is a string), final title ]
 */
public function fetchTemplateAndTitle( LinkTarget $link ) {
	// Use Title for compatibility with callbacks and return type
	$title = Title::castFromLinkTarget( $link );

	// Defaults to Parser::statelessFetchTemplate()
	$fetcher = $this->mOptions->getTemplateCallback();
	$stuff = call_user_func( $fetcher, $title, $this );
	$revRecord = $stuff['revision-record'] ?? null;

	$text = $stuff['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}

	$dependencies = $stuff['deps'] ?? [];
	foreach ( $dependencies as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		$isSelfTransclusion = $dep['title']->equals( $this->getTitle() )
			&& $revRecord instanceof RevisionRecord;
		if ( !$isSelfTransclusion ) {
			continue;
		}

		// Self-transclusion; final result may change based on the new page version
		try {
			$sha1 = $revRecord->getSha1();
		} catch ( RevisionAccessException $e ) {
			$sha1 = null;
		}
		$this->setOutputFlag( ParserOutputFlags::VARY_REVISION_SHA1, 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $sha1 );
	}

	return [ $text, $stuff['finalTitle'] ?? $title ];
}
3593 
/**
 * Fetch the wikitext of a template, following up to 2 redirects, and
 * collect the pages the result depends on.
 *
 * @param LinkTarget $page Template to fetch
 * @param Parser|bool $parser Parser to take the context title and the
 *  current-revision cache from, or false for none
 * @return array With keys 'revision-record' (RevisionRecord or false),
 *  'text' (string or false), 'finalTitle' (Title) and 'deps' (list of
 *  arrays with 'title', 'page_id' and 'rev_id')
 */
public static function statelessFetchTemplate( $page, $parser = false ) {
	$title = Title::castFromLinkTarget( $page ); // for compatibility with return type
	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];
	$revRecord = null;
	$contextTitle = $parser ? $parser->getTitle() : null;

	# Loop to fetch the article, with up to 2 redirects
	$revLookup = MediaWikiServices::getInstance()->getRevisionLookup();
	for ( $i = 0; $i < 3 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$revRecord = null; # Assume no hook
		$id = false; # Assume current
		$origTitle = $title;
		$titleChanged = false;
		Hooks::runner()->onBeforeParserFetchTemplateRevisionRecord(
			# The $title is not a PageIdentity, as it may
			# contain fragments or even represent an attempt to transclude
			# a broken or otherwise-missing Title, which the hook may
			# fix up. Similarly, the $contextTitle may represent a special
			# page or other page which "exists" as a parsing context but
			# is not in the DB.
			$contextTitle, $title,
			$skip, $revRecord
		);
		if ( !$skip && !$revRecord ) {
			# Deprecated legacy hook
			Hooks::runner()->onBeforeParserFetchTemplateAndtitle(
				$parser, $title, $skip, $id
			);
		}

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the revision
		if ( !$revRecord ) {
			if ( $id ) {
				# Handle $id returned by deprecated legacy hook
				$revRecord = $revLookup->getRevisionById( $id );
			} elseif ( $parser ) {
				$revRecord = $parser->fetchCurrentRevisionRecordOfTitle( $title );
			} else {
				$revRecord = $revLookup->getRevisionByTitle( $title );
			}
		}
		if ( $revRecord ) {
			# Update title, as $revRecord may have been changed by hook
			$title = Title::castFromLinkTarget(
				$revRecord->getPageAsLinkTarget()
			);
			$deps[] = [
				'title' => $title,
				'page_id' => $revRecord->getPageId(),
				'rev_id' => $revRecord->getId(),
			];
		} else {
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null,
			];
		}
		if ( !$title->equals( $origTitle ) ) {
			# If we fetched a rev from a different title, register
			# the original title too...
			$deps[] = [
				'title' => $origTitle,
				'page_id' => $origTitle->getArticleID(),
				'rev_id' => null,
			];
			$titleChanged = true;
		}
		# If there is no current revision, there is no page
		if ( $revRecord === null || $revRecord->getId() === null ) {
			$linkCache = MediaWikiServices::getInstance()->getLinkCache();
			$linkCache->addBadLinkObj( $title );
		}
		if ( $revRecord ) {
			if ( $titleChanged && !$revRecord->hasSlot( SlotRecord::MAIN ) ) {
				// We've added this (missing) title to the dependencies;
				// give the hook another chance to redirect it to an
				// actual page.
				$text = false;
				$finalTitle = $title;
				continue;
			}
			if ( $revRecord->hasSlot( SlotRecord::MAIN ) ) { // T276476
				$content = $revRecord->getContent( SlotRecord::MAIN );
				$text = $content ? $content->getWikitextForTransclusion() : null;
			} else {
				$text = false;
			}

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() === NS_MEDIAWIKI ) {
			# No revision, but the page may still be backed by an interface message
			$message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
				lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$text = $message->plain();
			break;
		} else {
			break;
		}
		// Only reachable with a revision whose main slot produced text above,
		// so $content has been assigned by this point.
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	$retValues = [
		// previously, when this also returned a Revision object, we set
		// 'revision-record' to false instead of null if it was unavailable,
		// so that callers could use isset and then rely on the revision-record
		// key instead of the revision key, even if there was no corresponding
		// object - we continue to set to false here for backwards compatibility
		'revision-record' => $revRecord ?: false,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
	return $retValues;
}
3742 
/**
 * Fetch a file and register it as an image dependency of the output.
 *
 * @param LinkTarget $link File to fetch
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array [ file object or false, Title built from $link ]
 */
public function fetchFileAndTitle( LinkTarget $link, array $options = [] ) {
	$file = $this->fetchFileNoRegister( $link, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $link->getDBkey(), $time, $sha1 );

	# ...and also under the file's own title when that differs from the request.
	# NOTE(review): the title returned below is always built from $link, never
	# from the file's own title — confirm callers do not expect the updated one.
	if ( $file && !$link->isSameLinkAs( $file->getTitle() ) ) {
		$fileTitle = $file->getTitle();
		$this->mOutput->addImage( $fileTitle->getDBkey(), $time, $sha1 );
	}

	// Title rather than LinkTarget, for return type compat
	return [ $file, Title::castFromLinkTarget( $link ) ];
}
3767 
/**
 * Look up a file in the repo group without registering it as a dependency.
 *
 * @param LinkTarget $link File to look up by name
 * @param array $options Lookup options; this method reads the keys 'broken'
 *  (force a missing file) and 'sha1' (look up by key instead of by name),
 *  and passes the whole array on to the repo group
 * @return File|bool NOTE(review): return type inferred — confirm against RepoGroup
 */
protected function fetchFileNoRegister( LinkTarget $link, array $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $link, $options );
}
3791 
/**
 * Fetch the content of a page on another wiki over HTTP, using the main
 * WAN object cache to avoid repeated requests.
 *
 * @param LinkTarget $link Page to transclude
 * @param string $action Value for the 'action' URL parameter
 * @return string The fetched content, or a content-language error message
 *  when the feature is disabled, the URL is longer than 1024 bytes, or the
 *  fetch did not produce a string
 */
public function interwikiTransclude( LinkTarget $link, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	// TODO: extract relevant functionality from Title
	$title = Title::castFromLinkTarget( $link );

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	// Remote wiki ID, or false
	$wikiId = $title->getTransWikiID();
	$hasWikiId = ( $wikiId !== false );

	// Captured here because __METHOD__ inside the closure would name the closure
	$caller = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$expiry = $this->svcOptions->get( 'TranscludeCacheExpiry' );

	$fetch = function ( $oldValue, &$ttl ) use ( $url, $caller, $cache ) {
		$request = $this->httpRequestFactory->create( $url, [], $caller );

		// Status object
		$status = $request->execute();
		$ok = $status->isOK();
		if ( !$ok ) {
			// Do not cache failures
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $request->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// The remote side reported lag; shorten the cache lifetime
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $ok ? $request->getContent() : null,
			'code' => $request->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$result = $cache->getWithSetCallback( $cacheKey, $expiry, $fetch, $cacheOptions );

	if ( is_string( $result['text'] ) ) {
		return $result['text'];
	}

	if ( $result['code'] != 200 ) {
		// No content, and the HTTP status was not 200: report the status
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $result['code'] )->inContentLanguage()->text();
	}

	// No content even though the status was 200: generic failure message
	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3862 
/**
 * Resolve a triple-brace argument ({{{name|default}}}) against a frame.
 *
 * @param array $piece Must contain 'title' (expanded to get the argument
 *  name) and 'parts' (the pipe-separated remainder)
 * @param PPFrame $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] when a default or the literal
 *  brace construct is used, or [ 'text' => ... ] otherwise
 */
public function argSubstitution( array $piece, PPFrame $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );

	$object = false;
	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultAllowed = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultAllowed ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	$warning = false;
	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$warning = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere: rebuild the literal {{{...}}} construct
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $warning !== false ) {
		$text .= $warning;
	}

	return $object !== false
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
3909 
/**
 * Turn an extension tag into its output, or into a strip marker standing
 * in for that output.
 *
 * In HTML output mode the registered tag hook is called; in other modes
 * the tag is reassembled as text. Unless the hook asks for marker type
 * 'none', the result is stored in the strip state and the marker is
 * returned instead.
 *
 * @param array $params Keys read here:
 *  - 'name': tag name (expanded through $frame)
 *  - 'attr': optional attribute text (expanded through $frame)
 *  - 'inner': optional tag content; absent for a self-closed tag
 *  - 'close': optional closing tag, used only outside HTML mode
 *  - 'attributes': optional extra attributes as a name => value array
 * @param PPFrame $frame
 * @throws MWException If a hook returns an unknown 'markerType'
 * @return string Strip marker, the raw output for marker type 'none', or
 *  an expansion error string passed through from $frame
 */
public function extensionSubstitution( array $params, PPFrame $frame ) {
	$errorStr = '<span class="error">';
	$errorLen = strlen( $errorStr );

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : null;
	// Only inspect real strings: passing null to substr() is deprecated
	// as of PHP 8.1, and null can never be the error string anyway.
	if ( $attrText !== null && substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;

	// The marker uses the name as written, before it is lower-cased below
	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$markerType = 'general';
	if ( $this->ot['html'] ) {
		$name = strtolower( $name );
		// An absent attribute string is handed over as '' rather than null
		$attributes = Sanitizer::decodeTagAttributes( $attrText ?? '' );
		if ( isset( $params['attributes'] ) ) {
			// Array union: attributes parsed from the text take precedence
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			// Note that $content may be null here, for example if the
			// tag is self-closed.
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags: element 0 is the output, named keys are flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			// A missing or null 'close' entry means there is no closing tag
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4011 
/**
 * Add to one of the include-size counters, unless that would take it past
 * the maximum include size from the parser options.
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Amount to add
 * @return bool False if the limit would be exceeded (counter unchanged),
 *  true if the counter was increased
 */
private function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4027 
4033  $this->mExpensiveFunctionCount++;
4034  return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4035  }
4036 
/**
 * Strip double-underscore magic words from the text and apply their
 * effects to the parser state and parser output.
 *
 * @param string $text
 * @return string Text with the magic words removed; the first __TOC__ is
 *  replaced by a placeholder comment
 */
private function handleDoubleUnderscore( $text ) {
	# __TOC__ is handled first because its position must be remembered
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Put a placeholder where the first one was; the TOC replaces it later
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );

		# Drop any further occurrences
		$text = $tocWord->replace( '', $text );
	}

	# Match and remove all remaining double-underscore words
	$allWords = $this->magicWordFactory->getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $allWords->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->setNoGallery( true );
	}

	# __NOTOC__ loses against an explicit __TOC__
	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if (
		isset( $this->mDoubleUnderscores['hiddencat'] ) &&
		$this->getTitle()->getNamespace() === NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ is applied after __NOINDEX__ and so overrides it, see T16899
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Record every word that was found as a page property
	foreach ( array_keys( $this->mDoubleUnderscores ) as $wordId ) {
		$this->mOutput->setPageProperty( $wordId, '' );
	}

	return $text;
}
4091 
/**
 * Add a tracking category for the current page to the parser output,
 * delegating to the tracking categories service.
 *
 * @param string $msg Passed through as the tracking category identifier
 * @return mixed Whatever the tracking categories service returns
 */
public function addTrackingCategory( $msg ) {
	$page = $this->getPage();

	return $this->trackingCategories->addTrackingCategory( $this->mOutput, $msg, $page );
}
4103 
/**
 * Rebuild every <h1>..<h6> in the rendered text with a unique anchor and
 * an optional edit-section marker, build the table of contents, and
 * collect per-section data.
 *
 * @param string $text Rendered text containing the heading elements
 * @param string $origText Original wikitext; it is preprocessed here so
 *  that an offset can be computed for each section
 * @param bool $isMain When true, the section data is stored on the parser
 *  output and the TOC may be inserted before the first heading
 * @return string Text with rebuilt headings and, where applicable, the TOC
 */
private function finalizeHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->setHideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$full = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->getTitle()->getPrefixedDBkey();
	# The output type is switched to OT_WIKI while the original text is
	# preprocessed, and restored after the loop
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Lower-cased ids already handed out, used to keep anchors unique
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				// @phan-suppress-next-line PhanTypeInvalidDimOffset
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					// @phan-suppress-previous-line PhanTypeInvalidDimOffset
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			# Id already used: find the first free "_N" suffix, starting at 2
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only touch $refers when there really is a fallback id. Recording
		# the "no fallback" value false would be stored under array key 0
		# and would make a later heading whose id is "0" look like a
		# duplicate.
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine(
				$linkAnchor,
				$tocline,
				$numbering,
				$toclevel,
				( $isTemplate ? false : $sectionIndex )
			);
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			# Note: the offset is accumulated with mb_strlen()
			$byteOffset += mb_strlen(
				$this->mStripState->unstripBoth(
					$frame->expand( $node, PPFrame::RECOVER_ORIG )
				)
			);
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->getTitle()->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			// @phan-suppress-next-line SecurityCheck-DoubleEscaped
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline(
			$level,
			$matches['attrib'][$headlineCount],
			$anchor,
			$headline,
			$editlink,
			$fallbackAnchor
		);

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		// Hand each assembled section to the ParserSectionCreate hook,
		// together with its index and the edit-link flag.
		// NOTE(review): presumably the hook may modify the section text
		// in place - confirm against the hook interface.
		$this->hookRunner->onParserSectionCreate( $this, $i, $sections[$i], $maybeShowEditLink );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full .= implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	} else {
		return $full;
	}
}
4510 
/**
 * Transform wikitext before it is saved.
 *
 * @param string $text Wikitext to transform
 * @param PageReference $page
 * @param UserIdentity $user Set as the parser's user for the duration of
 *  the call and passed on for signature expansion
 * @param ParserOptions $options
 * @param bool $clearState When true, the parser is locked and its state
 *  is cleared before the transformation
 * @return string The transformed wikitext
 */
public function preSaveTransform(
	$text,
	PageReference $page,
	UserIdentity $user,
	ParserOptions $options,
	$clearState = true
) {
	if ( $clearState ) {
		// Must stay in scope until the function returns
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $page, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings (including
	// trimming trailing whitespace). The normalization is kept for
	// backwards-compatibility with other code that just calls PST, but
	// it should already be handled in TextContent subclasses.
	$text = TextContent::normalizeLineEndings(
		str_replace( "\000", '', $text )
	);

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}

	// Unstrip, then trim trailing whitespace again because the previous
	// steps can introduce it.
	$text = rtrim( $this->mStripState->unstripBoth( $text ) );

	$this->hookRunner->onParserPreSaveTransformComplete( $this, $text );

	# Reset
	$this->setUser( null );

	return $text;
}
4558 
/**
 * Pre-save transform helper: substitutes variables, expands signatures
 * (~~~, ~~~~, ~~~~~) and applies the pipe trick to links.
 *
 * @param string $text
 * @param UserIdentity $user User whose signature is inserted
 * @return string
 */
private function pstPass2( $text, UserIdentity $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		# strtr() tries the longest key first, so five tildes win over four or three
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( ParserOutputFlags::USER_SIGNATURE, 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# These patterns deliberately have no /u modifier: $nc matches raw
	# bytes, and the non-ASCII literals below are matched as their byte
	# sequences.

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257)
	// The brackets must be the literal U+FF08/U+FF09 characters. ASCII
	// parentheses here would form a capture group and make this pattern
	// match any [[x|]] link.
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
	// [[ns:page (context), context|]] (using single, double-width or Arabic comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，|، )$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		# Current title ends in " (context)": reuse prefix and context
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		# Current title has a prefix and/or a ", context" suffix
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4628 
/**
 * Build the signature wikitext for a user.
 *
 * @param UserIdentity $user
 * @param string|false|null $nickname Custom signature text; false means
 *  look up the user's 'nickname' option
 * @param bool|null $fancySig Whether the nickname is to be treated as raw
 *  signature markup; null means look up the user's 'fancysig' option
 * @return string Either the cleaned custom signature, or the content
 *  language 'signature' / 'signature-anon' message
 */
public function getUserSig( UserIdentity $user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Fill in anything the caller left out from the user's options
	if ( $nickname === false ) {
		$nickname = $this->userOptionsLookup->getOption( $user, 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $this->userOptionsLookup->getBoolOption( $user, 'fancysig' );
	}

	if ( $nickname === null || $nickname === '' ) {
		// Empty value results in the default signature (even when fancysig is enabled)
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' ) ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# The signature may contain markup, so it has to pass validation
		$sigOk = ( $this->validateSig( $nickname ) !== false );

		# New validator, consulted only in 'disallow' mode
		$validationMode = $this->svcOptions->get( 'SignatureValidation' );
		if ( $sigOk && $validationMode === 'disallow' ) {
			$validator = new SignatureValidator( $user, null, $this->mOptions );
			# A falsy result from the validator is taken to mean "valid"
			$sigOk = !$validator->validateSignature( $nickname );
		}

		if ( $sigOk ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}

		# Failed to validate; fall back to the default
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has invalid signature." );
	}

	# Make sure the nickname doesn't get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$escapedUser = wfEscapeWikiText( $username );
	$escapedNick = wfEscapeWikiText( $nickname );
	$msgName = 'signature-anon';
	if ( $user->isRegistered() ) {
		$msgName = 'signature';
	}

	return wfMessage( $msgName, $escapedUser, $escapedNick )->inContentLanguage()
		->page( $this->getPage() )->text();
}
4698 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|false The text unchanged if it is well-formed, false if not
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4709 
/**
 * Clean up signature text: force substitution on every template call that
 * is not already substituted, and remove nested signature tildes.
 *
 * @param string $text
 * @param bool $parsing False when called from outside a parse; the parser
 *  is then locked and a parse is started here, and the result is unstripped
 *  before being returned
 * @return string The signature text, returned unchanged when the
 *  clean-signatures option is off
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		global $wgTitle;
		$magicScopeVariable = $this->lock();
		# NOTE(review): startParse() is declared in this file with
		# ( $page, $options, $outputType, $clearState ); this call shows
		# only two arguments, so lines appear to be missing here - confirm
		# against the original file.
		$this->startParse(
			$wgTitle,
			true
		);
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$substWord = $this->magicWordFactory->get( 'subst' );
	# Matches "{{" not already followed by the subst magic word
	$substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
	$substText = '{{' . $substWord->getSynonym( 0 );

	$text = preg_replace( $substRegex, $substText, $text );
	$text = self::cleanSigInSig( $text );
	$dom = $this->preprocessToDom( $text );
	$frame = $this->getPreprocessor()->newFrame();
	$text = $frame->expand( $dom );

	if ( !$parsing ) {
		$text = $this->mStripState->unstripBoth( $text );
	}

	return $text;
}
4756 
/**
 * Remove every run of three to five tildes from a string, so that a
 * signature cannot itself contain a signature.
 *
 * @param string $text
 * @return string
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4768 
/**
 * Set up the parser for a parse driven from outside this class.
 *
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState
 * @param int|null $revId When not null, stored as the current revision ID
 */
public function startExternalParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $page, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4788 
/**
 * Set the page, options and output type for a parse, optionally clearing
 * the parser state afterwards.
 *
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState
 */
private function startParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setPage( $page );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4805 
/**
 * Preprocess message text, with a guard against re-entrant calls.
 *
 * @param string $text
 * @param ParserOptions $options
 * @param PageReference|null $page Falls back to the global $wgTitle
 * @return string The preprocessed text, or $text unchanged if a call to
 *  this method is already in progress
 */
public function transformMsg( $text, ParserOptions $options, ?PageReference $page = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$page ) {
			global $wgTitle;
			$page = $wgTitle;
		}

		return $this->preprocess( $text, $page, $options );
	} finally {
		# Always release the guard. If it stayed set after an exception,
		# every later call in this process would return its input
		# untransformed.
		$executing = false;
	}
}
4834 
/**
 * Register a callback for an extension tag and add the tag to the strip
 * list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback
 * @throws MWException If the tag contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for this tag,
 *  or null if there was none
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	# Strict comparison: a loose in_array() compares numeric-looking names
	# by value (e.g. "10" and "1e1"), which could leave a tag out of the list
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4873 
/**
 * Forget all registered tag hooks and empty the strip list.
 */
public function clearTagHooks() {
	$this->mStripList = [];
	$this->mTagHooks = [];
}
4882 
/**
 * Register a parser function callback and index it under every synonym of
 * its magic word.
 *
 * @param string $id Magic word identifier
 * @param callable $callback
 * @param int $flags Bit field of self::SFH_* constants; with SFH_NO_HASH
 *  the synonyms are registered without a leading '#'
 * @throws MWException If no magic word is returned for $id
 * @return callable|null The callback previously registered for $id, or
 *  null if there was none
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $magicWord->getSynonyms();
	# 0 or 1; selects which synonym table the entries go into
	$caseIndex = (int)$magicWord->isCaseSensitive();
	$withHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $synonyms as $synonym ) {
		# Case-insensitive words are stored lower-cased
		if ( !$caseIndex ) {
			$synonym = $this->contLang->lc( $synonym );
		}
		# Add leading hash
		if ( $withHash ) {
			$synonym = '#' . $synonym;
		}
		# Remove trailing colon
		if ( substr( $synonym, -1 ) === ':' ) {
			$synonym = substr( $synonym, 0, -1 );
		}
		$this->mFunctionSynonyms[$caseIndex][$synonym] = $id;
	}

	return $previous;
}
4957 
/**
 * List the IDs under which function hooks are registered.
 *
 * @return array Keys of mFunctionHooks
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
4967 
/**
 * Public wrapper around replaceLinkHoldersPrivate().
 *
 * @param string &$text Text to process; modified in place
 * @param int $options Forwarded as-is to replaceLinkHoldersPrivate()
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$this->replaceLinkHoldersPrivate(
		$text,
		$options
	);
}
4979 
/**
 * Let the parser's link holder collection replace its placeholders in $text.
 *
 * @param string &$text Text to process; handed to mLinkHolders->replace()
 * @param int $options Not used by this method
 */
private function replaceLinkHoldersPrivate( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
4990 
/**
 * Run $text through mLinkHolders->replaceText() and return the result.
 *
 * @param string $text
 * @return string Whatever replaceText() returns
 */
private function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;

	return $holders->replaceText( $text );
}
5001 
/**
 * Build an image gallery from gallery tag content and render it.
 *
 * $text holds one entry per line in the form "Title|param|...|caption".
 * Recognised per-image parameters are alt, link and whatever the file's
 * media handler declares (except width); any other segment becomes the
 * caption, the last such segment winning.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Tag attributes; 'mode', 'showfilename', 'caption',
 *   'perrow', 'widths' and 'heights' are read here, and the whole array is
 *   also passed on to the gallery object
 * @return string Result of the gallery object's toHTML()
 */
public function renderImageGallery( $text, array $params ) {
	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->getTitle() );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	if ( isset( $params['showfilename'] ) ) {
		$ig->setShowFilename( true );
	} else {
		$ig->setShowFilename( false );
	}
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	$this->hookRunner->onBeforeParserrenderImageGallery( $this, $ig );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# Resolve the (possibly URL-decoded) file name; names without a
		# namespace prefix default to the File namespace.
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( $title === null ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		$this->hookRunner->onBeforeParserFetchFileAndTitle(
			$this, $title, $options, $descQuery );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-',
				'|',
				$matches[3],
				true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( !$magicName ) {
					// Last pipe wins.
					$label = $parameterMatch;
					continue;
				}

				$paramName = $paramMap[$magicName];
				switch ( $paramName ) {
					case 'gallery-internal-alt':
						$alt = $this->stripAltText( $match, false );
						break;
					case 'gallery-internal-link':
						$linkValue = $this->stripAltText( $match, false );
						if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
							// Result of LanguageConverter::markNoConversion
							// invoked on an external link.
							$linkValue = substr( $linkValue, 4, -2 );
						}
						list( $type, $target ) = $this->parseLinkParameter( $linkValue );
						if ( $type === 'link-url' ) {
							$link = $target;
							$this->mOutput->addExternalLink( $target );
						} elseif ( $type === 'link-title' ) {
							$link = $target->getLinkURL();
							$this->mOutput->addLink( $target );
						}
						break;
					default:
						// Must be a handler specific parameter.
						if ( $handler->validateParam( $paramName, $match ) ) {
							$handlerOptions[$paramName] = $match;
						} else {
							// Guess not, consider it as caption.
							$this->logger->debug(
								"$parameterMatch failed parameter validation" );
							$label = $parameterMatch;
						}
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	$this->hookRunner->onAfterParserFetchFileAndTitle( $this, $ig, $html );
	return $html;
}
5175 
/**
 * Get the image parameter map and its magic word array for a media handler.
 *
 * Results are cached per handler class in mImageParams and
 * mImageParamsMagicArray; the empty string is the cache key used when there
 * is no handler.
 *
 * @param object|false $handler Media handler of the file, or a falsy value
 * @return array Two elements: the map from magic word ID to [ type, name ],
 *   and the magic word array built from that map's keys
 */
private function getImageParams( $handler ) {
	$cacheKey = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$cacheKey] ) ) {
		return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
	}

	# Initialise static lists
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;
	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $group => $members ) {
			foreach ( $members as $member ) {
				// For grep: img_left, img_right, img_center, img_none,
				// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
				// img_bottom, img_text_bottom,
				// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
				// img_border, img_link, img_alt, img_class
				$magicId = str_replace( '-', '_', "img_$member" );
				$internalParamMap[$magicId] = [ $group, $member ];
			}
		}
	}

	# Start from the built-in parameters, then add the handler's own
	$combined = $internalParamMap;
	if ( !$handler ) {
		// Parse the size for non-existent files. See T273013
		$combined[ 'img_width' ] = [ 'handler', 'width' ];
	} else {
		foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
			$combined[$magicId] = [ 'handler', $handlerParam ];
		}
	}

	$this->mImageParams[$cacheKey] = $combined;
	$this->mImageParamsMagicArray[$cacheKey] =
		$this->magicWordFactory->newArray( array_keys( $combined ) );

	return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
}
5228 
/**
 * Parse image options text and hand the result to Linker::makeImageLink().
 *
 * @param LinkTarget $link Target of the image link
 * @param string $options Pipe-separated option text ("options|alt text")
 * @param object|false $holders Object exposing replaceText(), passed through
 *   to stripAltText(); false makes stripAltText() use the parser's own
 *   link holders
 * @return mixed Whatever Linker::makeImageLink() returns
 */
public function makeImage( LinkTarget $link, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Protect LanguageConverter markup when splitting into parts
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	# ($options is deliberately reused from here on as the fetch-options array)
	$options = [];
	$descQuery = false;
	$title = Title::castFromLinkTarget( $link ); // hook signature compat
	$this->hookRunner->onBeforeParserFetchFileAndTitle(
		$this, $title, $options, $descQuery );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $link ) = $this->fetchFileAndTitle( $link, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				// Parsoid applies data-(width|height) attributes to broken
				// media spans, for client use. See T273013
				$validateFunc = static function ( $name, $value ) use ( $handler ) {
					return $handler
						? $handler->validateParam( $name, $value )
						: $value > 0;
				};
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $validateFunc( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $validateFunc( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		if ( !$validated ) {
			# Anything that is not a valid parameter is the caption; last one wins
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $link->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $link->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	// hook signature compat again, $link may have changed
	$title = Title::castFromLinkTarget( $link );
	$this->hookRunner->onParserMakeImageParams( $title, $file, $params, $this );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $link, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5449 
/**
 * Classify the value of an image "link" parameter and register the link
 * with the parser output.
 *
 * @param string $value Raw parameter value
 * @return array Two elements, [ type, target ]:
 *   - [ 'no-link', false ] for an empty value
 *   - [ 'link-url', $value ] for a value matching the external link pattern
 *   - [ 'link-title', Title ] for a value Title::newFromText() accepts
 *   - [ null, false ] otherwise
 */
private function parseLinkParameter( $value ) {
	// These three names are interpolated into the patterns below
	$chars = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$prots = $this->mUrlProtocols;

	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		// Starts with a known protocol: it must be a complete, valid URL
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );

	return [ 'link-title', $linkTitle ];
}
5492 
/**
 * Turn a caption into plain text suitable for an alt or title attribute.
 *
 * Link placeholders are replaced, strip markers are expanded, ampersands
 * that start a legacy semicolon-less named entity are encoded, and finally
 * all tags are removed.
 *
 * @param string $caption Caption text
 * @param object|false $holders Object exposing replaceText(); when falsy the
 *   parser's own replaceLinkHoldersText() is used instead
 * @return string Result of Sanitizer::stripAllTags()
 */
private function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from handleInternalLinks2(), mLinkHolders won't be up-to-date.
	$tooltip = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$tooltip = $this->mStripState->unstripBoth( $tooltip );

	# Compatibility hack!  In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$tooltip = preg_replace( "/
		& # 1. entity prefix
		(?= # 2. followed by:
		(?: # a. one of the legacy semicolon-less named entities
			A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
			C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
			GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
			O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
			U(?:acute|circ|grave|uml)|Yacute|
			a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
			c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
			divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
			frac(?:1(?:2|4)|34)|
			gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
			i(?:acute|circ|excl|grave|quest|uml)|laquo|
			lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
			m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
			not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
			o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
			p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
			s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
			u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$)) # b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx", '&amp;', $tooltip );

	return Sanitizer::stripAllTags( $tooltip );
}
5550 
/**
 * Expand variables in $text and then expand its strip markers.
 *
 * The text is updated in place (it is taken by reference) and also returned.
 *
 * @deprecated since 1.35; every call triggers wfDeprecated()
 * @param string &$text Text to process
 * @param mixed $frame Forwarded to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	wfDeprecated( __METHOD__, '1.35' );

	// Two separate assignments on purpose: $text is a reference, so the
	// caller observes each step.
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5566 
/**
 * List the tag names that have a hook registered.
 *
 * @return array Keys of mTagHooks
 */
public function getTags() {
	$tagNames = array_keys( $this->mTagHooks );

	return $tagNames;
}
5576 
/**
 * Accessor for the function synonym table filled by setFunctionHook().
 *
 * @return array The mFunctionSynonyms property
 */
public function getFunctionSynonyms() {
	$synonyms = $this->mFunctionSynonyms;

	return $synonyms;
}
5584 
/**
 * Accessor for the URL protocol pattern used when matching external links.
 *
 * @return string The mUrlProtocols property
 */
public function getUrlProtocols() {
	$protocols = $this->mUrlProtocols;

	return $protocols;
}
5592 
/**
 * Get or replace one section of a wikitext document.
 *
 * External callers use getSection() and replaceSection().
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section index, optionally preceded by
 *   dash-separated flags; the flag 'T' is the only one handled here
 * @param string $mode Either 'get' or 'replace'
 * @param string $newText In 'replace' mode, the replacement text; in 'get'
 *   mode, the value returned when the section does not exist
 * @return string In 'get' mode the section text (or $newText if not found);
 *   in 'replace' mode the full text with the section replaced (or $text
 *   unchanged if the section was not found)
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	$magicScopeVariable = $this->lock();
	# NOTE(review): the options and output-type arguments were missing from
	# the copy this was restored from and have been reconstructed -- confirm
	# against upstream.
	$this->startParse(
		$wgTitle,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	# Process section extraction flags
	$flags = 0;
	$sectionParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $sectionParts );
	foreach ( $sectionParts as $part ) {
		if ( $part === 'T' ) {
			# NOTE(review): branch body reconstructed -- confirm against upstream
			$flags |= self::PTD_FOR_INCLUSION;
		}
	}

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			if ( $mode === 'get' ) {
				return '';
			}

			return $newText;
		} else {
			if ( $mode === 'get' ) {
				return $newText;
			}

			return $text;
		}
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		while ( $node ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			if ( $mode === 'replace' ) {
				# Everything before the target section is copied through verbatim
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
			$node = $node->getNextSibling();
		}
	}

	if ( !$node ) {
		# Not found
		if ( $mode === 'get' ) {
			return $newText;
		} else {
			return $text;
		}
	}

	# Find the end of the section, including nested sections
	do {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			$curLevel = $bits['level'];
			if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		if ( $mode === 'get' ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	# Write out the remainder (in replace mode only)
	if ( $mode === 'replace' ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		while ( $node ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			$node = $node->getNextSibling();
		}
	}

	# Re-insert stripped tags
	$outText = rtrim( $this->mStripState->unstripBoth( $outText ) );

	return $outText;
}
5735 
/**
 * Fetch the text of one section; thin wrapper around extractSections() in
 * 'get' mode.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier understood by extractSections()
 * @param string $defaultText Value returned when the section is not found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$sectionText = $this->extractSections( $text, $sectionId, 'get', $defaultText );

	return $sectionText;
}
5754 
/**
 * Replace one section of a page; thin wrapper around extractSections() in
 * 'replace' mode.
 *
 * @param string $oldText Current page wikitext
 * @param string|int $sectionId Section identifier understood by extractSections()
 * @param string $newText Replacement text for the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$updated = $this->extractSections( $oldText, $sectionId, 'replace', $newText );

	return $updated;
}
5771 
/**
 * Split wikitext into a flat list of sections.
 *
 * The first entry is the text before the first heading (index 0, level 0,
 * empty heading). Every top-level heading node starts a new entry.
 *
 * @param string $text Page wikitext
 * @return array[] List of arrays with the keys:
 *   - 'index': section index as reported by the heading node (0 for the lead)
 *   - 'level': heading level (0 for the lead)
 *   - 'offset': byte offset of the section start within the expanded text
 *   - 'heading': original text of the heading node ('' for the lead)
 *   - 'text': original text of the whole section, heading included
 */
public function getFlatSectionInfo( $text ) {
	$magicScopeVariable = $this->lock();
	# NOTE(review): the options and output-type arguments were missing from
	# the copy this was restored from and have been reconstructed -- confirm
	# against upstream.
	$this->startParse(
		null,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $text, 0 );
	$node = $root->getFirstChild();
	$offset = 0;
	$currentSection = [
		'index' => 0,
		'level' => 0,
		'offset' => 0,
		'heading' => '',
		'text' => ''
	];
	$sections = [];

	while ( $node ) {
		$nodeText = $frame->expand( $node, PPFrame::RECOVER_ORIG );
		if ( $node->getName() === 'h' ) {
			# A heading closes the section collected so far and opens a new one
			$bits = $node->splitHeading();
			$sections[] = $currentSection;
			$currentSection = [
				'index' => $bits['i'],
				'level' => $bits['level'],
				'offset' => $offset,
				'heading' => $nodeText,
				'text' => $nodeText
			];
		} else {
			$currentSection['text'] .= $nodeText;
		}
		# Offsets are byte offsets, hence strlen()
		$offset += strlen( $nodeText );
		$node = $node->getNextSibling();
	}
	$sections[] = $currentSection;
	return $sections;
}
5843 
/**
 * Accessor for the revision ID recorded for the current parse.
 *
 * @return mixed The mRevisionId property (null when none has been set)
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;

	return $revisionId;
}
5858 
/**
 * Get the revision record object for the revision being parsed.
 *
 * The result is cached in mRevisionRecordObject once a truthy value has
 * been determined.
 *
 * @return mixed The revision record, or null if there is none or if a saved
 *   revision was returned while no revision ID is set (preview)
 */
public function getRevisionRecordObject() {
	if ( $this->mRevisionRecordObject ) {
		return $this->mRevisionRecordObject;
	}

	// NOTE: try to get the RevisionRecord object even if mRevisionId is null.
	// This is useful when parsing a revision that has not yet been saved.
	// However, if we get back a saved revision even though we are in
	// preview mode, we'll have to ignore it, see below.
	// NOTE: This callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. We can't just
	// skip it if mRevisionId is set.
	$fetchRevision = $this->mOptions->getCurrentRevisionRecordCallback();
	$rev = call_user_func( $fetchRevision, $this->getTitle(), $this );

	// The revision record callback returns `false` (not null) to
	// indicate that the revision is missing.  (See for example
	// Parser::statelessFetchRevisionRecord(), the default callback.)
	// This API expects `null` instead. (T251952)
	if ( $rev === false ) {
		$rev = null;
	}

	if ( $this->mRevisionId === null ) {
		if ( $rev && $rev->getId() ) {
			// We are in preview mode (mRevisionId is null), and the current revision callback
			// returned an existing revision. Ignore it and return null, it's probably the page's
			// current revision, which is not what we want here. Note that we do want to call the
			// callback to allow the unsaved revision to be injected here, e.g. for
			// self-transclusion previews.
			return null;
		}
	} elseif ( $this->mRevisionId && $rev && $rev->getId() != $this->mRevisionId ) {
		// If the parse is for a new revision, then the callback should have
		// already been set to force the object and should match mRevisionId.
		// If not, try to fetch by mRevisionId for sanity.
		$rev = MediaWikiServices::getInstance()
			->getRevisionLookup()
			->getRevisionById( $this->mRevisionId );
	}

	$this->mRevisionRecordObject = $rev;

	return $this->mRevisionRecordObject;
}
5913 
/**
 * Get the timestamp associated with the current revision, adjusted by the
 * content language's userAdjust(). The value is computed once and cached.
 *
 * The unadjusted timestamp is also reported to the parser output.
 *
 * @return string The mRevisionTimestamp property
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Use specified revision timestamp, falling back to the current timestamp
		$revObject = $this->getRevisionRecordObject();
		if ( $revObject ) {
			$timestamp = $revObject->getTimestamp();
		} else {
			$timestamp = $this->mOptions->getTimestamp();
		}
		$this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
5939 
/**
 * Get the name of the user that made the revision being parsed.
 *
 * The name is cached in mRevisionUser once determined.
 *
 * @return string|null User name, or null when there is no revision user and
 *   the parse is neither a 'wiki' output-type parse nor a preview
 */
public function getRevisionUser(): ?string {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revObject = $this->getRevisionRecordObject();

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $revObject && $revObject->getUser() ) {
		$this->mRevisionUser = $revObject->getUser()->getName();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUserIdentity()->getName();
	}
	# Otherwise mRevisionUser deliberately stays null

	return $this->mRevisionUser;
}
5963 
/**
 * Get the size of the revision being parsed.
 *
 * The value is cached in mRevisionSize once determined.
 *
 * @return mixed The revision record's getSize(), or mInputSize when there
 *   is no revision record
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revObject = $this->getRevisionRecordObject();

	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the own substitution
	# will change the size.
	$this->mRevisionSize = $revObject
		? $revObject->getSize()
		: $this->mInputSize;

	return $this->mRevisionSize;
}
5985 
/**
 * Set the default sort key: stores it on the parser and records it as the
 * 'defaultsort' page property on the parser output.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$output = $this->mOutput;
	$output->setPageProperty( 'defaultsort', $sort );
}
5996 
/**
 * Get the default sort key, with "not set" (false) mapped to an empty string.
 *
 * @return mixed mDefaultSort, or '' when mDefaultSort is exactly false
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false
		? $this->mDefaultSort
		: '';
}
6015 
/**
 * Accessor for the raw default sort key. Unlike getDefaultSort(), the stored
 * value is returned without mapping false to an empty string.
 *
 * @return mixed The mDefaultSort property
 */
public function getCustomDefaultSort() {
	$sortKey = $this->mDefaultSort;

	return $sortKey;
}
6026 
/**
 * Derive a section name from heading text that has already been stripped
 * of markup.
 *
 * @param string $text Stripped heading text
 * @return string Section name after character reference decoding and
 *   normalizeSectionName()
 */
private static function getSectionNameFromStrippedText( $text ) {
	# NOTE(review): this first step was missing from the copy this was
	# restored from and has been reconstructed -- confirm against upstream.
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );
	$text = Sanitizer::decodeCharReferences( $text );
	$text = self::normalizeSectionName( $text );
	return $text;
}
6033 
/**
 * Build a link fragment ('#' plus escaped ID) for a section name.
 *
 * @param string $sectionName
 * @return string '#' followed by Sanitizer::escapeIdForLink() of the name
 */
private static function makeAnchor( $sectionName ) {
	$escaped = Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $escaped;
}
6037 
/**
 * Build a link fragment for a section name, using the fallback ID encoding
 * when the second entry of the FragmentMode setting is 'legacy'.
 *
 * @param string $sectionName
 * @return string '#' followed by the escaped ID
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );
	if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
		// ForAttribute() and ForLink() are the same for legacy encoding
		// NOTE(review): this assignment was missing from the copy this was
		// restored from (leaving $id undefined) -- confirm against upstream.
		$id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
	} else {
		$id = Sanitizer::escapeIdForLink( $sectionName );
	}

	return "#$id";
}
6049 
/**
 * Guess the link fragment for a heading given as wikitext.
 *
 * @param string $text Heading wikitext
 * @return string Anchor produced by makeAnchor()
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return self::makeAnchor(
		self::getSectionNameFromStrippedText( $stripped )
	);
}
6065 
/**
 * Same as guessSectionNameFromWikiText(), but the anchor is built by
 * makeLegacyAnchor().
 *
 * @param string $text Heading wikitext
 * @return string Anchor produced by makeLegacyAnchor()
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor(
		self::getSectionNameFromStrippedText( $stripped )
	);
}
6082 
/**
 * Guess the link fragment for heading text that is already stripped of
 * markup.
 *
 * @param string $text Stripped heading text
 * @return string Anchor produced by makeAnchor()
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor(
		self::getSectionNameFromStrippedText( $text )
	);
}
6093 
/**
 * Normalize a section name by running it through the title parser as the
 * fragment of a title string.
 *
 * @param string $text Section name
 * @return string The 'fragment' part reported by the title parser, or $text
 *   unchanged if the title parser rejects the string
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links

	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';

	try {
		$split = $titleParser->splitTitleString( "#$text" );
	} catch ( MalformedTitleException $ex ) {
		// Not parseable as a fragment: leave the name as it is
		return $text;
	}

	return $split['fragment'];
}
6113 
/**
 * Reduce heading wikitext to something closer to its displayed text:
 * link markup is replaced by the link's text, quote markup is processed
 * by doQuotes(), and anything between '<' and '>' is removed.
 *
 * @param string $text Heading wikitext
 * @return string Stripped text
 */
public function stripSectionName( $text ) {
	# Strip internal link markup
	# Piped links keep their label, unpiped links keep their target
	$noPipedLinks = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text );
	$noInternalLinks = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $noPipedLinks );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLinkPattern = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$noLinks = preg_replace( $externalLinkPattern, '$2', $noInternalLinks );

	# Parse wikitext quotes (italics & bold)
	$quoted = $this->doQuotes( $noLinks );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $quoted );
}
6147 
/**
 * Strip/replaceVariables/unstrip round trip used for preprocessor
 * regression testing.
 *
 * @param string $text
 * @param PageReference $page
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @return string
 */
private function fuzzTestSrvus( $text, PageReference $page, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	// Keep the object returned by lock() in a local for the duration of
	// this method (presumably the lock is released when it goes out of scope).
	$lockGuard = $this->lock();
	$this->startParse( $page, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );
	return Sanitizer::removeHTMLtags( $unstripped );
}
6172 
/**
 * Fuzz-testing entry point that runs preSaveTransform() on behalf of the
 * user identity carried by the parser options.
 *
 * @param string $text
 * @param PageReference $page
 * @param ParserOptions $options
 * @return string
 */
private function fuzzTestPst( $text, PageReference $page, ParserOptions $options ) {
	$user = $options->getUserIdentity();
	return $this->preSaveTransform( $text, $page, $user, $options );
}
6187 
/**
 * Fuzz-testing entry point: same as fuzzTestSrvus(), but with the output
 * type fixed to OT_PREPROCESS.
 *
 * @param string $text
 * @param PageReference $page
 * @param ParserOptions $options
 * @return string
 */
private function fuzzTestPreprocess( $text, PageReference $page, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;
	return $this->fuzzTestSrvus( $text, $page, $options, $outputType );
}
6202 
/**
 * Run a callback over every stretch of $s that lies outside strip markers,
 * and stitch the results back together with the markers left untouched.
 *
 * A marker is the text from MARKER_PREFIX up to and including the next
 * MARKER_SUFFIX. If a prefix has no matching suffix, everything from that
 * prefix to the end of the string is copied through unchanged.
 *
 * @param string $s Text that may contain strip markers
 * @param callable $callback Receives one string argument, returns a string
 * @return string
 */
public function markerSkipCallback( $s, callable $callback ) {
	$result = '';
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$pos = 0;

	while ( $pos < $length ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			// No further markers: transform the remainder and finish
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		// Transform the text leading up to the marker (may be empty)
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			// Unterminated marker: pass the rest through verbatim
			$result .= substr( $s, $start );
			break;
		}

		// Copy the complete marker, suffix included, and continue after it
		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
6244 
/**
 * Remove any strip markers found in the given text, by delegating to the
 * current strip state.
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
6255 
/**
 * Parse a size specification such as "100px", "100x200" or "100x200px".
 *
 * @param string $value Raw parameter value
 * @param bool $parseHeight Whether the WIDTHxHEIGHT form is accepted
 * @return array Empty when $value is '' or matches neither form; otherwise
 *   contains 'width' and, for the WIDTHxHEIGHT form, 'height' (both ints;
 *   an omitted number yields 0)
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	$dimensions = [];
	if ( $value !== '' ) {
		$groups = [];
		# (T15500) In both cases (width/height and width only),
		# permit trailing "px" for backward compatibility.
		if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $groups ) ) {
			$dimensions['width'] = intval( $groups[1] );
			$dimensions['height'] = intval( $groups[2] );
		} elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
			// intval() reads the leading digits and ignores the "px" tail
			$dimensions['width'] = intval( $value );
		}
	}
	return $dimensions;
}
6285 
/**
 * Lock the current instance of the parser.
 *
 * Records a backtrace in $mInParse as the lock marker and returns a
 * ScopedCallback whose callback resets $mInParse to false.
 *
 * @throws MWException If $mInParse is already set, i.e. the parser is in use
 * @return ScopedCallback The caller must keep this object alive for as long
 *   as the lock should be held
 */
protected function lock() {
	$currentOwner = $this->mInParse;
	if ( $currentOwner ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $currentOwner );
	}

	// Save the backtrace when locking, so that if some code tries locking again,
	// we can print the lock owner's backtrace for easier debugging
	$this->mInParse = ( new Exception )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6312 
/**
 * Remove a single <p>...</p> wrapper around the given HTML.
 *
 * The wrapper is only removed when the whole string is one paragraph:
 * if the captured content itself contains a closing </p>, or the string
 * is not wrapped at all, the input is returned unchanged.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$match = [];
	if ( !preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $match ) ) {
		return $html;
	}

	$inner = $match[1];
	if ( strpos( $inner, '</p>' ) !== false ) {
		// More than one paragraph: leave the markup alone
		return $html;
	}

	return $inner;
}
6331 
/**
 * Return this parser if it is not doing anything, otherwise get a fresh
 * parser from the factory.
 *
 * @return Parser Either $this or a newly created instance
 */
public function getFreshParser() {
	return $this->mInParse ? $this->factory->create() : $this;
}
6349 
/**
 * Set up the PHP implementation of OOUI for use in this request and mark
 * the current ParserOutput as needing OOUI.
 *
 * @deprecated since 1.35
 */
public function enableOOUI() {
	wfDeprecated( __METHOD__, '1.35' );
	// Initialise OOUI for this request before flagging the output,
	// so both halves of the documented behaviour are performed
	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
6362 
/**
 * Set a flag on the parser output and write a debug log entry recording
 * which flag was set, on which page, and why.
 *
 * @param string $flag Flag name passed through to ParserOutput::setOutputFlag()
 * @param string $reason Free-text explanation, used only in the log message
 */
private function setOutputFlag( string $flag, string $reason ): void {
	$this->mOutput->setOutputFlag( $flag );

	$title = $this->getTitle();
	$name = $title->getPrefixedText();
	$message = __METHOD__ . ": set $flag flag on '$name'; $reason";
	$this->logger->debug( $message );
}
6374 }
Parser\$badFileLookup
BadFileLookup $badFileLookup
Definition: Parser.php:347
Page\PageIdentity
Interface for objects (potentially) representing an editable wiki page.
Definition: PageIdentity.php:64
Parser\getFunctionHooks
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:4964
Parser\$mLinkRenderer
LinkRenderer $mLinkRenderer
Definition: Parser.php:308
Parser\$mForceTocPosition
$mForceTocPosition
Definition: Parser.php:236
Parser\recursivePreprocess
recursivePreprocess( $text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:950
Parser\getContentLanguageConverter
getContentLanguageConverter()
Shorthand for getting a Language Converter for Content language.
Definition: Parser.php:1644
Parser\transformMsg
transformMsg( $text, ParserOptions $options, ?PageReference $page=null)
Wrapper for preprocess()
Definition: Parser.php:4815
ParserOptions
Set options of the Parser.
Definition: ParserOptions.php:45
Parser\attributeStripCallback
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition: Parser.php:5560
PPFrame\loopCheck
loopCheck( $title)
Returns true if the infinite loop check is OK, false if a loop is detected.
Parser\$mSubstWords
MagicWordArray $mSubstWords
Definition: Parser.php:182
Parser\$linkRendererFactory
LinkRendererFactory $linkRendererFactory
Definition: Parser.php:338
Sanitizer\ID_FALLBACK
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition: Sanitizer.php:79
Parser\maybeMakeExternalImage
maybeMakeExternalImage( $url)
make an image if it's allowed, either through the global option, through the exception,...
Definition: Parser.php:2356
Message\numParam
static numParam( $num)
Definition: Message.php:1106
MagicWordArray
Class for handling an array of magic words.
Definition: MagicWordArray.php:32
Parser\EXT_LINK_ADDR
const EXT_LINK_ADDR
Definition: Parser.php:108
MediaWiki\Revision\RevisionAccessException
Exception representing a failure to look up a revision.
Definition: RevisionAccessException.php:37
MediaWiki\Linker\LinkTarget\isSameLinkAs
isSameLinkAs(LinkTarget $other)
Checks whether the given LinkTarget refers to the same target as this LinkTarget.
FauxRequest
WebRequest clone which takes values from a provided array.
Definition: FauxRequest.php:35
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:382
Parser\$mInputSize
$mInputSize
Definition: Parser.php:274
PPFrame\STRIP_COMMENTS
const STRIP_COMMENTS
Definition: PPFrame.php:31
Parser\SPACE_NOT_NL
const SPACE_NOT_NL
Definition: Parser.php:115
Parser\$titleFormatter
TitleFormatter $titleFormatter
Definition: Parser.php:326
HtmlArmor
Marks HTML that shouldn't be escaped.
Definition: HtmlArmor.php:30
MediaWiki\Revision\RevisionRecord
Page revision base class.
Definition: RevisionRecord.php:47
MediaWiki\Linker\LinkTarget\getText
getText()
Returns the link in text form, without namespace prefix or fragment.
Parser\__destruct
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:498
Preprocessor\DOM_FOR_INCLUSION
const DOM_FOR_INCLUSION
Transclusion mode flag for Preprocessor::preprocessToObj()
Definition: Preprocessor.php:29
ParserOutput
Definition: ParserOutput.php:36
Parser\$mLinkHolders
LinkHolderArray $mLinkHolders
Definition: Parser.php:208
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:72
Parser\makeImage
makeImage(LinkTarget $link, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5238
Parser\braceSubstitution
braceSubstitution(array $piece, PPFrame $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:2974
Parser\makeLimitReport
makeLimitReport()
Set the limit report data in the current ParserOutput, and return the limit report HTML comment.
Definition: Parser.php:727
MagicWordFactory
A factory that stores information about MagicWords, and creates them on demand with caching.
Definition: MagicWordFactory.php:37
Parser\internalParseHalfParsed
internalParseHalfParsed( $text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1682
Parser\$userFactory
UserFactory $userFactory
Definition: Parser.php:362
Parser\stripAltText
stripAltText( $caption, $holders)
Definition: Parser.php:5498
Parser\killMarkers
killMarkers( $text)
Remove any strip markers found in the given text.
Definition: Parser.php:6252
Sanitizer\stripAllTags
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed,...
Definition: Sanitizer.php:1577
Parser\$mTagHooks
$mTagHooks
Definition: Parser.php:157
Parser\OutputType
OutputType( $x=null)
Accessor/mutator for the output type.
Definition: Parser.php:1084
Parser\$currentRevisionCache
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:292
Parser\setOutputFlag
setOutputFlag(string $flag, string $reason)
Sets the flag on the parser output but also does some debug logging.
Definition: Parser.php:6369
Parser\enableOOUI
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6357
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:200
Linker\makeSelfLinkObj
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:161
Parser\$mTplDomCache
array $mTplDomCache
Definition: Parser.php:238
MediaWiki\BadFileLookup
Definition: BadFileLookup.php:13
PPFrame\NO_ARGS
const NO_ARGS
Definition: PPFrame.php:29
Parser\statelessFetchRevisionRecord
static statelessFetchRevisionRecord(LinkTarget $link, $parser=null)
Wrapper around RevisionLookup::getKnownCurrentRevision.
Definition: Parser.php:3535
wfSetVar
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest If source is NULL, it just returns the val...
Definition: GlobalFunctions.php:1504
Parser\parseExtensionTagAsTopLevelDoc
parseExtensionTagAsTopLevelDoc( $text)
Needed by Parsoid/PHP to ensure all the hooks for extensions are run in the right order.
Definition: Parser.php:904
Parser\$mDoubleUnderscores
$mDoubleUnderscores
Definition: Parser.php:231
Linker\tocIndent
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1561
Parser\getRevisionSize
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:5970
Sanitizer\escapeIdForAttribute
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
Definition: Sanitizer.php:812
Sanitizer\removeHTMLtags
static removeHTMLtags( $text, $processCallback=null, $args=[], $extratags=[], $removetags=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments.
Definition: Sanitizer.php:240
Parser\ParserOutputFlags
Definition: ParserOutputFlags.php:41
MediaWiki\Http\HttpRequestFactory
Factory creating MWHttpRequest objects.
Definition: HttpRequestFactory.php:39
Parser\handleExternalLinks
handleExternalLinks( $text)
Replace external links (REL)
Definition: Parser.php:2140
Parser\$mOutputType
$mOutputType
Definition: Parser.php:262
MediaWiki\Linker\LinkRenderer
Class that generates HTML links for pages.
Definition: LinkRenderer.php:43
ParserOptions\getDisableTitleConversion
getDisableTitleConversion()
Whether title conversion should be disabled.
Definition: ParserOptions.php:552
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1657
Parser\handleHeadings
handleHeadings( $text)
Parse headers and return html.
Definition: Parser.php:1919
MediaWiki\SpecialPage\SpecialPageFactory
Factory for handling the special page list and generating SpecialPage objects.
Definition: SpecialPageFactory.php:63
$wgNoFollowDomainExceptions
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
Definition: DefaultSettings.php:5027
Parser\handleAllQuotes
handleAllQuotes( $text)
Replace single quotes with HTML markup.
Definition: Parser.php:1936
Parser\$mUrlProtocols
$mUrlProtocols
Definition: Parser.php:185
Parser\extractTagsAndParams
static extractTagsAndParams(array $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:1256
Parser\$mLinkID
int $mLinkID
Definition: Parser.php:214
OT_HTML
const OT_HTML
Definition: Defines.php:157
SFH_NO_HASH
const SFH_NO_HASH
Definition: Defines.php:170
Parser\handleDoubleUnderscore
handleDoubleUnderscore( $text)
Strip double-underscore items like NOGALLERY and NOTOC Fills $this->mDoubleUnderscores,...
Definition: Parser.php:4044
Sanitizer\normalizeSectionNameWhitespace
static normalizeSectionNameWhitespace( $section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(),...
Definition: Sanitizer.php:1105
OT_PREPROCESS
const OT_PREPROCESS
Definition: Defines.php:159
Parser\normalizeSectionName
static normalizeSectionName( $text)
Apply the same normalization as code making links to this section would.
Definition: Parser.php:6100
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
Parser\fetchFileNoRegister
fetchFileNoRegister(LinkTarget $link, array $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3778
wfHostname
wfHostname()
Get host name of the current machine, for use in error reporting.
Definition: GlobalFunctions.php:1238
Parser\recursiveTagParseFully
recursiveTagParseFully( $text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition: Parser.php:879
Parser\$specialPageFactory
SpecialPageFactory $specialPageFactory
Definition: Parser.php:323
Parser\nextLinkID
nextLinkID()
Definition: Parser.php:1130
Parser\getTargetLanguage
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:1159
User\newFromName
static newFromName( $name, $validate='valid')
Definition: User.php:606
Parser\$mStripList
$mStripList
Definition: Parser.php:160
wfMessage
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Definition: GlobalFunctions.php:1175
MediaWiki\Linker\LinkRendererFactory
Factory to create LinkRender objects.
Definition: LinkRendererFactory.php:34
SpecialPage\getTitleFor
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name If you don't need a full Title object,...
Definition: SpecialPage.php:107
Parser\startExternalParse
startExternalParse(?PageReference $page, ParserOptions $options, $outputType, $clearState=true, $revId=null)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4780
Parser\guessSectionNameFromWikiText
guessSectionNameFromWikiText( $text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition: Parser.php:6059
Parser\setDefaultSort
setDefaultSort( $sort)
Mutator for $mDefaultSort.
Definition: Parser.php:5992
Parser\getPage
getPage()
Returns the page used as context for parsing, e.g.
Definition: Parser.php:1048
Preprocessor_Hash
Differences from DOM schema:
Definition: Preprocessor_Hash.php:43
StripState
Definition: StripState.php:29
Parser\getExternalLinkRel
static getExternalLinkRel( $url=false, LinkTarget $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:2221
Parser\replaceVariables
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:2898
Parser\MARKER_PREFIX
const MARKER_PREFIX
Definition: Parser.php:150
Parser\getFunctionSynonyms
getFunctionSynonyms()
Definition: Parser.php:5581
Parser\$mInParse
bool string $mInParse
Recursive call protection.
Definition: Parser.php:300
Parser\doQuotes
doQuotes( $text)
Helper function for handleAllQuotes()
Definition: Parser.php:1954
Linker\tocLine
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1587
Parser\startParse
startParse(?PageReference $page, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4795
MediaWiki\Languages\LanguageConverterFactory
An interface for creating language converters.
Definition: LanguageConverterFactory.php:46
Page\PageReference
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
Definition: PageReference.php:49
Parser\$svcOptions
ServiceOptions $svcOptions
This is called $svcOptions instead of $options like elsewhere to avoid confusion with $mOptions,...
Definition: Parser.php:335
MediaWiki\User\UserIdentity
Interface for objects representing user identity.
Definition: UserIdentity.php:39
Linker\tocList
static tocList( $toc, Language $lang=null)
Wraps the TOC in a div with ARIA navigation role and provides the hide/collapse JavaScript.
Definition: Linker.php:1623
Parser\SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Parser.php:96
Parser\OT_WIKI
const OT_WIKI
Definition: Parser.php:126
Parser\getTags
getTags()
Accessor.
Definition: Parser.php:5573
Parser\getStripList
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1319
Parser\initializeVariables
initializeVariables()
Initialize the magic variables (like CURRENTMONTHNAME) and substitution modifiers.
Definition: Parser.php:2847
PPFrame\NO_TEMPLATES
const NO_TEMPLATES
Definition: PPFrame.php:30
Preprocessor
Definition: Preprocessor.php:27
Parser\getOptions
getOptions()
Definition: Parser.php:1101
MediaWiki\Languages\LanguageNameUtils
A service that provides utilities to do with language names and codes.
Definition: LanguageNameUtils.php:42
PPFrame\newChild
newChild( $args=false, $title=false, $indexOffset=0)
Create a child frame.
Parser\getFunctionLang
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:1147
StringUtils\replaceMarkup
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <....
Definition: StringUtils.php:268
Parser\$mRevisionRecordObject
RevisionRecord null $mRevisionRecordObject
Definition: Parser.php:277
Parser\Options
Options( $x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:1121
NS_SPECIAL
const NS_SPECIAL
Definition: Defines.php:53
Parser\preSaveTransform
preSaveTransform( $text, PageReference $page, UserIdentity $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\\r\\n" -> "\\n" conversion, substituting signatur...
Definition: Parser.php:4523
Parser\lock
lock()
Lock the current instance of the parser.
Definition: Parser.php:6295
Parser\getDefaultSort
getDefaultSort()
Accessor for $mDefaultSort. Will use the empty string if none is set.
Definition: Parser.php:6008
Parser\$mFunctionSynonyms
$mFunctionSynonyms
Definition: Parser.php:159
Parser\$hookRunner
HookRunner $hookRunner
Definition: Parser.php:353
Parser\$nsInfo
NamespaceInfo $nsInfo
Definition: Parser.php:341
Parser\makeKnownLinkHolder
makeKnownLinkHolder(LinkTarget $nt, $text='', $trail='', $prefix='')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2743
Parser\makeLegacyAnchor
makeLegacyAnchor( $sectionName)
Definition: Parser.php:6038
Parser\fuzzTestSrvus
fuzzTestSrvus( $text, PageReference $page, ParserOptions $options, $outputType=self::OT_HTML)
Strip/replaceVariables/unstrip for preprocessor regression testing.
Definition: Parser.php:6161
Parser\setHook
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4860
Parser\$mHeadings
$mHeadings
Definition: Parser.php:229
Parser\$userOptionsLookup
UserOptionsLookup $userOptionsLookup
Definition: Parser.php:359
Parser\interwikiTransclude
interwikiTransclude(LinkTarget $link, $action)
Transclude an interwiki link.
Definition: Parser.php:3801
Parser\getTitle
getTitle()
Definition: Parser.php:1012
Parser\$mVariables
MagicWordArray $mVariables
Definition: Parser.php:177
wfDeprecatedMsg
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
Definition: GlobalFunctions.php:1020
MWException
MediaWiki exception.
Definition: MWException.php:29
Parser\TOC_START
const TOC_START
Definition: Parser.php:153
Parser\$ot
$ot
Definition: Parser.php:264
Parser\getRevisionRecordObject
getRevisionRecordObject()
Get the revision record object for $this->mRevisionId.
Definition: Parser.php:5865
MediaWiki\Config\ServiceOptions
A class for passing options to services.
Definition: ServiceOptions.php:27
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Definition: GlobalFunctions.php:989
Parser\OT_MSG
const OT_MSG
Definition: Parser.php:128
Parser\getPreloadText
getPreloadText( $text, PageReference $page, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:970
MediaWiki\User\UserIdentity\isRegistered
isRegistered()
Parser\firstCallInit
firstCallInit()
Used to do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:542
Parser\$mProfiler
SectionProfiler $mProfiler
Definition: Parser.php:303
Parser\getFlatSectionInfo
getFlatSectionInfo( $text)
Get an array of preprocessor section information.
Definition: Parser.php:5801
Parser\$mMarkerIndex
$mMarkerIndex
Definition: Parser.php:165
BlockLevelPass\doBlockLevels
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: BlockLevelPass.php:52
Parser\getCustomDefaultSort
getCustomDefaultSort()
Accessor for $mDefaultSort. Unlike getDefaultSort(), will return false if none is set.
Definition: Parser.php:6023
wfUrlProtocolsWithoutProtRel
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Definition: GlobalFunctions.php:747
Parser\handleTables
handleTables( $text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1353
$matches
$matches
Definition: NoLocalSettings.php:24
CoreTagHooks\register
static register( $parser)
Definition: CoreTagHooks.php:36
Parser\$contLang
Language $contLang
Definition: Parser.php:314
Parser\makeAnchor
static makeAnchor( $sectionName)
Definition: Parser.php:6034
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:326
PPNode
There are three types of nodes:
Definition: PPNode.php:35
Parser\$factory
ParserFactory $factory
Definition: Parser.php:320
Parser\replaceLinkHoldersPrivate
replaceLinkHoldersPrivate(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4987
LinkHolderArray
Definition: LinkHolderArray.php:33
Parser\__clone
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:512
NS_TEMPLATE
const NS_TEMPLATE
Definition: Defines.php:74
PPFrame\RECOVER_ORIG
const RECOVER_ORIG
Definition: PPFrame.php:36
Linker\makeHeadline
static makeHeadline( $level, $attribs, $anchor, $html, $link, $fallbackAnchor=false)
Create a headline for content.
Definition: Linker.php:1698
Parser\getHookContainer
getHookContainer()
Get a HookContainer capable of returning metadata about hooks or running extension hooks.
Definition: Parser.php:1657
Parser\callParserFunction
callParserFunction(PPFrame $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3354
Parser\extensionSubstitution
extensionSubstitution(array $params, PPFrame $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:3927
Linker\tocLineEnd
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1611
$args
if( $line===false) $args
Definition: mcc.php:124
MapCacheLRU
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:36
Parser\$mLangLinkLanguages
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:284
Parser\$trackingCategories
TrackingCategories $trackingCategories
Definition: Parser.php:368
Parser\markerSkipCallback
markerSkipCallback( $s, callable $callback)
Call a callback function on all regions of the given text that are not inside strip markers,...
Definition: Parser.php:6221
Parser\limitationWarn
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:2949
Parser\TOC_END
const TOC_END
Definition: Parser.php:154
MediaWiki\User\UserIdentity\getName
getName()
$title
$title
Definition: testCompression.php:38
Parser\recursiveTagParse
recursiveTagParse( $text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:855
Linker\makeExternalLink
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:1011
Parser\finalizeHeadings
finalizeHeadings( $text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4119
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:650
Parser\$mHighestExpansionDepth
$mHighestExpansionDepth
Definition: Parser.php:225
SectionProfiler
Arbitrary section name based PHP profiling.
Definition: SectionProfiler.php:35
Parser\cleanSig
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4721
RequestContext
Group all the pieces relevant to the context of a request into one instance @newable.
Definition: RequestContext.php:41
Parser\$mImageParamsMagicArray
$mImageParamsMagicArray
Definition: Parser.php:163
SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Defines.php:171
Parser\handleInternalLinks
handleInternalLinks( $text)
Process [[ ]] wikilinks.
Definition: Parser.php:2414
Parser\$mTplRedirCache
$mTplRedirCache
Definition: Parser.php:227
Parser\$tidy
TidyDriverBase $tidy
Definition: Parser.php:356
Parser\$mFirstCall
bool $mFirstCall
Whether firstCallInit still needs to be called.
Definition: Parser.php:170
ParserOptions\getPreSaveTransform
getPreSaveTransform()
Transform wiki markup when saving the page?
Definition: ParserOptions.php:658
Parser\getStripState
getStripState()
Definition: Parser.php:1327
Parser\getContentLanguage
getContentLanguage()
Get the content language that this Parser is using.
Definition: Parser.php:1223
Parser\OT_PLAIN
const OT_PLAIN
Definition: Parser.php:130
$wgTitle
$wgTitle
Definition: Setup.php:852
Parser\handleMagicLinks
handleMagicLinks( $text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1729
Linker\splitTrail
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1719
MediaWiki\Tidy\TidyDriverBase
Base class for HTML cleanup utilities.
Definition: TidyDriverBase.php:8
Parser\insertStripItem
insertStripItem( $text)
Add an item to the strip state Returns the unique tag which must be inserted into the stripped text T...
Definition: Parser.php:1340
Parser\getFreshParser
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:6342
Parser\getRevisionUser
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:5946
Parser\setOptions
setOptions(ParserOptions $options)
Mutator for the ParserOptions object.
Definition: Parser.php:1110
Parser\getImageParams
getImageParams( $handler)
Definition: Parser.php:5180
Parser\$mAutonumber
$mAutonumber
Definition: Parser.php:198
Parser\fuzzTestPst
fuzzTestPst( $text, PageReference $page, ParserOptions $options)
Strip/replaceVariables/unstrip for preprocessor regression testing.
Definition: Parser.php:6184
Parser\replaceLinkHolders
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4976
Parser\addTrackingCategory
addTrackingCategory( $msg)
Definition: Parser.php:4098
Parser\getUrlProtocols
getUrlProtocols()
Definition: Parser.php:5589
Parser\incrementIncludeSize
incrementIncludeSize( $type, $size)
Increment an include size counter.
Definition: Parser.php:4019
Parser\getTargetLanguageConverter
getTargetLanguageConverter()
Shorthand for getting a Language Converter for Target language.
Definition: Parser.php:1633
ParserFactory
Definition: ParserFactory.php:37
$content
$content
Definition: router.php:76
CoreParserFunctions\register
static register( $parser)
Definition: CoreParserFunctions.php:38
Parser\makeFreeExternalLink
makeFreeExternalLink( $url, $numPostProto)
Make a free external link, given a user-supplied URL.
Definition: Parser.php:1843
Parser\CONSTRUCTOR_OPTIONS
const CONSTRUCTOR_OPTIONS
Definition: Parser.php:373
$s
foreach( $mmfl['setupFiles'] as $fileName) if( $queue) if(empty( $mmfl['quiet'])) $s
Definition: mergeMessageFileList.php:206
NS_MEDIA
const NS_MEDIA
Definition: Defines.php:52
PPFrame\expand
expand( $root, $flags=0)
Expand a document tree node.
ILanguageConverter
The shared interface for all language converters.
Definition: ILanguageConverter.php:29
$wgNoFollowNsExceptions
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
Definition: DefaultSettings.php:5012
$wgNoFollowLinks
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
Definition: DefaultSettings.php:5006
Parser\$mOutput
ParserOutput $mOutput
Definition: Parser.php:197
Parser\$mFunctionHooks
$mFunctionHooks
Definition: Parser.php:158
ParserFactory\$inParserFactory
static int $inParserFactory
Track calls to Parser constructor to aid in deprecation of direct Parser invocation.
Definition: ParserFactory.php:91
Parser\$mOptions
ParserOptions null $mOptions
Definition: Parser.php:252
Parser\$mRevisionUser
$mRevisionUser
Definition: Parser.php:270
Message\plaintextParam
static plaintextParam( $plaintext)
Definition: Message.php:1216
Sanitizer\validateTagAttributes
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:391
Parser\extractSections
extractSections( $text, $sectionId, $mode, $newText='')
Break wikitext input into sections, and either pull or replace some particular section's text.
Definition: Parser.php:5622
Hooks\runner
static runner()
Get a HookRunner instance for calling hooks using the new interfaces.
Definition: Hooks.php:173
Parser\OT_HTML
const OT_HTML
Definition: Parser.php:125
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:137
Parser\fuzzTestPreprocess
fuzzTestPreprocess( $text, PageReference $page, ParserOptions $options)
Strip/replaceVariables/unstrip for preprocessor regression testing.
Definition: Parser.php:6199
PPFrame
Definition: PPFrame.php:28
$line
$line
Definition: mcc.php:119
Parser\EXT_LINK_URL_CLASS
const EXT_LINK_URL_CLASS
Definition: Parser.php:104
MediaWiki\Linker\LinkTarget\getDBkey
getDBkey()
Get the main part with underscores.
Parser\getUserIdentity
getUserIdentity()
Get a user either from the user set on Parser if it's set, or from the ParserOptions object otherwise...
Definition: Parser.php:1178
Parser\renderImageGallery
renderImageGallery( $text, array $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5016
StringUtils\delimiterExplode
static delimiterExplode( $startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...
Definition: StringUtils.php:59
OutputPage\setupOOUI
static setupOOUI( $skinName='default', $dir='ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
Definition: OutputPage.php:4172
Parser\magicLinkCallback
magicLinkCallback(array $m)
Definition: Parser.php:1763
Parser\fetchTemplateAndTitle
fetchTemplateAndTitle(LinkTarget $link)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3558
wfEscapeWikiText
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking,...
Definition: GlobalFunctions.php:1448
Parser\incrementExpensiveFunctionCount
incrementExpensiveFunctionCount()
Definition: Parser.php:4032
Parser\$mImageParams
$mImageParams
Definition: Parser.php:162
Parser\setFunctionHook
setFunctionHook( $id, callable $callback, $flags=0)
Create a parser function, e.g. {{sum:1|2|3}}.
Definition: Parser.php:4927
Parser\setLinkID
setLinkID( $id)
Definition: Parser.php:1138
Sanitizer\cleanUrl
static cleanUrl( $url)
Definition: Sanitizer.php:1628
Parser\__construct
__construct(ServiceOptions $svcOptions, MagicWordFactory $magicWordFactory, Language $contLang, ParserFactory $factory, string $urlProtocols, SpecialPageFactory $spFactory, LinkRendererFactory $linkRendererFactory, NamespaceInfo $nsInfo, LoggerInterface $logger, BadFileLookup $badFileLookup, LanguageConverterFactory $languageConverterFactory, HookContainer $hookContainer, TidyDriverBase $tidy, WANObjectCache $wanCache, UserOptionsLookup $userOptionsLookup, UserFactory $userFactory, TitleFormatter $titleFormatter, HttpRequestFactory $httpRequestFactory, TrackingCategories $trackingCategories)
Constructing parsers directly is not allowed! Use a ParserFactory.
Definition: Parser.php:419
Parser\$magicWordFactory
MagicWordFactory $magicWordFactory
Definition: Parser.php:311
Parser\preprocessToDom
preprocessToDom( $text, $flags=0)
Get the document object model for the given wikitext.
Definition: Parser.php:2873
Parser\setUser
setUser(?UserIdentity $user)
Set the current user.
Definition: Parser.php:992
Parser
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition: Parser.php:92
RequestContext\getMain
static getMain()
Get the RequestContext object associated with the main request.
Definition: RequestContext.php:484
MediaWiki\User\UserOptionsLookup
Provides access to user options.
Definition: UserOptionsLookup.php:29
Title\newFromLinkTarget
static newFromLinkTarget(LinkTarget $linkTarget, $forceClone='')
Returns a Title given a LinkTarget.
Definition: Title.php:294
Parser\getMagicWordFactory
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition: Parser.php:1213
Parser\argSubstitution
argSubstitution(array $piece, PPFrame $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:3873
Linker\makeMediaLinkFile
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:943
Sanitizer\fixTagAttributes
static fixTagAttributes( $text, $element, $sorted=false)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML,...
Definition: Sanitizer.php:704
Parser\fetchFileAndTitle
fetchFileAndTitle(LinkTarget $link, array $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3751
Preprocessor\resetParser
resetParser(?Parser $parser)
Allows resetting the internal Parser reference after Preprocessor is cloned.
Definition: Preprocessor.php:95
Parser\setOutputType
setOutputType( $ot)
Mutator for the output type.
Definition: Parser.php:1066
$lines
if(!file_exists( $CREDITS)) $lines
Definition: updateCredits.php:45
Parser\OT_PREPROCESS
const OT_PREPROCESS
Definition: Parser.php:127
Parser\getExternalLinkAttribs
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link.
Definition: Parser.php:2243
TrackingCategories
This class performs some operations related to tracking categories, such as creating a list of all su...
Definition: TrackingCategories.php:32
Parser\isCurrentRevisionOfTitleCached
isCurrentRevisionOfTitleCached(LinkTarget $link)
Definition: Parser.php:3519
Parser\statelessFetchTemplate
static statelessFetchTemplate( $page, $parser=false)
Static function to get a template. Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3604
Parser\getUserSig
getUserSig(UserIdentity $user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext.
Definition: Parser.php:4644
Parser\$mStripState
StripState $mStripState
Definition: Parser.php:203
Parser\internalParse
internalParse( $text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1563
Parser\validateSig
validateSig( $text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4706
Parser\$mPPNodeCount
$mPPNodeCount
Definition: Parser.php:218
Title
Represents a title within MediaWiki.
Definition: Title.php:47
Parser\resetOutput
resetOutput()
Reset the ParserOutput.
Definition: Parser.php:602
Parser\stripOuterParagraph
static stripOuterParagraph( $html)
Strip the outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6323
Parser\$mVarCache
$mVarCache
Definition: Parser.php:161
Parser\$mDefaultSort
$mDefaultSort
Definition: Parser.php:226
Parser\$mExpensiveFunctionCount
$mExpensiveFunctionCount
Definition: Parser.php:233
Parser\normalizeLinkUrl
static normalizeLinkUrl( $url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:2274
MediaWiki\Preferences\SignatureValidator
Definition: SignatureValidator.php:41
Parser\$mExtLinkBracketedRegex
$mExtLinkBracketedRegex
Definition: Parser.php:185
wfMatchesDomainList
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
Definition: GlobalFunctions.php:852
Parser\$mIncludeSizes
$mIncludeSizes
Definition: Parser.php:216
$cache
$cache
Definition: mcc.php:33
Parser\$httpRequestFactory
HttpRequestFactory $httpRequestFactory
Definition: Parser.php:365
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:26
Parser\getSection
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($sectionId).
Definition: Parser.php:5751
Parser\$mUser
UserIdentity $mUser
Definition: Parser.php:243
Xml\isWellFormedXmlFragment
static isWellFormedXmlFragment( $text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:744
ParserOptions\getUserIdentity
getUserIdentity()
Get the identity of the user for whom the parse is made.
Definition: ParserOptions.php:1004
Parser\preprocess
preprocess( $text, ?PageReference $page, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:923
Parser\$mRevisionTimestamp
$mRevisionTimestamp
Definition: Parser.php:268
Parser\replaceSection
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:5768
Sanitizer\ID_PRIMARY
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition: Sanitizer.php:71
Parser\$logger
LoggerInterface $logger
Definition: Parser.php:344
TitleFormatter
A title formatter service for MediaWiki.
Definition: TitleFormatter.php:35
PPFrame\virtualBracketedImplode
virtualBracketedImplode( $start, $sep, $end,... $params)
Virtual implode with brackets.
Parser\armorLinks
armorLinks( $text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2767
Linker\tocUnindent
static tocUnindent( $level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1572
Linker\makeImageLink
static makeImageLink(Parser $parser, LinkTarget $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:296
getTitle
getTitle()
Definition: RevisionSearchResultTrait.php:81
Parser\parse
parse( $text, PageReference $page, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:625
MediaWiki\Cache\CacheKeyHelper
Helper class for mapping value objects representing basic entities to cache keys.
Definition: CacheKeyHelper.php:43
Parser\getBadFileLookup
getBadFileLookup()
Get the BadFileLookup instance that this Parser is using.
Definition: Parser.php:1233
NS_CATEGORY
const NS_CATEGORY
Definition: Defines.php:78
Parser\getOutput
getOutput()
Definition: Parser.php:1093
StringUtils\delimiterReplace
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to preg_replace() with flags.
Definition: StringUtils.php:248
Parser\handleInternalLinks2
handleInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2424
Parser\getOutputType
getOutputType()
Accessor for the output type.
Definition: Parser.php:1057
Parser\$mGeneratedPPNodeCount
$mGeneratedPPNodeCount
Definition: Parser.php:223
Parser\getHookRunner
getHookRunner()
Get a HookRunner for calling core hooks.
Definition: Parser.php:1669
PPFrame\getArgument
getArgument( $name)
Get an argument to this frame by name.
TextContent\normalizeLineEndings
static normalizeLineEndings( $text)
Do a "\r\n" -> "\n" and "\r" -> "\n" transformation, as well as trim trailing whitespace.
Definition: TextContent.php:203
Parser\getSectionNameFromStrippedText
static getSectionNameFromStrippedText( $text)
Definition: Parser.php:6027
Sanitizer\escapeIdForLink
static escapeIdForLink( $id)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid URL...
Definition: Sanitizer.php:839
Linker\normalizeSubpageLink
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1411
MediaWiki\Config\ServiceOptions\get
get( $key)
Definition: ServiceOptions.php:93
MediaWiki\HookContainer\HookContainer
HookContainer class.
Definition: HookContainer.php:45
Title\castFromPageReference
static castFromPageReference(?PageReference $pageReference)
Return a Title for a given Reference.
Definition: Title.php:344
Parser\SFH_NO_HASH
const SFH_NO_HASH
Definition: Parser.php:95
CoreMagicVariables\expand
static expand(Parser $parser, string $id, int $ts, NamespaceInfo $nsInfo, ServiceOptions $svcOptions, LoggerInterface $logger)
Expand the magic variable given by $index.
Definition: CoreMagicVariables.php:49
Parser\$mShowToc
$mShowToc
Definition: Parser.php:235
ImageGalleryBase\factory
static factory( $mode=false, IContextSource $context=null)
Get a new image gallery.
Definition: ImageGalleryBase.php:116
Sanitizer\decodeTagAttributes
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1005
Parser\guessSectionNameFromStrippedText
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition: Parser.php:6089
Parser\getTemplateDom
getTemplateDom(LinkTarget $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3444
Parser\$languageConverterFactory
LanguageConverterFactory $languageConverterFactory
Definition: Parser.php:317
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Definition: NamespaceInfo.php:35
PPFrame\isTemplate
isTemplate()
Return true if the frame is a template frame.
MediaWiki\HookContainer\HookRunner
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:557
Parser\parseLinkParameter
parseLinkParameter( $value)
Parse the value of 'link' parameter in image syntax ([[File:Foo.jpg|link=<value>]]).
Definition: Parser.php:5468
$t
$t
Definition: testCompression.php:74
Title\castFromLinkTarget
static castFromLinkTarget( $linkTarget)
Same as newFromLinkTarget, but if passed null, returns null.
Definition: Title.php:318
Title\legalChars
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:736
Sanitizer\decodeCharReferences
static decodeCharReferences( $text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string.
Definition: Sanitizer.php:1229
Parser\getRevisionTimestamp
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:5920
Parser\expandMagicVariable
expandMagicVariable( $index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2794
NS_FILE
const NS_FILE
Definition: Defines.php:70
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
Parser\pstPass2
pstPass2( $text, UserIdentity $user)
Pre-save transform helper function.
Definition: Parser.php:4567
Parser\$mPreprocessor
Preprocessor $mPreprocessor
Definition: Parser.php:191
Parser\parseWidthParam
static parseWidthParam( $value, $parseHeight=true)
Parse a width param of an image link, like 300px or 200x300px.
Definition: Parser.php:6266
RawMessage
Variant of the Message class.
Definition: RawMessage.php:35
Parser\cleanSigInSig
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4764
Parser\setTitle
setTitle(Title $t=null)
Set the context title.
Definition: Parser.php:1003
User
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:68
Parser\replaceLinkHoldersText
replaceLinkHoldersText( $text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:4998
Parser\normalizeUrlComponent
static normalizeUrlComponent( $component, $unsafe)
Definition: Parser.php:2333
Parser\clearTagHooks
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:4878
MWTimestamp\getLocalInstance
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
Definition: MWTimestamp.php:173
OT_WIKI
const OT_WIKI
Definition: Defines.php:158
MediaWiki\User\UserFactory
Creates User objects.
Definition: UserFactory.php:41
Linker\makeExternalImage
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage().
Definition: Linker.php:240
Parser\$mTitle
Title null $mTitle
Since 1.34, leaving mTitle uninitialized or setting mTitle to null is deprecated.
Definition: Parser.php:261
Parser\getLinkRenderer
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:1198
Language
Internationalisation code. See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Definition: Language.php:42
Parser\$mRevisionId
$mRevisionId
Definition: Parser.php:266
RequestContext\setTitle
setTitle(Title $title=null)
Definition: RequestContext.php:173
Parser\setPage
setPage(?PageReference $t=null)
Set the page used as context for parsing, e.g. when resolving relative subpage links.
Definition: Parser.php:1025
Parser\$mRevisionSize
$mRevisionSize
Definition: Parser.php:272
Parser\getRevisionId
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5855
Parser\fetchCurrentRevisionRecordOfTitle
fetchCurrentRevisionRecordOfTitle(LinkTarget $link)
Fetch the current revision of a given title as a RevisionRecord.
Definition: Parser.php:3489
MediaWiki\Revision\SlotRecord
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:40
OT_PLAIN
const OT_PLAIN
Definition: Defines.php:161
Parser\clearState
clearState()
Clear Parser state.
Definition: Parser.php:554
Parser\guessLegacySectionNameFromWikiText
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition: Parser.php:6076
MediaWiki\Config\ServiceOptions\assertRequiredOptions
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
Definition: ServiceOptions.php:71
ParserOptions\newFromUser
static newFromUser( $user)
Get a ParserOptions object from a given user.
Definition: ParserOptions.php:1044
Parser\stripSectionName
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6129
Parser\EXT_IMAGE_REGEX
const EXT_IMAGE_REGEX
Definition: Parser.php:111
Parser\getPreprocessor
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:1188
Parser\doBlockLevels
doBlockLevels( $text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: Parser.php:2781
Parser\$hookContainer
HookContainer $hookContainer
Definition: Parser.php:350
$type
$type
Definition: testCompression.php:52