MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
47 use Psr\Log\LoggerInterface;
48 use Wikimedia\IPUtils;
49 use Wikimedia\ScopedCallback;
50 
91 class Parser {
92 
93  # Flags for Parser::setFunctionHook
94  public const SFH_NO_HASH = 1;
95  public const SFH_OBJECT_ARGS = 2;
96 
97  # Constants needed for external link processing
98  # Everything except bracket, space, or control characters
99  # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
100  # as well as U+3000 IDEOGRAPHIC SPACE (see T21052)
101  # \x{FFFD} is the Unicode replacement character, which the HTML5 spec
102  # uses to replace invalid HTML characters.
103  public const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
104  # Simplified expression to match an IPv4 or IPv6 address, or
105  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
106  // phpcs:ignore Generic.Files.LineLength
107  private const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
108  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
109  // phpcs:ignore Generic.Files.LineLength
110  private const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
111  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
112 
113  # Regular expression for a non-newline space
114  private const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
115 
120  public const PTD_FOR_INCLUSION = Preprocessor::DOM_FOR_INCLUSION;
121 
122  # Allowed values for $this->mOutputType
123  # Parameter to startExternalParse().
124  public const OT_HTML = 1; # like parse()
125  public const OT_WIKI = 2; # like preSaveTransform()
126  public const OT_PREPROCESS = 3; # like preprocess()
127  public const OT_MSG = 3;
128  # like extractSections() - portions of the original are returned unchanged.
129  public const OT_PLAIN = 4;
130 
148  public const MARKER_SUFFIX = "-QINU`\"'\x7f";
149  public const MARKER_PREFIX = "\x7f'\"`UNIQ-";
150 
163  public const TOC_START = '<mw:toc>';
164 
170  public const TOC_END = '</mw:toc>';
171 
189  public const TOC_PLACEHOLDER = '<mw:tocplace></mw:tocplace>';
190 
191  # Persistent:
192  private $mTagHooks = [];
193  private $mFunctionHooks = [];
194  private $mFunctionSynonyms = [ 0 => [], 1 => [] ];
195  private $mStripList = [];
196  private $mVarCache = [];
197  private $mImageParams = [];
200  public $mMarkerIndex = 0;
205  public $mFirstCall = false;
206 
207  # Initialised by initializeVariables()
208 
212  private $mVariables;
213 
217  private $mSubstWords;
218 
219  # Initialised in constructor
221 
222  # Initialized in constructor
223 
226  private $mPreprocessor;
227 
228  # Cleared with clearState():
229 
232  private $mOutput;
233  private $mAutonumber;
234 
238  private $mStripState;
239 
243  private $mLinkHolders;
244 
249  public $mLinkID;
261  private $mDefaultSort;
264  public $mHeadings;
268  public $mExpensiveFunctionCount; # number of expensive parser function calls
270  public $mShowToc;
273  private $mTplDomCache;
274 
278  private $mUser;
279 
280  # Temporary
281  # These are variables reset at least once per parse regardless of $clearState
282 
287  public $mOptions;
288 
296  public $mTitle; # Title context, used for self-link rendering and similar things
297  private $mOutputType; # Output type, one of the OT_xxx constants
299  public $ot; # Shortcut alias, see setOutputType()
301  public $mRevisionId; # ID to display in {{REVISIONID}} tags
302 
303  public $mRevisionTimestamp; # The timestamp of the specified revision ID
305  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
307  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
309  public $mInputSize = false; # For {{PAGESIZE}} on current page.
310 
313 
320 
328 
335  public $mInParse = false;
336 
338  private $mProfiler;
339 
343  private $mLinkRenderer;
344 
347 
349  private $contLang;
350 
353 
355  private $factory;
356 
359 
362 
370  private $svcOptions;
371 
374 
376  private $nsInfo;
377 
379  private $logger;
380 
382  private $badFileLookup;
383 
385  private $hookContainer;
386 
388  private $hookRunner;
389 
391  private $tidy;
392 
395 
397  private $userFactory;
398 
401 
404 
408  public const CONSTRUCTOR_OPTIONS = [
409  // See documentation for the corresponding config options
410  'ArticlePath',
411  'EnableScaryTranscluding',
412  'ExtraInterlanguageLinkPrefixes',
413  'FragmentMode',
414  'MaxSigChars',
415  'MaxTocLevel',
416  'MiserMode',
417  'ScriptPath',
418  'Server',
419  'ServerName',
420  'ShowHostnames',
421  'SignatureValidation',
422  'Sitename',
423  'StylePath',
424  'TranscludeCacheExpiry',
425  'PreprocessorCacheThreshold',
426  ];
427 
/**
 * Set up a parser instance. Direct construction is rejected: an MWException
 * is thrown unless ParserFactory::$inParserFactory is non-zero.
 *
 * The constructor checks the required service options, stores the injected
 * services on the object, builds the bracketed external link regex, creates
 * a Preprocessor_Hash, registers the core tag hooks, initialises the
 * variables and finally runs the ParserFirstCallInit hook.
 *
 * NOTE(review): only four parameters are visible in this listing, while the
 * body reads many more ($svcOptions, $magicWordFactory, $contLang, $factory,
 * $linkRendererFactory, $nsInfo, $badFileLookup, ...). The signature looks
 * truncated by the listing; confirm it against the real file.
 *
 * @throws MWException when constructed outside of a ParserFactory
 */
452  public function __construct(
457  string $urlProtocols,
458  SpecialPageFactory $spFactory,
461  LoggerInterface $logger,
466  WANObjectCache $wanCache,
472  ) {
473  if ( ParserFactory::$inParserFactory === 0 ) {
474  // Direct construction of Parser was deprecated in 1.34 and
475  // removed in 1.36; use a ParserFactory instead.
476  throw new MWException( 'Direct construction of Parser not allowed' );
477  }
478  $svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
479  $this->svcOptions = $svcOptions;
480 
481  $this->mUrlProtocols = $urlProtocols;
 // Matches "[url optional-text]": protocol, then one address unit, then any
 // number of URL characters, optional spaces and a lazily matched link text.
482  $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
483  self::EXT_LINK_ADDR .
484  self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
485 
486  $this->magicWordFactory = $magicWordFactory;
487 
488  $this->contLang = $contLang;
489 
490  $this->factory = $factory;
491  $this->specialPageFactory = $spFactory;
492  $this->linkRendererFactory = $linkRendererFactory;
493  $this->nsInfo = $nsInfo;
494  $this->logger = $logger;
495  $this->badFileLookup = $badFileLookup;
496 
497  $this->languageConverterFactory = $languageConverterFactory;
498 
499  $this->hookContainer = $hookContainer;
500  $this->hookRunner = new HookRunner( $hookContainer );
501 
502  $this->tidy = $tidy;
503 
 // The preprocessor gets a back-reference to this parser plus the cache and
 // language conversion settings it needs.
504  $this->mPreprocessor = new Preprocessor_Hash(
505  $this,
506  $wanCache,
507  [
508  'cacheThreshold' => $svcOptions->get( 'PreprocessorCacheThreshold' ),
509  'disableLangConversion' => $languageConverterFactory->isConversionDisabled(),
510  ]
511  );
512 
513  $this->userOptionsLookup = $userOptionsLookup;
514  $this->userFactory = $userFactory;
515  $this->titleFormatter = $titleFormatter;
516  $this->httpRequestFactory = $httpRequestFactory;
517  $this->trackingCategories = $trackingCategories;
518 
519  // These steps used to be done in "::firstCallInit()"
520  // (if you're chasing a reference from some old code)
 // NOTE(review): the listing skips original line 521 here; confirm whether a
 // registration call is missing from this view.
522  CoreTagHooks::register( $this );
523  $this->initializeVariables();
524 
525  $this->hookRunner->onParserFirstCallInit( $this );
526  }
527 
/**
 * Release every property held by this object when it is destroyed.
 *
 * The link holder array is dropped first (when set), then all remaining
 * properties visible from this class are unset one by one.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}

	// Snapshot the property names up front, then unset each property.
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
541 
/**
 * Fix up the copy after cloning: the clone is never "in parse", must not
 * share by-reference fields with the original, and gets its own
 * preprocessor bound to itself. Runs the ParserCloned hook at the end.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: taking a reference to an object field turns the field itself
	// into a reference, and cloning copies reference fields as references.
	// Old hooks pass these fields by reference, so break the link here by
	// copying the value and rebinding the field to the fresh copy.
	foreach ( [ 'mStripState', 'mVarCache' ] as $fieldName ) {
		$detachedCopy = $this->$fieldName;
		$this->$fieldName =& $detachedCopy;
		unset( $detachedCopy );
	}

	$preprocessorCopy = clone $this->mPreprocessor;
	$this->mPreprocessor = $preprocessorCopy;
	$this->mPreprocessor->resetParser( $this );

	$this->hookRunner->onParserCloned( $this );
}
567 
/**
 * Retained for callers that still invoke it; the method body is empty.
 */
public function firstCallInit() {
	// Intentionally a no-op. This method should be hard-deprecated once the
	// remaining calls have been removed.
}
581 
/**
 * Reset all per-parse state: a fresh output object, link holder array,
 * strip state and profiler, zeroed counters, and emptied caches. Runs the
 * ParserClearState hook once everything has been reset.
 */
587  public function clearState() {
588  $this->resetOutput();
589  $this->mAutonumber = 0;
 // NOTE(review): the listing skips original line 592 between the two
 // arguments below; confirm the full argument list against the real file.
590  $this->mLinkHolders = new LinkHolderArray(
591  $this,
593  $this->getHookContainer()
594  );
595  $this->mLinkID = 0;
596  $this->mRevisionTimestamp = null;
597  $this->mRevisionId = null;
598  $this->mRevisionUser = null;
599  $this->mRevisionSize = null;
600  $this->mRevisionRecordObject = null;
601  $this->mVarCache = [];
602  $this->mUser = null;
603  $this->mLangLinkLanguages = [];
604  $this->currentRevisionCache = null;
605 
606  $this->mStripState = new StripState( $this );
607 
608  # Clear these on every parse, T6549
609  $this->mTplRedirCache = [];
610  $this->mTplDomCache = [];
611 
612  $this->mShowToc = true;
613  $this->mForceTocPosition = false;
 # Running size totals, reported later by makeLimitReport()
614  $this->mIncludeSizes = [
615  'post-expand' => 0,
616  'arg' => 0,
617  ];
618  $this->mPPNodeCount = 0;
619  $this->mGeneratedPPNodeCount = 0;
620  $this->mHighestExpansionDepth = 0;
621  $this->mDefaultSort = false;
622  $this->mHeadings = [];
623  $this->mDoubleUnderscores = [];
624  $this->mExpensiveFunctionCount = 0;
625 
626  $this->mProfiler = new SectionProfiler();
627 
628  $this->hookRunner->onParserClearState( $this );
629  }
630 
/**
 * Replace the current output with a new, empty ParserOutput and register it
 * with the parser options so that option accesses are recorded on it.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput;
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
639 
/**
 * Convert wikitext to HTML and return the populated output object.
 *
 * @param string $text Wikitext to parse
 * @param PageReference $page Page the text belongs to
 * @param ParserOptions $options
 * @param bool $linestart Passed on to the block-level pass
 * @param bool $clearState When true the input is sanitised for strip
 *   markers and the parser is locked for the duration of the call
 * @param int|null $revid Revision ID to expose while parsing; when given,
 *   the other cached revision fields are blanked for this parse
 * @return ParserOutput The object held in $this->mOutput
 */
public function parse(
	$text, PageReference $page, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// Strip markers are built around U+007F DELETE, so that character
		// must not survive in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Keep the value returned by lock() alive until this method returns.
		$lockScope = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $page, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	$this->mOutput->resetParseStartTime();

	// Remember the revision-related fields so they can be restored at the end.
	$savedRevisionState = [
		$this->mRevisionId,
		$this->mRevisionRecordObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize,
	];
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionRecordObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	$text = $this->internalParse( $text );
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Title conversion is skipped when disabled by option, suppressed by a
	// double-underscore switch, or when a display title is already set.
	$skipTitleConversion = $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false;
	if ( !$skipTitleConversion ) {
		$titleText = $this->getTargetLanguageConverter()->getConvRuleTitle();
		if ( $titleText === false ) {
			$titleText = $this->getTargetLanguageConverter()->convertTitle( $page );
		}
		$this->mOutput->setTitleText(
			htmlspecialchars( $titleText, ENT_NOQUOTES )
		);
	}

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	$mainConfig = MediaWikiServices::getInstance()->getMainConfig();
	if ( $mainConfig->get( 'EnableParserLimitReporting' ) ) {
		$this->makeLimitReport();
	}

	# Wrap non-interface parser output in a <div> so it can be targeted
	# with CSS (T37247)
	$wrapperClass = $this->mOptions->getWrapOutputClass();
	if ( $wrapperClass !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $wrapperClass );
	}

	$this->mOutput->setText( $text );

	// Put back the revision state captured before parsing.
	[
		$this->mRevisionId,
		$this->mRevisionRecordObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize,
	] = $savedRevisionState;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
751 
/**
 * Record limit and cache report data on the current output: CPU and wall
 * time, preprocessor and include-size counters against their limits,
 * strip-state limits, the ten slowest profiled entries, and cache metadata.
 * The ParserLimitReportPrepare hook runs before the profiling data is added.
 */
protected function makeLimitReport() {
	$includeSizeLimit = $this->mOptions->getMaxIncludeSize();

	$cpuSeconds = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuSeconds !== null ) {
		$this->mOutput->setLimitReportData(
			'limitreport-cputime',
			sprintf( "%.3f", $cpuSeconds )
		);
	}

	$wallSeconds = $this->mOutput->getTimeSinceStart( 'wall' );
	$this->mOutput->setLimitReportData(
		'limitreport-walltime',
		sprintf( "%.3f", $wallSeconds )
	);

	// Each entry is a [ current value, configured limit ] pair.
	$usageAgainstLimit = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $includeSizeLimit ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $includeSizeLimit ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $usageAgainstLimit as $reportKey => $pair ) {
		$this->mOutput->setLimitReportData( $reportKey, $pair );
	}

	foreach ( $this->mStripState->getLimitReport() as [ $reportKey, $reportValue ] ) {
		$this->mOutput->setLimitReportData( $reportKey, $reportValue );
	}

	$this->hookRunner->onParserLimitReportPrepare( $this, $this->mOutput );

	// Add on template profiling data in human/machine readable way
	$statsByFunction = $this->mProfiler->getFunctionStats();
	uasort( $statsByFunction, static function ( $left, $right ) {
		// Largest real time first
		return $right['real'] <=> $left['real'];
	} );
	$timingProfile = array_values( array_map(
		static function ( $entry ) {
			return sprintf( "%6.2f%% %8.3f %6d %s",
				$entry['%real'], $entry['real'], $entry['calls'],
				htmlspecialchars( $entry['name'] ) );
		},
		array_slice( $statsByFunction, 0, 10 )
	) );
	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $timingProfile );

	// Add other cache related metadata
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData(
		'cachereport-timestamp',
		$this->mOutput->getCacheTime()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-ttl',
		$this->mOutput->getCacheExpiry()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasReducedExpiry()
	);
}
817 
/**
 * Run internalParse() on the text as a non-main parse and return the
 * half-parsed result.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame used for variable expansion, if any
 * @return string
 */
public function recursiveTagParse( $text, $frame = false ) {
	return $this->internalParse( $text, false, $frame );
}
847 
/**
 * Like recursiveTagParse(), but also runs the second parsing stage
 * (internalParseHalfParsed() with $isMain = false) on the result.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame used for variable expansion, if any
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
872 
/**
 * Parse the text through both stages as if it were the main document: the
 * ParserAfterParse hook runs between the stages and the second stage is
 * invoked with $isMain = true.
 *
 * @param string $text
 * @return string
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$halfParsed = $this->recursiveTagParse( $text );
	// The hook receives the text so handlers can adjust it before stage two.
	$this->hookRunner->onParserAfterParse( $this, $halfParsed, $this->mStripState );
	return $this->internalParseHalfParsed( $halfParsed, true );
}
898 
/**
 * Expand variables in the text and unstrip the result, without performing
 * the HTML conversion. The parser is locked and restarted in
 * OT_PREPROCESS mode for the duration of the call.
 *
 * @param string $text
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to expose, when not null
 * @param PPFrame|false $frame Frame handed to replaceVariables()
 * @return string
 */
public function preprocess(
	$text,
	?PageReference $page,
	ParserOptions $options,
	$revid = null,
	$frame = false
) {
	// Keep the value returned by lock() alive until this method returns.
	$lockScope = $this->lock();
	$this->startParse( $page, $options, self::OT_PREPROCESS, true );

	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	$this->hookRunner->onParserBeforePreprocess( $this, $text, $this->mStripState );

	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
928 
/**
 * Expand variables in the text and unstrip the result, using the parser's
 * current state (no locking, no restart).
 *
 * @param string $text
 * @param PPFrame|false $frame Frame handed to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
943 
/**
 * Process wikitext used to preload an edit form: substitute the given
 * message parameters, then expand the text for inclusion with template and
 * argument expansion switched off, and unstrip the result.
 *
 * @param string $text Wikitext; treated as a raw message so that $params
 *   can be substituted into it
 * @param PageReference $page
 * @param ParserOptions $options
 * @param array $params Message parameters substituted into $text
 * @return string
 */
public function getPreloadText( $text, PageReference $page, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	// Keep the value returned by lock() alive until this method returns.
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PLAIN, true );

	// $flags was previously passed to expand() without being defined.
	// Preloaded text must keep templates and arguments unexpanded.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
972 
/**
 * Set the user stored on the parser. getUserIdentity() prefers this value
 * and falls back to the parser options when it is null.
 *
 * @param UserIdentity|null $user
 */
public function setUser( ?UserIdentity $user ) {
	$this->mUser = $user;
}
983 
/**
 * Set the context title. Delegates to setPage(), which substitutes a
 * placeholder title when null is given.
 *
 * The parameter is declared explicitly nullable (it was implicitly nullable
 * through its null default), matching setPage() and avoiding the implicit
 * nullable deprecation in newer PHP versions. Callers are unaffected.
 *
 * @param Title|null $t
 */
public function setTitle( ?Title $t = null ) {
	$this->setPage( $t );
}
994 
/**
 * Return the context title, lazily substituting (and storing) the
 * "Special:Badtitle/Parser" placeholder when none is set.
 *
 * @return Title
 */
public function getTitle(): Title {
	if ( $this->mTitle ) {
		return $this->mTitle;
	}

	$this->mTitle = Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );
	return $this->mTitle;
}
1006 
/**
 * Set the page used as title context for parsing.
 *
 * A null page is replaced by the "Special:Badtitle/Parser" placeholder. Any
 * other page reference is converted to a Title, because the code below (and
 * other methods reading $this->mTitle) call Title methods on it. The
 * else-branch previously announced this conversion in its comment but did
 * not perform it. A fragment on the title is stripped before storing.
 *
 * @param PageReference|null $t
 */
public function setPage( ?PageReference $t = null ) {
	if ( !$t ) {
		$t = Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );
	} else {
		// For now (early 1.37 alpha), always convert to Title, so we don't have to do it over
		// and over again in other methods. Eventually, we will no longer need to have a Title
		// instance internally.
		$t = Title::castFromPageReference( $t );
	}

	if ( $t->hasFragment() ) {
		# Strip the fragment to avoid various odd effects
		$this->mTitle = $t->createFragmentTarget( '' );
	} else {
		$this->mTitle = $t;
	}
}
1030 
/**
 * Return the page currently used as title context (the value stored in
 * $this->mTitle), or null when none has been set.
 *
 * @return PageReference|null
 */
public function getPage(): ?PageReference {
	return $this->mTitle;
}
1039 
/**
 * Return the current output type, as stored by setOutputType().
 *
 * @return int One of the self::OT_* constants
 */
public function getOutputType(): int {
	return $this->mOutputType;
}
1048 
/**
 * Store the output type and rebuild the $this->ot shortcut map, which holds
 * one boolean per named mode ('html', 'wiki', 'pre', 'plain').
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ): void {
	$this->mOutputType = $ot;

	# Shortcut alias
	$modeConstants = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$shortcuts = [];
	foreach ( $modeConstants as $modeName => $modeValue ) {
		// Loose comparison, as before
		$shortcuts[$modeName] = ( $ot == $modeValue );
	}
	$this->ot = $shortcuts;
}
1064 
/**
 * Deprecated combined accessor for the output type: emits a deprecation
 * notice (since 1.35) and hands the stored type and $x to wfSetVar().
 *
 * @deprecated since 1.35
 * @param int|null $x
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );

	return wfSetVar( $this->mOutputType, $x );
}
1076 
/**
 * Return the output object currently being populated.
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
1084 
/**
 * Return the parser options currently in use (the value of $this->mOptions).
 *
 * @return ParserOptions|null
 */
public function getOptions() {
	return $this->mOptions;
}
1092 
/**
 * Replace the parser options used by this parser.
 *
 * @param ParserOptions $options
 */
public function setOptions( ParserOptions $options ): void {
	$this->mOptions = $options;
}
1101 
/**
 * Deprecated combined accessor for the parser options: emits a deprecation
 * notice (since 1.35) and hands the stored options and $x to wfSetVar().
 *
 * @deprecated since 1.35
 * @param ParserOptions|null $x
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );

	return wfSetVar( $this->mOptions, $x );
}
1113 
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The ID before incrementing
 */
public function nextLinkID() {
	$currentId = $this->mLinkID;
	$this->mLinkID++;
	return $currentId;
}
1121 
/**
 * Overwrite the link ID counter that nextLinkID() hands out from.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1129 
/**
 * Return the language to use for parser functions; this is simply the
 * target language of the parse.
 *
 * @return mixed Whatever getTargetLanguage() returns
 */
public function getFunctionLang() {
	return $this->getTargetLanguage();
}
1138 
/**
 * Determine the target language of the parse. Precedence:
 *  1. the target language set on the parser options, when not null;
 *  2. the user language, when parsing an interface message;
 *  3. the page language of the context title.
 *
 * @return mixed The language object from whichever source applies
 */
public function getTargetLanguage() {
	$explicitTarget = $this->mOptions->getTargetLanguage();
	if ( $explicitTarget !== null ) {
		return $explicitTarget;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	return $this->getTitle()->getPageLanguage();
}
1158 
/**
 * Return the user set via setUser(), or the user identity from the parser
 * options when none was set.
 *
 * @return UserIdentity
 */
public function getUserIdentity(): UserIdentity {
	if ( $this->mUser !== null ) {
		return $this->mUser;
	}

	return $this->getOptions()->getUserIdentity();
}
1169 
/**
 * Return the preprocessor created for this parser in the constructor.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	return $this->mPreprocessor;
}
1179 
/**
 * Return the link renderer, creating it from the factory on first use.
 *
 * @return mixed The object produced by $this->linkRendererFactory->create()
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	// XXX We make the LinkRenderer with current options and then cache it forever
	$this->mLinkRenderer = $this->linkRendererFactory->create();

	return $this->mLinkRenderer;
}
1194 
/**
 * Return the magic word factory injected through the constructor.
 *
 * @return mixed The stored factory (presumably a MagicWordFactory)
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1204 
/**
 * Return the content language injected through the constructor.
 *
 * @return mixed The stored language object (presumably a Language)
 */
public function getContentLanguage() {
	return $this->contLang;
}
1214 
/**
 * Return the bad file lookup service injected through the constructor.
 *
 * @return mixed The stored lookup (presumably a BadFileLookup)
 */
public function getBadFileLookup() {
	return $this->badFileLookup;
}
1224 
/**
 * Replace every occurrence of the given tags (and every HTML comment) in
 * the text with a unique marker, and report what was removed.
 *
 * For each replaced element, $matches[marker] is set to
 * [ element name, inner content (null for self-closing tags),
 *   decoded attributes, original text of the element ].
 * An element with no end tag runs to the end of the text.
 *
 * @param string[] $elements Tag names to extract; they are joined with '|'
 *   and embedded in a regular expression unescaped
 * @param string $text
 * @param array &$matches Overwritten with the extracted elements, keyed by marker
 * @return string The text with markers in place of the extracted elements
 */
public static function extractTagsAndParams( array $elements, $text, &$matches ) {
	// Shared across calls so that markers stay unique.
	static $n = 1;
	$matches = [];
	$stripped = '';

	$tagAlternation = implode( '|', $elements );
	$openPattern = "/<($tagAlternation)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$openParts = preg_split( $openPattern, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $openParts[0];

		$partCount = count( $openParts );
		if ( $partCount < 5 ) {
			// Nothing left to extract.
			break;
		}
		if ( $partCount > 5 ) {
			# comment
			$element = $openParts[4];
			$attributes = '';
			$close = '';
			$inside = $openParts[5];
		} else {
			# tag
			[ , $element, $attributes, $close, $inside ] = $openParts;
		}

		$marker = sprintf(
			'%s-%s-%08X%s',
			self::MARKER_PREFIX, $element, $n++, self::MARKER_SUFFIX
		);
		$stripped .= $marker;

		$content = null;
		$tail = null;
		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$text = $inside;
		} else {
			$closePattern = $element === '!--'
				? '/(-->)/'
				: "/(<\\/$element\\s*>)/i";
			$closeParts = preg_split( $closePattern, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $closeParts[0];
			if ( count( $closeParts ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				[ , $tail, $text ] = $closeParts;
			}
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail",
		];
	}

	return $stripped;
}
1301 
/**
 * Return the parser's strip list (the value of $this->mStripList).
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1310 
/**
 * Return the strip state of the current parse.
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1318 
/**
 * Store the text in the strip state as a general item and return the
 * marker that now stands for it. Each call consumes one marker index.
 *
 * @param string $text
 * @return string The marker
 */
public function insertStripItem( $text ) {
	$markerIndex = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$markerIndex}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1334 
/**
 * Convert wikitext table markup into HTML table markup.
 *
 * The text is processed line by line. Five parallel stacks, with one entry
 * per currently open (possibly nested) table, track whether a cell and a
 * row are open, which cell tag was opened last, the pending row attributes,
 * and whether the table has opened any row at all. Lines outside a table
 * are passed through unchanged. Tables still open at the end of the text
 * are closed, and a table consisting of nothing but the placeholder row
 * yields an empty string.
 *
 * @param string $text
 * @return string
 */
1341  private function handleTables( $text ) {
1342  $lines = StringUtils::explode( "\n", $text );
1343  $out = '';
1344  $td_history = []; # Is currently a td tag open?
1345  $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1346  $tr_history = []; # Is currently a tr tag open?
1347  $tr_attributes = []; # history of tr attributes
1348  $has_opened_tr = []; # Did this table open a <tr> element?
1349  $indent_level = 0; # indent level of the table
1350 
1351  foreach ( $lines as $outLine ) {
1352  $line = trim( $outLine );
1353 
1354  if ( $line === '' ) { # empty line, go to next line
1355  $out .= $outLine . "\n";
1356  continue;
1357  }
1358 
1359  $first_character = $line[0];
1360  $first_two = substr( $line, 0, 2 );
1361  $matches = [];
1362 
1363  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1364  # First check if we are starting a new table
 # The number of leading colons is the indent level of the table
1365  $indent_level = strlen( $matches[1] );
1366 
1367  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1368  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1369 
1370  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
 # Push a fresh entry for the new table onto each stack
1371  array_push( $td_history, false );
1372  array_push( $last_tag_history, '' );
1373  array_push( $tr_history, false );
1374  array_push( $tr_attributes, '' );
1375  array_push( $has_opened_tr, false );
1376  } elseif ( count( $td_history ) == 0 ) {
1377  # Don't do any of the following
 # (we are not inside a table, so the line passes through unchanged)
1378  $out .= $outLine . "\n";
1379  continue;
1380  } elseif ( $first_two === '|}' ) {
1381  # We are ending a table
1382  $line = '</table>' . substr( $line, 2 );
1383  $last_tag = array_pop( $last_tag_history );
1384 
 # A table that never opened a row gets a placeholder row
1385  if ( !array_pop( $has_opened_tr ) ) {
1386  $line = "<tr><td></td></tr>{$line}";
1387  }
1388 
1389  if ( array_pop( $tr_history ) ) {
1390  $line = "</tr>{$line}";
1391  }
1392 
1393  if ( array_pop( $td_history ) ) {
1394  $line = "</{$last_tag}>{$line}";
1395  }
1396  array_pop( $tr_attributes );
1397  if ( $indent_level > 0 ) {
1398  $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1399  } else {
1400  $outLine = $line;
1401  }
1402  } elseif ( $first_two === '|-' ) {
1403  # Now we have a table row
1404  $line = preg_replace( '#^\|-+#', '', $line );
1405 
1406  # What's after the tag is now only attributes
1407  $attributes = $this->mStripState->unstripBoth( $line );
1408  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
 # Replace the pending row attributes; the <tr> itself is emitted with the first cell
1409  array_pop( $tr_attributes );
1410  array_push( $tr_attributes, $attributes );
1411 
1412  $line = '';
1413  $last_tag = array_pop( $last_tag_history );
1414  array_pop( $has_opened_tr );
1415  array_push( $has_opened_tr, true );
1416 
1417  if ( array_pop( $tr_history ) ) {
1418  $line = '</tr>';
1419  }
1420 
1421  if ( array_pop( $td_history ) ) {
1422  $line = "</{$last_tag}>{$line}";
1423  }
1424 
1425  $outLine = $line;
1426  array_push( $tr_history, false );
1427  array_push( $td_history, false );
1428  array_push( $last_tag_history, '' );
1429  } elseif ( $first_character === '|'
1430  || $first_character === '!'
1431  || $first_two === '|+'
1432  ) {
1433  # This might be cell elements, td, th or captions
1434  if ( $first_two === '|+' ) {
1435  $first_character = '+';
1436  $line = substr( $line, 2 );
1437  } else {
1438  $line = substr( $line, 1 );
1439  }
1440 
1441  // Implies both are valid for table headings.
1442  if ( $first_character === '!' ) {
1443  $line = StringUtils::replaceMarkup( '!!', '||', $line );
1444  }
1445 
1446  # Split up multiple cells on the same line.
1447  # FIXME : This can result in improper nesting of tags processed
1448  # by earlier parser steps.
1449  $cells = explode( '||', $line );
1450 
1451  $outLine = '';
1452 
1453  # Loop through each table cell
1454  foreach ( $cells as $cell ) {
1455  $previous = '';
 # Captions do not open a row; td and th cells open one if none is open yet
1456  if ( $first_character !== '+' ) {
1457  $tr_after = array_pop( $tr_attributes );
1458  if ( !array_pop( $tr_history ) ) {
1459  $previous = "<tr{$tr_after}>\n";
1460  }
1461  array_push( $tr_history, true );
1462  array_push( $tr_attributes, '' );
1463  array_pop( $has_opened_tr );
1464  array_push( $has_opened_tr, true );
1465  }
1466 
1467  $last_tag = array_pop( $last_tag_history );
1468 
 # Close the previous cell, if any, before opening the next one
1469  if ( array_pop( $td_history ) ) {
1470  $previous = "</{$last_tag}>\n{$previous}";
1471  }
1472 
1473  if ( $first_character === '|' ) {
1474  $last_tag = 'td';
1475  } elseif ( $first_character === '!' ) {
1476  $last_tag = 'th';
1477  } elseif ( $first_character === '+' ) {
1478  $last_tag = 'caption';
1479  } else {
1480  $last_tag = '';
1481  }
1482 
1483  array_push( $last_tag_history, $last_tag );
1484 
1485  # A cell could contain both parameters and data
1486  $cell_data = explode( '|', $cell, 2 );
1487 
1488  # T2553: Note that a '|' inside an invalid link should not
1489  # be mistaken as delimiting cell parameters
1490  # Bug T153140: Neither should language converter markup.
1491  if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1492  $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1493  } elseif ( count( $cell_data ) == 1 ) {
1494  // Whitespace in cells is trimmed
1495  $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1496  } else {
1497  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1498  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1499  // Whitespace in cells is trimmed
1500  $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1501  }
1502 
1503  $outLine .= $cell;
1504  array_push( $td_history, true );
1505  }
1506  }
1507  $out .= $outLine . "\n";
1508  }
1509 
1510  # Closing open td, tr && table
 # NOTE(review): an open cell is always closed with </td> here, even when the
 # last opened tag was th or caption ($last_tag_history is not consulted) --
 # confirm that this is intended.
1511  while ( count( $td_history ) > 0 ) {
1512  if ( array_pop( $td_history ) ) {
1513  $out .= "</td>\n";
1514  }
1515  if ( array_pop( $tr_history ) ) {
1516  $out .= "</tr>\n";
1517  }
1518  if ( !array_pop( $has_opened_tr ) ) {
1519  $out .= "<tr><td></td></tr>\n";
1520  }
1521 
1522  $out .= "</table>\n";
1523  }
1524 
1525  # Remove trailing line-ending (b/c)
1526  if ( substr( $out, -1 ) === "\n" ) {
1527  $out = substr( $out, 0, -1 );
1528  }
1529 
1530  # special case: don't return empty table
1531  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1532  $out = '';
1533  }
1534 
1535  return $out;
1536  }
1537 
/**
 * First parsing stage: expand variables and templates, sanitise HTML, then
 * run the table, horizontal rule, double underscore, heading, link, quote
 * and magic link passes. The result is "half-parsed" text that still needs
 * internalParseHalfParsed().
 *
 * The ParserBeforeInternalParse hook can abort the stage; the text is then
 * returned as the hook left it.
 *
 * @param string $text
 * @param bool $isMain Passed on to finalizeHeadings()
 * @param PPFrame|false $frame When given, the text is preprocessed to a DOM
 *   and expanded through this frame; otherwise replaceVariables() is used
 * @return string
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	# Hook to suspend the parser in this state
	if ( !$this->hookRunner->onParserBeforeInternalParse( $this, $text, $this->mStripState ) ) {
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		# (the included-document case previously left $flag undefined)
		$flag = $frame->depth ? Preprocessor::DOM_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	$this->hookRunner->onInternalParseBeforeSanitize( $this, $text, $this->mStripState );
	$text = Sanitizer::removeHTMLtags(
		$text,
		// Callback from the Sanitizer for expanding items found in
		// HTML attribute values, so they can be safely tested and escaped.
		function ( &$text, $frame = false ) {
			$text = $this->replaceVariables( $text, $frame );
			$text = $this->mStripState->unstripBoth( $text );
		},
		false,
		[],
		[]
	);
	$this->hookRunner->onInternalParseBeforeLinks( $this, $text, $this->mStripState );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->handleTables( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->handleDoubleUnderscore( $text );

	$text = $this->handleHeadings( $text );
	$text = $this->handleInternalLinks( $text );
	$text = $this->handleAllQuotes( $text );
	$text = $this->handleExternalLinks( $text );

	# handleInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from handleExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->handleMagicLinks( $text );
	$text = $this->finalizeHeadings( $text, $origText, $isMain );

	return $text;
}
1614 
1622  return $this->languageConverterFactory->getLanguageConverter(
1623  $this->getTargetLanguage()
1624  );
1625  }
1626 
1633  return $this->languageConverterFactory->getLanguageConverter(
1634  $this->getContentLanguage()
1635  );
1636  }
1637 
/**
 * Accessor for the hook container stored on this parser instance.
 *
 * @return mixed Whatever is currently held in $this->hookContainer
 */
protected function getHookContainer() {
	$container = $this->hookContainer;

	return $container;
}
1648 
/**
 * Accessor for the hook runner stored on this parser instance.
 *
 * @return mixed Whatever is currently held in $this->hookRunner
 */
protected function getHookRunner() {
	$runner = $this->hookRunner;

	return $runner;
}
1660 
/**
 * Finish turning half-parsed text into HTML.
 *
 * Runs, in order: general unstripping, block-level processing, link holder
 * replacement, optional language conversion, nowiki/general unstripping,
 * tidying, and (for the main text only) the ParserAfterTidy hook.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain Whether this is the main text; controls whether the
 *   ParserAfterTidy hook is run
 * @param bool $linestart Passed through to BlockLevelPass::doBlockLevels()
 * @return string The processed text
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = BlockLevelPass::doBlockLevels(
		$this->mStripState->unstripGeneral( $text ),
		$linestart
	);

	$this->replaceLinkHoldersPrivate( $text );

	// Conversion is skipped when it is disabled in the options, when the
	// 'nocontentconvert' double underscore was seen, or for interface messages.
	$shouldConvert = !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage();
	if ( $shouldConvert ) {
		# The position of the convert() call should not be changed. It
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$converter = $this->getTargetLanguageConverter();
		$text = $converter->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$armorCallback = [ Sanitizer::class, 'armorFrenchSpaces' ];
	$text = $this->tidy->tidy( $text, $armorCallback );

	if ( $isMain ) {
		$this->hookRunner->onParserAfterTidy( $this, $text );
	}

	return $text;
}
1706 
/**
 * Find free external links and RFC / PMID / ISBN "magic links" in the text
 * and hand every match to magicLinkCallback() for replacement.
 *
 * Existing <a>...</a> elements and other HTML tags are matched too, so the
 * callback can return them unchanged instead of linking inside them.
 *
 * @param string $text
 * @return string Result of preg_replace_callback()
 */
private function handleMagicLinks( $text ) {
	$protocols = wfUrlProtocolsWithoutProtRel();
	$urlClass = self::EXT_LINK_URL_CLASS;
	$address = self::EXT_LINK_ADDR;
	$nbSpace = self::SPACE_NOT_NL; # non-newline space
	$dashOrSpace = "(?:-|$nbSpace)"; # a dash or a non-newline space
	$spaceRun = "$nbSpace++"; # possessive match of 1 or more spaces

	// Extended (x) syntax: layout whitespace and #-comments are ignored by PCRE.
	$pattern = '!(?: # Start cases
		(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
		(<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
		(\b # m[3]: Free external links
			(?i:$protocols)
			($address$urlClass*) # m[4]: Post-protocol path
		) |
		\b(?:RFC|PMID) $spaceRun # m[5]: RFC or PMID, capture number
			([0-9]+)\b |
		\bISBN $spaceRun ( # m[6]: ISBN, capture number
			(?: 97[89] $dashOrSpace? )? # optional 13-digit ISBN prefix
			(?: [0-9] $dashOrSpace? ){9} # 9 digits with opt. delimiters
			[0-9Xx] # check digit
		)\b
	)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1745 
/**
 * Replacement callback for handleMagicLinks().
 *
 * Capture groups: 1 = existing anchor, 2 = other HTML element, 3/4 = free
 * external link and its post-protocol part, 5 = RFC/PMID number, 6 = ISBN.
 *
 * @param array $m Match array from preg_replace_callback()
 * @return string Replacement HTML, or the unchanged match
 * @throws MWException If group 5 matched but the text starts with neither
 *   "RFC" nor "PMID"
 */
private function magicLinkCallback( array $m ) {
	$matched = $m[0];
	$hasGroup = static function ( $index ) use ( $m ) {
		return isset( $m[$index] ) && $m[$index] !== '';
	};

	# Skip anchors and other HTML elements
	if ( $hasGroup( 1 ) || $hasGroup( 2 ) ) {
		return $matched;
	}

	# Free external link
	if ( $hasGroup( 3 ) ) {
		return $this->makeFreeExternalLink( $matched, strlen( $m[4] ) );
	}

	# RFC or PMID
	if ( $hasGroup( 5 ) ) {
		if ( strncmp( $matched, 'RFC', 3 ) === 0 ) {
			$enabled = $this->mOptions->getMagicRFCLinks();
			$spec = [ 'RFC', 'rfcurl', 'mw-magiclink-rfc', 'magiclink-tracking-rfc' ];
		} elseif ( strncmp( $matched, 'PMID', 4 ) === 0 ) {
			$enabled = $this->mOptions->getMagicPMIDLinks();
			$spec = [ 'PMID', 'pubmedurl', 'mw-magiclink-pmid', 'magiclink-tracking-pmid' ];
		} else {
			// Should never happen
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $matched, 0, 20 ) . '"' );
		}
		if ( !$enabled ) {
			return $matched;
		}

		[ $keyword, $urlmsg, $cssClass, $trackingCat ] = $spec;
		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink(
			$url,
			"{$keyword} {$id}",
			true,
			$cssClass,
			[],
			$this->getTitle()
		);
	}

	# ISBN; left untouched when ISBN magic links are disabled
	if ( !$hasGroup( 6 ) || !$this->mOptions->getMagicISBNLinks() ) {
		return $matched;
	}

	$nbSpace = self::SPACE_NOT_NL; # non-newline space
	$isbn = preg_replace( "/$nbSpace/", ' ', $m[6] );
	// Bare number for the Booksources target: no separators, upper-case check digit
	$num = strtr( $isbn, [
		'-' => '',
		' ' => '',
		'x' => 'X',
	] );
	$this->addTrackingCategory( 'magiclink-tracking-isbn' );
	return $this->getLinkRenderer()->makeKnownLink(
		SpecialPage::getTitleFor( 'Booksources', $num ),
		"ISBN $isbn",
		[
			'class' => 'internal mw-magiclink-isbn',
			'title' => false // suppress title attribute
		]
	);
}
1821 
/**
 * Turn a free (unbracketed) URL into an external link or external image.
 *
 * Escaped angle brackets / non-breaking spaces and trailing punctuation are
 * split off the URL and appended after the generated HTML.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Number of characters matched after the protocol;
 *   if the split-off trail is at least this long, nothing but the protocol
 *   is left and the text is returned unlinked
 * @return string HTML
 */
private function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$entityMatch = [];
	$entityFound = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entityMatch,
		PREG_OFFSET_CAPTURE
	);
	if ( $entityFound ) {
		$cut = $entityMatch[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Move trailing punctuation to $trail.
	# If there is no left bracket, then consider right brackets fair game too
	$punctuation = ',;\.:!?';
	if ( strpos( $url, '(' ) === false ) {
		$punctuation .= ')';
	}

	$reversed = strrev( $url );
	$trailLen = strspn( $reversed, $punctuation );
	# Don't break a trailing HTML entity by moving the ; into $trail.
	# This is in hot code, so substr_compare is used to avoid creating a new
	# string for the comparison, and the entity check runs on the reversed
	# string from the desired offset instead of using a slow $ anchor.
	# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLen
		&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $entityMatch, 0, $trailLen )
	) {
		$trailLen--;
	}
	if ( $trailLen ) {
		$trail = substr( $url, -$trailLen ) . $trail;
		$url = substr( $url, 0, -$trailLen );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html !== false ) {
		return $html . $trail;
	}

	# Not an image, make a link
	$label = $this->getTargetLanguageConverter()->markNoConversion( $url );
	$attribs = $this->getExternalLinkAttribs( $url );
	$html = Linker::makeExternalLink( $url, $label, true, 'free', $attribs, $this->getTitle() );
	# Register it in the output object...
	$this->mOutput->addExternalLink( $url );

	return $html . $trail;
}
1900 
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that longer runs of '=' are
 * consumed before shorter ones can match them.
 *
 * @param string $text
 * @return string
 */
private function handleHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$marker = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = '/^(?:' . $marker . ')[ \t]*(.+?)[ \t]*(?:' . $marker . ')\s*$/m';
		$replacement = '<h' . $level . '>\1</h' . $level . '>';
		$text = preg_replace( $pattern, $replacement, $text );
	}

	return $text;
}
1916 
/**
 * Apply doQuotes() to each line of the text separately and join the
 * results back together with newlines.
 *
 * @param string $text
 * @return string
 */
private function handleAllQuotes( $text ) {
	$result = '';
	$isFirst = true;
	foreach ( StringUtils::explode( "\n", $text ) as $singleLine ) {
		// Separator goes between lines, never after the last one
		if ( !$isFirst ) {
			$result .= "\n";
		}
		$isFirst = false;
		$result .= $this->doQuotes( $singleLine );
	}

	return $result;
}
1933 
/**
 * Convert runs of apostrophes in a single line into <i> and <b> markup.
 *
 * Two apostrophes toggle italics, three toggle bold and five toggle both.
 * Runs of four, or of more than five, have their surplus leading
 * apostrophes treated as literal text. Tags still open at the end of the
 * line are closed.
 *
 * @param string $text One line of text
 * @return string The line with apostrophe mark-up replaced by HTML tags
 */
public function doQuotes( $text ) {
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		// No apostrophe runs at all
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups. Odd indexes hold the apostrophe runs.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state is one of '', 'i', 'b', 'bi', 'ib' or 'both'. In state 'both'
	// (opened by five apostrophes) text is held back in $buffer until the
	// next run decides the nesting order of the two tags.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			// Plain text segment
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				// two quotes - open or close italics
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				// three quotes - open or close bold
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				// five quotes - open or close both separately
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would treat a
	// buffer of "0" as empty and silently drop that text from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2117 
/**
 * Replace bracketed external links with HTML links.
 *
 * The text is split with $this->mExtLinkBracketedRegex; after the leading
 * plain text, each link contributes four consecutive pieces: URL, protocol
 * (unused here), link text and the text following the link.
 *
 * @param string $text
 * @return string
 * @throws MWException If preg_split() fails
 */
private function handleExternalLinks( $text ) {
	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $pieces === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}
	$html = array_shift( $pieces );

	$pieceCount = count( $pieces );
	for ( $pos = 0; $pos < $pieceCount; $pos += 4 ) {
		$url = $pieces[$pos];
		// $pieces[$pos + 1] is the protocol, which is not needed
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$entityMatch = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
			$cut = $entityMatch[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$image = $this->maybeMakeExternalImage( $label );
		if ( $image !== false ) {
			$label = $image;
		}

		$dtrail = '';
		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber it
			$number = $this->getTargetLanguage()->formatNum( ++$this->mAutonumber );
			$label = '[' . $number . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			[ $dtrail, $trail ] = Linker::splitTrail( $trail );
			$linktype = 'text';
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
			$label = $this->getTargetLanguageConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		// @phan-suppress-next-line SecurityCheck-XSS,SecurityCheck-DoubleEscaped using false for escape is valid
		$html .= Linker::makeExternalLink( $url, $label, false, $linktype,
			$this->getExternalLinkAttribs( $url ), $this->getTitle() ) . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $html;
}
2198 
/**
 * Decide the base "rel" value for an external link.
 *
 * Reads the NoFollowLinks, NoFollowNsExceptions and NoFollowDomainExceptions
 * settings from the main config.
 *
 * @param string|bool $url URL checked against the domain exception list
 * @param LinkTarget|null $title Title whose namespace is checked against the
 *   namespace exception list
 * @return string|null 'nofollow', or null when nofollow does not apply
 */
public static function getExternalLinkRel( $url = false, LinkTarget $title = null ) {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$enabled = $config->get( 'NoFollowLinks' );
	$nsExceptions = $config->get( 'NoFollowNsExceptions' );
	$domainExceptions = $config->get( 'NoFollowDomainExceptions' );

	$namespace = false;
	if ( $title ) {
		$namespace = $title->getNamespace();
	}

	if ( !$enabled ) {
		return null;
	}
	if ( in_array( $namespace, $nsExceptions ) ) {
		return null;
	}
	if ( wfMatchesDomainList( $url, $domainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
2222 
/**
 * Build the attribute array for an external link to the given URL.
 *
 * 'rel' starts from getExternalLinkRel(). When the parser options specify
 * a link target, 'target' is set, and for targets other than _self,
 * _parent and _top the tokens "noreferrer noopener" are appended to 'rel'.
 *
 * @param string $url
 * @return array Always contains 'rel' (possibly null); contains 'target'
 *   when an external link target is configured
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->getTitle() );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// getExternalLinkRel() returns null when nofollow does not
			// apply; only add a separating space after an actual token, so
			// the value never starts with a stray space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2254 
/**
 * Normalize the percent-encoding of a URL.
 *
 * Unsafe characters are percent-encoded first. Then the fragment, query and
 * scheme/path parts are each passed through normalizeUrlComponent() with a
 * part-specific list of characters that must stay escaped. Finally, the
 * brackets of a valid IPv6 host literal, which the first step encoded, are
 * restored.
 *
 * @param string $url
 * @return string The normalized URL
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
		IPUtils::isValid( rawurldecode( $m[1] ) )
	) {
		$isIPv6 = rawurldecode( $m[1] );
	} else {
		$isIPv6 = false;
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		static function ( $m ) {
			return rawurlencode( $m[0] );
		},
		$url
	);

	# $ret is assembled back to front: fragment, then query, then the rest
	$ret = '';
	$end = strlen( $url );

	# Fragment part - 'fragment'
	$start = strpos( $url, '#' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
		$end = $start;
	}

	# Query part - 'query' minus &=+;
	$start = strpos( $url, '?' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
		$end = $start;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent(
		substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2323 
/**
 * Normalize the percent-escapes within one URL component.
 *
 * An escape is decoded when it stands for a character with a byte value
 * between 33 and 126 that is not listed in $unsafe; every other escape is
 * kept, with its hex digits upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must remain percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		static function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$mustStayEscaped = $code <= 32
				|| $code >= 127
				|| strpos( $unsafe, $decoded ) !== false;

			# Either leave it escaped (upper-casing a-f) or unescape it
			return $mustStayEscaped ? strtoupper( $escape[0] ) : $decoded;
		},
		$component
	);
}
2338 
/**
 * Make an external image for the URL if the parser options allow it.
 *
 * An image is produced when the URL matches EXT_IMAGE_REGEX and either
 * external images are allowed in general or the URL begins with one of the
 * configured "allow images from" prefixes. Failing that, when the image
 * whitelist is enabled, the URL is tested against each line of the
 * 'external_image_whitelist' message as a case-insensitive regex fragment.
 *
 * @param string $url
 * @return string|false Image HTML, or false if no image should be made
 */
private function maybeMakeExternalImage( $url ) {
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();

	# $allowedFrom could be either a single string or an array of strings
	$prefixMatched = false;
	if ( !empty( $allowedFrom ) ) {
		if ( is_array( $allowedFrom ) ) {
			foreach ( $allowedFrom as $prefix ) {
				if ( strpos( $url, $prefix ) === 0 ) {
					$prefixMatched = true;
					break;
				}
			}
		} else {
			$prefixMatched = ( strpos( $url, $allowedFrom ) === 0 );
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( $html
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $html;
	}

	$entries = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $entries as $entry ) {
		# Ignore blank entries and comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		# Sanitize the regex fragment and make it case-insensitive
		$entryRegex = '/' . str_replace( '/', '\\/', $entry ) . '/i';
		// @phan-suppress-next-line SecurityCheck-ReDoS preg_quote is not wanted here
		if ( preg_match( $entryRegex, $url ) ) {
			# Image matches a whitelist entry
			$html = Linker::makeExternalImage( $url );
			break;
		}
	}

	return $html;
}
2397 
/**
 * Process [[ ]] wikilinks in the text and merge the resulting link holders
 * into $this->mLinkHolders.
 *
 * @param string $text Passed by reference to handleInternalLinks2()
 * @return string The processed text
 */
private function handleInternalLinks( $text ) {
	// Fetch the target holder array first, then run the link pass
	$targetHolders = $this->mLinkHolders;
	$newHolders = $this->handleInternalLinks2( $text );
	$targetHolders->merge( $newHolders );

	return $text;
}
2409 
/**
 * Process [[ ]] wikilinks in $s, replacing them in place.
 *
 * The text is split on "[[" and each piece is examined in turn. Depending
 * on the target, a link becomes a language link or category registered on
 * the output object, an image, a self link, a media link, a known link, or
 * a placeholder in the returned LinkHolderArray. Pieces that do not form a
 * valid link are written back unchanged.
 *
 * @param string &$s Text to process; modified in place
 * @return LinkHolderArray Holders created for links not yet resolved
 */
private function handleInternalLinks2( &$s ) {
	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray(
		$this,
		$this->getContentLanguageConverter(),
		$this->getHookContainer() );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$nottalk = !$this->getTitle()->isTalkPage();

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $this->contLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
		$m = [];
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	# Some namespaces don't allow subpages
	$useSubpages = $this->nsInfo->hasSubpages(
		$this->getTitle()->getNamespace()
	);

	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				list( , $s, $prefix ) = $m;
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
			# the real problem is with the $e1 regex
			# See T1500.
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See T4095.
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that handleExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = ltrim( $m[1], ' ' );

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = Linker::normalizeSubpageLink(
				$this->getTitle(), $origLink, $text
			);
		} else {
			$link = $origLink;
		}

		// \x7f isn't a default legal title char, so most likely strip
		// markers will force us into the "invalid form" path above. But,
		// just in case, let's assert that xmlish tags aren't valid in
		// the title position.
		$unstrip = $this->mStripState->killMarkers( $link );
		$noMarkers = ( $unstrip === $link );

		$nt = $noMarkers ? Title::newFromText( $link ) : null;
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		# A leading ':' forces an ordinary link
		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns === NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->handleInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
			if ( !$noforce ) {
				# Strip off leading ':'
				$text = substr( $text, 1 );
			}
		} else {
			# T6598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			# -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					MediaWikiServices::getInstance()->getLanguageNameUtils()
						->getLanguageName(
							$iw,
							LanguageNameUtils::AUTONYMS,
							LanguageNameUtils::DEFINED
						)
					|| in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
				)
			) {
				# T26502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				# The language link itself emits nothing; drop the whitespace
				# that preceded it in the text.
				$s = rtrim( $s . $prefix ) . $trail; # T175416
				continue;
			}

			if ( $ns === NS_FILE ) {
				if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->getTitle() ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->handleExternalLinks( $text );
						$holders->merge( $this->handleInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
					continue;
				}
			} elseif ( $ns === NS_CATEGORY ) {
				# The category link itself emits nothing; drop the whitespace
				# that preceded it in the text.
				$s = rtrim( $s . $prefix ) . $trail; # T2087, T87753

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getTargetLanguageConverter()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns !== NS_SPECIAL && $nt->equals( $this->getTitle() ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns === NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = [];
			$descQuery = false;
			$this->hookRunner->onBeforeParserFetchFileAndTitle(
				$this, $nt, $options, $descQuery );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in handleExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, $trail, $prefix );
		}
	}
	return $holders;
}
2720 
/**
 * Build the HTML for a link that is known to exist and armor it so that
 * any URL protocol inside the generated markup is not picked up again
 * by later link processing.
 *
 * The trail is split with Linker::splitTrail(): the first part is placed
 * inside the link text, the remainder is appended after the armored link.
 *
 * @param LinkTarget $nt Link target
 * @param string $text Link text (treated as HTML); when empty, the escaped
 *   prefixed title text is used instead
 * @param string $trail Text following the link
 * @param string $prefix Text placed in front of the link text, inside the link
 * @return string Armored link HTML followed by the unused part of the trail
 */
private function makeKnownLinkHolder( LinkTarget $nt, $text = '', $trail = '', $prefix = '' ) {
	[ $inside, $trail ] = Linker::splitTrail( $trail );

	# Fall back to the (escaped) title when no explicit label was given
	$label = ( $text == '' )
		? htmlspecialchars( $this->titleFormatter->getPrefixedText( $nt ) )
		: $text;

	$armoredLabel = new HtmlArmor( $prefix . $label . $inside );
	$html = $this->getLinkRenderer()->makeKnownLink( $nt, $armoredLabel );

	return $this->armorLinks( $html ) . $trail;
}
2747 
/**
 * Prefix every URL protocol found in $text (case-insensitively, at a word
 * boundary) with MARKER_PREFIX . "NOPARSE".
 *
 * Callers in this class use it to cloak absolute URLs in generated link
 * markup so that handleExternalLinks() will not touch them.
 *
 * @param string $text
 * @return string
 */
private function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$cloaked = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $cloaked, $text );
}
2762 
/**
 * Deprecated pass-through to BlockLevelPass::doBlockLevels().
 *
 * @deprecated since 1.35; a deprecation warning is emitted on every call
 * @param string $text
 * @param bool $linestart Forwarded unchanged to BlockLevelPass
 * @return string Result of BlockLevelPass::doBlockLevels()
 */
public function doBlockLevels( $text, $linestart ) {
	wfDeprecated( __METHOD__, '1.35' );

	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );
	return $processed;
}
2776 
/**
 * Return the value of a magic variable, using and filling $this->mVarCache.
 *
 * Core variables are resolved by CoreMagicVariables::expand() and cached
 * here. Anything that is not a core variable is handed to the
 * ParserGetVariableValueSwitch hook, which is expected to write its
 * result into the cache itself; deprecation warnings are raised when a
 * handler changes $index or returns a value that it did not cache.
 *
 * @param string $index Magic variable ID, as matched via $this->mVariables
 *   in braceSubstitution()
 * @param PPFrame|false $frame Frame forwarded to the hook
 * @return string|null Value of the variable; null when neither core nor
 *   a hook handler produced one
 */
private function expandMagicVariable( $index, $frame = false ) {
	# Hook handlers may pre-populate or veto the cache; only trust the
	# cached value when the hook run returns a truthy result.
	if (
		$this->hookRunner->onParserGetVariableValueVarCache( $this, $this->mVarCache ) &&
		isset( $this->mVarCache[$index] )
	) {
		return $this->mVarCache[$index];
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	$this->hookRunner->onParserGetVariableValueTs( $this, $ts );

	$value = CoreMagicVariables::expand(
		$this, $index, $ts, $this->nsInfo, $this->svcOptions, $this->logger
	);

	if ( $value === null ) {
		// Not a defined core magic word
		$ret = null;
		$originalIndex = $index;
		$this->hookRunner->onParserGetVariableValueSwitch( $this,
			$this->mVarCache, $index, $ret, $frame );
		if ( $index !== $originalIndex ) {
			wfDeprecatedMsg(
				'A ParserGetVariableValueSwitch hook handler modified $index, ' .
				'this is deprecated since MediaWiki 1.35',
				'1.35', false, false
			);
		}
		if ( !isset( $this->mVarCache[$originalIndex] ) ||
			$this->mVarCache[$originalIndex] !== $ret ) {
			wfDeprecatedMsg(
				'A ParserGetVariableValueSwitch hook handler bypassed the cache, ' .
				'this is deprecated since MediaWiki 1.35', '1.35', false, false
			);
		}
		// FIXME: in the future, don't give this hook unrestricted
		// access to mVarCache; we can cache it ourselves by falling
		// through here.
		return $ret;
	}

	$this->mVarCache[$index] = $value;

	return $value;
}
2833 
/**
 * Populate $this->mVariables and $this->mSubstWords with magic word
 * arrays built from the variable IDs and subst IDs known to the
 * MagicWordFactory.
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$this->mVariables = $factory->newArray( $factory->getVariableIDs() );
	$this->mSubstWords = $factory->newArray( $factory->getSubstIDs() );
}
2845 
/**
 * Run the preprocessor over wikitext and return the resulting node tree.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Flags forwarded to Preprocessor::preprocessToObj(),
 *   e.g. Preprocessor::DOM_FOR_INCLUSION
 * @return PPNode Whatever preprocessToObj() returns for the given input
 */
public function preprocessToDom( $text, $flags = 0 ) {
	$preprocessor = $this->getPreprocessor();

	return $preprocessor->preprocessToObj( $text, $flags );
}
2867 
/**
 * Preprocess $text and expand it in the given frame.
 *
 * Empty text, and text longer than the configured maximum include size,
 * is returned unchanged.
 *
 * @param string $text Wikitext to expand
 * @param PPFrame|array|false $frame Frame to expand in. False creates a
 *   fresh frame; any other non-PPFrame value is wrapped in a custom frame.
 * @param bool $argsOnly When true, expansion uses PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Nothing to do for empty input; refuse oversized input outright
	$length = strlen( $text );
	if ( $length === 0 || $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( $frame === false ) {
		$frame = $this->getPreprocessor()->newFrame();
	} elseif ( !( $frame instanceof PPFrame ) ) {
		$this->logger->debug(
			__METHOD__ . " called using plain parameters instead of " .
			"a PPFrame instance. Creating custom frame."
		);
		$frame = $this->getPreprocessor()->newCustomFrame( $frame );
	}

	$tree = $this->preprocessToDom( $text );
	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $tree, $expandFlags );
}
2912 
/**
 * Record that a parser limit was hit: add the "<type>-warning" message to
 * the output and put the page into the "<type>-category" tracking category.
 *
 * @param string $limitationType Base name of the warning message and
 *   tracking category
 * @param string|int $current Current value, passed as a numeric message parameter
 * @param string|int $max Maximum allowed value, passed as a numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max is harmless for messages that do not use them.
	# The message is deliberately not put in the user language: it is only
	# shown during preview, and doing so would split the parser cache.
	$warningKey = $limitationType . '-warning';
	$categoryKey = $limitationType . '-category';

	$this->mOutput->addWarningMsg(
		$warningKey,
		Message::numParam( $current ),
		Message::numParam( $max )
	);
	$this->addTrackingCategory( $categoryKey );
}
2951 
/**
 * Expand a double-brace construct ({{...}}): subst handling, magic
 * variables, parser functions, special page inclusion, template
 * transclusion and interwiki transclusion, tried in that order.
 *
 * @param array $piece Parsed construct with keys:
 *   - 'title': node for the part before the first pipe
 *   - 'parts': node list of the remaining arguments (may be empty/null)
 *   - 'lineStart': whether the construct began at the start of a line
 * @param PPFrame $frame Frame in which the construct is being expanded
 * @return array Either [ 'text' => string ] for finished output, or
 *   [ 'object' => node ] when the caller still has to expand the result
 *   in the current frame
 */
public function braceSubstitution( array $piece, PPFrame $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1.
	# When there are no parts, use an empty node array rather than a plain PHP
	# array, because getLength() and item() are called on $args below.
	$args = ( $piece['parts'] == null )
		? $this->getPreprocessor()->newPartNodeArray( [] )
		: $piece['parts'];

	$profileSection = null; // profile templates

	$sawDeprecatedTemplateEquals = false; // T91154

	# SUBST
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->expandMagicVariable( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = Linker::normalizeSubpageLink(
			$this->getTitle(), $part1, $subpage
		);
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguageConverter()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguageConverter()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->userFactory->newFromUserIdentity( $this->getUserIdentity() ) );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
					if (
						$title->getNamespace() === NS_TEMPLATE &&
						$title->getDBkey() === '=' &&
						$originalTitle === '='
					) {
						// Note that we won't get here if `=` is evaluated
						// (in the future) as a parser function, nor if
						// the Template namespace is given explicitly,
						// ie `{{Template:=}}`. Only `{{=}}` triggers.
						$sawDeprecatedTemplateEquals = true; // T91154
					}
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarningMsg(
				'template-loop-warning',
				Message::plaintextParam( $titleText )
			);
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}
	if (
		$sawDeprecatedTemplateEquals &&
		$this->mStripState->unstripBoth( $text ) !== '='
	) {
		// T91154: {{=}} is deprecated when it doesn't expand to `=`;
		// use {{Template:=}} if you must.
		$this->addTrackingCategory( 'template-equals-category' );
		$this->mOutput->addWarningMsg( 'template-equals-warning' );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		// @phan-suppress-next-line SecurityCheck-XSS Mixed mode, here html and safe
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		// @phan-suppress-next-line SecurityCheck-DoubleEscaped Mixed mode, here html and safe
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3326 
/**
 * Look up a parser function by name, invoke its registered callback and
 * normalise the callback's return value into an array.
 *
 * @param PPFrame $frame Frame used to expand node arguments, and passed
 *   to callbacks registered with SFH_OBJECT_ARGS
 * @param string $function Function name as written in the wikitext;
 *   tried as a case-sensitive synonym first, then lower-cased
 * @param array $args Arguments: PPNode objects or plain values. Index 0
 *   is passed through untouched in SFH_OBJECT_ARGS mode.
 * @return array [ 'found' => false ] when no such function is registered.
 *   Otherwise the callback's result with 'found' defaulting to true and
 *   'text' filled from element 0 if needed. When the callback sets
 *   'noparse' to a false value, 'text' is preprocessed into a node tree
 *   and 'isChildObj' is set to true.
 */
public function callParserFunction( PPFrame $frame, $function, array $args = [] ) {
	# Resolve the synonym: case sensitive table first, then case insensitive
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$hookId = $this->mFunctionSynonyms[1][$function];
	} else {
		$lowered = $this->contLang->lc( $function );
		if ( !isset( $this->mFunctionSynonyms[0][$lowered] ) ) {
			return [ 'found' => false ];
		}
		$hookId = $this->mFunctionSynonyms[0][$lowered];
	}

	[ $callback, $flags ] = $this->mFunctionHooks[$hookId];

	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Object mode: hand over ( parser, frame, array of nodes )
		$nodeArgs = [];
		foreach ( $args as $key => $arg ) {
			$nodeArgs[] = ( $arg instanceof PPNode || $key === 0 )
				? $arg
				: $this->mPreprocessor->newPartNodeArray( [ $key => $arg ] )->item( 0 );
		}
		$callArgs = [ $this, $frame, $nodeArgs ];
	} else {
		# Plain mode: hand over ( parser, trimmed string, trimmed string, ... )
		$callArgs = [ $this ];
		foreach ( $args as $key => $arg ) {
			if ( $arg instanceof PPNode ) {
				$plain = $frame->expand( $arg );
			} elseif ( is_int( $key ) && $key >= 0 ) {
				$plain = $arg;
			} else {
				$plain = "$key=$arg";
			}
			$callArgs[] = trim( $plain );
		}
	}

	$result = $callback( ...$callArgs );

	# Hooks may return either a wikitext string or an array holding the
	# string plus flags; bring both shapes into the array form.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	$noparse = $result['noparse'] ?? true;
	$preprocessFlags = $result['preprocessFlags'] ?? 0;

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3425 
/**
 * Fetch the preprocessed node tree of a template, using the per-parser
 * DOM cache ($this->mTplDomCache) and redirect cache ($this->mTplRedirCache).
 *
 * @param LinkTarget $title Template to load
 * @return array Two elements: the node tree (false when the template text
 *   could not be fetched) and the title that was finally used, which may
 *   differ from the requested one
 */
public function getTemplateDom( LinkTarget $title ) {
	$requested = $title;
	$key = CacheKeyHelper::getKeyForPage( $title );

	# A previous fetch may have recorded that this title maps to another one
	if ( isset( $this->mTplRedirCache[$key] ) ) {
		[ $targetNs, $targetDbKey ] = $this->mTplRedirCache[$key];
		$title = Title::makeTitle( $targetNs, $targetDbKey );
		$key = CacheKeyHelper::getKeyForPage( $title );
	}
	if ( isset( $this->mTplDomCache[$key] ) ) {
		return [ $this->mTplDomCache[$key], $title ];
	}

	# Cache miss, go to the database
	[ $wikitext, $title ] = $this->fetchTemplateAndTitle( $title );

	if ( $wikitext === false ) {
		$this->mTplDomCache[$key] = false;
		return [ false, $title ];
	}

	$tree = $this->preprocessToDom( $wikitext, Preprocessor::DOM_FOR_INCLUSION );
	$this->mTplDomCache[$key] = $tree;

	# Remember the mapping when the fetch ended on a different page
	if ( !$title->isSamePageAs( $requested ) ) {
		$requestedKey = CacheKeyHelper::getKeyForPage( $requested );
		$this->mTplRedirCache[$requestedKey] = [ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $tree, $title ];
}
3466 
/**
 * Fetch the current revision of a page through the callback configured in
 * the parser options, memoising the result per page in an LRU cache of
 * 100 entries.
 *
 * @param LinkTarget $link Page whose current revision is wanted
 * @return RevisionRecord|null Whatever the callback returned, with any
 *   falsy result normalised to null
 */
public function fetchCurrentRevisionRecordOfTitle( LinkTarget $link ) {
	$cacheKey = CacheKeyHelper::getKeyForPage( $link );
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$title = Title::castFromLinkTarget( $link ); // hook signature compat
		$revisionRecord =
			// Defaults to Parser::statelessFetchRevisionRecord()
			call_user_func(
				$this->mOptions->getCurrentRevisionRecordCallback(),
				$title,
				$this
			);
		if ( !$revisionRecord ) {
			// Parser::statelessFetchRevisionRecord() can return false;
			// normalize it to null.
			$revisionRecord = null;
		}
		$this->currentRevisionCache->set( $cacheKey, $revisionRecord );
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3503 
/**
 * Tell whether the current-revision cache already holds an entry for a page.
 *
 * @param LinkTarget $link
 * @return bool False when the cache has not been created yet or has no
 *   entry for this page
 */
public function isCurrentRevisionOfTitleCached( LinkTarget $link ) {
	$key = CacheKeyHelper::getKeyForPage( $link );

	if ( !$this->currentRevisionCache ) {
		return false;
	}

	return (bool)$this->currentRevisionCache->has( $key );
}
3517 
/**
 * Look up the known current revision of a page through the RevisionLookup
 * service, without touching any parser state.
 *
 * @param LinkTarget $link Page to look up
 * @param Parser|null $parser Not used by this implementation
 * @return RevisionRecord|false Result of RevisionLookup::getKnownCurrentRevision()
 */
public static function statelessFetchRevisionRecord( LinkTarget $link, $parser = null ) {
	// A link that already is a PageIdentity (probably a Title) is used as is.
	// XXX: otherwise use RevisionStore::getPageForLink()!
	// ...but get the info for the current revision at the same time?
	// Should RevisionStore::getKnownCurrentRevision accept a LinkTarget?
	$page = ( $link instanceof PageIdentity )
		? $link
		: Title::castFromLinkTarget( $link );

	$lookup = MediaWikiServices::getInstance()->getRevisionLookup();

	return $lookup->getKnownCurrentRevision( $page );
}
3542 
/**
 * Fetch the wikitext of a template through the template callback from the
 * parser options, and register every reported dependency on the output.
 *
 * @param LinkTarget $link Template to fetch
 * @return array Two elements: the template text as returned by the
 *   callback (with U+007F replaced by "?" when it is a string) and the
 *   final Title, which falls back to the requested one
 */
public function fetchTemplateAndTitle( LinkTarget $link ) {
	// Use Title for compatibility with callbacks and return type
	$title = Title::castFromLinkTarget( $link );

	// Defaults to Parser::statelessFetchTemplate()
	$fetcher = $this->mOptions->getTemplateCallback();
	$fetched = call_user_func( $fetcher, $title, $this );

	$revRecord = $fetched['revision-record'] ?? null;

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}

	$finalTitle = $fetched['finalTitle'] ?? $title;
	$dependencies = $fetched['deps'] ?? [];

	foreach ( $dependencies as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		$isSelfTransclusion = $dep['title']->equals( $this->getTitle() )
			&& $revRecord instanceof RevisionRecord;
		if ( !$isSelfTransclusion ) {
			continue;
		}

		// Self-transclusion; final result may change based on the new page version
		try {
			$sha1 = $revRecord->getSha1();
		} catch ( RevisionAccessException $e ) {
			$sha1 = null;
		}
		$this->setOutputFlag( ParserOutputFlags::VARY_REVISION_SHA1, 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $sha1 );
	}

	return [ $text, $finalTitle ];
}
3584 
/**
 * Fetch the text of a template without relying on parser state, following
 * redirects for at most three iterations and collecting the pages that
 * were consulted along the way.
 *
 * @param LinkTarget $page Template to fetch
 * @param Parser|false $parser Parser on whose behalf the fetch happens;
 *   supplies the context title and the current-revision cache when given
 * @return array With keys:
 *   - 'revision-record': RevisionRecord of the last page looked at, or false
 *   - 'text': template wikitext, or false when it could not be obtained
 *   - 'finalTitle': Title the text was finally taken from
 *   - 'deps': list of [ 'title', 'page_id', 'rev_id' ] dependency entries
 */
public static function statelessFetchTemplate( $page, $parser = false ) {
	$title = Title::castFromLinkTarget( $page ); // for compatibility with return type
	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];
	$revRecord = null;
	$contextTitle = $parser ? $parser->getTitle() : null;

	# Loop to fetch the article, with up to 2 redirects
	$revLookup = MediaWikiServices::getInstance()->getRevisionLookup();
	for ( $i = 0; $i < 3 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$revRecord = null; # Assume no hook
		$id = false; # Assume current
		$origTitle = $title;
		$titleChanged = false;
		Hooks::runner()->onBeforeParserFetchTemplateRevisionRecord(
			# The $title is a not a PageIdentity, as it may
			# contain fragments or even represent an attempt to transclude
			# a broken or otherwise-missing Title, which the hook may
			# fix up. Similarly, the $contextTitle may represent a special
			# page or other page which "exists" as a parsing context but
			# is not in the DB.
			$contextTitle, $title,
			$skip, $revRecord
		);
		if ( !$skip && !$revRecord ) {
			# Deprecated legacy hook
			Hooks::runner()->onBeforeParserFetchTemplateAndtitle(
				$parser, $title, $skip, $id
			);
		}

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the revision
		if ( !$revRecord ) {
			if ( $id ) {
				# Handle $id returned by deprecated legacy hook
				$revRecord = $revLookup->getRevisionById( $id );
			} elseif ( $parser ) {
				$revRecord = $parser->fetchCurrentRevisionRecordOfTitle( $title );
			} else {
				$revRecord = $revLookup->getRevisionByTitle( $title );
			}
		}
		if ( $revRecord ) {
			# Update title, as $revRecord may have been changed by hook
			$title = Title::castFromLinkTarget(
				$revRecord->getPageAsLinkTarget()
			);
			$deps[] = [
				'title' => $title,
				'page_id' => $revRecord->getPageId(),
				'rev_id' => $revRecord->getId(),
			];
		} else {
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null,
			];
		}
		if ( !$title->equals( $origTitle ) ) {
			# If we fetched a rev from a different title, register
			# the original title too...
			$deps[] = [
				'title' => $origTitle,
				'page_id' => $origTitle->getArticleID(),
				'rev_id' => null,
			];
			$titleChanged = true;
		}
		# If there is no current revision, there is no page
		if ( $revRecord === null || $revRecord->getId() === null ) {
			$linkCache = MediaWikiServices::getInstance()->getLinkCache();
			$linkCache->addBadLinkObj( $title );
		}
		if ( $revRecord ) {
			if ( $titleChanged && !$revRecord->hasSlot( SlotRecord::MAIN ) ) {
				// We've added this (missing) title to the dependencies;
				// give the hook another chance to redirect it to an
				// actual page.
				$text = false;
				$finalTitle = $title;
				continue;
			}
			if ( $revRecord->hasSlot( SlotRecord::MAIN ) ) { // T276476
				$content = $revRecord->getContent( SlotRecord::MAIN );
				$text = $content ? $content->getWikitextForTransclusion() : null;
			} else {
				$text = false;
			}

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() === NS_MEDIAWIKI ) {
			# No revision, but the page may still be backed by an interface message
			$message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
				lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$text = $message->plain();
			break;
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	$retValues = [
		// previously, when this also returned a Revision object, we set
		// 'revision-record' to false instead of null if it was unavailable,
		// so that callers to use isset and then rely on the revision-record
		// key instead of the revision key, even if there was no corresponding
		// object - we continue to set to false here for backwards compatability
		'revision-record' => $revRecord ?: false,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
	return $retValues;
}
3733 
/**
 * Fetch a file and register it on the parser output as an image dependency.
 *
 * When the fetched file reports a title that differs from the requested
 * link, that title is registered as a dependency as well.
 *
 * NOTE(review): the Title returned is always built from $link, even when
 * the file's own title differs — confirm that callers expecting the
 * "updated" file title are fine with this.
 *
 * @param LinkTarget $link File to fetch
 * @param array $options Options forwarded to fetchFileNoRegister()
 * @return array Two elements: the file (or false) and a Title for $link
 */
public function fetchFileAndTitle( LinkTarget $link, array $options = [] ) {
	$file = $this->fetchFileNoRegister( $link, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $link->getDBkey(), $time, $sha1 );

	# ...and the title the file actually lives under, when it differs
	if ( $file && !$link->isSameLinkAs( $file->getTitle() ) ) {
		$actualTitle = $file->getTitle();
		$this->mOutput->addImage( $actualTitle->getDBkey(), $time, $sha1 );
	}

	// Title rather than LinkTarget, for return type compat
	return [ $file, Title::castFromLinkTarget( $link ) ];
}
3758 
/**
 * Look up a file in the repo group without registering it as a dependency.
 *
 * @param LinkTarget $link File to look up by name
 * @param array $options Lookup options. Recognised here:
 *   - 'broken': when set, no lookup happens and false is returned
 *   - 'sha1': when set, the file is looked up by this key instead of by name
 *   The whole array is also forwarded to the repo group.
 * @return File|false Result of the repo lookup, or false for 'broken'
 */
protected function fetchFileNoRegister( LinkTarget $link, array $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	return isset( $options['sha1'] )
		// get by (sha1,timestamp)
		? $repoGroup->findFileFromKey( $options['sha1'], $options )
		// get by (name,timestamp)
		: $repoGroup->findFile( $link, $options );
}
3782 
/**
 * Transclude a page from another wiki by requesting it over HTTP, with
 * the response kept in the main WAN object cache.
 *
 * @param LinkTarget $link Interwiki page to transclude
 * @param string $action Value of the "action" URL parameter; callers in
 *   this class pass 'render' or 'raw'
 * @return string The fetched content, or a localised (content language)
 *   message when the feature is disabled, the URL exceeds 1024 bytes, or
 *   the request failed
 */
public function interwikiTransclude( LinkTarget $link, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	// TODO: extract relevant functionality from Title
	$title = Title::castFromLinkTarget( $link );

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$hasWikiId = ( $wikiId !== false );

	// __METHOD__ inside the closure would name the closure, so capture it here
	$caller = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$expiry = $this->svcOptions->get( 'TranscludeCacheExpiry' );

	$fetchRemote = function ( $oldValue, &$ttl ) use ( $url, $caller, $cache ) {
		$req = $this->httpRequestFactory->create( $url, [], $caller );

		$status = $req->execute(); // Status object
		$succeeded = $status->isOK();
		if ( !$succeeded ) {
			// Never cache a failed request
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $succeeded ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback( $cacheKey, $expiry, $fetchRemote, $cacheOptions );

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3853 
/**
 * Resolve a triple-brace argument against the given frame.
 *
 * @param array $piece Uses the 'title' and 'parts' entries
 * @param PPFrame $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] or [ 'text' => ... ]
 */
public function argSubstitution( array $piece, PPFrame $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );

	$object = false;
	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultAllowed = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultAllowed ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	$tooLarge = !$this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( $tooLarge ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere: rebuild the literal {{{name|...}}} construct
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $tooLarge ) {
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	return $object !== false
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
3900 
/**
 * Produce the output for an extension tag, or a strip marker standing in for it.
 *
 * @param array $params Reads the entries 'name', 'attr', 'inner', 'close'
 *   and 'attributes'
 * @param PPFrame $frame Frame used to expand the pieces of the tag
 * @return string One of: an expansion error bubbled up unchanged, the raw
 *   output (marker type 'none'), or the strip marker that was registered
 * @throws MWException When a tag hook asks for an unknown marker type
 */
public function extensionSubstitution( array $params, PPFrame $frame ) {
	$errorStr = '<span class="error">';
	$errorLen = strlen( $errorStr );

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	// Always a string from here on: passing null to substr() is deprecated
	// as of PHP 8.1, and the non-HTML branch wants '' for "no attributes" anyway.
	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : '';
	if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$markerType = 'general';
	if ( $this->ot['html'] ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			// Attributes parsed from the tag text win over the supplied ones
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			// Note that $content may be null here, for example if the
			// tag is self-closed.
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// Not rendering HTML: rebuild the tag as text
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = $params['close'] === null ? '' : $frame->expand( $params['close'] );
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4002 
/**
 * Add to the running include size of the given type, unless that would
 * exceed the maximum include size from the parser options.
 *
 * @param string $type Key into the include size counters
 * @param int $size Amount to add
 * @return bool False if the limit would be exceeded (counter left untouched),
 *   true if the counter was increased
 */
private function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4018 
4024  $this->mExpensiveFunctionCount++;
4025  return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4026  }
4027 
/**
 * Strip double-underscore magic words from the text and apply their effects
 * to the parser state and the parser output.
 *
 * @param string $text
 * @return string The text with the magic words removed; the first __TOC__
 *   is replaced by the TOC placeholder
 */
private function handleDoubleUnderscore( $text ) {
	# __TOC__ is handled first because its position has to be remembered
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# The first occurrence becomes a placeholder that is filled in with
		# the TOC at the end; every further occurrence is dropped.
		$text = $tocWord->replace( self::TOC_PLACEHOLDER, $text, 1 );
		$text = $tocWord->replace( '', $text );
	}

	# Match and remove all the others
	$allWords = $this->magicWordFactory->getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $allWords->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->setNoGallery( true );
	}

	# An explicitly positioned TOC is not suppressed by __NOTOC__
	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->getTitle()->getNamespace() === NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# 'index' comes last so that __INDEX__ always overrides __NOINDEX__, see T16899
	$indexPolicies = [
		'noindex' => 'noindex-category',
		'index' => 'index-category',
	];
	foreach ( $indexPolicies as $policy => $trackingCategory ) {
		if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->getTitle()->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( $trackingCategory );
		}
	}

	# Record every double underscore that was found as a page property
	foreach ( array_keys( $this->mDoubleUnderscores ) as $word ) {
		$this->mOutput->setPageProperty( $word, '' );
	}

	return $text;
}
4082 
/**
 * Add a tracking category for the current page to the parser output.
 *
 * @param string $msg Passed on to the tracking category service
 * @return mixed Whatever the tracking category service returns
 */
public function addTrackingCategory( $msg ) {
	$output = $this->mOutput;
	$page = $this->getPage();

	return $this->trackingCategories->addTrackingCategory( $output, $msg, $page );
}
4094 
/**
 * Number the headings found in the rendered text, give each one an anchor
 * and (optionally) an edit-section marker, and build the table of contents
 * and the section list.
 *
 * Side effects visible here: may set the new-section flags and the TOC HTML
 * on the parser output, stores the section list when $isMain is true,
 * temporarily switches the output type to OT_WIKI, and runs the
 * ParserSectionCreate hook once per section.
 *
 * @param string $text Rendered text containing <h1>..<h6> elements
 * @param string $origText Wikitext that is preprocessed to a DOM in order to
 *   compute the offset of each section
 * @param bool $isMain When false, the section list is not stored and no TOC
 *   placeholder is added to the first section
 * @return string The text with every heading replaced by its final form
 */
private function finalizeHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page.
	# Actual presence will depend on post-cache transforms.
	$maybeShowEditLink = !isset( $this->mDoubleUnderscores['noeditsection'] );

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->setHideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->getTitle()->getPrefixedDBkey();
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Anchors already handed out, keyed by lower-cased id
	$refers = [];

	$headlines = $numMatches !== false ? $matches['header'] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches['level'][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				// @phan-suppress-next-line PhanTypeInvalidDimOffset
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					// @phan-suppress-previous-line PhanTypeInvalidDimOffset
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			# Already taken: append the first free _N suffix
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only register a fallback anchor when there is one. Using false as an
		# array key would write to key 0 and make a later heading whose id is
		# "0" look like a duplicate.
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine(
				$linkAnchor,
				$tocline,
				$numbering,
				$toclevel,
				( $isTemplate ? false : $sectionIndex )
			);
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			$byteOffset += mb_strlen(
				$this->mStripState->unstripBoth(
					$frame->expand( $node, PPFrame::RECOVER_ORIG )
				)
			);
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->getTitle()->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			// @phan-suppress-next-line SecurityCheck-DoubleEscaped
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline(
			$level,
			$matches['attrib'][$headlineCount],
			$anchor,
			$headline,
			$editlink,
			$fallbackAnchor
		);

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		// Run the ParserSectionCreate hook, once per section
		$this->hookRunner->onParserSectionCreate( $this, $i, $sections[$i], $maybeShowEditLink );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= self::TOC_PLACEHOLDER . "\n";
	}

	return implode( '', $sections );
}
4496 
/**
 * Transform wikitext before it is saved.
 *
 * @param string $text Wikitext
 * @param PageReference $page Page the text belongs to
 * @param UserIdentity $user User set on the parser for the duration of the call
 * @param ParserOptions $options The pass-2 transform only runs when
 *   getPreSaveTransform() is truthy
 * @param bool $clearState Whether to take the parser lock and clear the state
 * @return string The transformed wikitext
 */
public function preSaveTransform(
	$text,
	PageReference $page,
	UserIdentity $user,
	ParserOptions $options,
	$clearState = true
) {
	if ( $clearState ) {
		// Held only for its lifetime: the value is not read again
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $page, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings (including
	// trimming trailing whitespace). The latter is kept for
	// backwards-compatibility with other code that just calls PST, but
	// should already be handled in TextContent subclasses.
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}

	// Trim trailing whitespace again after unstripping, because the
	// previous steps can introduce it.
	$text = rtrim( $this->mStripState->unstripBoth( $text ) );

	$this->hookRunner->onParserPreSaveTransformComplete( $this, $text );

	# Reset
	$this->setUser( null );

	return $text;
}
4544 
/**
 * Second pass of the pre-save transform: expands variables, replaces
 * signature tildes and applies the pipe trick to links.
 *
 * @param string $text Wikitext
 * @param UserIdentity $user User whose signature replaces the tildes
 * @return string The transformed wikitext
 */
private function pstPass2( $text, UserIdentity $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		# strtr() tries the longest key first, so five tildes win over four and three
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( ParserOutputFlags::USER_SIGNATURE, 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# NOTE: the fullwidth characters below are literal UTF-8 on purpose. The
	# patterns are matched byte-wise (no /u modifier), consistent with the
	# \x80-\xff range in $nc.

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
	// [[ns:page (context), context|]] (using single, double-width or Arabic comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，|، )$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		# Current title ends in " (context)"
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		# Current title has a prefix and/or a ", context" suffix
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4614 
/**
 * Build the signature wikitext for a user.
 *
 * @param UserIdentity $user
 * @param string|false|null $nickname Nickname to use; false reads the
 *   'nickname' user option
 * @param bool|null $fancySig Whether the nickname is treated as markup; null
 *   reads the 'fancysig' user option
 * @return string Either the cleaned custom signature, or the text of the
 *   'signature' / 'signature-anon' message in the content language
 */
public function getUserSig( UserIdentity $user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Fill in whatever was not given from the user's options
	if ( $nickname === false ) {
		$nickname = $this->userOptionsLookup->getOption( $user, 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $this->userOptionsLookup->getBoolOption( $user, 'fancysig' );
	}

	if ( $nickname === null || $nickname === '' ) {
		// Empty value results in the default signature (even when fancysig is enabled)
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' ) ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		$accepted = $this->validateSig( $nickname ) !== false;

		# New validator
		if ( $accepted && $this->svcOptions->get( 'SignatureValidation' ) === 'disallow' ) {
			$services = MediaWikiServices::getInstance();
			$validator = $services->getSignatureValidatorFactory()->newSignatureValidator(
				$user,
				null,
				$services->getParser()->getOptions()
			);
			// A truthy result from the validator means the signature is rejected
			$accepted = !$validator->validateSignature( $nickname );
		}

		if ( $accepted ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}

		# Failed to validate; fall back to the default
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has invalid signature." );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$messageKey = $user->isRegistered() ? 'signature' : 'signature-anon';

	return wfMessage( $messageKey, wfEscapeWikiText( $username ), wfEscapeWikiText( $nickname ) )
		->inContentLanguage()
		->page( $this->getPage() )
		->text();
}
4683 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|false The text unchanged if it is well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4694 
/**
 * Clean up signature text: force substitution on every template call,
 * remove nested signature tildes and expand the result.
 *
 * @param string $text Signature wikitext
 * @param bool $parsing True when called from within a parse that is already
 *   set up. When false, this method takes the parser lock, starts a parse of
 *   its own against the global $wgTitle and unstrips the result before
 *   returning it.
 * @return string The cleaned signature; the input unchanged when signature
 *   cleaning is disabled in the parser options
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		global $wgTitle;
		// Held only for its lifetime: the value is not read again
		$magicScopeVariable = $this->lock();
		// startParse() requires the parser options and the output type
		// before the $clearState flag.
		$this->startParse(
			$wgTitle,
			ParserOptions::newFromAnon(),
			self::OT_PREPROCESS,
			true
		);
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$substWord = $this->magicWordFactory->get( 'subst' );
	# Matches every "{{" that is not already followed by the subst magic word
	$substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
	$substText = '{{' . $substWord->getSynonym( 0 );

	$text = preg_replace( $substRegex, $substText, $text );
	$text = self::cleanSigInSig( $text );
	$dom = $this->preprocessToDom( $text );
	$frame = $this->getPreprocessor()->newFrame();
	$text = $frame->expand( $dom );

	if ( !$parsing ) {
		$text = $this->mStripState->unstripBoth( $text );
	}

	return $text;
}
4741 
/**
 * Remove every run of three to five tildes from the text.
 *
 * @param string $text
 * @return string
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4753 
/**
 * Replace every occurrence of the TOC placeholder in the text.
 *
 * @param string $text
 * @param string $toc Replacement for the placeholder
 * @return string
 */
public static function replaceTableOfContentsMarker( $text, $toc ) {
	$placeholder = self::TOC_PLACEHOLDER;

	return str_replace( $placeholder, $toc, $text );
}
4777 
/**
 * Set up the parser for a parse, optionally pinning the revision ID.
 *
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @param bool $clearState
 * @param int|null $revId Stored as the revision ID unless null
 */
public function startExternalParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $page, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4797 
/**
 * Set the page, options and output type, then optionally clear the state.
 *
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @param bool $clearState
 */
private function startParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setPage( $page );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4814 
/**
 * Preprocess the text of an interface message.
 *
 * A static flag guards against re-entry: a nested call returns its input
 * unchanged.
 *
 * @param string $text Message text
 * @param ParserOptions $options
 * @param PageReference|null $page Context page; the global $wgTitle is used
 *   when this is not given
 * @return string
 */
public function transformMsg( $text, ParserOptions $options, ?PageReference $page = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$page ) {
			global $wgTitle;
			$page = $wgTitle;
		}

		return $this->preprocess( $text, $page, $options );
	} finally {
		# Reset even when preprocess() throws, otherwise every later call in
		# this process would return its input untransformed.
		$executing = false;
	}
}
4843 
/**
 * Register a callback for an extension tag.
 *
 * @param string $tag Tag name; it is lower-cased before being stored
 * @param callable $callback
 * @return callable|null The callback previously registered for the tag, if any
 * @throws MWException If the tag name contains '<', '>', CR or LF
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	// Strict comparison: a loose in_array() treats numeric strings such as
	// "10" and "1e1" as equal, which would keep a tag off the strip list.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4882 
/**
 * Forget all registered tag hooks and empty the strip list.
 */
public function clearTagHooks() {
	$this->mStripList = [];
	$this->mTagHooks = [];
}
4891 
/**
 * Register a parser function callback and index it under every synonym of
 * its magic word.
 *
 * @param string $id Magic word ID
 * @param callable $callback
 * @param int $flags Bit field of SFH_* constants; only SFH_NO_HASH is read here
 * @return callable|null The callback previously registered under this ID, if any
 * @throws MWException If no magic word is returned for the ID
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $magicWord->getSynonyms();
	# 0 = case-insensitive, 1 = case-sensitive; also the first-level cache key
	$caseKey = (int)$magicWord->isCaseSensitive();
	$addHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $synonyms as $synonym ) {
		# Case
		if ( $caseKey === 0 ) {
			$synonym = $this->contLang->lc( $synonym );
		}
		# Add leading hash
		if ( $addHash ) {
			$synonym = '#' . $synonym;
		}
		# Remove trailing colon
		if ( substr( $synonym, -1 ) === ':' ) {
			$synonym = substr( $synonym, 0, -1 );
		}
		$this->mFunctionSynonyms[$caseKey][$synonym] = $id;
	}

	return $previous;
}
4966 
/**
 * List the IDs of all registered parser function hooks.
 *
 * @return array
 */
public function getFunctionHooks() {
	$hooks = $this->mFunctionHooks;

	return array_keys( $hooks );
}
4976 
/**
 * Public entry point that forwards to replaceLinkHoldersPrivate().
 *
 * @param string &$text Modified in place
 * @param int $options Forwarded unchanged
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	// Returns nothing: the result is delivered through the by-reference $text
	$this->replaceLinkHoldersPrivate(
		$text,
		$options
	);
}
4988 
/**
 * Hand the text to the link holder array for replacement.
 *
 * @param string &$text Passed on by reference
 * @param int $options Not used by this method
 */
private function replaceLinkHoldersPrivate( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
4999 
/**
 * Text-only variant of link holder replacement; delegates to the link
 * holder array's replaceText().
 *
 * @param string $text
 * @return string
 */
private function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;

	return $holders->replaceText( $text );
}
5010 
/**
 * Render an image gallery from its body text and attributes.
 *
 * Every non-empty line of $text describes one image in the form
 * "Title|option|...|caption". The title part is URL-decoded when the line
 * contains a '%'; lines whose title cannot be parsed are skipped.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Gallery attributes. 'mode', 'showfilename', 'caption',
 *   'perrow', 'widths' and 'heights' are interpreted here; the whole array is
 *   also handed to the gallery object as additional options.
 * @return string HTML returned by the gallery object, after hooks ran
 */
public function renderImageGallery( $text, array $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->getTitle() );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );
	// File names are shown only when the attribute is present at all.
	$ig->setShowFilename( isset( $params['showfilename'] ) );

	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	$this->hookRunner->onBeforeParserrenderImageGallery( $this, $ig );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		# Skip empty lines (and lines starting with a pipe): no match
		if ( !preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches ) ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( $title === null ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		$this->hookRunner->onBeforeParserFetchFileAndTitle(
			$this, $title, $options, $descQuery );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-',
				'|',
				$matches[3],
				true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( !$magicName ) {
					// Last pipe wins.
					$label = $parameterMatch;
					continue;
				}

				$paramName = $paramMap[$magicName];
				switch ( $paramName ) {
					case 'gallery-internal-alt':
						$alt = $this->stripAltText( $match, false );
						break;
					case 'gallery-internal-link':
						$linkValue = $this->stripAltText( $match, false );
						if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
							// Result of LanguageConverter::markNoConversion
							// invoked on an external link.
							$linkValue = substr( $linkValue, 4, -2 );
						}
						list( $type, $target ) = $this->parseLinkParameter( $linkValue );
						if ( $type === 'link-url' ) {
							$link = $target;
							$this->mOutput->addExternalLink( $target );
						} elseif ( $type === 'link-title' ) {
							$link = $target->getLinkURL();
							$this->mOutput->addLink( $target );
						}
						break;
					default:
						// Must be a handler specific parameter. Such names only
						// enter $paramMap when $handler is set, so it is safe
						// to call it here.
						if ( $handler->validateParam( $paramName, $match ) ) {
							$handlerOptions[$paramName] = $match;
						} else {
							// Guess not, consider it as caption.
							$this->logger->debug(
								"$parameterMatch failed parameter validation" );
							$label = $parameterMatch;
						}
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	$this->hookRunner->onAfterParserFetchFileAndTitle( $this, $ig, $html );
	return $html;
}
5184 
/**
 * Build (and cache per handler class) the map from image magic word names
 * to [ type, parameter name ] pairs, plus the matching magic word array.
 *
 * @param object|false $handler Media handler, or a falsy value when there is none
 * @return array Two elements: the parameter map and the magic word array
 */
private function getImageParams( $handler ) {
	# Parameters the parser understands regardless of the handler
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;

	# Files without a handler share the '' cache slot
	$cacheKey = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$cacheKey] ) ) {
		if ( !$internalParamMap ) {
			$internalParamMap = [];
			foreach ( $internalParamNames as $group => $optionNames ) {
				foreach ( $optionNames as $optionName ) {
					// For grep: img_left, img_right, img_center, img_none,
					// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
					// img_bottom, img_text_bottom,
					// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
					// img_border, img_link, img_alt, img_class
					$magicKey = str_replace( '-', '_', 'img_' . $optionName );
					$internalParamMap[$magicKey] = [ $group, $optionName ];
				}
			}
		}

		# Start from the internal parameters, then layer handler ones on top
		$combined = $internalParamMap;
		if ( !$handler ) {
			// Parse the size for non-existent files. See T273013
			$combined[ 'img_width' ] = [ 'handler', 'width' ];
		} else {
			foreach ( $handler->getParamMap() as $magicKey => $handlerParam ) {
				$combined[$magicKey] = [ 'handler', $handlerParam ];
			}
		}

		$this->mImageParams[$cacheKey] = $combined;
		$this->mImageParamsMagicArray[$cacheKey] =
			$this->magicWordFactory->newArray( array_keys( $combined ) );
	}

	return [
		$this->mImageParams[$cacheKey],
		$this->mImageParamsMagicArray[$cacheKey],
	];
}
5237 
/**
 * Parse image options text and hand the result to Linker::makeImageLink().
 *
 * @param LinkTarget $link Target of the image; may be replaced by the file
 *   title returned from fetchFileAndTitle()
 * @param string $options Pipe-separated option text ("options|alt text")
 * @param object|false $holders Object providing replaceText(), passed on to
 *   stripAltText(); false makes stripAltText() use the parser's own holders
 * @return string Result of Linker::makeImageLink(), after the
 *   ParserModifyImageHTML hook had a chance to change it (only when a file
 *   was found)
 */
public function makeImage( LinkTarget $link, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	# * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
	# * left no resizing, just left align. label is used for alt= only
	# * right same, but right aligned
	# * none same, but not aligned
	# * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
	# * center center the image
	# * frame Keep original image size, no magnify-button.
	# * framed Same as "frame"
	# * frameless like 'thumb' but without a frame. Keeps user preferences for width
	# * upright reduce width for upright images, rounded to full __0 px
	# * border draw a 1px border around the image
	# * alt Text for HTML alt attribute (defaults to empty)
	# * class Set a class for img node
	# * link Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	# * baseline
	# * sub
	# * super
	# * top
	# * text-top
	# * middle
	# * bottom
	# * text-bottom

	# Protect LanguageConverter markup when splitting into parts
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	$options = [];
	$descQuery = false;
	$title = Title::castFromLinkTarget( $link ); // hook signature compat
	$this->hookRunner->onBeforeParserFetchFileAndTitle(
		$this, $title, $options, $descQuery );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $link ) = $this->fetchFileAndTitle( $link, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	// Parsoid applies data-(width|height) attributes to broken
	// media spans, for client use. See T273013
	// Without a handler any positive size is accepted.
	$validateFunc = static function ( $name, $value ) use ( $handler ) {
		return $handler
			? $handler->validateParam( $name, $value )
			: $value > 0;
	};

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $validateFunc( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $validateFunc( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter. Other 'handler' entries only
					# exist in the map when a handler does.
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		# Anything that is not a valid option becomes the caption; the last
		# such part wins.
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text. Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers. It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text. This is the framed/thumbnail case. If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $link->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $link->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	// hook signature compat again, $link may have changed
	$title = Title::castFromLinkTarget( $link );
	$this->hookRunner->onParserMakeImageParams( $title, $file, $params, $this );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $link, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}
	if ( $file ) {
		$this->modifyImageHtml( $file, $params, $ret );
	}

	return $ret;
}
5461 
/**
 * Classify the value of a link parameter and register the link with the
 * parser output.
 *
 * @param string $value Parameter value
 * @return array Two elements [ type, target ]:
 *   - [ 'no-link', false ] for the empty string
 *   - [ 'link-url', string ] for a value that fully matches the URL pattern
 *   - [ 'link-title', Title ] for a value that parses as a title
 *   - [ null, false ] for anything else
 */
private function parseLinkParameter( $value ) {
	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	$protocols = $this->mUrlProtocols;
	if ( preg_match( "/^((?i){$protocols})/", $value ) ) {
		# Starts like a URL: it must be a complete, valid one to count
		$urlChars = self::EXT_LINK_URL_CLASS;
		$address = self::EXT_LINK_ADDR;
		if ( !preg_match( "/^((?i){$protocols}){$address}{$urlChars}*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );
	return [ 'link-title', $linkTitle ];
}
5504 
/**
 * Run the ParserModifyImageHTML hook so handlers can alter image HTML.
 *
 * @param File $file File the HTML was generated for
 * @param array $params Image parameters as assembled by the caller
 * @param string &$html Image HTML, passed to the hook
 */
public function modifyImageHtml( File $file, array $params, string &$html ) {
	$runner = $this->hookRunner;
	$runner->onParserModifyImageHTML( $this, $file, $params, $html );
}
5515 
/**
 * Reduce caption text to tag-free text for use as alt or tooltip text.
 *
 * Steps, in order: link placeholders are replaced with text (through
 * $holders when given, otherwise through replaceLinkHoldersText()), strip
 * markers are expanded, every '&' that begins one of the legacy named
 * entities without a terminating semicolon is rewritten to '&amp;', and
 * finally Sanitizer::stripAllTags() is applied.
 *
 * @param string $caption Text to clean up
 * @param object|false $holders Object providing replaceText(), or a falsy
 *   value to use the parser's own link holders
 * @return string
 */
5521  private function stripAltText( $caption, $holders ) {
5522  # Strip bad stuff out of the title (tooltip). We can't just use
5523  # replaceLinkHoldersText() here, because if this function is called
5524  # from handleInternalLinks2(), mLinkHolders won't be up-to-date.
5525  if ( $holders ) {
5526  $tooltip = $holders->replaceText( $caption );
5527  } else {
5528  $tooltip = $this->replaceLinkHoldersText( $caption );
5529  }
5530 
5531  # make sure there are no placeholders in thumbnail attributes
5532  # that are later expanded to html- so expand them now and
5533  # remove the tags
5534  $tooltip = $this->mStripState->unstripBoth( $tooltip );
5535  # Compatibility hack! In HTML certain entity references not terminated
5536  # by a semicolon are decoded (but not if we're in an attribute; that's
5537  # how link URLs get away without properly escaping & in queries).
5538  # But wikitext has always required semicolon-termination of entities,
5539  # so encode & where needed to avoid decode of semicolon-less entities.
5540  # See T209236 and
5541  # https://www.w3.org/TR/html5/syntax.html#named-character-references
5542  # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
5543  $tooltip = preg_replace( "/
5544  & # 1. entity prefix
5545  (?= # 2. followed by:
5546  (?: # a. one of the legacy semicolon-less named entities
5547  A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
5548  C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
5549  GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
5550  O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
5551  U(?:acute|circ|grave|uml)|Yacute|
5552  a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
5553  c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
5554  divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
5555  frac(?:1(?:2|4)|34)|
5556  gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
5557  i(?:acute|circ|excl|grave|quest|uml)|laquo|
5558  lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
5559  m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
5560  not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
5561  o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
5562  p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
5563  s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
5564  u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
5565  )
5566  (?:[^;]|$)) # b. and not followed by a semicolon
5567  # S = study, for efficiency
5568  /Sx", '&amp;', $tooltip );
5569  $tooltip = Sanitizer::stripAllTags( $tooltip );
5570 
5571  return $tooltip;
5572  }
5573 
/**
 * Deprecated since 1.35 (a deprecation notice is emitted on every call).
 * Expands variables in $text, then expands strip markers.
 *
 * @param string &$text Text to process; also updated in place
 * @param PPFrame|false $frame Frame handed to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	wfDeprecated( __METHOD__, '1.35' );

	# Both steps write back into the by-reference argument
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5589 
/**
 * List the names of all tags that have a hook registered.
 *
 * @return array Keys of the tag hook table
 */
public function getTags() {
	$tagNames = array_keys( $this->mTagHooks );
	return $tagNames;
}
5599 
/**
 * Expose the function synonym cache filled by setFunctionHook().
 *
 * @return array Indexed first by case sensitivity (0 or 1), then by synonym
 */
public function getFunctionSynonyms() {
	$synonyms = $this->mFunctionSynonyms;
	return $synonyms;
}
5607 
/**
 * Return the URL protocol expression this parser embeds in its link
 * regular expressions.
 *
 * @return string
 */
public function getUrlProtocols() {
	$protocols = $this->mUrlProtocols;
	return $protocols;
}
5615 
/**
 * Shared implementation of getSection() and replaceSection().
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier: optional flags and the
 *   section index, joined by '-'. The flag 'T' makes the text be
 *   preprocessed for inclusion.
 * @param string $mode 'get' to return the section text, 'replace' to return
 *   the whole text with the section replaced
 * @param string $newText In 'replace' mode the replacement text; in 'get'
 *   mode the value returned when the section does not exist
 * @return string In 'get' mode the section (including nested sections) or
 *   $newText if not found; in 'replace' mode the modified text, or $text
 *   unchanged if the section was not found
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	$magicScopeVariable = $this->lock();
	$this->startParse(
		$wgTitle,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	# Process section extraction flags
	$flags = 0;
	$sectionParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $sectionParts );
	foreach ( $sectionParts as $part ) {
		if ( $part === 'T' ) {
			$flags |= Preprocessor::DOM_FOR_INCLUSION;
		}
	}

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			if ( $mode === 'get' ) {
				return '';
			}

			return $newText;
		} else {
			if ( $mode === 'get' ) {
				return $newText;
			}

			return $text;
		}
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		while ( $node ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			# In replace mode everything before the target is copied through
			if ( $mode === 'replace' ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
			$node = $node->getNextSibling();
		}
	}

	if ( !$node ) {
		# Not found
		if ( $mode === 'get' ) {
			return $newText;
		} else {
			return $text;
		}
	}

	# Find the end of the section, including nested sections
	do {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			$curLevel = $bits['level'];
			if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		if ( $mode === 'get' ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	# Write out the remainder (in replace mode only)
	if ( $mode === 'replace' ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		while ( $node ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			$node = $node->getNextSibling();
		}
	}

	# Re-insert stripped tags
	$outText = rtrim( $this->mStripState->unstripBoth( $outText ) );

	return $outText;
}
5758 
/**
 * Fetch the wikitext of one section by running extractSections() in
 * 'get' mode.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier
 * @param string $defaultText Returned when the section does not exist
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$section = $this->extractSections( $text, $sectionId, 'get', $defaultText );
	return $section;
}
5777 
/**
 * Replace one section of the wikitext by running extractSections() in
 * 'replace' mode.
 *
 * @param string $oldText Current page wikitext
 * @param string|int $sectionId Section identifier
 * @param string $newText Replacement for the section
 * @return string The modified text
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$updated = $this->extractSections( $oldText, $sectionId, 'replace', $newText );
	return $updated;
}
5794 
/**
 * Split wikitext into a flat list of sections.
 *
 * The first entry is always the lead section (index 0, level 0, empty
 * heading); one further entry follows for each top-level heading node.
 *
 * @param string $text Wikitext to split
 * @return array[] One array per section with the keys:
 *   - 'index': section index reported by the heading node (0 for the lead)
 *   - 'level': heading level (0 for the lead)
 *   - 'offset': byte offset of the section start within the recovered text
 *   - 'heading': original text of the heading node ('' for the lead)
 *   - 'text': original text of the section, heading included
 */
public function getFlatSectionInfo( $text ) {
	$magicScopeVariable = $this->lock();
	$this->startParse(
		null,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $text, 0 );
	$node = $root->getFirstChild();
	$offset = 0;
	$currentSection = [
		'index' => 0,
		'level' => 0,
		'offset' => 0,
		'heading' => '',
		'text' => ''
	];
	$sections = [];

	while ( $node ) {
		$nodeText = $frame->expand( $node, PPFrame::RECOVER_ORIG );
		if ( $node->getName() === 'h' ) {
			# A heading closes the section collected so far and opens a new one
			$bits = $node->splitHeading();
			$sections[] = $currentSection;
			$currentSection = [
				'index' => $bits['i'],
				'level' => $bits['level'],
				'offset' => $offset,
				'heading' => $nodeText,
				'text' => $nodeText
			];
		} else {
			$currentSection['text'] .= $nodeText;
		}
		$offset += strlen( $nodeText );
		$node = $node->getNextSibling();
	}
	$sections[] = $currentSection;
	return $sections;
}
5866 
/**
 * Return the revision ID stored on the parser for the current parse.
 *
 * @return int|null Null when no revision ID is set
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;
	return $revisionId;
}
5881 
/**
 * Get the revision record for the current parse, caching it once found.
 *
 * @return object|null The revision record, or null if there is none or if
 *   an already-saved revision was returned while previewing
 */
public function getRevisionRecordObject() {
	$cached = $this->mRevisionRecordObject;
	if ( $cached ) {
		return $cached;
	}

	// NOTE: try to get the RevisionRecord object even if mRevisionId is null.
	// This is useful when parsing a revision that has not yet been saved.
	// However, if we get back a saved revision even though we are in
	// preview mode, we'll have to ignore it, see below.
	// NOTE: This callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. We can't just
	// skip it if mRevisionId is set.
	$fetchRevision = $this->mOptions->getCurrentRevisionRecordCallback();
	$revision = call_user_func( $fetchRevision, $this->getTitle(), $this );

	// The revision record callback returns `false` (not null) to
	// indicate that the revision is missing. (See for example
	// Parser::statelessFetchRevisionRecord(), the default callback.)
	// This API expects `null` instead. (T251952)
	$revision = ( $revision === false ) ? null : $revision;

	$expectedId = $this->mRevisionId;

	if ( $expectedId === null && $revision && $revision->getId() ) {
		// We are in preview mode (mRevisionId is null), and the current revision callback
		// returned an existing revision. Ignore it and return null, it's probably the page's
		// current revision, which is not what we want here. Note that we do want to call the
		// callback to allow the unsaved revision to be injected here, e.g. for
		// self-transclusion previews.
		return null;
	}

	// If the parse is for a new revision, then the callback should have
	// already been set to force the object and should match mRevisionId.
	// If not, try to fetch by mRevisionId instead.
	if ( $expectedId && $revision && $revision->getId() != $expectedId ) {
		$lookup = MediaWikiServices::getInstance()->getRevisionLookup();
		$revision = $lookup->getRevisionById( $expectedId );
	}

	$this->mRevisionRecordObject = $revision;

	return $this->mRevisionRecordObject;
}
5936 
/**
 * Get the timestamp of the revision being parsed, adjusted to the site
 * default timezone, computing and caching it on first use. The unadjusted
 * value is recorded on the parser output.
 *
 * @return string
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Use specified revision timestamp, falling back to the current timestamp
		$revision = $this->getRevisionRecordObject();
		if ( $revision ) {
			$rawTimestamp = $revision->getTimestamp();
		} else {
			$rawTimestamp = $this->mOptions->getTimestamp();
		}
		$this->mOutput->setRevisionTimestampUsed( $rawTimestamp ); // unadjusted time zone

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $rawTimestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
5962 
/**
 * Get the name of the user who made the revision being parsed.
 *
 * @return string|null Null when there is no revision user and the parse
 *   is neither a pre-save transform nor a preview
 */
public function getRevisionUser(): ?string {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionRecordObject();

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $revision && $revision->getUser() ) {
		$this->mRevisionUser = $revision->getUser()->getName();
		return $this->mRevisionUser;
	}

	if ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUserIdentity()->getName();
	}

	# When neither case applied, mRevisionUser is still null here
	return $this->mRevisionUser;
}
5986 
/**
 * Get the size of the revision being parsed, cached after first use.
 *
 * @return int|null Size from the revision record, or the parser input
 *   size when there is no revision record
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revision = $this->getRevisionRecordObject();

	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the own substitution
	# will change the size.
	$this->mRevisionSize = $revision
		? $revision->getSize()
		: $this->mInputSize;

	return $this->mRevisionSize;
}
6008 
/**
 * Store the default sort key on the parser and record it as the
 * 'defaultsort' page property of the output.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$output = $this->mOutput;
	$output->setPageProperty( 'defaultsort', $sort );
}
6019 
/**
 * Get the default sort key, mapping the "unset" marker (false) to an
 * empty string.
 *
 * @return string
 */
public function getDefaultSort() {
	return $this->mDefaultSort === false ? '' : $this->mDefaultSort;
}
6038 
/**
 * Get the stored default sort key exactly as held by the parser, without
 * the empty-string substitution getDefaultSort() performs.
 *
 * @return string|false
 */
public function getCustomDefaultSort() {
	$sortKey = $this->mDefaultSort;
	return $sortKey;
}
6049 
/**
 * Turn already-stripped heading text into a section name: whitespace is
 * normalised, character references are decoded, and the result is
 * normalised the same way as a link fragment.
 *
 * @param string $text Heading text with markup already removed
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );
	$text = Sanitizer::decodeCharReferences( $text );
	$text = self::normalizeSectionName( $text );
	return $text;
}
6056 
/**
 * Build a '#'-prefixed link anchor from a section name.
 *
 * @param string $sectionName
 * @return string
 */
private static function makeAnchor( $sectionName ) {
	$escaped = Sanitizer::escapeIdForLink( $sectionName );
	return '#' . $escaped;
}
6060 
/**
 * Build a '#'-prefixed anchor from a section name, using the fallback
 * (legacy) ID encoding when the second configured fragment mode is
 * 'legacy', and the regular link encoding otherwise.
 *
 * @param string $sectionName
 * @return string
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );
	if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
		// ForAttribute() and ForLink() are the same for legacy encoding
		$id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
	} else {
		$id = Sanitizer::escapeIdForLink( $sectionName );
	}

	return "#$id";
}
6072 
/**
 * Guess the anchor a heading's wikitext will produce.
 *
 * @param string $text Heading wikitext
 * @return string Anchor including the leading '#'
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return self::makeAnchor(
		self::getSectionNameFromStrippedText( $stripped )
	);
}
6088 
/**
 * Same as guessSectionNameFromWikiText(), but the anchor is built with
 * makeLegacyAnchor().
 *
 * @param string $text Heading wikitext
 * @return string Anchor including the leading '#'
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor(
		self::getSectionNameFromStrippedText( $stripped )
	);
}
6105 
/**
 * Guess the anchor for heading text that has already had its markup
 * stripped.
 *
 * @param string $text Stripped heading text
 * @return string Anchor including the leading '#'
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor(
		self::getSectionNameFromStrippedText( $text )
	);
}
6116 
/**
 * Normalise a section name by running it through the title parser as the
 * fragment of "#<name>".
 *
 * @param string $text Section name
 * @return string The parsed fragment, or $text unchanged if the title
 *   parser rejects the string
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links

	$services = MediaWikiServices::getInstance();
	$titleParser = $services->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';

	try {
		$split = $titleParser->splitTitleString( "#$text" );
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}

	return $split['fragment'];
}
6136 
/**
 * Remove link markup, quote markup and HTML tags from heading wikitext.
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function stripSectionName( $text ) {
	# Applied in order: piped internal links, plain internal links, then
	# external links with link text.
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$linkRules = [
		'/\[\[:?([^[|]+)\|([^[]+)\]\]/' => '$2',
		'/\[\[:?([^[]+)\|?\]\]/' => '$1',
		'/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/' => '$2',
	];
	foreach ( $linkRules as $pattern => $replacement ) {
		$text = preg_replace( $pattern, $replacement, $text );
	}

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6170 
/**
 * Fuzz-test entry point: start a parse, expand variables, expand strip
 * markers and remove disallowed HTML tags.
 *
 * @param string $text Input wikitext
 * @param PageReference $page Page context for the parse
 * @param ParserOptions $options Options for the parse
 * @param int $outputType One of the OT_* constants
 * @return string
 */
private function fuzzTestSrvus( $text, PageReference $page, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6195 
6207  private function fuzzTestPst( $text, PageReference $page, ParserOptions $options ) {
6208  return $this->preSaveTransform( $text, $page, $options->getUserIdentity(), $options );
6209  }
6210 
6222  private function fuzzTestPreprocess( $text, PageReference $page, ParserOptions $options ) {
6223  return $this->fuzzTestSrvus( $text, $page, $options, self::OT_PREPROCESS );
6224  }
6225 
6244  public function markerSkipCallback( $s, callable $callback ) {
6245  $i = 0;
6246  $out = '';
6247  while ( $i < strlen( $s ) ) {
6248  $markerStart = strpos( $s, self::MARKER_PREFIX, $i );
6249  if ( $markerStart === false ) {
6250  $out .= call_user_func( $callback, substr( $s, $i ) );
6251  break;
6252  } else {
6253  $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) );
6254  $markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
6255  if ( $markerEnd === false ) {
6256  $out .= substr( $s, $markerStart );
6257  break;
6258  } else {
6259  $markerEnd += strlen( self::MARKER_SUFFIX );
6260  $out .= substr( $s, $markerStart, $markerEnd - $markerStart );
6261  $i = $markerEnd;
6262  }
6263  }
6264  }
6265  return $out;
6266  }
6267 
6275  public function killMarkers( $text ) {
6276  return $this->mStripState->killMarkers( $text );
6277  }
6278 
6289  public static function parseWidthParam( $value, $parseHeight = true ) {
6290  $parsedWidthParam = [];
6291  if ( $value === '' ) {
6292  return $parsedWidthParam;
6293  }
6294  $m = [];
6295  # (T15500) In both cases (width/height and width only),
6296  # permit trailing "px" for backward compatibility.
6297  if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
6298  $width = intval( $m[1] );
6299  $height = intval( $m[2] );
6300  $parsedWidthParam['width'] = $width;
6301  $parsedWidthParam['height'] = $height;
6302  } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
6303  $width = intval( $value );
6304  $parsedWidthParam['width'] = $width;
6305  }
6306  return $parsedWidthParam;
6307  }
6308 
6318  protected function lock() {
6319  if ( $this->mInParse ) {
6320  throw new MWException( "Parser state cleared while parsing. "
6321  . "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
6322  }
6323 
6324  // Save the backtrace when locking, so that if some code tries locking again,
6325  // we can print the lock owner's backtrace for easier debugging
6326  $e = new Exception;
6327  $this->mInParse = $e->getTraceAsString();
6328 
6329  $recursiveCheck = new ScopedCallback( function () {
6330  $this->mInParse = false;
6331  } );
6332 
6333  return $recursiveCheck;
6334  }
6335 
6346  public static function stripOuterParagraph( $html ) {
6347  $m = [];
6348  if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) && strpos( $m[1], '</p>' ) === false ) {
6349  $html = $m[1];
6350  }
6351 
6352  return $html;
6353  }
6354 
6365  public function getFreshParser() {
6366  if ( $this->mInParse ) {
6367  return $this->factory->create();
6368  } else {
6369  return $this;
6370  }
6371  }
6372 
6380  public function enableOOUI() {
6381  wfDeprecated( __METHOD__, '1.35' );
6383  $this->mOutput->setEnableOOUI( true );
6384  }
6385 
6392  private function setOutputFlag( string $flag, string $reason ): void {
6393  $this->mOutput->setOutputFlag( $flag );
6394  $name = $this->getTitle()->getPrefixedText();
6395  $this->logger->debug( __METHOD__ . ": set $flag flag on '$name'; $reason" );
6396  }
6397 }
Parser\$badFileLookup
BadFileLookup $badFileLookup
Definition: Parser.php:382
Page\PageIdentity
Interface for objects (potentially) representing an editable wiki page.
Definition: PageIdentity.php:64
Parser\getFunctionHooks
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:4973
Parser\$mLinkRenderer
LinkRenderer $mLinkRenderer
Definition: Parser.php:343
Parser\$mForceTocPosition
$mForceTocPosition
Definition: Parser.php:271
Parser\recursivePreprocess
recursivePreprocess( $text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:938
Parser\getContentLanguageConverter
getContentLanguageConverter()
Shorthand for getting a Language Converter for Content language.
Definition: Parser.php:1632
Parser\transformMsg
transformMsg( $text, ParserOptions $options, ?PageReference $page=null)
Wrapper for preprocess()
Definition: Parser.php:4824
ParserOptions
Set options of the Parser.
Definition: ParserOptions.php:45
Parser\attributeStripCallback
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition: Parser.php:5583
PPFrame\loopCheck
loopCheck( $title)
Returns true if the infinite loop check is OK, false if a loop is detected.
Parser\$mSubstWords
MagicWordArray $mSubstWords
Definition: Parser.php:217
Parser\$linkRendererFactory
LinkRendererFactory $linkRendererFactory
Definition: Parser.php:373
Sanitizer\ID_FALLBACK
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition: Sanitizer.php:79
Parser\maybeMakeExternalImage
maybeMakeExternalImage( $url)
make an image if it's allowed, either through the global option, through the exception,...
Definition: Parser.php:2347
Message\numParam
static numParam( $num)
Definition: Message.php:1127
MagicWordArray
Class for handling an array of magic words.
Definition: MagicWordArray.php:32
Parser\EXT_LINK_ADDR
const EXT_LINK_ADDR
Definition: Parser.php:107
MediaWiki\Revision\RevisionAccessException
Exception representing a failure to look up a revision.
Definition: RevisionAccessException.php:37
MediaWiki\Linker\LinkTarget\isSameLinkAs
isSameLinkAs(LinkTarget $other)
Checks whether the given LinkTarget refers to the same target as this LinkTarget.
FauxRequest
WebRequest clone which takes values from a provided array.
Definition: FauxRequest.php:36
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:377
Parser\$mInputSize
$mInputSize
Definition: Parser.php:309
PPFrame\STRIP_COMMENTS
const STRIP_COMMENTS
Definition: PPFrame.php:31
Parser\SPACE_NOT_NL
const SPACE_NOT_NL
Definition: Parser.php:114
Parser\$titleFormatter
TitleFormatter $titleFormatter
Definition: Parser.php:361
HtmlArmor
Marks HTML that shouldn't be escaped.
Definition: HtmlArmor.php:30
MediaWiki\Revision\RevisionRecord
Page revision base class.
Definition: RevisionRecord.php:47
MediaWiki\Linker\LinkTarget\getText
getText()
Returns the link in text form, without namespace prefix or fragment.
Parser\__destruct
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:531
Preprocessor\DOM_FOR_INCLUSION
const DOM_FOR_INCLUSION
Transclusion mode flag for Preprocessor::preprocessToObj()
Definition: Preprocessor.php:29
ParserOutput
Definition: ParserOutput.php:35
Parser\$mLinkHolders
LinkHolderArray $mLinkHolders
Definition: Parser.php:243
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:72
Parser\makeImage
makeImage(LinkTarget $link, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5247
Parser\braceSubstitution
braceSubstitution(array $piece, PPFrame $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:2965
Parser\makeLimitReport
makeLimitReport()
Set the limit report data in the current ParserOutput.
Definition: Parser.php:755
MediaWiki\Languages\LanguageConverterFactory\isConversionDisabled
isConversionDisabled()
Whether to disable language variant conversion.
Definition: LanguageConverterFactory.php:141
MagicWordFactory
A factory that stores information about MagicWords, and creates them on demand with caching.
Definition: MagicWordFactory.php:37
Parser\internalParseHalfParsed
internalParseHalfParsed( $text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1670
Parser\$userFactory
UserFactory $userFactory
Definition: Parser.php:397
Parser\stripAltText
stripAltText( $caption, $holders)
Definition: Parser.php:5521
Parser\killMarkers
killMarkers( $text)
Remove any strip markers found in the given text.
Definition: Parser.php:6275
Sanitizer\stripAllTags
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed,...
Definition: Sanitizer.php:1576
Parser\$mTagHooks
$mTagHooks
Definition: Parser.php:192
Parser\OutputType
OutputType( $x=null)
Accessor/mutator for the output type.
Definition: Parser.php:1072
Parser\$currentRevisionCache
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:327
Parser\setOutputFlag
setOutputFlag(string $flag, string $reason)
Sets the flag on the parser output but also does some debug logging.
Definition: Parser.php:6392
Parser\enableOOUI
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6380
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:203
Linker\makeSelfLinkObj
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:162
Parser\$mTplDomCache
array $mTplDomCache
Definition: Parser.php:273
MediaWiki\BadFileLookup
Definition: BadFileLookup.php:13
PPFrame\NO_ARGS
const NO_ARGS
Definition: PPFrame.php:29
Parser\statelessFetchRevisionRecord
static statelessFetchRevisionRecord(LinkTarget $link, $parser=null)
Wrapper around RevisionLookup::getKnownCurrentRevision.
Definition: Parser.php:3526
wfSetVar
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest If source is NULL, it just returns the val...
Definition: GlobalFunctions.php:1496
Parser\parseExtensionTagAsTopLevelDoc
parseExtensionTagAsTopLevelDoc( $text)
Needed by Parsoid/PHP to ensure all the hooks for extensions are run in the right order.
Definition: Parser.php:892
Parser\$mDoubleUnderscores
$mDoubleUnderscores
Definition: Parser.php:266
Linker\tocIndent
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1600
Parser\getRevisionSize
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:5993
Sanitizer\escapeIdForAttribute
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
Definition: Sanitizer.php:811
Sanitizer\removeHTMLtags
static removeHTMLtags( $text, $processCallback=null, $args=[], $extratags=[], $removetags=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments.
Definition: Sanitizer.php:240
Parser\ParserOutputFlags
Definition: ParserOutputFlags.php:41
MediaWiki\Http\HttpRequestFactory
Factory creating MWHttpRequest objects.
Definition: HttpRequestFactory.php:35
Parser\handleExternalLinks
handleExternalLinks( $text)
Replace external links (REL)
Definition: Parser.php:2128
Parser\$mOutputType
$mOutputType
Definition: Parser.php:297
MediaWiki\Linker\LinkRenderer
Class that generates HTML links for pages.
Definition: LinkRenderer.php:43
ParserOptions\getDisableTitleConversion
getDisableTitleConversion()
Whether title conversion should be disabled.
Definition: ParserOptions.php:558
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1649
Parser\handleHeadings
handleHeadings( $text)
Parse headers and return html.
Definition: Parser.php:1907
MediaWiki\SpecialPage\SpecialPageFactory
Factory for handling the special page list and generating SpecialPage objects.
Definition: SpecialPageFactory.php:63
Parser\handleAllQuotes
handleAllQuotes( $text)
Replace single quotes with HTML markup.
Definition: Parser.php:1924
Parser\$mUrlProtocols
$mUrlProtocols
Definition: Parser.php:220
Parser\extractTagsAndParams
static extractTagsAndParams(array $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:1244
Parser\$mLinkID
int $mLinkID
Definition: Parser.php:249
OT_HTML
const OT_HTML
Definition: Defines.php:157
SFH_NO_HASH
const SFH_NO_HASH
Definition: Defines.php:170
Parser\handleDoubleUnderscore
handleDoubleUnderscore( $text)
Strip double-underscore items like NOGALLERY and NOTOC Fills $this->mDoubleUnderscores,...
Definition: Parser.php:4035
Sanitizer\normalizeSectionNameWhitespace
static normalizeSectionNameWhitespace( $section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(),...
Definition: Sanitizer.php:1104
OT_PREPROCESS
const OT_PREPROCESS
Definition: Defines.php:159
Parser\normalizeSectionName
static normalizeSectionName( $text)
Apply the same normalization as code making links to this section would.
Definition: Parser.php:6123
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
Parser\fetchFileNoRegister
fetchFileNoRegister(LinkTarget $link, array $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3769
wfHostname
wfHostname()
Get host name of the current machine, for use in error reporting.
Definition: GlobalFunctions.php:1230
Parser\recursiveTagParseFully
recursiveTagParseFully( $text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition: Parser.php:867
Parser\$specialPageFactory
SpecialPageFactory $specialPageFactory
Definition: Parser.php:358
Parser\nextLinkID
nextLinkID()
Definition: Parser.php:1118
Parser\getTargetLanguage
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:1147
User\newFromName
static newFromName( $name, $validate='valid')
Definition: User.php:595
Parser\$mStripList
$mStripList
Definition: Parser.php:195
wfMessage
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Definition: GlobalFunctions.php:1167
MediaWiki\Linker\LinkRendererFactory
Factory to create LinkRender objects.
Definition: LinkRendererFactory.php:34
SpecialPage\getTitleFor
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name If you don't need a full Title object,...
Definition: SpecialPage.php:131
Parser\startExternalParse
startExternalParse(?PageReference $page, ParserOptions $options, $outputType, $clearState=true, $revId=null)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4789
Parser\guessSectionNameFromWikiText
guessSectionNameFromWikiText( $text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition: Parser.php:6082
Parser\setDefaultSort
setDefaultSort( $sort)
Mutator for $mDefaultSort.
Definition: Parser.php:6015
Parser\getPage
getPage()
Returns the page used as context for parsing, e.g.
Definition: Parser.php:1036
Preprocessor_Hash
Differences from DOM schema:
Definition: Preprocessor_Hash.php:43
StripState
Definition: StripState.php:29
Parser\getExternalLinkRel
static getExternalLinkRel( $url=false, LinkTarget $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:2209
Parser\replaceVariables
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:2889
Parser\MARKER_PREFIX
const MARKER_PREFIX
Definition: Parser.php:149
Parser\getFunctionSynonyms
getFunctionSynonyms()
Definition: Parser.php:5604
Parser\$mInParse
bool string $mInParse
Recursive call protection.
Definition: Parser.php:335
Parser\doQuotes
doQuotes( $text)
Helper function for handleAllQuotes()
Definition: Parser.php:1942
Linker\tocLine
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1626
Parser\startParse
startParse(?PageReference $page, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4804
MediaWiki\Languages\LanguageConverterFactory
An interface for creating language converters.
Definition: LanguageConverterFactory.php:46
Page\PageReference
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
Definition: PageReference.php:49
Parser\$svcOptions
ServiceOptions $svcOptions
This is called $svcOptions instead of $options like elsewhere to avoid confusion with $mOptions,...
Definition: Parser.php:370
MediaWiki\User\UserIdentity
Interface for objects representing user identity.
Definition: UserIdentity.php:39
Linker\tocList
static tocList( $toc, Language $lang=null)
Wraps the TOC in a div with ARIA navigation role and provides the hide/collapse JavaScript.
Definition: Linker.php:1662
Parser\SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Parser.php:95
Parser\OT_WIKI
const OT_WIKI
Definition: Parser.php:125
Parser\getTags
getTags()
Accessor.
Definition: Parser.php:5596
Parser\getStripList
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1307
Parser\initializeVariables
initializeVariables()
Initialize the magic variables (like CURRENTMONTHNAME) and substitution modifiers.
Definition: Parser.php:2838
PPFrame\NO_TEMPLATES
const NO_TEMPLATES
Definition: PPFrame.php:30
Preprocessor
Definition: Preprocessor.php:27
Parser\getOptions
getOptions()
Definition: Parser.php:1089
MediaWiki\Languages\LanguageNameUtils
A service that provides utilities to do with language names and codes.
Definition: LanguageNameUtils.php:43
PPFrame\newChild
newChild( $args=false, $title=false, $indexOffset=0)
Create a child frame.
Parser\getFunctionLang
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:1135
StringUtils\replaceMarkup
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <....
Definition: StringUtils.php:268
Parser\$mRevisionRecordObject
RevisionRecord null $mRevisionRecordObject
Definition: Parser.php:312
Parser\Options
Options( $x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:1109
NS_SPECIAL
const NS_SPECIAL
Definition: Defines.php:53
Parser\preSaveTransform
preSaveTransform( $text, PageReference $page, UserIdentity $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\\r\\n" -> "\\n" conversion, substituting signatur...
Definition: Parser.php:4509
Parser\lock
lock()
Lock the current instance of the parser.
Definition: Parser.php:6318
Parser\getDefaultSort
getDefaultSort()
Accessor for $mDefaultSort Will use the empty string if none is set.
Definition: Parser.php:6031
Parser\$mFunctionSynonyms
$mFunctionSynonyms
Definition: Parser.php:194
Parser\$hookRunner
HookRunner $hookRunner
Definition: Parser.php:388
Parser\$nsInfo
NamespaceInfo $nsInfo
Definition: Parser.php:376
Parser\makeKnownLinkHolder
makeKnownLinkHolder(LinkTarget $nt, $text='', $trail='', $prefix='')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2734
Parser\makeLegacyAnchor
makeLegacyAnchor( $sectionName)
Definition: Parser.php:6061
Parser\fuzzTestSrvus
fuzzTestSrvus( $text, PageReference $page, ParserOptions $options, $outputType=self::OT_HTML)
Strip/replaceVariables/unstrip for preprocessor regression testing.
Definition: Parser.php:6184
Parser\setHook
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4869
Parser\$mHeadings
$mHeadings
Definition: Parser.php:264
Parser\$userOptionsLookup
UserOptionsLookup $userOptionsLookup
Definition: Parser.php:394
getUser
getUser()
Parser\interwikiTransclude
interwikiTransclude(LinkTarget $link, $action)
Transclude an interwiki link.
Definition: Parser.php:3792
Parser\getTitle
getTitle()
Definition: Parser.php:1000
File
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition: File.php:67
Parser\$mVariables
MagicWordArray $mVariables
Definition: Parser.php:212
wfDeprecatedMsg
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
Definition: GlobalFunctions.php:1028
MWException
MediaWiki exception.
Definition: MWException.php:29
Parser\$ot
$ot
Definition: Parser.php:299
Parser\getRevisionRecordObject
getRevisionRecordObject()
Get the revision record object for $this->mRevisionId.
Definition: Parser.php:5888
MediaWiki\Config\ServiceOptions
A class for passing options to services.
Definition: ServiceOptions.php:27
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Definition: GlobalFunctions.php:997
Parser\OT_MSG
const OT_MSG
Definition: Parser.php:127
Parser\getPreloadText
getPreloadText( $text, PageReference $page, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:958
MediaWiki\User\UserIdentity\isRegistered
isRegistered()
Parser\firstCallInit
firstCallInit()
Used to do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:575
Parser\$mProfiler
SectionProfiler $mProfiler
Definition: Parser.php:338
Parser\getFlatSectionInfo
getFlatSectionInfo( $text)
Get an array of preprocessor section information.
Definition: Parser.php:5824
Parser\$mMarkerIndex
$mMarkerIndex
Definition: Parser.php:200
BlockLevelPass\doBlockLevels
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: BlockLevelPass.php:52
Parser\getCustomDefaultSort
getCustomDefaultSort()
Accessor for $mDefaultSort Unlike getDefaultSort(), will return false if none is set.
Definition: Parser.php:6046
wfUrlProtocolsWithoutProtRel
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Definition: GlobalFunctions.php:747
Parser\handleTables
handleTables( $text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1341
$matches
$matches
Definition: NoLocalSettings.php:24
CoreTagHooks\register
static register( $parser)
Definition: CoreTagHooks.php:36
Parser\$contLang
Language $contLang
Definition: Parser.php:349
Parser\makeAnchor
static makeAnchor( $sectionName)
Definition: Parser.php:6057
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:326
PPNode
There are three types of nodes:
Definition: PPNode.php:35
Parser\$factory
ParserFactory $factory
Definition: Parser.php:355
Parser\replaceLinkHoldersPrivate
replaceLinkHoldersPrivate(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4996
LinkHolderArray
Definition: LinkHolderArray.php:33
Parser\__clone
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:545
NS_TEMPLATE
const NS_TEMPLATE
Definition: Defines.php:74
PPFrame\RECOVER_ORIG
const RECOVER_ORIG
Definition: PPFrame.php:36
Linker\makeHeadline
static makeHeadline( $level, $attribs, $anchor, $html, $link, $fallbackAnchor=false)
Create a headline for content.
Definition: Linker.php:1737
Parser\getHookContainer
getHookContainer()
Get a HookContainer capable of returning metadata about hooks or running extension hooks.
Definition: Parser.php:1645
Parser\callParserFunction
callParserFunction(PPFrame $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3345
Parser\extensionSubstitution
extensionSubstitution(array $params, PPFrame $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:3918
Linker\tocLineEnd
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1650
$args
if( $line===false) $args
Definition: mcc.php:124
MapCacheLRU
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:36
Parser\$mLangLinkLanguages
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:319
Parser\$trackingCategories
TrackingCategories $trackingCategories
Definition: Parser.php:403
Parser\markerSkipCallback
markerSkipCallback( $s, callable $callback)
Call a callback function on all regions of the given text that are not inside strip markers,...
Definition: Parser.php:6244
Parser\limitationWarn
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached Will warn at most once the user per limitation type...
Definition: Parser.php:2940
MediaWiki\User\UserIdentity\getName
getName()
$title
$title
Definition: testCompression.php:38
Parser\recursiveTagParse
recursiveTagParse( $text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:843
Linker\makeExternalLink
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:1025
Parser\finalizeHeadings
finalizeHeadings( $text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4110
Title\makeTitle
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:648
Parser\$mHighestExpansionDepth
$mHighestExpansionDepth
Definition: Parser.php:260
SectionProfiler
Arbitrary section name based PHP profiling.
Definition: SectionProfiler.php:35
Parser\cleanSig
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4706
RequestContext
Group all the pieces relevant to the context of a request into one instance.
Definition: RequestContext.php:41
Parser\$mImageParamsMagicArray
$mImageParamsMagicArray
Definition: Parser.php:198
SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Defines.php:171
Parser\handleInternalLinks
handleInternalLinks( $text)
Process [[ ]] wikilinks.
Definition: Parser.php:2405
Parser\$mTplRedirCache
$mTplRedirCache
Definition: Parser.php:262
Parser\$tidy
TidyDriverBase $tidy
Definition: Parser.php:391
Parser\$mFirstCall
bool $mFirstCall
Whether firstCallInit still needs to be called.
Definition: Parser.php:205
Parser\modifyImageHtml
modifyImageHtml(File $file, array $params, string &$html)
Give hooks a chance to modify image thumbnail HTML.
Definition: Parser.php:5512
ParserOptions\getPreSaveTransform
getPreSaveTransform()
Transform wiki markup when saving the page?
Definition: ParserOptions.php:664
Parser\getStripState
getStripState()
Definition: Parser.php:1315
Parser\getContentLanguage
getContentLanguage()
Get the content language that this Parser is using.
Definition: Parser.php:1211
Parser\OT_PLAIN
const OT_PLAIN
Definition: Parser.php:129
$wgTitle
$wgTitle
Definition: Setup.php:879
Parser\handleMagicLinks
handleMagicLinks( $text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1717
Linker\splitTrail
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1758
MediaWiki\Tidy\TidyDriverBase
Base class for HTML cleanup utilities.
Definition: TidyDriverBase.php:8
Parser\insertStripItem
insertStripItem( $text)
Add an item to the strip state Returns the unique tag which must be inserted into the stripped text T...
Definition: Parser.php:1328
Parser\getFreshParser
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:6365
Parser\getRevisionUser
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:5969
Parser\setOptions
setOptions(ParserOptions $options)
Mutator for the ParserOptions object.
Definition: Parser.php:1098
Parser\getImageParams
getImageParams( $handler)
Definition: Parser.php:5189
Parser\$mAutonumber
$mAutonumber
Definition: Parser.php:233
Parser\fuzzTestPst
fuzzTestPst( $text, PageReference $page, ParserOptions $options)
Strip/replaceVariables/unstrip for preprocessor regression testing.
Definition: Parser.php:6207
Parser\replaceLinkHolders
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4985
Parser\addTrackingCategory
addTrackingCategory( $msg)
Definition: Parser.php:4089
Parser\getUrlProtocols
getUrlProtocols()
Definition: Parser.php:5612
Parser\incrementIncludeSize
incrementIncludeSize( $type, $size)
Increment an include size counter.
Definition: Parser.php:4010
Parser\getTargetLanguageConverter
getTargetLanguageConverter()
Shorthand for getting a Language Converter for Target language.
Definition: Parser.php:1621
ParserFactory
Definition: ParserFactory.php:37
$content
$content
Definition: router.php:76
CoreParserFunctions\register
static register( $parser)
Definition: CoreParserFunctions.php:38
Parser\makeFreeExternalLink
makeFreeExternalLink( $url, $numPostProto)
Make a free external link, given a user-supplied URL.
Definition: Parser.php:1831
Parser\CONSTRUCTOR_OPTIONS
const CONSTRUCTOR_OPTIONS
Definition: Parser.php:408
$s
foreach( $mmfl['setupFiles'] as $fileName) if( $queue) if(empty( $mmfl['quiet'])) $s
Definition: mergeMessageFileList.php:206
NS_MEDIA
const NS_MEDIA
Definition: Defines.php:52
PPFrame\expand
expand( $root, $flags=0)
Expand a document tree node.
ILanguageConverter
The shared interface for all language converters.
Definition: ILanguageConverter.php:29
Parser\$mOutput
ParserOutput $mOutput
Definition: Parser.php:232
Parser\$mFunctionHooks
$mFunctionHooks
Definition: Parser.php:193
ParserFactory\$inParserFactory
static int $inParserFactory
Track calls to Parser constructor to aid in deprecation of direct Parser invocation.
Definition: ParserFactory.php:91
Parser\$mOptions
ParserOptions null $mOptions
Definition: Parser.php:287
Parser\$mRevisionUser
$mRevisionUser
Definition: Parser.php:305
Message\plaintextParam
static plaintextParam( $plaintext)
Definition: Message.php:1248
Sanitizer\validateTagAttributes
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:391
Parser\replaceTableOfContentsMarker
static replaceTableOfContentsMarker( $text, $toc)
Replace table of contents marker in parsed HTML.
Definition: Parser.php:4770
Parser\extractSections
extractSections( $text, $sectionId, $mode, $newText='')
Break wikitext input into sections, and either pull or replace some particular section's text.
Definition: Parser.php:5645
Hooks\runner
static runner()
Get a HookRunner instance for calling hooks using the new interfaces.
Definition: Hooks.php:173
Parser\OT_HTML
const OT_HTML
Definition: Parser.php:124
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:131
Parser\fuzzTestPreprocess
fuzzTestPreprocess( $text, PageReference $page, ParserOptions $options)
Strip/replaceVariables/unstrip for preprocessor regression testing.
Definition: Parser.php:6222
PPFrame
Definition: PPFrame.php:28
$line
$line
Definition: mcc.php:119
Parser\EXT_LINK_URL_CLASS
const EXT_LINK_URL_CLASS
Definition: Parser.php:103
MediaWiki\Linker\LinkTarget\getDBkey
getDBkey()
Get the main part with underscores.
Parser\getUserIdentity
getUserIdentity()
Get a user either from the user set on Parser if it's set, or from the ParserOptions object otherwise...
Definition: Parser.php:1166
Parser\renderImageGallery
renderImageGallery( $text, array $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5025
StringUtils\delimiterExplode
static delimiterExplode( $startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...
Definition: StringUtils.php:59
OutputPage\setupOOUI
static setupOOUI( $skinName='default', $dir='ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
Definition: OutputPage.php:4203
Parser\magicLinkCallback
magicLinkCallback(array $m)
Definition: Parser.php:1751
Parser\fetchTemplateAndTitle
fetchTemplateAndTitle(LinkTarget $link)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3549
wfEscapeWikiText
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking,...
Definition: GlobalFunctions.php:1440
Parser\incrementExpensiveFunctionCount
incrementExpensiveFunctionCount()
Definition: Parser.php:4023
Parser\$mImageParams
$mImageParams
Definition: Parser.php:197
Parser\setFunctionHook
setFunctionHook( $id, callable $callback, $flags=0)
Create a parser function, e.g. {{sum:1|2|3}}.
Definition: Parser.php:4936
Parser\setLinkID
setLinkID( $id)
Definition: Parser.php:1126
Sanitizer\cleanUrl
static cleanUrl( $url)
Definition: Sanitizer.php:1627
Parser\__construct
__construct(ServiceOptions $svcOptions, MagicWordFactory $magicWordFactory, Language $contLang, ParserFactory $factory, string $urlProtocols, SpecialPageFactory $spFactory, LinkRendererFactory $linkRendererFactory, NamespaceInfo $nsInfo, LoggerInterface $logger, BadFileLookup $badFileLookup, LanguageConverterFactory $languageConverterFactory, HookContainer $hookContainer, TidyDriverBase $tidy, WANObjectCache $wanCache, UserOptionsLookup $userOptionsLookup, UserFactory $userFactory, TitleFormatter $titleFormatter, HttpRequestFactory $httpRequestFactory, TrackingCategories $trackingCategories)
Constructing parsers directly is not allowed! Use a ParserFactory.
Definition: Parser.php:452
Parser\$magicWordFactory
MagicWordFactory $magicWordFactory
Definition: Parser.php:346
Parser\preprocessToDom
preprocessToDom( $text, $flags=0)
Get the document object model for the given wikitext.
Definition: Parser.php:2864
Parser\setUser
setUser(?UserIdentity $user)
Set the current user.
Definition: Parser.php:980
Parser
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition: Parser.php:91
RequestContext\getMain
static getMain()
Get the RequestContext object associated with the main request.
Definition: RequestContext.php:484
MediaWiki\User\UserOptionsLookup
Provides access to user options.
Definition: UserOptionsLookup.php:29
Title\newFromLinkTarget
static newFromLinkTarget(LinkTarget $linkTarget, $forceClone='')
Returns a Title given a LinkTarget.
Definition: Title.php:289
Parser\getMagicWordFactory
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition: Parser.php:1201
Parser\argSubstitution
argSubstitution(array $piece, PPFrame $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:3864
Linker\makeMediaLinkFile
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:957
Sanitizer\fixTagAttributes
static fixTagAttributes( $text, $element, $sorted=false)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML,...
Definition: Sanitizer.php:703
Parser\fetchFileAndTitle
fetchFileAndTitle(LinkTarget $link, array $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3742
Preprocessor\resetParser
resetParser(?Parser $parser)
Allows resetting the internal Parser reference after Preprocessor is cloned.
Definition: Preprocessor.php:95
Parser\setOutputType
setOutputType( $ot)
Mutator for the output type.
Definition: Parser.php:1054
$lines
if(!file_exists( $CREDITS)) $lines
Definition: updateCredits.php:45
Parser\OT_PREPROCESS
const OT_PREPROCESS
Definition: Parser.php:126
Parser\getExternalLinkAttribs
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link.
Definition: Parser.php:2234
TrackingCategories
This class performs some operations related to tracking categories, such as creating a list of all su...
Definition: TrackingCategories.php:32
Parser\isCurrentRevisionOfTitleCached
isCurrentRevisionOfTitleCached(LinkTarget $link)
Definition: Parser.php:3510
Parser\statelessFetchTemplate
static statelessFetchTemplate( $page, $parser=false)
Static function to get a template. Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3595
Parser\getUserSig
getUserSig(UserIdentity $user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext.
Definition: Parser.php:4630
Parser\$mStripState
StripState $mStripState
Definition: Parser.php:238
Parser\internalParse
internalParse( $text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1551
Parser\validateSig
validateSig( $text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4691
Parser\$mPPNodeCount
$mPPNodeCount
Definition: Parser.php:253
Title
Represents a title within MediaWiki.
Definition: Title.php:47
Parser\resetOutput
resetOutput()
Reset the ParserOutput.
Definition: Parser.php:635
Parser\stripOuterParagraph
static stripOuterParagraph( $html)
Strip outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6346
Parser\$mVarCache
$mVarCache
Definition: Parser.php:196
Parser\$mDefaultSort
$mDefaultSort
Definition: Parser.php:261
Parser\$mExpensiveFunctionCount
$mExpensiveFunctionCount
Definition: Parser.php:268
Parser\normalizeLinkUrl
static normalizeLinkUrl( $url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:2265
Parser\$mExtLinkBracketedRegex
$mExtLinkBracketedRegex
Definition: Parser.php:220
wfMatchesDomainList
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
Definition: GlobalFunctions.php:860
Parser\$mIncludeSizes
$mIncludeSizes
Definition: Parser.php:251
$cache
$cache
Definition: mcc.php:33
Parser\$httpRequestFactory
HttpRequestFactory $httpRequestFactory
Definition: Parser.php:400
MalformedTitleException
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
Definition: MalformedTitleException.php:26
Parser\getSection
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($sectionId).
Definition: Parser.php:5774
Parser\$mUser
UserIdentity $mUser
Definition: Parser.php:278
Xml\isWellFormedXmlFragment
static isWellFormedXmlFragment( $text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:745
ParserOptions\getUserIdentity
getUserIdentity()
Get the identity of the user for whom the parse is made.
Definition: ParserOptions.php:1010
Parser\preprocess
preprocess( $text, ?PageReference $page, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:911
Parser\$mRevisionTimestamp
$mRevisionTimestamp
Definition: Parser.php:303
Parser\replaceSection
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldText after the content of the section specified by $sectionId has been replac...
Definition: Parser.php:5791
Sanitizer\ID_PRIMARY
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition: Sanitizer.php:71
Parser\$logger
LoggerInterface $logger
Definition: Parser.php:379
TitleFormatter
A title formatter service for MediaWiki.
Definition: TitleFormatter.php:35
PPFrame\virtualBracketedImplode
virtualBracketedImplode( $start, $sep, $end,... $params)
Virtual implode with brackets.
Parser\armorLinks
armorLinks( $text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2758
Linker\tocUnindent
static tocUnindent( $level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1611
Linker\makeImageLink
static makeImageLink(Parser $parser, LinkTarget $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:297
getTitle
getTitle()
Definition: RevisionSearchResultTrait.php:81
Parser\parse
parse( $text, PageReference $page, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:658
MediaWiki\Cache\CacheKeyHelper
Helper class for mapping value objects representing basic entities to cache keys.
Definition: CacheKeyHelper.php:43
Parser\getBadFileLookup
getBadFileLookup()
Get the BadFileLookup instance that this Parser is using.
Definition: Parser.php:1221
NS_CATEGORY
const NS_CATEGORY
Definition: Defines.php:78
Parser\getOutput
getOutput()
Definition: Parser.php:1081
StringUtils\delimiterReplace
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to preg_replace() with flags.
Definition: StringUtils.php:248
Parser\handleInternalLinks2
handleInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2415
Parser\getOutputType
getOutputType()
Accessor for the output type.
Definition: Parser.php:1045
Parser\$mGeneratedPPNodeCount
$mGeneratedPPNodeCount
Definition: Parser.php:258
Parser\getHookRunner
getHookRunner()
Get a HookRunner for calling core hooks.
Definition: Parser.php:1657
PPFrame\getArgument
getArgument( $name)
Get an argument to this frame by name.
TextContent\normalizeLineEndings
static normalizeLineEndings( $text)
Do a "\r\n" -> "\n" and "\r" -> "\n" transformation as well as trim trailing whitespace.
Definition: TextContent.php:203
Parser\getSectionNameFromStrippedText
static getSectionNameFromStrippedText( $text)
Definition: Parser.php:6050
Sanitizer\escapeIdForLink
static escapeIdForLink( $id)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid URL...
Definition: Sanitizer.php:838
Linker\normalizeSubpageLink
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1450
MediaWiki\Config\ServiceOptions\get
get( $key)
Definition: ServiceOptions.php:93
MediaWiki\HookContainer\HookContainer
HookContainer class.
Definition: HookContainer.php:45
Title\castFromPageReference
static castFromPageReference(?PageReference $pageReference)
Return a Title for a given Reference.
Definition: Title.php:339
Parser\SFH_NO_HASH
const SFH_NO_HASH
Definition: Parser.php:94
CoreMagicVariables\expand
static expand(Parser $parser, string $id, int $ts, NamespaceInfo $nsInfo, ServiceOptions $svcOptions, LoggerInterface $logger)
Expand the magic variable given by $index.
Definition: CoreMagicVariables.php:49
Parser\$mShowToc
$mShowToc
Definition: Parser.php:270
ImageGalleryBase\factory
static factory( $mode=false, IContextSource $context=null)
Get a new image gallery.
Definition: ImageGalleryBase.php:116
Sanitizer\decodeTagAttributes
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1004
Parser\guessSectionNameFromStrippedText
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition: Parser.php:6112
Parser\getTemplateDom
getTemplateDom(LinkTarget $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3435
Parser\$languageConverterFactory
LanguageConverterFactory $languageConverterFactory
Definition: Parser.php:352
NamespaceInfo
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Definition: NamespaceInfo.php:35
PPFrame\isTemplate
isTemplate()
Return true if the frame is a template frame.
MediaWiki\HookContainer\HookRunner
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:557
Parser\parseLinkParameter
parseLinkParameter( $value)
Parse the value of 'link' parameter in image syntax ([[File:Foo.jpg|link=<value>]]).
Definition: Parser.php:5480
$t
$t
Definition: testCompression.php:74
Title\castFromLinkTarget
static castFromLinkTarget( $linkTarget)
Same as newFromLinkTarget, but if passed null, returns null.
Definition: Title.php:313
Title\legalChars
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:734
Sanitizer\decodeCharReferences
static decodeCharReferences( $text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string.
Definition: Sanitizer.php:1228
Parser\getRevisionTimestamp
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:5943
Parser\expandMagicVariable
expandMagicVariable( $index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2785
NS_FILE
const NS_FILE
Definition: Defines.php:70
MediaWiki\Linker\LinkTarget
Definition: LinkTarget.php:26
Parser\pstPass2
pstPass2( $text, UserIdentity $user)
Pre-save transform helper function.
Definition: Parser.php:4553
Parser\$mPreprocessor
Preprocessor $mPreprocessor
Definition: Parser.php:226
Parser\parseWidthParam
static parseWidthParam( $value, $parseHeight=true)
Parse a width param of imagelink like 300px or 200x300px.
Definition: Parser.php:6289
RawMessage
Variant of the Message class.
Definition: RawMessage.php:35
Parser\cleanSigInSig
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4749
Parser\setTitle
setTitle(Title $t=null)
Set the context title.
Definition: Parser.php:991
User
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:67
Parser\replaceLinkHoldersText
replaceLinkHoldersText( $text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:5007
Parser\normalizeUrlComponent
static normalizeUrlComponent( $component, $unsafe)
Definition: Parser.php:2324
Parser\clearTagHooks
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:4887
MWTimestamp\getLocalInstance
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
Definition: MWTimestamp.php:173
OT_WIKI
const OT_WIKI
Definition: Defines.php:158
MediaWiki\User\UserFactory
Creates User objects.
Definition: UserFactory.php:41
Linker\makeExternalImage
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage().
Definition: Linker.php:241
Parser\$mTitle
Title null $mTitle
Since 1.34, leaving mTitle uninitialized or setting mTitle to null is deprecated.
Definition: Parser.php:296
Parser\getLinkRenderer
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:1186
Language
Internationalisation code. See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Definition: Language.php:42
Parser\$mRevisionId
$mRevisionId
Definition: Parser.php:301
RequestContext\setTitle
setTitle(Title $title=null)
Definition: RequestContext.php:173
Parser\setPage
setPage(?PageReference $t=null)
Set the page used as context for parsing, e.g. when resolving relative subpage links.
Definition: Parser.php:1013
Parser\$mRevisionSize
$mRevisionSize
Definition: Parser.php:307
Parser\getRevisionId
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5878
Parser\fetchCurrentRevisionRecordOfTitle
fetchCurrentRevisionRecordOfTitle(LinkTarget $link)
Fetch the current revision of a given title as a RevisionRecord.
Definition: Parser.php:3480
MediaWiki\Revision\SlotRecord
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:40
OT_PLAIN
const OT_PLAIN
Definition: Defines.php:161
Parser\clearState
clearState()
Clear Parser state.
Definition: Parser.php:587
Parser\guessLegacySectionNameFromWikiText
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition: Parser.php:6099
MediaWiki\Config\ServiceOptions\assertRequiredOptions
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
Definition: ServiceOptions.php:71
ParserOptions\newFromUser
static newFromUser( $user)
Get a ParserOptions object from a given user.
Definition: ParserOptions.php:1044
Parser\stripSectionName
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6152
Parser\EXT_IMAGE_REGEX
const EXT_IMAGE_REGEX
Definition: Parser.php:110
Parser\getPreprocessor
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:1176
Parser\doBlockLevels
doBlockLevels( $text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: Parser.php:2772
Parser\$hookContainer
HookContainer $hookContainer
Definition: Parser.php:385
$type
$type
Definition: testCompression.php:52