MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
30 
71 class Parser {
77  const VERSION = '1.6.4';
78 
84 
85  # Flags for Parser::setFunctionHook
86  const SFH_NO_HASH = 1;
87  const SFH_OBJECT_ARGS = 2;
88 
89  # Constants needed for external link processing
90  # Everything except bracket, space, or control characters
91  # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
92  # as well as U+3000 IDEOGRAPHIC SPACE (see T21052)
93  # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
94  # uses to replace invalid HTML characters.
95  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
96  # Simplified expression to match an IPv4 or IPv6 address, or
97  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
98  const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
99  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
100  // phpcs:ignore Generic.Files.LineLength
101  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
102  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
103 
104  # Regular expression for a non-newline space
105  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
106 
107  # Flags for preprocessToDom
108  const PTD_FOR_INCLUSION = 1;
109 
110  # Allowed values for $this->mOutputType
111  # Parameter to startExternalParse().
112  const OT_HTML = 1; # like parse()
113  const OT_WIKI = 2; # like preSaveTransform()
115  const OT_MSG = 3;
116  const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
117 
135  const MARKER_SUFFIX = "-QINU`\"'\x7f";
136  const MARKER_PREFIX = "\x7f'\"`UNIQ-";
137 
138  # Markers used for wrapping the table of contents
139  const TOC_START = '<mw:toc>';
140  const TOC_END = '</mw:toc>';
141 
143  const MAX_TTS = 900;
144 
145  # Persistent:
146  public $mTagHooks = [];
148  public $mFunctionHooks = [];
149  public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
150  public $mFunctionTagHooks = [];
151  public $mStripList = [];
152  public $mDefaultStripList = [];
153  public $mVarCache = [];
154  public $mImageParams = [];
156  public $mMarkerIndex = 0;
160  public $mFirstCall = true;
161 
162  # Initialised by initialiseVariables()
163 
167  public $mVariables;
168 
172  public $mSubstWords;
173 
178  public $mConf;
179 
180  # Initialised in constructor
182 
183  # Initialized in getPreprocessor()
184 
186 
187  # Cleared with clearState():
188 
191  public $mOutput;
192  public $mAutonumber;
193 
197  public $mStripState;
198 
204 
205  public $mLinkID;
209  public $mExpensiveFunctionCount; # number of expensive parser function calls
211 
215  public $mUser; # User object; only used when doing pre-save transform
216 
217  # Temporary
218  # These are variables reset at least once per parse regardless of $clearState
219 
223  public $mOptions;
224 
228  public $mTitle; # Title context, used for self-link rendering and similar things
229  public $mOutputType; # Output type, one of the OT_xxx constants
230  public $ot; # Shortcut alias, see setOutputType()
231  public $mRevisionObject; # The revision object of the specified revision ID
232  public $mRevisionId; # ID to display in {{REVISIONID}} tags
233  public $mRevisionTimestamp; # The timestamp of the specified revision ID
234  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
235  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
236  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
237  public $mInputSize = false; # For {{PAGESIZE}} on current page.
238 
245 
253 
258  public $mInParse = false;
259 
261  protected $mProfiler;
262 
266  protected $mLinkRenderer;
267 
270 
272  private $contLang;
273 
275  private $factory;
276 
279 
287  private $svcOptions;
288 
291 
293  private $nsInfo;
294 
301  public static $constructorOptions = [
302  // See $wgParserConf documentation
303  'class',
304  'preprocessorClass',
305  // See documentation for the corresponding config options
306  'ArticlePath',
307  'EnableScaryTranscluding',
308  'ExtraInterlanguageLinkPrefixes',
309  'FragmentMode',
310  'LanguageCode',
311  'MaxSigChars',
312  'MaxTocLevel',
313  'MiserMode',
314  'ScriptPath',
315  'Server',
316  'ServerName',
317  'ShowHostnames',
318  'Sitename',
319  'StylePath',
320  'TranscludeCacheExpiry',
321  ];
322 
335  public function __construct(
337  Language $contLang = null, ParserFactory $factory = null, $urlProtocols = null,
339  ) {
340  $services = MediaWikiServices::getInstance();
341  if ( !$svcOptions || is_array( $svcOptions ) ) {
342  // Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
343  // Config, and the eighth is LinkRendererFactory.
344  $this->mConf = (array)$svcOptions;
345  if ( empty( $this->mConf['class'] ) ) {
346  $this->mConf['class'] = self::class;
347  }
348  if ( empty( $this->mConf['preprocessorClass'] ) ) {
349  $this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
350  }
351  $this->svcOptions = new ServiceOptions( self::$constructorOptions,
352  $this->mConf,
353  func_num_args() > 6 ? func_get_arg( 6 ) : $services->getMainConfig()
354  );
355  $linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
356  $nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
357  } else {
358  // New calling convention
359  $svcOptions->assertRequiredOptions( self::$constructorOptions );
360  // $this->mConf is public, so we'll keep those two options there as well for
361  // compatibility until it's removed
362  $this->mConf = [
363  'class' => $svcOptions->get( 'class' ),
364  'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
365  ];
366  $this->svcOptions = $svcOptions;
367  }
368 
369  $this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
370  $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
371  self::EXT_LINK_ADDR .
372  self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
373 
374  $this->magicWordFactory = $magicWordFactory ??
375  $services->getMagicWordFactory();
376 
377  $this->contLang = $contLang ?? $services->getContentLanguage();
378 
379  $this->factory = $factory ?? $services->getParserFactory();
380  $this->specialPageFactory = $spFactory ?? $services->getSpecialPageFactory();
381  $this->linkRendererFactory = $linkRendererFactory ?? $services->getLinkRendererFactory();
382  $this->nsInfo = $nsInfo ?? $services->getNamespaceInfo();
383  }
384 
/**
 * Release everything this instance holds when it is destroyed.
 *
 * mLinkHolders is dropped explicitly first (if set), then every property
 * reachable by iterating over $this is unset in turn.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}

	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
396 
/**
 * Fix up a freshly cloned parser: reset the in-parse flag, detach fields
 * that may have been turned into references, and notify hook subscribers.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: taking a reference "to" an object field turns the field itself
	// into a reference (for as long as the other reference lives), and clone
	// copies reference fields as references. Old PHP4-era hooks pass these
	// fields by reference, so the clone could otherwise end up sharing them
	// with the original object.
	$possiblyReferenced = [ 'mStripState', 'mVarCache' ];
	foreach ( $possiblyReferenced as $field ) {
		// Take a plain (non-reference) copy of the value, then rebind the
		// field so that it references that fresh copy instead.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
419 
427  public static function getDefaultPreprocessorClass() {
429  }
430 
434  public function firstCallInit() {
435  if ( !$this->mFirstCall ) {
436  return;
437  }
438  $this->mFirstCall = false;
439 
441  CoreTagHooks::register( $this );
442  $this->initialiseVariables();
443 
444  // Avoid PHP 7.1 warning from passing $this by reference
445  $parser = $this;
446  Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
447  }
448 
/**
 * Reset all per-parse state so the instance can start a new parse.
 *
 * Runs firstCallInit() first, installs a fresh ParserOutput, LinkHolderArray,
 * StripState and SectionProfiler, zeroes the counters, and finally fires the
 * 'ParserClearState' hook.
 */
public function clearState() {
	$this->firstCallInit();

	// Fresh output object; let it record which parser options get used.
	$this->mOutput = new ParserOutput;
	$this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );

	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	// Forget everything known about the revision being parsed.
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor that still points at another parser
	# instance must not be reused by this one.
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// The hook takes the parser by reference; pass a local alias instead of
	// $this to avoid the PHP 7.1 warning.
	$parser = $this;
	Hooks::run( 'ParserClearState', [ &$parser ] );
}
499 
514  public function parse(
516  $linestart = true, $clearState = true, $revid = null
517  ) {
518  if ( $clearState ) {
519  // We use U+007F DELETE to construct strip markers, so we have to make
520  // sure that this character does not occur in the input text.
521  $text = strtr( $text, "\x7f", "?" );
522  $magicScopeVariable = $this->lock();
523  }
524  // Strip U+0000 NULL (T159174)
525  $text = str_replace( "\000", '', $text );
526 
527  $this->startParse( $title, $options, self::OT_HTML, $clearState );
528 
529  $this->currentRevisionCache = null;
530  $this->mInputSize = strlen( $text );
531  if ( $this->mOptions->getEnableLimitReport() ) {
532  $this->mOutput->resetParseStartTime();
533  }
534 
535  $oldRevisionId = $this->mRevisionId;
536  $oldRevisionObject = $this->mRevisionObject;
537  $oldRevisionTimestamp = $this->mRevisionTimestamp;
538  $oldRevisionUser = $this->mRevisionUser;
539  $oldRevisionSize = $this->mRevisionSize;
540  if ( $revid !== null ) {
541  $this->mRevisionId = $revid;
542  $this->mRevisionObject = null;
543  $this->mRevisionTimestamp = null;
544  $this->mRevisionUser = null;
545  $this->mRevisionSize = null;
546  }
547 
548  // Avoid PHP 7.1 warning from passing $this by reference
549  $parser = $this;
550  Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
551  # No more strip!
552  Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
553  $text = $this->internalParse( $text );
554  Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );
555 
556  $text = $this->internalParseHalfParsed( $text, true, $linestart );
557 
565  if ( !( $options->getDisableTitleConversion()
566  || isset( $this->mDoubleUnderscores['nocontentconvert'] )
567  || isset( $this->mDoubleUnderscores['notitleconvert'] )
568  || $this->mOutput->getDisplayTitle() !== false )
569  ) {
570  $convruletitle = $this->getTargetLanguage()->getConvRuleTitle();
571  if ( $convruletitle ) {
572  $this->mOutput->setTitleText( $convruletitle );
573  } else {
574  $titleText = $this->getTargetLanguage()->convertTitle( $title );
575  $this->mOutput->setTitleText( $titleText );
576  }
577  }
578 
579  # Compute runtime adaptive expiry if set
580  $this->mOutput->finalizeAdaptiveCacheExpiry();
581 
582  # Warn if too many heavyweight parser functions were used
583  if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
584  $this->limitationWarn( 'expensive-parserfunction',
585  $this->mExpensiveFunctionCount,
586  $this->mOptions->getExpensiveParserFunctionLimit()
587  );
588  }
589 
590  # Information on limits, for the benefit of users who try to skirt them
591  if ( $this->mOptions->getEnableLimitReport() ) {
592  $text .= $this->makeLimitReport();
593  }
594 
595  # Wrap non-interface parser output in a <div> so it can be targeted
596  # with CSS (T37247)
597  $class = $this->mOptions->getWrapOutputClass();
598  if ( $class !== false && !$this->mOptions->getInterfaceMessage() ) {
599  $this->mOutput->addWrapperDivClass( $class );
600  }
601 
602  $this->mOutput->setText( $text );
603 
604  $this->mRevisionId = $oldRevisionId;
605  $this->mRevisionObject = $oldRevisionObject;
606  $this->mRevisionTimestamp = $oldRevisionTimestamp;
607  $this->mRevisionUser = $oldRevisionUser;
608  $this->mRevisionSize = $oldRevisionSize;
609  $this->mInputSize = false;
610  $this->currentRevisionCache = null;
611 
612  return $this->mOutput;
613  }
614 
/**
 * Record limit/timing/cache figures on the ParserOutput and build the
 * matching HTML comments ("NewPP limit report" and the transclusion
 * expansion time report).
 *
 * @return string The two HTML comments, ready to be appended to the output
 */
protected function makeLimitReport() {
	$maxIncludeSize = $this->mOptions->getMaxIncludeSize();

	// --- Timing ---------------------------------------------------------
	$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuTime !== null ) {
		$this->mOutput->setLimitReportData( 'limitreport-cputime', sprintf( "%.3f", $cpuTime ) );
	}
	$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
	$this->mOutput->setLimitReportData( 'limitreport-walltime', sprintf( "%.3f", $wallTime ) );

	// --- Counters, each stored as [ value reached, configured limit ] ----
	$counters = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ],
		'limitreport-ppgeneratednodes' =>
			[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $maxIncludeSize ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $counters as $reportKey => $reachedAndLimit ) {
		$this->mOutput->setLimitReportData( $reportKey, $reachedAndLimit );
	}

	// Each strip-state entry is a [ key, value ] pair.
	foreach ( $this->mStripState->getLimitReport() as $entry ) {
		list( $stripKey, $stripValue ) = $entry;
		$this->mOutput->setLimitReportData( $stripKey, $stripValue );
	}

	Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

	// --- Human-readable report ------------------------------------------
	$limitReport = "NewPP limit report\n";
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$limitReport .= 'Parsed by ' . wfHostname() . "\n";
	}
	$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
	$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
	$dynamic = $this->mOutput->hasDynamicContent() ? 'true' : 'false';
	$limitReport .= 'Dynamic content: ' . $dynamic . "\n";
	$limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";

	foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
		// A hook returning false has taken care of this key itself.
		$useDefaultFormat = Hooks::run( 'ParserLimitReportFormat',
			[ $key, &$value, &$limitReport, false, false ]
		);
		if ( !$useDefaultFormat ) {
			continue;
		}

		$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			// No dedicated value message: print the raw value.
			$valueMsg = new RawMessage( '$1' );
		}
		if ( $keyMsg->isDisabled() || $valueMsg->isDisabled() ) {
			continue;
		}

		$valueMsg->params( $value );
		$limitReport .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
	}

	// This is not really HTML output, so decode the entities and then
	// re-encode only what must be hidden inside an HTML comment.
	$limitReport = htmlspecialchars_decode( $limitReport );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
	$text = "\n<!-- \n" . $limitReport . "-->\n";

	// --- Template profiling data, human/machine readable -----------------
	$dataByFunc = $this->mProfiler->getFunctionStats();
	$byRealTimeDescending = function ( $a, $b ) {
		return $b['real'] <=> $a['real'];
	};
	uasort( $dataByFunc, $byRealTimeDescending );

	$profileReport = [];
	$slowest = array_slice( $dataByFunc, 0, 10 );
	foreach ( $slowest as $stats ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$stats['%real'],
			$stats['real'],
			$stats['calls'],
			htmlspecialchars( $stats['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n"
		. implode( "\n", $profileReport ) . "\n-->\n";

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// --- Other cache related metadata -------------------------------------
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData( 'cachereport-timestamp', $this->mOutput->getCacheTime() );
	$this->mOutput->setLimitReportData( 'cachereport-ttl', $this->mOutput->getCacheExpiry() );
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasDynamicContent()
	);

	// Log pages that generated more than a tenth of the allowed node count.
	$generatedNodeLogThreshold = $this->mOptions->getMaxGeneratedPPNodeCount() / 10;
	if ( $this->mGeneratedPPNodeCount > $generatedNodeLogThreshold ) {
		wfDebugLog(
			'generated-pp-node-count',
			$this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey()
		);
	}

	return $text;
}
730 
/**
 * Run the strip hooks and internalParse() on a piece of text, without
 * treating it as the main document ($isMain is passed as false).
 *
 * @param string $text Text to parse
 * @param PPFrame|bool $frame Frame handed on to internalParse(); false for none
 * @return string Result of internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	// The hooks take the parser by reference; pass a local alias instead of
	// $this to avoid the PHP 7.1 warning.
	$parser = $this;
	foreach ( [ 'ParserBeforeStrip', 'ParserAfterStrip' ] as $hookName ) {
		Hooks::run( $hookName, [ &$parser, &$text, &$this->mStripState ] );
	}

	return $this->internalParse( $text, false, $frame );
}
763 
/**
 * Like recursiveTagParse(), but additionally passes the result through
 * internalParseHalfParsed() (with $isMain = false).
 *
 * @param string $text Text to parse
 * @param PPFrame|bool $frame Frame handed on to recursiveTagParse()
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );

	return $this->internalParseHalfParsed( $halfParsed, false );
}
788 
/**
 * Expand variables in wikitext (OT_PREPROCESS mode) and return the result
 * with strip markers restored.
 *
 * @param string $text Text to preprocess
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to use; left untouched when null
 * @param PPFrame|bool $frame Frame handed on to replaceVariables()
 * @return string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Kept in a local so whatever lock() returns lives until this method
	// returns (presumably a scoped lock — confirm in lock()).
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );

	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	// The hooks take the parser by reference; pass a local alias instead of
	// $this to avoid the PHP 7.1 warning.
	$parser = $this;
	foreach ( [ 'ParserBeforeStrip', 'ParserAfterStrip' ] as $hookName ) {
		Hooks::run( $hookName, [ &$parser, &$text, &$this->mStripState ] );
	}

	$expanded = $this->replaceVariables( $text, $frame );

	return $this->mStripState->unstripBoth( $expanded );
}
816 
/**
 * Expand variables in $text and restore strip markers, without starting a
 * new parse (no lock, no startParse(), no hooks).
 *
 * @param string $text Text to preprocess
 * @param PPFrame|bool $frame Frame handed on to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );

	return $this->mStripState->unstripBoth( $expanded );
}
831 
845  public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
846  $msg = new RawMessage( $text );
847  $text = $msg->params( $params )->plain();
848 
849  # Parser (re)initialisation
850  $magicScopeVariable = $this->lock();
851  $this->startParse( $title, $options, self::OT_PLAIN, true );
852 
854  $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
855  $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
856  $text = $this->mStripState->unstripBoth( $text );
857  return $text;
858  }
859 
/**
 * Set the user stored in mUser. getUser() prefers this value over the
 * user from the parser options; clearState() resets it to null.
 *
 * @param User|null $user
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
869 
/**
 * Set the title context for the parse.
 *
 * A falsy $t is replaced by the title 'NO TITLE'. Any fragment on the
 * title is removed before it is stored.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	if ( !$t ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $t->hasFragment()
		? $t->createFragmentTarget( '' )
		: $t;
}
887 
/**
 * Get the title context stored by setTitle().
 *
 * @return Title|null
 */
public function getTitle() {
	return $this->mTitle;
}
896 
/**
 * Combined accessor/mutator for mTitle, delegating to wfSetVar().
 *
 * NOTE(review): presumably wfSetVar() assigns $x when it is non-null and
 * returns the previous value — confirm against wfSetVar().
 *
 * @param Title|null $x
 * @return mixed Whatever wfSetVar() returns
 */
public function Title( $x = null ) {
	return wfSetVar( $this->mTitle, $x );
}
906 
/**
 * Set the output type and rebuild the $this->ot shortcut array, which maps
 * 'html', 'wiki', 'pre' and 'plain' to a boolean telling whether that is
 * the active type.
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$typeByAlias = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$shortcuts = [];
	foreach ( $typeByAlias as $alias => $type ) {
		// Loose comparison on purpose: matches the historical behaviour.
		$shortcuts[$alias] = ( $ot == $type );
	}
	$this->ot = $shortcuts;
}
922 
/**
 * Combined accessor/mutator for mOutputType, delegating to wfSetVar().
 * Unlike setOutputType(), this does not touch the $this->ot shortcut array.
 *
 * NOTE(review): presumably wfSetVar() assigns $x when it is non-null and
 * returns the previous value — confirm against wfSetVar().
 *
 * @param int|null $x
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	return wfSetVar( $this->mOutputType, $x );
}
932 
/**
 * Get the ParserOutput currently held in mOutput (created by clearState()).
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
941 
/**
 * Get the parser options currently held in mOptions.
 *
 * @return ParserOptions
 */
public function getOptions() {
	return $this->mOptions;
}
950 
/**
 * Combined accessor/mutator for mOptions, delegating to wfSetVar().
 *
 * NOTE(review): presumably wfSetVar() assigns $x when it is non-null and
 * returns the previous value — confirm against wfSetVar().
 *
 * @param ParserOptions|null $x
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	return wfSetVar( $this->mOptions, $x );
}
960 
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The ID before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;

	return $current;
}
967 
/**
 * Overwrite the link ID counter used by nextLinkID().
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
974 
/**
 * Get the language used by parser functions; this is simply the target
 * language (see getTargetLanguage()).
 *
 * @return Language
 */
public function getFunctionLang() {
	return $this->getTargetLanguage();
}
982 
/**
 * Determine the language the content is being parsed into.
 *
 * Resolution order: the target language set in the parser options, then
 * (for interface messages) the user language object, then the page
 * language of the current title.
 *
 * @throws MWException If none of the first two apply and mTitle is null
 * @return Language
 */
public function getTargetLanguage() {
	$explicitTarget = $this->mOptions->getTargetLanguage();
	if ( $explicitTarget !== null ) {
		return $explicitTarget;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
1005 
/**
 * Get the language whose converter should be used; this is simply the
 * target language (see getTargetLanguage()).
 *
 * @return Language
 */
public function getConverterLanguage() {
	return $this->getTargetLanguage();
}
1014 
/**
 * Get the user for this parse: the one set through setUser() when there is
 * one, otherwise the user from the parser options.
 *
 * @return User
 */
public function getUser() {
	if ( $this->mUser === null ) {
		return $this->mOptions->getUser();
	}

	return $this->mUser;
}
1027 
/**
 * Get the preprocessor, creating it on first use from the class named by
 * the 'preprocessorClass' service option.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->svcOptions->get( 'preprocessorClass' );
	$this->mPreprocessor = new $preprocessorClass( $this );

	return $this->mPreprocessor;
}
1040 
/**
 * Get the LinkRenderer, creating it on first use.
 *
 * XXX The renderer is built with the stub threshold of the options that
 * are current at creation time and is then cached for the lifetime of
 * this object.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	$this->mLinkRenderer = $this->linkRendererFactory->create();
	$stubThreshold = $this->getOptions()->getStubThreshold();
	$this->mLinkRenderer->setStubThreshold( $stubThreshold );

	return $this->mLinkRenderer;
}
1058 
/**
 * Get the MagicWordFactory that was chosen in the constructor.
 *
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1068 
/**
 * Get the content language that was chosen in the constructor.
 *
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1078 
/**
 * Replace a set of XML-like tags (and HTML comments) in a text by unique
 * markers, and collect what was removed.
 *
 * Every occurrence of one of the tags in $elements, as well as every
 * "<!--" comment, is cut out of $text together with its content and
 * replaced by a marker built from MARKER_PREFIX, the element name, a
 * running hexadecimal counter and MARKER_SUFFIX. A tag without a matching
 * end tag runs to the end of the text.
 *
 * @param string[] $elements Tag names to extract. They are joined with '|'
 *   and interpolated into the start-tag pattern as given.
 * @param string $text Source text
 * @param array &$matches Overwritten with a map of marker => [
 *   element name as found in the text,
 *   tag content (null for a self-closing tag),
 *   attributes as returned by Sanitizer::decodeTagAttributes(),
 *   the complete original text of the tag including its end tag ]
 * @return string $text with the extracted parts replaced by markers
 */
public static function extractTagsAndParams( $elements, $text, &$matches ) {
	// Shared across calls so that markers never repeat within a request.
	static $n = 1;
	$stripped = '';
	$matches = [];

	$taglist = implode( '|', $elements );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		// Split at the first start tag or comment opener. With the
		// delimiters captured this yields 5 parts for a tag and 6 for a
		// comment; fewer means there is nothing left to extract.
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			list( , $element, $attributes, $close, $inside ) = $p;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				// $element is literal text taken from the input, so it must
				// be escaped before being used inside a pattern; otherwise a
				// name containing a regex metacharacter or the delimiter
				// would match the wrong end tag or break the pattern.
				$end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				list( , $tail, $text ) = $q;
			}
		}

		$matches[$marker] = [ $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" ];
	}
	return $stripped;
}
1155 
/**
 * Get the list held in mStripList.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1164 
/**
 * Register $text in the strip state under a fresh "-item-N-" marker and
 * return that marker. Each call uses the next value of mMarkerIndex.
 *
 * @param string $text Text to store in the strip state
 * @return string The marker that now stands for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1180 
 /**
  * Convert wikitext table markup into HTML table markup.
  *
  * The text is processed line by line. A line starting (after trimming) with
  * "{|" opens a table, "|}" closes it, "|-" starts a row, "|+" a caption,
  * "|" a data cell and "!" a header cell. Lines outside any table are passed
  * through unchanged. Per-table state is kept on parallel stacks so that
  * tables can nest. Tables still open at the end of the text are closed.
  *
  * @param string $text Wikitext, possibly containing table markup
  * @return string Text with table markup replaced by HTML; the empty string
  *   if the result would consist of nothing but one empty table
  */
1188  public function doTableStuff( $text ) {
1189  $lines = StringUtils::explode( "\n", $text );
1190  $out = '';
 # The five arrays below are parallel stacks with one entry per open table;
 # the innermost table is on top.
1191  $td_history = []; # Is currently a td tag open?
1192  $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1193  $tr_history = []; # Is currently a tr tag open?
1194  $tr_attributes = []; # history of tr attributes
1195  $has_opened_tr = []; # Did this table open a <tr> element?
 # Not a stack: holds the indent of the most recently opened table only.
1196  $indent_level = 0; # indent level of the table
1197 
1198  foreach ( $lines as $outLine ) {
 # Markup is detected on the trimmed line; lines that are passed through
 # untouched are emitted from $outLine and keep their whitespace.
1199  $line = trim( $outLine );
1200 
1201  if ( $line === '' ) { # empty line, go to next line
1202  $out .= $outLine . "\n";
1203  continue;
1204  }
1205 
1206  $first_character = $line[0];
1207  $first_two = substr( $line, 0, 2 );
1208  $matches = [];
1209 
1210  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1211  # First check if we are starting a new table
 # The number of leading colons is the indent level; each level is
 # rendered as one <dl><dd> wrapper.
1212  $indent_level = strlen( $matches[1] );
1213 
1214  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1215  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1216 
1217  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
 # Push a fresh state frame for the new table.
1218  array_push( $td_history, false );
1219  array_push( $last_tag_history, '' );
1220  array_push( $tr_history, false );
1221  array_push( $tr_attributes, '' );
1222  array_push( $has_opened_tr, false );
1223  } elseif ( count( $td_history ) == 0 ) {
1224  # Don't do any of the following
 # (we are not inside any table, so the line is ordinary text)
1225  $out .= $outLine . "\n";
1226  continue;
1227  } elseif ( $first_two === '|}' ) {
1228  # We are ending a table
 # Pop this table's frame from every stack and prepend whatever closing
 # tags are still owed, innermost element first.
1229  $line = '</table>' . substr( $line, 2 );
1230  $last_tag = array_pop( $last_tag_history );
1231 
 # A table that never opened a row gets one empty row and cell.
1232  if ( !array_pop( $has_opened_tr ) ) {
1233  $line = "<tr><td></td></tr>{$line}";
1234  }
1235 
1236  if ( array_pop( $tr_history ) ) {
1237  $line = "</tr>{$line}";
1238  }
1239 
1240  if ( array_pop( $td_history ) ) {
1241  $line = "</{$last_tag}>{$line}";
1242  }
1243  array_pop( $tr_attributes );
1244  if ( $indent_level > 0 ) {
1245  $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1246  } else {
1247  $outLine = $line;
1248  }
1249  } elseif ( $first_two === '|-' ) {
1250  # Now we have a table row
1251  $line = preg_replace( '#^\|-+#', '', $line );
1252 
1253  # Whats after the tag is now only attributes
 # They are only stored here; the <tr> itself is emitted when the first
 # cell of the row is seen.
1254  $attributes = $this->mStripState->unstripBoth( $line );
1255  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
1256  array_pop( $tr_attributes );
1257  array_push( $tr_attributes, $attributes );
1258 
1259  $line = '';
1260  $last_tag = array_pop( $last_tag_history );
1261  array_pop( $has_opened_tr );
1262  array_push( $has_opened_tr, true );
1263 
1264  if ( array_pop( $tr_history ) ) {
1265  $line = '</tr>';
1266  }
1267 
1268  if ( array_pop( $td_history ) ) {
1269  $line = "</{$last_tag}>{$line}";
1270  }
1271 
1272  $outLine = $line;
1273  array_push( $tr_history, false );
1274  array_push( $td_history, false );
1275  array_push( $last_tag_history, '' );
1276  } elseif ( $first_character === '|'
1277  || $first_character === '!'
1278  || $first_two === '|+'
1279  ) {
1280  # This might be cell elements, td, th or captions
 # From here on $first_character encodes the cell kind:
 # '|' = td, '!' = th, '+' = caption.
1281  if ( $first_two === '|+' ) {
1282  $first_character = '+';
1283  $line = substr( $line, 2 );
1284  } else {
1285  $line = substr( $line, 1 );
1286  }
1287 
1288  // Implies both are valid for table headings.
1289  if ( $first_character === '!' ) {
1290  $line = StringUtils::replaceMarkup( '!!', '||', $line );
1291  }
1292 
1293  # Split up multiple cells on the same line.
1294  # FIXME : This can result in improper nesting of tags processed
1295  # by earlier parser steps.
1296  $cells = explode( '||', $line );
1297 
1298  $outLine = '';
1299 
1300  # Loop through each table cell
1301  foreach ( $cells as $cell ) {
1302  $previous = '';
 # Captions do not live in a row; anything else opens a <tr> (with the
 # attributes stored by the last "|-") unless one is already open.
1303  if ( $first_character !== '+' ) {
1304  $tr_after = array_pop( $tr_attributes );
1305  if ( !array_pop( $tr_history ) ) {
1306  $previous = "<tr{$tr_after}>\n";
1307  }
1308  array_push( $tr_history, true );
1309  array_push( $tr_attributes, '' );
1310  array_pop( $has_opened_tr );
1311  array_push( $has_opened_tr, true );
1312  }
1313 
1314  $last_tag = array_pop( $last_tag_history );
1315 
 # Close the previous cell of this table, if one is still open.
1316  if ( array_pop( $td_history ) ) {
1317  $previous = "</{$last_tag}>\n{$previous}";
1318  }
1319 
1320  if ( $first_character === '|' ) {
1321  $last_tag = 'td';
1322  } elseif ( $first_character === '!' ) {
1323  $last_tag = 'th';
1324  } elseif ( $first_character === '+' ) {
1325  $last_tag = 'caption';
1326  } else {
1327  $last_tag = '';
1328  }
1329 
1330  array_push( $last_tag_history, $last_tag );
1331 
1332  # A cell could contain both parameters and data
1333  $cell_data = explode( '|', $cell, 2 );
1334 
1335  # T2553: Note that a '|' inside an invalid link should not
1336  # be mistaken as delimiting cell parameters
1337  # Bug T153140: Neither should language converter markup.
1338  if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1339  $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1340  } elseif ( count( $cell_data ) == 1 ) {
1341  // Whitespace in cells is trimmed
1342  $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1343  } else {
1344  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1345  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1346  // Whitespace in cells is trimmed
1347  $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1348  }
1349 
1350  $outLine .= $cell;
1351  array_push( $td_history, true );
1352  }
1353  }
1354  $out .= $outLine . "\n";
1355  }
1356 
1357  # Closing open td, tr && table
 # NOTE(review): a still-open cell is always closed with </td> here, even
 # if the last tag was th or caption, and no </dd></dl> is emitted for
 # indented tables.
1358  while ( count( $td_history ) > 0 ) {
1359  if ( array_pop( $td_history ) ) {
1360  $out .= "</td>\n";
1361  }
1362  if ( array_pop( $tr_history ) ) {
1363  $out .= "</tr>\n";
1364  }
1365  if ( !array_pop( $has_opened_tr ) ) {
1366  $out .= "<tr><td></td></tr>\n";
1367  }
1368 
1369  $out .= "</table>\n";
1370  }
1371 
1372  # Remove trailing line-ending (b/c)
1373  if ( substr( $out, -1 ) === "\n" ) {
1374  $out = substr( $out, 0, -1 );
1375  }
1376 
1377  # special case: don't return empty table
1378  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1379  $out = '';
1380  }
1381 
1382  return $out;
1383  }
1384 
/**
 * Core wikitext-to-HTML pass: expand variables/templates, sanitize HTML,
 * then run the table, heading, link, quote and magic-link transformations
 * in a fixed order. Strip markers are left in the result.
 *
 * @param string $text Wikitext to parse
 * @param bool $isMain Handed on to formatHeadings()
 * @param PPFrame|bool $frame Frame to expand with; when false,
 *   replaceVariables() is used instead
 * @return string The half-parsed text, or the (possibly hook-modified)
 *   input if the 'ParserBeforeInternalParse' hook aborted
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	// formatHeadings() needs the text as it was before any processing.
	$origText = $text;

	// The hooks take the parser by reference; pass a local alias instead of
	// $this to avoid the PHP 7.1 warning.
	$parser = $this;
	$hookArgs = [ &$parser, &$text, &$this->mStripState ];

	# Hook to suspend the parser in this state
	if ( !Hooks::run( 'ParserBeforeInternalParse', $hookArgs ) ) {
		return $text;
	}

	if ( $frame ) {
		# A frame was provided: use it for replacing any variables.
		# Its depth tells how include/noinclude tags should be handled:
		# depth 0 is the top-level document, anything else an included one.
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# No frame provided: use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	Hooks::run( 'InternalParseBeforeSanitize', $hookArgs );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		array_keys( $this->mTransparentTagHooks ),
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', $hookArgs );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule.
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$noParseMarker = self::MARKER_PREFIX . 'NOPARSE';
	$text = str_replace( $noParseMarker, '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $origText, $isMain );
}
1460 
/**
 * Second stage of processing: unstrip, block levels, link holders, language
 * conversion, transparent tags, character reference normalization and tidy.
 *
 * The ParserAfterUnstrip, ParserBeforeTidy and ParserAfterTidy hooks are only
 * run when $isMain is true.
 *
 * @param string $text Text to finish processing
 * @param bool $isMain Whether to run the hooks listed above
 * @param bool $linestart Forwarded unchanged to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$text = Sanitizer::armorFrenchSpaces( $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	# Content conversion is skipped when disabled in the options, when the
	# 'nocontentconvert' double-underscore flag is set, or for interface messages.
	if ( !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage()
	) {
		# The position of the convert() call should not be changed. It
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $this->getTargetLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( !MWTidy::isEnabled() ) {
		# attempt to sanitize at least some nesting problems
		# (T4702 and quite a few others)
		# This code path is buggy and deprecated!
		wfDeprecated( 'disabling tidy', '1.33' );

		# Pattern => replacement; applied in this order
		$fixups = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (T5695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements - doBlockLevels won't wrap a line
			# which contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		];
		$patterns = array_keys( $fixups );
		$replacements = array_values( $fixups );

		$text = preg_replace( $patterns, $replacements, $text );
	} elseif ( $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1555 
/**
 * Find free external links, "RFC n" / "PMID n" references and ISBNs in the
 * text and pass each match to magicLinkCallback() for replacement.
 *
 * Anchors (<a ...>...</a>) and other HTML tags are matched first so that
 * nothing inside them is picked up by the later alternatives.
 *
 * @param string $text
 * @return string
 */
public function doMagicLinks( $text ) {
	$protocols = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	# non-newline space
	$space = self::SPACE_NOT_NL;
	# a dash or a non-newline space
	$spdash = "(?:-|$space)";
	# possessive match of 1 or more spaces
	$spaces = "$space++";

	# The first part is single-quoted so that \t, \r and \n reach PCRE verbatim
	$skipCases = '!(?:                    # Start cases
		(<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
		(<.*?>) |                    # m[2]: Skip stuff inside HTML elements';
	$linkCases = "
		(\b                          # m[3]: Free external links
			(?i:$protocols)
			($addr$urlChar*)         # m[4]: Post-protocol path
		) |
		\b(?:RFC|PMID) $spaces       # m[5]: RFC or PMID, capture number
			([0-9]+)\b |
		\bISBN $spaces (             # m[6]: ISBN, capture number
			(?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
			(?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
			[0-9Xx]                  #  check digit
		)\b
	)!xu";

	return preg_replace_callback(
		$skipCases . $linkCases,
		[ $this, 'magicLinkCallback' ],
		$text
	);
}
1592 
/**
 * preg_replace_callback handler for doMagicLinks().
 *
 * Capture groups: 1 = anchor, 2 = HTML tag, 3 = free external link
 * (4 = the part after the protocol), 5 = RFC/PMID number, 6 = ISBN.
 *
 * @param array $m Match array
 * @return string Replacement text; the whole match unchanged when nothing applies
 * @throws MWException If group 5 matched but the text starts with neither
 *   "RFC" nor "PMID"
 */
public function magicLinkCallback( $m ) {
	# Anchors and HTML elements are skipped
	if ( ( isset( $m[1] ) && $m[1] !== '' )
		|| ( isset( $m[2] ) && $m[2] !== '' )
	) {
		return $m[0];
	}

	# Free external link
	if ( isset( $m[3] ) && $m[3] !== '' ) {
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	# RFC or PMID
	if ( isset( $m[5] ) && $m[5] !== '' ) {
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $m[0];
			}
			list( $keyword, $urlmsg, $cssClass, $trackingCat ) =
				[ 'RFC', 'rfcurl', 'mw-magiclink-rfc', 'magiclink-tracking-rfc' ];
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $m[0];
			}
			list( $keyword, $urlmsg, $cssClass, $trackingCat ) =
				[ 'PMID', 'pubmedurl', 'mw-magiclink-pmid', 'magiclink-tracking-pmid' ];
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}

		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink(
			$url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle
		);
	}

	# ISBN, only when enabled in the parser options
	if ( isset( $m[6] ) && $m[6] !== '' && $this->mOptions->getMagicISBNLinks() ) {
		# Display form: every non-newline space variant becomes a plain space
		$isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
		# Lookup form: no separators, upper-case check digit
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $m[0];
}
1660 
/**
 * Turn a free (unbracketed) URL into an external link or external image,
 * moving trailing punctuation and anything from the first &lt; / &gt; /
 * &nbsp; entity onwards out of the URL.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Number of characters after the protocol; if the
 *   removed trail is at least this long, nothing but the protocol is left
 *   and the text is returned unlinked
 * @return string HTML
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by removeHTMLtags())
	# should not be included in URLs, per RFC 2396.
	# &nbsp; terminates a URL as well (bug T84937)
	$entity = [];
	$hasEntity = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entity,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasEntity ) {
		$cutAt = $entity[0][1];
		$trail = substr( $url, $cutAt );
		$url = substr( $url, 0, $cutAt );
	}

	# Trailing punctuation goes to $trail. Without a left bracket in the
	# URL, right brackets are fair game too.
	$sep = ',;\.:!?';
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$reversed = strrev( $url );
	$numSepChars = strspn( $reversed, $sep );

	# Don't break a trailing HTML entity by moving its ';' into $trail.
	# This is hot code: substr_compare avoids creating a new string, and
	# the regexp runs on the reversed string from the desired offset
	# instead of using a (slow) $ anchor.
	# Un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	$unused = [];
	if ( $numSepChars
		&& substr_compare( $url, ";", -$numSepChars, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $unused, 0, $numSepChars )
	) {
		$numSepChars--;
	}

	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just a lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$linkText = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
		$html = Linker::makeExternalLink(
			$url,
			$linkText,
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}

	return $html . $trail;
}
1735 
/**
 * Convert lines of the form "== Title ==" into <hN> elements, for heading
 * levels 6 down to 1. Deeper levels are handled first so that a run of six
 * equals signs is not consumed as a shallower heading.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$marker = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = "/^(?:{$marker})[ \\t]*(.+?)[ \\t]*(?:{$marker})\\s*$/m";
		$replacement = "<h{$level}>\\1</h{$level}>";
		$text = preg_replace( $pattern, $replacement, $text );
	}

	return $text;
}
1754 
/**
 * Apply doQuotes() to every line of the text separately and join the
 * results back together with newlines.
 *
 * @param string $text
 * @return string
 */
public function doAllQuotes( $text ) {
	$result = '';
	$isFirstLine = true;
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		// Separator goes between lines, never after the last one
		if ( $isFirstLine ) {
			$isFirstLine = false;
		} else {
			$result .= "\n";
		}
		$result .= $this->doQuotes( $line );
	}

	return $result;
}
1772 
/**
 * Convert runs of apostrophes on a single line into <i> and <b> markup.
 *
 * Two apostrophes toggle italics, three toggle bold and five toggle both.
 * A run of four is read as one literal apostrophe plus bold; a run longer
 * than five keeps the last five as markup and the rest as literal text.
 * Tags still open at the end of the line are closed.
 *
 * @param string $text A single line of text
 * @return string
 */
public function doQuotes( $text ) {
	// Even indexes hold plain text, odd indexes hold apostrophe runs
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// Pick the bold marker to demote, in order of preference: after a
		// single-letter word, after a multi-letter word, after a space.
		// (It is possible for all three to be -1 if, for example, there is
		// only one pentuple-apostrophe in the line.)
		if ( $firstsingleletterword > -1 ) {
			$demote = $firstsingleletterword;
		} elseif ( $firstmultiletterword > -1 ) {
			$demote = $firstmultiletterword;
		} else {
			$demote = $firstspace;
		}
		if ( $demote > -1 ) {
			// ''' becomes a literal apostrophe followed by ''
			$arr[$demote] = "''";
			$arr[$demote - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state is one of '', 'i', 'b', 'ib', 'bi' (outer tag first) or 'both'.
	// In 'both' a ''''' was seen and the nesting order is still undecided,
	// so the text is collected in $buffer until the next marker decides it.
	$output = '';
	$buffer = '';
	$state = '';
	foreach ( $arr as $i => $r ) {
		if ( ( $i % 2 ) == 0 ) {
			// Plain text
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		$thislen = strlen( $r );
		if ( $thislen == 2 ) {
			if ( $state === 'i' ) {
				$output .= '</i>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i>';
				$state = 'b';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i><b>';
				$state = 'b';
			} elseif ( $state === 'both' ) {
				$output .= '<b><i>' . $buffer . '</i>';
				$state = 'b';
			} else { // $state can be 'b' or ''
				$output .= '<i>';
				$state .= 'i';
			}
		} elseif ( $thislen == 3 ) {
			if ( $state === 'b' ) {
				$output .= '</b>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b><i>';
				$state = 'i';
			} elseif ( $state === 'ib' ) {
				$output .= '</b>';
				$state = 'i';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b>';
				$state = 'i';
			} else { // $state can be 'i' or ''
				$output .= '<b>';
				$state .= 'b';
			}
		} elseif ( $thislen == 5 ) {
			if ( $state === 'b' ) {
				$output .= '</b><i>';
				$state = 'i';
			} elseif ( $state === 'i' ) {
				$output .= '</i><b>';
				$state = 'b';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b>';
				$state = '';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i>';
				$state = '';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b></i>';
				$state = '';
			} else { // ($state == '')
				$buffer = '';
				$state = 'both';
			}
		}
	}

	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a buffer holding just "0" is falsy in
	// PHP and would otherwise be dropped from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}

	return $output;
}
1952 
/**
 * Replace bracketed external links ([http://example.com label]) with HTML
 * links and register each target URL in the parser output.
 *
 * The text is split with $this->mExtLinkBracketedRegex; after the leading
 * text, the pieces come in groups of four: URL, protocol, link text, and
 * the text that follows the link.
 *
 * @param string $text
 * @return string
 * @throws MWException If preg_split() fails
 */
public function replaceExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}
	$s = array_shift( $bits );

	$total = count( $bits );
	for ( $i = 0; $i < $total; $i += 4 ) {
		$url = $bits[$i];
		// $bits[$i + 1] is the protocol, which is not needed here
		$label = $bits[$i + 2];
		$trail = $bits[$i + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$entity = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $entity[0][1];
			$label = substr( $url, $cutAt ) . ' ' . $label;
			$url = substr( $url, 0, $cutAt );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$langObj = $this->getTargetLanguage();
			$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			# Link type is used for CSS
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			$linktype = 'text';
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
			$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$link = Linker::makeExternalLink(
			$url,
			$label,
			false,
			$linktype,
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		$s .= $link . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $s;
}
2034 
/**
 * Get the rel attribute value for an external link, based on the
 * $wgNoFollowLinks, $wgNoFollowNsExceptions and $wgNoFollowDomainExceptions
 * globals.
 *
 * @param string|bool $url URL of the link, passed to wfMatchesDomainList()
 * @param object|null $title Title whose namespace is checked against the
 *   namespace exceptions; when omitted, no namespace exception applies
 * @return string|null 'nofollow', or null when nofollow does not apply
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	// Without this declaration the three settings are undefined local
	// variables and the method can never return 'nofollow'.
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}

	$ns = $title ? $title->getNamespace() : false;
	// Guard against the loose comparison false == 0, which would otherwise
	// treat "no title" as if it were namespace 0.
	if ( $ns !== false && in_array( $ns, $wgNoFollowNsExceptions ) ) {
		return null;
	}

	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
2054 
/**
 * Build the 'rel' and, when configured, 'target' attributes for an
 * external link.
 *
 * @param string $url URL the link points to
 * @return array Attribute map; 'rel' is always present and may be null,
 *   'target' is present only when the parser options define a link target
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// getExternalLinkRel() returns null when nofollow does not
			// apply, so test for both null and '' to avoid emitting a
			// value with a leading space.
			if ( $rel === null || $rel === '' ) {
				$rel = 'noreferrer noopener';
			} else {
				$rel .= ' noreferrer noopener';
			}
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2085 
/**
 * Normalize the percent-encoding of a URL: encode unsafe characters, decode
 * escapes of printable ASCII characters that are safe in the component they
 * appear in, and upper-case the hex digits of the escapes that remain.
 * A bracketed IPv6 host that passes IP::isValid() keeps its brackets.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	# NOTE: [0-A] also admits the characters between '9' and 'A'; such hosts
	# are rejected by the IP::isValid() check below.
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	$hostMatch = [];
	$isIPv6 = false;
	if ( preg_match(
			"<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i",
			$url,
			$hostMatch
		)
		&& IP::isValid( rawurldecode( $hostMatch[1] ) )
	) {
		$isIPv6 = rawurldecode( $hostMatch[1] );
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $hit ) {
			return rawurlencode( $hit[0] );
		},
		$url
	);

	# The URL is consumed from the right: fragment first, then query.
	# Each entry maps the component's leading delimiter to the characters
	# that must stay escaped inside that component:
	#  '#' - 'fragment'
	#  '?' - 'query' minus &=+;
	$trailingComponents = [
		'#' => '"#%<>[\]^`{|}',
		'?' => '"#%<>[\]^`{|}&=+;',
	];

	$ret = '';
	$end = strlen( $url );
	foreach ( $trailingComponents as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start !== false && $start < $end ) {
			$component = substr( $url, $start, $end - $start );
			$ret = self::normalizeUrlComponent( $component, $unsafe ) . $ret;
			$end = $start;
		}
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent( substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax: the brackets were percent-encoded above, restore them
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2152 
/**
 * Normalize the percent-escapes in one URL component.
 *
 * An escape is decoded when it stands for a byte in the range 33..126 that
 * is not listed in $unsafe; every other escape is kept, with its hex digits
 * upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must remain escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $hit ) use ( $unsafe ) {
			$escape = $hit[0];
			$decoded = urldecode( $escape );
			$code = ord( $decoded );

			$mustStayEscaped = $code <= 32
				|| $code >= 127
				|| strpos( $unsafe, $decoded ) !== false;

			# Either leave it escaped (upper-case a-f) or unescape it
			return $mustStayEscaped ? strtoupper( $escape ) : $decoded;
		},
		$component
	);
}
2167 
/**
 * Make an external image tag for the URL if it matches EXT_IMAGE_REGEX and
 * external images are allowed for it, either through the parser options
 * or through the 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|bool Image HTML, or false if no image was made
 */
private function maybeMakeExternalImage( $url ) {
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
	$imagesexception = !empty( $imagesfrom );

	# $imagesfrom could be either a single string or an array of strings;
	# the URL has to start with (one of) them
	$imagematch = false;
	if ( $imagesexception ) {
		$prefixes = is_array( $imagesfrom ) ? $imagesfrom : [ $imagesfrom ];
		foreach ( $prefixes as $prefix ) {
			if ( strpos( $url, $prefix ) === 0 ) {
				$imagematch = true;
				break;
			}
		}
	}

	$html = false;
	$allowed = $this->mOptions->getAllowExternalImages()
		|| ( $imagesexception && $imagematch );
	if ( $allowed && preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( !$html
		&& $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$message = wfMessage( 'external_image_whitelist' )->inContentLanguage()->text();
		foreach ( explode( "\n", $message ) as $entry ) {
			# Ignore blank entries and comments
			if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
				continue;
			}
			# Escape the delimiter in the regex fragment and match case-insensitively
			$regex = '/' . str_replace( '/', '\\/', $entry ) . '/i';
			if ( preg_match( $regex, $url ) ) {
				# Image matches a whitelist entry
				$html = Linker::makeExternalImage( $url );
				break;
			}
		}
	}

	return $html;
}
2225 
/**
 * Process [[ ]] wikilinks in the text and merge the resulting link holders
 * into $this->mLinkHolders.
 *
 * @param string $s
 * @return string The text as modified (by reference) by replaceInternalLinks2()
 */
public function replaceInternalLinks( $s ) {
	// Fetch the target first so the evaluation order of the original
	// one-line call is preserved
	$target = $this->mLinkHolders;
	$newHolders = $this->replaceInternalLinks2( $s );
	$target->merge( $newHolders );

	return $s;
}
2239 
2248  public function replaceInternalLinks2( &$s ) {
2249  static $tc = false, $e1, $e1_img;
2250  # the % is needed to support urlencoded titles as well
2251  if ( !$tc ) {
2252  $tc = Title::legalChars() . '#%';
2253  # Match a link having the form [[namespace:link|alternate]]trail
2254  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2255  # Match cases where there is no "]]", which might still be images
2256  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2257  }
2258 
2259  $holders = new LinkHolderArray( $this );
2260 
2261  # split the entire text string on occurrences of [[
2262  $a = StringUtils::explode( '[[', ' ' . $s );
2263  # get the first element (all text up to first [[), and remove the space we added
2264  $s = $a->current();
2265  $a->next();
2266  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2267  $s = substr( $s, 1 );
2268 
2269  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2270  $e2 = null;
2271  if ( $useLinkPrefixExtension ) {
2272  # Match the end of a line for a word that's not followed by whitespace,
2273  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2274  $charset = $this->contLang->linkPrefixCharset();
2275  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2276  }
2277 
2278  if ( is_null( $this->mTitle ) ) {
2279  throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2280  }
2281  $nottalk = !$this->mTitle->isTalkPage();
2282 
2283  if ( $useLinkPrefixExtension ) {
2284  $m = [];
2285  if ( preg_match( $e2, $s, $m ) ) {
2286  $first_prefix = $m[2];
2287  } else {
2288  $first_prefix = false;
2289  }
2290  } else {
2291  $prefix = '';
2292  }
2293 
2294  $useSubpages = $this->areSubpagesAllowed();
2295 
2296  # Loop for each link
2297  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2298  # Check for excessive memory usage
2299  if ( $holders->isBig() ) {
2300  # Too big
2301  # Do the existence check, replace the link holders and clear the array
2302  $holders->replace( $s );
2303  $holders->clear();
2304  }
2305 
2306  if ( $useLinkPrefixExtension ) {
2307  if ( preg_match( $e2, $s, $m ) ) {
2308  list( , $s, $prefix ) = $m;
2309  } else {
2310  $prefix = '';
2311  }
2312  # first link
2313  if ( $first_prefix ) {
2314  $prefix = $first_prefix;
2315  $first_prefix = false;
2316  }
2317  }
2318 
2319  $might_be_img = false;
2320 
2321  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2322  $text = $m[2];
2323  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2324  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2325  # the real problem is with the $e1 regex
2326  # See T1500.
2327  # Still some problems for cases where the ] is meant to be outside punctuation,
2328  # and no image is in sight. See T4095.
2329  if ( $text !== ''
2330  && substr( $m[3], 0, 1 ) === ']'
2331  && strpos( $text, '[' ) !== false
2332  ) {
2333  $text .= ']'; # so that replaceExternalLinks($text) works later
2334  $m[3] = substr( $m[3], 1 );
2335  }
2336  # fix up urlencoded title texts
2337  if ( strpos( $m[1], '%' ) !== false ) {
2338  # Should anchors '#' also be rejected?
2339  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2340  }
2341  $trail = $m[3];
2342  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2343  # Invalid, but might be an image with a link in its caption
2344  $might_be_img = true;
2345  $text = $m[2];
2346  if ( strpos( $m[1], '%' ) !== false ) {
2347  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2348  }
2349  $trail = "";
2350  } else { # Invalid form; output directly
2351  $s .= $prefix . '[[' . $line;
2352  continue;
2353  }
2354 
2355  $origLink = ltrim( $m[1], ' ' );
2356 
2357  # Don't allow internal links to pages containing
2358  # PROTO: where PROTO is a valid URL protocol; these
2359  # should be external links.
2360  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2361  $s .= $prefix . '[[' . $line;
2362  continue;
2363  }
2364 
2365  # Make subpage if necessary
2366  if ( $useSubpages ) {
2367  $link = $this->maybeDoSubpageLink( $origLink, $text );
2368  } else {
2369  $link = $origLink;
2370  }
2371 
2372  // \x7f isn't a default legal title char, so most likely strip
2373  // markers will force us into the "invalid form" path above. But,
2374  // just in case, let's assert that xmlish tags aren't valid in
2375  // the title position.
2376  $unstrip = $this->mStripState->killMarkers( $link );
2377  $noMarkers = ( $unstrip === $link );
2378 
2379  $nt = $noMarkers ? Title::newFromText( $link ) : null;
2380  if ( $nt === null ) {
2381  $s .= $prefix . '[[' . $line;
2382  continue;
2383  }
2384 
2385  $ns = $nt->getNamespace();
2386  $iw = $nt->getInterwiki();
2387 
2388  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2389 
2390  if ( $might_be_img ) { # if this is actually an invalid link
2391  if ( $ns == NS_FILE && $noforce ) { # but might be an image
2392  $found = false;
2393  while ( true ) {
2394  # look at the next 'line' to see if we can close it there
2395  $a->next();
2396  $next_line = $a->current();
2397  if ( $next_line === false || $next_line === null ) {
2398  break;
2399  }
2400  $m = explode( ']]', $next_line, 3 );
2401  if ( count( $m ) == 3 ) {
2402  # the first ]] closes the inner link, the second the image
2403  $found = true;
2404  $text .= "[[{$m[0]}]]{$m[1]}";
2405  $trail = $m[2];
2406  break;
2407  } elseif ( count( $m ) == 2 ) {
2408  # if there's exactly one ]] that's fine, we'll keep looking
2409  $text .= "[[{$m[0]}]]{$m[1]}";
2410  } else {
2411  # if $next_line is invalid too, we need look no further
2412  $text .= '[[' . $next_line;
2413  break;
2414  }
2415  }
2416  if ( !$found ) {
2417  # we couldn't find the end of this imageLink, so output it raw
2418  # but don't ignore what might be perfectly normal links in the text we've examined
2419  $holders->merge( $this->replaceInternalLinks2( $text ) );
2420  $s .= "{$prefix}[[$link|$text";
2421  # note: no $trail, because without an end, there *is* no trail
2422  continue;
2423  }
2424  } else { # it's not an image, so output it raw
2425  $s .= "{$prefix}[[$link|$text";
2426  # note: no $trail, because without an end, there *is* no trail
2427  continue;
2428  }
2429  }
2430 
2431  $wasblank = ( $text == '' );
2432  if ( $wasblank ) {
2433  $text = $link;
2434  if ( !$noforce ) {
2435  # Strip off leading ':'
2436  $text = substr( $text, 1 );
2437  }
2438  } else {
2439  # T6598 madness. Handle the quotes only if they come from the alternate part
2440  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2441  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2442  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2443  $text = $this->doQuotes( $text );
2444  }
2445 
2446  # Link not escaped by : , create the various objects
2447  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2448  # Interwikis
2449  if (
2450  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2451  Language::fetchLanguageName( $iw, null, 'mw' ) ||
2452  in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2453  )
2454  ) {
2455  # T26502: filter duplicates
2456  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2457  $this->mLangLinkLanguages[$iw] = true;
2458  $this->mOutput->addLanguageLink( $nt->getFullText() );
2459  }
2460 
2464  $s = rtrim( $s . $prefix ) . $trail; # T175416
2465  continue;
2466  }
2467 
2468  if ( $ns == NS_FILE ) {
2469  if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
2470  if ( $wasblank ) {
2471  # if no parameters were passed, $text
2472  # becomes something like "File:Foo.png",
2473  # which we don't want to pass on to the
2474  # image generator
2475  $text = '';
2476  } else {
2477  # recursively parse links inside the image caption
2478  # actually, this will parse them in any other parameters, too,
2479  # but it might be hard to fix that, and it doesn't matter ATM
2480  $text = $this->replaceExternalLinks( $text );
2481  $holders->merge( $this->replaceInternalLinks2( $text ) );
2482  }
2483  # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2484  $s .= $prefix . $this->armorLinks(
2485  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2486  continue;
2487  }
2488  } elseif ( $ns == NS_CATEGORY ) {
2492  $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2493 
2494  if ( $wasblank ) {
2495  $sortkey = $this->getDefaultSort();
2496  } else {
2497  $sortkey = $text;
2498  }
2499  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2500  $sortkey = str_replace( "\n", '', $sortkey );
2501  $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2502  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2503 
2504  continue;
2505  }
2506  }
2507 
2508  # Self-link checking. For some languages, variants of the title are checked in
2509  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2510  # for linking to a different variant.
2511  if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2512  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2513  continue;
2514  }
2515 
2516  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2517  # @todo FIXME: Should do batch file existence checks, see comment below
2518  if ( $ns == NS_MEDIA ) {
2519  # Give extensions a chance to select the file revision for us
2520  $options = [];
2521  $descQuery = false;
2522  Hooks::run( 'BeforeParserFetchFileAndTitle',
2523  [ $this, $nt, &$options, &$descQuery ] );
2524  # Fetch and register the file (file title may be different via hooks)
2525  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2526  # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2527  $s .= $prefix . $this->armorLinks(
2528  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2529  continue;
2530  }
2531 
2532  # Some titles, such as valid special pages or files in foreign repos, should
2533  # be shown as bluelinks even though they're not included in the page table
2534  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2535  # batch file existence checks for NS_FILE and NS_MEDIA
2536  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2537  $this->mOutput->addLink( $nt );
2538  $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2539  } else {
2540  # Links will be added to the output link list after checking
2541  $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2542  }
2543  }
2544  return $holders;
2545  }
2546 
/**
 * Render a link to a target that is known to exist and armour the result.
 *
 * The trail is split via Linker::splitTrail(); the first part is rendered
 * inside the anchor (after the link text), the rest is appended after it.
 *
 * @param Title $nt Link target (getPrefixedText() is called on it)
 * @param string $text Link text, used as-is (HtmlArmor); when empty, the
 *   HTML-escaped prefixed title text is used instead
 * @param string $trail Text following the link in the wikitext
 * @param string $prefix Text rendered inside the anchor, before the link text
 * @return string Armoured link HTML followed by the remaining trail
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
    list( $insideTrail, $outsideTrail ) = Linker::splitTrail( $trail );

    if ( $text == '' ) {
        # No explicit label: fall back to the (escaped) page name
        $text = htmlspecialchars( $nt->getPrefixedText() );
    }

    $label = new HtmlArmor( $prefix . $text . $insideTrail );
    $anchor = $this->getLinkRenderer()->makeKnownLink( $nt, $label );

    return $this->armorLinks( $anchor ) . $outsideTrail;
}
2573 
/**
 * Insert a NOPARSE marker in front of every URL protocol found in $text.
 *
 * Each case-insensitive match of $this->mUrlProtocols at a word boundary is
 * prefixed with self::MARKER_PREFIX . "NOPARSE". Callers in this file use it
 * so that replaceExternalLinks() does not touch URLs in generated markup.
 *
 * @param string $text
 * @return string
 */
public function armorLinks( $text ) {
    $protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
    $armoured = self::MARKER_PREFIX . "NOPARSE$1";

    return preg_replace( $protocolPattern, $armoured, $text );
}
2588 
/**
 * Report whether the namespace of the current title supports subpages.
 *
 * @return bool Result of NamespaceInfo::hasSubpages() for $this->mTitle's namespace
 */
public function areSubpagesAllowed() {
    # Some namespaces don't allow subpages
    $currentNs = $this->mTitle->getNamespace();

    return $this->nsInfo->hasSubpages( $currentNs );
}
2597 
/**
 * Resolve a possibly relative subpage link against the current title.
 *
 * Thin wrapper around Linker::normalizeSubpageLink() using $this->mTitle
 * as the context title.
 *
 * @param string $target Link target as written
 * @param string &$text Link text, passed through by reference to the Linker helper
 * @return string Whatever Linker::normalizeSubpageLink() returns for the target
 */
public function maybeDoSubpageLink( $target, &$text ) {
    $normalized = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );

    return $normalized;
}
2609 
/**
 * Run the block-level pass over $text.
 *
 * Delegates entirely to BlockLevelPass::doBlockLevels().
 *
 * @param string $text
 * @param bool $linestart Forwarded unchanged to BlockLevelPass
 * @return string Result of the block-level pass
 */
public function doBlockLevels( $text, $linestart ) {
    $processed = BlockLevelPass::doBlockLevels( $text, $linestart );

    return $processed;
}
2621 
/**
 * Return the value of a magic variable (such as PAGENAME or CURRENTDAY).
 *
 * Values computed in the main switch are stored in $this->mVarCache under
 * $index and served from there on later calls, unless the
 * 'ParserGetVariableValueVarCache' hook returns false. The site
 * configuration variables (articlepath, sitename, server, ...) return
 * directly and are not cached here. Unknown identifiers are delegated to
 * the 'ParserGetVariableValueSwitch' hook.
 *
 * @param string $index Magic variable identifier
 * @param bool|PPFrame $frame Only forwarded to the
 *   'ParserGetVariableValueSwitch' hook for unknown identifiers
 * @throws MWException If no title has been set on the parser
 * @return string|int|null Value of the variable; null for unknown
 *   identifiers that no hook handled, or for PAGEID on a nonexistent page
 */
public function getVariableValue( $index, $frame = false ) {
    if ( $this->mTitle === null ) {
        // If no title set, bad things are going to happen
        // later. Title should always be set since this
        // should only be called in the middle of a parse
        // operation (but the unit-tests do funky stuff)
        throw new MWException( __METHOD__ . ' Should only be '
            . ' called while parsing (no title set)' );
    }

    // Avoid PHP 7.1 warning from passing $this by reference
    $parser = $this;

    // Serve repeated lookups from the per-parse cache unless a hook vetoes it
    if (
        Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) &&
        isset( $this->mVarCache[$index] )
    ) {
        return $this->mVarCache[$index];
    }

    $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
    Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );

    $pageLang = $this->getFunctionLang();

    switch ( $index ) {
        case '!':
            $value = '|';
            break;
        case 'currentmonth':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
            break;
        case 'currentmonth1':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
            break;
        case 'currentmonthname':
            $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
            break;
        case 'currentmonthnamegen':
            $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
            break;
        case 'currentmonthabbrev':
            $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
            break;
        case 'currentday':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
            break;
        case 'currentday2':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
            break;
        case 'localmonth':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
            break;
        case 'localmonth1':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
            break;
        case 'localmonthname':
            $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
            break;
        case 'localmonthnamegen':
            $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
            break;
        case 'localmonthabbrev':
            $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
            break;
        case 'localday':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
            break;
        case 'localday2':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
            break;
        case 'pagename':
            $value = wfEscapeWikiText( $this->mTitle->getText() );
            break;
        case 'pagenamee':
            $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
            break;
        case 'fullpagename':
            $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
            break;
        case 'fullpagenamee':
            $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
            break;
        case 'subpagename':
            $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
            break;
        case 'subpagenamee':
            $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
            break;
        case 'rootpagename':
            $value = wfEscapeWikiText( $this->mTitle->getRootText() );
            break;
        case 'rootpagenamee':
            $value = wfEscapeWikiText( wfUrlencode( str_replace(
                ' ',
                '_',
                $this->mTitle->getRootText()
            ) ) );
            break;
        case 'basepagename':
            $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
            break;
        case 'basepagenamee':
            $value = wfEscapeWikiText( wfUrlencode( str_replace(
                ' ',
                '_',
                $this->mTitle->getBaseText()
            ) ) );
            break;
        case 'talkpagename':
            if ( $this->mTitle->canHaveTalkPage() ) {
                $talkPage = $this->mTitle->getTalkPage();
                $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
            } else {
                $value = '';
            }
            break;
        case 'talkpagenamee':
            if ( $this->mTitle->canHaveTalkPage() ) {
                $talkPage = $this->mTitle->getTalkPage();
                $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
            } else {
                $value = '';
            }
            break;
        case 'subjectpagename':
            $subjPage = $this->mTitle->getSubjectPage();
            $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
            break;
        case 'subjectpagenamee':
            $subjPage = $this->mTitle->getSubjectPage();
            $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
            break;
        case 'pageid': // requested in T25427
            $pageid = $this->getTitle()->getArticleID();
            if ( $pageid == 0 ) {
                # 0 means the page doesn't exist in the database,
                # which means the user is previewing a new page.
                # The vary-revision flag must be set, because the magic word
                # will have a different value once the page is saved.
                $this->mOutput->setFlag( 'vary-revision' );
                wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
            }
            $value = $pageid ?: null;
            break;
        case 'revisionid':
            if (
                $this->svcOptions->get( 'MiserMode' ) &&
                !$this->mOptions->getInterfaceMessage() &&
                // @TODO: disallow this word on all namespaces
                $this->nsInfo->isContent( $this->mTitle->getNamespace() )
            ) {
                // Use a stub result instead of the actual revision ID in order to avoid
                // double parses on page save but still allow preview detection (T137900)
                if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
                    $value = '-';
                } else {
                    $this->mOutput->setFlag( 'vary-revision-exists' );
                    $value = '';
                }
            } else {
                # Inform the edit saving system that getting the canonical output after
                # revision insertion requires another parse using the actual revision ID
                $this->mOutput->setFlag( 'vary-revision-id' );
                wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision-id...\n" );
                $value = $this->getRevisionId();
                if ( $value === 0 ) {
                    $rev = $this->getRevisionObject();
                    $value = $rev ? $rev->getId() : $value;
                }
                if ( !$value ) {
                    $value = $this->mOptions->getSpeculativeRevId();
                    if ( $value ) {
                        $this->mOutput->setSpeculativeRevIdUsed( $value );
                    }
                }
            }
            break;
        case 'revisionday':
            $value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
            break;
        case 'revisionday2':
            $value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
            break;
        case 'revisionmonth':
            $value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
            break;
        case 'revisionmonth1':
            $value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
            break;
        case 'revisionyear':
            $value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
            break;
        case 'revisiontimestamp':
            # Let the edit saving system know we should parse the page
            # *after* a revision ID has been assigned. This is for null edits.
            $this->mOutput->setFlag( 'vary-revision' );
            wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
            $value = $this->getRevisionTimestamp();
            break;
        case 'revisionuser':
            # Let the edit saving system know we should parse the page
            # *after* a revision ID has been assigned for null edits.
            $this->mOutput->setFlag( 'vary-user' );
            wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-user...\n" );
            $value = $this->getRevisionUser();
            break;
        case 'revisionsize':
            $value = $this->getRevisionSize();
            break;
        case 'namespace':
            $value = str_replace( '_', ' ',
                $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
            break;
        case 'namespacee':
            $value = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
            break;
        case 'namespacenumber':
            $value = $this->mTitle->getNamespace();
            break;
        case 'talkspace':
            $value = $this->mTitle->canHaveTalkPage()
                ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
                : '';
            break;
        case 'talkspacee':
            $value = $this->mTitle->canHaveTalkPage() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
            break;
        case 'subjectspace':
            $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
            break;
        case 'subjectspacee':
            $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
            break;
        case 'currentdayname':
            $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
            break;
        case 'currentyear':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
            break;
        case 'currenttime':
            $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
            break;
        case 'currenthour':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
            break;
        case 'currentweek':
            # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
            # int to remove the padding
            $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
            break;
        case 'currentdow':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
            break;
        case 'localdayname':
            $value = $pageLang->getWeekdayName(
                (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
            );
            break;
        case 'localyear':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
            break;
        case 'localtime':
            $value = $pageLang->time(
                MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
                false,
                false
            );
            break;
        case 'localhour':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
            break;
        case 'localweek':
            # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
            # int to remove the padding
            $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
            break;
        case 'localdow':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
            break;
        case 'numberofarticles':
            $value = $pageLang->formatNum( SiteStats::articles() );
            break;
        case 'numberoffiles':
            $value = $pageLang->formatNum( SiteStats::images() );
            break;
        case 'numberofusers':
            $value = $pageLang->formatNum( SiteStats::users() );
            break;
        case 'numberofactiveusers':
            $value = $pageLang->formatNum( SiteStats::activeUsers() );
            break;
        case 'numberofpages':
            $value = $pageLang->formatNum( SiteStats::pages() );
            break;
        case 'numberofadmins':
            $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
            break;
        case 'numberofedits':
            $value = $pageLang->formatNum( SiteStats::edits() );
            break;
        case 'currenttimestamp':
            $value = wfTimestamp( TS_MW, $ts );
            break;
        case 'localtimestamp':
            $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
            break;
        case 'currentversion':
            # Must assign $value here; falling through to break would
            # leave it undefined for the cache write and return below.
            $value = SpecialVersion::getVersion();
            break;
        case 'articlepath':
            return $this->svcOptions->get( 'ArticlePath' );
        case 'sitename':
            return $this->svcOptions->get( 'Sitename' );
        case 'server':
            return $this->svcOptions->get( 'Server' );
        case 'servername':
            return $this->svcOptions->get( 'ServerName' );
        case 'scriptpath':
            return $this->svcOptions->get( 'ScriptPath' );
        case 'stylepath':
            return $this->svcOptions->get( 'StylePath' );
        case 'directionmark':
            return $pageLang->getDirMark();
        case 'contentlanguage':
            return $this->svcOptions->get( 'LanguageCode' );
        case 'pagelanguage':
            $value = $pageLang->getCode();
            break;
        case 'cascadingsources':
            # Same as above: $value must be set before leaving the switch.
            $value = CoreParserFunctions::cascadingsources( $this );
            break;
        default:
            # Not a built-in variable: let extensions supply the value.
            # The result is returned as-is and not cached by this method.
            $ret = null;
            Hooks::run(
                'ParserGetVariableValueSwitch',
                [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
            );

            return $ret;
    }

    if ( $index ) {
        $this->mVarCache[$index] = $value;
    }

    return $value;
}
2985 
/**
 * Extract a slice of the revision timestamp and flag the output as
 * revision-dependent when that slice may change before the page is saved.
 *
 * When there is no revision object yet, the same slice is taken from the
 * adjusted timestamp $mtts seconds in the future; if it differs from the
 * current slice, 'vary-revision' is set on the parser output.
 *
 * @param int $start Offset into the timestamp string
 * @param int $len Number of characters to take
 * @param int $mtts Look-ahead, in seconds, used for the comparison
 * @param string $variable Name of the magic variable, used in the debug log
 * @return string Slice of $this->getRevisionTimestamp()
 */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
    # Slice of the timestamp used for this revision
    $currentPart = substr( $this->getRevisionTimestamp(), $start, $len );

    if ( $this->getRevisionObject() ) {
        # A revision is already associated: nothing can vary
        return $currentPart;
    }

    # Same slice, taken from the adjusted timestamp $mtts seconds ahead
    $futureTimestamp = wfTimestamp( TS_MW, time() + $mtts );
    $futurePart = substr( $this->contLang->userAdjust( $futureTimestamp, '' ), $start, $len );

    if ( $currentPart !== $futurePart ) {
        # Let the edit saving system know we should parse the page
        # *after* a revision ID has been assigned. This is for null edits.
        $this->mOutput->setFlag( 'vary-revision' );
        wfDebug( __METHOD__ . ": $variable used, setting vary-revision...\n" );
    }

    return $currentPart;
}
3015 
/**
 * Build the magic word arrays for variables and subst prefixes.
 *
 * Populates $this->mVariables and $this->mSubstWords from the IDs that
 * $this->magicWordFactory reports.
 */
public function initialiseVariables() {
    $factory = $this->magicWordFactory;

    $variableIdList = $factory->getVariableIDs();
    $substIdList = $factory->getSubstIDs();

    $this->mVariables = $factory->newArray( $variableIdList );
    $this->mSubstWords = $factory->newArray( $substIdList );
}
3028 
/**
 * Preprocess wikitext and return the preprocessor's object tree.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Passed through to the preprocessor; this file passes
 *   0 or self::PTD_FOR_INCLUSION
 * @return mixed Whatever the preprocessor's preprocessToObj() returns
 */
public function preprocessToDom( $text, $flags = 0 ) {
    return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3055 
/**
 * Split a string into its leading whitespace, trimmed core and trailing
 * whitespace.
 *
 * "Whitespace" is whatever ltrim()/rtrim() strip by default.
 *
 * @param string $s
 * @return array [ leading whitespace, trimmed text, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
    $withoutLeading = ltrim( $s );
    $leadingLength = strlen( $s ) - strlen( $withoutLeading );

    $core = rtrim( $withoutLeading );
    $trailingLength = strlen( $withoutLeading ) - strlen( $core );

    $leading = substr( $s, 0, $leadingLength );
    # substr() with offset -0 would return the whole string, hence the guard
    $trailing = $trailingLength > 0 ? substr( $withoutLeading, -$trailingLength ) : '';

    return [ $leading, $core, $trailing ];
}
3075 
/**
 * Expand variables and templates in $text via the preprocessor.
 *
 * Empty text, or text longer than the configured maximum include size,
 * is returned untouched.
 *
 * @param string $text Wikitext to expand
 * @param bool|PPFrame|array $frame Frame to expand in. False creates a new
 *   frame; any other non-PPFrame value is handed to newCustomFrame()
 * @param bool $argsOnly When true, expand with PPFrame::NO_TEMPLATES
 * @return string Expanded text
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
    # Is there any text? Also, Prevent too big inclusions!
    $length = strlen( $text );
    if ( $length < 1 ) {
        return $text;
    }
    if ( $length > $this->mOptions->getMaxIncludeSize() ) {
        return $text;
    }

    if ( $frame === false ) {
        $frame = $this->getPreprocessor()->newFrame();
    } elseif ( !( $frame instanceof PPFrame ) ) {
        wfDebug( __METHOD__ . " called using plain parameters instead of "
            . "a PPFrame instance. Creating custom frame.\n" );
        $frame = $this->getPreprocessor()->newCustomFrame( $frame );
    }

    $tree = $this->preprocessToDom( $text );
    $expandFlags = 0;
    if ( $argsOnly ) {
        $expandFlags = PPFrame::NO_TEMPLATES;
    }

    return $frame->expand( $tree, $expandFlags );
}
3117 
/**
 * Turn a list of "name=value" / positional argument strings into an
 * associative array.
 *
 * Arguments without '=' are numbered sequentially starting at 1.
 * Arguments with '=' are split at the first '='; both the name and the
 * value are trimmed. A later argument overwrites an earlier one with the
 * same key.
 *
 * @param string[] $args
 * @return array Map of argument name (or position) to value
 */
public static function createAssocArgs( $args ) {
    $assocArgs = [];
    $index = 1;
    foreach ( $args as $arg ) {
        $eqpos = strpos( $arg, '=' );
        if ( $eqpos === false ) {
            # Positional argument
            $assocArgs[$index++] = $arg;
            continue;
        }

        # trim() always returns a string, so no false-checks are needed.
        # The casts keep substr()'s false (PHP < 8, empty tail) out of trim().
        $name = trim( (string)substr( $arg, 0, $eqpos ) );
        $value = trim( (string)substr( $arg, $eqpos + 1 ) );
        $assocArgs[$name] = $value;
    }

    return $assocArgs;
}
3146 
/**
 * Record that a parser limitation was hit: add a warning to the output
 * and put the page into the matching tracking category.
 *
 * The warning uses the message "<limitationType>-warning" and the
 * category key "<limitationType>-category".
 *
 * @param string $limitationType Prefix of the message and category keys
 * @param string|int $current Current value, passed as a numeric message parameter
 * @param string|int $max Maximum allowed value, passed as a numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
    # does no harm if $current and $max are present but are unnecessary for the message
    # Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
    # only during preview, and that would split the parser cache unnecessarily.
    $warningText = wfMessage( $limitationType . '-warning' )
        ->numParams( $current, $max )
        ->text();

    $this->mOutput->addWarning( $warningText );
    $this->addTrackingCategory( $limitationType . '-category' );
}
3182 
/**
 * Return the expansion of a {{...}} construct: a variable, a parser
 * function call, a special page inclusion, a template or an interwiki
 * transclusion.
 *
 * The steps are tried in order (subst handling, variables, MSG/MSGNW/RAW
 * prefixes, parser functions, title resolution, loading); the first one
 * that sets $found wins. If nothing matches, the original wikitext is
 * rebuilt and returned.
 *
 * @param array $piece The parts of the construct:
 *   $piece['title']: the title, i.e. the part before the first |
 *   $piece['parts']: the argument nodes
 *   $piece['lineStart']: whether the braces were at the start of a line
 * @param PPFrame $frame The current frame
 * @return array Either [ 'object' => ... ] (needs further expansion in the
 *   current frame) or [ 'text' => ... ] (final text)
 */
public function braceSubstitution( $piece, $frame ) {
    // Flags

    // $text has been filled
    $found = false;
    // wiki markup in $text should be escaped
    $nowiki = false;
    // $text is HTML, armour it against wikitext transformation
    $isHTML = false;
    // Force interwiki transclusion to be done in raw mode not rendered
    $forceRawInterwiki = false;
    // $text is a DOM node needing expansion in a child frame
    $isChildObj = false;
    // $text is a DOM node needing expansion in the current frame
    $isLocalObj = false;

    # Title object, where $text came from
    $title = false;

    # $part1 is the bit before the first |, and must contain only title characters.
    # Various prefixes will be stripped from it later.
    $titleWithSpaces = $frame->expand( $piece['title'] );
    $part1 = trim( $titleWithSpaces );
    $titleText = false;

    # Original title text preserved for various purposes
    $originalTitle = $part1;

    # $args is a list of argument nodes, starting from index 0, not including $part1
    # @todo FIXME: If piece['parts'] is null then the call to getLength()
    # below won't work b/c this $args isn't an object
    $args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

    $profileSection = null; // profile templates

    # SUBST
    if ( !$found ) {
        $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

        # Possibilities for substMatch: "subst", "safesubst" or FALSE
        # Decide whether to expand template or keep wikitext as-is.
        if ( $this->ot['wiki'] ) {
            # In PST: literal with no prefix, expand with subst: or safesubst:
            $literal = ( $substMatch === false );
        } else {
            # Not in PST: literal with plain subst:, expand with safesubst: or no prefix
            $literal = ( $substMatch == 'subst' );
        }
        if ( $literal ) {
            $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
            $isLocalObj = true;
            $found = true;
        }
    }

    # Variables
    if ( !$found && $args->getLength() == 0 ) {
        $id = $this->mVariables->matchStartToEnd( $part1 );
        if ( $id !== false ) {
            $text = $this->getVariableValue( $id, $frame );
            if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
                $this->mOutput->updateCacheExpiry(
                    $this->magicWordFactory->getCacheTTL( $id ) );
            }
            $found = true;
        }
    }

    # MSG, MSGNW and RAW
    if ( !$found ) {
        # Check for MSGNW:
        $mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $nowiki = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg = $this->magicWordFactory->get( 'msg' );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check for RAW:
        $mwRaw = $this->magicWordFactory->get( 'raw' );
        if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
            $forceRawInterwiki = true;
        }
    }

    # Parser functions
    if ( !$found ) {
        $colonPos = strpos( $part1, ':' );
        if ( $colonPos !== false ) {
            $func = substr( $part1, 0, $colonPos );
            $funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
            $argsLength = $args->getLength();
            for ( $i = 0; $i < $argsLength; $i++ ) {
                $funcArgs[] = $args->item( $i );
            }

            $result = $this->callParserFunction( $frame, $func, $funcArgs );

            // Extract any forwarded flags
            if ( isset( $result['title'] ) ) {
                $title = $result['title'];
            }
            if ( isset( $result['found'] ) ) {
                $found = $result['found'];
            }
            if ( array_key_exists( 'text', $result ) ) {
                // a string or null
                $text = $result['text'];
            }
            if ( isset( $result['nowiki'] ) ) {
                $nowiki = $result['nowiki'];
            }
            if ( isset( $result['isHTML'] ) ) {
                $isHTML = $result['isHTML'];
            }
            if ( isset( $result['forceRawInterwiki'] ) ) {
                $forceRawInterwiki = $result['forceRawInterwiki'];
            }
            if ( isset( $result['isChildObj'] ) ) {
                $isChildObj = $result['isChildObj'];
            }
            if ( isset( $result['isLocalObj'] ) ) {
                $isLocalObj = $result['isLocalObj'];
            }
        }
    }

    # Finish mangling title and then check for loops.
    # Set $title to a Title object and $titleText to the PDBK
    if ( !$found ) {
        $ns = NS_TEMPLATE;
        # Split the title into page and subpage
        $subpage = '';
        $relative = $this->maybeDoSubpageLink( $part1, $subpage );
        if ( $part1 !== $relative ) {
            $part1 = $relative;
            $ns = $this->mTitle->getNamespace();
        }
        $title = Title::newFromText( $part1, $ns );
        if ( $title ) {
            $titleText = $title->getPrefixedText();
            # Check for language variants if the template is not found
            if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
                $this->getTargetLanguage()->findVariantLink( $part1, $title, true );
            }
            # Do recursion depth check
            $limit = $this->mOptions->getMaxTemplateDepth();
            if ( $frame->depth >= $limit ) {
                $found = true;
                $text = '<span class="error">'
                    . wfMessage( 'parser-template-recursion-depth-warning' )
                        ->numParams( $limit )->inContentLanguage()->text()
                    . '</span>';
            }
        }
    }

    # Load from database
    if ( !$found && $title ) {
        $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
        if ( !$title->isExternal() ) {
            if ( $title->isSpecialPage()
                && $this->mOptions->getAllowSpecialInclusion()
                && $this->ot['html']
            ) {
                $specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
                // Pass the template arguments as URL parameters.
                // "uselang" will have no effect since the Language object
                // is forced to the one defined in ParserOptions.
                $pageArgs = [];
                $argsLength = $args->getLength();
                for ( $i = 0; $i < $argsLength; $i++ ) {
                    $bits = $args->item( $i )->splitArg();
                    if ( strval( $bits['index'] ) === '' ) {
                        $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
                        $value = trim( $frame->expand( $bits['value'] ) );
                        $pageArgs[$name] = $value;
                    }
                }

                // Create a new context to execute the special page
                $context = new RequestContext;
                $context->setTitle( $title );
                $context->setRequest( new FauxRequest( $pageArgs ) );
                if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
                    $context->setUser( $this->getUser() );
                } else {
                    // If this page is cached, then we better not be per user.
                    $context->setUser( User::newFromName( '127.0.0.1', false ) );
                }
                $context->setLanguage( $this->mOptions->getUserLangObj() );
                $ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
                if ( $ret ) {
                    $text = $context->getOutput()->getHTML();
                    $this->mOutput->addOutputPageMetadata( $context->getOutput() );
                    $found = true;
                    $isHTML = true;
                    if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
                        $this->mOutput->updateRuntimeAdaptiveExpiry(
                            $specialPage->maxIncludeCacheTime()
                        );
                    }
                }
            } elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
                $found = false; # access denied
                wfDebug( __METHOD__ . ": template inclusion denied for " .
                    $title->getPrefixedDBkey() . "\n" );
            } else {
                list( $text, $title ) = $this->getTemplateDom( $title );
                if ( $text !== false ) {
                    $found = true;
                    $isChildObj = true;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
                $text = "[[:$titleText]]";
                $found = true;
            }
        } elseif ( $title->isTrans() ) {
            # Interwiki transclusion
            if ( $this->ot['html'] && !$forceRawInterwiki ) {
                $text = $this->interwikiTransclude( $title, 'render' );
                $isHTML = true;
            } else {
                $text = $this->interwikiTransclude( $title, 'raw' );
                # Preprocess it like a template
                $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
                $isChildObj = true;
            }
            $found = true;
        }

        # Do infinite loop check
        # This has to be done after redirect resolution to avoid infinite loops via redirects
        if ( !$frame->loopCheck( $title ) ) {
            $found = true;
            $text = '<span class="error">'
                . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
                . '</span>';
            $this->addTrackingCategory( 'template-loop-category' );
            $this->mOutput->addWarning( wfMessage( 'template-loop-warning',
                wfEscapeWikiText( $titleText ) )->text() );
            wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
        }
    }

    # If we haven't found text to substitute by now, we're done
    # Recover the source wikitext and return it
    if ( !$found ) {
        $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
        if ( $profileSection ) {
            $this->mProfiler->scopedProfileOut( $profileSection );
        }
        return [ 'object' => $text ];
    }

    # Expand DOM-style return values in a child frame
    if ( $isChildObj ) {
        # Clean up argument array
        $newFrame = $frame->newChild( $args, $title );

        if ( $nowiki ) {
            $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
        } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
            # Expansion is eligible for the empty-frame cache
            $text = $newFrame->cachedExpand( $titleText, $text );
        } else {
            # Uncached expansion
            $text = $newFrame->expand( $text );
        }
    }
    if ( $isLocalObj && $nowiki ) {
        $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
        $isLocalObj = false;
    }

    if ( $profileSection ) {
        $this->mProfiler->scopedProfileOut( $profileSection );
    }

    # Replace raw HTML by a placeholder
    if ( $isHTML ) {
        $text = $this->insertStripItem( $text );
    } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
        # Escape nowiki-style return values
        $text = wfEscapeWikiText( $text );
    } elseif ( is_string( $text )
        && !$piece['lineStart']
        && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
    ) {
        # T2529: if the template begins with a table or block-level
        # element, it should be treated as beginning a new line.
        # This behavior is somewhat controversial.
        $text = "\n" . $text;
    }

    if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
        # Error, oversize inclusion
        if ( $titleText !== false ) {
            # Make a working, properly escaped link if possible (T25588)
            $text = "[[:$titleText]]";
        } else {
            # This will probably not be a working link, but at least it may
            # provide some hint of where the problem is.
            # preg_replace() returns the new string rather than modifying
            # its subject, so the result has to be assigned back; otherwise
            # a leading ':' would be doubled in the link below.
            $originalTitle = preg_replace( '/^:/', '', $originalTitle );
            $text = "[[:$originalTitle]]";
        }
        $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
            . 'post-expand include size too large -->' );
        $this->limitationWarn( 'post-expand-template-inclusion' );
    }

    if ( $isLocalObj ) {
        $ret = [ 'object' => $text ];
    } else {
        $ret = [ 'text' => $text ];
    }

    return $ret;
}
3525 
/**
 * Look up a parser function by name, invoke its hook and normalise the
 * hook's return value into an array.
 *
 * The name is first looked up in the case-sensitive synonym table; failing
 * that it is lower-cased with the content language and looked up in the
 * case-insensitive table. If neither matches, [ 'found' => false ] is
 * returned and no hook is called.
 *
 * @param object $frame Frame used to expand PPNode arguments; it is also
 *   handed to hooks registered with SFH_OBJECT_ARGS
 * @param string $function Name of the parser function
 * @param array $args Arguments for the function
 * @return array Always contains 'found'. On success it also carries the
 *   hook's 'text' plus any flags the hook returned; when the hook sets
 *   'noparse' to a false value, 'text' is a preprocessor DOM and
 *   'isChildObj' is true.
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	# Resolve the canonical function id: case-sensitive table first
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Then the case-insensitive table, keyed by the lower-cased name
		$function = $this->contLang->lc( $function );
		if ( !isset( $this->mFunctionSynonyms[0][$function] ) ) {
			return [ 'found' => false ];
		}
		$function = $this->mFunctionSynonyms[0][$function];
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	$allArgs = [ &$parser ];

	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Object-args hooks get ( $parser, $frame, $nodes ): anything that is
		# not already a PPNode (except the entry with key 0) is wrapped in one
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			$funcArgs[] = ( $v instanceof PPNode || $k === 0 )
				? $v
				: $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
		}
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Plain hooks get every argument as a trimmed string
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$plain = $frame->expand( $v );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$plain = $v;
			} else {
				$plain = "$k=$v";
			}
			$allArgs[] = trim( $plain );
		}
	}

	$result = $callback( ...$allArgs );

	# Hooks may return either a wikitext string or an array holding the
	# string (under key 0 or 'text') together with flags. Bring both forms
	# into the array shape this method returns.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [ 'found' => true ];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	# 'noparse' defaults to true and 'preprocessFlags' to 0 when absent
	if ( !( $result['noparse'] ?? true ) ) {
		$result['text'] = $this->preprocessToDom(
			$result['text'],
			$result['preprocessFlags'] ?? 0
		);
		$result['isChildObj'] = true;
	}

	return $result;
}
3628 
/**
 * Get the preprocessor DOM for a template, using the per-parser caches.
 *
 * mTplRedirCache maps a requested title key to the [ namespace, dbkey ] it
 * resolved to; mTplDomCache maps the resolved key to the DOM (or false when
 * no text could be fetched).
 *
 * @param Title $title Title of the template to load
 * @return array Two elements: the DOM (or false) and the title it was
 *   finally loaded from
 */
public function getTemplateDom( $title ) {
	$requestedTitle = $title;
	$domKey = $title->getPrefixedDBkey();

	# Follow a redirect already resolved during this parse
	$redirect = $this->mTplRedirCache[$domKey] ?? null;
	if ( $redirect !== null ) {
		list( $ns, $dbk ) = $redirect;
		$title = Title::makeTitle( $ns, $dbk );
		$domKey = $title->getPrefixedDBkey();
	}

	# isset() is also true for a cached false, so failures are served from here too
	if ( isset( $this->mTplDomCache[$domKey] ) ) {
		return [ $this->mTplDomCache[$domKey], $title ];
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		# Remember the failure so it is not fetched again
		$this->mTplDomCache[$domKey] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$domKey] = $dom;

	# The fetch ended on a different title: record where the request led
	if ( !$title->equals( $requestedTitle ) ) {
		$redirKey = $requestedTitle->getPrefixedDBkey();
		$this->mTplRedirCache[$redirKey] = [ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3668 
3681  $cacheKey = $title->getPrefixedDBkey();
3682  if ( !$this->currentRevisionCache ) {
3683  $this->currentRevisionCache = new MapCacheLRU( 100 );
3684  }
3685  if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
3686  $this->currentRevisionCache->set( $cacheKey,
3687  // Defaults to Parser::statelessFetchRevision()
3688  call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
3689  );
3690  }
3691  return $this->currentRevisionCache->get( $cacheKey );
3692  }
3693 
/**
 * Static revision fetcher; a comment elsewhere in this file names it as the
 * default current-revision callback.
 *
 * NOTE(review): this listing jumps from line 3703 to 3705, so the statement
 * that assigns $rev is not visible here. Confirm against the complete file
 * before relying on this text.
 *
 * @param Title $title
 * @param mixed $parser Defaults to false; not referenced in the visible lines
 * @return mixed The value of $rev
 */
3703  public static function statelessFetchRevision( Title $title, $parser = false ) {
3705 
3706  return $rev;
3707  }
3708 
/**
 * Fetch the unparsed text of a template through the configured template
 * callback and register the dependencies it reports on the parser output.
 *
 * @param Title $title
 * @return array Two elements: the text (string values have U+007F replaced
 *   by "?"; other values are passed through) and the final title, which is
 *   the callback's 'finalTitle' or $title when that is absent
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

	// We use U+007F DELETE to distinguish strip markers from regular text,
	// so it must not survive in fetched template text.
	$text = is_string( $fetched['text'] )
		? strtr( $fetched['text'], "\x7f", "?" )
		: $fetched['text'];

	foreach ( $fetched['deps'] ?? [] as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
		if ( $dep['title']->equals( $this->getTitle() ) ) {
			// If we transclude ourselves, the final result
			// will change based on the new version of the page
			$this->mOutput->setFlag( 'vary-revision' );
		}
	}

	return [ $text, $fetched['finalTitle'] ?? $title ];
}
3736 
/**
 * Fetch the unparsed text of a template, discarding the final title.
 *
 * @param Title $title
 * @return mixed First element of fetchTemplateAndTitle()'s result
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );
	return $text;
}
3745 
/**
 * Static function to get a template's text, following at most one redirect.
 *
 * Each loop pass runs the 'BeforeParserFetchTemplateAndtitle' hook (which may
 * skip the title or pick a revision id), loads a revision, records the
 * dependency and, when text was obtained, continues with the content's
 * redirect target.
 *
 * NOTE(review): this listing jumps from line 3780 to 3783, so the body of the
 * final "else" of the revision lookup is not visible here. Confirm against
 * the complete file.
 *
 * @param Title $title
 * @param Parser|bool $parser Parser used to fetch the current revision when
 *   truthy; defaults to false
 * @return array With keys 'text' (string or false), 'finalTitle' and 'deps'
 *   (list of arrays with 'title', 'page_id' and 'rev_id')
 */
3755  public static function statelessFetchTemplate( $title, $parser = false ) {
3756  $text = $skip = false;
3757  $finalTitle = $title;
3758  $deps = [];
3759 
3760  # Loop to fetch the article, with up to 1 redirect
3761  for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
3762  # Give extensions a chance to select the revision instead
3763  $id = false; # Assume current
3764  Hooks::run( 'BeforeParserFetchTemplateAndtitle',
3765  [ $parser, $title, &$skip, &$id ] );
3766 
 # A hook asked to skip this title: record it as a dependency with no revision
3767  if ( $skip ) {
3768  $text = false;
3769  $deps[] = [
3770  'title' => $title,
3771  'page_id' => $title->getArticleID(),
3772  'rev_id' => null
3773  ];
3774  break;
3775  }
3776  # Get the revision
3777  if ( $id ) {
3778  $rev = Revision::newFromId( $id );
3779  } elseif ( $parser ) {
3780  $rev = $parser->fetchCurrentRevisionOfTitle( $title );
3781  } else {
3783  }
3784  $rev_id = $rev ? $rev->getId() : 0;
3785  # If there is no current revision, there is no page
3786  if ( $id === false && !$rev ) {
3787  $linkCache = MediaWikiServices::getInstance()->getLinkCache();
3788  $linkCache->addBadLinkObj( $title );
3789  }
3790 
 # Register the requested title even when no revision was found (rev_id 0)
3791  $deps[] = [
3792  'title' => $title,
3793  'page_id' => $title->getArticleID(),
3794  'rev_id' => $rev_id ];
3795  if ( $rev && !$title->equals( $rev->getTitle() ) ) {
3796  # We fetched a rev from a different title; register it too...
3797  $deps[] = [
3798  'title' => $rev->getTitle(),
3799  'page_id' => $rev->getPage(),
3800  'rev_id' => $rev_id ];
3801  }
3802 
3803  if ( $rev ) {
3804  $content = $rev->getContent();
3805  $text = $content ? $content->getWikitextForTransclusion() : null;
3806 
3807  Hooks::run( 'ParserFetchTemplate',
3808  [ $parser, $title, $rev, &$text, &$deps ] );
3809 
 # Normalise "no text" (null or false) to false and stop
3810  if ( $text === false || $text === null ) {
3811  $text = false;
3812  break;
3813  }
3814  } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
 # No revision, but the title is in the MediaWiki namespace: use the
 # content-language message of that name if it exists
3815  $message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
3816  lcfirst( $title->getText() ) )->inContentLanguage();
3817  if ( !$message->exists() ) {
3818  $text = false;
3819  break;
3820  }
3821  $content = $message->content();
3822  $text = $message->plain();
3823  } else {
3824  break;
3825  }
3826  if ( !$content ) {
3827  break;
3828  }
3829  # Redirect?
 # The loop condition ends the loop when the redirect target is not an object
3830  $finalTitle = $title;
3831  $title = $content->getRedirectTarget();
3832  }
3833  return [
3834  'text' => $text,
3835  'finalTitle' => $finalTitle,
3836  'deps' => $deps ];
3837  }
3838 
/**
 * Fetch a file and register it as a dependency, returning only the file.
 * Flagged via wfDeprecated() as deprecated since 1.32.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed First element of fetchFileAndTitle()'s result
 */
public function fetchFile( $title, $options = [] ) {
	wfDeprecated( __METHOD__, '1.32' );
	list( $file ) = $this->fetchFileAndTitle( $title, $options );
	return $file;
}
3851 
/**
 * Fetch a file and the title it was found under, registering the file as an
 * image dependency of the parser output.
 *
 * NOTE(review): this listing jumps from line 3859 to 3861, so the statement
 * that sets $file is not visible here. Confirm against the complete file.
 *
 * @param Title $title
 * @param array $options Not referenced in the visible lines
 * @return array Two elements: the file (falsy when none) and the title
 */
3859  public function fetchFileAndTitle( $title, $options = [] ) {
3861 
 # With no file, timestamp and sha1 are both recorded as false
3862  $time = $file ? $file->getTimestamp() : false;
3863  $sha1 = $file ? $file->getSha1() : false;
3864  # Register the file as a dependency...
3865  $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
3866  if ( $file && !$title->equals( $file->getTitle() ) ) {
3867  # Update fetched file title
 # ...and register the dependency under that title as well
3868  $title = $file->getTitle();
3869  $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
3870  }
3871  return [ $file, $title ];
3872  }
3873 
/**
 * Look up a file without registering it as a dependency.
 *
 * Both lookups now go through the RepoGroup service. Previously the sha1
 * lookup used RepoGroup::singleton() while the name lookup used
 * MediaWikiServices, which was inconsistent within one method.
 *
 * @param Title $title
 * @param array $options Recognised keys in this method:
 *   - 'broken': when set, no lookup is done and false is returned
 *   - 'sha1': when set, the file is looked up by this key
 *   The whole array is also forwarded to the repo lookup.
 * @return mixed Result of the repo lookup, or false when 'broken' is set
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();
	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $title, $options );
}
3894 
/**
 * Transclude a page from another wiki by fetching it over HTTP, with the
 * response cached in the main WAN object cache.
 *
 * @param Title $title Title whose full URL is requested
 * @param string $action Value of the 'action' query parameter in that URL
 * @return string The fetched body, or a content-language message when the
 *   feature is disabled, the URL is longer than 1024 bytes, or the fetch failed
 */
public function interwikiTransclude( $title, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false

	// Captured here because __METHOD__ inside the closure would name the closure
	$fname = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		( $wikiId !== false ) ? $wikiId : 'external',
		sha1( $url )
	);

	// A check key is only supplied when the remote wiki ID is known
	$checkKeys = [];
	if ( $wikiId !== false ) {
		$checkKeys[] = $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() );
	}

	$fetch = function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
		$req = MWHttpRequest::factory( $url, [], $fname );

		$status = $req->execute(); // Status object
		$ok = $status->isOK();
		if ( !$ok ) {
			// Failed requests are not cached
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// The remote side reported lag: cap the time to live
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $ok ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$data = $cache->getWithSetCallback(
		$cacheKey,
		$this->svcOptions->get( 'TranscludeCacheExpiry' ),
		$fetch,
		[
			'checkKeys' => $checkKeys,
			'pcGroup' => 'interwiki-transclude:5',
			'pcTTL' => $cache::TTL_PROC_LONG
		]
	);

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// No content and a non-200 status: report the status code
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3961 
/**
 * Expand a triple-brace argument reference ({{{name|default}}}).
 *
 * @param array $piece Must contain 'title' (expanded to get the argument
 *   name) and 'parts' (node list whose first item holds the default)
 * @param object $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] (the default's children, or the
 *   re-imploded original when nothing matched) or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$argName = trim( $nameWithSpaces );

	$text = $frame->getArgument( $argName );
	$object = false;

	if ( $text === false && $parts->getLength() > 0
		&& ( $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		)
	) {
		# No match in frame, use the supplied default
		$object = $parts->item( 0 )->getChildren();
	}

	# Account for the argument's size against the 'arg' include limit
	$error = false;
	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$error = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere: reproduce the original {{{...}}} construct
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}
	if ( $error !== false ) {
		$text .= $error;
	}

	return ( $object !== false )
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
4007 
/**
 * Expand an extension tag and return either its output or a strip marker
 * standing in for that output.
 *
 * In HTML output mode, or for function tag hooks in HTML/pre mode, the
 * registered hook is called. Otherwise the tag is rebuilt as text from its
 * expanded name, attributes, content and closing part.
 *
 * Changes from the previous version, with the same results:
 *  - a missing 'attr' no longer passes null to substr() (deprecated since
 *    PHP 8.1);
 *  - a missing 'close' is treated like a null one instead of raising an
 *    undefined-index notice.
 *
 * @param array $params Keys read here:
 *   - 'name': node expanded to the tag name
 *   - 'attr': optional node expanded to the attribute text
 *   - 'inner': optional node expanded to the tag content
 *   - 'close': optional node expanded to the closing tag text
 *   - 'attributes': optional array of extra attributes (name => value)
 * @param object $frame Frame used for expansion and passed to the hooks
 * @return string Hook output (marker type 'none'), a strip marker, or an
 *   expansion error string passed straight through
 * @throws MWException If a hook returns an unknown 'markerType'
 */
public function extensionSubstitution( $params, $frame ) {
	static $errorStr = '<span class="error">';
	static $errorLen = 20;

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : null;
	if ( $attrText !== null && substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	// Function tag output is returned as-is; everything else is stripped
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			// Array union: attributes parsed from the text win on conflict
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];

			// Avoid PHP 7.1 warning from passing $this by reference
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// No hook is run in this mode: rebuild the tag as text
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4117 
/**
 * Add to an include-size counter unless doing so would pass the limit.
 *
 * @param string $type Key into mIncludeSizes
 * @param int $size Amount to add
 * @return bool False (counter untouched) if the new total would exceed the
 *   maximum include size from the parser options, true otherwise
 */
public function incrementIncludeSize( $type, $size ) {
	$proposed = $this->mIncludeSizes[$type] + $size;
	if ( $proposed > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $proposed;
	return true;
}
4133 
4140  $this->mExpensiveFunctionCount++;
4141  return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4142  }
4143 
/**
 * Strip double-underscore magic words (__TOC__, __NOTOC__, ...) from the
 * text and record their effects on the parser and its output.
 *
 * @param string $text
 * @return string The text with the magic words removed; the first __TOC__
 *   is replaced by a TOC placeholder comment
 */
public function doDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Put a placeholder where the first one was (filled in with the
		# TOC at the end), then drop every remaining occurrence.
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );
		$text = $tocWord->replace( '', $text );
	}

	# Now match and remove the rest of them
	$underscoreWords = $this->magicWordFactory->getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $underscoreWords->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}

	# __NOTOC__ loses against an explicit __TOC__
	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ always overrides __NOINDEX__, see T16899; that is why
	# 'noindex' is applied first and 'index' second.
	foreach ( [ 'noindex', 'index' ] as $policy ) {
		if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->mTitle->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( "$policy-category" );
		}
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4199 
/**
 * Add a tracking category for the current title by delegating to the
 * parser output.
 *
 * @param string $msg Identifier passed to the output's addTrackingCategory()
 * @return mixed Whatever the output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$output = $this->mOutput;
	return $output->addTrackingCategory( $msg, $this->mTitle );
}
4208 
/**
 * Rebuild every <h1>..<h6> heading in the rendered text: give each heading
 * a unique anchor, optional numbering and an edit-section marker, and
 * build the table of contents.
 *
 * Fixes relative to the previous version:
 *  - When a heading had no fallback anchor, $refers[false] was written.
 *    PHP casts that key to integer 0, the same key as a heading whose id
 *    is "0", so such a heading was wrongly treated as a duplicate and
 *    renamed "0_2". Fallback ids are now only recorded when one exists.
 *  - "${var}" string interpolation (deprecated in PHP 8.2) is replaced by
 *    "{$var}"; the resulting strings are identical.
 *
 * @param string $text Rendered text containing the headings
 * @param string $origText Original wikitext; preprocessed here to find the
 *   heading nodes for the byte offsets stored in the section data
 * @param bool $isMain When true, the section data is stored on the parser
 *   output and, unless a TOC position was forced, the TOC is appended to
 *   the text before the first heading
 * @return string The text with headings rewritten and the TOC inserted
 */
public function formatHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page.
	# When allowed, actual presence will depend on post-cache transforms.
	$maybeShowEditLink = !isset( $this->mDoubleUnderscores['noeditsection'] );

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$full = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->mTitle->getPrefixedDBkey();
	# The original wikitext is preprocessed in OT_WIKI mode; the previous
	# output type is restored after the loop
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Lower-cased ids already handed out, used to keep anchors unique
	$refers = [];

	# Index 3 is the 'header' capture group
	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		# A leading heading marker identifies which title the heading came
		# from and its section index
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			# Duplicate id: append the first free numeric suffix, starting at 2
			// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only track the fallback id when there is one. Writing
		# $refers[false] would set key 0 and collide with a heading whose
		# id is "0".
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine( $linkAnchor, $tocline,
				$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			# Not the heading we want: add this node's original text length
			# to the running offset and move on
			$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
				$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->mTitle->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline( $level,
			$matches['attrib'][$headlineCount], $anchor, $headline,
			$editlink, $fallbackAnchor );

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		// Hook handlers receive the section text by reference
		Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full .= implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		# Fill the placeholder that stands where __TOC__ was
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	} else {
		return $full;
	}
}
4613 
/**
 * Transform wikitext for storage: strips NUL characters, normalizes line
 * endings, optionally runs the pre-save pass (pstPass2) and finally
 * unstrips all strip markers.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title used to start the parse
 * @param User $user User set on the parser for the duration of the call
 * @param ParserOptions $options Options; getPreSaveTransform() decides
 *   whether pstPass2() is run
 * @param bool $clearState Whether to take the parser lock and clear state
 * @return string Transformed wikitext
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	// Keep the value returned by lock() alive until this method returns
	// (presumably a scoped lock released on destruction -- confirm).
	$scopedLock = $clearState ? $this->lock() : null;

	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings. The latter is
	// kept for backwards-compatibility with other code that just calls PST,
	// although TextContent subclasses should already have handled it.
	$wikitext = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$wikitext = $this->pstPass2( $wikitext, $user );
	}
	$wikitext = $this->mStripState->unstripBoth( $wikitext );

	# Reset
	$this->setUser( null );

	return $wikitext;
}
4651 
/**
 * Pre-save transform helper: expands substituted variables, replaces the
 * tilde signature forms and resolves pipe-trick links.
 *
 * @param string $text Wikitext being saved
 * @param User $user User whose signature replaces the tilde sequences
 * @return string Transformed wikitext
 */
private function pstPass2( $text, $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		# strtr() prefers the longest matching key, so five tildes win over
		# four, and four over three.
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->mOutput->setFlag( 'user-signature' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# Full-width punctuation, written as raw UTF-8 byte sequences because the
	# patterns below are matched bytewise (no /u modifier).
	$fwOpen = "\xEF\xBC\x88"; # U+FF08 FULLWIDTH LEFT PARENTHESIS
	$fwClose = "\xEF\xBC\x89"; # U+FF09 FULLWIDTH RIGHT PARENTHESIS
	$fwComma = "\xEF\xBC\x8C"; # U+FF0C FULLWIDTH COMMA

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page(context)|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?$fwOpen$tc+$fwClose)\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |$fwComma)$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		# Current title ends in " (context)": reuse its prefix and context
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		# Current title has a prefix and/or a ", context" suffix
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4721 
/**
 * Build the wikitext signature for a user.
 *
 * A fancy (raw) signature that validates is cleaned and returned as-is;
 * otherwise the nickname is wrapped by the 'signature' or 'signature-anon'
 * message in the content language.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use, or false to read the
 *   'nickname' user option
 * @param bool|null $fancySig Whether the nickname is raw wikitext, or null
 *   to read the 'fancysig' user option
 * @return string Signature wikitext
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Anything the caller left unspecified comes from the user object
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# Loose comparison on purpose: null and '' both fall back to the user name
	if ( $nickname == null ) {
		$nickname = $username;
	}

	$isOverlong = mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' );
	if ( $isOverlong ) {
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		if ( $this->validateSig( $nickname ) !== false ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}
		# Failed to validate; fall back to the default
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	if ( $user->isAnon() ) {
		$msgName = 'signature-anon';
	} else {
		$msgName = 'signature';
	}

	$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();

	return $message->title( $this->getTitle() )->text();
}
4776 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|bool The unchanged text when well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( !Xml::isWellFormedXmlFragment( $text ) ) {
		return false;
	}

	return $text;
}
4786 
/**
 * Clean up signature text: force substitution on every template call,
 * remove nested tilde signatures and expand the result through the
 * preprocessor.
 *
 * @param string $text Signature wikitext
 * @param bool $parsing False when called outside a running parse; the method
 *   then takes the parser lock, starts its own parse against $wgTitle and
 *   unstrips the result before returning it
 * @return string Cleaned signature, or the input unchanged when the
 *   clean-signatures option is off
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		global $wgTitle;
		// Held until the method returns
		$scopedLock = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$substWord = $this->magicWordFactory->get( 'subst' );
	# Matches every "{{" that is not already followed by the subst magic word
	$substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
	$substText = '{{' . $substWord->getSynonym( 0 );

	$cleaned = self::cleanSigInSig( preg_replace( $substRegex, $substText, $text ) );

	$dom = $this->preprocessToDom( $cleaned );
	$frame = $this->getPreprocessor()->newFrame();
	$cleaned = $frame->expand( $dom );

	return $standalone
		? $this->mStripState->unstripBoth( $cleaned )
		: $cleaned;
}
4827 
/**
 * Remove every run of three to five tildes from a signature so that it
 * cannot itself contain a signature.
 *
 * @param string $text
 * @return string Text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4838 
/**
 * Public entry point that forwards all of its arguments to startParse(),
 * so external code can put the parser into a defined title / options /
 * output-type state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->startParse( $title, $options, $outputType, $clearState );
}
4853 
/**
 * Set the title, options and output type for a parse, optionally clearing
 * the parser state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to call clearState()
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );
	if ( $clearState ) {
		$this->clearState();
	}
}
4870 
/**
 * Preprocess the text of an interface message.
 *
 * Calls are not re-entrant: while one call is running, nested calls return
 * their input untouched.
 *
 * @param string $text Message text
 * @param ParserOptions $options
 * @param Title|null $title Context title; $wgTitle is used when falsy
 * @return string Preprocessed text, or the input when called recursively
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Clear the guard even when preprocess() throws; otherwise every
		# later call in this process would return its input untransformed.
		$executing = false;
	}
}
4898 
/**
 * Register a callback for an extension tag and add the tag to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null Callback previously registered for the tag, if any
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	# Strict comparison: a loose in_array() treats numeric-looking names such
	# as '0' and '00' as equal and would skip adding the second one.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4936 
/**
 * Register a callback for a transparent tag. Unlike setHook(), the tag is
 * not added to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null Callback previously registered for the tag, if any
 */
public function setTransparentTagHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
	}
	$oldVal = $this->mTransparentTagHooks[$tag] ?? null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
4964 
/**
 * Remove all tag hooks and function tag hooks, and reset the strip list to
 * its default. Transparent tag hooks are left untouched.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
4973 
/**
 * Register a parser function and index every synonym of its magic word in
 * the function synonym cache.
 *
 * @param string $id Magic word identifier
 * @param callable $callback
 * @param int $flags Bitfield of self::SFH_* constants; SFH_NO_HASH
 *   suppresses the leading '#' on the synonyms
 * @throws MWException If no magic word is returned for $id
 * @return callable|null Callback previously registered for $id, if any
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonymList = $magicWord->getSynonyms();
	# 0 = case-insensitive bucket, 1 = case-sensitive bucket
	$caseBucket = intval( $magicWord->isCaseSensitive() );
	$wantsHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $synonymList as $synonym ) {
		# Case
		$key = $caseBucket ? $synonym : $this->contLang->lc( $synonym );
		# Add leading hash
		if ( $wantsHash ) {
			$key = '#' . $key;
		}
		# Remove trailing colon
		if ( substr( $key, -1 ) === ':' ) {
			$key = substr( $key, 0, -1 );
		}
		$this->mFunctionSynonyms[$caseBucket][$key] = $id;
	}

	return $previous;
}
5047 
/**
 * List the identifiers of all registered parser functions.
 *
 * @return array Keys of the function hook table
 */
public function getFunctionHooks() {
	# Run first-call initialisation before reading the hook table
	$this->firstCallInit();

	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5057 
/**
 * Register a function tag hook and add the tag to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback
 * @param int $flags Stored alongside the callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return array|null The [ callback, flags ] pair previously registered for
 *   the tag, if any
 */
public function setFunctionTagHook( $tag, callable $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}
	$old = $this->mFunctionTagHooks[$tag] ?? null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	# Strict comparison: a loose in_array() treats numeric-looking names such
	# as '0' and '00' as equal and would skip adding the second one.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
5082 
/**
 * Replace link placeholders in the given text, in place, by delegating to
 * the parser's link holder collection.
 *
 * @param string &$text Text passed to the link holders' replace() method
 * @param int $options Unused by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5093 
/**
 * Run the text through the link holder collection's replaceText() and
 * return the result.
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;

	return $holders->replaceText( $text );
}
5104 
/**
 * Render an image gallery from text holding one image per line, in the form
 * "Image:someimage.jpg|options and caption".
 *
 * @param string $text Gallery body, one entry per line
 * @param array $params Gallery attributes; 'mode', 'showfilename', 'caption',
 *   'perrow', 'widths' and 'heights' are read here, and the whole array is
 *   also passed on to the gallery object
 * @return string Output of the gallery's toHTML(), possibly changed by the
 *   AfterParserFetchFileAndTitle hook
 */
public function renderImageGallery( $text, $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	// File names are shown only when the attribute is present at all
	$ig->setShowFilename( isset( $params['showfilename'] ) );

	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# Entries without an explicit namespace default to the File namespace
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can overide the file name, and chose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as TraditionalImageGallery does that later.
		# NOTE(review): assumes fetchFileNoRegister() is this class's
		# non-registering file lookup -- confirm against the rest of the file.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = $this->stripAltText( $match, false );
							// The braces and the pipe must be escaped: an
							// unescaped "|" is an alternation and would make
							// this match any value starting with "-{R" or
							// ending with "}-".
							if ( preg_match( '/^-\{R\|(.*)\}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								// Drop the 4-byte "-{R|" prefix and 2-byte "}-" suffix
								$linkValue = substr( $linkValue, 4, -2 );
							}
							list( $type, $target ) = $this->parseLinkParameter( $linkValue );
							if ( $type === 'link-url' ) {
								$link = $target;
								$this->mOutput->addExternalLink( $target );
							} elseif ( $type === 'link-title' ) {
								$link = $target->getLinkURL();
								$this->mOutput->addLink( $target );
							}
							break;
						default:
							// Must be a handler specific parameter.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5276 
/**
 * Get the image parameter map and the matching magic word array for a media
 * handler. Results are cached on the parser per handler class.
 *
 * @param MediaHandler|bool $handler Handler object, or a falsy value for none
 * @return array Two elements: the map from magic word name to
 *   [ type, parameter name ], and the magic word array built from its keys
 */
public function getImageParams( $handler ) {
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$handlerClass] ) ) {
		# Initialise static lists
		static $internalParamNames = [
			'horizAlign' => [ 'left', 'right', 'center', 'none' ],
			'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
				'bottom', 'text-bottom' ],
			'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
				'upright', 'border', 'link', 'alt', 'class' ],
		];
		static $internalParamMap;

		# The handler-independent part of the map is built once per process
		if ( !$internalParamMap ) {
			$internalParamMap = [];
			foreach ( $internalParamNames as $group => $optionNames ) {
				foreach ( $optionNames as $optionName ) {
					// For grep: img_left, img_right, img_center, img_none,
					// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
					// img_bottom, img_text_bottom,
					// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
					// img_border, img_link, img_alt, img_class
					$magicId = str_replace( '-', '_', "img_$optionName" );
					$internalParamMap[$magicId] = [ $group, $optionName ];
				}
			}
		}

		# Add handler params
		$combinedMap = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
				$combinedMap[$magicId] = [ 'handler', $handlerParam ];
			}
		}

		$this->mImageParams[$handlerClass] = $combinedMap;
		$this->mImageParamsMagicArray[$handlerClass] =
			$this->magicWordFactory->newArray( array_keys( $combinedMap ) );
	}

	return [
		$this->mImageParams[$handlerClass],
		$this->mImageParamsMagicArray[$handlerClass]
	];
}
5326 
/**
 * Parse the pipe-separated options of an image link and hand the result to
 * Linker::makeImageLink().
 *
 * @param Title $title Title of the file to show
 * @param string $options Options text, e.g. "thumb|left|caption"
 * @param LinkHolderArray|bool $holders Link holders used when stripping
 *   alt/caption text; when falsy the parser's own link holders are used
 * @return string Whatever Linker::makeImageLink() returns (presumably HTML)
 */
public function makeImage( $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Protect LanguageConverter markup when splitting into parts
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							# parseLinkParameter() turns 'link' into a more
							# specific name: no-link, link-url or link-title
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		# Anything that is not a valid option is the caption; the last one wins
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5536 
/**
 * Classify the value of an image "link" parameter and register the link
 * target with the parser output.
 *
 * @param string $value Parameter value
 * @return array Two elements, type and target:
 *   - [ 'no-link', false ] for an empty value
 *   - [ 'link-url', string ] for a value that is a valid external URL
 *   - [ 'link-title', Title ] for a value that parses as a title
 *   - [ null, false ] for anything else
 */
public function parseLinkParameter( $value ) {
	$chars = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$prots = $this->mUrlProtocols;

	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	# Starts with a known protocol: it is either a valid URL or nothing
	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );

		return [ 'link-url', $value ];
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );

	return [ 'link-title', $linkTitle ];
}
5579 
/**
 * Turn caption wikitext into plain text usable in alt/title attributes:
 * link holders are replaced by their text, strip markers are expanded,
 * ampersands of legacy semicolon-less entities are encoded and all tags
 * are removed.
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Holder array to resolve links with;
 *   when falsy the parser's own link holders are used
 * @return string
 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$plain = $this->mStripState->unstripBoth( $plain );

	# Compatibility hack!  In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$plain = preg_replace( "/
		&                                  # 1. entity prefix
		(?=                                # 2. followed by:
		(?:                                #  a. one of the legacy semicolon-less named entities
			A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
			C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
			GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
			O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
			U(?:acute|circ|grave|uml)|Yacute|
			a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
			c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
			divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
			frac(?:1(?:2|4)|34)|
			gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
			i(?:acute|circ|excl|grave|quest|uml)|laquo|
			lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
			m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
			not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
			o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
			p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
			s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
			u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$))                        #  b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx", '&amp;', $plain );

	return Sanitizer::stripAllTags( $plain );
}
5637 
/**
 * Mark the current parser output as uncacheable by setting its cache
 * expiry to zero.
 *
 * @throws MWException If there is no current parser output
 */
public function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	if ( $this->mOutput ) {
		$this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
		return;
	}

	throw new MWException( __METHOD__ .
		" can only be called when actually parsing something" );
}
5651 
/**
 * Expand variables in a piece of text and then unstrip all strip markers.
 * The text is modified in place and also returned.
 *
 * @param string &$text
 * @param PPFrame|bool $frame Frame passed through to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	# Step 1: variable expansion
	$text = $this->replaceVariables( $text, $frame );

	# Step 2: restore stripped content
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5665 
/**
 * List every registered tag name: transparent tags first, then tag hooks,
 * then function tag hooks.
 *
 * @return array
 */
public function getTags() {
	$this->firstCallInit();

	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$hookTags = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparentTags, $hookTags, $functionTags );
}
5679 
/**
 * Get the parser function synonym table, as filled by setFunctionHook():
 * indexed first by case sensitivity (0 or 1), then by synonym.
 *
 * @return array
 */
public function getFunctionSynonyms() {
	# Run first-call initialisation before reading the table
	$this->firstCallInit();

	$synonymTable = $this->mFunctionSynonyms;

	return $synonymTable;
}
5688 
/**
 * Get the URL protocols value held by this parser. Elsewhere in this class
 * it is interpolated into regular expressions.
 *
 * @return string
 */
public function getUrlProtocols() {
	$protocols = $this->mUrlProtocols;

	return $protocols;
}
5696 
/**
 * Replace every transparent tag in the text with the output of its hook.
 *
 * The tags are first pulled out of the text and replaced by markers; each
 * marker is then swapped for the hook's return value, or for the original
 * tag text when no hook is registered under the lower-cased tag name.
 *
 * @param string $text
 * @return string
 */
public function replaceTransparentTags( $text ) {
	$extracted = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $extracted );

	$substitutions = [];
	foreach ( $extracted as $marker => $tagData ) {
		list( $element, $content, $params, $tag ) = $tagData;
		$hookName = strtolower( $element );

		$substitutions[$marker] = isset( $this->mTransparentTagHooks[$hookName] )
			? call_user_func_array(
				$this->mTransparentTagHooks[$hookName],
				[ $content, $params, $this ]
			)
			: $tag;
	}

	return strtr( $text, $substitutions );
}
5728 
/**
 * Shared worker behind getSection() and replaceSection().
 *
 * $sectionId consists of optional flags and a section index joined by
 * '-'; the last piece is the index, and a 'T' flag makes the text be
 * preprocessed with PTD_FOR_INCLUSION. Sections are located through the
 * top-level <h> nodes of the preprocessor DOM, and a section extends up
 * to the next heading of the same or a higher level.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier, e.g. "2" or "T-2"
 * @param string $mode 'get' or 'replace'
 * @param string $newText Replacement text in 'replace' mode; in 'get'
 *   mode the value returned when the section does not exist
 * @return string In 'get' mode the section text; in 'replace' mode the
 *   whole text with the section replaced, or the unmodified $text when
 *   the section does not exist
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Keep the lock handle in a local for the duration of this call
	# (presumably it releases the lock when it is destroyed)
	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	$isGet = ( $mode === 'get' );
	$isReplace = ( $mode === 'replace' );

	# Split "<flags>-<index>": the last piece is the index, the rest are flags
	$pieces = explode( '-', $sectionId );
	$sectionIndex = array_pop( $pieces );
	$flags = in_array( 'T', $pieces, true ) ? self::PTD_FOR_INCLUSION : 0;

	# An empty document contains only section 0 (or T-0)
	if ( strval( $text ) === '' ) {
		if ( $sectionIndex == 0 ) {
			return $isGet ? '' : $newText;
		}

		return $isGet ? $newText : $text;
	}

	# <h> nodes mark section breaks and only occur at the top level,
	# so walking the root's children is enough to find them
	$node = $this->preprocessToDom( $text, $flags )->getFirstChild();

	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest: use a level larger than any heading
		$targetLevel = 1000;
	} else {
		# Scan forward to the heading that opens the requested section
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			if ( $isReplace ) {
				# Everything before the section is kept as-is
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# The requested section does not exist
		return $isGet ? $newText : $text;
	}

	# Walk to the end of the section, nested subsections included
	do {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			if ( $heading['i'] != $sectionIndex && $heading['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $isGet ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	if ( $isReplace ) {
		# Two newlines are appended because trailing whitespace in $newText
		# is conventionally stripped by the editor; both are needed to
		# restore the paragraph gap. Nothing is appended for empty text.
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		# Copy whatever follows the replaced section
		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $outText ) ) {
		# Put stripped tags back and drop trailing whitespace
		$outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
	}

	return $outText;
}
5868 
/**
 * Get the text of one section of a page.
 *
 * Delegates to extractSections() in 'get' mode.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier, optionally prefixed
 *   with flags such as "T-"
 * @param string $defaultText Value returned when the section is not found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';

	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5886 
/**
 * Replace one section of a page with new text.
 *
 * Delegates to extractSections() in 'replace' mode.
 *
 * @param string $oldText Existing page wikitext
 * @param string|int $sectionId Section identifier, optionally prefixed
 *   with flags such as "T-"
 * @param string $newText Text that replaces the section
 * @return string Page text with the section replaced
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';

	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
5902 
/**
 * Get the ID of the revision being parsed.
 *
 * @return int|null Value of $this->mRevisionId; getRevisionObject()
 *   treats null as "preview mode"
 */
public function getRevisionId() {
	$revId = $this->mRevisionId;

	return $revId;
}
5916 
/**
 * Get the revision object for $this->mRevisionId.
 *
 * A non-null result is cached in $this->mRevisionObject and reused by
 * later calls.
 *
 * @return Revision|null The revision, or null in preview mode when the
 *   callback only produced an already-saved revision
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject !== null ) {
		return $this->mRevisionObject;
	}

	// The callback is consulted even when mRevisionId is null, which is
	// useful when parsing a revision that has not been saved yet. It may
	// also be used to inject an OLD revision that was already loaded, so
	// "current" is a bit of a misnomer and the call cannot be skipped
	// just because mRevisionId is set.
	$fetchRevision = $this->mOptions->getCurrentRevisionCallback();
	$rev = call_user_func( $fetchRevision, $this->getTitle(), $this );

	$inPreview = ( $this->mRevisionId === null );
	if ( $inPreview && $rev && $rev->getId() ) {
		// Preview mode, yet the callback returned an existing revision:
		// that is probably the page's current revision, which is not
		// wanted here, so ignore it. The callback still had to run so an
		// unsaved revision could be injected, e.g. for self-transclusion
		// previews.
		return null;
	}

	// When parsing for a new revision the callback should already yield
	// the object matching mRevisionId. If it does not, fetch by ID.
	if ( $this->mRevisionId && $rev && $rev->getId() != $this->mRevisionId ) {
		$rev = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $rev;

	return $rev;
}
5959 
/**
 * Get the timestamp associated with the revision being parsed, adjusted
 * to the site-default timezone offset.
 *
 * The value is computed on first use and cached in
 * $this->mRevisionTimestamp. When no revision object is available the
 * current time (wfTimestampNow()) is used instead.
 *
 * @return string Adjusted timestamp as produced by
 *   $this->contLang->userAdjust()
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
	}

	# Without this return every caller would receive null
	return $this->mRevisionTimestamp;
}
5979 
/**
 * Get the name of the user that edited the revision being parsed.
 *
 * The result is cached in $this->mRevisionUser. With no revision object
 * available, the current user's name is used when saving wikitext
 * ($this->ot['wiki']) or previewing; otherwise the value stays null.
 *
 * @return string|null
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$rev = $this->getRevisionObject();
	if ( $rev ) {
		$this->mRevisionUser = $rev->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this template is subst: the revision id will be blank,
		# so fall back to the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
5999 
/**
 * Get the size of the revision being parsed.
 *
 * The result is cached in $this->mRevisionSize. With no revision object
 * available, the parser input size ($this->mInputSize) is used.
 *
 * @return int|null
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$rev = $this->getRevisionObject();

	# If this variable is subst: the revision id will be blank, so use the
	# parser input size, because the substitution itself changes the size
	$this->mRevisionSize = $rev ? $rev->getSize() : $this->mInputSize;

	return $this->mRevisionSize;
}
6020 
/**
 * Set the default sort key.
 *
 * The key is stored on the parser and also recorded on the parser output
 * as the 'defaultsort' property.
 *
 * @param string $sort
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$output = $this->mOutput;
	$output->setProperty( 'defaultsort', $sort );
}
6030 
/**
 * Get the default sort key, with an empty string standing in for the
 * "not set" state (false).
 *
 * @return string
 */
public function getDefaultSort() {
	$sortKey = $this->mDefaultSort;

	return $sortKey !== false ? $sortKey : '';
}
6048 
/**
 * Get the raw default sort key exactly as stored.
 *
 * Unlike getDefaultSort(), a stored false is returned unchanged.
 *
 * @return string|bool
 */
public function getCustomDefaultSort() {
	$sortKey = $this->mDefaultSort;

	return $sortKey;
}
6058 
/**
 * Turn already-stripped heading text into a normalised section name.
 *
 * Whitespace is normalised first, then character references are
 * decoded, and finally the same normalisation that is applied to link
 * fragments is applied (see normalizeSectionName()).
 *
 * @param string $text Heading text with wikitext markup already removed
 * @return string Normalised section name, without a leading '#'
 */
private static function getSectionNameFromStrippedText( $text ) {
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );
	$text = Sanitizer::decodeCharReferences( $text );
	$text = self::normalizeSectionName( $text );
	return $text;
}
6065 
/**
 * Build a '#'-prefixed anchor from a section name.
 *
 * @param string $sectionName
 * @return string '#' followed by the ID escaped for use in a link
 */
private static function makeAnchor( $sectionName ) {
	$escapedId = Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $escapedId;
}
6069 
/**
 * Build a '#'-prefixed anchor from a section name, using the legacy
 * encoding when it is configured as the fallback fragment mode.
 *
 * If the second entry of the 'FragmentMode' setting is 'legacy', the ID
 * is escaped with the fallback encoding; otherwise the normal link
 * escaping is used.
 *
 * @param string $sectionName
 * @return string '#' followed by the escaped ID
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );
	if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
		// ForAttribute() and ForLink() are the same for legacy encoding
		$id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
	} else {
		$id = Sanitizer::escapeIdForLink( $sectionName );
	}

	return "#$id";
}
6081 
/**
 * Guess the anchor that a heading written as wikitext will receive.
 *
 * @param string $text Heading wikitext
 * @return string Anchor, including the leading '#'
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );

	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6096 
/**
 * Same as guessSectionNameFromWikiText(), but the anchor is built by
 * makeLegacyAnchor(), i.e. with the legacy encoding where configured.
 *
 * @param string $text Heading wikitext
 * @return string Anchor, including the leading '#'
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6112 
/**
 * Guess the anchor for heading text whose wikitext markup has already
 * been stripped.
 *
 * @param string $text Stripped heading text
 * @return string Anchor, including the leading '#'
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6122 
/**
 * Apply to a section name the same normalisation that link fragments get.
 *
 * The name is parsed as the fragment of a title string ("#<name>") and the
 * resulting fragment is returned. If the title parser rejects the string,
 * the input is returned unchanged.
 *
 * @param string $text Section name
 * @return string Normalised section name
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();

	try {
		$titleParts = $titleParser->splitTitleString( "#$text" );
	} catch ( MalformedTitleException $ex ) {
		# Not parseable as a title fragment: leave the name as it is
		return $text;
	}

	return $titleParts['fragment'];
}
6140 
/**
 * Strip wikitext from a heading so it can be used as a section anchor.
 *
 * Internal links are reduced to their label (or target when unpiped),
 * external links with a label are reduced to the label, quote markup is
 * run through doQuotes(), and anything between '<' and '>' is removed.
 *
 * @param string $text Heading wikitext
 * @return string Stripped text
 */
public function stripSectionName( $text ) {
	# Internal links: piped links keep their label, plain links their target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# External links keep their label
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Wikitext quotes (italics & bold) are parsed
	$text = $this->doQuotes( $text );

	# HTML tags are removed
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6173 
/**
 * Testing helper: expand variables in $text, unstrip the result and
 * remove HTML tags that are not allowed.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @return string
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	# Keep the lock handle in a local for the duration of this call
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6195 
/**
 * Testing helper: run the pre-save transform on $text, using the user
 * taken from $options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
6205 
/**
 * Testing helper: same as testSrvus(), but with the output type set to
 * OT_PREPROCESS.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;

	return $this->testSrvus( $text, $title, $options, $outputType );
}
6215 
/**
 * Apply a callback to every part of a string that lies outside strip
 * markers, leaving the markers themselves untouched.
 *
 * The string is scanned for MARKER_PREFIX ... MARKER_SUFFIX pairs. Text
 * between markers is passed to $callback and replaced by its return
 * value; markers are copied verbatim. A prefix with no matching suffix
 * causes the rest of the string to be copied unchanged.
 *
 * @param string $s
 * @param callable $callback Receives a text chunk, returns its replacement
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$result = '';
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	for ( $pos = 0; $pos < $length; ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			# No more markers: process the remainder and finish
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Process the plain text that precedes the marker
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			# Unterminated marker: copy the rest as-is
			$result .= substr( $s, $start );
			break;
		}

		# Copy the whole marker, suffix included, and continue after it
		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
6255 
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the current strip state.
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6265 
/**
 * Bundle half-parsed text with the strip-state and link-holder data it
 * refers to, so it can be restored by unserializeHalfParsedText().
 *
 * @deprecated since 1.31
 * @param string $text
 * @return array With keys 'text', 'version', 'stripState', 'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
	wfDeprecated( __METHOD__, '1.31' );

	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
6293 
/**
 * Load text produced by serializeHalfParsedText() back into this parser.
 *
 * The saved strip state and link holders are merged into the parser's
 * own, and the text, as returned by those merges, is handed back.
 *
 * @deprecated since 1.31
 * @param array $data Result of serializeHalfParsedText()
 * @throws MWException If $data has no version or a different version
 * @return string
 */
public function unserializeHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	$texts = [ $data['text'] ];

	# Merge the saved strip state first ...
	$texts = $this->mStripState->merge( $data['stripState'], $texts );

	# ... then merge the link holders, which renumbers the links
	$texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );

	return $texts[0];
}
6326 
/**
 * Check whether $data carries the version that
 * unserializeHalfParsedText() accepts.
 *
 * @deprecated since 1.31
 * @param array $data
 * @return bool
 */
public function isValidHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
6341 
/**
 * Parse a width or "width x height" parameter value.
 *
 * (T15500) In both forms a trailing "px" is permitted for backward
 * compatibility.
 *
 * @param string $value E.g. "100", "100px", "100x200" or "100x200px"
 * @param bool $parseHeight Whether the "<width>x<height>" form is accepted
 * @return array Empty when $value is '' or matches neither form;
 *   otherwise contains 'width' and, for the two-dimensional form, 'height'
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	$m = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
		return [
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6370 
/**
 * Mark the parser as busy and return a handle whose callback clears the
 * mark again.
 *
 * While busy, $this->mInParse holds the backtrace of the code that took
 * the lock, so a second attempt to lock can report who holds it.
 *
 * @throws MWException If the parser is already locked
 * @return ScopedCallback Its callback resets $this->mInParse to false
 */
protected function lock() {
	$owner = $this->mInParse;
	if ( $owner ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $owner );
	}

	// Remember where the lock was taken, so that if some code tries to
	// lock again the owner's backtrace can be shown for easier debugging
	$this->mInParse = ( new Exception )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6397 
/**
 * Strip the outer <p></p> wrapper from HTML that consists of exactly one
 * paragraph.
 *
 * The input is returned unchanged when it is not wrapped in <p>...</p>
 * or when the wrapped content itself contains a closing </p>, i.e. when
 * there is more than one paragraph.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );

	if ( !$isWrapped || strpos( $m[1], '</p>' ) !== false ) {
		return $html;
	}

	return $m[1];
}
6416 
/**
 * Get a parser that is safe to use right now.
 *
 * If this parser is in the middle of a parse ($this->mInParse is set), a
 * new instance is created through the factory; otherwise this instance
 * itself is returned.
 *
 * @return Parser
 */
public function getFreshParser() {
	return $this->mInParse ? $this->factory->create() : $this;
}
6434 
/**
 * Set up the PHP implementation of OOUI for this request and flag the
 * parser output as using OOUI.
 */
public function enableOOUI() {
	# The PHP side of OOUI has to be set up before it is used
	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
6445 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:5923
extensionSubstitution( $params, $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:4023
getFunctionSynonyms()
Definition: Parser.php:5684
static armorFrenchSpaces( $text, $space='&#160;')
Armor French spaces with a replacement character.
Definition: Sanitizer.php:1172
static register( $parser)
$mAutonumber
Definition: Parser.php:192
$mPPNodeCount
Definition: Parser.php:206
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2248
bool string $mInParse
Recursive call protection.
Definition: Parser.php:258
const MARKER_PREFIX
Definition: Parser.php:136
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
Definition: deferred.txt:11
wfUrlProtocols( $includeProtocolRelative=true)
Returns a regular expression of url protocols.
null means default in associative array form
Definition: hooks.txt:1982
setLinkID( $id)
Definition: Parser.php:971
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1982
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1662
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
__construct( $svcOptions=null, MagicWordFactory $magicWordFactory=null, Language $contLang=null, ParserFactory $factory=null, $urlProtocols=null, SpecialPageFactory $spFactory=null, $linkRendererFactory=null, $nsInfo=null)
Constructing parsers directly is deprecated! Use a ParserFactory.
Definition: Parser.php:335
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4797
static factory( $url, array $options=null, $caller=__METHOD__)
Generate a new request object.
$mTplRedirCache
Definition: Parser.php:208
LinkRenderer $mLinkRenderer
Definition: Parser.php:266
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:5985
doMagicLinks( $text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1567
parseLinkParameter( $value)
Parse the value of 'link' parameter in image syntax ([[File:Foo.jpg|link=<value>]]).
Definition: Parser.php:5555
const OT_PREPROCESS
Definition: Defines.php:182
static element( $element, $attribs=[], $contents='')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:232
doHeadings( $text)
Parse headers and return html.
Definition: Parser.php:1745
either a plain
Definition: hooks.txt:2043
static tidy( $text)
Interface with Remex tidy.
Definition: MWTidy.php:42
$mDoubleUnderscores
Definition: Parser.php:208
SpecialPageFactory $specialPageFactory
Definition: Parser.php:278
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
killMarkers( $text)
Remove any strip markers found in the given text.
Definition: Parser.php:6262
static getExternalLinkRel( $url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:2044
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:5883
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:252
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
bool $mFirstCall
Whether firstCallInit still needs to be called.
Definition: Parser.php:160
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition: Sanitizer.php:66
getRevisionTimestampSubstring( $start, $len, $mtts, $variable)
Definition: Parser.php:2993
nextLinkID()
Definition: Parser.php:964
getTemplateDom( $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3637
Title( $x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:903
const SPACE_NOT_NL
Definition: Parser.php:105
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1982
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1450
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
const OT_PLAIN
Definition: Parser.php:116
getTags()
Accessor.
Definition: Parser.php:5671
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Definition: router.php:42
const OT_WIKI
Definition: Parser.php:113
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2159
User $mUser
Definition: Parser.php:215
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:3021
static cleanUrl( $url)
Definition: Sanitizer.php:2033
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1776
static isEnabled()
Definition: MWTidy.php:54
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:5053
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:2065
callParserFunction( $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3545
magicLinkCallback( $m)
Definition: Parser.php:1598
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
database rows
Definition: globals.txt:10
wfHostname()
Fetch server name for use in error reporting etc.
braceSubstitution( $piece, $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:3195
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:979
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
preprocessToDom( $text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:3051
const TOC_START
Definition: Parser.php:139
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
SectionProfiler $mProfiler
Definition: Parser.php:261
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <...
$sort
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:115
null for the local wiki Added in
Definition: hooks.txt:1585
There are three types of nodes:
Definition: PPNode.php:35
$mHeadings
Definition: Parser.php:208
$value
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:4968
const NS_SPECIAL
Definition: Defines.php:49
clearState()
Clear Parser state.
Definition: Parser.php:454
const EXT_LINK_ADDR
Definition: Parser.php:98
replaceExternalLinks( $text)
Replace external links (REL)
Definition: Parser.php:1966
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message key
Definition: hooks.txt:2151
static factory( $mode=false, IContextSource $context=null)
Get a new image gallery.
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6155
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:5090
static statelessFetchRevision(Title $title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3703
armorLinks( $text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2584
static activeUsers()
Definition: SiteStats.php:130
$mLinkID
Definition: Parser.php:205
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4923
static createAssocArgs( $args)
Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
Definition: Parser.php:3125
LinkRendererFactory $linkRendererFactory
Definition: Parser.php:290
static makeMediaLinkFile(LinkTarget $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:783
$mGeneratedPPNodeCount
Definition: Parser.php:206
$mRevisionId
Definition: Parser.php:232
target page
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4834
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:1047
const NS_TEMPLATE
Definition: Defines.php:70
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1799
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:137
setTitle( $t)
Set the context title.
Definition: Parser.php:875
const NO_ARGS
Definition: PPFrame.php:26
fetchFileNoRegister( $title, $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3884
see documentation in includes Linker php for Linker::makeImageLink or false for current used if you return false $parser
Definition: hooks.txt:1799
MagicWordArray $mVariables
Definition: Parser.php:167
This list may contain false positives That usually means there is additional text with links below the first Each row contains links to the first and second as well as the first line of the second redirect text
const SFH_NO_HASH
Definition: Parser.php:86
static setupOOUI( $skinName='default', $dir='ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
setTransparentTagHook( $tag, callable $callback)
As setHook(), but letting the contents be parsed.
Definition: Parser.php:4954
$mForceTocPosition
Definition: Parser.php:210
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5913
const OT_PREPROCESS
Definition: Parser.php:114
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached Will warn at most once the user per limitation type...
Definition: Parser.php:3173
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition: Parser.php:6106
$mFunctionSynonyms
Definition: Parser.php:149
If you want to remove the page from your watchlist later
getPreSaveTransform()
Transform wiki markup when saving the page?
$mOutputType
Definition: Parser.php:229
interwikiTransclude( $title, $action)
Transclude an interwiki link.
Definition: Parser.php:3903
$mDefaultStripList
Definition: Parser.php:152
$mExtLinkBracketedRegex
Definition: Parser.php:181
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place $output
Definition: hooks.txt:2217
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message. Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item. Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page. Return false to stop further processing of the tag $reader:XMLReader object & $pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision. Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag. Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUnknownUser':When a user doesn 't exist locally, this hook is called to give extensions an opportunity to auto-create it. If the auto-creation is successful, return false. $name:User name 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload. Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports. & $fullInterwikiPrefix:Interwiki prefix, may contain colons. & $pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable. Can be used to lazy-load the import sources list. & $importSources:The value of $wgImportSources. Modify as necessary. See the comment in DefaultSettings.php for the detail of how to structure this array. 'InfoAction':When building information to display on the action=info page. 
$context:IContextSource object & $pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect. & $title:Title object for the current page & $request:WebRequest & $ignoreRedirect:boolean to skip redirect check & $target:Title/string of redirect target & $article:Article object 'InternalParseBeforeLinks':during Parser 's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InternalParseBeforeSanitize':during Parser 's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings. Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not. Return true without providing an interwiki to continue interwiki search. $prefix:interwiki prefix we are looking for. & $iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 'InvalidateEmailComplete':Called after a user 's email has been invalidated successfully. $user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification. Callee may modify $url and $query, URL will be constructed as $url . 
$query & $url:URL to index.php & $query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) & $article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() & $ip:IP being check & $result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from & $allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn 't match your organization. $addr:The e-mail address entered by the user & $result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user & $result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we 're looking for a messages file for & $file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces. Do not use this hook to add namespaces. Use CanonicalNamespaces for that. & $namespaces:Array of namespaces indexed by their numbers 'LanguageGetTranslatedLanguageNames':Provide translated language names. & $names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page 's language links. This is called in various places to allow extensions to define the effective language links for a page. $title:The page 's Title. & $links:Array with elements of the form "language:title" in the order that they will be output. & $linkFlags:Associative array mapping prefixed links to arrays of flags. 
Currently unused, but planned to provide support for marking individual language links in the UI, e.g. for featured articles. 'LanguageSelector':Hook to change the language selector available on a page. $out:The output page. $cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED since 1.28! Use HtmlPageLinkRendererBegin instead. Used when generating internal and interwiki links in Linker::link(), before processing starts. Return false to skip default processing and return $ret. See documentation for Linker::link() for details on the expected meanings of parameters. $skin:the Skin object $target:the Title that the link is pointing to & $html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1980
This document provides an overview of the usage of PageUpdater and that is
Definition: pageupdater.txt:3
makeKnownLinkHolder( $nt, $text='', $trail='', $prefix='')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2560
if( $line===false) $args
Definition: cdb.php:64
static stripOuterParagraph( $html)
Strip outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6408
A class for passing options to services.
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition: Sanitizer.php:74
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition: hooks.txt:3051
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).
Definition: User.php:51
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:780
static getInstance( $ts=false)
Get a timestamp instance in GMT.
Definition: MWTimestamp.php:39
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:780
static numberingroup( $group)
Find the number of users in a given user group.
Definition: SiteStats.php:150
stripAltText( $caption, $holders)
Definition: Parser.php:5585
setDefaultSort( $sort)
Mutator for $mDefaultSort.
Definition: Parser.php:6026
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage()...
Definition: Linker.php:252
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:94
Status::newGood()` to allow deletion, and then `return false` from the hook function. Ensure you consume the 'ChangeTagAfterDelete' hook to carry out custom deletion actions. $tag:name of the tag $user:user initiating the action & $status:Status object. See above. 'ChangeTagsListActive':Allows you to nominate which of the tags your extension uses are in active use. & $tags:list of all active tags. Append to this array. 'ChangeTagsAfterUpdateTags':Called after tags have been updated with the ChangeTags::updateTags function. Params:$addedTags:tags effectively added in the update $removedTags:tags effectively removed in the update $prevTags:tags that were present prior to the update $rc_id:recentchanges table id $rev_id:revision table id $log_id:logging table id $params:tag params $rc:RecentChange being tagged when the tagging accompanies the action, or null $user:User who performed the tagging when the tagging is subsequent to the action, or null 'ChangeTagsAllowedAdd':Called when checking if a user can add tags to a change. & $allowedTags:List of all the tags the user is allowed to add. Any tags the user wants to add( $addTags) that are not in this array will cause it to fail. You may add or remove tags to this array as required. $addTags:List of tags user intends to add. $user:User who is adding the tags. 'ChangeUserGroups':Called before user groups are changed. $performer:The User who will perform the change $user:The User whose groups will be changed & $add:The groups that will be added & $remove:The groups that will be removed 'Collation::factory':Called if $wgCategoryCollation is an unknown collation. $collationName:Name of the collation in question & $collationObject:Null. Replace with a subclass of the Collation class that implements the collation given in $collationName. 'ConfirmEmailComplete':Called after a user 's email has been confirmed successfully. 
$user:user(object) whose email is being confirmed 'ContentAlterParserOutput':Modify parser output for a given content object. Called by Content::getParserOutput after parsing has finished. Can be used for changes that depend on the result of the parsing but have to be done before LinksUpdate is called(such as adding tracking categories based on the rendered HTML). $content:The Content to render $title:Title of the page, as context $parserOutput:ParserOutput to manipulate 'ContentGetParserOutput':Customize parser output for a given content object, called by AbstractContent::getParserOutput. May be used to override the normal model-specific rendering of page content. $content:The Content to render $title:Title of the page, as context $revId:The revision ID, as context $options:ParserOptions for rendering. To avoid confusing the parser cache, the output can only depend on parameters provided to this hook function, not on global state. $generateHtml:boolean, indicating whether full HTML should be generated. If false, generation of HTML may be skipped, but other information should still be present in the ParserOutput object. & $output:ParserOutput, to manipulate or replace 'ContentHandlerDefaultModelFor':Called when the default content model is determined for a given title. May be used to assign a different model for that title. $title:the Title in question & $model:the model name. Use with CONTENT_MODEL_XXX constants. 'ContentHandlerForModelID':Called when a ContentHandler is requested for a given content model name, but no entry for that model exists in $wgContentHandlers. Note:if your extension implements additional models via this hook, please use GetContentModels hook to make them known to core. $modeName:the requested content model name & $handler:set this to a ContentHandler object, if desired. 'ContentModelCanBeUsedOn':Called to determine whether that content model can be used on a given page. 
This is especially useful to prevent some content models to be used in some special location. $contentModel:ID of the content model in question $title:the Title in question. & $ok:Output parameter, whether it is OK to use $contentModel on $title. Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok. 'ContribsPager::getQueryInfo':Before the contributions query is about to run & $pager:Pager object for contributions & $queryInfo:The query for the contribs Pager 'ContribsPager::reallyDoQuery':Called before really executing the query for My Contributions & $data:an array of results of all contribs queries $pager:The ContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'ContributionsLineEnding':Called before a contributions HTML line is finished $page:SpecialPage object for contributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'ContributionsToolLinks':Change tool links above Special:Contributions $id:User identifier $title:User page title & $tools:Array of tool links $specialPage:SpecialPage instance for context and services. Can be either SpecialContributions or DeletedContributionsPage. Extensions should type hint against a generic SpecialPage though. 'ConvertContent':Called by AbstractContent::convert when a conversion to another content model is requested. Handler functions that modify $result should generally return false to disable further attempts at conversion. $content:The Content object to be converted. $toModel:The ID of the content model to convert to. $lossy:boolean indicating whether lossy conversion is allowed. 
& $result:Output parameter, in case the handler function wants to provide a converted Content object. Note that $result->getContentModel() must return $toModel. 'ContentSecurityPolicyDefaultSource':Modify the allowed CSP load sources. This affects all directives except for the script directive. If you want to add a script source, see ContentSecurityPolicyScriptSource hook. & $defaultSrc:Array of Content-Security-Policy allowed sources $policyConfig:Current configuration for the Content-Security-Policy header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'ContentSecurityPolicyDirectives':Modify the content security policy directives. Use this only if ContentSecurityPolicyDefaultSource and ContentSecurityPolicyScriptSource do not meet your needs. & $directives:Array of CSP directives $policyConfig:Current configuration for the CSP header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'ContentSecurityPolicyScriptSource':Modify the allowed CSP script sources. Note that you also have to use ContentSecurityPolicyDefaultSource if you want non-script sources to be loaded from whatever you add. & $scriptSrc:Array of CSP directives $policyConfig:Current configuration for the CSP header $mode:ContentSecurityPolicy::REPORT_ONLY_MODE or ContentSecurityPolicy::FULL_MODE depending on type of header 'CustomEditor':When invoking the page editor Return true to allow the normal editor to be used, or false if implementing a custom editor, e.g. for a special namespace, etc. 
$article:Article being edited $user:User performing the edit 'DeletedContribsPager::reallyDoQuery':Called before really executing the query for Special:DeletedContributions Similar to ContribsPager::reallyDoQuery & $data:an array of results of all contribs queries $pager:The DeletedContribsPager object hooked into $offset:Index offset, inclusive $limit:Exact query limit $descending:Query direction, false for ascending, true for descending 'DeletedContributionsLineEnding':Called before a DeletedContributions HTML line is finished. Similar to ContributionsLineEnding $page:SpecialPage object for DeletedContributions & $ret:the HTML line $row:the DB row for this line & $classes:the classes to add to the surrounding< li > & $attribs:associative array of other HTML attributes for the< li > element. Currently only data attributes reserved to MediaWiki are allowed(see Sanitizer::isReservedDataAttribute). 'DeleteUnknownPreferences':Called by the cleanupPreferences.php maintenance script to build a WHERE clause with which to delete preferences that are not known about. This hook is used by extensions that have dynamically-named preferences that should not be deleted in the usual cleanup process. For example, the Gadgets extension creates preferences prefixed with 'gadget-', and so anything with that prefix is excluded from the deletion. &where:An array that will be passed as the $cond parameter to IDatabase::select() to determine what will be deleted from the user_properties table. $db:The IDatabase object, useful for accessing $db->buildLike() etc. 'DifferenceEngineAfterLoadNewText':called in DifferenceEngine::loadNewText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before returning true from this function. 
$differenceEngine:DifferenceEngine object 'DifferenceEngineLoadTextAfterNewContentIsLoaded':called in DifferenceEngine::loadText() after the new revision 's content has been loaded into the class member variable $differenceEngine->mNewContent but before checking if the variable 's value is null. This hook can be used to inject content into said class member variable. $differenceEngine:DifferenceEngine object 'DifferenceEngineMarkPatrolledLink':Allows extensions to change the "mark as patrolled" link which is shown both on the diff header as well as on the bottom of a page, usually wrapped in a span element which has class="patrollink". $differenceEngine:DifferenceEngine object & $markAsPatrolledLink:The "mark as patrolled" link HTML(string) $rcid:Recent change ID(rc_id) for this change(int) 'DifferenceEngineMarkPatrolledRCID':Allows extensions to possibly change the rcid parameter. For example the rcid might be set to zero due to the user being the same as the performer of the change but an extension might still want to show it under certain conditions. & $rcid:rc_id(int) of the change or 0 $differenceEngine:DifferenceEngine object $change:RecentChange object $user:User object representing the current user 'DifferenceEngineNewHeader':Allows extensions to change the $newHeader variable, which contains information about the new revision, such as the revision 's author, whether the revision was marked as a minor edit or not, etc. $differenceEngine:DifferenceEngine object & $newHeader:The string containing the various #mw-diff-otitle[1-5] divs, which include things like revision author info, revision comment, RevisionDelete link and more $formattedRevisionTools:Array containing revision tools, some of which may have been injected with the DiffRevisionTools hook $nextlink:String containing the link to the next revision(if any) $status
Definition: hooks.txt:1263
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
startExternalParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4848
fetchFileAndTitle( $title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3859
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:791
const NO_TEMPLATES
Definition: PPFrame.php:27
static parseWidthParam( $value, $parseHeight=true)
Parse a width param of an image link, like 300px or 200x300px.
Definition: Parser.php:6351
$mVarCache
Definition: Parser.php:153
disableCache()
Set a flag in the output object indicating that the content is dynamic and shouldn't be cached...
Definition: Parser.php:5643
$mRevisionObject
Definition: Parser.php:231
Title $mTitle
Definition: Parser.php:228
static stripAllTags( $html)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1995
getPreloadText( $text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:845
makeImage( $title, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5335
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:388
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:5965
wfUrlencode( $s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
getImageParams( $handler)
Definition: Parser.php:5281
fetchCurrentRevisionOfTitle( $title)
Fetch the current revision of a given title.
Definition: Parser.php:3680
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
Factory for handling the special page list and generating SpecialPage objects.
static extractTagsAndParams( $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:1098
$mRevIdForTs
Definition: Parser.php:236
setUser( $user)
Set the current user.
Definition: Parser.php:866
$mStripList
Definition: Parser.php:151
$mFunctionTagHooks
Definition: Parser.php:150
const OT_PLAIN
Definition: Defines.php:184
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:168
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place or wrap services the preferred way to define a new service is the $wgServiceWiringFiles array $services
Definition: hooks.txt:2217
$mRevisionTimestamp
Definition: Parser.php:233
$mImageParams
Definition: Parser.php:154
makeLimitReport()
Set the limit report data in the current ParserOutput, and return the limit report HTML comment...
Definition: Parser.php:621
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
Definition: Sanitizer.php:1288
static newKnownCurrent(IDatabase $db, $pageIdOrTitle, $revId=0)
Load a revision based on a known page ID and current revision ID from the DB.
Definition: Revision.php:1325
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:77
const OT_WIKI
Definition: Defines.php:181
Preprocessor $mPreprocessor
Definition: Parser.php:185
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:1033
const NS_MEDIA
Definition: Defines.php:48
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:5899
static getVersion( $flags='', $lang=null)
Return a string of the MediaWiki version with Git revision if available.
static singleton()
Definition: RepoGroup.php:60
static normalizeSectionName( $text)
Apply the same normalization as code making links to this section would.
Definition: Parser.php:6129
replaceTransparentTags( $text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5707
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
argSubstitution( $piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:3971
const RECOVER_ORIG
Definition: PPFrame.php:33
static normalizeUrlComponent( $component, $unsafe)
Definition: Parser.php:2153
static isValid( $ip)
Validate an IP address.
Definition: IP.php:111
StripState $mStripState
Definition: Parser.php:197
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3096
$mDefaultSort
Definition: Parser.php:207
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:1021
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1161
const EXT_IMAGE_REGEX
Definition: Parser.php:101
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4860
$cache
Definition: mcc.php:33
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1638
$params
replaceInternalLinks( $s)
Process [[ ]] wikilinks.
Definition: Parser.php:2235
const NS_CATEGORY
Definition: Defines.php:74
setFunctionHook( $id, callable $callback, $flags=0)
Create a function, e.g.
Definition: Parser.php:5017
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1982
doQuotes( $text)
Helper function for doAllQuotes()
Definition: Parser.php:1780
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:850
extractSections( $text, $sectionId, $mode, $newText='')
Break wikitext input into sections, and either pull or replace some particular section's text...
Definition: Parser.php:5758
setOutputType( $ot)
Set the output type.
Definition: Parser.php:912
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:6005
$mImageParamsMagicArray
Definition: Parser.php:155
LinkHolderArray $mLinkHolders
Definition: Parser.php:203
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1982
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to and or sell copies of the and to permit persons to whom the Software is furnished to do so
Definition: LICENSE.txt:10
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition: Parser.php:1065
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
static splitWhitespace( $s)
Return a three-element array: leading whitespace, string contents, trailing whitespace.
Definition: Parser.php:3063
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:780
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:992
$buffer
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1443
internalParseHalfParsed( $text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1470
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:925
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:1011
$mInputSize
Definition: Parser.php:237
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
formatHeadings( $text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4225
getUserSig(&$user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext...
Definition: Parser.php:4736
const HALF_PARSED_VERSION
Update this version number when the output of serializeHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:83
const NS_FILE
Definition: Defines.php:66
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:434
static makeAnchor( $sectionName)
Definition: Parser.php:6066
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name. If you don't need a full Title object...
Definition: SpecialPage.php:83
static normalizeCharReferences( $text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1562
const PTD_FOR_INCLUSION
Definition: Parser.php:108
isValidHalfParsedText( $data)
Returns true if the given array, presumed to be generated by serializeHalfParsedText(), is compatible with the current version of the parser.
Definition: Parser.php:6337
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1982
doDoubleUnderscore( $text)
Strip double-underscore items like NOGALLERY and NOTOC. Fills $this->mDoubleUnderscores, returns the modified text.
Definition: Parser.php:4152
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1766
renderImageGallery( $text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5118
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$mTagHooks
Definition: Parser.php:146
NamespaceInfo $nsInfo
Definition: Parser.php:293
fetchTemplateAndTitle( $title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3714
const NS_MEDIAWIKI
Definition: Defines.php:68
MagicWordFactory $magicWordFactory
Definition: Parser.php:269
if(defined( 'MW_SETUP_CALLBACK')) $fname
Customization point after all loading (constants, functions, classes, DefaultSettings, LocalSettings).
Definition: Setup.php:123
static doBlockLevels( $text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6441
testSrvus( $text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
strip/replaceVariables/unstrip for preprocessor regression testing
Definition: Parser.php:6184
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2593
const OT_HTML
Definition: Defines.php:180
addTrackingCategory( $msg)
Definition: