MediaWiki  1.27.1
Parser.php
Go to the documentation of this file.
1 <?php
67 class Parser {
73  const VERSION = '1.6.4';
74 
80 
81  # Flags for Parser::setFunctionHook
82  const SFH_NO_HASH = 1;
83  const SFH_OBJECT_ARGS = 2;
84 
85  # Constants needed for external link processing
86  # Everything except bracket, space, or control characters
87  # \p{Zs} is the Unicode 'separator, space' category. It covers the ASCII space 0x20
88  # as well as U+3000 IDEOGRAPHIC SPACE (bug 19052)
89  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
90  # Simplified expression to match an IPv4 or IPv6 address, or
91  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
92  const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
93  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
94  // @codingStandardsIgnoreStart Generic.Files.LineLength
95  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
96  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
97  // @codingStandardsIgnoreEnd
98 
99  # Regular expression for a non-newline space
100  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
101 
102  # State constants for the definition list colon extraction
103  const COLON_STATE_TEXT = 0;
104  const COLON_STATE_TAG = 1;
111 
112  # Flags for preprocessToDom
113  const PTD_FOR_INCLUSION = 1;
114 
115  # Allowed values for $this->mOutputType
116  # Parameter to startExternalParse().
117  const OT_HTML = 1; # like parse()
118  const OT_WIKI = 2; # like preSaveTransform()
120  const OT_MSG = 3;
121  const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
122 
140  const MARKER_SUFFIX = "-QINU`\"'\x7f";
141  const MARKER_PREFIX = "\x7f'\"`UNIQ-";
142 
143  # Markers used for wrapping the table of contents
144  const TOC_START = '<mw:toc>';
145  const TOC_END = '</mw:toc>';
146 
147  # Persistent:
148  public $mTagHooks = [];
150  public $mFunctionHooks = [];
151  public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
152  public $mFunctionTagHooks = [];
153  public $mStripList = [];
154  public $mDefaultStripList = [];
155  public $mVarCache = [];
156  public $mImageParams = [];
158  public $mMarkerIndex = 0;
159  public $mFirstCall = true;
160 
161  # Initialised by initialiseVariables()
162 
166  public $mVariables;
167 
171  public $mSubstWords;
172  # Initialised in constructor
174 
175  # Initialized in getPreprocessor()
176 
178 
179  # Cleared with clearState():
180 
183  public $mOutput;
185 
189  public $mStripState;
190 
196 
197  public $mLinkID;
201  public $mExpensiveFunctionCount; # number of expensive parser function calls
203 
207  public $mUser; # User object; only used when doing pre-save transform
208 
209  # Temporary
210  # These are variables reset at least once per parse regardless of $clearState
211 
215  public $mOptions;
216 
220  public $mTitle; # Title context, used for self-link rendering and similar things
221  public $mOutputType; # Output type, one of the OT_xxx constants
222  public $ot; # Shortcut alias, see setOutputType()
223  public $mRevisionObject; # The revision object of the specified revision ID
224  public $mRevisionId; # ID to display in {{REVISIONID}} tags
225  public $mRevisionTimestamp; # The timestamp of the specified revision ID
226  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
227  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
228  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
229  public $mInputSize = false; # For {{PAGESIZE}} on current page.
230 
235  public $mUniqPrefix = Parser::MARKER_PREFIX;
236 
243 
251 
256  public $mInParse = false;
257 
259  protected $mProfiler;
260 
/**
 * Set up the parser: remember the configuration, build the bracketed
 * external-link regex and decide which preprocessor class to use.
 *
 * @param array $conf Configuration; the only key read here is
 *   'preprocessorClass', which forces a specific preprocessor class.
 */
public function __construct( $conf = [] ) {
	$this->mConf = $conf;
	$this->mUrlProtocols = wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';

	# Preprocessor_Hash is the fallback for every case that does not
	# explicitly pick something else.
	$chosenClass = 'Preprocessor_Hash';
	if ( isset( $conf['preprocessorClass'] ) ) {
		# An explicit configuration always wins
		$chosenClass = $conf['preprocessorClass'];
	} elseif ( defined( 'HPHP_VERSION' ) ) {
		# Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop;
		# keep the fallback.
	} elseif ( extension_loaded( 'domxml' ) ) {
		# PECL extension that conflicts with the core DOM extension (bug 13770);
		# warn and keep the fallback.
		wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
	} elseif ( extension_loaded( 'dom' ) ) {
		$chosenClass = 'Preprocessor_DOM';
	}
	$this->mPreprocessorClass = $chosenClass;

	wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
286 
/**
 * Drop every property on destruction, starting with the link holder
 * array, so that nothing referenced from this object is kept alive by it.
 */
public function __destruct() {
	# Release the link holders first, if there are any
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}

	# Then unset whatever properties remain
	foreach ( $this as $field => $unused ) {
		unset( $this->{$field} );
	}
}
298 
/**
 * Post-clone fix-up: mark the copy as not being inside a parse, detach
 * fields that may have been copied as references, and notify hooks.
 */
public function __clone() {
	$this->mInParse = false;

	// Bug 56226: taking a reference to an object field turns the field
	// itself into a reference (for as long as the other reference lives),
	// and clone copies reference fields as references. Old hooks from the
	// PHP4 days pass these fields by reference, so the clone could end up
	// sharing them with the original. Break the sharing explicitly.
	foreach ( [ 'mStripState', 'mVarCache' ] as $field ) {
		// Take a plain (non-reference) copy, then rebind the field to
		// that fresh copy.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
321 
/**
 * One-time initialisation, performed on the first call only: registers the
 * core tag hooks, initialises the variables and fires 'ParserFirstCallInit'.
 * Subsequent calls return immediately.
 */
public function firstCallInit() {
	if ( !$this->mFirstCall ) {
		return;
	}
	$this->mFirstCall = false;

	# NOTE(review): the embedded listing numbering skips one line right here;
	# confirm against the upstream file that no registration call was lost.
	CoreTagHooks::register( $this );
	$this->initialiseVariables();

	# Hook handlers receive the parser by reference. PHP >= 7.1 no longer
	# supports creating a reference to $this, so alias it to a local first.
	$parser = $this;
	Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
}
337 
/**
 * Reset all per-parse state to its initial values.
 *
 * Runs firstCallInit() if it has not run yet, creates a fresh ParserOutput,
 * StripState, LinkHolderArray and SectionProfiler, zeroes the counters and
 * finally fires the 'ParserClearState' hook.
 */
public function clearState() {
	if ( $this->mFirstCall ) {
		$this->firstCallInit();
	}
	$this->mOutput = new ParserOutput;
	# Let the output object record which parser options get used
	$this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
	$this->mAutonumber = 0;
	$this->mLastSection = '';
	$this->mDTopen = false;
	$this->mIncludeCount = [];
	$this->mArgStack = false;
	$this->mInPre = false;
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;
	$this->mRevisionObject = $this->mRevisionTimestamp =
		$this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState;

	# Clear these on every parse, bug 4549
	$this->mTplRedirCache = $this->mTplDomCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor that still points at another parser
	# instance must not be reused.
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	# Hook handlers receive the parser by reference. PHP >= 7.1 no longer
	# supports creating a reference to $this, so alias it to a local first.
	$parser = $this;
	Hooks::run( 'ParserClearState', [ &$parser ] );
}
392 
/**
 * Convert wikitext to HTML and return the populated output object.
 *
 * @param string $text Wikitext to parse
 * @param Title $title Title passed on to startParse() and used for title conversion
 * @param ParserOptions $options
 * @param bool $linestart Passed through to internalParseHalfParsed()
 * @param bool $clearState When true, U+007F is replaced in the input, the
 *   parser is locked, and the flag is forwarded to startParse()
 * @param int|null $revid When not null, used as the revision ID for the
 *   duration of this parse; the previous revision fields are restored
 *   before returning
 * @return ParserOutput The object held in $this->mOutput
 */
public function parse( $text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	global $wgShowHostnames;

	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held in a local so it lives until this method returns
		$magicScopeVariable = $this->lock();
	}

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the revision fields so they can be restored on exit
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// Hook handlers receive the parser by reference. PHP >= 7.1 no longer
	// supports creating a reference to $this, so alias it to a local first.
	$parser = $this;

	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Set the converted title text, unless title conversion is disabled by
	// the options or a double-underscore switch, or a display title is
	// already set on the output.
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();

		$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuTime !== null ) {
			$this->mOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
		$this->mOutput->setLimitReportData( 'limitreport-walltime',
			sprintf( "%.3f", $wallTime )
		);

		$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
			[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
			[ $this->mIncludeSizes['post-expand'], $max ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
			[ $this->mIncludeSizes['arg'], $max ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
		);
		Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

		$limitReport = "NewPP limit report\n";
		if ( $wgShowHostnames ) {
			$limitReport .= 'Parsed by ' . wfHostname() . "\n";
		}
		$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
		$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
		$limitReport .= 'Dynamic content: ' .
			( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
			"\n";

		foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
			// A hook returning false takes over formatting of this entry
			if ( Hooks::run( 'ParserLimitReportFormat',
				[ $key, &$value, &$limitReport, false, false ]
			) ) {
				$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
				$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
					->inLanguage( 'en' )->useDatabase( false );
				if ( !$valueMsg->exists() ) {
					$valueMsg = new RawMessage( '$1' );
				}
				if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
					$valueMsg->params( $value );
					$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
				}
			}
		}
		// Since we're not really outputting HTML, decode the entities and
		// then re-encode the things that need hiding inside HTML comments.
		$limitReport = htmlspecialchars_decode( $limitReport );
		Hooks::run( 'ParserLimitReport', [ $this, &$limitReport ] );

		// Sanitize for comment. Note '‐' in the replacement is U+2010,
		// which looks much like the problematic '-'.
		$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
		$text .= "\n<!-- \n$limitReport-->\n";

		// Add on template profiling data
		$dataByFunc = $this->mProfiler->getFunctionStats();
		uasort( $dataByFunc, function ( $a, $b ) {
			// A comparison callback must return a negative, zero or positive
			// integer; a bare boolean cannot express "greater than".
			if ( $a['real'] == $b['real'] ) {
				return 0;
			}
			return ( $a['real'] < $b['real'] ) ? 1 : -1; // descending order
		} );
		$profileReport = "Transclusion expansion time report (%,ms,calls,template)\n";
		foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
			$profileReport .= sprintf( "%6.2f%% %8.3f %6d - %s\n",
				$item['%real'], $item['real'], $item['calls'],
				htmlspecialchars( $item['name'] ) );
		}
		$text .= "\n<!-- \n$profileReport-->\n";

		if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
			wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
				$this->mTitle->getPrefixedDBkey() );
		}
	}
	$this->mOutput->setText( $text );

	// Restore the revision fields saved above and drop per-parse values
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
581 
/**
 * Half-parse wikitext from within an ongoing parse: fire the strip hooks
 * and run internalParse() with $isMain = false. The result is not passed
 * through internalParseHalfParsed().
 *
 * @param string $text Wikitext to parse
 * @param bool|PPFrame $frame Frame used for variable expansion, or false
 * @return string
 */
public function recursiveTagParse( $text, $frame = false ) {
	# Hook handlers receive the parser by reference. PHP >= 7.1 no longer
	# supports creating a reference to $this, so alias it to a local first.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text, false, $frame );
	return $text;
}
610 
/**
 * Like recursiveTagParse(), but additionally runs the result through
 * internalParseHalfParsed() with $isMain = false.
 *
 * @param string $text Wikitext to parse
 * @param bool|PPFrame $frame Frame used for variable expansion, or false
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
633 
/**
 * Expand templates and variables in the text and return the result with
 * all strip markers replaced. Locks the parser and starts a fresh parse in
 * OT_PREPROCESS mode.
 *
 * @param string $text
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid When not null, stored as the current revision ID
 * @param bool|PPFrame $frame Passed to replaceVariables()
 * @return string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	# Held in a local so it lives until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	# Hook handlers receive the parser by reference. PHP >= 7.1 no longer
	# supports creating a reference to $this, so alias it to a local first.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
659 
/**
 * Expand variables in the text and replace all strip markers, without
 * locking the parser or starting a new parse.
 *
 * @param string $text
 * @param bool|PPFrame $frame Passed to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
674 
/**
 * Process wikitext in OT_PLAIN mode: substitute the $1-style parameters,
 * preprocess the text for inclusion and expand the resulting DOM.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Values substituted into the text via RawMessage
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	# $flags was read below without ever being assigned, so expand() received
	# an undefined variable. Expand with arguments and templates disabled.
	# NOTE(review): value restored from the upstream definition; confirm.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
702 
/**
 * Return a random string from wfRandomString( 16 ).
 *
 * @deprecated since 1.26 (emits a deprecation notice via wfDeprecated)
 * @return string
 */
public static function getRandomString() {
	wfDeprecated( __METHOD__, '1.26' );
	$random = wfRandomString( 16 );
	return $random;
}
713 
/**
 * Store the user in $this->mUser; getUser() returns it in preference to
 * the user from the parser options.
 *
 * @param User|null $user
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
723 
/**
 * Return the strip-marker prefix constant.
 *
 * @deprecated since 1.26 (emits a deprecation notice via wfDeprecated)
 * @return string self::MARKER_PREFIX
 */
public function uniqPrefix() {
	wfDeprecated( __METHOD__, '1.26' );
	$prefix = self::MARKER_PREFIX;
	return $prefix;
}
734 
/**
 * Set the title context. A falsy value is replaced by a placeholder title
 * built from the text 'NO TITLE'; a fragment, if present, is stripped.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	if ( !$t ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	# Strip the fragment, if any, to avoid various odd effects
	$this->mTitle = $t->hasFragment()
		? $t->createFragmentTarget( '' )
		: $t;
}
752 
/**
 * Accessor for the title context stored in $this->mTitle.
 *
 * @return Title
 */
public function getTitle() {
	$title = $this->mTitle;
	return $title;
}
761 
/**
 * Combined accessor/mutator for $this->mTitle, delegating to wfSetVar().
 *
 * @param Title|null $x Passed to wfSetVar() as the new value
 * @return Title Whatever wfSetVar() returns
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );
	return $result;
}
771 
/**
 * Set the output type and rebuild the $this->ot shortcut array, which maps
 * 'html', 'wiki', 'pre' and 'plain' to booleans telling whether that is
 * the current type.
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$typeByKey = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$shortcuts = [];
	foreach ( $typeByKey as $key => $type ) {
		$shortcuts[$key] = ( $ot == $type );
	}
	$this->ot = $shortcuts;
}
787 
/**
 * Combined accessor/mutator for $this->mOutputType, delegating to
 * wfSetVar(). Note that this does not touch the $this->ot shortcut array.
 *
 * @param int|null $x Passed to wfSetVar() as the new value
 * @return int Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
797 
/**
 * Accessor for the output object stored in $this->mOutput.
 *
 * @return ParserOutput
 */
public function getOutput() {
	$output = $this->mOutput;
	return $output;
}
806 
/**
 * Accessor for the options object stored in $this->mOptions.
 *
 * @return ParserOptions
 */
public function getOptions() {
	$options = $this->mOptions;
	return $options;
}
815 
/**
 * Combined accessor/mutator for $this->mOptions, delegating to wfSetVar().
 *
 * @param ParserOptions|null $x Passed to wfSetVar() as the new value
 * @return ParserOptions Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
825 
/**
 * Return the current link ID and advance the counter by one.
 *
 * @return int The value of $this->mLinkID before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
832 
/**
 * Overwrite the link ID counter that nextLinkID() hands out.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
839 
/**
 * Alias of getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	$language = $this->getTargetLanguage();
	return $language;
}
847 
/**
 * Determine the language for this parse. In order of preference: the
 * target language set in the options, the user language when parsing an
 * interface message, otherwise the page language of the current title.
 *
 * @throws MWException When none of the above applies and no title is set
 * @return Language
 */
public function getTargetLanguage() {
	$explicit = $this->mOptions->getTargetLanguage();
	if ( $explicit !== null ) {
		return $explicit;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
870 
/**
 * Language object used for conversion; currently the same as
 * getTargetLanguage().
 *
 * @return Language
 */
public function getConverterLanguage() {
	$language = $this->getTargetLanguage();
	return $language;
}
878 
/**
 * Return the user set through setUser() if there is one, otherwise the
 * user from the parser options.
 *
 * @return User
 */
public function getUser() {
	if ( $this->mUser === null ) {
		return $this->mOptions->getUser();
	}
	return $this->mUser;
}
891 
/**
 * Return the preprocessor, instantiating it from the configured class
 * name on first use.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->mPreprocessorClass;
	$this->mPreprocessor = new $preprocessorClass( $this );
	return $this->mPreprocessor;
}
904 
/**
 * Replace each of the given XML-style tags (and HTML comments) in the text
 * with a unique marker and collect the removed pieces.
 *
 * For every marker, $matches[$marker] is a list of:
 *   [ element name, inner content (null for <tag />), decoded attribute
 *     array, complete original text of the element ]
 *
 * An element without an end tag runs to the end of the text.
 *
 * @param array $elements Tag names to extract; matched literally and
 *   case-insensitively
 * @param string $text Source text
 * @param array &$matches Out parameter, reset and then filled as described
 * @param string|null $uniq_prefix Unused; passing a non-null value only
 *   triggers a deprecation notice
 * @return string The text with every extracted element replaced by its marker
 */
public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) {
	if ( $uniq_prefix !== null ) {
		wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' );
	}
	# Shared across calls so that markers stay unique
	static $n = 1;
	$stripped = '';
	$matches = [];

	# Quote the tag names: a regex metacharacter or the '/' delimiter in a
	# name must not alter the pattern or shift the capture-group numbering
	# that the index arithmetic below relies on.
	$quotedElements = [];
	foreach ( $elements as $tagName ) {
		$quotedElements[] = preg_quote( $tagName, '/' );
	}
	$taglist = implode( '|', $quotedElements );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			# No further opening tag or comment
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			$element = $p[1];
			$attributes = $p[2];
			$close = $p[3];
			$inside = $p[4];
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				# $element is text matched from the input, so quote it as well
				$end = "/(<\\/" . preg_quote( $element, '/' ) . "\\s*>)/i";
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				$tail = $q[1];
				$text = $q[2];
			}
		}

		$matches[$marker] = [ $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" ];
	}
	return $stripped;
}
990 
/**
 * Accessor for the list stored in $this->mStripList.
 *
 * @return array
 */
public function getStripList() {
	$list = $this->mStripList;
	return $list;
}
999 
/**
 * Register a piece of text with the strip state under a newly generated
 * marker and return that marker.
 *
 * @param string $text Text to associate with the marker
 * @return string The generated marker
 */
public function insertStripItem( $text ) {
	# Consume the current index; the counter moves on for the next item
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1015 
/**
 * Convert wiki table markup ({| ... |}, |-, |, !, |+) into HTML table
 * markup, line by line.
 *
 * One stack entry per currently open (nested) table is kept on each of
 * five parallel stacks. Tables still open at the end of the text are
 * closed automatically.
 *
 * @param string $text
 * @return string
 */
public function doTableStuff( $text ) {
	$out = '';
	$cellOpen = []; # Is a cell (td/th/caption) currently open?
	$lastTags = []; # Last tag activated (td, th or caption)
	$rowOpen = []; # Is a tr currently open?
	$rowAttrs = []; # Pending tr attributes
	$rowSeen = []; # Did this table open a <tr> element?
	$indentLevel = 0; # indent level of the table

	foreach ( StringUtils::explode( "\n", $text ) as $outLine ) {
		$line = trim( $outLine );

		# Empty line: copy through untouched
		if ( $line === '' ) {
			$out .= $outLine . "\n";
			continue;
		}

		$firstChar = $line[0];
		$firstTwo = substr( $line, 0, 2 );
		$matches = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
			# Start of a new table; leading colons give the indent level
			$indentLevel = strlen( $matches[1] );

			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $matches[2] ),
				'table'
			);

			$outLine = str_repeat( '<dl><dd>', $indentLevel ) . "<table{$attributes}>";
			$cellOpen[] = false;
			$lastTags[] = '';
			$rowOpen[] = false;
			$rowAttrs[] = '';
			$rowSeen[] = false;
		} elseif ( count( $cellOpen ) == 0 ) {
			# Not inside any table: copy through untouched
			$out .= $outLine . "\n";
			continue;
		} elseif ( $firstTwo === '|}' ) {
			# End of the innermost table
			$line = '</table>' . substr( $line, 2 );
			$lastTag = array_pop( $lastTags );

			# A table without any row gets an empty one
			if ( !array_pop( $rowSeen ) ) {
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $rowOpen ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $cellOpen ) ) {
				$line = "</{$lastTag}>{$line}";
			}
			array_pop( $rowAttrs );
			$outLine = $line . str_repeat( '</dd></dl>', $indentLevel );
		} elseif ( $firstTwo === '|-' ) {
			# Table row marker; what follows the dashes is only attributes
			$line = preg_replace( '#^\|-+#', '', $line );

			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $line ),
				'tr'
			);
			# Replace the pending row attributes
			array_pop( $rowAttrs );
			$rowAttrs[] = $attributes;

			$line = '';
			$lastTag = array_pop( $lastTags );
			array_pop( $rowSeen );
			$rowSeen[] = true;

			if ( array_pop( $rowOpen ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $cellOpen ) ) {
				$line = "</{$lastTag}>{$line}";
			}

			$outLine = $line;
			$rowOpen[] = false;
			$cellOpen[] = false;
			$lastTags[] = '';
		} elseif ( $firstChar === '|'
			|| $firstChar === '!'
			|| $firstTwo === '|+'
		) {
			# Cell elements: td, th or caption
			if ( $firstTwo === '|+' ) {
				$firstChar = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			# In heading lines '!!' separates cells just like '||'
			if ( $firstChar === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$outLine = '';
			foreach ( explode( '||', $line ) as $cell ) {
				$previous = '';
				if ( $firstChar !== '+' ) {
					# Anything but a caption needs an open row
					$pendingRowAttrs = array_pop( $rowAttrs );
					if ( !array_pop( $rowOpen ) ) {
						$previous = "<tr{$pendingRowAttrs}>\n";
					}
					$rowOpen[] = true;
					$rowAttrs[] = '';
					array_pop( $rowSeen );
					$rowSeen[] = true;
				}

				$lastTag = array_pop( $lastTags );

				# Close the previous cell, if one is open
				if ( array_pop( $cellOpen ) ) {
					$previous = "</{$lastTag}>\n{$previous}";
				}

				switch ( $firstChar ) {
					case '|':
						$lastTag = 'td';
						break;
					case '!':
						$lastTag = 'th';
						break;
					case '+':
						$lastTag = 'caption';
						break;
					default:
						$lastTag = '';
				}

				$lastTags[] = $lastTag;

				# A cell could contain both parameters and data
				$cellParts = explode( '|', $cell, 2 );

				# Bug 553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				if ( strpos( $cellParts[0], '[[' ) !== false ) {
					$cell = "{$previous}<{$lastTag}>{$cell}";
				} elseif ( count( $cellParts ) == 1 ) {
					$cell = "{$previous}<{$lastTag}>{$cellParts[0]}";
				} else {
					$attributes = Sanitizer::fixTagAttributes(
						$this->mStripState->unstripBoth( $cellParts[0] ),
						$lastTag
					);
					$cell = "{$previous}<{$lastTag}{$attributes}>{$cellParts[1]}";
				}

				$outLine .= $cell;
				$cellOpen[] = true;
			}
		}
		$out .= $outLine . "\n";
	}

	# Close whatever td, tr and table elements are still open
	while ( count( $cellOpen ) > 0 ) {
		if ( array_pop( $cellOpen ) ) {
			$out .= "</td>\n";
		}
		if ( array_pop( $rowOpen ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $rowSeen ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1213 
/**
 * Run the first group of parser passes: variable/template expansion, HTML
 * tag sanitising, tables, horizontal rules, double underscores, headings,
 * internal links, quotes, external links, magic links and heading
 * formatting.
 *
 * @param string $text
 * @param bool $isMain Forwarded to formatHeadings()
 * @param bool|PPFrame $frame When given, variables are expanded through
 *   this frame; otherwise replaceVariables() is used
 * @return string The text as left by the 'ParserBeforeInternalParse' hook
 *   if that hook aborts, otherwise the processed text
 */
public function internalParse( $text, $isMain = true, $frame = false ) {

	$origText = $text;

	# Hook handlers and the sanitizer callback used to be given &$this.
	# PHP >= 7.1 no longer supports creating a reference to $this, so
	# hooks get a local alias and the callable uses $this directly.
	$parser = $this;

	# Hook to suspend the parser in this state
	if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] ) ) {
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		if ( !$frame->depth ) {
			$flag = 0;
		} else {
			$flag = Parser::PTD_FOR_INCLUSION;
		}
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		array_keys( $this->mTransparentTagHooks )
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );
	$text = $this->formatHeadings( $text, $origText, $isMain );

	return $text;
}
1284 
/**
 * Run the second group of parser passes on half-parsed text: unstrip,
 * French-spacing fixes, block levels, link holder replacement, language
 * conversion, transparent tags, character reference normalisation and
 * tidying.
 *
 * @param string $text
 * @param bool $isMain When true, the 'ParserAfterUnstrip',
 *   'ParserBeforeTidy' and 'ParserAfterTidy' hooks are fired
 * @param bool $linestart Forwarded to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	# Hook handlers receive the parser by reference. PHP >= 7.1 no longer
	# supports creating a reference to $this, so alias it to a local first.
	$parser = $this;

	$text = $this->mStripState->unstripGeneral( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$fixtags = [
		# french spaces, last one Guillemet-left
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# french spaces, Guillemet-right
		'/(\\302\\253) /' => '\\1&#160;',
		'/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
	];
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	# Language conversion, unless disabled by the options or by a double
	# underscore switch; never applied to interface messages.
	if ( !( $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
	) {
		if ( !$this->mOptions->getInterfaceMessage() ) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert( $text );
		}
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( MWTidy::isEnabled() && $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
		$this->mOutput->addModuleStyles( MWTidy::getModuleStyles() );
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
			'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (bug 3695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
			'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
			'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		];

		$text = preg_replace(
			array_keys( $tidyregs ),
			array_values( $tidyregs ),
			$text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1382 
/**
 * Replace free external links, "RFC n" / "PMID n" references and
 * "ISBN n" references in the text with links.
 *
 * Anchors and HTML elements are matched too, so that the callback can
 * return them untouched and nothing inside them is linked.
 *
 * @param string $text
 * @return string The text with every match replaced by the result of
 *   magicLinkCallback()
 */
public function doMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	# Building blocks: a non-newline space, a dash or such a space, and a
	# possessive run of one or more such spaces
	$space = self::SPACE_NOT_NL;
	$spdash = "(?:-|$space)";
	$spaces = "$space++";

	# Extended (x) mode: whitespace and #-comments inside the pattern are ignored
	$pattern = '!(?: # Start cases
		(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
		(<.*?>) | # m[2]: Skip stuff inside
		# HTML elements' . "
		(\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
		# m[4]: Post-protocol path
		\b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
		([0-9]+)\b |
		\bISBN $spaces ( # m[6]: ISBN, capture number
		(?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
		(?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
		[0-9Xx] # check digit
		)\b
		)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1418 
/**
 * Callback for doMagicLinks(): build the replacement for one match.
 *
 * The capture groups are those of the doMagicLinks() pattern:
 * m[1] an existing anchor, m[2] an HTML element, m[3] a free external
 * link (m[4] being its post-protocol part), m[5] the number of an RFC or
 * PMID reference, m[6] the number of an ISBN reference.
 *
 * @param array $m Match array from preg_replace_callback()
 * @throws MWException If m[5] matched but the text starts with neither
 *   "RFC" nor "PMID"
 * @return string HTML, or the matched text unchanged
 */
public function magicLinkCallback( $m ) {
	if ( isset( $m[1] ) && $m[1] !== '' ) {
		# Skip anchor
		return $m[0];
	} elseif ( isset( $m[2] ) && $m[2] !== '' ) {
		# Skip HTML element
		return $m[0];
	} elseif ( isset( $m[3] ) && $m[3] !== '' ) {
		# Free external link
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	} elseif ( isset( $m[5] ) && $m[5] !== '' ) {
		# RFC or PMID
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$id = $m[5];
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$id = $m[5];
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
	} elseif ( isset( $m[6] ) && $m[6] !== '' ) {
		# ISBN
		$isbn = $m[6];
		$space = self::SPACE_NOT_NL; # non-newline space
		# The match was made in UTF-8 mode, so normalise the separators in
		# UTF-8 mode as well. Without the "u" modifier \p{Zs} is tested
		# against single bytes, which turns a UTF-8 no-break space
		# (0xC2 0xA0) into a stray 0xC2 followed by a space.
		$isbn = preg_replace( "/$space/u", ' ', $isbn );
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
		return '<a href="' .
			htmlspecialchars( $titleObj->getLocalURL() ) .
			"\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>";
	} else {
		return $m[0];
	}
}
1470 
/**
 * Turn a free (unbracketed) URL into a link or an external image,
 * keeping any text that does not belong to the URL as a trail.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Length of the part of the match that follows
 *   the protocol
 * @return string HTML followed by the trail, or the input unchanged when
 *   nothing but the protocol would be left after trail removal
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by removeHTMLtags())
	# should not be included in URLs, per RFC 2396.
	# &nbsp; terminates a URL as well (bug T84937).
	$terminator = [];
	$foundTerminator = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$terminator,
		PREG_OFFSET_CAPTURE
	);
	if ( $foundTerminator ) {
		$cut = $terminator[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Trailing punctuation goes to the trail. Note that the mask contains a
	# literal backslash. A right bracket only counts as punctuation when
	# the URL has no left bracket.
	$punctuation = ',;\.:!?';
	if ( strpos( $url, '(' ) === false ) {
		$punctuation .= ')';
	}

	$reversed = strrev( $url );
	$trailLength = strspn( $reversed, $punctuation );

	# If the first character that would be moved is a ';', it may be the
	# end of an HTML entity, which must not be broken. This is hot code:
	# substr_compare avoids creating a string for the comparison, and the
	# entity test runs on the reversed string at a fixed offset instead of
	# using a slow $-anchored match.
	# The un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLength && substr_compare( $url, ";", -$trailLength, 1 ) === 0 ) {
		$unused = [];
		$endsEntity = preg_match(
			'/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $unused, 0, $trailLength
		);
		if ( $endsEntity ) {
			$trailLength--;
		}
	}
	if ( $trailLength ) {
		$trail = substr( $url, -$trailLength ) . $trail;
		$url = substr( $url, 0, -$trailLength );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just a lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html !== false ) {
		return $html . $trail;
	}

	# Not an image, make a link
	$html = Linker::makeExternalLink(
		$url,
		$this->getConverterLanguage()->markNoConversion( $url, true ),
		true,
		'free',
		$this->getExternalLinkAttribs( $url )
	);
	# Register it in the output object, with unnecessary URL escape codes
	# replaced by their equivalent characters
	$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );

	return $html . $trail;
}
1547 
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that longer runs of '=' are
 * consumed before shorter ones can match them.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( range( 6, 1 ) as $level ) {
		$marker = str_repeat( '=', $level );
		$text = preg_replace(
			'/^' . $marker . '(.+)' . $marker . '\\s*$/m',
			'<h' . $level . '>\\1</h' . $level . '>',
			$text
		);
	}
	return $text;
}
1564 
/**
 * Apply doQuotes() to every line of the text.
 *
 * @param string $text
 * @return string The converted lines, joined with newlines again
 */
public function doAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1582 
/**
 * Convert the apostrophe mark-up of a single line ('' for italics,
 * ''' for bold, ''''' for both) into <i> and <b> tags.
 *
 * Runs of apostrophes that are too long to be mark-up are partly turned
 * into literal text, an odd number of both bold and italic runs is
 * rebalanced heuristically, and tags still open at the end of the line
 * are closed.
 *
 * @param string $text One line of text
 * @return string The line with the quote mark-up replaced by HTML tags
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold the apostrophe runs, even indexes the text between them
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state records the open tags in opening order ('i', 'b', 'bi', 'ib'),
	// or 'both' after a ''''' whose tag order is not known yet; in that
	// state the text is collected in $buffer instead of $output.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against the empty string: a buffer holding just "0" is
	// falsy in PHP, but is still text that must not be dropped.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1764 
/**
 * Replace bracketed external links ([url text]) with HTML links and
 * register each link target in the parser output.
 *
 * @param string $text
 * @throws MWException If splitting the text with the bracketed-link
 *   regex fails
 * @return string
 */
public function replaceExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# Text before the first link
	$s = array_shift( $bits );

	# The remaining entries come in groups of four per link:
	# URL, protocol (not needed here), link text, text after the link
	$total = count( $bits );
	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $bits[$pos];
		$label = $bits[$pos + 2];
		$trail = $bits[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$escaped = [];
		if ( preg_match( '/&(lt|gt);/', $url, $escaped, PREG_OFFSET_CAPTURE ) ) {
			$cut = $escaped[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		# Set linktype for CSS - if URL==text, link is essentially free
		$linktype = ( $label === $url ) ? 'free' : 'text';

		$dtrail = '';
		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$langObj = $this->getTargetLanguage();
			$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		$label = $this->getConverterLanguage()->markNoConversion( $label );

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$link = Linker::makeExternalLink( $url, $label, false, $linktype,
			$this->getExternalLinkAttribs( $url ) );
		$s .= $link . $dtrail . $trail;

		# Register link in the output object.
		# Replace unnecessary URL escape codes with the referenced character
		# This prevents spammers from hiding links from the filters
		$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
	}

	return $s;
}
1847 
/**
 * Get the rel attribute value for an external link, based on the
 * nofollow configuration.
 *
 * @param string|bool $url Link target, checked against
 *   $wgNoFollowDomainExceptions
 * @param Title|null $title Title whose namespace is checked against
 *   $wgNoFollowNsExceptions; without a title no namespace exception applies
 * @return string|null 'nofollow', or null if the link is exempt
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}

	# Only look up the namespace when there is a title. Searching for the
	# placeholder value false with a loose in_array() would match
	# namespace 0 and wrongly exempt the link.
	if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
		return null;
	}

	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
1867 
/**
 * Get the HTML attributes (rel and, if configured, target) for an
 * external link.
 *
 * @param string|bool $url Link target, passed on to getExternalLinkRel()
 * @return array Map of attribute name to value; 'rel' is always set and
 *   may be null
 */
public function getExternalLinkAttribs( $url = false ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// getExternalLinkRel() returns null when there is no rel value,
			// so only add a separating space after a real value.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
1898 
/**
 * Deprecated alias of normalizeLinkUrl().
 *
 * @deprecated since 1.24, use normalizeLinkUrl() instead
 * @param string $url
 * @return string
 */
public static function replaceUnusualEscapes( $url ) {
	wfDeprecated( __METHOD__, '1.24' );
	$normalized = self::normalizeLinkUrl( $url );
	return $normalized;
}
1910 
/**
 * Bring the percent-escapes of a URL into a normal form: unsafe
 * characters are encoded, and escapes of characters that are harmless in
 * their part of the URL are decoded.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# First, make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $hit ) {
			return rawurlencode( $hit[0] );
		},
		$url
	);

	# The URL is cut from the right: fragment first, then the query; what
	# remains is the scheme and path. $cut is the end of the unprocessed part.
	$cut = strlen( $url );
	$fragment = '';
	$query = '';

	# Fragment part - 'fragment'
	$hashPos = strpos( $url, '#' );
	if ( $hashPos !== false && $hashPos < $cut ) {
		$fragment = self::normalizeUrlComponent(
			substr( $url, $hashPos, $cut - $hashPos ), '"#%<>[\]^`{|}' );
		$cut = $hashPos;
	}

	# Query part - 'query' minus &=+;
	$queryPos = strpos( $url, '?' );
	if ( $queryPos !== false && $queryPos < $cut ) {
		$query = self::normalizeUrlComponent(
			substr( $url, $queryPos, $cut - $queryPos ), '"#%<>[\]^`{|}&=+;' );
		$cut = $queryPos;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$schemeAndPath = self::normalizeUrlComponent(
		substr( $url, 0, $cut ), '"#%<>[\]^`{|}/?' );

	return $schemeAndPath . $query . $fragment;
}
1955 
/**
 * Normalize the percent-escapes within one part of a URL.
 *
 * An escape is decoded when it stands for a byte with a code between 33
 * and 126 that is not listed in $unsafe; every other escape is kept,
 * with its hex digits in upper case.
 *
 * @param string $component
 * @param string $unsafe Characters that must stay escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$isVisibleAscii = $code > 32 && $code < 127;
			if ( !$isVisibleAscii || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape[0] );
			}
			# Unescape it
			return $decoded;
		},
		$component
	);
}
1970 
/**
 * Make an image element for the URL if it looks like an image URL and
 * the parser options allow external images from it.
 *
 * @param string $url
 * @return string|bool Image HTML, or false if no image is made
 */
private function maybeMakeExternalImage( $url ) {
	$html = false;

	# The allowed sources are either a single URL prefix or an array of them
	$sources = $this->mOptions->getAllowExternalImagesFrom();
	$haveSources = !empty( $sources );
	$fromAllowedSource = false;
	if ( $haveSources ) {
		$prefixes = is_array( $sources ) ? $sources : [ $sources ];
		foreach ( $prefixes as $prefix ) {
			if ( strpos( $url, $prefix ) === 0 ) {
				$fromAllowedSource = true;
				break;
			}
		}
	}

	$allowed = $this->mOptions->getAllowExternalImages()
		|| ( $haveSources && $fromAllowedSource );
	if ( $allowed && preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( $html
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $html;
	}

	# Fall back to the on-wiki whitelist, one regex fragment per line
	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $whitelist as $entry ) {
		# Ignore blank entries and comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		# Sanitize the regex fragment and make it case-insensitive
		$pattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
		if ( preg_match( $pattern, $url ) ) {
			# Image matches a whitelist entry
			$html = Linker::makeExternalImage( $url );
			break;
		}
	}
	return $html;
}
2028 
/**
 * Process [[ ]] wikilinks in the text and add the resulting link holders
 * to the parser's own holder array.
 *
 * @param string $s
 * @return string The text as rewritten by replaceInternalLinks2()
 */
public function replaceInternalLinks( $s ) {
	# replaceInternalLinks2() rewrites $s in place and returns the holders
	$holders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $holders );
	return $s;
}
2042 
/**
 * Process [[ ]] wikilinks in $s, which is rewritten in place.
 *
 * Language links and categories are registered in the parser output,
 * images and media links are rendered directly, and ordinary links are
 * rendered either directly or as holders in the returned array.
 *
 * @param string &$s Text to process; replaced by the processed text
 * @throws MWException If $this->mTitle is null
 * @return LinkHolderArray Holders created while processing $s
 */
public function replaceInternalLinks2( &$s ) {
	# Both globals are read below and must be declared, otherwise they are
	# undefined local variables
	global $wgContLang, $wgExtraInterlanguageLinkPrefixes;

	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $wgContLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = [];
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();

	// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		// @codingStandardsIgnoreEnd

		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
			# the real problem is with the $e1 regex
			# See bug 1300.
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = $m[1];

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $origLink, $text );
		} else {
			$link = $origLink;
		}

		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		$unstrip = $this->mStripState->unstripNoWiki( $link );
		$nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
		} else {
			# Bug 4598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			# -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
				)
			) {
				# Bug 24502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				$s = rtrim( $s . $prefix );
				$s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
				continue;
			}

			if ( $ns == NS_FILE ) {
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
				} else {
					$s .= $prefix . $trail;
				}
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				$s = rtrim( $s . "\n" ); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				# Drop the prefix and trail when they consist of newlines only
				$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = [];
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				[ $this, $nt, &$options, &$descQuery ] );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, [], $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
		}
	}
	return $holders;
}
2354 
/**
 * Render a link to a page that is known to exist, with its URLs armored
 * against later external-link replacement.
 *
 * @param Title $nt
 * @param string $text Link text; the escaped prefixed title text is used
 *   when empty
 * @param array|string $query Query parameters; a string is converted
 *   with wfCgiToArray()
 * @param string $trail Text after the link; the part that
 *   Linker::splitTrail() assigns to the link is moved inside it
 * @param string $prefix Text placed inside the link, before $text
 * @return string
 */
public function makeKnownLinkHolder( $nt, $text = '', $query = [], $trail = '', $prefix = '' ) {
	list( $inside, $trail ) = Linker::splitTrail( $trail );

	$queryArray = is_string( $query ) ? wfCgiToArray( $query ) : $query;
	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$link = Linker::linkKnown( $nt, $prefix . $label . $inside, [], $queryArray );

	return $this->armorLinks( $link ) . $trail;
}
2383 
/**
 * Put a NOPARSE marker in front of every URL protocol in the text, so
 * that the URLs are not picked up by later link replacement.
 *
 * @param string $text
 * @return string
 */
public function armorLinks( $text ) {
	$pattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$replacement = self::MARKER_PREFIX . 'NOPARSE$1';
	return preg_replace( $pattern, $replacement, $text );
}
2398 
/**
 * Check whether the namespace of the current title has subpages.
 *
 * @return bool Result of MWNamespace::hasSubpages()
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$namespace = $this->mTitle->getNamespace();
	return MWNamespace::hasSubpages( $namespace );
}
2407 
/**
 * Resolve a possible subpage link relative to the current title, by
 * delegating to Linker::normalizeSubpageLink().
 *
 * @param string $target Link target as written
 * @param string &$text Link text, handed on by reference
 * @return string Result of Linker::normalizeSubpageLink()
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$resolved = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
	return $resolved;
}
2419 
/**
 * Close the block element recorded in mLastSection, if any, and reset
 * the paragraph state (mLastSection and mInPre).
 *
 * @return string Closing tag followed by a newline, or an empty string
 */
public function closeParagraph() {
	$closing = ( $this->mLastSection != '' )
		? '</' . $this->mLastSection . ">\n"
		: '';
	$this->mLastSection = '';
	$this->mInPre = false;
	return $closing;
}
2435 
/**
 * Get the length of the common prefix of two strings, counted in bytes.
 *
 * @param string $st1
 * @param string $st2
 * @return int
 */
public function getCommon( $st1, $st2 ) {
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	$common = 0;
	while ( $common < $limit && $st1[$common] == $st2[$common] ) {
		++$common;
	}
	return $common;
}
2460 
/**
 * Open a list for the given prefix character, closing any open
 * paragraph first.
 *
 * @param string $char One of '*' (ul), '#' (ol), ':' (dd) or ';' (dt)
 * @return string The paragraph-closing text followed by the opening
 *   tags, or only an error comment for any other character
 */
public function openList( $char ) {
	$result = $this->closeParagraph();

	$openers = [
		'*' => "<ul><li>",
		'#' => "<ol><li>",
		':' => "<dl><dd>",
		';' => "<dl><dt>",
	];
	if ( !is_string( $char ) || !isset( $openers[$char] ) ) {
		# The error comment replaces whatever closeParagraph() returned
		return '<!-- ERR 1 -->';
	}

	if ( $char === ';' ) {
		$this->mDTopen = true;
	}
	return $result . $openers[$char];
}
2488 
/**
 * Close the current list item and open the next one at the same level.
 *
 * For definition lists the closing tag depends on $this->mDTopen, which is
 * then updated to reflect whether a <dt> has just been opened.
 *
 * @param string $char List prefix character
 * @return string Markup, or an error comment for an unknown character
 */
public function nextItem( $char ) {
	if ( $char === '*' || $char === '#' ) {
		return "</li>\n<li>";
	}
	if ( $char !== ':' && $char !== ';' ) {
		return '<!-- ERR 2 -->';
	}

	# Definition list: close whichever of <dt>/<dd> is currently open
	$closing = $this->mDTopen ? "</dt>\n" : "</dd>\n";
	$opensTerm = ( $char === ';' );
	$this->mDTopen = $opensTerm;

	return $closing . ( $opensTerm ? '<dt>' : '<dd>' );
}
2514 
/**
 * Return the markup that closes the innermost list of the given type.
 *
 * For ':' the closing item tag depends on $this->mDTopen, which is cleared
 * when a <dt> was open. Any character other than '*', '#' or ':' yields an
 * error comment.
 *
 * @param string $char List prefix character
 * @return string
 */
public function closeList( $char ) {
	if ( $char === '*' ) {
		return "</li></ul>";
	}
	if ( $char === '#' ) {
		return "</li></ol>";
	}
	if ( $char !== ':' ) {
		return '<!-- ERR 3 -->';
	}

	if ( !$this->mDTopen ) {
		return "</dd></dl>";
	}

	$this->mDTopen = false;
	return "</dt></dl>";
}
 /**
  * Build block-level structure from text, one line at a time.
  *
  * Lines that start with a run of the characters * # : ; are turned into
  * (possibly nested) list items through openList(), nextItem() and
  * closeList(). Lines without such a prefix are wrapped in <p>, or in <pre>
  * when they start with a space, unless they match one of the block-element
  * patterns below, in which case any open paragraph is closed and the line
  * is emitted as-is.
  *
  * @param string $text Text to process; it is split on "\n"
  * @param bool $linestart When false, the first line is appended to the
  *   output unchanged and normal processing starts with the second line
  * @return string Processed text
  */
2549  public function doBlockLevels( $text, $linestart ) {
2550 
2551  # Parsing through the text line by line. The main thing
2552  # happening here is handling of block-level elements p, pre,
2553  # and making lists from lines starting with * # : etc.
2554  $textLines = StringUtils::explode( "\n", $text );
2555 
2556  $lastPrefix = $output = '';
2557  $this->mDTopen = $inBlockElem = false;
2558  $prefixLength = 0;
 # Paragraph markup ('<p>' or '</p><p>') held back after a blank line until
 # a later line decides how it is emitted; false when nothing is pending.
2559  $paragraphStack = false;
2560  $inBlockquote = false;
2561 
2562  foreach ( $textLines as $oLine ) {
2563  # Fix up $linestart
2564  if ( !$linestart ) {
2565  $output .= $oLine;
2566  $linestart = true;
2567  continue;
2568  }
2569  # * = ul
2570  # # = ol
2571  # ; = dt
2572  # : = dd
2573 
2574  $lastPrefixLength = strlen( $lastPrefix );
2575  $preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
2576  $preOpenMatch = preg_match( '/<pre/i', $oLine );
2577  # If not in a <pre> element, scan for and figure out what prefixes are there.
2578  if ( !$this->mInPre ) {
2579  # Multiple prefixes may abut each other for nested lists.
2580  $prefixLength = strspn( $oLine, '*#:;' );
2581  $prefix = substr( $oLine, 0, $prefixLength );
2582 
2583  # eh?
2584  # ; and : are both from definition-lists, so they're equivalent
2585  # for the purposes of determining whether or not we need to open/close
2586  # elements.
2587  $prefix2 = str_replace( ';', ':', $prefix );
2588  $t = substr( $oLine, $prefixLength );
2589  $this->mInPre = (bool)$preOpenMatch;
2590  } else {
2591  # Don't interpret any other prefixes in preformatted text
2592  $prefixLength = 0;
2593  $prefix = $prefix2 = '';
2594  $t = $oLine;
2595  }
2596 
2597  # List generation
2598  if ( $prefixLength && $lastPrefix === $prefix2 ) {
2599  # Same as the last item, so no need to deal with nesting or opening stuff
2600  $output .= $this->nextItem( substr( $prefix, -1 ) );
2601  $paragraphStack = false;
2602 
2603  if ( substr( $prefix, -1 ) === ';' ) {
2604  # The one nasty exception: definition lists work like this:
2605  # ; title : definition text
2606  # So we check for : in the remainder text to split up the
2607  # title and definition, without b0rking links.
2608  $term = $t2 = '';
2609  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2610  $t = $t2;
2611  $output .= $term . $this->nextItem( ':' );
2612  }
2613  }
2614  } elseif ( $prefixLength || $lastPrefixLength ) {
2615  # We need to open or close prefixes, or both.
2616 
2617  # Either open or close a level...
2618  $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
2619  $paragraphStack = false;
2620 
2621  # Close all the prefixes which aren't shared.
2622  while ( $commonPrefixLength < $lastPrefixLength ) {
2623  $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
2624  --$lastPrefixLength;
2625  }
2626 
2627  # Continue the current prefix if appropriate.
2628  if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
2629  $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
2630  }
2631 
2632  # Open prefixes where appropriate.
2633  if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
2634  $output .= "\n";
2635  }
2636  while ( $prefixLength > $commonPrefixLength ) {
2637  $char = substr( $prefix, $commonPrefixLength, 1 );
2638  $output .= $this->openList( $char );
2639 
2640  if ( ';' === $char ) {
2641  # @todo FIXME: This is dupe of code above
2642  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2643  $t = $t2;
2644  $output .= $term . $this->nextItem( ':' );
2645  }
2646  }
2647  ++$commonPrefixLength;
2648  }
2649  if ( !$prefixLength && $lastPrefix ) {
2650  $output .= "\n";
2651  }
 # Remember the normalised prefix (';' replaced by ':') for the next line
2652  $lastPrefix = $prefix2;
2653  }
2654 
2655  # If we have no prefixes, go to paragraph mode.
2656  if ( 0 == $prefixLength ) {
2657  # No prefix (not in list)--go to paragraph mode
2658  # XXX: use a stack for nestable elements like span, table and div
2659  $openmatch = preg_match(
2660  '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
2661  . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
2662  $t
2663  );
2664  $closematch = preg_match(
2665  '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
2666  . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
2667  . self::MARKER_PREFIX
2668  . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
2669  $t
2670  );
2671 
2672  if ( $openmatch || $closematch ) {
2673  $paragraphStack = false;
2674  # @todo bug 5718: paragraph closed
2675  $output .= $this->closeParagraph();
2676  if ( $preOpenMatch && !$preCloseMatch ) {
2677  $this->mInPre = true;
2678  }
 # Walk over every <blockquote> / </blockquote> tag on the line; the
 # last one found decides whether we are inside a blockquote afterwards.
2679  $bqOffset = 0;
2680  while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t,
2681  $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset )
2682  ) {
2683  $inBlockquote = !$bqMatch[1][0]; // is this a close tag?
2684  $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
2685  }
2686  $inBlockElem = !$closematch;
2687  } elseif ( !$inBlockElem && !$this->mInPre ) {
2688  if ( ' ' == substr( $t, 0, 1 )
2689  && ( $this->mLastSection === 'pre' || trim( $t ) != '' )
2690  && !$inBlockquote
2691  ) {
2692  # pre
2693  if ( $this->mLastSection !== 'pre' ) {
2694  $paragraphStack = false;
2695  $output .= $this->closeParagraph() . '<pre>';
2696  $this->mLastSection = 'pre';
2697  }
 # Drop the leading space that marked the line as preformatted
2698  $t = substr( $t, 1 );
2699  } else {
2700  # paragraph
2701  if ( trim( $t ) === '' ) {
2702  if ( $paragraphStack ) {
2703  $output .= $paragraphStack . '<br />';
2704  $paragraphStack = false;
2705  $this->mLastSection = 'p';
2706  } else {
2707  if ( $this->mLastSection !== 'p' ) {
2708  $output .= $this->closeParagraph();
2709  $this->mLastSection = '';
2710  $paragraphStack = '<p>';
2711  } else {
2712  $paragraphStack = '</p><p>';
2713  }
2714  }
2715  } else {
2716  if ( $paragraphStack ) {
2717  $output .= $paragraphStack;
2718  $paragraphStack = false;
2719  $this->mLastSection = 'p';
2720  } elseif ( $this->mLastSection !== 'p' ) {
2721  $output .= $this->closeParagraph() . '<p>';
2722  $this->mLastSection = 'p';
2723  }
2724  }
2725  }
2726  }
2727  }
2728  # somewhere above we forget to get out of pre block (bug 785)
2729  if ( $preCloseMatch && $this->mInPre ) {
2730  $this->mInPre = false;
2731  }
 # While paragraph markup is pending (set only for blank lines above),
 # the line itself is not written to the output.
2732  if ( $paragraphStack === false ) {
2733  $output .= $t;
2734  if ( $prefixLength === 0 ) {
2735  $output .= "\n";
2736  }
2737  }
2738  }
 # Close every list level still open after the last line, innermost first
2739  while ( $prefixLength ) {
2740  $output .= $this->closeList( $prefix2[$prefixLength - 1] );
2741  --$prefixLength;
2742  if ( !$prefixLength ) {
2743  $output .= "\n";
2744  }
2745  }
 # Close the last open section; unlike closeParagraph() this adds no
 # trailing newline and leaves mInPre untouched.
2746  if ( $this->mLastSection != '' ) {
2747  $output .= '</' . $this->mLastSection . '>';
2748  $this->mLastSection = '';
2749  }
2750 
2751  return $output;
2752  }
2753 
/**
 * Split a string at the first ':' that is neither inside an HTML tag nor
 * nested within an open element.
 *
 * On success $before receives the text in front of the colon and $after
 * the text behind it. On failure both are left untouched.
 *
 * @param string $str Text to scan
 * @param string &$before Set to the text before the colon
 * @param string &$after Set to the text after the colon
 * @throws MWException If the state machine reaches an unknown state
 * @return int|bool Byte offset of the colon, or false if none was found
 */
public function findColonNoLinks( $str, &$before, &$after ) {
	$pos = strpos( $str, ':' );
	if ( $pos === false ) {
		# Nothing to find!
		return false;
	}

	$lt = strpos( $str, '<' );
	if ( $lt === false || $lt > $pos ) {
		# Easy; no tag nesting to worry about
		$before = substr( $str, 0, $pos );
		$after = substr( $str, $pos + 1 );
		return $pos;
	}

	# Ugly state machine to walk through avoiding tags.
	# $stack counts start tags that have not been closed yet.
	$state = self::COLON_STATE_TEXT;
	$stack = 0;
	$len = strlen( $str );
	for ( $i = 0; $i < $len; $i++ ) {
		$c = $str[$i];

		switch ( $state ) {
			# (Using the number is a performance hack for common cases)
			case 0: # self::COLON_STATE_TEXT:
				switch ( $c ) {
					case "<":
						# Could be either a <start> tag or an </end> tag
						$state = self::COLON_STATE_TAGSTART;
						break;
					case ":":
						if ( $stack == 0 ) {
							# We found it!
							$before = substr( $str, 0, $i );
							$after = substr( $str, $i + 1 );
							return $i;
						}
						# Embedded in a tag; don't break it.
						break;
					default:
						# Skip ahead looking for something interesting
						$colon = strpos( $str, ':', $i );
						if ( $colon === false ) {
							# Nothing else interesting
							return false;
						}
						$lt = strpos( $str, '<', $i );
						if ( $stack === 0 ) {
							if ( $lt === false || $colon < $lt ) {
								# We found it! Report the colon's own offset, as
								# the other success paths do, rather than the
								# offset the skip-ahead started from.
								$before = substr( $str, 0, $colon );
								$after = substr( $str, $colon + 1 );
								return $colon;
							}
						}
						if ( $lt === false ) {
							# Nothing else interesting to find; abort!
							# We're nested, but there's no close tags left. Abort!
							break 2;
						}
						# Skip ahead to next tag start
						$i = $lt;
						$state = self::COLON_STATE_TAGSTART;
				}
				break;
			case 1: # self::COLON_STATE_TAG:
				# In a <tag>
				switch ( $c ) {
					case ">":
						$stack++;
						$state = self::COLON_STATE_TEXT;
						break;
					case "/":
						# Slash may be followed by >?
						$state = self::COLON_STATE_TAGSLASH;
						break;
					default:
						# ignore
				}
				break;
			case 2: # self::COLON_STATE_TAGSTART:
				switch ( $c ) {
					case "/":
						$state = self::COLON_STATE_CLOSETAG;
						break;
					case "!":
						$state = self::COLON_STATE_COMMENT;
						break;
					case ">":
						# Illegal early close? This shouldn't happen D:
						$state = self::COLON_STATE_TEXT;
						break;
					default:
						$state = self::COLON_STATE_TAG;
				}
				break;
			case 3: # self::COLON_STATE_CLOSETAG:
				# In a </tag>
				if ( $c === ">" ) {
					$stack--;
					if ( $stack < 0 ) {
						wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
						return false;
					}
					$state = self::COLON_STATE_TEXT;
				}
				break;
			case self::COLON_STATE_TAGSLASH:
				if ( $c === ">" ) {
					# Yes, a self-closed tag <blah/>
					$state = self::COLON_STATE_TEXT;
				} else {
					# Probably we're jumping the gun, and this is an attribute
					$state = self::COLON_STATE_TAG;
				}
				break;
			case 5: # self::COLON_STATE_COMMENT:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASH;
				}
				break;
			case self::COLON_STATE_COMMENTDASH:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASHDASH;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			case self::COLON_STATE_COMMENTDASHDASH:
				if ( $c === ">" ) {
					$state = self::COLON_STATE_TEXT;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			default:
				throw new MWException( "State machine error in " . __METHOD__ );
		}
	}
	if ( $stack > 0 ) {
		wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
		return false;
	}
	return false;
}
2910 
/**
 * Return the value of the magic variable identified by $index.
 *
 * Values computed by the switch below are stored in $this->mVarCache and
 * served from there on later calls. The cases that return directly (site
 * configuration values, the direction mark and hook-provided values) are
 * not cached here.
 *
 * @param string $index Magic variable identifier
 * @param bool|PPFrame $frame Passed on to the ParserGetVariableValueSwitch hook
 * @throws MWException If no title has been set on the parser
 * @return string|int|null
 */
public function getVariableValue( $index, $frame = false ) {
	# Configuration globals read by the cases below; without these
	# declarations the names would be undefined local variables.
	global $wgContLang, $wgSitename, $wgServer, $wgServerName;
	global $wgArticlePath, $wgScriptPath, $wgStylePath, $wgLanguageCode;

	if ( is_null( $this->mTitle ) ) {
		// If no title set, bad things are going to happen
		// later. Title should always be set since this
		// should only be called in the middle of a parse
		// operation (but the unit-tests do funky stuff)
		throw new MWException( __METHOD__ . ' Should only be '
			. ' called while parsing (no title set)' );
	}

	# Serve a cached value when there is one. The cache lookup is skipped
	# when the hook run returns a false value.
	if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$this, &$this->mVarCache ] ) ) {
		if ( isset( $this->mVarCache[$index] ) ) {
			return $this->mVarCache[$index];
		}
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', [ &$this, &$ts ] );

	$pageLang = $this->getFunctionLang();

	switch ( $index ) {
		case '!':
			$value = '|';
			break;
		case 'currentmonth':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
			break;
		case 'currentmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentday':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
			break;
		case 'currentday2':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
			break;
		case 'localmonth':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
			break;
		case 'localmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localday':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
			break;
		case 'localday2':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
			break;
		case 'pagename':
			$value = wfEscapeWikiText( $this->mTitle->getText() );
			break;
		case 'pagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
			break;
		case 'fullpagename':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
			break;
		case 'subpagename':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
			break;
		case 'subpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$value = wfEscapeWikiText( $this->mTitle->getRootText() );
			break;
		case 'rootpagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getRootText()
			) ) );
			break;
		case 'basepagename':
			$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
			break;
		case 'basepagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getBaseText()
			) ) );
			break;
		case 'talkpagename':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
			} else {
				$value = '';
			}
			break;
		case 'talkpagenamee':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
			} else {
				$value = '';
			}
			break;
		case 'subjectpagename':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
			break;
		case 'pageid': // requested in bug 23427
			$pageid = $this->getTitle()->getArticleID();
			if ( $pageid == 0 ) {
				# 0 means the page doesn't exist in the database,
				# which means the user is previewing a new page.
				# The vary-revision flag must be set, because the magic word
				# will have a different value once the page is saved.
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
			}
			$value = $pageid ? $pageid : null;
			break;
		case 'revisionid':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
			$value = $this->mRevisionId;
			break;
		case 'revisionday':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
			break;
		case 'revisionday2':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 6, 2 );
			break;
		case 'revisionmonth':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 4, 2 );
			break;
		case 'revisionmonth1':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
			break;
		case 'revisionyear':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 0, 4 );
			break;
		case 'revisiontimestamp':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
			$value = $this->getRevisionTimestamp();
			break;
		case 'revisionuser':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
			$value = $this->getRevisionUser();
			break;
		case 'revisionsize':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
			$value = $this->getRevisionSize();
			break;
		case 'namespace':
			$value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacee':
			$value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacenumber':
			$value = $this->mTitle->getNamespace();
			break;
		case 'talkspace':
			$value = $this->mTitle->canTalk()
				? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
				: '';
			break;
		case 'talkspacee':
			$value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
			break;
		case 'subjectspace':
			$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
			break;
		case 'currentdayname':
			$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
			break;
		case 'currentyear':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
			break;
		case 'currenttime':
			$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
			break;
		case 'currentweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
			break;
		case 'currentdow':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
			break;
		case 'localdayname':
			$value = $pageLang->getWeekdayName(
				(int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
			);
			break;
		case 'localyear':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
			break;
		case 'localtime':
			$value = $pageLang->time(
				MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
				false,
				false
			);
			break;
		case 'localhour':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
			break;
		case 'localweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
			break;
		case 'localdow':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
			break;
		case 'numberofarticles':
			$value = $pageLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $pageLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $pageLang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$value = $pageLang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$value = $pageLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$value = $pageLang->formatNum( SiteStats::edits() );
			break;
		case 'currenttimestamp':
			$value = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
			break;
		case 'currentversion':
			# $value must be assigned here; it is cached and returned below
			$value = SpecialVersion::getVersion();
			break;
		case 'articlepath':
			return $wgArticlePath;
		case 'sitename':
			return $wgSitename;
		case 'server':
			return $wgServer;
		case 'servername':
			return $wgServerName;
		case 'scriptpath':
			return $wgScriptPath;
		case 'stylepath':
			return $wgStylePath;
		case 'directionmark':
			return $pageLang->getDirMark();
		case 'contentlanguage':
			return $wgLanguageCode;
		case 'cascadingsources':
			# $value must be assigned here; it is cached and returned below
			$value = CoreParserFunctions::cascadingsources( $this );
			break;
		default:
			# Unknown identifier: let hook handlers supply a value via $ret
			$ret = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				[ &$this, &$this->mVarCache, &$index, &$ret, &$frame ]
			);

			return $ret;
	}

	if ( $index ) {
		$this->mVarCache[$index] = $value;
	}

	return $value;
}
3268 
/**
 * Build the MagicWordArray objects for variable IDs and subst IDs and
 * store them in $this->mVariables and $this->mSubstWords.
 */
public function initialiseVariables() {
	$this->mVariables = new MagicWordArray( MagicWord::getVariableIDs() );
	$this->mSubstWords = new MagicWordArray( MagicWord::getSubstIDs() );
}
3281 
/**
 * Run the text through the preprocessor and return the object produced by
 * its preprocessToObj() method.
 *
 * @param string $text Text to preprocess
 * @param int $flags Flags handed to the preprocessor (e.g. self::PTD_FOR_INCLUSION)
 * @return mixed Result of Preprocessor::preprocessToObj()
 */
public function preprocessToDom( $text, $flags = 0 ) {
	$preprocessor = $this->getPreprocessor();

	return $preprocessor->preprocessToObj( $text, $flags );
}
3308 
/**
 * Split a string into its leading whitespace, its trimmed content and its
 * trailing whitespace.
 *
 * @param string $s
 * @return array [ leading whitespace, trimmed text, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	$withoutLeading = ltrim( $s );
	$leadingLength = strlen( $s ) - strlen( $withoutLeading );
	$leading = substr( $s, 0, $leadingLength );

	$core = rtrim( $withoutLeading );
	$trailingLength = strlen( $withoutLeading ) - strlen( $core );
	# substr() with a negative zero offset would return the whole string,
	# so the empty case is handled explicitly
	$trailing = ( $trailingLength > 0 ) ? substr( $withoutLeading, -$trailingLength ) : '';

	return [ $leading, $core, $trailing ];
}
3328 
/**
 * Preprocess the text and expand it in the given frame.
 *
 * Empty text, and text longer than the max include size from the parser
 * options, is returned unchanged.
 *
 * @param string $text Text to expand
 * @param bool|PPFrame|array $frame Frame to expand in; false creates a new
 *   frame, any other non-PPFrame value is wrapped in a custom frame
 * @param bool $argsOnly When true, expansion uses PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text? Also, Prevent too big inclusions!
	$length = strlen( $text );
	if ( $length < 1 || $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			wfDebug( __METHOD__ . " called using plain parameters instead of "
				. "a PPFrame instance. Creating custom frame.\n" );
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$dom = $this->preprocessToDom( $text );

	if ( $argsOnly ) {
		$expandFlags = PPFrame::NO_TEMPLATES;
	} else {
		$expandFlags = 0;
	}

	return $frame->expand( $dom, $expandFlags );
}
3370 
/**
 * Turn a list of template-style arguments into an associative array.
 *
 * Arguments containing '=' are split at the first '=' into a trimmed name
 * and a trimmed value. Arguments without '=' are stored untrimmed under
 * consecutive integer keys starting at 1.
 *
 * @param array $args List of argument strings
 * @return array
 */
public static function createAssocArgs( $args ) {
	$named = [];
	$nextPosition = 1;

	foreach ( $args as $rawArg ) {
		$split = strpos( $rawArg, '=' );

		if ( $split !== false ) {
			$key = trim( substr( $rawArg, 0, $split ) );
			$named[$key] = trim( substr( $rawArg, $split + 1 ) );
			continue;
		}

		# Positional argument: keep as-is under the next free number
		$named[$nextPosition] = $rawArg;
		$nextPosition++;
	}

	return $named;
}
3399 
/**
 * Add a warning to the parser output for the given limitation type and put
 * the page into the matching tracking category.
 *
 * The warning text comes from the message "<type>-warning"; the category
 * is added through addTrackingCategory( "<type>-category" ).
 *
 * @param string $limitationType Limitation identifier used to build both keys
 * @param string|int $current Current value, passed as a numeric message parameter
 * @param string|int $max Maximum value, passed as a numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# does no harm if $current and $max are present but are unnecessary for the message
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( "$limitationType-warning" );
	$message = $message->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( "$limitationType-category" );
}
3435 
3448  public function braceSubstitution( $piece, $frame ) {
3449 
3450  // Flags
3451 
3452  // $text has been filled
3453  $found = false;
3454  // wiki markup in $text should be escaped
3455  $nowiki = false;
3456  // $text is HTML, armour it against wikitext transformation
3457  $isHTML = false;
3458  // Force interwiki transclusion to be done in raw mode not rendered
3459  $forceRawInterwiki = false;
3460  // $text is a DOM node needing expansion in a child frame
3461  $isChildObj = false;
3462  // $text is a DOM node needing expansion in the current frame
3463  $isLocalObj = false;
3464 
3465  # Title object, where $text came from
3466  $title = false;
3467 
3468  # $part1 is the bit before the first |, and must contain only title characters.
3469  # Various prefixes will be stripped from it later.
3470  $titleWithSpaces = $frame->expand( $piece['title'] );
3471  $part1 = trim( $titleWithSpaces );
3472  $titleText = false;
3473 
3474  # Original title text preserved for various purposes
3475  $originalTitle = $part1;
3476 
3477  # $args is a list of argument nodes, starting from index 0, not including $part1
3478  # @todo FIXME: If piece['parts'] is null then the call to getLength()
3479  # below won't work b/c this $args isn't an object
3480  $args = ( null == $piece['parts'] ) ? [] : $piece['parts'];
3481 
3482  $profileSection = null; // profile templates
3483 
3484  # SUBST
3485  if ( !$found ) {
3486  $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3487 
3488  # Possibilities for substMatch: "subst", "safesubst" or FALSE
3489  # Decide whether to expand template or keep wikitext as-is.
3490  if ( $this->ot['wiki'] ) {
3491  if ( $substMatch === false ) {
3492  $literal = true; # literal when in PST with no prefix
3493  } else {
3494  $literal = false; # expand when in PST with subst: or safesubst:
3495  }
3496  } else {
3497  if ( $substMatch == 'subst' ) {
3498  $literal = true; # literal when not in PST with plain subst:
3499  } else {
3500  $literal = false; # expand when not in PST with safesubst: or no prefix
3501  }
3502  }
3503  if ( $literal ) {
3504  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3505  $isLocalObj = true;
3506  $found = true;
3507  }
3508  }
3509 
3510  # Variables
3511  if ( !$found && $args->getLength() == 0 ) {
3512  $id = $this->mVariables->matchStartToEnd( $part1 );
3513  if ( $id !== false ) {
3514  $text = $this->getVariableValue( $id, $frame );
3515  if ( MagicWord::getCacheTTL( $id ) > -1 ) {
3516  $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
3517  }
3518  $found = true;
3519  }
3520  }
3521 
3522  # MSG, MSGNW and RAW
3523  if ( !$found ) {
3524  # Check for MSGNW:
3525  $mwMsgnw = MagicWord::get( 'msgnw' );
3526  if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3527  $nowiki = true;
3528  } else {
3529  # Remove obsolete MSG:
3530  $mwMsg = MagicWord::get( 'msg' );
3531  $mwMsg->matchStartAndRemove( $part1 );
3532  }
3533 
3534  # Check for RAW:
3535  $mwRaw = MagicWord::get( 'raw' );
3536  if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3537  $forceRawInterwiki = true;
3538  }
3539  }
3540 
3541  # Parser functions
3542  if ( !$found ) {
3543  $colonPos = strpos( $part1, ':' );
3544  if ( $colonPos !== false ) {
3545  $func = substr( $part1, 0, $colonPos );
3546  $funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
3547  $argsLength = $args->getLength();
3548  for ( $i = 0; $i < $argsLength; $i++ ) {
3549  $funcArgs[] = $args->item( $i );
3550  }
3551  try {
3552  $result = $this->callParserFunction( $frame, $func, $funcArgs );
3553  } catch ( Exception $ex ) {
3554  throw $ex;
3555  }
3556 
3557  # The interface for parser functions allows for extracting
3558  # flags into the local scope. Extract any forwarded flags
3559  # here.
3560  extract( $result );
3561  }
3562  }
3563 
3564  # Finish mangling title and then check for loops.
3565  # Set $title to a Title object and $titleText to the PDBK
3566  if ( !$found ) {
3567  $ns = NS_TEMPLATE;
3568  # Split the title into page and subpage
3569  $subpage = '';
3570  $relative = $this->maybeDoSubpageLink( $part1, $subpage );
3571  if ( $part1 !== $relative ) {
3572  $part1 = $relative;
3573  $ns = $this->mTitle->getNamespace();
3574  }
3575  $title = Title::newFromText( $part1, $ns );
3576  if ( $title ) {
3577  $titleText = $title->getPrefixedText();
3578  # Check for language variants if the template is not found
3579  if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
3580  $this->getConverterLanguage()->findVariantLink( $part1, $title, true );
3581  }
3582  # Do recursion depth check
3583  $limit = $this->mOptions->getMaxTemplateDepth();
3584  if ( $frame->depth >= $limit ) {
3585  $found = true;
3586  $text = '<span class="error">'
3587  . wfMessage( 'parser-template-recursion-depth-warning' )
3588  ->numParams( $limit )->inContentLanguage()->text()
3589  . '</span>';
3590  }
3591  }
3592  }
3593 
3594  # Load from database
3595  if ( !$found && $title ) {
3596  $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3597  if ( !$title->isExternal() ) {
3598  if ( $title->isSpecialPage()
3599  && $this->mOptions->getAllowSpecialInclusion()
3600  && $this->ot['html']
3601  ) {
3602  // Pass the template arguments as URL parameters.
3603  // "uselang" will have no effect since the Language object
3604  // is forced to the one defined in ParserOptions.
3605  $pageArgs = [];
3606  $argsLength = $args->getLength();
3607  for ( $i = 0; $i < $argsLength; $i++ ) {
3608  $bits = $args->item( $i )->splitArg();
3609  if ( strval( $bits['index'] ) === '' ) {
3610  $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3611  $value = trim( $frame->expand( $bits['value'] ) );
3612  $pageArgs[$name] = $value;
3613  }
3614  }
3615 
3616  // Create a new context to execute the special page
3617  $context = new RequestContext;
3618  $context->setTitle( $title );
3619  $context->setRequest( new FauxRequest( $pageArgs ) );
3620  $context->setUser( $this->getUser() );
3621  $context->setLanguage( $this->mOptions->getUserLangObj() );
3623  if ( $ret ) {
3624  $text = $context->getOutput()->getHTML();
3625  $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3626  $found = true;
3627  $isHTML = true;
3628  $this->disableCache();
3629  }
3630  } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
3631  $found = false; # access denied
3632  wfDebug( __METHOD__ . ": template inclusion denied for " .
3633  $title->getPrefixedDBkey() . "\n" );
3634  } else {
3635  list( $text, $title ) = $this->getTemplateDom( $title );
3636  if ( $text !== false ) {
3637  $found = true;
3638  $isChildObj = true;
3639  }
3640  }
3641 
3642  # If the title is valid but undisplayable, make a link to it
3643  if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3644  $text = "[[:$titleText]]";
3645  $found = true;
3646  }
3647  } elseif ( $title->isTrans() ) {
3648  # Interwiki transclusion
3649  if ( $this->ot['html'] && !$forceRawInterwiki ) {
3650  $text = $this->interwikiTransclude( $title, 'render' );
3651  $isHTML = true;
3652  } else {
3653  $text = $this->interwikiTransclude( $title, 'raw' );
3654  # Preprocess it like a template
3655  $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3656  $isChildObj = true;
3657  }
3658  $found = true;
3659  }
3660 
3661  # Do infinite loop check
3662  # This has to be done after redirect resolution to avoid infinite loops via redirects
3663  if ( !$frame->loopCheck( $title ) ) {
3664  $found = true;
3665  $text = '<span class="error">'
3666  . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3667  . '</span>';
3668  wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
3669  }
3670  }
3671 
3672  # If we haven't found text to substitute by now, we're done
3673  # Recover the source wikitext and return it
3674  if ( !$found ) {
3675  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3676  if ( $profileSection ) {
3677  $this->mProfiler->scopedProfileOut( $profileSection );
3678  }
3679  return [ 'object' => $text ];
3680  }
3681 
3682  # Expand DOM-style return values in a child frame
3683  if ( $isChildObj ) {
3684  # Clean up argument array
3685  $newFrame = $frame->newChild( $args, $title );
3686 
3687  if ( $nowiki ) {
3688  $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3689  } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3690  # Expansion is eligible for the empty-frame cache
3691  $text = $newFrame->cachedExpand( $titleText, $text );
3692  } else {
3693  # Uncached expansion
3694  $text = $newFrame->expand( $text );
3695  }
3696  }
3697  if ( $isLocalObj && $nowiki ) {
3698  $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3699  $isLocalObj = false;
3700  }
3701 
3702  if ( $profileSection ) {
3703  $this->mProfiler->scopedProfileOut( $profileSection );
3704  }
3705 
3706  # Replace raw HTML by a placeholder
3707  if ( $isHTML ) {
3708  $text = $this->insertStripItem( $text );
3709  } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3710  # Escape nowiki-style return values
3711  $text = wfEscapeWikiText( $text );
3712  } elseif ( is_string( $text )
3713  && !$piece['lineStart']
3714  && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3715  ) {
3716  # Bug 529: if the template begins with a table or block-level
3717  # element, it should be treated as beginning a new line.
3718  # This behavior is somewhat controversial.
3719  $text = "\n" . $text;
3720  }
3721 
3722  if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3723  # Error, oversize inclusion
3724  if ( $titleText !== false ) {
3725  # Make a working, properly escaped link if possible (bug 23588)
3726  $text = "[[:$titleText]]";
3727  } else {
3728  # This will probably not be a working link, but at least it may
3729  # provide some hint of where the problem is
3730  preg_replace( '/^:/', '', $originalTitle );
3731  $text = "[[:$originalTitle]]";
3732  }
3733  $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3734  . 'post-expand include size too large -->' );
3735  $this->limitationWarn( 'post-expand-template-inclusion' );
3736  }
3737 
3738  if ( $isLocalObj ) {
3739  $ret = [ 'object' => $text ];
3740  } else {
3741  $ret = [ 'text' => $text ];
3742  }
3743 
3744  return $ret;
3745  }
3746 
/**
 * Look up a parser function by name, invoke its hook and normalise the result.
 *
 * The name is first looked up among the case-sensitive synonyms
 * ($this->mFunctionSynonyms[1]); failing that it is lower-cased with the
 * content language and looked up among the case-insensitive synonyms
 * ($this->mFunctionSynonyms[0]).
 *
 * @param PPFrame $frame Frame used to expand PPNode arguments
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments; element 0 is the text after the colon
 * @return array Always contains a boolean 'found'. When the function was
 *  found it also contains 'text' plus whatever flags the hook returned;
 *  if the hook returned a false 'noparse' flag, 'text' is a preprocessor
 *  DOM and 'isChildObj' is set to true.
 * @throws MWException If the registered hook is not callable
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	# Needed for the case-insensitive lookup below
	global $wgContLang;

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		throw new MWException( "Tag hook for $function is not callable\n" );
	}

	# Hooks receive the parser by reference. PHP 7.1+ refuses to take a
	# reference to $this directly, so go through a local alias.
	$parser = $this;
	$allArgs = [ &$parser ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# Array union: a 'found' value supplied by the hook wins
		$result += [
			'found' => true,
		];
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3853 
/**
 * Get the preprocessor DOM for a template, together with the title it
 * was finally loaded from.
 *
 * Results are memoised in $this->mTplDomCache (keyed by prefixed DB key);
 * title changes reported by fetchTemplateAndTitle() are remembered in
 * $this->mTplRedirCache so later lookups jump straight to the target.
 *
 * @param Title $title
 * @return array Two elements: the DOM (or false if no text could be
 *  fetched) and the Title the text came from
 */
public function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	# Follow a previously recorded redirect, if any
	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return [ $this->mTplDomCache[$titleText], $title ];
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		$this->mTplDomCache[$titleText] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	# Remember where the requested title ended up
	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			[ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3893 
/**
 * Fetch the current revision of a title, memoised per parser instance.
 *
 * Results are kept in a MapCacheLRU holding up to 100 entries, keyed by
 * the prefixed DB key. On a miss the callback configured in the parser
 * options is invoked with the title and this parser.
 *
 * @param Title $title
 * @return mixed Whatever the current-revision callback returned
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$cacheKey = $title->getPrefixedDBkey();
	# Create the cache lazily on first use
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$this->currentRevisionCache->set( $cacheKey,
			// Defaults to Parser::statelessFetchRevision()
			call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
		);
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3918 
/**
 * Load a revision for a title via Revision::newFromTitle().
 *
 * @param Title $title
 * @param Parser|bool $parser Not used by this implementation
 * @return mixed Result of Revision::newFromTitle()
 */
public static function statelessFetchRevision( $title, $parser = false ) {
	$revision = Revision::newFromTitle( $title );

	return $revision;
}
3931 
/**
 * Fetch the unparsed text of a template through the template callback
 * and register the dependencies it reports on the parser output.
 *
 * @param Title $title
 * @return array Two elements: the text (as returned by the callback,
 *  with any U+007F bytes replaced when it is a string) and the final Title
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$callback = $this->mOptions->getTemplateCallback();
	$fetched = call_user_func( $callback, $title, $this );

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// U+007F DELETE is what distinguishes strip markers from regular
		// text, so it must not survive in fetched template source.
		$text = strtr( $text, "\x7f", "?" );
	}

	if ( isset( $fetched['finalTitle'] ) ) {
		$resolvedTitle = $fetched['finalTitle'];
	} else {
		$resolvedTitle = $title;
	}

	if ( isset( $fetched['deps'] ) ) {
		foreach ( $fetched['deps'] as $dependency ) {
			$this->mOutput->addTemplate(
				$dependency['title'],
				$dependency['page_id'],
				$dependency['rev_id']
			);
			if ( $dependency['title']->equals( $this->getTitle() ) ) {
				// Self-transclusion: the rendered result depends on the
				// new version of the page itself
				$this->mOutput->setFlag( 'vary-revision' );
			}
		}
	}

	return [ $text, $resolvedTitle ];
}
3959 
/**
 * Fetch only the text of a template, discarding the final title.
 *
 * @param Title $title
 * @return mixed First element of the fetchTemplateAndTitle() result
 */
public function fetchTemplate( $title ) {
	$textAndTitle = $this->fetchTemplateAndTitle( $title );

	return $textAndTitle[0];
}
3968 
/**
 * Static function to get a template.
 * Can be overridden via ParserOptions::setTemplateCallback().
 *
 * Follows at most one redirect and records every title it touched as a
 * dependency.
 *
 * @param Title $title
 * @param Parser|bool $parser When given, its per-parse revision cache is
 *  used to load the current revision
 * @return array With keys 'text' (string, or false when nothing could be
 *  loaded), 'finalTitle' (Title) and 'deps' (list of arrays with
 *  'title', 'page_id' and 'rev_id')
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];

	# Loop to fetch the article, with up to 1 redirect
	// @codingStandardsIgnoreStart Generic.CodeAnalysis.ForLoopWithTestFunctionCall.NotAllowed
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		// @codingStandardsIgnoreEnd
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			# No parser available: load the revision directly so that
			# $rev is always defined below
			$rev = Revision::newFromTitle( $title );
		}
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = LinkCache::singleton();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id ];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id ];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but the page may be backed by an interface message
			global $wgContLang;
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}
	return [
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps ];
}
4060 
/**
 * Fetch a file, discarding the resolved title.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed First element of the fetchFileAndTitle() result
 */
public function fetchFile( $title, $options = [] ) {
	$fileAndTitle = $this->fetchFileAndTitle( $title, $options );

	return $fileAndTitle[0];
}
4071 
/**
 * Fetch a file and its title, registering the file as an image
 * dependency of the parser output.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (falsy when not found) and the
 *  Title, which is replaced by the file's own title when they differ
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency under the requested name...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	# ...and also under the name the file was actually found at
	if ( $file && !$title->equals( $file->getTitle() ) ) {
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}

	return [ $file, $title ];
}
4093 
/**
 * Locate a file without registering it as a dependency.
 *
 * @param Title $title
 * @param array $options Recognised keys: 'broken' (force a miss) and
 *  'sha1' (look the file up by key instead of by name); the whole array
 *  is forwarded to the lookup
 * @return mixed false when 'broken' is set, otherwise the lookup result
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	// Broken thumbnail forced by hook
	if ( isset( $options['broken'] ) ) {
		return false;
	}

	// Get by (sha1,timestamp)
	if ( isset( $options['sha1'] ) ) {
		return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	}

	// Get by (name,timestamp)
	return wfFindFile( $title, $options );
}
4114 
/**
 * Transclude a page from another wiki over HTTP.
 *
 * @param Title $title
 * @param string $action Value for the 'action' URL parameter
 * @return string Fetched content, or a content-language message when
 *  the feature is disabled or the URL is longer than 255 bytes
 */
public function interwikiTransclude( $title, $action ) {
	global $wgEnableScaryTranscluding;

	if ( $wgEnableScaryTranscluding ) {
		$url = $title->getFullURL( [ 'action' => $action ] );
		if ( strlen( $url ) <= 255 ) {
			return $this->fetchScaryTemplateMaybeFromCache( $url );
		}
		$failureMsg = 'scarytranscludetoolong';
	} else {
		$failureMsg = 'scarytranscludedisabled';
	}

	return wfMessage( $failureMsg )->inContentLanguage()->text();
}
4137 
/**
 * Return the content behind a URL, using the 'transcache' table as a
 * cache with a lifetime of $wgTranscludeCacheExpiry seconds.
 *
 * @param string $url
 * @return string Cached or freshly fetched content, or a
 *  content-language error message when the HTTP request fails
 */
public function fetchScaryTemplateMaybeFromCache( $url ) {
	global $wgTranscludeCacheExpiry;

	# Try the cache first
	$dbr = wfGetDB( DB_SLAVE );
	$oldestAcceptable = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
	$cached = $dbr->selectRow(
		'transcache',
		[ 'tc_time', 'tc_contents' ],
		[ 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $oldestAcceptable ) ]
	);
	if ( $cached ) {
		return $cached->tc_contents;
	}

	# Cache miss: fetch over HTTP
	$request = MWHttpRequest::factory( $url, [], __METHOD__ );
	$status = $request->execute(); // Status object
	if ( !$status->isOK() ) {
		if ( $request->getStatus() != 200 ) {
			// Though we failed to fetch the content, this status is useless.
			return wfMessage( 'scarytranscludefailed-httpstatus' )
				->params( $url, $request->getStatus() /* HTTP status */ )->inContentLanguage()->text();
		}
		return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
	}
	$text = $request->getContent();

	# Store the fresh copy
	$dbw = wfGetDB( DB_MASTER );
	$cacheRow = [
		'tc_url' => $url,
		'tc_time' => $dbw->timestamp( time() ),
		'tc_contents' => $text
	];
	$dbw->replace( 'transcache', [ 'tc_url' ], $cacheRow );

	return $text;
}
4172 
/**
 * Resolve a triple-brace argument ({{{name|default}}}) against a frame.
 *
 * @param array $piece Reads the 'title' and 'parts' entries
 * @param PPFrame $frame
 * @return array Either [ 'object' => ... ] (the default value's child
 *  nodes, or the re-assembled original source when nothing matched) or
 *  [ 'text' => ... ] (the argument value, with a warning comment
 *  appended when the include-size limit was exceeded)
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$argName = trim( $nameWithSpaces );

	$text = $frame->getArgument( $argName );
	$object = false;

	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultAllowed = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultAllowed ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	$error = false;
	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$error = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}
	if ( $error !== false ) {
		$text .= $error;
	}

	if ( $object !== false ) {
		return [ 'object' => $object ];
	}

	return [ 'text' => $text ];
}
4219 
/**
 * Return the text to be used for a given extension tag.
 *
 * In HTML output mode (or for function tag hooks in HTML/pre mode) the
 * registered hook is invoked; otherwise the tag source is reconstructed.
 * Unless the marker type is 'none', the output is stored in the strip
 * state and a strip marker is returned in its place.
 *
 * @param array $params Reads 'name', and the optional entries 'attr',
 *  'inner', 'close' and 'attributes'
 * @param PPFrame $frame
 * @return string Hook output (marker type 'none') or a strip marker
 * @throws MWException If a registered hook is not callable, or a hook
 *  returns a marker type other than 'none', 'nowiki' or 'general'
 */
public function extensionSubstitution( $params, $frame ) {
	$name = $frame->expand( $params['name'] );
	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	if ( $isFunctionTag ) {
		$markerType = 'none';
	} else {
		$markerType = 'general';
	}
	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			# Array union: attributes parsed from the tag text take precedence
			$attributes = $attributes + $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $this->mTagHooks[$name] ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}

			# Function tag hooks receive the parser by reference. PHP 7.1+
			# refuses to take a reference to $this, so use a local alias.
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			# Extract flags to local scope (to override $markerType)
			$flags = $output;
			$output = $flags[0];
			unset( $flags[0] );
			extract( $flags );
		}
	} else {
		# Not rendering: rebuild the tag source
		if ( is_null( $attrText ) ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4311 
/**
 * Add $size to the running include-size counter of the given type,
 * unless doing so would exceed the configured maximum include size.
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Amount to add
 * @return bool True if the counter was increased, false if the limit
 *  would have been exceeded (the counter is then left unchanged)
 */
public function incrementIncludeSize( $type, $size ) {
	$withinLimit = $this->mIncludeSizes[$type] + $size <= $this->mOptions->getMaxIncludeSize();
	if ( $withinLimit ) {
		$this->mIncludeSizes[$type] += $size;
	}

	return $withinLimit;
}
4327 
/**
 * Increment the expensive function count.
 *
 * @return bool True while the count is still within the limit
 *  configured in the parser options, false once it has been exceeded
 */
public function incrementExpensiveFunctionCount() {
	$this->mExpensiveFunctionCount++;
	return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
}
4337 
/**
 * Strip double-underscore magic words (such as __TOC__ or __NOINDEX__)
 * from the text and record their effects.
 *
 * The first __TOC__ is replaced by a placeholder comment; every matched
 * word is stored in $this->mDoubleUnderscores and saved as a property of
 * the parser output.
 *
 * @param string $text
 * @return string Text with the double-underscore words removed
 */
public function doDoubleUnderscore( $text ) {

	# The position of __TOC__ needs to be recorded
	$mw = MagicWord::get( 'toc' );
	if ( $mw->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $mw->replace( '<!--MWTOC-->', $text, 1 );

		# Only keep the first one.
		$text = $mw->replace( '', $text );
	}

	# Now match and remove the rest of them
	$mwa = MagicWord::getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $mwa->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}
	# (bug 8068) Allow control over whether robots index a page.
	# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
	# is not desirable, the last one on the page should win.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( $this->mDoubleUnderscores as $key => $val ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4395 
/**
 * Add a tracking category to the parser output for the current title.
 *
 * @param string $msg Passed through to ParserOutput::addTrackingCategory()
 * @return mixed Whatever the parser output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$outcome = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );

	return $outcome;
}
4404 
4421  public function formatHeadings( $text, $origText, $isMain = true ) {
4422  global $wgMaxTocLevel, $wgExperimentalHtmlIds;
4423 
4424  # Inhibit editsection links if requested in the page
4425  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4426  $maybeShowEditLink = $showEditLink = false;
4427  } else {
4428  $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
4429  $showEditLink = $this->mOptions->getEditSection();
4430  }
4431  if ( $showEditLink ) {
4432  $this->mOutput->setEditSectionTokens( true );
4433  }
4434 
4435  # Get all headlines for numbering them and adding funky stuff like [edit]
4436  # links - this is for later, but we need the number of headlines right now
4437  $matches = [];
4438  $numMatches = preg_match_all(
4439  '/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
4440  $text,
4441  $matches
4442  );
4443 
4444  # if there are fewer than 4 headlines in the article, do not show TOC
4445  # unless it's been explicitly enabled.
4446  $enoughToc = $this->mShowToc &&
4447  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4448 
4449  # Allow user to stipulate that a page should have a "new section"
4450  # link added via __NEWSECTIONLINK__
4451  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4452  $this->mOutput->setNewSection( true );
4453  }
4454 
4455  # Allow user to remove the "new section"
4456  # link via __NONEWSECTIONLINK__
4457  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4458  $this->mOutput->hideNewSection( true );
4459  }
4460 
4461  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4462  # override above conditions and always show TOC above first header
4463  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4464  $this->mShowToc = true;
4465  $enoughToc = true;
4466  }
4467 
4468  # headline counter
4469  $headlineCount = 0;
4470  $numVisible = 0;
4471 
4472  # Ugh .. the TOC should have neat indentation levels which can be
4473  # passed to the skin functions. These are determined here
4474  $toc = '';
4475  $full = '';
4476  $head = [];
4477  $sublevelCount = [];
4478  $levelCount = [];
4479  $level = 0;
4480  $prevlevel = 0;
4481  $toclevel = 0;
4482  $prevtoclevel = 0;
4483  $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4484  $baseTitleText = $this->mTitle->getPrefixedDBkey();
4485  $oldType = $this->mOutputType;
4486  $this->setOutputType( self::OT_WIKI );
4487  $frame = $this->getPreprocessor()->newFrame();
4488  $root = $this->preprocessToDom( $origText );
4489  $node = $root->getFirstChild();
4490  $byteOffset = 0;
4491  $tocraw = [];
4492  $refers = [];
4493 
4494  $headlines = $numMatches !== false ? $matches[3] : [];
4495 
4496  foreach ( $headlines as $headline ) {
4497  $isTemplate = false;
4498  $titleText = false;
4499  $sectionIndex = false;
4500  $numbering = '';
4501  $markerMatches = [];
4502  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4503  $serial = $markerMatches[1];
4504  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4505  $isTemplate = ( $titleText != $baseTitleText );
4506  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4507  }
4508 
4509  if ( $toclevel ) {
4510  $prevlevel = $level;
4511  }
4512  $level = $matches[1][$headlineCount];
4513 
4514  if ( $level > $prevlevel ) {
4515  # Increase TOC level
4516  $toclevel++;
4517  $sublevelCount[$toclevel] = 0;
4518  if ( $toclevel < $wgMaxTocLevel ) {
4519  $prevtoclevel = $toclevel;
4520  $toc .= Linker::tocIndent();
4521  $numVisible++;
4522  }
4523  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4524  # Decrease TOC level, find level to jump to
4525 
4526  for ( $i = $toclevel; $i > 0; $i-- ) {
4527  if ( $levelCount[$i] == $level ) {
4528  # Found last matching level
4529  $toclevel = $i;
4530  break;
4531  } elseif ( $levelCount[$i] < $level ) {
4532  # Found first matching level below current level
4533  $toclevel = $i + 1;
4534  break;
4535  }
4536  }
4537  if ( $i == 0 ) {
4538  $toclevel = 1;
4539  }
4540  if ( $toclevel < $wgMaxTocLevel ) {
4541  if ( $prevtoclevel < $wgMaxTocLevel ) {
4542  # Unindent only if the previous toc level was shown :p
4543  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4544  $prevtoclevel = $toclevel;
4545  } else {
4546  $toc .= Linker::tocLineEnd();
4547  }
4548  }
4549  } else {
4550  # No change in level, end TOC line
4551  if ( $toclevel < $wgMaxTocLevel ) {
4552  $toc .= Linker::tocLineEnd();
4553  }
4554  }
4555 
4556  $levelCount[$toclevel] = $level;
4557 
4558  # count number of headlines for each level
4559  $sublevelCount[$toclevel]++;
4560  $dot = 0;
4561  for ( $i = 1; $i <= $toclevel; $i++ ) {
4562  if ( !empty( $sublevelCount[$i] ) ) {
4563  if ( $dot ) {
4564  $numbering .= '.';
4565  }
4566  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4567  $dot = 1;
4568  }
4569  }
4570 
4571  # The safe header is a version of the header text safe to use for links
4572 
4573  # Remove link placeholders by the link text.
4574  # <!--LINK number-->
4575  # turns into
4576  # link text with suffix
4577  # Do this before unstrip since link text can contain strip markers
4578  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4579 
4580  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4581  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4582 
4583  # Strip out HTML (first regex removes any tag not allowed)
4584  # Allowed tags are:
4585  # * <sup> and <sub> (bug 8393)
4586  # * <i> (bug 26375)
4587  # * <b> (r105284)
4588  # * <bdi> (bug 72884)
4589  # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
4590  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4591  # to allow setting directionality in toc items.
4592  $tocline = preg_replace(
4593  [
4594  '#<(?!/?(span|sup|sub|bdi|i|b)(?: [^>]*)?>).*?>#',
4595  '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b))(?: .*?)?>#'
4596  ],
4597  [ '', '<$1>' ],
4598  $safeHeadline
4599  );
4600 
4601  # Strip '<span></span>', which is the result from the above if
4602  # <span id="foo"></span> is used to produce an additional anchor
4603  # for a section.
4604  $tocline = str_replace( '<span></span>', '', $tocline );
4605 
4606  $tocline = trim( $tocline );
4607 
4608  # For the anchor, strip out HTML-y stuff period
4609  $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4610  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4611 
4612  # Save headline for section edit hint before it's escaped
4613  $headlineHint = $safeHeadline;
4614 
4615  if ( $wgExperimentalHtmlIds ) {
4616  # For reverse compatibility, provide an id that's
4617  # HTML4-compatible, like we used to.
4618  # It may be worth noting, academically, that it's possible for
4619  # the legacy anchor to conflict with a non-legacy headline
4620  # anchor on the page. In this case likely the "correct" thing
4621  # would be to either drop the legacy anchors or make sure
4622  # they're numbered first. However, this would require people
4623  # to type in section names like "abc_.D7.93.D7.90.D7.A4"
4624  # manually, so let's not bother worrying about it.
4625  $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
4626  [ 'noninitial', 'legacy' ] );
4627  $safeHeadline = Sanitizer::escapeId( $safeHeadline );
4628 
4629  if ( $legacyHeadline == $safeHeadline ) {
4630  # No reason to have both (in fact, we can't)
4631  $legacyHeadline = false;
4632  }
4633  } else {
4634  $legacyHeadline = false;
4635  $safeHeadline = Sanitizer::escapeId( $safeHeadline,
4636  'noninitial' );
4637  }
4638 
4639  # HTML names must be case-insensitively unique (bug 10721).
4640  # This does not apply to Unicode characters per
4641  # http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
4642  # @todo FIXME: We may be changing them depending on the current locale.
4643  $arrayKey = strtolower( $safeHeadline );
4644  if ( $legacyHeadline === false ) {
4645  $legacyArrayKey = false;
4646  } else {
4647  $legacyArrayKey = strtolower( $legacyHeadline );
4648  }
4649 
4650  # Create the anchor for linking from the TOC to the section
4651  $anchor = $safeHeadline;
4652  $legacyAnchor = $legacyHeadline;
4653  if ( isset( $refers[$arrayKey] ) ) {
4654  // @codingStandardsIgnoreStart
4655  for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4656  // @codingStandardsIgnoreEnd
4657  $anchor .= "_$i";
4658  $refers["${arrayKey}_$i"] = true;
4659  } else {
4660  $refers[$arrayKey] = true;
4661  }
4662  if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
4663  // @codingStandardsIgnoreStart
4664  for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
4665  // @codingStandardsIgnoreEnd
4666  $legacyAnchor .= "_$i";
4667  $refers["${legacyArrayKey}_$i"] = true;
4668  } else {
4669  $refers[$legacyArrayKey] = true;
4670  }
4671 
4672  # Don't number the heading if it is the only one (looks silly)
4673  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4674  # the two are different if the line contains a link
4675  $headline = Html::element(
4676  'span',
4677  [ 'class' => 'mw-headline-number' ],
4678  $numbering
4679  ) . ' ' . $headline;
4680  }
4681 
4682  if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
4683  $toc .= Linker::tocLine( $anchor, $tocline,
4684  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4685  }
4686 
4687  # Add the section to the section tree
4688  # Find the DOM node for this header
4689  $noOffset = ( $isTemplate || $sectionIndex === false );
4690  while ( $node && !$noOffset ) {
4691  if ( $node->getName() === 'h' ) {
4692  $bits = $node->splitHeading();
4693  if ( $bits['i'] == $sectionIndex ) {
4694  break;
4695  }
4696  }
4697  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4698  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4699  $node = $node->getNextSibling();
4700  }
4701  $tocraw[] = [
4702  'toclevel' => $toclevel,
4703  'level' => $level,
4704  'line' => $tocline,
4705  'number' => $numbering,
4706  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4707  'fromtitle' => $titleText,
4708  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4709  'anchor' => $anchor,
4710  ];
4711 
4712  # give headline the correct <h#> tag
4713  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4714  // Output edit section links as markers with styles that can be customized by skins
4715  if ( $isTemplate ) {
4716  # Put a T flag in the section identifier, to indicate to extractSections()
4717  # that sections inside <includeonly> should be counted.
4718  $editsectionPage = $titleText;
4719  $editsectionSection = "T-$sectionIndex";
4720  $editsectionContent = null;
4721  } else {
4722  $editsectionPage = $this->mTitle->getPrefixedText();
4723  $editsectionSection = $sectionIndex;
4724  $editsectionContent = $headlineHint;
4725  }
4726  // We use a bit of pseudo-xml for editsection markers. The
4727  // language converter is run later on. Using a UNIQ style marker
4728  // leads to the converter screwing up the tokens when it
4729  // converts stuff. And trying to insert strip tags fails too. At
4730  // this point all real inputted tags have already been escaped,
4731  // so we don't have to worry about a user trying to input one of
4732  // these markers directly. We use a page and section attribute
4733  // to stop the language converter from converting these
4734  // important bits of data, but put the headline hint inside a
4735  // content block because the language converter is supposed to
4736  // be able to convert that piece of data.
4737  // Gets replaced with html in ParserOutput::getText
4738  $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4739  $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4740  if ( $editsectionContent !== null ) {
4741  $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4742  } else {
4743  $editlink .= '/>';
4744  }
4745  } else {
4746  $editlink = '';
4747  }
4748  $head[$headlineCount] = Linker::makeHeadline( $level,
4749  $matches['attrib'][$headlineCount], $anchor, $headline,
4750  $editlink, $legacyAnchor );
4751 
4752  $headlineCount++;
4753  }
4754 
4755  $this->setOutputType( $oldType );
4756 
4757  # Never ever show TOC if no headers
4758  if ( $numVisible < 1 ) {
4759  $enoughToc = false;
4760  }
4761 
4762  if ( $enoughToc ) {
4763  if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
4764  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4765  }
4766  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4767  $this->mOutput->setTOCHTML( $toc );
4768  $toc = self::TOC_START . $toc . self::TOC_END;
4769  $this->mOutput->addModules( 'mediawiki.toc' );
4770  }
4771 
4772  if ( $isMain ) {
4773  $this->mOutput->setSections( $tocraw );
4774  }
4775 
4776  # split up and insert constructed headlines
4777  $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4778  $i = 0;
4779 
4780  // build an array of document sections
4781  $sections = [];
4782  foreach ( $blocks as $block ) {
4783  // $head is zero-based, sections aren't.
4784  if ( empty( $head[$i - 1] ) ) {
4785  $sections[$i] = $block;
4786  } else {
4787  $sections[$i] = $head[$i - 1] . $block;
4788  }
4789 
4800  Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $showEditLink ] );
4801 
4802  $i++;
4803  }
4804 
4805  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4806  // append the TOC at the beginning
4807  // Top anchor now in skin
4808  $sections[0] = $sections[0] . $toc . "\n";
4809  }
4810 
4811  $full .= implode( '', $sections );
4812 
4813  if ( $this->mForceTocPosition ) {
4814  return str_replace( '<!--MWTOC-->', $toc, $full );
4815  } else {
4816  return $full;
4817  }
4818  }
4819 
/**
 * Transform wikitext for saving: normalise line endings, optionally run
 * the pre-save pass (pstPass2()) and finally unstrip the strip state.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title handed to startParse()
 * @param User $user User handed to setUser() and pstPass2()
 * @param ParserOptions $options Parser options; getPreSaveTransform()
 *   decides whether pstPass2() is run
 * @param bool $clearState When true, the parser is locked for this call
 *   and startParse() is asked to clear the state
 * @return string The transformed text
 */
public function preSaveTransform( $text, Title $title, User $user,
    ParserOptions $options, $clearState = true
) {
    # Keep the value returned by lock() in a local for the whole call
    # (presumably it releases the lock when it goes out of scope - confirm
    # against lock()).
    $magicScopeVariable = $clearState ? $this->lock() : null;

    $this->startParse( $title, $options, self::OT_WIKI, $clearState );
    $this->setUser( $user );

    # Normalise line endings: CRLF first, then any remaining lone CR
    $normalized = str_replace( [ "\r\n", "\r" ], "\n", $text );

    if ( $options->getPreSaveTransform() ) {
        $normalized = $this->pstPass2( $normalized, $user );
    }
    $result = $this->mStripState->unstripBoth( $normalized );

    # Reset the user set above
    $this->setUser( null );

    return $result;
}
4854 
/**
 * Pre-save transform helper: substitutes variables, expands signature
 * tildes and applies the "pipe trick" to links, then trims trailing
 * whitespace.
 *
 * @param string $text Wikitext to transform
 * @param User $user User whose signature replaces the tildes
 * @return string
 */
private function pstPass2( $text, $user ) {
    global $wgContLang;

    # Note: This is the timestamp saved as hardcoded wikitext to
    # the database, we use $wgContLang here in order to give
    # everyone the same signature and use the default one rather
    # than the one selected in each user's preferences.
    # (see also bug 12815)
    $ts = $this->mOptions->getTimestamp();
    # NOTE(review): this assignment was missing from the listing; restored
    # from the MWTimestamp API that the following two calls rely on.
    $timestamp = MWTimestamp::getLocalInstance( $ts );
    $ts = $timestamp->format( 'YmdHis' );
    $tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

    $d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # This works almost by chance, as the replaceVariables are done before the getUserSig(),
    # which may corrupt this parser instance via its wfMessage()->text() call-

    # Signatures
    $sigText = $this->getUserSig( $user );
    $text = strtr( $text, [
        '~~~~~' => $d,
        '~~~~' => "$sigText $d",
        '~~~' => $sigText
    ] );

    # Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
    $tc = '[' . Title::legalChars() . ']';
    $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

    // [[ns:page (context)|]]
    $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
    // [[ns:page(context)|]] using double-width brackets (added in r40257).
    // \xEF\xBC\x88 / \xEF\xBC\x89 are the UTF-8 bytes of U+FF08 / U+FF09
    // (FULLWIDTH LEFT/RIGHT PARENTHESIS). With plain ASCII parentheses this
    // pattern would match any "[[Foo|]]" and mangle it.
    $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\xEF\xBC\x88$tc+\xEF\xBC\x89)\\|]]/";
    // [[ns:page (context), context|]] (using either single or double-width comma;
    // \xEF\xBC\x8C is the UTF-8 encoding of U+FF0C FULLWIDTH COMMA)
    $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |\xEF\xBC\x8C)$tc+|)\\|]]/";
    // [[|page]] (reverse pipe trick: add context from page title)
    $p2 = "/\[\[\\|($tc+)]]/";

    # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
    $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
    $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
    $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

    # Reverse pipe trick: borrow namespace/context from the current title
    $t = $this->mTitle->getText();
    $m = [];
    if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
        $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
    } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
        $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
    } else {
        # if there's no context, don't bother duplicating the title
        $text = preg_replace( $p2, '[[\\1]]', $text );
    }

    # Trim trailing whitespace
    return rtrim( $text );
}
4927 
/**
 * Build the wikitext for a user's signature.
 *
 * A nickname that is treated as "fancy" markup and passes validateSig()
 * is returned via cleanSig(). In every other case the nickname (or the
 * user name as fallback) is escaped and wrapped by the 'signature' or
 * 'signature-anon' message in the content language.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use; false reads the
 *   'nickname' user option
 * @param bool|null $fancySig Whether the nickname is raw signature
 *   markup; null reads the 'fancysig' user option
 * @return string
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
    global $wgMaxSigChars;

    $username = $user->getName();

    # Fill in whatever the caller left unspecified from the user object
    if ( $nickname === false ) {
        $nickname = $user->getOption( 'nickname' );
    }
    if ( $fancySig === null ) {
        $fancySig = $user->getBoolOption( 'fancysig' );
    }

    # No usable nickname: sign with the plain user name
    if ( $nickname == null ) {
        $nickname = $username;
    }

    if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
        $nickname = $username;
        wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
    } elseif ( $fancySig !== false ) {
        # Sig. might contain markup; validate this
        if ( $this->validateSig( $nickname ) !== false ) {
            # Validated; clean up (if needed) and return it
            return $this->cleanSig( $nickname, true );
        }
        # Failed to validate; fall back to the default
        $nickname = $username;
        wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
    }

    # Make sure the nickname does not smuggle a signature into the signature
    $nickname = self::cleanSigInSig( $nickname );

    # Still here: produce a link to the user page through the message
    $msgName = 'signature';
    $userText = wfEscapeWikiText( $username );
    $nickText = wfEscapeWikiText( $nickname );
    if ( $user->isAnon() ) {
        $msgName = 'signature-anon';
    }

    $message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();

    return $message->title( $this->getTitle() )->text();
}
4984 
/**
 * Check that a user's signature is a well-formed XML fragment.
 *
 * @param string $text Signature text to check
 * @return string|bool The unchanged text when it is well formed,
 *   false otherwise
 */
public function validateSig( $text ) {
    if ( !Xml::isWellFormedXmlFragment( $text ) ) {
        return false;
    }

    return $text;
}
4994 
/**
 * Clean up signature text: force templates to be substituted, remove
 * signature tildes and expand the result through the preprocessor.
 *
 * @param string $text Signature text
 * @param bool $parsing True when called from inside a running parse;
 *   when false the parser is locked and a fresh OT_PREPROCESS parse is
 *   started on $wgTitle, and the result is unstripped before returning
 * @return string Signature text (unchanged when the clean-signatures
 *   option is off)
 */
public function cleanSig( $text, $parsing = false ) {
    global $wgTitle;

    $standalone = !$parsing;
    if ( $standalone ) {
        # Hold on to the value returned by lock() until we return
        $magicScopeVariable = $this->lock();
        $this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
    }

    # Option to disable this feature
    if ( !$this->mOptions->getCleanSignatures() ) {
        return $text;
    }

    # @todo FIXME: Regex doesn't respect extension tags or nowiki
    #  => Move this logic to braceSubstitution()
    $subst = MagicWord::get( 'subst' );
    # Matches every "{{" that is not already followed by the subst word
    $pattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
    $replacement = '{{' . $subst->getSynonym( 0 );

    $cleaned = preg_replace( $pattern, $replacement, $text );
    $cleaned = self::cleanSigInSig( $cleaned );

    $dom = $this->preprocessToDom( $cleaned );
    $frame = $this->getPreprocessor()->newFrame();
    $cleaned = $frame->expand( $dom );

    return $standalone
        ? $this->mStripState->unstripBoth( $cleaned )
        : $cleaned;
}
5035 
/**
 * Strip signature tildes (runs of three to five "~") from the text.
 *
 * @param string $text
 * @return string Text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
    return preg_replace( '/~{3,5}/', '', $text );
}
5046 
/**
 * Public entry point that forwards its arguments unchanged to
 * startParse().
 *
 * NOTE(review): the declaration line was missing from this listing; the
 * name and leading parameters are reconstructed from the call below and
 * from startParse()'s signature - confirm against the upstream file.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
    $outputType, $clearState = true
) {
    $this->startParse( $title, $options, $outputType, $clearState );
}
5061 
/**
 * Prepare the parser for a run: store title, options and output type,
 * and optionally clear the parser state.
 *
 * @param Title|null $title Passed to setTitle()
 * @param ParserOptions $options Stored in $this->mOptions
 * @param int $outputType Passed to setOutputType()
 * @param bool $clearState Whether to call clearState()
 */
private function startParse( Title $title = null, ParserOptions $options,
    $outputType, $clearState = true
) {
    $this->setTitle( $title );
    $this->mOptions = $options;
    $this->setOutputType( $outputType );

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
5078 
/**
 * Run a message text through preprocess(), guarding against re-entrant
 * calls.
 *
 * @param string $text Text to transform
 * @param ParserOptions $options Options passed to preprocess()
 * @param Title|null $title Title to preprocess against; falls back to
 *   $wgTitle when empty
 * @return string The preprocessed text, or $text unchanged when this
 *   method is already executing further up the call stack
 */
public function transformMsg( $text, $options, $title = null ) {
    static $executing = false;

    # Guard against infinite recursion
    if ( $executing ) {
        return $text;
    }
    $executing = true;

    try {
        if ( !$title ) {
            global $wgTitle;
            $title = $wgTitle;
        }

        return $this->preprocess( $text, $title, $options );
    } finally {
        # Always release the guard; otherwise one exception thrown by
        # preprocess() would turn every later call into a no-op.
        $executing = false;
    }
}
5106 
/**
 * Register a callback for an extension tag and add the tag to the strip
 * list.
 *
 * @param string $tag Tag name; it is lower-cased before being stored
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains "<", ">", CR or LF
 * @return callable|null The callback previously stored for this tag,
 *   or null if there was none
 */
public function setHook( $tag, $callback ) {
    $tag = strtolower( $tag );

    $bad = [];
    if ( preg_match( '/[<>\r\n]/', $tag, $bad ) ) {
        throw new MWException( "Invalid character {$bad[0]} in setHook('$tag', ...) call" );
    }

    $previous = null;
    if ( isset( $this->mTagHooks[$tag] ) ) {
        $previous = $this->mTagHooks[$tag];
    }
    $this->mTagHooks[$tag] = $callback;

    # Make sure the tag gets stripped, without listing it twice
    $alreadyListed = in_array( $tag, $this->mStripList );
    if ( !$alreadyListed ) {
        $this->mStripList[] = $tag;
    }

    return $previous;
}
5144 
/**
 * Register a callback for a "transparent" tag. Unlike setHook(), the tag
 * is not added to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before being stored
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains "<", ">", CR or LF
 * @return callable|null The callback previously stored for this tag,
 *   or null if there was none
 */
public function setTransparentTagHook( $tag, $callback ) {
    $tag = strtolower( $tag );

    $bad = [];
    if ( preg_match( '/[<>\r\n]/', $tag, $bad ) ) {
        throw new MWException( "Invalid character {$bad[0]} in setTransparentHook('$tag', ...) call" );
    }

    $previous = null;
    if ( isset( $this->mTransparentTagHooks[$tag] ) ) {
        $previous = $this->mTransparentTagHooks[$tag];
    }
    $this->mTransparentTagHooks[$tag] = $callback;

    return $previous;
}
5172 
/**
 * Forget all tag hooks and function tag hooks, and reset the strip list
 * to its default. Transparent tag hooks are not touched here.
 */
public function clearTagHooks() {
    # Restore the default strip list first, then drop both hook tables
    $this->mStripList = $this->mDefaultStripList;
    $this->mFunctionTagHooks = [];
    $this->mTagHooks = [];
}
5181 
/**
 * Register a parser function callback under a magic word ID and index
 * all of the magic word's synonyms for lookup.
 *
 * @param string $id Magic word ID
 * @param callable $callback Callback stored for the function
 * @param int $flags Bit field of self::SFH_* flags; with SFH_NO_HASH
 *   the synonyms are registered without a leading "#"
 * @throws MWException If $id does not resolve to a magic word
 * @return callable|null The callback previously registered for $id,
 *   or null if there was none
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
    # Needed for the case-folding of synonyms below; the declaration was
    # missing, which left $wgContLang undefined in this scope.
    global $wgContLang;

    $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
    $this->mFunctionHooks[$id] = [ $callback, $flags ];

    # Add to function cache
    $mw = MagicWord::get( $id );
    if ( !$mw ) {
        throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
    }

    $synonyms = $mw->getSynonyms();
    # 0 or 1; used as the first-level key of mFunctionSynonyms
    $sensitive = intval( $mw->isCaseSensitive() );

    foreach ( $synonyms as $syn ) {
        # Case
        if ( !$sensitive ) {
            $syn = $wgContLang->lc( $syn );
        }
        # Add leading hash
        if ( !( $flags & self::SFH_NO_HASH ) ) {
            $syn = '#' . $syn;
        }
        # Remove trailing colon
        if ( substr( $syn, -1, 1 ) === ':' ) {
            $syn = substr( $syn, 0, -1 );
        }
        $this->mFunctionSynonyms[$sensitive][$syn] = $id;
    }

    return $oldVal;
}
5257 
/**
 * List the IDs of all registered parser function hooks.
 *
 * @return array Keys of $this->mFunctionHooks
 */
public function getFunctionHooks() {
    $hookIds = array_keys( $this->mFunctionHooks );

    return $hookIds;
}
5266 
/**
 * Register a function tag hook and add the tag to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before being stored
 * @param callable $callback Callback stored for the tag
 * @param int $flags Flags stored alongside the callback
 * @throws MWException If the tag name contains "<", ">", CR or LF
 * @return array|null The [ callback, flags ] pair previously stored for
 *   this tag, or null if there was none
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
    $tag = strtolower( $tag );

    $bad = [];
    if ( preg_match( '/[<>\r\n]/', $tag, $bad ) ) {
        throw new MWException( "Invalid character {$bad[0]} in setFunctionTagHook('$tag', ...) call" );
    }

    $previous = null;
    if ( isset( $this->mFunctionTagHooks[$tag] ) ) {
        $previous = $this->mFunctionTagHooks[$tag];
    }
    $this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

    # Make sure the tag gets stripped, without listing it twice
    $alreadyListed = in_array( $tag, $this->mStripList );
    if ( !$alreadyListed ) {
        $this->mStripList[] = $tag;
    }

    return $previous;
}
5292 
/**
 * Replace link placeholders in the text, in place, by delegating to
 * $this->mLinkHolders.
 *
 * @param string &$text Text to process; modified by reference
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
    $holders = $this->mLinkHolders;
    $holders->replace( $text );
}
5303 
/**
 * Text-only counterpart of replaceLinkHolders(): delegates to
 * replaceText() on $this->mLinkHolders and returns its result.
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
    $holders = $this->mLinkHolders;

    return $holders->replaceText( $text );
}
5314 
/**
 * Render the body of a gallery tag into HTML.
 *
 * Each non-empty line of $text has the form "Title|option|...|caption".
 * Recognised options are alt text, link target and any parameter the
 * file's media handler accepts; the last unrecognised piece becomes the
 * caption.
 *
 * @param string $text Content of the gallery tag, one image per line
 * @param array $params Attributes of the gallery tag. 'mode',
 *   'showfilename', 'caption', 'perrow', 'widths' and 'heights' are read
 *   here; the whole array is also handed to the gallery object
 * @return string HTML produced by the gallery object's toHTML()
 */
public function renderImageGallery( $text, $params ) {
    $mode = isset( $params['mode'] ) ? $params['mode'] : false;

    try {
        $ig = ImageGalleryBase::factory( $mode );
    } catch ( Exception $e ) {
        // If invalid type set, fallback to default.
        $ig = ImageGalleryBase::factory( false );
    }

    $ig->setContextTitle( $this->mTitle );
    $ig->setShowBytes( false );
    $ig->setShowFilename( false );
    $ig->setParser( $this );
    $ig->setHideBadImages();
    $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

    # File names are shown only when the attribute is present at all
    $ig->setShowFilename( isset( $params['showfilename'] ) );

    if ( isset( $params['caption'] ) ) {
        $caption = $params['caption'];
        $caption = htmlspecialchars( $caption );
        $caption = $this->replaceInternalLinks( $caption );
        $ig->setCaptionHtml( $caption );
    }
    if ( isset( $params['perrow'] ) ) {
        $ig->setPerRow( $params['perrow'] );
    }
    if ( isset( $params['widths'] ) ) {
        $ig->setWidths( $params['widths'] );
    }
    if ( isset( $params['heights'] ) ) {
        $ig->setHeights( $params['heights'] );
    }
    $ig->setAdditionalOptions( $params );

    # Hand the hook a reference to a local copy of the object handle
    # rather than to $this itself; taking a reference to $this is
    # problematic on PHP 7.1 and later.
    $parser = $this;
    Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

    $lines = StringUtils::explode( "\n", $text );
    foreach ( $lines as $line ) {
        # match lines like these:
        # Image:someimage.jpg|This is some image
        $matches = [];
        preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
        # Skip empty lines
        if ( count( $matches ) == 0 ) {
            continue;
        }

        if ( strpos( $matches[0], '%' ) !== false ) {
            $matches[1] = rawurldecode( $matches[1] );
        }
        # NOTE(review): this assignment was missing from the listing, leaving
        # $title undefined below. Restored as a File-namespace title built
        # from the first field - confirm against the upstream file.
        $title = Title::newFromText( $matches[1], NS_FILE );
        if ( is_null( $title ) ) {
            # Bogus title. Ignore these so we don't bomb out later.
            continue;
        }

        # We need to get what handler the file uses, to figure out parameters.
        # Note, a hook can override the file name, and choose an entirely different
        # file (which potentially could be of a different type and have different handler).
        $options = [];
        $descQuery = false;
        Hooks::run( 'BeforeParserFetchFileAndTitle',
            [ $this, $title, &$options, &$descQuery ] );
        # Don't register it now, as ImageGallery does that later.
        $file = $this->fetchFileNoRegister( $title, $options );
        $handler = $file ? $file->getHandler() : false;

        $paramMap = [
            'img_alt' => 'gallery-internal-alt',
            'img_link' => 'gallery-internal-link',
        ];
        if ( $handler ) {
            $paramMap = $paramMap + $handler->getParamMap();
            // We don't want people to specify per-image widths.
            // Additionally the width parameter would need special casing anyhow.
            unset( $paramMap['img_width'] );
        }

        $mwArray = new MagicWordArray( array_keys( $paramMap ) );

        $label = '';
        $alt = '';
        $link = '';
        $handlerOptions = [];
        if ( isset( $matches[3] ) ) {
            // look for an |alt= definition while trying not to break existing
            // captions with multiple pipes (|) in it, until a more sensible grammar
            // is defined for images in galleries

            // FIXME: Doing recursiveTagParse at this stage, and the trim before
            // splitting on '|' is a bit odd, and different from makeImage.
            $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
            $parameterMatches = StringUtils::explode( '|', $matches[3] );

            foreach ( $parameterMatches as $parameterMatch ) {
                list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
                if ( $magicName ) {
                    $paramName = $paramMap[$magicName];

                    switch ( $paramName ) {
                        case 'gallery-internal-alt':
                            $alt = $this->stripAltText( $match, false );
                            break;
                        case 'gallery-internal-link':
                            $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
                            $chars = self::EXT_LINK_URL_CLASS;
                            $addr = self::EXT_LINK_ADDR;
                            $prots = $this->mUrlProtocols;
                            // check to see if link matches an absolute url,
                            // if not then it must be a wiki link.
                            if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
                                $link = $linkValue;
                            } else {
                                $localLinkTitle = Title::newFromText( $linkValue );
                                if ( $localLinkTitle !== null ) {
                                    $link = $localLinkTitle->getLinkURL();
                                }
                            }
                            break;
                        default:
                            // Must be a handler specific parameter. (Only reachable
                            // when $handler is set, since only then does $paramMap
                            // contain anything besides the two internal names.)
                            if ( $handler->validateParam( $paramName, $match ) ) {
                                $handlerOptions[$paramName] = $match;
                            } else {
                                // Guess not, consider it as caption.
                                wfDebug( "$parameterMatch failed parameter validation\n" );
                                $label = '|' . $parameterMatch;
                            }
                    }

                } else {
                    // Last pipe wins.
                    $label = '|' . $parameterMatch;
                }
            }
            // Remove the pipe.
            $label = substr( $label, 1 );
        }

        $ig->add( $title, $label, $alt, $link, $handlerOptions );
    }
    $html = $ig->toHTML();
    Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
    return $html;
}
5481 
/**
 * Get the image parameter map and matching MagicWordArray for a media
 * handler, building and caching them per handler class on first use.
 *
 * @param MediaHandler|bool $handler Handler object, or a false value for
 *   "no handler" (cached under the empty string)
 * @return array Two elements: the map of magic word ID => [ type, name ]
 *   and the MagicWordArray built from the map's keys
 */
public function getImageParams( $handler ) {
    # Built-in parameter names, grouped by the type they are filed under
    static $builtinNames = [
        'horizAlign' => [ 'left', 'right', 'center', 'none' ],
        'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
            'bottom', 'text-bottom' ],
        'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
            'upright', 'border', 'link', 'alt', 'class' ],
    ];
    # Built once per request from $builtinNames
    static $builtinMap;

    $cacheKey = $handler ? get_class( $handler ) : '';

    if ( !isset( $this->mImageParams[$cacheKey] ) ) {
        if ( !$builtinMap ) {
            $builtinMap = [];
            foreach ( $builtinNames as $type => $names ) {
                foreach ( $names as $name ) {
                    # e.g. 'text-top' is registered as 'img_text_top'
                    $magicId = 'img_' . str_replace( '-', '_', $name );
                    $builtinMap[$magicId] = [ $type, $name ];
                }
            }
        }

        # Start from the built-ins and layer the handler's parameters on top
        $map = $builtinMap;
        if ( $handler ) {
            foreach ( $handler->getParamMap() as $magic => $paramName ) {
                $map[$magic] = [ 'handler', $paramName ];
            }
        }

        $this->mImageParams[$cacheKey] = $map;
        $this->mImageParamsMagicArray[$cacheKey] = new MagicWordArray( array_keys( $map ) );
    }

    return [
        $this->mImageParams[$cacheKey],
        $this->mImageParamsMagicArray[$cacheKey]
    ];
}
5525 
/**
 * Parse image options text and hand the result to Linker::makeImageLink().
 *
 * @param Title $title Title of the file (hooks may substitute another)
 * @param string $options Pipe-separated option string, "options|alt text"
 * @param LinkHolderArray|bool $holders Passed through to stripAltText()
 * @return string Whatever Linker::makeImageLink() returns
 */
public function makeImage( $title, $options, $holders = false ) {
    # The options text is of the form "options|alt text". Options are:
    #  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
    #  * left       no resizing, just left align. label is used for alt= only
    #  * right      same, but right aligned
    #  * none       same, but not aligned
    #  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
    #  * center     center the image
    #  * frame      Keep original image size, no magnify-button.
    #  * framed     Same as "frame"
    #  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
    #  * upright    reduce width for upright images, rounded to full __0 px
    #  * border     draw a 1px border around the image
    #  * alt        Text for HTML alt attribute (defaults to empty)
    #  * class      Set a class for img node
    #  * link       Set the target of the image link. Can be external, interwiki, or local
    # vertical-align values (no % or length right now):
    #  baseline, sub, super, top, text-top, middle, bottom, text-bottom

    $pieces = StringUtils::explode( "|", $options );

    # Give extensions a chance to select the file revision for us
    $options = [];
    $descQuery = false;
    Hooks::run( 'BeforeParserFetchFileAndTitle',
        [ $this, $title, &$options, &$descQuery ] );
    # Fetch and register the file (file title may be different via hooks)
    list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

    # Get parameter map
    $handler = $file ? $file->getHandler() : false;
    list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

    if ( !$file ) {
        $this->addTrackingCategory( 'broken-file-category' );
    }

    # Process the input parameters
    $caption = '';
    $params = [
        'frame' => [],
        'handler' => [],
        'horizAlign' => [],
        'vertAlign' => [],
    ];
    $formatSeen = false;
    foreach ( $pieces as $piece ) {
        $piece = trim( $piece );
        list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $piece );

        if ( !isset( $paramMap[$magicName] ) ) {
            # Not a known option: it becomes the caption (last one wins)
            $caption = $piece;
            continue;
        }

        list( $type, $paramName ) = $paramMap[$magicName];
        $validated = false;

        if ( $type === 'handler' && $paramName === 'width' ) {
            # Special case; width and height come in one variable together.
            # Each dimension is validated and stored on its own.
            $dims = $this->parseWidthParam( $value );
            foreach ( [ 'width', 'height' ] as $dim ) {
                if ( isset( $dims[$dim] ) && $handler->validateParam( $dim, $dims[$dim] ) ) {
                    $params[$type][$dim] = $dims[$dim];
                    $validated = true;
                }
            }
            # else no validation -- bug 13436
        } else {
            if ( $type === 'handler' ) {
                # Validate handler parameter
                $validated = $handler->validateParam( $paramName, $value );
            } elseif ( in_array( $paramName, [ 'manualthumb', 'alt', 'class' ], true ) ) {
                # @todo FIXME: Possibly check validity here for
                # manualthumb? downstream behavior seems odd with
                # missing manual thumbs.
                $validated = true;
                $value = $this->stripAltText( $value, $holders );
            } elseif ( $paramName === 'link' ) {
                $chars = self::EXT_LINK_URL_CLASS;
                $addr = self::EXT_LINK_ADDR;
                $prots = $this->mUrlProtocols;
                if ( $value === '' ) {
                    # "link=" with nothing after it: no link at all
                    $paramName = 'no-link';
                    $value = true;
                    $validated = true;
                } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
                    # Looks like a URL; accept it only if the whole value is one
                    if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
                        $paramName = 'link-url';
                        $this->mOutput->addExternalLink( $value );
                        if ( $this->mOptions->getExternalLinkTarget() ) {
                            $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
                        }
                        $validated = true;
                    }
                } else {
                    # Otherwise try it as a page title
                    $linkTitle = Title::newFromText( $value );
                    if ( $linkTitle ) {
                        $paramName = 'link-title';
                        $value = $linkTitle;
                        $this->mOutput->addLink( $linkTitle );
                        $validated = true;
                    }
                }
            } elseif ( in_array( $paramName, [ 'frameless', 'framed', 'thumbnail' ], true ) ) {
                // use first appearing option, discard others.
                $validated = !$formatSeen;
                $formatSeen = true;
            } else {
                # Most other things appear to be empty or numeric...
                $validated = ( $value === false || is_numeric( trim( $value ) ) );
            }

            if ( $validated ) {
                $params[$type][$paramName] = $value;
            }
        }

        if ( !$validated ) {
            # A recognised option that failed validation is caption text too
            $caption = $piece;
        }
    }

    # Process alignment parameters
    if ( $params['horizAlign'] ) {
        $params['frame']['align'] = key( $params['horizAlign'] );
    }
    if ( $params['vertAlign'] ) {
        $params['frame']['valign'] = key( $params['vertAlign'] );
    }

    $params['frame']['caption'] = $caption;

    # Will the image be presented in a frame, with the caption below?
    $frameParams = $params['frame'];
    $imageIsFramed = isset( $frameParams['frame'] )
        || isset( $frameParams['framed'] )
        || isset( $frameParams['thumbnail'] )
        || isset( $frameParams['manualthumb'] );

    # In the old days, [[Image:Foo|text...]] would set alt text. Later it
    # came to also set the caption, ordinary text after the image -- which
    # makes no sense, because that just repeats the text multiple times in
    # screen readers. It *also* came to set the title attribute.
    # Now that we have an alt attribute, we should not set the alt text to
    # equal the caption: that's worse than useless, it just repeats the
    # text. This is the framed/thumbnail case. If there's no caption, we
    # use the unnamed parameter for alt text as well, just for the time
    # being, if the unnamed param is set and the alt param is not.
    # For the future, we need to figure out if we want to tweak this more,
    # e.g., introducing a title= parameter for the title; ignoring the
    # unnamed parameter entirely for images without a caption; adding an
    # explicit caption= parameter and preserving the old magic unnamed
    # parameter for BC; ...
    $hasAlt = isset( $params['frame']['alt'] );
    if ( $imageIsFramed ) {
        # Framed image
        if ( $caption === '' && !$hasAlt ) {
            # No caption or alt text, add the filename as the alt text so
            # that screen readers at least get some description of the image
            $params['frame']['alt'] = $title->getText();
        }
        # Do not set $params['frame']['title'] because tooltips don't make sense
        # for framed images
    } else {
        # Inline image
        if ( !$hasAlt ) {
            # No alt text: use the "caption" for it, or fall back to the
            # filename when there is no caption either
            $params['frame']['alt'] = $caption !== ''
                ? $this->stripAltText( $caption, $holders )
                : $title->getText();
        }
        # Use the "caption" for the tooltip text
        $params['frame']['title'] = $this->stripAltText( $caption, $holders );
    }

    Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

    # Linker does the rest
    $time = isset( $options['time'] ) ? $options['time'] : false;
    $ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
        $time, $descQuery, $this->mOptions->getThumbSize() );

    # Give the handler a chance to modify the parser object
    if ( $handler ) {
        $handler->parserTransformHook( $this, $file );
    }

    return $ret;
}
5743 
/**
 * Turn caption wikitext into plain text suitable for an attribute such
 * as a tooltip: link placeholders and strip markers are expanded and all
 * tags are removed.
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Holder array to resolve link
 *   placeholders with; a false value uses replaceLinkHoldersText()
 * @return string
 */
protected function stripAltText( $caption, $holders ) {
    # Strip bad stuff out of the title (tooltip).  We can't just use
    # replaceLinkHoldersText() here, because if this function is called
    # from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
    $plain = $holders
        ? $holders->replaceText( $caption )
        : $this->replaceLinkHoldersText( $caption );

    # make sure there are no placeholders in thumbnail attributes
    # that are later expanded to html- so expand them now and
    # remove the tags
    $plain = $this->mStripState->unstripBoth( $plain );

    return Sanitizer::stripAllTags( $plain );
}
5767 
/**
 * Mark the current parser output as uncacheable by setting its cache
 * expiry to 0.
 *
 * @throws MWException If there is no current parser output
 */
public function disableCache() {
    wfDebug( "Parser output marked as uncacheable.\n" );

    $output = $this->mOutput;
    if ( $output ) {
        $output->updateCacheExpiry( 0 ); // new style, for consistency
        return;
    }

    throw new MWException( __METHOD__ .
        " can only be called when actually parsing something" );
}
5780 
/**
 * Expand variables in the text and then unstrip it. The result is both
 * written back through the reference and returned.
 *
 * @param string &$text Text to process; modified by reference
 * @param PPFrame|bool $frame Passed to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
    $expanded = $this->replaceVariables( $text, $frame );
    $text = $this->mStripState->unstripBoth( $expanded );

    return $text;
}
5794 
/**
 * List the names of all registered tag hooks.
 *
 * @return array Keys of mTransparentTagHooks, then mTagHooks, then
 *   mFunctionTagHooks, in that order
 */
public function getTags() {
	$names = array_keys( $this->mTransparentTagHooks );
	foreach ( [ $this->mTagHooks, $this->mFunctionTagHooks ] as $hookTable ) {
		$names = array_merge( $names, array_keys( $hookTable ) );
	}
	return $names;
}
5807 
/**
 * Replace every registered transparent tag in the text with the output of
 * its hook.
 *
 * Each hook is invoked with the tag content, its parameters and this
 * parser. An extracted tag with no matching hook is put back unchanged.
 *
 * @param string $text
 * @return string Text with the extraction markers substituted
 */
public function replaceTransparentTags( $text ) {
	$extracted = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $extracted );

	$substitutions = [];
	foreach ( $extracted as $marker => $info ) {
		list( $element, $content, $params, $tag ) = $info;
		$hookKey = strtolower( $element );

		$substitutions[$marker] = isset( $this->mTransparentTagHooks[$hookKey] )
			? call_user_func_array(
				$this->mTransparentTagHooks[$hookKey],
				[ $content, $params, $this ]
			)
			: $tag;
	}

	return strtr( $text, $substitutions );
}
5839 
/**
 * Shared implementation of getSection() and replaceSection().
 *
 * The section ID is split on '-': the last piece is the section index and
 * any earlier piece equal to 'T' switches preprocessing to inclusion mode.
 * The text is preprocessed to a DOM whose top-level <h> nodes mark section
 * boundaries; a section runs until the next heading of the same or a
 * higher level (section 0 runs until the first heading).
 *
 * @param string $text Full wikitext
 * @param string $sectionId Section identifier, e.g. "2" or "T-2"
 * @param string $mode 'get' to return the section, 'replace' to return the
 *   whole text with the section swapped for $newText
 * @param string $newText Replacement text in 'replace' mode; in 'get' mode
 *   the value returned when the section does not exist
 * @return string
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Keep the object returned by lock() alive in a local for the whole call
	$parseLock = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$frame = $this->getPreprocessor()->newFrame();
	$result = '';

	$isGet = ( $mode === 'get' );
	$isReplace = ( $mode === 'replace' );

	# Process section extraction flags
	$idParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $idParts );
	$flags = in_array( 'T', $idParts, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			return $isGet ? '' : $newText;
		}
		return $isGet ? $newText : $text;
	}

	# Preprocess the text.
	# <h> nodes indicate section breaks. They can only occur at the top
	# level, so we can find them by iterating the root's children.
	$node = $this->preprocessToDom( $text, $flags )->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			# Everything before the target is copied through when replacing
			if ( $isReplace ) {
				$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $isGet ? $newText : $text;
	}

	# Find the end of the section, including nested sections.
	# $node is known to be set here, so the loop body runs at least once.
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			$level = $heading['level'];
			if ( $heading['i'] != $sectionIndex && $level <= $targetLevel ) {
				break;
			}
		}
		if ( $isGet ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Write out the remainder (in replace mode only)
	if ( $isReplace ) {
		# Output the replacement text.
		# Add two newlines on -- trailing whitespace in $newText is
		# conventionally stripped by the editor, so we need both newlines
		# to restore the paragraph gap.
		# Only add trailing whitespace if there is newText.
		if ( $newText != "" ) {
			$result .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $result ) ) {
		# Re-insert stripped tags
		$result = rtrim( $this->mStripState->unstripBoth( $result ) );
	}

	return $result;
}
5979 
/**
 * Return the text of one section of the given wikitext.
 *
 * Thin wrapper around extractSections() in 'get' mode.
 *
 * @param string $text Full wikitext
 * @param string $sectionId Section identifier
 * @param string $defaultText Value handed to extractSections() as the
 *   fallback for a section that cannot be found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';
	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5997 
/**
 * Return the given wikitext with one section replaced.
 *
 * Thin wrapper around extractSections() in 'replace' mode.
 *
 * @param string $oldText Full wikitext before the change
 * @param string $sectionId Section identifier
 * @param string $newText Text to put in place of the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';
	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
6013 
/**
 * Accessor for the revision ID stored on this parser.
 *
 * @return mixed Current value of mRevisionId (getRevisionObject() treats
 *   null as "no revision")
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;
	return $revisionId;
}
6022 
/**
 * Get the revision object for $this->mRevisionId, caching it in
 * mRevisionObject.
 *
 * @return mixed The cached or freshly looked-up revision; null when
 *   mRevisionId is null
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject !== null ) {
		return $this->mRevisionObject;
	}
	if ( $this->mRevisionId === null ) {
		return null;
	}

	$lookup = $this->mOptions->getCurrentRevisionCallback();
	$revision = call_user_func( $lookup, $this->getTitle(), $this );

	# If the parse is for a new revision, then the callback should have
	# already been set to force the object and should match mRevisionId.
	# If not, try to fetch by mRevisionId for sanity.
	$wrongRevision = $revision && $revision->getId() != $this->mRevisionId;
	if ( $wrongRevision ) {
		$revision = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $revision;

	return $revision;
}
6052 
/**
 * Get the timestamp to use for the revision being parsed, adjusted by the
 * content language to the site-default timezone offset.
 *
 * The value is computed once and cached in mRevisionTimestamp. When no
 * revision object is available the current time is used instead.
 *
 * @return mixed Value produced by $wgContLang->userAdjust()
 */
public function getRevisionTimestamp() {
	if ( is_null( $this->mRevisionTimestamp ) ) {
		# The content language lives in a global; it has to be imported
		# explicitly, otherwise $wgContLang is an undefined local variable
		# and the userAdjust() call below is made on null.
		global $wgContLang;

		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
	}
	return $this->mRevisionTimestamp;
}
6075 
/**
 * Get the name of the user credited with the revision being parsed,
 * caching it in mRevisionUser.
 *
 * @return mixed User name, or whatever mRevisionUser already held when
 *   neither a revision object nor a save/preview context is available
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionObject();
	if ( $revision ) {
		$this->mRevisionUser = $revision->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# if this template is subst: the revision id will be blank,
		# so just use the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6095 
/**
 * Get the size of the revision being parsed, caching it in mRevisionSize.
 *
 * @return mixed Revision size, or whatever mRevisionSize already held when
 *   neither a revision object nor a save/preview context is available
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revision = $this->getRevisionObject();
	if ( $revision ) {
		$this->mRevisionSize = $revision->getSize();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# if this variable is subst: the revision id will be blank,
		# so just use the parser input size, because the substitution
		# itself will change the size.
		$this->mRevisionSize = $this->mInputSize;
	}

	return $this->mRevisionSize;
}
6116 
/**
 * Set the default sort key, both on this parser and as the 'defaultsort'
 * property of the parser output.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$propertyName = 'defaultsort';
	$this->mOutput->setProperty( $propertyName, $sort );
}
6126 
/**
 * Get the default sort key, with an unset key reported as an empty string.
 *
 * @return mixed mDefaultSort, or '' when mDefaultSort is exactly false
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6144 
/**
 * Raw accessor for the default sort key. Unlike getDefaultSort(), the
 * stored value is returned as-is, including false.
 *
 * @return mixed Current value of mDefaultSort
 */
public function getCustomDefaultSort() {
	$sortKey = $this->mDefaultSort;
	return $sortKey;
}
6154 
/**
 * Guess the anchor ("#...") that a section heading with the given
 * wikitext would get.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped section name
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$sectionName = $this->stripSectionName( $text );

	# NOTE(review): the listing this code was taken from skips a line number
	# at this point; a statement may have been lost in extraction. Confirm
	# against the upstream file.
	return '#' . Sanitizer::escapeId( $sectionName, 'noninitial' );
}
6170 
/**
 * Same as guessSectionNameFromWikiText(), but asks Sanitizer::escapeId()
 * for the 'legacy' form of the anchor as well.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped section name
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$sectionName = $this->stripSectionName( $text );

	# NOTE(review): the listing this code was taken from skips a line number
	# at this point; a statement may have been lost in extraction. Confirm
	# against the upstream file.
	$escapeOptions = [ 'noninitial', 'legacy' ];
	return '#' . Sanitizer::escapeId( $sectionName, $escapeOptions );
}
6185 
/**
 * Strip wikitext markup from a heading so it can be used in a section
 * anchor: link markup is reduced to its visible text, quote markup is run
 * through doQuotes(), and anything between '<' and '>' is removed.
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function stripSectionName( $text ) {
	# Strip internal link markup: piped links keep their label, the
	# remaining links keep their target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold), then strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $this->doQuotes( $text ) );
}
6218 
/**
 * Start a parse, expand variables, restore stripped content and remove
 * disallowed HTML tags from the given text.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @return string
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	# Keep the object returned by lock() alive in a local for the whole call
	$parseLock = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );
	return Sanitizer::removeHTMLtags( $unstripped );
}
6240 
/**
 * Run preSaveTransform() on the text, using the user taken from the given
 * parser options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $title, $user, $options );
}
6250 
/**
 * Run testSrvus() with the output type fixed to self::OT_PREPROCESS.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;
	return $this->testSrvus( $text, $title, $options, $outputType );
}
6260 
/**
 * Apply a callback to every stretch of the text that lies outside strip
 * markers, and reassemble the result with the markers left untouched.
 *
 * A marker runs from self::MARKER_PREFIX to the next self::MARKER_SUFFIX.
 * A prefix with no suffix after it is copied through verbatim to the end
 * of the string. The callback also receives the (possibly empty) text
 * immediately in front of each marker.
 *
 * @param string $s Text to process
 * @param callable $callback Receives a text fragment, returns its replacement
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$result = '';
	$offset = 0;

	while ( $offset < $length ) {
		$start = strpos( $s, self::MARKER_PREFIX, $offset );
		if ( $start === false ) {
			# No more markers: the remainder is ordinary text
			$result .= call_user_func( $callback, substr( $s, $offset ) );
			break;
		}

		# Ordinary text in front of the marker
		$result .= call_user_func( $callback, substr( $s, $offset, $start - $offset ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			# Unterminated marker: pass the rest through unchanged
			$result .= substr( $s, $start );
			break;
		}

		# Copy the whole marker, suffix included, and continue after it
		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$offset = $end;
	}

	return $result;
}
6300 
/**
 * Pass the text through the current strip state's killMarkers().
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
6310 
/**
 * Bundle half-parsed text together with the parser state it depends on.
 *
 * @param string $text
 * @return array With keys 'text', 'version' (self::HALF_PARSED_VERSION),
 *   'stripState' and 'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
6336 
/**
 * Merge data produced by serializeHalfParsedText() into the current parser
 * state and return the text it carried.
 *
 * @param array $data Array with 'text', 'version', 'stripState' and
 *   'linkHolders' keys
 * @throws MWException If 'version' is missing or differs from
 *   self::HALF_PARSED_VERSION
 * @return string
 */
public function unserializeHalfParsedText( $data ) {
	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	$texts = [ $data['text'] ];

	# First, extract the strip state.
	$texts = $this->mStripState->merge( $data['stripState'], $texts );

	# Now renumber links
	$texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );

	# Should be good to go.
	return $texts[0];
}
6367 
/**
 * Check whether an array carries the half-parsed-text version this parser
 * understands.
 *
 * @param array $data
 * @return bool True if 'version' is set and equals self::HALF_PARSED_VERSION
 */
public function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}
	return $data['version'] == self::HALF_PARSED_VERSION;
}
6380 
/**
 * Parse an image size parameter of the form "WIDTHxHEIGHT" or "WIDTH",
 * each optionally followed by "px".
 *
 * @param string $value
 * @return array Empty for '' or unrecognised input; otherwise 'width' and,
 *   for the "x" form, 'height', both as integers
 */
public function parseWidthParam( $value ) {
	if ( $value === '' ) {
		return [];
	}

	# (bug 13500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$m = [];
	if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
		return [
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6408 
/**
 * Mark the parser as being in a parse and return an object that clears
 * the mark again.
 *
 * The returned ScopedCallback wraps a closure that resets mInParse to
 * false. Callers in this file keep it in a local variable for the length
 * of the parse.
 *
 * @throws MWException If the parser is already marked as in a parse
 * @return ScopedCallback
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively?" );
	}

	$this->mInParse = true;

	return new ScopedCallback( function() {
		$this->mInParse = false;
	} );
}
6431 
/**
 * Remove a single <p>...</p> wrapper from around the given HTML.
 *
 * The input is returned unchanged unless it consists of exactly one
 * paragraph, i.e. it matches the wrapper pattern and the inner content
 * contains no further '</p>'.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );

	if ( $isWrapped && strpos( $m[1], '</p>' ) === false ) {
		return $m[1];
	}

	return $html;
}
6452 
/**
 * Return a parser that is not currently marked as in a parse: this
 * instance when it is idle, otherwise a new instance of the class named
 * in $wgParserConf['class'], constructed with $wgParserConf.
 *
 * @return Parser
 */
public function getFreshParser() {
	global $wgParserConf;

	if ( !$this->mInParse ) {
		return $this;
	}

	return new $wgParserConf['class']( $wgParserConf );
}
6471 
/**
 * Flag the parser output as needing OOUI.
 */
public function enableOOUI() {
	# NOTE(review): the listing this code was taken from skips a line number
	# at the top of this body; a statement may have been lost in extraction.
	# Confirm against the upstream file.
	$output = $this->mOutput;
	$output->setEnableOOUI( true );
}
6482 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:6029
setTitle($t)
Set the context title.
Definition: Parser.php:740
$mAutonumber
Definition: Parser.php:184
markerSkipCallback($s, $callback)
Call a callback function on all regions of the given text that are not inside strip markers...
Definition: Parser.php:6277
#define the
table suitable for use with IDatabase::select()
$mPPNodeCount
Definition: Parser.php:198
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2051
static getVariableIDs()
Get an array of parser variable IDs.
Definition: MagicWord.php:271
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:117
const MARKER_PREFIX
Definition: Parser.php:141
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
Definition: deferred.txt:11
external whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2558
isValidHalfParsedText($data)
Returns true if the given array, presumed to be generated by serializeHalfParsedText(), is compatible with the current version of the parser.
Definition: Parser.php:6377
null means default in associative array form
Definition: hooks.txt:1798
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1798
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1734
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
getSection($text, $sectionId, $defaultText= '')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:5994
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1248
$mTplRedirCache
Definition: Parser.php:200
killMarkers($text)
Remove any strip markers found in the given text.
Definition: Parser.php:6307
wfGetDB($db, $groups=[], $wiki=false)
Get a Database object.
static tocList($toc, $lang=false)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1745
fetchTemplateAndTitle($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3937
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:762
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:6081
setFunctionTagHook($tag, $callback, $flags)
Create a tag function, e.g.
Definition: Parser.php:5277
the array() calling protocol came about after MediaWiki 1.4rc1.
stripSectionName($text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6200
null for the local wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1418
const OT_PREPROCESS
Definition: Defines.php:228
$mLastSection
Definition: Parser.php:191
static linkKnown($target, $html=null, $customAttribs=[], $query=[], $options=[ 'known', 'noclasses'])
Identical to link(), except $options defaults to 'known'.
Definition: Linker.php:264
$mDoubleUnderscores
Definition: Parser.php:200
magic word the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2321
Group all the pieces relevant to the context of a request into one instance.
getPreloadText($text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:688
$context
Definition: load.php:44
validateSig($text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4991
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:250
$wgSitename
Name of the site.
renderImageGallery($text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5328
recursivePreprocess($text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:669
replaceExternalLinks($text)
Replace external links (REL)
Definition: Parser.php:1778
static isNonincludable($index)
Is it impossible to use pages from this namespace as templates?
nextLinkID()
Definition: Parser.php:829
const SPACE_NOT_NL
Definition: Parser.php:100
static replaceUnusualEscapes($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1906
getImageParams($handler)
Definition: Parser.php:5486
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
doHeadings($text)
Parse headers and return html.
Definition: Parser.php:1557
static getTitleFor($name, $subpage=false, $fragment= '')
Get a localised Title object for a specified special page name.
Definition: SpecialPage.php:75
const OT_PLAIN
Definition: Parser.php:121
getTags()
Accessor.
Definition: Parser.php:5800
findColonNoLinks($str, &$before, &$after)
Split up a string on ':', ignoring any occurrences inside tags to prevent illegal overlapping...
Definition: Parser.php:2764
static isWellFormedXmlFragment($text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:735
const OT_WIKI
Definition: Parser.php:118
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:1932
fetchFileAndTitle($title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:4079
User $mUser
Definition: Parser.php:207
We use the convention $dbr for read and $dbw for write to help you keep track of whether the database object is a the world will explode Or to be a subsequent write query which succeeded on the master may fail when replicated to the slave due to a unique key collision Replication on the slave will stop and it may take hours to repair the database and get it back online Setting read_only in my cnf on the slave will avoid this but given the dire we prefer to have as many checks as possible We provide a but the wrapper functions like please read the documentation for except in special pages derived from QueryPage It s a common pitfall for new developers to submit code containing SQL queries which examine huge numbers of rows Remember that COUNT * is(N), counting rows in atable is like counting beans in a bucket.------------------------------------------------------------------------Replication------------------------------------------------------------------------The largest installation of MediaWiki, Wikimedia, uses a large set ofslave MySQL servers replicating writes made to a master MySQL server.Itis important to understand the issues associated with this setup if youwant to write code destined for Wikipedia.It's often the case that the best algorithm to use for a given taskdepends on whether or not replication is in use.Due to our unabashedWikipedia-centrism, we often just use the replication-friendly version, but if you like, you can use wfGetLB() ->getServerCount() > 1 tocheck to see if replication is in use.===Lag===Lag primarily occurs when large write queries are sent to the master.Writes on the master are executed in parallel, but they are executed inserial when they are replicated to the slaves.The master writes thequery to the binlog when the transaction is committed.The slaves pollthe binlog and start executing the query as soon as it appears.They canservice reads while they are performing a write query, but will not readanything more from the binlog and 
thus will perform no more writes.Thismeans that if the write query runs for a long time, the slaves will lagbehind the master for the time it takes for the write query to complete.Lag can be exacerbated by high read load.MediaWiki's load balancer willstop sending reads to a slave when it is lagged by more than 30 seconds.If the load ratios are set incorrectly, or if there is too much loadgenerally, this may lead to a slave permanently hovering around 30seconds lag.If all slaves are lagged by more than 30 seconds, MediaWiki will stopwriting to the database.All edits and other write operations will berefused, with an error returned to the user.This gives the slaves achance to catch up.Before we had this mechanism, the slaves wouldregularly lag by several minutes, making review of recent editsdifficult.In addition to this, MediaWiki attempts to ensure that the user seesevents occurring on the wiki in chronological order.A few seconds of lagcan be tolerated, as long as the user sees a consistent picture fromsubsequent requests.This is done by saving the master binlog positionin the session, and then at the start of each request, waiting for theslave to catch up to that position before doing any reads from it.Ifthis wait times out, reads are allowed anyway, but the request isconsidered to be in"lagged slave mode".Lagged slave mode can bechecked by calling wfGetLB() ->getLaggedSlaveMode().The onlypractical consequence at present is a warning displayed in the pagefooter.===Lag avoidance===To avoid excessive lag, queries which write large numbers of rows shouldbe split up, generally to write one row at a time.Multi-row INSERT...SELECT queries are the worst offenders should be avoided altogether.Instead do the select first and then the insert.===Working with lag===Despite our best efforts, it's not practical to guarantee a low-lagenvironment.Lag will usually be less than one second, but mayoccasionally be up to 30 seconds.For scalability, it's very importantto keep load on 
the master low, so simply sending all your queries tothe master is not the answer.So when you have a genuine need forup-to-date data, the following approach is advised:1) Do a quick query to the master for a sequence number or timestamp 2) Run the full query on the slave and check if it matches the data you gotfrom the master 3) If it doesn't, run the full query on the masterTo avoid swamping the master every time the slaves lag, use of thisapproach should be kept to a minimum.In most cases you should just readfrom the slave and let the user deal with the delay.------------------------------------------------------------------------Lock contention------------------------------------------------------------------------Due to the high write rate on Wikipedia(and some other wikis), MediaWiki developers need to be very careful to structure their writesto avoid long-lasting locks.By default, MediaWiki opens a transactionat the first query, and commits it before the output is sent.Locks willbe held from the time when the query is done until the commit.So youcan reduce lock time by doing as much processing as possible before youdo your write queries.Often this approach is not good enough, and it becomes necessary toenclose small groups of queries in their own transaction.Use thefollowing syntax:$dbw=wfGetDB(DB_MASTER
initialiseVariables()
Initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:3274
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1798
static isEnabled()
Definition: MWTidy.php:92
Set options of the Parser.
static tidy($text)
Interface with html tidy.
Definition: MWTidy.php:45
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:5263
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
database rows
Definition: globals.txt:10
wfHostname()
Fetch server name for use in error reporting etc.
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:844
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:189
argSubstitution($piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:4182
testSrvus($text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
strip/replaceVariables/unstrip for preprocessor regression testing
Definition: Parser.php:6229
uniqPrefix()
Accessor for mUniqPrefix.
Definition: Parser.php:730
const TOC_START
Definition: Parser.php:144
Title($x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:768
SectionProfiler $mProfiler
Definition: Parser.php:259
$sort
fetchFileNoRegister($title, $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:4104
null for the local wiki Added in
Definition: hooks.txt:1418
There are three types of nodes:
$mHeadings
Definition: Parser.php:200
$value
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:5176
const COLON_STATE_TAGSLASH
Definition: Parser.php:107
static makeSelfLinkObj($nt, $html= '', $query= '', $trail= '', $prefix= '')
Make appropriate markup for a link to the current article.
Definition: Linker.php:409
const NS_SPECIAL
Definition: Defines.php:58
clearState()
Clear Parser state.
Definition: Parser.php:343
__construct($conf=[])
Definition: Parser.php:264
const EXT_LINK_ADDR
Definition: Parser.php:92
$mFirstCall
Definition: Parser.php:159
interwikiTransclude($title, $action)
Transclude an interwiki link.
Definition: Parser.php:4123
pstPass2($text, $user)
Pre-save transform helper function.
Definition: Parser.php:4863
guessLegacySectionNameFromWikiText($text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead.
Definition: Parser.php:6179
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Options($x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:822
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2548
serializeHalfParsedText($text)
Save the parser state required to convert the given half-parsed text to HTML.
Definition: Parser.php:6327
replaceLinkHolders(&$text, $options=0)
Replace "" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:5300
static activeUsers()
Definition: SiteStats.php:161
$mLinkID
Definition: Parser.php:197
doQuotes($text)
Helper function for doAllQuotes()
Definition: Parser.php:1590
preprocessToDom($text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:3304
limitationWarn($limitationType, $current= '', $max= '')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:3426
static cleanUrl($url)
Definition: Sanitizer.php:1817
wfUrlencode($s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:277
$mGeneratedPPNodeCount
Definition: Parser.php:198
Represents a title within MediaWiki.
Definition: Title.php:34
static getRandomString()
Get a random string.
Definition: Parser.php:709
$mRevisionId
Definition: Parser.php:224
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1784
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
doBlockLevels($text, $linestart)
#@-
Definition: Parser.php:2549
$wgArticlePath
Definition: img_auth.php:45
OutputType($x=null)
Accessor/mutator for the output type.
Definition: Parser.php:794
const NS_TEMPLATE
Definition: Defines.php:79
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:117
const COLON_STATE_COMMENTDASHDASH
Definition: Parser.php:110
getVariableValue($index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2922
recursiveTagParse($text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:604
const NO_ARGS
magic word & $parser
Definition: hooks.txt:2321
MagicWordArray $mVariables
Definition: Parser.php:166
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:716
const SFH_NO_HASH
Definition: Parser.php:82
const COLON_STATE_COMMENTDASH
Definition: Parser.php:109
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
wfRandomString($length=32)
Get a random string containing a number of pseudo-random hex characters.
$mForceTocPosition
Definition: Parser.php:202
preprocess($text, Title $title=null, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:645
static getCacheTTL($id)
Allow external reads of TTL array.
Definition: MagicWord.php:294
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:6019
const OT_PREPROCESS
Definition: Parser.php:119
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1612
maybeDoSubpageLink($target, &$text)
Handle link to subpage if necessary.
Definition: Parser.php:2416
$mFunctionSynonyms
Definition: Parser.php:151
If you want to remove the page from your watchlist later
replaceLinkHoldersText($text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:5311
setLinkID($id)
Definition: Parser.php:836
$mOutputType
Definition: Parser.php:221
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
$mDefaultStripList
Definition: Parser.php:154
static createAssocArgs($args)
Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
Definition: Parser.php:3378
$mExtLinkBracketedRegex
Definition: Parser.php:173
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page.Return false to stop further processing of the tag $reader:XMLReader object &$pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports.&$fullInterwikiPrefix:Interwiki prefix, may contain colons.&$pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable.Can be used to lazy-load the import sources list.&$importSources:The value of $wgImportSources.Modify as necessary.See the comment in DefaultSettings.php for the detail of how to structure this array. 
'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.&$title:Title object for the current page &$request:WebRequest &$ignoreRedirect:boolean to skip redirect check &$target:Title/string of redirect target &$article:Article object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) &$article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() &$ip:IP being check &$result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 
'LanguageGetMagic':DEPRECATED!Use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language &$magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED!Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language &$specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 'LinkBegin':Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1796
if($line===false) $args
Definition: cdb.php:64
the value to return A Title object or null for latest to be modified or replaced by the hook handler or if authentication is not possible after cache objects are set for highlighting & $link
Definition: hooks.txt:2581
static getLocalInstance($ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
const COLON_STATE_TAG
Definition: Parser.php:104
static getDoubleUnderscoreArray()
Get a MagicWordArray of double-underscore entities.
Definition: MagicWord.php:307
static splitTrail($trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1818
getTemplateDom($title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3862
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1461
openList($char)
These next three functions open, continue, and close the list element appropriate to the prefix chara...
Definition: Parser.php:2470
cleanSig($text, $parsing=false)
Clean up signature text.
Definition: Parser.php:5005
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static factory($mode=false, IContextSource $context=null)
Get a new image gallery.
$wgLanguageCode
Site language code.
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:129
Class for asserting that a callback happens when a dummy object leaves scope.
$wgExtraInterlanguageLinkPrefixes
List of additional interwiki prefixes that should be treated as interlanguage links (i...
startExternalParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:5056
wfCgiToArray($query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
wfDebugLog($logGroup, $text, $dest= 'all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not...
static capturePath(Title $title, IContextSource $context)
Just like executePath() but will override global variables and execute the page in "inclusion" mode...
const NO_TEMPLATES
addTrackingCategory($msg)
Definition: Parser.php:4401
replaceInternalLinks($s)
Process [[ ]] wikilinks.
Definition: Parser.php:2038
$mVarCache
Definition: Parser.php:155
$wgStylePath
The URL path of the skins directory.
disableCache()
Set a flag in the output object indicating that the content is dynamic and shouldn't be cached...
Definition: Parser.php:5772
$mRevisionObject
Definition: Parser.php:223
static normalizeSectionNameWhitespace($section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the id's that are used for section links.
Definition: Sanitizer.php:1342
internalParse($text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1226
Title $mTitle
Definition: Parser.php:220
static delimiterReplace($startDelim, $endDelim, $replace, $subject, $flags= '')
Perform an operation equivalent to preg_replace() with flags.
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:290
wfEscapeWikiText($text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:6058
static stripOuterParagraph($html)
Strip the outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6442
static register($parser)
$mRevIdForTs
Definition: Parser.php:228
static singleton()
Get an instance of this class.
Definition: LinkCache.php:61
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
static normalizeSubpageLink($contextTitle, $target, &$text)
Definition: Linker.php:1547
parseWidthParam($value)
Parse a width parameter of an image link, such as 300px or 200x300px.
Definition: Parser.php:6389
$mStripList
Definition: Parser.php:153
$mFunctionTagHooks
Definition: Parser.php:152
fetchScaryTemplateMaybeFromCache($url)
Definition: Parser.php:4142
const OT_PLAIN
Definition: Defines.php:230
fetchCurrentRevisionOfTitle($title)
Fetch the current revision of a given title.
Definition: Parser.php:3905
$mRevisionTimestamp
Definition: Parser.php:225
$mImageParams
Definition: Parser.php:156
stripAltText($caption, $holders)
Definition: Parser.php:5749
doAllQuotes($text)
Replace single quotes with HTML markup.
Definition: Parser.php:1573
static replaceMarkup($search, $replace, $text)
More or less "markup-safe" str_replace(). Ignores any instances of the separator inside <...>.
static normalizeUrlComponent($component, $unsafe)
Definition: Parser.php:1956
if($limit) $timestamp
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:73
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1004
$mInPre
Definition: Parser.php:191
setHook($tag, $callback)
Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>".
Definition: Parser.php:5131
const OT_WIKI
Definition: Defines.php:227
Preprocessor $mPreprocessor
Definition: Parser.php:177
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:897
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of normal(non-web) applications--they might conflict with distributors'policies
static getInstance($ts=false)
Get a timestamp instance in GMT.
const NS_MEDIA
Definition: Defines.php:57
closeList($char)
Definition: Parser.php:2522
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:59
replaceVariables($text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3349
const RECOVER_ORIG
wfMatchesDomainList($url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
StripState $mStripState
Definition: Parser.php:189
$mDefaultSort
Definition: Parser.php:199
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:885
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
incrementIncludeSize($type, $size)
Increment an include size counter.
Definition: Parser.php:4319
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:996
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
const EXT_IMAGE_REGEX
Definition: Parser.php:95
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:5068
$params
const NS_CATEGORY
Definition: Defines.php:83
static makeHeadline($level, $attribs, $anchor, $html, $link, $legacyAnchor=false)
Create a headline for content.
Definition: Linker.php:1799
static extractTagsAndParams($elements, $text, &$matches, $uniq_prefix=null)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:926
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
doTableStuff($text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1023
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:6101
$mImageParamsMagicArray
Definition: Parser.php:157
LinkHolderArray $mLinkHolders
Definition: Parser.php:195
static register($parser)
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to and or sell copies of the and to permit persons to whom the Software is furnished to do so
Definition: LICENSE.txt:10
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
closeParagraph()
#@+ Used by doBlockLevels()
Definition: Parser.php:2426
const DB_SLAVE
Definition: Defines.php:46
preSaveTransform($text, Title $title, User $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\\r\\n" -> "\\n" conversion, substituting signatur...
Definition: Parser.php:4831
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:857
$buffer
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:912
static hasSubpages($index)
Does the namespace allow subpages?
formatHeadings($text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4421
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:875
static tocUnindent($level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1703
nextItem($char)
TODO: document.
Definition: Parser.php:2496
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
static tocLine($anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1717
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
getExternalLinkAttribs($url=false)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:1878
$mInputSize
Definition: Parser.php:229
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:965
getUserSig(&$user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext...
Definition: Parser.php:4942
const HALF_PARSED_VERSION
Update this version number when the output of serialiseHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:79
const NS_FILE
Definition: Defines.php:75
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:325
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:34
static makeImageLink(Parser $parser, Title $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:545
const PTD_FOR_INCLUSION
Definition: Parser.php:113
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1798
armorLinks($text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2394
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1584
static splitWhitespace($s)
Return a three-element array: leading whitespace, string contents, trailing whitespace.
Definition: Parser.php:3316
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
setOutputType($ot)
Set the output type.
Definition: Parser.php:777
$mTagHooks
Definition: Parser.php:148
Class for handling an array of magic words.
const NS_MEDIAWIKI
Definition: Defines.php:77
static & get($id)
Factory: creates an object representing an ID.
Definition: MagicWord.php:257
static getModuleStyles()
Get CSS modules needed if HTML from the current driver is to be displayed.
Definition: MWTidy.php:63
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6478
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition: hooks.txt:242
fetchTemplate($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3965
maybeMakeExternalImage($url)
Make an image if it's allowed, either through the global option, through the exception, or through the on-wiki whitelist.
Definition: Parser.php:1979
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2403
const OT_HTML
Definition: Defines.php:226
static escapeId($id, $options=[])
Given a value, escape it so that it can be used in an id attribute and return it. ...
Definition: Sanitizer.php:1131
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to ove