MediaWiki  1.27.3
Parser.php
Go to the documentation of this file.
1 <?php
67 class Parser {
# Parser version identifier
const VERSION = '1.6.4';

# Flags for Parser::setFunctionHook
const SFH_NO_HASH = 1;
const SFH_OBJECT_ARGS = 2;

# Constants needed for external link processing
# Everything except bracket, space, or control characters
# \p{Zs} is the unicode 'separator, space' category. It covers the space 0x20
# as well as U+3000 IDEOGRAPHIC SPACE (bug 19052)
const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
# Simplified expression to match an IPv4 or IPv6 address, or
# at least one character of a host name (embeds EXT_LINK_URL_CLASS)
const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
# RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR).
# The pattern uses the "x" modifier, so the line break and indentation
# inside the literal are ignored by PCRE.
// @codingStandardsIgnoreStart Generic.Files.LineLength
const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
	\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
// @codingStandardsIgnoreEnd

# Regular expression for a non-newline space
const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';

# State constants for the definition list colon extraction
const COLON_STATE_TEXT = 0;
const COLON_STATE_TAG = 1;

# Flags for preprocessToDom
const PTD_FOR_INCLUSION = 1;

# Allowed values for $this->mOutputType
# Parameter to startExternalParse().
const OT_HTML = 1; # like parse()
const OT_WIKI = 2; # like preSaveTransform()
# Used by preprocess() and setOutputType(). The value matches upstream
# MediaWiki, where it intentionally shares the value 3 with OT_MSG.
const OT_PREPROCESS = 3; # like preprocess()
const OT_MSG = 3;
const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.

# Strip markers are built as MARKER_PREFIX . <id> . MARKER_SUFFIX. Both
# contain U+007F DELETE, which parse() removes from the input text.
const MARKER_SUFFIX = "-QINU`\"'\x7f";
const MARKER_PREFIX = "\x7f'\"`UNIQ-";

# Markers used for wrapping the table of contents
const TOC_START = '<mw:toc>';
const TOC_END = '</mw:toc>';

# Persistent:
public $mTagHooks = [];
# Read by internalParse() via array_keys(), so it must always be an array.
public $mTransparentTagHooks = [];
public $mFunctionHooks = [];
public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
public $mFunctionTagHooks = [];
public $mStripList = [];
public $mDefaultStripList = [];
public $mVarCache = [];
public $mImageParams = [];
public $mMarkerIndex = 0;
public $mFirstCall = true;

# Initialised by initialiseVariables()

public $mVariables;

public $mSubstWords;
# Initialised in constructor

# Initialized in getPreprocessor()

# Cleared with clearState():

public $mOutput;

public $mStripState;

public $mLinkID;
public $mExpensiveFunctionCount; # number of expensive parser function calls

public $mUser; # User object; only used when doing pre-save transform

# Temporary
# These are variables reset at least once per parse regardless of $clearState

public $mOptions;

public $mTitle; # Title context, used for self-link rendering and similar things
public $mOutputType; # Output type, one of the OT_xxx constants
public $ot; # Shortcut alias, see setOutputType()
public $mRevisionObject; # The revision object of the specified revision ID
public $mRevisionId; # ID to display in {{REVISIONID}} tags
public $mRevisionTimestamp; # The timestamp of the specified revision ID
public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
public $mInputSize = false; # For {{PAGESIZE}} on current page.

# Same value as MARKER_PREFIX; uniqPrefix() is deprecated in favour of the constant.
public $mUniqPrefix = Parser::MARKER_PREFIX;

# Reset to false when the parser is cloned
public $mInParse = false;

# SectionProfiler instance, recreated by clearState()
protected $mProfiler;
260 
/**
 * Store the configuration, build the bracketed external-link regex and
 * choose the preprocessor implementation.
 *
 * @param array $conf Configuration; the only key read here is
 *   'preprocessorClass', which overrides automatic selection.
 */
public function __construct( $conf = [] ) {
	$this->mConf = $conf;
	$this->mUrlProtocols = wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')'
		. self::EXT_LINK_ADDR
		. self::EXT_LINK_URL_CLASS
		. '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';

	if ( isset( $conf['preprocessorClass'] ) ) {
		// An explicit choice always wins
		$preprocessorClass = $conf['preprocessorClass'];
	} else {
		// Preprocessor_Hash is the fallback for every case except a usable DOM extension
		$preprocessorClass = 'Preprocessor_Hash';
		// Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop,
		// so no further probing is done there
		if ( !defined( 'HPHP_VERSION' ) ) {
			if ( extension_loaded( 'domxml' ) ) {
				# PECL extension that conflicts with the core DOM extension (bug 13770)
				wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
			} elseif ( extension_loaded( 'dom' ) ) {
				$preprocessorClass = 'Preprocessor_DOM';
			}
		}
	}
	$this->mPreprocessorClass = $preprocessorClass;

	wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
286 
/**
 * Drop every property of the object on destruction, starting with the
 * link holder array when it is set.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}
	// Work from a snapshot of the property names so that unsetting does not
	// interfere with the iteration
	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
298 
/**
 * Detach the clone from state shared by reference with the original and
 * notify 'ParserCloned' hook subscribers.
 */
public function __clone() {
	$this->mInParse = false;

	// Bug 56226: creating a reference "to" an object field turns the field
	// itself into a reference (until the other reference goes out of scope),
	// and cloning copies reference fields as references. Both are defined
	// PHP5 behaviours, inconvenient for us because old PHP4-era hooks pass
	// fields by reference.
	$referenceProneFields = [ 'mStripState', 'mVarCache' ];
	foreach ( $referenceProneFields as $field ) {
		// Take a non-reference copy of the value, rebind the field to that
		// copy, then drop the local name so the field is the sole owner.
		$copy = $this->$field;
		$this->$field =& $copy;
		unset( $copy );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
321 
/**
 * One-time initialisation: register the core tag hooks, initialise the
 * variables and run the 'ParserFirstCallInit' hook. Later calls are no-ops.
 */
public function firstCallInit() {
	if ( $this->mFirstCall ) {
		// Clear the flag first so the work below runs only once
		$this->mFirstCall = false;

		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		// Avoid PHP 7.1 warning from passing $this by reference
		$parser = $this;
		Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
	}
}
339 
/**
 * Reset all per-parse state: a fresh ParserOutput, StripState,
 * LinkHolderArray and SectionProfiler, zeroed counters and emptied caches.
 * Runs firstCallInit() beforehand if it has not run yet, and finishes with
 * the 'ParserClearState' hook.
 */
public function clearState() {
	if ( $this->mFirstCall ) {
		$this->firstCallInit();
	}

	// Output object; the options report every option read back to it
	$this->mOutput = new ParserOutput();
	$this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );

	$this->mAutonumber = 0;
	$this->mLastSection = '';
	$this->mDTopen = false;
	$this->mIncludeCount = [];
	$this->mArgStack = false;
	$this->mInPre = false;
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	// Revision-related values
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState();

	# Clear these on every parse, bug 4549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [ 'post-expand' => 0, 'arg' => 0 ];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: discard a preprocessor that still belongs to another parser
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'ParserClearState', [ &$parser ] );
}
396 
/**
 * Convert wikitext to HTML and return the populated ParserOutput.
 *
 * The text is run through internalParse() and internalParseHalfParsed(),
 * with the ParserBeforeStrip / ParserAfterStrip / ParserAfterParse hooks
 * around the first stage. When the options enable the limit report, two
 * HTML comments (limit report and transclusion timing report) are appended
 * to the output text.
 *
 * @param string $text Text to convert
 * @param Title $title Title passed to startParse() and used for title conversion
 * @param ParserOptions $options
 * @param bool $linestart Passed on to internalParseHalfParsed()
 * @param bool $clearState Passed on to startParse(); when true the input is
 *   also stripped of U+007F and the parser lock is taken
 * @param int|null $revid When not null, used as the revision ID for the
 *   duration of this parse; the previous revision state is restored afterwards
 * @return ParserOutput The object held in $this->mOutput
 */
public function parse( $text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	global $wgShowHostnames;

	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held only for its lifetime: the lock lasts until this method returns
		$magicScopeVariable = $this->lock();
	}

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the revision state so it can be restored before returning
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Set the converted title text unless conversion is disabled by the
	// options, by a magic word, or a display title has already been set
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();

		$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuTime !== null ) {
			$this->mOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
		$this->mOutput->setLimitReportData( 'limitreport-walltime',
			sprintf( "%.3f", $wallTime )
		);

		$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
			[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
			[ $this->mIncludeSizes['post-expand'], $max ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
			[ $this->mIncludeSizes['arg'], $max ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
		);
		Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

		$limitReport = "NewPP limit report\n";
		if ( $wgShowHostnames ) {
			$limitReport .= 'Parsed by ' . wfHostname() . "\n";
		}
		$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
		$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
		$limitReport .= 'Dynamic content: ' .
			( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
			"\n";

		foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
			// A hook returning false has handled this entry itself
			if ( Hooks::run( 'ParserLimitReportFormat',
				[ $key, &$value, &$limitReport, false, false ]
			) ) {
				$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
				$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
					->inLanguage( 'en' )->useDatabase( false );
				if ( !$valueMsg->exists() ) {
					$valueMsg = new RawMessage( '$1' );
				}
				if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
					$valueMsg->params( $value );
					$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
				}
			}
		}
		// Since we're not really outputting HTML, decode the entities and
		// then re-encode the things that need hiding inside HTML comments.
		$limitReport = htmlspecialchars_decode( $limitReport );
		Hooks::run( 'ParserLimitReport', [ $this, &$limitReport ] );

		// Sanitize for comment. Note '‐' in the replacement is U+2010,
		// which looks much like the problematic '-'.
		$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
		$text .= "\n<!-- \n$limitReport-->\n";

		// Add on template profiling data
		$dataByFunc = $this->mProfiler->getFunctionStats();
		uasort( $dataByFunc, function ( $a, $b ) {
			// Descending order by real time. The callback must return an
			// integer (<0, 0, >0); a bare boolean never signals "less than"
			// and leaves the order unreliable.
			if ( $a['real'] == $b['real'] ) {
				return 0;
			}
			return ( $a['real'] < $b['real'] ) ? 1 : -1;
		} );
		$profileReport = "Transclusion expansion time report (%,ms,calls,template)\n";
		// Only the ten most expensive entries are reported
		foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
			$profileReport .= sprintf( "%6.2f%% %8.3f %6d - %s\n",
				$item['%real'], $item['real'], $item['calls'],
				htmlspecialchars( $item['name'] ) );
		}
		$text .= "\n<!-- \n$profileReport-->\n";

		// Log pages that generate more than a tenth of the allowed node count
		if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
			wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
				$this->mTitle->getPrefixedDBkey() );
		}
	}
	$this->mOutput->setText( $text );

	// Restore the revision state saved above
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
587 
/**
 * Half-parse wikitext from within an ongoing parse: run the strip hooks
 * and internalParse() (with $isMain = false), but not the final
 * internalParseHalfParsed() stage.
 *
 * @param string $text Text to parse
 * @param PPFrame|bool $frame Frame handed to internalParse(); false for none
 * @return string Result of internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	foreach ( [ 'ParserBeforeStrip', 'ParserAfterStrip' ] as $hookName ) {
		Hooks::run( $hookName, [ &$parser, &$text, &$this->mStripState ] );
	}
	return $this->internalParse( $text, false, $frame );
}
618 
/**
 * Like recursiveTagParse(), but additionally runs the result through
 * internalParseHalfParsed() with $isMain = false.
 *
 * @param string $text Text to parse
 * @param PPFrame|bool $frame Frame handed to recursiveTagParse(); false for none
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
641 
/**
 * Expand variables in wikitext under the OT_PREPROCESS output type and
 * return the text with all strip markers restored.
 *
 * Takes the parser lock and resets state through startParse().
 *
 * @param string $text Text to preprocess
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid When not null, stored as the current revision ID
 * @param PPFrame|bool $frame Frame passed to replaceVariables(); false for none
 * @return string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Held only for its lifetime: the lock lasts until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	foreach ( [ 'ParserBeforeStrip', 'ParserAfterStrip' ] as $hookName ) {
		Hooks::run( $hookName, [ &$parser, &$text, &$this->mStripState ] );
	}

	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
669 
/**
 * Expand variables and restore strip markers without resetting parser
 * state or running any hooks.
 *
 * @param string $text Text to preprocess
 * @param PPFrame|bool $frame Frame passed to replaceVariables(); false for none
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
684 
/**
 * Process wikitext for preloading: substitute $params into the text,
 * preprocess it for inclusion under the OT_PLAIN output type and return
 * the result with strip markers restored.
 *
 * Takes the parser lock and resets state through startParse().
 *
 * @param string $text Text to process
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Message parameters substituted via RawMessage
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	// Held only for its lifetime: the lock lasts until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	// Expansion flags must be defined before use; preload text is expanded
	// without template arguments and without template transclusion.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
712 
/**
 * Return the result of wfRandomString( 16 ).
 *
 * @deprecated since 1.26 (a deprecation notice is emitted on every call)
 * @return string
 */
public static function getRandomString() {
	wfDeprecated( __METHOD__, '1.26' );
	$random = wfRandomString( 16 );
	return $random;
}
723 
/**
 * Set the user that getUser() returns in preference to the one from the
 * parser options.
 *
 * @param User|null $user Null makes getUser() fall back to the options' user
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
733 
/**
 * Return the strip marker prefix.
 *
 * @deprecated since 1.26; the value is simply self::MARKER_PREFIX
 * @return string
 */
public function uniqPrefix() {
	wfDeprecated( __METHOD__, '1.26' );
	$prefix = self::MARKER_PREFIX;
	return $prefix;
}
744 
/**
 * Set the context title.
 *
 * A falsy argument is replaced by a title built from the text 'NO TITLE'.
 * A title carrying a fragment is stored without it.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	$title = $t ? $t : Title::newFromText( 'NO TITLE' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
762 
/**
 * Get the context title stored by setTitle().
 *
 * @return Title
 */
public function getTitle() {
	$title = $this->mTitle;
	return $title;
}
771 
/**
 * Combined accessor/mutator for the title, delegating to wfSetVar().
 * NOTE(review): presumably returns the previous value and only assigns
 * when $x is not null -- confirm against wfSetVar().
 *
 * @param Title|null $x
 * @return Title
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );
	return $result;
}
781 
/**
 * Set the output type and refresh the $this->ot shortcut array, which maps
 * 'html', 'wiki', 'pre' and 'plain' to whether that type is selected.
 *
 * @param int $ot One of the OT_xxx constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$typesByKey = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$shortcut = [];
	foreach ( $typesByKey as $key => $type ) {
		// Loose comparison, so a numeric string still selects its type
		$shortcut[$key] = ( $ot == $type );
	}
	$this->ot = $shortcut;
}
797 
/**
 * Combined accessor/mutator for the output type, delegating to wfSetVar().
 * Unlike setOutputType(), this does not refresh the $this->ot shortcut.
 * NOTE(review): presumably returns the previous value and only assigns
 * when $x is not null -- confirm against wfSetVar().
 *
 * @param int|null $x One of the OT_xxx constants
 * @return int
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
807 
/**
 * Get the ParserOutput object currently being populated.
 *
 * @return ParserOutput
 */
public function getOutput() {
	$output = $this->mOutput;
	return $output;
}
816 
/**
 * Get the ParserOptions object in use.
 *
 * @return ParserOptions
 */
public function getOptions() {
	$options = $this->mOptions;
	return $options;
}
825 
/**
 * Combined accessor/mutator for the parser options, delegating to wfSetVar().
 * NOTE(review): presumably returns the previous value and only assigns
 * when $x is not null -- confirm against wfSetVar().
 *
 * @param ParserOptions|null $x
 * @return ParserOptions
 */
public function Options( $x = null ) {
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
835 
/**
 * Return the current link ID, then advance the counter by one.
 *
 * @return int The ID before incrementing
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
842 
/**
 * Overwrite the link ID counter that nextLinkID() hands out.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
849 
/**
 * Alias of getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	$language = $this->getTargetLanguage();
	return $language;
}
857 
/**
 * Determine the target language of the content being parsed.
 *
 * Resolution order: the target language from the parser options; the user
 * language when parsing an interface message; otherwise the page language
 * of the current title.
 *
 * @throws MWException When the title is needed but $this->mTitle is null
 * @return Language
 */
public function getTargetLanguage() {
	$fromOptions = $this->mOptions->getTargetLanguage();
	if ( $fromOptions !== null ) {
		return $fromOptions;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
880 
/**
 * Get the language object used for language conversion; currently the
 * same as getTargetLanguage().
 *
 * @return Language
 */
public function getConverterLanguage() {
	$language = $this->getTargetLanguage();
	return $language;
}
888 
/**
 * Get the user for this parse: the one set via setUser() when present,
 * otherwise the user from the parser options.
 *
 * @return User
 */
public function getUser() {
	return ( $this->mUser !== null )
		? $this->mUser
		: $this->mOptions->getUser();
}
901 
/**
 * Get the preprocessor, instantiating the class chosen in the constructor
 * on first use.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->mPreprocessorClass;
	$this->mPreprocessor = new $preprocessorClass( $this );
	return $this->mPreprocessor;
}
914 
/**
 * Replace every occurrence of the given tags (and HTML comments) in the
 * text with a unique marker and report what was removed.
 *
 * For each extracted item, $matches[$marker] is set to
 * [ element name, content (null for a self-closed tag), decoded attribute
 * array, full original text of the item ].
 *
 * @param array $elements Tag names to extract; they are joined with '|' and
 *   embedded into a regular expression as given
 * @param string $text Source text
 * @param array &$matches Receives the extracted items, keyed by marker
 * @param string|null $uniq_prefix Deprecated since 1.26 and ignored
 * @return string The text with extracted items replaced by markers
 */
public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) {
	if ( $uniq_prefix !== null ) {
		wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' );
	}
	// Marker counter shared by all calls, so markers stay unique per request
	static $n = 1;
	$stripped = '';
	$matches = [];

	$taglist = implode( '|', $elements );
	// The pattern is split around "?>" so it is never mistaken for a PHP close tag
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			// No further opening tag or comment
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			$element = $p[1];
			$attributes = $p[2];
			$close = $p[3];
			$inside = $p[4];
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				// $element is text matched from the input; quote it so regex
				// metacharacters or the delimiter in a tag name are matched
				// literally instead of altering (or breaking) the pattern.
				$end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				$tail = $q[1];
				$text = $q[2];
			}
		}

		$matches[$marker] = [ $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" ];
	}
	return $stripped;
}
1000 
/**
 * Get the list stored in $this->mStripList.
 *
 * @return array
 */
public function getStripList() {
	$stripList = $this->mStripList;
	return $stripList;
}
1009 
/**
 * Register $text with the strip state under a new "item" marker and
 * return that marker. The marker index is advanced on every call.
 *
 * @param string $text Text to store
 * @return string The marker standing in for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1025 
/**
 * Translate wiki table markup ({| ... |}, |-, |, !, |+) into HTML table
 * tags, line by line. Lines outside any table are passed through unchanged.
 *
 * Nesting is tracked with parallel stacks holding one entry per open table.
 *
 * @param string $text Wikitext
 * @return string Text with table markup converted to HTML
 */
public function doTableStuff( $text ) {
	$out = '';
	$cellOpen = []; # Is a cell (td/th/caption) currently open?
	$lastTags = []; # Last tag activated (td, th or caption)
	$rowOpen = []; # Is a tr tag currently open?
	$rowAttributes = []; # Pending attributes for the next tr
	$rowSeen = []; # Did this table open a <tr> element?
	$indentLevel = 0; # Indent level of the table

	# Tag name for each cell-introducing character
	$tagByMarker = [ '|' => 'td', '!' => 'th', '+' => 'caption' ];

	foreach ( StringUtils::explode( "\n", $text ) as $rawLine ) {
		$line = trim( $rawLine );

		# Empty line: emit as is and move on
		if ( $line === '' ) {
			$out .= $rawLine . "\n";
			continue;
		}

		$firstChar = $line[0];
		$firstTwo = substr( $line, 0, 2 );
		$m = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $m ) ) {
			# Start of a new table, optionally indented with leading colons
			$indentLevel = strlen( $m[1] );

			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $m[2] ),
				'table'
			);

			$rawLine = str_repeat( '<dl><dd>', $indentLevel ) . "<table{$attributes}>";
			$cellOpen[] = false;
			$lastTags[] = '';
			$rowOpen[] = false;
			$rowAttributes[] = '';
			$rowSeen[] = false;
		} elseif ( !$cellOpen ) {
			# Not inside a table: nothing below applies
			$out .= $rawLine . "\n";
			continue;
		} elseif ( $firstTwo === '|}' ) {
			# End of the current table
			$closing = '</table>' . substr( $line, 2 );
			$lastTag = array_pop( $lastTags );

			# A table that never opened a row gets one empty row
			if ( !array_pop( $rowSeen ) ) {
				$closing = '<tr><td></td></tr>' . $closing;
			}
			if ( array_pop( $rowOpen ) ) {
				$closing = '</tr>' . $closing;
			}
			if ( array_pop( $cellOpen ) ) {
				$closing = "</{$lastTag}>" . $closing;
			}
			array_pop( $rowAttributes );

			$rawLine = $closing . str_repeat( '</dd></dl>', $indentLevel );
		} elseif ( $firstTwo === '|-' ) {
			# New table row; whatever follows the dashes is attributes only
			$rest = preg_replace( '#^\|-+#', '', $line );
			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $rest ),
				'tr'
			);
			array_pop( $rowAttributes );
			$rowAttributes[] = $attributes;

			$lastTag = array_pop( $lastTags );
			array_pop( $rowSeen );
			$rowSeen[] = true;

			# Close the previous row and cell if they are still open
			$closing = '';
			if ( array_pop( $rowOpen ) ) {
				$closing = '</tr>';
			}
			if ( array_pop( $cellOpen ) ) {
				$closing = "</{$lastTag}>" . $closing;
			}

			$rawLine = $closing;
			$rowOpen[] = false;
			$cellOpen[] = false;
			$lastTags[] = '';
		} elseif ( $firstChar === '|' || $firstChar === '!' ) {
			# Cell elements: td, th or caption ("|+" also starts with "|")
			if ( $firstTwo === '|+' ) {
				$firstChar = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $firstChar === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$rawLine = '';
			foreach ( explode( '||', $line ) as $cell ) {
				$prefix = '';

				# Captions live outside rows; everything else needs an open tr
				if ( $firstChar !== '+' ) {
					$pendingAttributes = array_pop( $rowAttributes );
					if ( !array_pop( $rowOpen ) ) {
						$prefix = "<tr{$pendingAttributes}>\n";
					}
					$rowOpen[] = true;
					$rowAttributes[] = '';
					array_pop( $rowSeen );
					$rowSeen[] = true;
				}

				$lastTag = array_pop( $lastTags );
				if ( array_pop( $cellOpen ) ) {
					$prefix = "</{$lastTag}>\n{$prefix}";
				}

				$lastTag = isset( $tagByMarker[$firstChar] ) ? $tagByMarker[$firstChar] : '';
				$lastTags[] = $lastTag;

				# A cell could contain both parameters and data
				$parts = explode( '|', $cell, 2 );

				if ( strpos( $parts[0], '[[' ) !== false ) {
					# Bug 553: a '|' inside an invalid link should not
					# be mistaken as delimiting cell parameters
					$rendered = "{$prefix}<{$lastTag}>{$cell}";
				} elseif ( count( $parts ) == 1 ) {
					$rendered = "{$prefix}<{$lastTag}>{$parts[0]}";
				} else {
					$attributes = Sanitizer::fixTagAttributes(
						$this->mStripState->unstripBoth( $parts[0] ),
						$lastTag
					);
					$rendered = "{$prefix}<{$lastTag}{$attributes}>{$parts[1]}";
				}

				$rawLine .= $rendered;
				$cellOpen[] = true;
			}
		}

		$out .= $rawLine . "\n";
	}

	# Close whatever td, tr and table are still open at end of input
	while ( count( $cellOpen ) > 0 ) {
		if ( array_pop( $cellOpen ) ) {
			$out .= "</td>\n";
		}
		if ( array_pop( $rowOpen ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $rowSeen ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# Special case: don't return an empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		return '';
	}

	return $out;
}
1223 
/**
 * Main wikitext transformation pipeline: variable/template expansion,
 * HTML sanitising, tables, horizontal rules, double underscores, headings,
 * internal links, quotes, external links, magic links and heading
 * formatting, in that order.
 *
 * @param string $text Wikitext
 * @param bool $isMain Passed on to formatHeadings()
 * @param PPFrame|bool $frame Frame used for expansion; when false,
 *   replaceVariables() is used instead
 * @return string The transformed text, or the (possibly hook-modified) input
 *   when a 'ParserBeforeInternalParse' handler aborts
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	# Hook to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( !$frame ) {
		# No frame given: use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# Use the frame depth to infer how include/noinclude tags should be
		# handled: depth 0 is the top-level document, anything deeper is an
		# included document
		$flag = $frame->depth ? Parser::PTD_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		$transparentTags
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind absolute URLs, which
	# are masked to hide them from replaceExternalLinks; remove the mask now
	$noParseMarker = self::MARKER_PREFIX . 'NOPARSE';
	$text = str_replace( $noParseMarker, '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $origText, $isMain );
}
1297 
/**
 * Final parsing stage applied to half-parsed text: unstrip general
 * markers, fix up French spacing, build block-level structure, resolve
 * link holders, run language conversion, unstrip nowiki markers, replace
 * transparent tags, normalise character references and tidy the HTML.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain Whether the ParserAfterUnstrip, ParserBeforeTidy and
 *   ParserAfterTidy hooks are run
 * @param bool $linestart Passed on to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$spacePatterns = [
		# french spaces, last one Guillemet-left
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/',
		# french spaces, Guillemet-right
		'/(\\302\\253) /',
		# Beware of CSS magic word !important, bug #11874.
		'/&#160;(!\s*important)/',
	];
	$spaceReplacements = [
		'\\1&#160;',
		'\\1&#160;',
		' \\1',
	];
	$text = preg_replace( $spacePatterns, $spaceReplacements, $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	# Language conversion is skipped when disabled by the options or by a
	# magic word, and for interface messages.
	# The position of the convert() call should not be changed. It assumes
	# that the links are all replaced and the only thing left is the
	# <nowiki> mark.
	if ( !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage()
	) {
		$text = $this->getConverterLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( MWTidy::isEnabled() && $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
		$this->mOutput->addModuleStyles( MWTidy::getModuleStyles() );
	} else {
		# Attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$nestingPatterns = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (bug 3695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/',
		];
		$nestingReplacements = [
			'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			'\\1\\2</a>\\3</a>\\1\\4</a>',
			'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			'',
		];

		$text = preg_replace( $nestingPatterns, $nestingReplacements, $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1398 
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with magic
 * external links, and turn free-standing URLs into links.
 *
 * Anchors and other HTML elements are matched too so that the callback
 * can pass them through untouched.
 *
 * @param string $text
 * @return string
 */
public function doMagicLinks( $text ) {
	$protocols = wfUrlProtocolsWithoutProtRel();
	$urlCharClass = self::EXT_LINK_URL_CLASS;
	$addrStart = self::EXT_LINK_ADDR;
	$nbSpace = self::SPACE_NOT_NL; # non-newline space
	$dashOrSpace = "(?:-|$nbSpace)"; # a dash or a non-newline space
	$spaceRun = "$nbSpace++"; # possessive match of 1 or more spaces

	$regex = '!(?:                            # Start cases
			(<a[ \t\r\n>].*?</a>) |       # m[1]: Skip link text
			(<.*?>) |                     # m[2]: Skip stuff inside
			                              #       HTML elements' . "
			(\b(?i:{$protocols})({$addrStart}{$urlCharClass}*)) | # m[3]: Free external links
			                              # m[4]: Post-protocol path
			\b(?:RFC|PMID) {$spaceRun}    # m[5]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN {$spaceRun} (          # m[6]: ISBN, capture number
				(?: 97[89] {$dashOrSpace}? )?   #  optional 13-digit ISBN prefix
				(?: [0-9]  {$dashOrSpace}? ){9} #  9 digits with opt. delimiters
				[0-9Xx]                   #  check digit
			)\b
		)!xu";

	return preg_replace_callback( $regex, [ $this, 'magicLinkCallback' ], $text );
}
1434 
/**
 * Callback for doMagicLinks(): build the replacement for one match.
 *
 * The match groups are those of the doMagicLinks() regex:
 * m[1] anchor, m[2] other HTML element, m[3]/m[4] free external link and
 * its post-protocol part, m[5] RFC/PMID number, m[6] ISBN.
 *
 * @param array $m Match array from preg_replace_callback()
 * @return string HTML (or the untouched match)
 * @throws MWException If m[5] is set but the match starts with neither
 *   "RFC" nor "PMID"
 */
public function magicLinkCallback( $m ) {
	if ( isset( $m[1] ) && $m[1] !== '' ) {
		# Skip anchor
		return $m[0];
	} elseif ( isset( $m[2] ) && $m[2] !== '' ) {
		# Skip HTML element
		return $m[0];
	} elseif ( isset( $m[3] ) && $m[3] !== '' ) {
		# Free external link
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	} elseif ( isset( $m[5] ) && $m[5] !== '' ) {
		# RFC or PMID
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$id = $m[5];
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$id = $m[5];
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
	} elseif ( isset( $m[6] ) && $m[6] !== '' ) {
		# ISBN
		$isbn = $m[6];
		$space = self::SPACE_NOT_NL; # non-newline space
		# The 'u' modifier is required: SPACE_NOT_NL contains \p{Zs}, and in
		# byte mode that matches only the second byte of a UTF-8 encoded
		# U+00A0, leaving a stray lead byte in the link text and target.
		$isbn = preg_replace( "/$space/u", ' ', $isbn );
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
		return '<a href="' .
			htmlspecialchars( $titleObj->getLocalURL() ) .
			"\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>";
	} else {
		return $m[0];
	}
}
1486 
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Anything from the first escaped '<', '>' or non-breaking space onwards,
 * plus trailing punctuation, is moved out of the URL and appended after
 * the link as plain text.
 *
 * @param string $url
 * @param int $numPostProto The number of characters after the protocol
 * @return string HTML
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# '<' and '>' (which were escaped by removeHTMLtags()) should not be
	# included in URLs, per RFC 2396.
	# &nbsp; terminates a URL as well (bug T84937)
	$found = [];
	$entityRegex = '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/';
	if ( preg_match( $entityRegex, $url, $found, PREG_OFFSET_CAPTURE ) ) {
		$cutAt = $found[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Trailing punctuation goes to $trail. A right bracket only counts as
	# punctuation when the URL contains no left bracket.
	$punctuation = ',;\.:!?';
	if ( strpos( $url, '(' ) === false ) {
		$punctuation .= ')';
	}

	$reversed = strrev( $url );
	$trailLength = strspn( $reversed, $punctuation );
	# Don't break a trailing HTML entity by moving its ';' into $trail.
	# This is hot code: substr_compare avoids creating a new string, and the
	# entity check runs on the reversed string from the desired offset
	# rather than using a slow $-anchored match.
	# The un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLength
		&& substr_compare( $url, ";", -$trailLength, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $found, 0, $trailLength )
	) {
		$trailLength--;
	}
	if ( $trailLength ) {
		$trail = substr( $url, -$trailLength ) . $trail;
		$url = substr( $url, 0, -$trailLength );
	}

	# If the trail swallowed everything after the protocol, there is no
	# real URL left: return the text unlinked
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# An external image, if allowed; otherwise a plain link
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		$html = Linker::makeExternalLink( $url,
			$this->getConverterLanguage()->markNoConversion( $url, true ),
			true, 'free',
			$this->getExternalLinkAttribs( $url ) );
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}
	return $html . $trail;
}
1561 
/**
 * Convert lines of the form "== Title ==" into <hN> elements.
 *
 * Levels are processed from 6 down to 1 so that the longest run of '='
 * signs wins.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( range( 6, 1 ) as $level ) {
		$marker = str_repeat( '=', $level );
		$text = preg_replace(
			"/^{$marker}(.+){$marker}\\s*$/m",
			"<h{$level}>\\1</h{$level}>",
			$text
		);
	}
	return $text;
}
1578 
/**
 * Replace single quotes with HTML markup, one line at a time.
 *
 * @param string $text
 * @return string The text with doQuotes() applied to every line
 */
public function doAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1596 
/**
 * Convert the apostrophe mark-up ('' italics, ''' bold, ''''' both) of a
 * single line into <i>/<b> HTML.
 *
 * @param string $text One line of wikitext
 * @return string The line with apostrophe runs replaced by HTML tags
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold the apostrophe runs, even indexes the text
	# between them
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		# No apostrophe runs at all
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				# Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that is preceded by a space.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one quintuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state names the currently open tags in opening order ('', 'i', 'b',
	// 'ib', 'bi'); 'both' means a ''''' was seen whose nesting order is
	// not decided yet, with the text since then held back in $buffer.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			# Text segment
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			# Apostrophe run
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would throw
	// away a buffer consisting of the text "0".
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1778 
/**
 * Replace bracketed external links ([http://example.org text]) with HTML
 * links and register each one in the parser output.
 *
 * @param string $text
 * @return string
 * @throws MWException If the split on the bracketed-link regex fails
 */
public function replaceExternalLinks( $text ) {
	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $pieces === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# Text before the first link; the rest comes in groups of four:
	# URL, protocol, link text, text following the link
	$out = array_shift( $pieces );
	$total = count( $pieces );

	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $pieces[$pos];
		# $pieces[$pos + 1] is the protocol, which is not needed here
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$entity = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $entity[0][1];
			$label = substr( $url, $cutAt ) . ' ' . $label;
			$url = substr( $url, 0, $cutAt );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$image = $this->maybeMakeExternalImage( $label );
		if ( $image !== false ) {
			$label = $image;
		}

		# Link type for CSS - if URL==text, the link is essentially free
		$linktype = ( $label === $url ) ? 'free' : 'text';
		$dtrail = '';

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$number = $this->getTargetLanguage()->formatNum( ++$this->mAutonumber );
			$label = '[' . $number . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		$label = $this->getConverterLanguage()->markNoConversion( $label );

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$link = Linker::makeExternalLink( $url, $label, false, $linktype,
			$this->getExternalLinkAttribs( $url ) );
		$out .= $link . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $out;
}
1858 
/**
 * Get the rel attribute for a particular external link.
 *
 * Reads $wgNoFollowLinks, $wgNoFollowNsExceptions and
 * $wgNoFollowDomainExceptions.
 *
 * @param string|bool $url Optional URL, tested against the domain exceptions
 * @param Title|null $title Optional title, for the namespace exceptions.
 *   When omitted, no namespace exception can apply.
 * @return string|null 'nofollow', or null when no rel value is needed
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}
	# Only consult the namespace exceptions when there is a namespace to
	# test: a loose in_array( false, [ 0 ] ) is true, which would treat a
	# missing title as if it were in the main namespace.
	if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
		return null;
	}
	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}
	return 'nofollow';
}
1878 
/**
 * Get an associative array of additional HTML attributes appropriate for a
 * particular external link.
 *
 * @param string|bool $url Optional URL, passed on to getExternalLinkRel()
 * @return array Always has a 'rel' key (null when no rel value applies);
 *   has a 'target' key when the parser options set an external link target
 */
public function getExternalLinkAttribs( $url = false ) {
	$attribs = [];
	# May be null when no rel value is needed
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			$opener = 'noreferrer noopener';
			// Only add a separating space when there is an existing value,
			// so that a null rel does not yield ' noreferrer noopener'
			if ( $rel === null || $rel === '' ) {
				$rel = $opener;
			} else {
				$rel .= ' ' . $opener;
			}
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
1909 
/**
 * Deprecated alias of normalizeLinkUrl().
 *
 * @deprecated since 1.24, use normalizeLinkUrl()
 * @param string $url
 * @return string
 */
public static function replaceUnusualEscapes( $url ) {
	wfDeprecated( __METHOD__, '1.24' );

	$normalized = self::normalizeLinkUrl( $url );
	return $normalized;
}
1921 
/**
 * Normalize the percent-escapes of a link URL.
 *
 * Unsafe characters are percent-encoded first. The URL is then split into
 * fragment, query and scheme/path parts, and each part is passed to
 * normalizeUrlComponent() with its own list of characters that must stay
 * escaped.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# First, make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $hit ) {
			return rawurlencode( $hit[0] );
		},
		$url
	);

	# Parts are cut off from the right; characters listed per delimiter
	# stay escaped within that part
	$rightParts = [
		# Fragment part - 'fragment'
		'#' => '"#%<>[\]^`{|}',
		# Query part - 'query' minus &=+;
		'?' => '"#%<>[\]^`{|}&=+;',
	];

	$tail = '';
	$limit = strlen( $url );
	foreach ( $rightParts as $delimiter => $unsafe ) {
		$pos = strpos( $url, $delimiter );
		if ( $pos !== false && $pos < $limit ) {
			$part = substr( $url, $pos, $limit - $pos );
			$tail = self::normalizeUrlComponent( $part, $unsafe ) . $tail;
			$limit = $pos;
		}
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$head = self::normalizeUrlComponent( substr( $url, 0, $limit ), '"#%<>[\]^`{|}/?' );

	return $head . $tail;
}
1966 
/**
 * Normalize the percent-escapes within one part of a URL.
 *
 * An escape is decoded when it stands for a character with a code between
 * 33 and 126 that is not listed in $unsafe; any other escape is kept, with
 * its hex digits upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must stay escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$printable = $code > 32 && $code < 127;
			if ( !$printable || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape[0] );
			}
			# Unescape it
			return $decoded;
		},
		$component
	);
}
1981 
/**
 * Make an image tag if external images are allowed for this URL.
 *
 * @param string $url
 * @return string|bool HTML for the image, or false when the URL is not
 *   turned into an image
 */
private function maybeMakeExternalImage( $url ) {
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();
	$hasPrefixRule = !empty( $allowedFrom );

	# The option is either a single URL prefix or an array of prefixes
	$prefixMatches = false;
	if ( $hasPrefixRule ) {
		$prefixes = is_array( $allowedFrom ) ? $allowedFrom : [ $allowedFrom ];
		foreach ( $prefixes as $prefix ) {
			if ( strpos( $url, $prefix ) === 0 ) {
				$prefixMatches = true;
				break;
			}
		}
	}

	$html = false;
	$allowed = $this->mOptions->getAllowExternalImages() || $prefixMatches;
	if ( $allowed && preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( !$html && $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$message = wfMessage( 'external_image_whitelist' )->inContentLanguage()->text();

		foreach ( explode( "\n", $message ) as $entry ) {
			# Ignore blank entries and comments
			if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
				continue;
			}
			# Each entry is a regex fragment; escape the delimiter and
			# match case-insensitively
			$entryRegex = '/' . str_replace( '/', '\\/', $entry ) . '/i';
			if ( preg_match( $entryRegex, $url ) ) {
				# Image matches a whitelist entry
				$html = Linker::makeExternalImage( $url );
				break;
			}
		}
	}
	return $html;
}
2039 
/**
 * Process [[ ]] wikilinks in the text and merge the resulting link holders
 * into the parser's own holder array.
 *
 * @param string $s
 * @return string Processed text, with placeholders for links still to be
 *   resolved
 */
public function replaceInternalLinks( $s ) {
	# replaceInternalLinks2() rewrites $s in place and returns the holders
	$newHolders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $newHolders );
	return $s;
}
2053 
/**
 * Process [[ ]] wikilinks.
 *
 * The text is split on "[[" and each piece is handled in turn: interlanguage
 * links, files, categories, self-links and media links are dealt with
 * directly, all other links become placeholders in a LinkHolderArray.
 *
 * @param string &$s Text to process; replaced in place by the result
 * @return LinkHolderArray Holders for the links still to be resolved
 * @throws MWException If $this->mTitle is null
 */
public function replaceInternalLinks2( &$s ) {
	# Both globals are read below and must be imported into function scope
	global $wgExtraInterlanguageLinkPrefixes, $wgContLang;

	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $wgContLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = [];
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();

	// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		// @codingStandardsIgnoreEnd

		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
			# the real problem is with the $e1 regex
			# See bug 1300.
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = $m[1];

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $origLink, $text );
		} else {
			$link = $origLink;
		}

		# A leading ':' forces an ordinary link instead of the special
		# interlanguage/file/category handling below
		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		$unstrip = $this->mStripState->unstripNoWiki( $link );
		$nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
		} else {
			# Bug 4598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
				)
			) {
				# Bug 24502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				$s = rtrim( $s . $prefix );
				$s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
				continue;
			}

			if ( $ns == NS_FILE ) {
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
				} else {
					$s .= $prefix . $trail;
				}
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				$s = rtrim( $s . "\n" ); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				# Emit nothing when only newlines surround the category link
				$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = [];
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				[ $this, $nt, &$options, &$descQuery ] );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, [], $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
		}
	}
	return $holders;
}
2365 
/**
 * Render a link to a page that is known to exist, without going through a
 * link holder, and armor the URLs in the result.
 *
 * @param Title $nt
 * @param string $text Link text; defaults to the escaped prefixed title
 * @param array|string $query Query as an array or a CGI string
 * @param string $trail Text after the link
 * @param string $prefix Text placed before $text inside the link
 * @return string HTML
 */
public function makeKnownLinkHolder( $nt, $text = '', $query = [], $trail = '', $prefix = '' ) {
	# The first part of the trail goes inside the link, the rest follows it
	$trailParts = Linker::splitTrail( $trail );
	$inside = $trailParts[0];
	$trail = $trailParts[1];

	$queryArray = is_string( $query ) ? wfCgiToArray( $query ) : $query;

	if ( $text == '' ) {
		$text = htmlspecialchars( $nt->getPrefixedText() );
	}

	$html = Linker::linkKnown( $nt, "$prefix$text$inside", [], $queryArray );

	return $this->armorLinks( $html ) . $trail;
}
2394 
/**
 * Insert a NOPARSE marker in front of every URL protocol in the text, so
 * that later passes do not treat those URLs as links.
 *
 * @param string $text
 * @return string
 */
public function armorLinks( $text ) {
	$protocolRegex = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$armored = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolRegex, $armored, $text );
}
2409 
/**
 * Whether subpage links should be expanded for the current title.
 *
 * @return bool
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$namespace = $this->mTitle->getNamespace();
	return MWNamespace::hasSubpages( $namespace );
}
2418 
/**
 * Resolve a subpage link relative to the current title; delegates to
 * Linker::normalizeSubpageLink().
 *
 * @param string $target The link target as written
 * @param string &$text The link text; passed on by reference
 * @return string The resulting link target
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$resolved = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
	return $resolved;
}
2430 
/**
 * Close the section recorded in $this->mLastSection, if any, and reset the
 * section and <pre> state.
 *
 * @return string The closing tag followed by a newline, or ''
 */
public function closeParagraph() {
	$closing = ( $this->mLastSection != '' )
		? '</' . $this->mLastSection . ">\n"
		: '';

	$this->mLastSection = '';
	$this->mInPre = false;

	return $closing;
}
2446 
/**
 * Get the length of the longest common prefix of two strings, counted in
 * bytes.
 *
 * @param string $st1
 * @param string $st2
 * @return int
 */
public function getCommon( $st1, $st2 ) {
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	$same = 0;
	while ( $same < $limit && $st1[$same] == $st2[$same] ) {
		$same++;
	}
	return $same;
}
2471 
/**
 * Open the list element that belongs to a list prefix character, closing
 * any open paragraph first.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string HTML; for any other character only an error comment is
 *   returned
 */
public function openList( $char ) {
	$closed = $this->closeParagraph();

	$openers = [
		'*' => "<ul><li>",
		'#' => "<ol><li>",
		':' => "<dl><dd>",
		';' => "<dl><dt>",
	];

	if ( !is_string( $char ) || !isset( $openers[$char] ) ) {
		return '<!-- ERR 1 -->';
	}

	if ( $char === ';' ) {
		# Remember that a definition term is open
		$this->mDTopen = true;
	}

	return $closed . $openers[$char];
}
2499 
/**
 * Close the current list item and open the next one at the same level.
 *
 * For definition lists the closing tag depends on $this->mDTopen
 * (</dt> when a term is open, </dd> otherwise), and $this->mDTopen is
 * updated to reflect whether the new item is a term (';') or not (':').
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string The closing and opening tags, or '<!-- ERR 2 -->' for
 *   any other character
 */
public function nextItem( $char ) {
	if ( $char === '*' || $char === '#' ) {
		return "</li>\n<li>";
	}

	if ( $char !== ':' && $char !== ';' ) {
		return '<!-- ERR 2 -->';
	}

	$closing = $this->mDTopen ? "</dt>\n" : "</dd>\n";
	$opensTerm = ( $char === ';' );
	$this->mDTopen = $opensTerm;

	return $closing . ( $opensTerm ? '<dt>' : '<dd>' );
}
2525 
/**
 * Close one list level for the given list prefix character.
 *
 * Only '*', '#' and ':' are accepted; ';' is reported as an error. The
 * callers in doBlockLevels() pass characters from a prefix in which ';'
 * has already been replaced by ':'. Closing a ':' level while a term is
 * open emits </dt> and clears $this->mDTopen.
 *
 * @param string $char One of '*', '#' or ':'
 * @return string The closing tags, or '<!-- ERR 3 -->' for any other character
 */
public function closeList( $char ) {
	if ( $char === '*' ) {
		return '</li></ul>';
	}
	if ( $char === '#' ) {
		return '</li></ol>';
	}
	if ( $char !== ':' ) {
		return '<!-- ERR 3 -->';
	}

	if ( !$this->mDTopen ) {
		return '</dd></dl>';
	}

	$this->mDTopen = false;
	return '</dt></dl>';
}
/**
 * Turn line-oriented wikitext into block-level HTML: lists, paragraphs
 * and space-indented <pre> blocks.
 *
 * The text is processed line by line. Leading runs of '*', '#', ':' and
 * ';' open, continue or close list levels; lines without such a prefix
 * are wrapped in <p> or <pre> unless they contain one of the block-level
 * tags matched by the open/close patterns below.
 *
 * @param string $text Text to process
 * @param bool $linestart Whether $text begins at the start of a line. When
 *   false, the first line is copied to the output unchanged (and without a
 *   trailing newline) and processing starts with the second line.
 * @return string The processed text
 */
public function doBlockLevels( $text, $linestart ) {
	# Parsing through the text line by line.  The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	$textLines = StringUtils::explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Either false, or the paragraph markup held back after an empty line
	$paragraphStack = false;
	$inBlockquote = false;

	foreach ( $textLines as $oLine ) {
		# Fix up $linestart
		if ( !$linestart ) {
			$output .= $oLine;
			$linestart = true;
			continue;
		}
		# * = ul
		# # = ol
		# ; = dt
		# : = dd

		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
		$preOpenMatch = preg_match( '/<pre/i', $oLine );
		# If not in a <pre> element, scan for and figure out what prefixes are there.
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$prefix = substr( $oLine, 0, $prefixLength );

			# ; and : are both from definition-lists, so they're equivalent
			# for the purposes of determining whether or not we need to open/close
			# elements.
			$prefix2 = str_replace( ';', ':', $prefix );
			$t = substr( $oLine, $prefixLength );
			$this->mInPre = (bool)$preOpenMatch;
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$prefix = $prefix2 = '';
			$t = $oLine;
		}

		# List generation
		if ( $prefixLength && $lastPrefix === $prefix2 ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $prefix, -1 ) );
			$paragraphStack = false;

			if ( substr( $prefix, -1 ) === ';' ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				$output .= $this->splitDefinitionTerm( $t );
			}
		} elseif ( $prefixLength || $lastPrefixLength ) {
			# We need to open or close prefixes, or both.

			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
			$paragraphStack = false;

			# Close all the prefixes which aren't shared.
			while ( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
				--$lastPrefixLength;
			}

			# Continue the current prefix if appropriate.
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
			}

			# Open prefixes where appropriate.
			if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
				$output .= "\n";
			}
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $prefix, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' === $char ) {
					$output .= $this->splitDefinitionTerm( $t );
				}
				++$commonPrefixLength;
			}
			if ( !$prefixLength && $lastPrefix ) {
				$output .= "\n";
			}
			$lastPrefix = $prefix2;
		}

		# If we have no prefixes, go to paragraph mode.
		if ( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			# XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match(
				'/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
					. '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
				$t
			);
			$closematch = preg_match(
				'/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
					. '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
					. self::MARKER_PREFIX
					. '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
				$t
			);

			if ( $openmatch || $closematch ) {
				$paragraphStack = false;
				# @todo bug 5718: paragraph closed
				$output .= $this->closeParagraph();
				if ( $preOpenMatch && !$preCloseMatch ) {
					$this->mInPre = true;
				}
				# Track whether the line leaves us inside a <blockquote>: the
				# last blockquote tag on the line decides.
				$bqOffset = 0;
				while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t,
					$bqMatch, PREG_OFFSET_CAPTURE, $bqOffset )
				) {
					$inBlockquote = !$bqMatch[1][0]; // is this a close tag?
					$bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
				}
				$inBlockElem = !$closematch;
			} elseif ( !$inBlockElem && !$this->mInPre ) {
				if ( ' ' == substr( $t, 0, 1 )
					&& ( $this->mLastSection === 'pre' || trim( $t ) != '' )
					&& !$inBlockquote
				) {
					# pre
					if ( $this->mLastSection !== 'pre' ) {
						$paragraphStack = false;
						$output .= $this->closeParagraph() . '<pre>';
						$this->mLastSection = 'pre';
					}
					$t = substr( $t, 1 );
				} else {
					# paragraph
					if ( trim( $t ) === '' ) {
						if ( $paragraphStack ) {
							$output .= $paragraphStack . '<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ( $this->mLastSection !== 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} elseif ( $this->mLastSection !== 'p' ) {
							$output .= $this->closeParagraph() . '<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# somewhere above we forget to get out of pre block (bug 785)
		if ( $preCloseMatch && $this->mInPre ) {
			$this->mInPre = false;
		}
		# While paragraph markup is being held back the (empty) line itself
		# is not emitted.
		if ( $paragraphStack === false ) {
			$output .= $t;
			if ( $prefixLength === 0 ) {
				$output .= "\n";
			}
		}
	}
	# Close whatever list levels the last line left open
	while ( $prefixLength ) {
		$output .= $this->closeList( $prefix2[$prefixLength - 1] );
		--$prefixLength;
		if ( !$prefixLength ) {
			$output .= "\n";
		}
	}
	if ( $this->mLastSection != '' ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	return $output;
}

/**
 * Split a "; term : definition" line at its first colon outside of tags.
 *
 * When findColonNoLinks() finds such a colon, $line is reduced to the
 * definition part and the term followed by the markup that switches from
 * the term to the definition is returned. Otherwise $line is left alone.
 *
 * @param string &$line Text after the list prefix; shortened on success
 * @return string Markup to append to the output, or '' when there is no
 *   colon to split on
 */
private function splitDefinitionTerm( &$line ) {
	$term = $definition = '';
	if ( $this->findColonNoLinks( $line, $term, $definition ) === false ) {
		return '';
	}

	$line = $definition;
	return $term . $this->nextItem( ':' );
}
2764 
/**
 * Split a string on its first ':' that is not inside a tag or between an
 * opening tag and its closing tag.
 *
 * Tags and HTML comments are skipped with a small state machine; a colon
 * only counts while the tag nesting depth is zero.
 *
 * @param string $str The string to split
 * @param string &$before Set to everything before the ':' (only on success)
 * @param string &$after Set to everything after the ':' (only on success)
 * @throws MWException If the state machine reaches an unknown state
 * @return int|bool The byte position of the ':', or false if none was found
 */
public function findColonNoLinks( $str, &$before, &$after ) {
	$pos = strpos( $str, ':' );
	if ( $pos === false ) {
		# Nothing to find!
		return false;
	}

	$lt = strpos( $str, '<' );
	if ( $lt === false || $lt > $pos ) {
		# Easy; no tag nesting to worry about
		$before = substr( $str, 0, $pos );
		$after = substr( $str, $pos + 1 );
		return $pos;
	}

	# Ugly state machine to walk through avoiding tags.
	$state = self::COLON_STATE_TEXT;
	# Number of currently open (unclosed) tags
	$stack = 0;
	$len = strlen( $str );
	for ( $i = 0; $i < $len; $i++ ) {
		$c = $str[$i];

		switch ( $state ) {
			case self::COLON_STATE_TEXT:
				switch ( $c ) {
					case "<":
						# Could be either a <start> tag or an </end> tag
						$state = self::COLON_STATE_TAGSTART;
						break;
					case ":":
						if ( $stack == 0 ) {
							# We found it!
							$before = substr( $str, 0, $i );
							$after = substr( $str, $i + 1 );
							return $i;
						}
						# Embedded in a tag; don't break it.
						break;
					default:
						# Skip ahead looking for something interesting
						$colon = strpos( $str, ':', $i );
						if ( $colon === false ) {
							# Nothing else interesting
							return false;
						}
						$lt = strpos( $str, '<', $i );
						if ( $stack === 0 ) {
							if ( $lt === false || $colon < $lt ) {
								# We found it! Report where the colon is, not
								# where the skip-ahead scan started.
								$before = substr( $str, 0, $colon );
								$after = substr( $str, $colon + 1 );
								return $colon;
							}
						}
						if ( $lt === false ) {
							# Nothing else interesting to find; abort!
							# We're nested, but there's no close tags left. Abort!
							break 2;
						}
						# Skip ahead to next tag start
						$i = $lt;
						$state = self::COLON_STATE_TAGSTART;
				}
				break;
			case self::COLON_STATE_TAG:
				# In a <tag>
				switch ( $c ) {
					case ">":
						$stack++;
						$state = self::COLON_STATE_TEXT;
						break;
					case "/":
						# Slash may be followed by >?
						$state = self::COLON_STATE_TAGSLASH;
						break;
					default:
						# ignore
				}
				break;
			case self::COLON_STATE_TAGSTART:
				switch ( $c ) {
					case "/":
						$state = self::COLON_STATE_CLOSETAG;
						break;
					case "!":
						$state = self::COLON_STATE_COMMENT;
						break;
					case ">":
						# Illegal early close? This shouldn't happen D:
						$state = self::COLON_STATE_TEXT;
						break;
					default:
						$state = self::COLON_STATE_TAG;
				}
				break;
			case self::COLON_STATE_CLOSETAG:
				# In a </tag>
				if ( $c === ">" ) {
					$stack--;
					if ( $stack < 0 ) {
						wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
						return false;
					}
					$state = self::COLON_STATE_TEXT;
				}
				break;
			case self::COLON_STATE_TAGSLASH:
				if ( $c === ">" ) {
					# Yes, a self-closed tag <blah/>
					$state = self::COLON_STATE_TEXT;
				} else {
					# Probably we're jumping the gun, and this is an attribute
					$state = self::COLON_STATE_TAG;
				}
				break;
			case self::COLON_STATE_COMMENT:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASH;
				}
				break;
			case self::COLON_STATE_COMMENTDASH:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASHDASH;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			case self::COLON_STATE_COMMENTDASHDASH:
				if ( $c === ">" ) {
					$state = self::COLON_STATE_TEXT;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			default:
				throw new MWException( "State machine error in " . __METHOD__ );
		}
	}
	if ( $stack > 0 ) {
		wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
	}
	return false;
}
2921 
/**
 * Return the value of a magic variable such as {{PAGENAME}} or
 * {{CURRENTDAY}}.
 *
 * Values computed by the switch below are stored in $this->mVarCache under
 * $index before being returned. The site-configuration variables
 * (articlepath, sitename, server, servername, scriptpath, stylepath,
 * directionmark, contentlanguage) and values supplied by the
 * ParserGetVariableValueSwitch hook are returned directly without being
 * stored here.
 *
 * @param string $index Magic variable identifier, e.g. 'pagename'
 * @param bool|PPFrame $frame Passed through to the
 *   ParserGetVariableValueSwitch hook for unknown identifiers
 * @throws MWException If no title is set on the parser
 * @return string|int|null
 */
public function getVariableValue( $index, $frame = false ) {
	# The configuration globals read below must be imported explicitly;
	# without this they would be undefined local variables.
	global $wgContLang, $wgSitename, $wgServer, $wgServerName;
	global $wgArticlePath, $wgScriptPath, $wgStylePath, $wgLanguageCode;

	if ( is_null( $this->mTitle ) ) {
		// If no title set, bad things are going to happen
		// later. Title should always be set since this
		// should only be called in the middle of a parse
		// operation (but the unit-tests do funky stuff)
		throw new MWException( __METHOD__ . ' Should only be '
			. ' called while parsing (no title set)' );
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	// Hook handlers may pre-populate the cache, or return false to make
	// this lookup bypass it.
	if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) ) {
		if ( isset( $this->mVarCache[$index] ) ) {
			return $this->mVarCache[$index];
		}
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );

	$pageLang = $this->getFunctionLang();

	switch ( $index ) {
		case '!':
			$value = '|';
			break;
		case 'currentmonth':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
			break;
		case 'currentmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentday':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
			break;
		case 'currentday2':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
			break;
		case 'localmonth':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
			break;
		case 'localmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localday':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
			break;
		case 'localday2':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
			break;
		case 'pagename':
			$value = wfEscapeWikiText( $this->mTitle->getText() );
			break;
		case 'pagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
			break;
		case 'fullpagename':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
			break;
		case 'subpagename':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
			break;
		case 'subpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$value = wfEscapeWikiText( $this->mTitle->getRootText() );
			break;
		case 'rootpagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getRootText()
			) ) );
			break;
		case 'basepagename':
			$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
			break;
		case 'basepagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getBaseText()
			) ) );
			break;
		case 'talkpagename':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
			} else {
				$value = '';
			}
			break;
		case 'talkpagenamee':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
			} else {
				$value = '';
			}
			break;
		case 'subjectpagename':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
			break;
		case 'pageid': // requested in bug 23427
			$pageid = $this->getTitle()->getArticleID();
			if ( $pageid == 0 ) {
				# 0 means the page doesn't exist in the database,
				# which means the user is previewing a new page.
				# The vary-revision flag must be set, because the magic word
				# will have a different value once the page is saved.
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
			}
			$value = $pageid ? $pageid : null;
			break;
		case 'revisionid':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
			$value = $this->mRevisionId;
			break;
		case 'revisionday':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
			break;
		case 'revisionday2':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 6, 2 );
			break;
		case 'revisionmonth':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 4, 2 );
			break;
		case 'revisionmonth1':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
			break;
		case 'revisionyear':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 0, 4 );
			break;
		case 'revisiontimestamp':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
			$value = $this->getRevisionTimestamp();
			break;
		case 'revisionuser':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
			$value = $this->getRevisionUser();
			break;
		case 'revisionsize':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
			$value = $this->getRevisionSize();
			break;
		case 'namespace':
			$value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacee':
			$value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacenumber':
			$value = $this->mTitle->getNamespace();
			break;
		case 'talkspace':
			$value = $this->mTitle->canTalk()
				? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
				: '';
			break;
		case 'talkspacee':
			$value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
			break;
		case 'subjectspace':
			$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
			break;
		case 'currentdayname':
			$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
			break;
		case 'currentyear':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
			break;
		case 'currenttime':
			$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
			break;
		case 'currentweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
			break;
		case 'currentdow':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
			break;
		case 'localdayname':
			$value = $pageLang->getWeekdayName(
				(int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
			);
			break;
		case 'localyear':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
			break;
		case 'localtime':
			$value = $pageLang->time(
				MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
				false,
				false
			);
			break;
		case 'localhour':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
			break;
		case 'localweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
			break;
		case 'localdow':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
			break;
		case 'numberofarticles':
			$value = $pageLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $pageLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $pageLang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$value = $pageLang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$value = $pageLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$value = $pageLang->formatNum( SiteStats::edits() );
			break;
		case 'currenttimestamp':
			$value = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
			break;
		case 'currentversion':
			# NOTE(review): restored from upstream MediaWiki; confirm that
			# SpecialVersion::getVersion() is the intended source.
			$value = SpecialVersion::getVersion();
			break;
		case 'articlepath':
			return $wgArticlePath;
		case 'sitename':
			return $wgSitename;
		case 'server':
			return $wgServer;
		case 'servername':
			return $wgServerName;
		case 'scriptpath':
			return $wgScriptPath;
		case 'stylepath':
			return $wgStylePath;
		case 'directionmark':
			return $pageLang->getDirMark();
		case 'contentlanguage':
			return $wgLanguageCode;
		case 'cascadingsources':
			# NOTE(review): restored from upstream MediaWiki; confirm the
			# helper's signature against CoreParserFunctions.
			$value = CoreParserFunctions::cascadingsources( $this );
			break;
		default:
			# Unknown identifier: let extensions supply the value
			$ret = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				[ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
			);

			return $ret;
	}

	if ( $index ) {
		$this->mVarCache[$index] = $value;
	}

	return $value;
}
3282 
/**
 * Set up the magic word arrays used for variable and subst: matching.
 *
 * Fills $this->mVariables from MagicWord::getVariableIDs() and
 * $this->mSubstWords from MagicWord::getSubstIDs().
 */
public function initialiseVariables() {
	$idsOfVariables = MagicWord::getVariableIDs();
	$idsOfSubstWords = MagicWord::getSubstIDs();

	$variableWords = new MagicWordArray( $idsOfVariables );
	$substWords = new MagicWordArray( $idsOfSubstWords );

	$this->mVariables = $variableWords;
	$this->mSubstWords = $substWords;
}
3295 
/**
 * Run the preprocessor over $text and return the object it produces.
 *
 * This is a thin wrapper around preprocessToObj() on the object returned
 * by getPreprocessor().
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Bit field passed through to the preprocessor
 *   (presumably the PTD_* constants, e.g. self::PTD_FOR_INCLUSION)
 * @return mixed Result of preprocessToObj()
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3322 
/**
 * Split a string into its leading whitespace, its trimmed content and
 * its trailing whitespace, using the default ltrim()/rtrim() character set.
 *
 * @param string $s
 * @return array [ leading whitespace, trimmed text, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	$withoutLeading = ltrim( $s );
	$leadingLength = strlen( $s ) - strlen( $withoutLeading );

	$core = rtrim( $withoutLeading );
	$trailingLength = strlen( $withoutLeading ) - strlen( $core );

	$leading = substr( $s, 0, $leadingLength );
	$trailing = ( $trailingLength > 0 )
		? substr( $withoutLeading, -$trailingLength )
		: '';

	return [ $leading, $core, $trailing ];
}
3342 
/**
 * Expand the preprocessor constructs in $text within the given frame.
 *
 * Empty text, and text longer than the max include size from the parser
 * options, is returned unchanged.
 *
 * @param string $text Text to expand
 * @param bool|PPFrame|array $frame Frame to expand in. False creates a new
 *   frame; any other non-PPFrame value is handed to newCustomFrame()
 *   (presumably an array of template arguments).
 * @param bool $argsOnly When true, expansion runs with PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text? Also, Prevent too big inclusions!
	$length = strlen( $text );
	if ( $length < 1 ) {
		return $text;
	}
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			wfDebug( __METHOD__ . " called using plain parameters instead of "
				. "a PPFrame instance. Creating custom frame.\n" );
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$tree = $this->preprocessToDom( $text );
	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $tree, $expandFlags );
}
3384 
/**
 * Turn a list of "name=value" / "value" strings into an associative array.
 *
 * Entries without '=' are stored under consecutive integer keys starting
 * at 1. Entries with '=' are split at the first '='; both the name and the
 * value are trimmed, and a later entry with the same name overwrites an
 * earlier one.
 *
 * @param string[] $args
 * @return array
 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	$index = 1;
	foreach ( $args as $arg ) {
		$eqpos = strpos( $arg, '=' );
		if ( $eqpos === false ) {
			$assocArgs[$index++] = $arg;
			continue;
		}

		# trim() always yields a string, so no false-checks are needed here
		$name = trim( substr( $arg, 0, $eqpos ) );
		$assocArgs[$name] = trim( substr( $arg, $eqpos + 1 ) );
	}

	return $assocArgs;
}
3413 
/**
 * Record that a parser limit was hit: add the "<type>-warning" message as
 * an output warning and add the "<type>-category" tracking category.
 *
 * @param string $limitationType Message key prefix of the limit that was hit
 * @param string|int $current Current value, passed as the first numeric
 *   message parameter
 * @param string|int $max Maximum allowed value, passed as the second
 *   numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# does no harm if $current and $max are present but are unnecessary for the message
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( "$limitationType-warning" );
	$warningText = $message->numParams( $current, $max )->text();

	$this->mOutput->addWarning( $warningText );
	$this->addTrackingCategory( "$limitationType-category" );
}
3449 
3462  public function braceSubstitution( $piece, $frame ) {
3463 
3464  // Flags
3465 
3466  // $text has been filled
3467  $found = false;
3468  // wiki markup in $text should be escaped
3469  $nowiki = false;
3470  // $text is HTML, armour it against wikitext transformation
3471  $isHTML = false;
3472  // Force interwiki transclusion to be done in raw mode not rendered
3473  $forceRawInterwiki = false;
3474  // $text is a DOM node needing expansion in a child frame
3475  $isChildObj = false;
3476  // $text is a DOM node needing expansion in the current frame
3477  $isLocalObj = false;
3478 
3479  # Title object, where $text came from
3480  $title = false;
3481 
3482  # $part1 is the bit before the first |, and must contain only title characters.
3483  # Various prefixes will be stripped from it later.
3484  $titleWithSpaces = $frame->expand( $piece['title'] );
3485  $part1 = trim( $titleWithSpaces );
3486  $titleText = false;
3487 
3488  # Original title text preserved for various purposes
3489  $originalTitle = $part1;
3490 
3491  # $args is a list of argument nodes, starting from index 0, not including $part1
3492  # @todo FIXME: If piece['parts'] is null then the call to getLength()
3493  # below won't work b/c this $args isn't an object
3494  $args = ( null == $piece['parts'] ) ? [] : $piece['parts'];
3495 
3496  $profileSection = null; // profile templates
3497 
3498  # SUBST
3499  if ( !$found ) {
3500  $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3501 
3502  # Possibilities for substMatch: "subst", "safesubst" or FALSE
3503  # Decide whether to expand template or keep wikitext as-is.
3504  if ( $this->ot['wiki'] ) {
3505  if ( $substMatch === false ) {
3506  $literal = true; # literal when in PST with no prefix
3507  } else {
3508  $literal = false; # expand when in PST with subst: or safesubst:
3509  }
3510  } else {
3511  if ( $substMatch == 'subst' ) {
3512  $literal = true; # literal when not in PST with plain subst:
3513  } else {
3514  $literal = false; # expand when not in PST with safesubst: or no prefix
3515  }
3516  }
3517  if ( $literal ) {
3518  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3519  $isLocalObj = true;
3520  $found = true;
3521  }
3522  }
3523 
3524  # Variables
3525  if ( !$found && $args->getLength() == 0 ) {
3526  $id = $this->mVariables->matchStartToEnd( $part1 );
3527  if ( $id !== false ) {
3528  $text = $this->getVariableValue( $id, $frame );
3529  if ( MagicWord::getCacheTTL( $id ) > -1 ) {
3530  $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
3531  }
3532  $found = true;
3533  }
3534  }
3535 
3536  # MSG, MSGNW and RAW
3537  if ( !$found ) {
3538  # Check for MSGNW:
3539  $mwMsgnw = MagicWord::get( 'msgnw' );
3540  if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3541  $nowiki = true;
3542  } else {
3543  # Remove obsolete MSG:
3544  $mwMsg = MagicWord::get( 'msg' );
3545  $mwMsg->matchStartAndRemove( $part1 );
3546  }
3547 
3548  # Check for RAW:
3549  $mwRaw = MagicWord::get( 'raw' );
3550  if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3551  $forceRawInterwiki = true;
3552  }
3553  }
3554 
3555  # Parser functions
3556  if ( !$found ) {
3557  $colonPos = strpos( $part1, ':' );
3558  if ( $colonPos !== false ) {
3559  $func = substr( $part1, 0, $colonPos );
3560  $funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
3561  $argsLength = $args->getLength();
3562  for ( $i = 0; $i < $argsLength; $i++ ) {
3563  $funcArgs[] = $args->item( $i );
3564  }
3565  try {
3566  $result = $this->callParserFunction( $frame, $func, $funcArgs );
3567  } catch ( Exception $ex ) {
3568  throw $ex;
3569  }
3570 
3571  # The interface for parser functions allows for extracting
3572  # flags into the local scope. Extract any forwarded flags
3573  # here.
3574  extract( $result );
3575  }
3576  }
3577 
3578  # Finish mangling title and then check for loops.
3579  # Set $title to a Title object and $titleText to the PDBK
3580  if ( !$found ) {
3581  $ns = NS_TEMPLATE;
3582  # Split the title into page and subpage
3583  $subpage = '';
3584  $relative = $this->maybeDoSubpageLink( $part1, $subpage );
3585  if ( $part1 !== $relative ) {
3586  $part1 = $relative;
3587  $ns = $this->mTitle->getNamespace();
3588  }
3589  $title = Title::newFromText( $part1, $ns );
3590  if ( $title ) {
3591  $titleText = $title->getPrefixedText();
3592  # Check for language variants if the template is not found
3593  if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
3594  $this->getConverterLanguage()->findVariantLink( $part1, $title, true );
3595  }
3596  # Do recursion depth check
3597  $limit = $this->mOptions->getMaxTemplateDepth();
3598  if ( $frame->depth >= $limit ) {
3599  $found = true;
3600  $text = '<span class="error">'
3601  . wfMessage( 'parser-template-recursion-depth-warning' )
3602  ->numParams( $limit )->inContentLanguage()->text()
3603  . '</span>';
3604  }
3605  }
3606  }
3607 
3608  # Load from database
3609  if ( !$found && $title ) {
3610  $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3611  if ( !$title->isExternal() ) {
3612  if ( $title->isSpecialPage()
3613  && $this->mOptions->getAllowSpecialInclusion()
3614  && $this->ot['html']
3615  ) {
3616  // Pass the template arguments as URL parameters.
3617  // "uselang" will have no effect since the Language object
3618  // is forced to the one defined in ParserOptions.
3619  $pageArgs = [];
3620  $argsLength = $args->getLength();
3621  for ( $i = 0; $i < $argsLength; $i++ ) {
3622  $bits = $args->item( $i )->splitArg();
3623  if ( strval( $bits['index'] ) === '' ) {
3624  $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3625  $value = trim( $frame->expand( $bits['value'] ) );
3626  $pageArgs[$name] = $value;
3627  }
3628  }
3629 
3630  // Create a new context to execute the special page
3631  $context = new RequestContext;
3632  $context->setTitle( $title );
3633  $context->setRequest( new FauxRequest( $pageArgs ) );
3634  $context->setUser( $this->getUser() );
3635  $context->setLanguage( $this->mOptions->getUserLangObj() );
3637  if ( $ret ) {
3638  $text = $context->getOutput()->getHTML();
3639  $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3640  $found = true;
3641  $isHTML = true;
3642  $this->disableCache();
3643  }
3644  } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
3645  $found = false; # access denied
3646  wfDebug( __METHOD__ . ": template inclusion denied for " .
3647  $title->getPrefixedDBkey() . "\n" );
3648  } else {
3649  list( $text, $title ) = $this->getTemplateDom( $title );
3650  if ( $text !== false ) {
3651  $found = true;
3652  $isChildObj = true;
3653  }
3654  }
3655 
3656  # If the title is valid but undisplayable, make a link to it
3657  if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3658  $text = "[[:$titleText]]";
3659  $found = true;
3660  }
3661  } elseif ( $title->isTrans() ) {
3662  # Interwiki transclusion
3663  if ( $this->ot['html'] && !$forceRawInterwiki ) {
3664  $text = $this->interwikiTransclude( $title, 'render' );
3665  $isHTML = true;
3666  } else {
3667  $text = $this->interwikiTransclude( $title, 'raw' );
3668  # Preprocess it like a template
3669  $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3670  $isChildObj = true;
3671  }
3672  $found = true;
3673  }
3674 
3675  # Do infinite loop check
3676  # This has to be done after redirect resolution to avoid infinite loops via redirects
3677  if ( !$frame->loopCheck( $title ) ) {
3678  $found = true;
3679  $text = '<span class="error">'
3680  . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3681  . '</span>';
3682  wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
3683  }
3684  }
3685 
3686  # If we haven't found text to substitute by now, we're done
3687  # Recover the source wikitext and return it
3688  if ( !$found ) {
3689  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3690  if ( $profileSection ) {
3691  $this->mProfiler->scopedProfileOut( $profileSection );
3692  }
3693  return [ 'object' => $text ];
3694  }
3695 
3696  # Expand DOM-style return values in a child frame
3697  if ( $isChildObj ) {
3698  # Clean up argument array
3699  $newFrame = $frame->newChild( $args, $title );
3700 
3701  if ( $nowiki ) {
3702  $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3703  } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3704  # Expansion is eligible for the empty-frame cache
3705  $text = $newFrame->cachedExpand( $titleText, $text );
3706  } else {
3707  # Uncached expansion
3708  $text = $newFrame->expand( $text );
3709  }
3710  }
3711  if ( $isLocalObj && $nowiki ) {
3712  $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3713  $isLocalObj = false;
3714  }
3715 
3716  if ( $profileSection ) {
3717  $this->mProfiler->scopedProfileOut( $profileSection );
3718  }
3719 
3720  # Replace raw HTML by a placeholder
3721  if ( $isHTML ) {
3722  $text = $this->insertStripItem( $text );
3723  } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3724  # Escape nowiki-style return values
3725  $text = wfEscapeWikiText( $text );
3726  } elseif ( is_string( $text )
3727  && !$piece['lineStart']
3728  && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3729  ) {
3730  # Bug 529: if the template begins with a table or block-level
3731  # element, it should be treated as beginning a new line.
3732  # This behavior is somewhat controversial.
3733  $text = "\n" . $text;
3734  }
3735 
3736  if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3737  # Error, oversize inclusion
3738  if ( $titleText !== false ) {
3739  # Make a working, properly escaped link if possible (bug 23588)
3740  $text = "[[:$titleText]]";
3741  } else {
3742  # This will probably not be a working link, but at least it may
3743  # provide some hint of where the problem is
3744  preg_replace( '/^:/', '', $originalTitle );
3745  $text = "[[:$originalTitle]]";
3746  }
3747  $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3748  . 'post-expand include size too large -->' );
3749  $this->limitationWarn( 'post-expand-template-inclusion' );
3750  }
3751 
3752  if ( $isLocalObj ) {
3753  $ret = [ 'object' => $text ];
3754  } else {
3755  $ret = [ 'text' => $text ];
3756  }
3757 
3758  return $ret;
3759  }
3760 
/**
 * Call a parser function and return an array with text and flags.
 *
 * The function name is first looked up among the case-sensitive synonyms,
 * then (lower-cased with the content language) among the case-insensitive
 * ones.
 *
 * @param object $frame Frame used to expand PPNode arguments when the hook
 *   takes plain-text arguments; passed through to the hook when it was
 *   registered with SFH_OBJECT_ARGS
 * @param string $function Function name as it appeared in the wikitext
 * @param array $args Function arguments; values may be PPNode objects or strings
 * @return array Always contains a boolean 'found'. When the function was
 *   found it also contains 'text' plus any flags returned by the hook, and
 *   'isChildObj' => true when the hook asked for its output to be parsed
 *   (noparse => false), in which case 'text' is a preprocessor DOM.
 * @throws MWException If the registered hook is not callable
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	// Needed for the case-insensitive lookup below
	global $wgContLang;

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		throw new MWException( "Tag hook for $function is not callable\n" );
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	$allArgs = [ &$parser ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			# The first argument is always passed through untouched
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				# Named argument: rebuild the "name=value" form
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	}

	# Hook output is left unparsed unless the hook explicitly says otherwise
	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3870 
/**
 * Get the preprocessor DOM for a template, using the per-parser caches.
 *
 * Redirects seen earlier are resolved through mTplRedirCache before the
 * DOM cache (mTplDomCache) is consulted. A failed fetch is cached as false.
 *
 * @param Title $title Title of the template to load
 * @return array Two-element array: the DOM (or false if the template text
 *   could not be fetched) and the title actually used
 */
public function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return [ $this->mTplDomCache[$titleText], $title ];
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		$this->mTplDomCache[$titleText] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	# Remember where the requested title ended up, so the next lookup
	# can skip the fetch
	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			[ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3910 
/**
 * Fetch the current revision of a given title, caching the result in a
 * per-parser LRU cache keyed by the prefixed DB key.
 *
 * The lookup itself is delegated to the callback configured in the parser
 * options (defaults to Parser::statelessFetchRevision()).
 *
 * @param Title $title
 * @return mixed Whatever the current-revision callback returned for the title
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$cacheKey = $title->getPrefixedDBkey();
	# Create the cache lazily on first use
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$this->currentRevisionCache->set( $cacheKey,
			// Defaults to Parser::statelessFetchRevision()
			call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
		);
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3935 
/**
 * Default current-revision callback: load the revision for a title
 * without touching any parser state.
 *
 * @param Title $title
 * @param Parser|bool $parser Not used by this implementation
 * @return mixed Result of Revision::newFromTitle()
 */
public static function statelessFetchRevision( $title, $parser = false ) {
	$revision = Revision::newFromTitle( $title );

	return $revision;
}
3948 
/**
 * Fetch the unparsed text of a template through the configured template
 * callback and register every dependency it reports with the output.
 *
 * @param Title $title
 * @return array Two-element array: the template text (as returned by the
 *   callback, with U+007F replaced when it is a string) and the final title
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$callback = $this->mOptions->getTemplateCallback();
	$fetched = call_user_func( $callback, $title, $this );

	// U+007F DELETE is reserved for strip markers, so it must not
	// survive in ordinary template text.
	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		$text = strtr( $text, "\x7f", "?" );
	}

	if ( isset( $fetched['finalTitle'] ) ) {
		$finalTitle = $fetched['finalTitle'];
	} else {
		$finalTitle = $title;
	}

	if ( isset( $fetched['deps'] ) ) {
		foreach ( $fetched['deps'] as $dependency ) {
			$depTitle = $dependency['title'];
			$this->mOutput->addTemplate( $depTitle, $dependency['page_id'], $dependency['rev_id'] );
			// A page that transcludes itself renders differently once
			// the new version of the page exists.
			if ( $depTitle->equals( $this->getTitle() ) ) {
				$this->mOutput->setFlag( 'vary-revision' );
			}
		}
	}

	return [ $text, $finalTitle ];
}
3976 
/**
 * Fetch only the text of a template; the final title is discarded.
 *
 * @param Title $title
 * @return mixed First element of fetchTemplateAndTitle()'s result
 */
public function fetchTemplate( $title ) {
	$textAndTitle = $this->fetchTemplateAndTitle( $title );

	return $textAndTitle[0];
}
3985 
/**
 * Static function to get a template.
 * Can be overridden via ParserOptions' template callback.
 *
 * Follows at most one redirect, and records every title looked at as a
 * dependency.
 *
 * @param Title $title
 * @param Parser|bool $parser Parser whose revision cache should be used,
 *   or false to load revisions directly
 * @return array With keys 'text' (string, or false on failure),
 *   'finalTitle' and 'deps' (list of arrays with 'title', 'page_id'
 *   and 'rev_id')
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	// Needed to normalise MediaWiki-namespace message keys below
	global $wgContLang;

	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];

	# Loop to fetch the article, with up to 1 redirect
	// @codingStandardsIgnoreStart Generic.CodeAnalysis.ForLoopWithTestFunctionCall.NotAllowed
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		// @codingStandardsIgnoreEnd
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			# No parser to go through: load the current revision directly,
			# as statelessFetchRevision() does
			$rev = Revision::newFromTitle( $title );
		}
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = LinkCache::singleton();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id ];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id ];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}
	return [
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps ];
}
4077 
/**
 * Fetch a file and register it as a dependency; the resolved title is
 * discarded.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed First element of fetchFileAndTitle()'s result
 */
public function fetchFile( $title, $options = [] ) {
	$fileAndTitle = $this->fetchFileAndTitle( $title, $options );

	return $fileAndTitle[0];
}
4088 
/**
 * Fetch a file and its title, registering the file as an image
 * dependency of the output under every title involved.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two-element array: the file (or a false value) and the
 *   title, updated to the file's own title when that differs
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	$titleDiffers = $file && !$title->equals( $file->getTitle() );
	if ( $titleDiffers ) {
		# Update fetched file title
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}

	return [ $file, $title ];
}
4110 
/**
 * Look up a file without registering it as a dependency.
 *
 * @param Title $title
 * @param array $options May contain 'broken' (forces false) or 'sha1'
 *   (look up by key instead of by name)
 * @return mixed The file found, or false when 'broken' is set
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return wfFindFile( $title, $options );
}
4131 
/**
 * Transclude an interwiki link.
 *
 * @param Title $title
 * @param string $action Value for the 'action' URL parameter
 * @return string Fetched content, or a content-language error message
 *   when scary transcluding is disabled or the URL exceeds 255 bytes
 */
public function interwikiTransclude( $title, $action ) {
	global $wgEnableScaryTranscluding;

	if ( $wgEnableScaryTranscluding ) {
		$url = $title->getFullURL( [ 'action' => $action ] );
		$urlTooLong = strlen( $url ) > 255;

		if ( $urlTooLong ) {
			$message = wfMessage( 'scarytranscludetoolong' );
			return $message->inContentLanguage()->text();
		}

		return $this->fetchScaryTemplateMaybeFromCache( $url );
	}

	$message = wfMessage( 'scarytranscludedisabled' );
	return $message->inContentLanguage()->text();
}
4154 
/**
 * Fetch the content behind a URL, going through the 'transcache' table.
 *
 * A row newer than $wgTranscludeCacheExpiry seconds is returned directly;
 * otherwise the URL is requested over HTTP and the table row is replaced.
 *
 * @param string $url
 * @return string The content, or a content-language error message when
 *   the HTTP request failed
 */
public function fetchScaryTemplateMaybeFromCache( $url ) {
	global $wgTranscludeCacheExpiry;

	$dbr = wfGetDB( DB_SLAVE );
	$oldestAllowed = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
	$cachedRow = $dbr->selectRow(
		'transcache',
		[ 'tc_time', 'tc_contents' ],
		[ 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $oldestAllowed ) ]
	);
	if ( $cachedRow ) {
		return $cachedRow->tc_contents;
	}

	$request = MWHttpRequest::factory( $url, [], __METHOD__ );
	$status = $request->execute(); // Status object
	if ( !$status->isOK() ) {
		if ( $request->getStatus() != 200 ) {
			// Though we failed to fetch the content, this status is useless.
			return wfMessage( 'scarytranscludefailed-httpstatus' )
				->params( $url, $request->getStatus() /* HTTP status */ )->inContentLanguage()->text();
		}
		return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
	}
	$text = $request->getContent();

	$dbw = wfGetDB( DB_MASTER );
	$row = [
		'tc_url' => $url,
		'tc_time' => $dbw->timestamp( time() ),
		'tc_contents' => $text
	];
	$dbw->replace( 'transcache', [ 'tc_url' ], $row );

	return $text;
}
4189 
/**
 * Triple brace replacement -- used for template arguments.
 *
 * @param array $piece Has 'title' (the argument name node) and 'parts'
 *   (the default-value parts)
 * @param object $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] when a default or the original
 *   source is returned, or [ 'text' => ... ] when the frame supplied a value
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );
	$object = false;

	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultAllowed = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultAllowed ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	$error = false;
	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$error = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}
	if ( $error !== false ) {
		$text .= $error;
	}

	if ( $object !== false ) {
		return [ 'object' => $object ];
	}

	return [ 'text' => $text ];
}
4236 
/**
 * Return the text to be used for a given extension tag.
 * This is the ghost of strip().
 *
 * NOTE: flags returned by a hook are extract()ed into the local scope, so
 * the names $markerType, $output and $marker must not be changed.
 *
 * @param array $params Has 'name', and optionally 'attr', 'inner',
 *   'attributes' and 'close'
 * @param object $frame Frame used to expand the pieces of $params
 * @return string Hook output when the marker type is 'none', otherwise
 *   the strip marker standing in for the output
 * @throws MWException If a hook is not callable or the marker type is invalid
 */
public function extensionSubstitution( $params, $frame ) {
	$name = $frame->expand( $params['name'] );
	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : null;
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;
	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $this->mTagHooks[$name] ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}
			$output = call_user_func_array(
				$this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ]
			);
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}

			// Avoid PHP 7.1 warning from passing $this by reference
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			# Extract flags to local scope (to override $markerType)
			$flags = $output;
			$output = $flags[0];
			unset( $flags[0] );
			extract( $flags );
		}
	} else {
		# Not rendering: rebuild the tag's source form
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = $params['close'] === null ? '' : $frame->expand( $params['close'] );
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	}

	if ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}

	return $marker;
}
4330 
/**
 * Add to an include-size counter unless that would exceed the configured
 * maximum include size.
 *
 * @param string $type Key into mIncludeSizes
 * @param int $size Amount to add
 * @return bool False (counter untouched) if the limit would be exceeded,
 *   true otherwise
 */
public function incrementIncludeSize( $type, $size ) {
	$wouldBe = $this->mIncludeSizes[$type] + $size;
	if ( $wouldBe > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $wouldBe;
	return true;
}
4346 
/**
 * Increment the expensive function count.
 *
 * @return bool False if the configured expensive parser function limit
 *   has now been exceeded, true otherwise
 */
public function incrementExpensiveFunctionCount() {
	$this->mExpensiveFunctionCount++;
	return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
}
4356 
/**
 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__ from the
 * text, recording them in mDoubleUnderscores and applying their effects
 * to the parser state and output.
 *
 * @param string $text
 * @return string Text with the double-underscore items removed; the first
 *   __TOC__ is replaced by a '<!--MWTOC-->' placeholder
 */
public function doDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$mw = MagicWord::get( 'toc' );
	if ( $mw->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $mw->replace( '<!--MWTOC-->', $text, 1 );

		# Only keep the first one.
		$text = $mw->replace( '', $text );
	}

	# Now match and remove the rest of them
	$mwa = MagicWord::getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $mwa->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	# An explicit __TOC__ wins over __NOTOC__
	if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}
	# (bug 8068) Allow control over whether robots index a page.
	# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
	# is not desirable, the last one on the page should win.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4414 
/**
 * Add a tracking category for the current title by delegating to the
 * parser output.
 *
 * @param string $msg Passed through as the first argument
 * @return mixed Whatever the output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$added = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );

	return $added;
}
4423 
4440  public function formatHeadings( $text, $origText, $isMain = true ) {
4441  global $wgMaxTocLevel, $wgExperimentalHtmlIds;
4442 
4443  # Inhibit editsection links if requested in the page
4444  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4445  $maybeShowEditLink = $showEditLink = false;
4446  } else {
4447  $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
4448  $showEditLink = $this->mOptions->getEditSection();
4449  }
4450  if ( $showEditLink ) {
4451  $this->mOutput->setEditSectionTokens( true );
4452  }
4453 
4454  # Get all headlines for numbering them and adding funky stuff like [edit]
4455  # links - this is for later, but we need the number of headlines right now
4456  $matches = [];
4457  $numMatches = preg_match_all(
4458  '/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
4459  $text,
4460  $matches
4461  );
4462 
4463  # if there are fewer than 4 headlines in the article, do not show TOC
4464  # unless it's been explicitly enabled.
4465  $enoughToc = $this->mShowToc &&
4466  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4467 
4468  # Allow user to stipulate that a page should have a "new section"
4469  # link added via __NEWSECTIONLINK__
4470  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4471  $this->mOutput->setNewSection( true );
4472  }
4473 
4474  # Allow user to remove the "new section"
4475  # link via __NONEWSECTIONLINK__
4476  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4477  $this->mOutput->hideNewSection( true );
4478  }
4479 
4480  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4481  # override above conditions and always show TOC above first header
4482  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4483  $this->mShowToc = true;
4484  $enoughToc = true;
4485  }
4486 
4487  # headline counter
4488  $headlineCount = 0;
4489  $numVisible = 0;
4490 
4491  # Ugh .. the TOC should have neat indentation levels which can be
4492  # passed to the skin functions. These are determined here
4493  $toc = '';
4494  $full = '';
4495  $head = [];
4496  $sublevelCount = [];
4497  $levelCount = [];
4498  $level = 0;
4499  $prevlevel = 0;
4500  $toclevel = 0;
4501  $prevtoclevel = 0;
4502  $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4503  $baseTitleText = $this->mTitle->getPrefixedDBkey();
4504  $oldType = $this->mOutputType;
4505  $this->setOutputType( self::OT_WIKI );
4506  $frame = $this->getPreprocessor()->newFrame();
4507  $root = $this->preprocessToDom( $origText );
4508  $node = $root->getFirstChild();
4509  $byteOffset = 0;
4510  $tocraw = [];
4511  $refers = [];
4512 
4513  $headlines = $numMatches !== false ? $matches[3] : [];
4514 
4515  foreach ( $headlines as $headline ) {
4516  $isTemplate = false;
4517  $titleText = false;
4518  $sectionIndex = false;
4519  $numbering = '';
4520  $markerMatches = [];
4521  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4522  $serial = $markerMatches[1];
4523  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4524  $isTemplate = ( $titleText != $baseTitleText );
4525  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4526  }
4527 
4528  if ( $toclevel ) {
4529  $prevlevel = $level;
4530  }
4531  $level = $matches[1][$headlineCount];
4532 
4533  if ( $level > $prevlevel ) {
4534  # Increase TOC level
4535  $toclevel++;
4536  $sublevelCount[$toclevel] = 0;
4537  if ( $toclevel < $wgMaxTocLevel ) {
4538  $prevtoclevel = $toclevel;
4539  $toc .= Linker::tocIndent();
4540  $numVisible++;
4541  }
4542  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4543  # Decrease TOC level, find level to jump to
4544 
4545  for ( $i = $toclevel; $i > 0; $i-- ) {
4546  if ( $levelCount[$i] == $level ) {
4547  # Found last matching level
4548  $toclevel = $i;
4549  break;
4550  } elseif ( $levelCount[$i] < $level ) {
4551  # Found first matching level below current level
4552  $toclevel = $i + 1;
4553  break;
4554  }
4555  }
4556  if ( $i == 0 ) {
4557  $toclevel = 1;
4558  }
4559  if ( $toclevel < $wgMaxTocLevel ) {
4560  if ( $prevtoclevel < $wgMaxTocLevel ) {
4561  # Unindent only if the previous toc level was shown :p
4562  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4563  $prevtoclevel = $toclevel;
4564  } else {
4565  $toc .= Linker::tocLineEnd();
4566  }
4567  }
4568  } else {
4569  # No change in level, end TOC line
4570  if ( $toclevel < $wgMaxTocLevel ) {
4571  $toc .= Linker::tocLineEnd();
4572  }
4573  }
4574 
4575  $levelCount[$toclevel] = $level;
4576 
4577  # count number of headlines for each level
4578  $sublevelCount[$toclevel]++;
4579  $dot = 0;
4580  for ( $i = 1; $i <= $toclevel; $i++ ) {
4581  if ( !empty( $sublevelCount[$i] ) ) {
4582  if ( $dot ) {
4583  $numbering .= '.';
4584  }
4585  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4586  $dot = 1;
4587  }
4588  }
4589 
4590  # The safe header is a version of the header text safe to use for links
4591 
4592  # Remove link placeholders by the link text.
4593  # <!--LINK number-->
4594  # turns into
4595  # link text with suffix
4596  # Do this before unstrip since link text can contain strip markers
4597  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4598 
4599  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4600  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4601 
4602  # Strip out HTML (first regex removes any tag not allowed)
4603  # Allowed tags are:
4604  # * <sup> and <sub> (bug 8393)
4605  # * <i> (bug 26375)
4606  # * <b> (r105284)
4607  # * <bdi> (bug 72884)
4608  # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
4609  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4610  # to allow setting directionality in toc items.
4611  $tocline = preg_replace(
4612  [
4613  '#<(?!/?(span|sup|sub|bdi|i|b)(?: [^>]*)?>).*?>#',
4614  '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b))(?: .*?)?>#'
4615  ],
4616  [ '', '<$1>' ],
4617  $safeHeadline
4618  );
4619 
4620  # Strip '<span></span>', which is the result from the above if
4621  # <span id="foo"></span> is used to produce an additional anchor
4622  # for a section.
4623  $tocline = str_replace( '<span></span>', '', $tocline );
4624 
4625  $tocline = trim( $tocline );
4626 
4627  # For the anchor, strip out HTML-y stuff period
4628  $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4629  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4630 
4631  # Save headline for section edit hint before it's escaped
4632  $headlineHint = $safeHeadline;
4633 
4634  if ( $wgExperimentalHtmlIds ) {
4635  # For reverse compatibility, provide an id that's
4636  # HTML4-compatible, like we used to.
4637  # It may be worth noting, academically, that it's possible for
4638  # the legacy anchor to conflict with a non-legacy headline
4639  # anchor on the page. In this case likely the "correct" thing
4640  # would be to either drop the legacy anchors or make sure
4641  # they're numbered first. However, this would require people
4642  # to type in section names like "abc_.D7.93.D7.90.D7.A4"
4643  # manually, so let's not bother worrying about it.
4644  $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
4645  [ 'noninitial', 'legacy' ] );
4646  $safeHeadline = Sanitizer::escapeId( $safeHeadline );
4647 
4648  if ( $legacyHeadline == $safeHeadline ) {
4649  # No reason to have both (in fact, we can't)
4650  $legacyHeadline = false;
4651  }
4652  } else {
4653  $legacyHeadline = false;
4654  $safeHeadline = Sanitizer::escapeId( $safeHeadline,
4655  'noninitial' );
4656  }
4657 
4658  # HTML names must be case-insensitively unique (bug 10721).
4659  # This does not apply to Unicode characters per
4660  # http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
4661  # @todo FIXME: We may be changing them depending on the current locale.
4662  $arrayKey = strtolower( $safeHeadline );
4663  if ( $legacyHeadline === false ) {
4664  $legacyArrayKey = false;
4665  } else {
4666  $legacyArrayKey = strtolower( $legacyHeadline );
4667  }
4668 
4669  # Create the anchor for linking from the TOC to the section
4670  $anchor = $safeHeadline;
4671  $legacyAnchor = $legacyHeadline;
4672  if ( isset( $refers[$arrayKey] ) ) {
4673  // @codingStandardsIgnoreStart
4674  for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4675  // @codingStandardsIgnoreEnd
4676  $anchor .= "_$i";
4677  $refers["${arrayKey}_$i"] = true;
4678  } else {
4679  $refers[$arrayKey] = true;
4680  }
4681  if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
4682  // @codingStandardsIgnoreStart
4683  for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
4684  // @codingStandardsIgnoreEnd
4685  $legacyAnchor .= "_$i";
4686  $refers["${legacyArrayKey}_$i"] = true;
4687  } else {
4688  $refers[$legacyArrayKey] = true;
4689  }
4690 
4691  # Don't number the heading if it is the only one (looks silly)
4692  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4693  # the two are different if the line contains a link
4694  $headline = Html::element(
4695  'span',
4696  [ 'class' => 'mw-headline-number' ],
4697  $numbering
4698  ) . ' ' . $headline;
4699  }
4700 
4701  if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
4702  $toc .= Linker::tocLine( $anchor, $tocline,
4703  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4704  }
4705 
4706  # Add the section to the section tree
4707  # Find the DOM node for this header
4708  $noOffset = ( $isTemplate || $sectionIndex === false );
4709  while ( $node && !$noOffset ) {
4710  if ( $node->getName() === 'h' ) {
4711  $bits = $node->splitHeading();
4712  if ( $bits['i'] == $sectionIndex ) {
4713  break;
4714  }
4715  }
4716  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4717  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4718  $node = $node->getNextSibling();
4719  }
4720  $tocraw[] = [
4721  'toclevel' => $toclevel,
4722  'level' => $level,
4723  'line' => $tocline,
4724  'number' => $numbering,
4725  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4726  'fromtitle' => $titleText,
4727  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4728  'anchor' => $anchor,
4729  ];
4730 
4731  # give headline the correct <h#> tag
4732  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4733  // Output edit section links as markers with styles that can be customized by skins
4734  if ( $isTemplate ) {
4735  # Put a T flag in the section identifier, to indicate to extractSections()
4736  # that sections inside <includeonly> should be counted.
4737  $editsectionPage = $titleText;
4738  $editsectionSection = "T-$sectionIndex";
4739  $editsectionContent = null;
4740  } else {
4741  $editsectionPage = $this->mTitle->getPrefixedText();
4742  $editsectionSection = $sectionIndex;
4743  $editsectionContent = $headlineHint;
4744  }
4745  // We use a bit of pseudo-xml for editsection markers. The
4746  // language converter is run later on. Using a UNIQ style marker
4747  // leads to the converter screwing up the tokens when it
4748  // converts stuff. And trying to insert strip tags fails too. At
4749  // this point all real inputted tags have already been escaped,
4750  // so we don't have to worry about a user trying to input one of
4751  // these markers directly. We use a page and section attribute
4752  // to stop the language converter from converting these
4753  // important bits of data, but put the headline hint inside a
4754  // content block because the language converter is supposed to
4755  // be able to convert that piece of data.
4756  // Gets replaced with html in ParserOutput::getText
4757  $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4758  $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4759  if ( $editsectionContent !== null ) {
4760  $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4761  } else {
4762  $editlink .= '/>';
4763  }
4764  } else {
4765  $editlink = '';
4766  }
4767  $head[$headlineCount] = Linker::makeHeadline( $level,
4768  $matches['attrib'][$headlineCount], $anchor, $headline,
4769  $editlink, $legacyAnchor );
4770 
4771  $headlineCount++;
4772  }
4773 
4774  $this->setOutputType( $oldType );
4775 
4776  # Never ever show TOC if no headers
4777  if ( $numVisible < 1 ) {
4778  $enoughToc = false;
4779  }
4780 
4781  if ( $enoughToc ) {
4782  if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
4783  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4784  }
4785  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4786  $this->mOutput->setTOCHTML( $toc );
4787  $toc = self::TOC_START . $toc . self::TOC_END;
4788  $this->mOutput->addModules( 'mediawiki.toc' );
4789  }
4790 
4791  if ( $isMain ) {
4792  $this->mOutput->setSections( $tocraw );
4793  }
4794 
4795  # split up and insert constructed headlines
4796  $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4797  $i = 0;
4798 
4799  // build an array of document sections
4800  $sections = [];
4801  foreach ( $blocks as $block ) {
4802  // $head is zero-based, sections aren't.
4803  if ( empty( $head[$i - 1] ) ) {
4804  $sections[$i] = $block;
4805  } else {
4806  $sections[$i] = $head[$i - 1] . $block;
4807  }
4808 
4819  Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $showEditLink ] );
4820 
4821  $i++;
4822  }
4823 
4824  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4825  // append the TOC at the beginning
4826  // Top anchor now in skin
4827  $sections[0] = $sections[0] . $toc . "\n";
4828  }
4829 
4830  $full .= implode( '', $sections );
4831 
4832  if ( $this->mForceTocPosition ) {
4833  return str_replace( '<!--MWTOC-->', $toc, $full );
4834  } else {
4835  return $full;
4836  }
4837  }
4838 
/**
 * Transform wikitext for saving: normalise line endings, optionally run the
 * pre-save pass (pstPass2) and unstrip the result.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title handed to startParse()
 * @param User $user User set on the parser for the duration of the call
 * @param ParserOptions $options Options; getPreSaveTransform() decides whether
 *   pstPass2() is run
 * @param bool $clearState Whether to take the parser lock and clear state
 * @return string The transformed text
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	if ( $clearState ) {
		// The return value of lock() is kept in a local until this method returns.
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Normalise line endings; "\r\n" is handled before a lone "\r".
	$text = str_replace( [ "\r\n", "\r" ], "\n", $text );
	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$transformed = $this->mStripState->unstripBoth( $text );

	# Reset
	$this->setUser( null );

	return $transformed;
}
4873 
/**
 * Pre-save transform helper: substitutes variables, expands signature
 * tildes and resolves "pipe trick" links.
 *
 * @param string $text Wikitext to transform
 * @param User $user User whose signature is inserted
 * @return string Transformed text with trailing whitespace removed
 */
private function pstPass2( $text, $user ) {
	global $wgContLang;

	# Note: This is the timestamp saved as hardcoded wikitext to
	# the database, we use $wgContLang here in order to give
	# everyone the same signature and use the default one rather
	# than the one selected in each user's preferences.
	# (see also bug 12815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	$sigText = $this->getUserSig( $user );
	$text = strtr( $text, [
		'~~~~~' => $d,
		'~~~~' => "$sigText $d",
		'~~~' => $sigText
	] );

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257)
	// The brackets must be the full-width characters: with ASCII parentheses
	// this would be a plain capture group matching any "[[page|]]" link.
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，)$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	# Trim trailing whitespace
	$text = rtrim( $text );

	return $text;
}
4946 
/**
 * Build the wikitext signature for a user.
 *
 * A "fancy" signature that passes validateSig() is returned after
 * cleanSig(); otherwise the 'signature' / 'signature-anon' message is
 * rendered with the escaped user name and nickname.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use, or false to read the
 *   'nickname' user option
 * @param bool|null $fancySig Whether the nickname is raw markup, or null to
 *   read the 'fancysig' user option
 * @return string
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	global $wgMaxSigChars;

	$username = $user->getName();

	# Fill in anything the caller left unspecified from the user object.
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( is_null( $fancySig ) ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# Loose comparison on purpose: an empty nickname also falls back.
	if ( $nickname == null ) {
		$nickname = $username;
	}

	$isOverlong = mb_strlen( $nickname ) > $wgMaxSigChars;
	if ( $isOverlong ) {
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		if ( $this->validateSig( $nickname ) !== false ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}
		# Failed to validate; fall back to the default
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	if ( $user->isAnon() ) {
		$msgName = 'signature-anon';
	} else {
		$msgName = 'signature';
	}

	$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();
	return $message->title( $this->getTitle() )->text();
}
5003 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|bool The unchanged text if it is well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}
	return false;
}
5013 
/**
 * Clean up signature text: force "{{" to its subst form, drop nested
 * signature tildes and run the text through the preprocessor.
 *
 * @param string $text
 * @param bool $parsing Whether a parse is already in progress. If false,
 *   the parser is locked and started on $wgTitle, and the result is
 *   unstripped before returning.
 * @return string Signature text
 */
public function cleanSig( $text, $parsing = false ) {
	global $wgTitle;

	if ( !$parsing ) {
		$magicScopeVariable = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = MagicWord::get( 'subst' );
	# Match every "{{" that is not already followed by the subst magic word
	$pattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$replacement = '{{' . $subst->getSynonym( 0 );

	$text = preg_replace( $pattern, $replacement, $text );
	$text = self::cleanSigInSig( $text );

	$dom = $this->preprocessToDom( $text );
	$frame = $this->getPreprocessor()->newFrame();
	$text = $frame->expand( $dom );

	if ( $parsing ) {
		return $text;
	}

	return $this->mStripState->unstripBoth( $text );
}
5054 
/**
 * Strip runs of 3, 4 or 5 tildes out of signatures.
 *
 * @param string $text
 * @return string Signature text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
5065 
/**
 * Set up some variables which are usually set up in parse()
 * so that an external function can call some class members with confidence.
 * Thin public wrapper around startParse().
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->startParse( $title, $options, $outputType, $clearState );
}
5080 
/**
 * Prepare the parser for a run: set the title, options and output type,
 * and optionally clear the parser state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to call clearState() afterwards
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
5097 
/**
 * Wrapper for preprocess() that guards against re-entrant calls.
 *
 * @param string $text The text to preprocess
 * @param ParserOptions $options
 * @param Title|null $title Title to use; falls back to $wgTitle when empty
 * @return string The preprocessed text, or $text unchanged if a call to
 *   this method is already in progress
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Always release the guard, even if preprocess() throws; otherwise
		# every later call would silently return its input untransformed.
		$executing = false;
	}
}
5125 
/**
 * Register a callback for an extension tag and add the tag to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $badChar ) ) {
		throw new MWException( "Invalid character {$badChar[0]} in setHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mTagHooks[$tag] ) ) {
		$previous = $this->mTagHooks[$tag];
	}
	$this->mTagHooks[$tag] = $callback;

	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
5163 
/**
 * Register a callback for a transparent tag.
 *
 * Unlike setHook(), this does not add the tag to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setTransparentTagHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		// Name the real method so the failing call site can be found.
		throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
5191 
/**
 * Remove all tag hooks and function tag hooks, and reset the strip list
 * to the default list.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
5200 
/**
 * Register a parser function callback and index every synonym of its
 * magic word in the function synonym cache.
 *
 * @param string $id The magic word ID
 * @param callable $callback The callback to register
 * @param int $flags Bit field of self::SFH_* flags. When SFH_NO_HASH is
 *   set, synonyms are registered without a leading '#'.
 * @throws MWException If no magic word is found for $id
 * @return callable|null The callback previously registered for $id, if any
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
	global $wgContLang;

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$mw = MagicWord::get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $mw->getSynonyms();
	# Used as an array index: 0 for case-insensitive, 1 for case-sensitive
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $wgContLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
5276 
/**
 * Get the IDs of all registered parser function hooks.
 *
 * @return array Keys of the function hook registry
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}
5285 
/**
 * Register a function tag hook and add the tag to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback
 * @param int $flags Stored alongside the callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return array|null The [ callback, flags ] pair previously registered, if any
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $badChar ) ) {
		throw new MWException( "Invalid character {$badChar[0]} in setFunctionTagHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mFunctionTagHooks[$tag] ) ) {
		$previous = $this->mFunctionTagHooks[$tag];
	}
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
5311 
/**
 * Replace link placeholders in $text by delegating to the link holder array.
 *
 * @param string &$text Passed by reference to the holder's replace()
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5322 
/**
 * Run the link holder array's replaceText() over $text and return the result.
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;
	return $holders->replaceText( $text );
}
5333 
/**
 * Render the contents of a gallery tag as HTML.
 *
 * Each non-empty line of $text is read as "Title|param|param|...". Lines
 * whose title cannot be parsed are skipped.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Tag attributes; 'mode', 'showfilename', 'caption',
 *   'perrow', 'widths' and 'heights' are read here, and the whole array is
 *   passed on to the gallery object
 * @return string HTML
 */
public function renderImageGallery( $text, $params ) {

	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

	// Only the presence of the attribute matters, not its value.
	$ig->setShowFilename( isset( $params['showfilename'] ) );
	if ( isset( $params['caption'] ) ) {
		$caption = $params['caption'];
		$caption = htmlspecialchars( $caption );
		$caption = $this->replaceInternalLinks( $caption );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1] );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as ImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap = $paramMap + $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = new MagicWordArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			$parameterMatches = StringUtils::explode( '|', $matches[3] );

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							// check to see if link matches an absolute url, if not then it must be a wiki link.
							if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
								$link = $linkValue;
								$this->mOutput->addExternalLink( $link );
							} else {
								$localLinkTitle = Title::newFromText( $linkValue );
								if ( $localLinkTitle !== null ) {
									$this->mOutput->addLink( $localLinkTitle );
									$link = $localLinkTitle->getLinkURL();
								}
							}
							break;
						default:
							// Must be a handler specific parameter.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label = '|' . $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = '|' . $parameterMatch;
				}
			}
			// Remove the pipe.
			$label = substr( $label, 1 );
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5504 
/**
 * Get the image parameter map and matching MagicWordArray for a handler.
 *
 * Results are cached per handler class name (empty string for no handler).
 *
 * @param MediaHandler|bool $handler Handler object, or a falsy value for none
 * @return array Two elements: the map of magic word ID to [ type, name ],
 *   and the MagicWordArray built from the map's keys
 */
public function getImageParams( $handler ) {
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$handlerClass] ) ) {
		# Initialise static lists
		static $internalParamNames = [
			'horizAlign' => [ 'left', 'right', 'center', 'none' ],
			'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
				'bottom', 'text-bottom' ],
			'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
				'upright', 'border', 'link', 'alt', 'class' ],
		];
		static $internalParamMap;
		if ( !$internalParamMap ) {
			$internalParamMap = [];
			foreach ( $internalParamNames as $paramType => $paramNames ) {
				foreach ( $paramNames as $internalName ) {
					# Magic word IDs use underscores where the names have hyphens
					$magicId = str_replace( '-', '_', "img_$internalName" );
					$internalParamMap[$magicId] = [ $paramType, $internalName ];
				}
			}
		}

		# Add handler params
		$combinedMap = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
				$combinedMap[$magicId] = [ 'handler', $handlerParam ];
			}
		}
		$this->mImageParams[$handlerClass] = $combinedMap;
		$this->mImageParamsMagicArray[$handlerClass] =
			new MagicWordArray( array_keys( $combinedMap ) );
	}

	return [
		$this->mImageParams[$handlerClass],
		$this->mImageParamsMagicArray[$handlerClass]
	];
}
5548 
/**
 * Parse pipe-separated image options text and build the image link HTML.
 *
 * @param Title $title
 * @param string $options Pipe-separated options; a part that is not a
 *   validated option becomes the caption (the last such part wins)
 * @param LinkHolderArray|bool $holders Passed through to stripAltText()
 * @return string Result of Linker::makeImageLink()
 */
public function makeImage( $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	$optionParts = StringUtils::explode( "|", $options );

	# Give extensions a chance to select the file revision for us
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [
		'frame' => [],
		'handler' => [],
		'horizAlign' => [],
		'vertAlign' => [],
	];
	$formatSeen = false;
	foreach ( $optionParts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$dimensions = $this->parseWidthParam( $value );
				if ( isset( $dimensions['width'] ) ) {
					$width = $dimensions['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $dimensions['height'] ) ) {
					$height = $dimensions['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- bug 13436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							if ( $value === '' ) {
								# Empty link= means "do not link the image"
								$paramName = 'no-link';
								$value = true;
								$validated = true;
							} elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
								# Starts with a known protocol: accept only a full URL
								if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
									$paramName = 'link-url';
									$this->mOutput->addExternalLink( $value );
									if ( $this->mOptions->getExternalLinkTarget() ) {
										$params[$type]['link-target'] =
											$this->mOptions->getExternalLinkTarget();
									}
									$validated = true;
								}
							} else {
								$targetTitle = Title::newFromText( $value );
								if ( $targetTitle ) {
									$paramName = 'link-title';
									$value = $targetTitle;
									$this->mOutput->addLink( $targetTitle );
									$validated = true;
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$formatSeen;
							$formatSeen = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		if ( !$validated ) {
			# Anything that is not a validated option is treated as the caption
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$frameOptions = $params['frame'];
	$imageIsFramed = isset( $frameOptions['frame'] )
		|| isset( $frameOptions['framed'] )
		|| isset( $frameOptions['thumbnail'] )
		|| isset( $frameOptions['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	$hasAlt = isset( $params['frame']['alt'] );
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !$hasAlt ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !$hasAlt ) {
			if ( $caption === '' ) {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			} else {
				# No alt text, use the "caption" for the alt text
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = isset( $options['time'] ) ? $options['time'] : false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5766 
/**
 * Turn caption text into plain text for use in an attribute: expand link
 * holders and strip markers, then remove all tags.
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Holder array to expand links with;
 *   when falsy, replaceLinkHoldersText() is used instead
 * @return string
 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
	$tooltip = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$unstripped = $this->mStripState->unstripBoth( $tooltip );

	return Sanitizer::stripAllTags( $unstripped );
}
5790 
/**
 * Mark the current parser output as uncacheable by setting its cache
 * expiry to zero.
 *
 * @throws MWException When there is no current output object, i.e. the
 *   method is called outside of a parse
 */
public function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	if ( $this->mOutput ) {
		# new style, for consistency
		$this->mOutput->updateCacheExpiry( 0 );
		return;
	}

	throw new MWException( __METHOD__ . " can only be called when actually parsing something" );
}
5803 
/**
 * Expand variables in the given text and then restore stripped content.
 *
 * The text is modified in place (it is taken by reference) and also returned.
 *
 * @param string &$text Text to process; overwritten with the result
 * @param mixed $frame Frame handed through to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $expanded );

	return $text;
}
5817 
/**
 * Accessor: list the names of all registered tag hooks.
 *
 * @return array Keys of the transparent tag hooks, followed by the keys of
 *   the regular tag hooks and of the function tag hooks
 */
public function getTags() {
	$transparent = array_keys( $this->mTransparentTagHooks );
	$regular = array_keys( $this->mTagHooks );
	$functional = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparent, $regular, $functional );
}
5830 
/**
 * Replace every transparent tag in $text with the output of its hook.
 *
 * The tags are first swapped for markers via extractTagsAndParams(); each
 * marker is then substituted with what the matching hook returns. A tag
 * with no registered hook is put back unchanged.
 *
 * @param string $text
 * @return string Text with the transparent tags replaced
 */
public function replaceTransparentTags( $text ) {
	$extracted = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $extracted );

	$substitutions = [];
	foreach ( $extracted as $marker => $info ) {
		list( $element, $content, $params, $tag ) = $info;
		$hookKey = strtolower( $element );

		if ( !isset( $this->mTransparentTagHooks[$hookKey] ) ) {
			# No hook for this element: restore the original tag text
			$substitutions[$marker] = $tag;
			continue;
		}

		$substitutions[$marker] = call_user_func_array(
			$this->mTransparentTagHooks[$hookKey],
			[ $content, $params, $this ]
		);
	}

	return strtr( $text, $substitutions );
}
5862 
/**
 * Shared worker behind getSection() and replaceSection().
 *
 * The text is preprocessed to a DOM whose top-level <h> nodes mark the
 * section breaks. In 'get' mode the wikitext of the requested section
 * (with its nested subsections) is returned; in 'replace' mode the whole
 * text is returned with that section swapped for $newText.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section index, optionally preceded by
 *   '-'-separated flags; the flag 'T' enables PTD_FOR_INCLUSION
 * @param string $mode Either 'get' or 'replace'
 * @param string $newText Replacement text in 'replace' mode; in 'get' mode
 *   the value returned when the section does not exist
 * @return string
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Keep the scoped lock in a local so it lives until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$frame = $this->getPreprocessor()->newFrame();

	$isGet = ( $mode === 'get' );
	$isReplace = ( $mode === 'replace' );
	# Result to hand back whenever the requested section cannot be found
	$fallback = $isGet ? $newText : $text;

	# Split "<flags>-<index>" into its pieces
	$idParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $idParts );
	$flags = in_array( 'T', $idParts, true ) ? self::PTD_FOR_INCLUSION : 0;

	# An empty document only contains section 0 (or T-0)
	if ( (string)$text === '' ) {
		if ( $sectionIndex != 0 ) {
			return $fallback;
		}
		return $isGet ? '' : $newText;
	}

	# <h> nodes indicate section breaks. They can only occur at the top
	# level, so walking the root's children is enough to find them.
	$node = $this->preprocessToDom( $text, $flags )->getFirstChild();
	$outText = '';

	# Locate the start of the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			# In replace mode everything ahead of the target is kept verbatim
			if ( $isReplace ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $fallback;
	}

	# Walk to the end of the section, nested subsections included: stop at
	# the first other heading whose level is not deeper than the target's
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			if ( $heading['i'] != $sectionIndex && $heading['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $isGet ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( $isReplace ) {
		# Trailing whitespace in $newText is conventionally stripped by the
		# editor, so two newlines are appended to restore the paragraph gap.
		# Nothing is appended when there is no replacement text.
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		# Copy whatever follows the replaced section
		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Re-insert stripped tags and drop trailing whitespace
	return rtrim( $this->mStripState->unstripBoth( $outText ) );
}
6002 
/**
 * Return the text of a single section of $text.
 *
 * Thin wrapper that runs extractSections() in 'get' mode.
 *
 * @param string $text Text to look in
 * @param string|int $sectionId Section identifier
 * @param string $defaultText Value returned when the section is not found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';

	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
6020 
/**
 * Return $oldText with one section replaced by $newText.
 *
 * Thin wrapper that runs extractSections() in 'replace' mode.
 *
 * @param string $oldText Former text of the page
 * @param string|int $sectionId Section identifier
 * @param string $newText Text that takes the section's place
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';

	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
6036 
/**
 * Accessor for the revision ID stored on this parser.
 *
 * @return mixed Current value of mRevisionId
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;

	return $revisionId;
}
6045 
/**
 * Get the revision object for $this->mRevisionId.
 *
 * The result is cached in mRevisionObject once it has been looked up.
 *
 * @return mixed The revision object, or null when no revision ID is set
 *   (the lookup itself may also yield a falsy value)
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject !== null ) {
		return $this->mRevisionObject;
	}
	if ( $this->mRevisionId === null ) {
		return null;
	}

	$fetchCurrent = $this->mOptions->getCurrentRevisionCallback();
	$revision = call_user_func( $fetchCurrent, $this->getTitle(), $this );

	# If the parse is for a new revision, the callback should already have
	# been set to force the object and should match mRevisionId. When it
	# does not match, fetch by mRevisionId instead.
	if ( $revision && $revision->getId() != $this->mRevisionId ) {
		$revision = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $revision;

	return $revision;
}
6075 
/**
 * Get the timestamp associated with the current revision, adjusted to the
 * site-default timezone offset.
 *
 * The value is computed once and cached in mRevisionTimestamp. When no
 * revision object is available the current time is used instead.
 *
 * @return mixed The value produced by $wgContLang->userAdjust()
 */
public function getRevisionTimestamp() {
	if ( is_null( $this->mRevisionTimestamp ) ) {
		# The content language object lives in the global scope; without
		# this declaration $wgContLang would be an undefined local here.
		global $wgContLang;

		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6098 
/**
 * Get the name of the user that edited the last revision.
 *
 * The value is cached in mRevisionUser once determined.
 *
 * @return mixed User name, or the still-unset mRevisionUser value when
 *   there is no revision object and this is neither a 'wiki' output-type
 *   parse nor a preview
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionObject();

	if ( $revision ) {
		$this->mRevisionUser = $revision->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this template is subst: the revision id will be blank,
		# so just use the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6118 
/**
 * Get the size of the revision.
 *
 * The value is cached in mRevisionSize once determined.
 *
 * @return mixed Size reported by the revision object, the parser input
 *   size for 'wiki' output-type parses and previews without a revision
 *   object, or the still-unset mRevisionSize value otherwise
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revision = $this->getRevisionObject();

	if ( $revision ) {
		$this->mRevisionSize = $revision->getSize();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this variable is subst: the revision id will be blank, so
		# just use the parser input size, because the substitution itself
		# will change the size.
		$this->mRevisionSize = $this->mInputSize;
	}

	return $this->mRevisionSize;
}
6139 
/**
 * Set the default sort key, both on the parser and as the 'defaultsort'
 * property of the current output object.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$parserOutput = $this->mOutput;
	$parserOutput->setProperty( 'defaultsort', $sort );
}
6149 
/**
 * Get the default sort key, or an empty string when none has been set.
 *
 * "Not set" means mDefaultSort is exactly false.
 *
 * @return mixed The stored sort key, or '' when it is false
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6167 
/**
 * Accessor for the raw default sort key.
 *
 * Unlike getDefaultSort(), the stored value is returned untouched, so a
 * value of false is passed through as-is.
 *
 * @return mixed Current value of mDefaultSort
 */
public function getCustomDefaultSort() {
	$customSort = $this->mDefaultSort;

	return $customSort;
}
6177 
/**
 * Guess the anchor ('#'-prefixed fragment) for a section heading given as
 * wikitext.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped section name
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip them out first
	$sectionName = $this->stripSectionName( $text );
	$anchor = Sanitizer::escapeId( $sectionName, 'noninitial' );

	return '#' . $anchor;
}
6193 
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped section name
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip them out first
	$sectionName = $this->stripSectionName( $text );
	$anchor = Sanitizer::escapeId( $sectionName, [ 'noninitial', 'legacy' ] );

	return '#' . $anchor;
}
6208 
/**
 * Strips a text string of wikitext for use in a section anchor.
 *
 * Internal and external link markup is reduced to its label, wikitext
 * quotes are processed by doQuotes(), and anything between '<' and '>'
 * is removed.
 *
 * @param string $text Section heading wikitext
 * @return string
 */
public function stripSectionName( $text ) {
	# @todo FIXME: The external link rule is not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';

	# Applied strictly in this order, each rule on the previous rule's output
	$rules = [
		# Internal link with a pipe: keep the label
		[ '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2' ],
		# Internal link without a label: keep the target
		[ '/\[\[:?([^[]+)\|?\]\]/', '$1' ],
		# External link: keep the label
		[ $externalLink, '$2' ],
	];
	foreach ( $rules as $rule ) {
		$text = preg_replace( $rule[0], $rule[1], $text );
	}

	# Parse wikitext quotes (italics & bold)
	$quoted = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $quoted );
}
6241 
/**
 * strip/replaceVariables/unstrip for preprocessor regression testing
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @return string
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	# Keep the scoped lock in a local so it lives until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6263 
/**
 * Run preSaveTransform() on $text, acting as the user taken from $options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string Whatever preSaveTransform() returns
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
6273 
/**
 * Run testSrvus() with the OT_PREPROCESS output type.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;

	return $this->testSrvus( $text, $title, $options, $outputType );
}
6283 
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return value
 * of the callback.
 *
 * Strip markers (MARKER_PREFIX ... MARKER_SUFFIX) are copied through
 * untouched. A marker prefix with no closing suffix causes the rest of
 * the string to be copied verbatim.
 *
 * @param string $s Text to process
 * @param callable $callback Receives one text region, returns its replacement
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$result = '';
	$pos = 0;
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	while ( $pos < $length ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			# No further markers: the remainder is one plain region
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Plain region leading up to the marker
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			# Unterminated marker: pass the rest through untouched
			$result .= substr( $s, $start );
			break;
		}

		# Copy the whole marker, suffix included, and continue after it
		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
6323 
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the killMarkers() method of the current strip state.
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6333 
/**
 * Save the parser state required to convert the given half-parsed text to
 * HTML.
 *
 * @param string $text Half-parsed text
 * @return array Array with the keys 'text', 'version', 'stripState' and
 *   'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		# Only the parts of the state that relate to $text are requested
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
6359 
/**
 * Load the state from an array shaped like the one built by
 * serializeHalfParsedText() into this parser and return its text.
 *
 * @param array $data Array with the keys 'text', 'version', 'stripState'
 *   and 'linkHolders'
 * @throws MWException When the version is missing or does not match
 *   HALF_PARSED_VERSION
 * @return string The text after merging the strip state and link holders
 */
public function unserializeHalfParsedText( $data ) {
	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	$texts = [ $data['text'] ];

	# First, extract the strip state.
	$texts = $this->mStripState->merge( $data['stripState'], $texts );

	# Now renumber links
	$texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );

	# Should be good to go.
	return $texts[0];
}
6390 
/**
 * Returns true if the given array, presumed to be generated by
 * serializeHalfParsedText(), is compatible with the current version of
 * the parser.
 *
 * @param array $data
 * @return bool
 */
public function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
6403 
/**
 * Parse a width specification such as "100px", "100x200" or "100x200px".
 *
 * @param string $value
 * @return array Empty for '' or unparsable input; otherwise the key
 *   'width', plus 'height' for the "<width>x<height>" form. Missing digits
 *   yield 0.
 */
public function parseWidthParam( $value ) {
	if ( $value === '' ) {
		return [];
	}

	# (bug 13500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$m = [];
	if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
		return [
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6431 
/**
 * Mark this parser as being in the middle of a parse.
 *
 * @throws MWException When the parser is already marked as parsing
 * @return ScopedCallback Object whose callback resets mInParse to false;
 *   callers keep it in a local variable for the duration of the parse
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException(
			"Parser state cleared while parsing. Did you call Parser::parse recursively?"
		);
	}

	$this->mInParse = true;

	return new ScopedCallback( function() {
		$this->mInParse = false;
	} );
}
6454 
/**
 * Strip a single outer <p></p> wrapper from the given HTML.
 *
 * The input is returned unchanged when it is not wrapped in exactly one
 * paragraph, i.e. when the wrapped content itself contains a '</p>'.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );

	# A closing tag inside the capture means there are several paragraphs
	if ( $isWrapped && strpos( $m[1], '</p>' ) === false ) {
		return $m[1];
	}

	return $html;
}
6475 
/**
 * Return a parser that is not in the middle of a parse.
 *
 * @return Parser $this when it is idle; otherwise a new instance of the
 *   class named by $wgParserConf['class'], constructed with $wgParserConf
 */
public function getFreshParser() {
	global $wgParserConf;

	if ( !$this->mInParse ) {
		return $this;
	}

	$parserClass = $wgParserConf['class'];

	return new $parserClass( $wgParserConf );
}
6494 
/**
 * Flag the current parser output as needing OOUI, by calling
 * setEnableOOUI( true ) on it.
 */
public function enableOOUI() {
	$parserOutput = $this->mOutput;
	$parserOutput->setEnableOOUI( true );
}
6505 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:6052
setTitle($t)
Set the context title.
Definition: Parser.php:750
$mAutonumber
Definition: Parser.php:184
markerSkipCallback($s, $callback)
Call a callback function on all regions of the given text that are not inside strip markers...
Definition: Parser.php:6300
#define the
table suitable for use with IDatabase::select()
$mPPNodeCount
Definition: Parser.php:198
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2062
static getVariableIDs()
Get an array of parser variable IDs.
Definition: MagicWord.php:271
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:117
const MARKER_PREFIX
Definition: Parser.php:141
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
Definition: deferred.txt:11
external whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2562
isValidHalfParsedText($data)
Returns true if the given array, presumed to be generated by serializeHalfParsedText(), is compatible with the current version of the parser.
Definition: Parser.php:6400
null means default in associative array form
Definition: hooks.txt:1802
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1802
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1734
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
getSection($text, $sectionId, $defaultText= '')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:6017
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1249
$mTplRedirCache
Definition: Parser.php:200
killMarkers($text)
Remove any strip markers found in the given text.
Definition: Parser.php:6330
wfGetDB($db, $groups=[], $wiki=false)
Get a Database object.
static tocList($toc, $lang=false)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1745
fetchTemplateAndTitle($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3954
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:766
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:6104
setFunctionTagHook($tag, $callback, $flags)
Create a tag function, e.g.
Definition: Parser.php:5296
the array() calling protocol came about after MediaWiki 1.4rc1.
stripSectionName($text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6223
null for the local wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1422
const OT_PREPROCESS
Definition: Defines.php:228
$mLastSection
Definition: Parser.php:191
static linkKnown($target, $html=null, $customAttribs=[], $query=[], $options=[ 'known', 'noclasses'])
Identical to link(), except $options defaults to 'known'.
Definition: Linker.php:264
$mDoubleUnderscores
Definition: Parser.php:200
magic word the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2325
Group all the pieces relevant to the context of a request into one instance.
getPreloadText($text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:698
$context
Definition: load.php:44
validateSig($text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:5010
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:250
$wgSitename
Name of the site.
renderImageGallery($text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5347
recursivePreprocess($text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:679
replaceExternalLinks($text)
Replace external links (REL)
Definition: Parser.php:1792
static isNonincludable($index)
Is it not possible to use pages from this namespace as templates?
nextLinkID()
Definition: Parser.php:839
const SPACE_NOT_NL
Definition: Parser.php:100
static replaceUnusualEscapes($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1917
getImageParams($handler)
Definition: Parser.php:5509
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
doHeadings($text)
Parse headers and return html.
Definition: Parser.php:1571
static getTitleFor($name, $subpage=false, $fragment= '')
Get a localised Title object for a specified special page name.
Definition: SpecialPage.php:75
const OT_PLAIN
Definition: Parser.php:121
getTags()
Accessor.
Definition: Parser.php:5823
findColonNoLinks($str, &$before, &$after)
Split up a string on ':', ignoring any occurrences inside tags to prevent illegal overlapping...
Definition: Parser.php:2775
static isWellFormedXmlFragment($text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:735
const OT_WIKI
Definition: Parser.php:118
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:1936
fetchFileAndTitle($title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:4096
User $mUser
Definition: Parser.php:207
We use the convention $dbr for read and $dbw for write to help you keep track of whether the database object is a the world will explode Or to be a subsequent write query which succeeded on the master may fail when replicated to the slave due to a unique key collision Replication on the slave will stop and it may take hours to repair the database and get it back online Setting read_only in my cnf on the slave will avoid this but given the dire we prefer to have as many checks as possible We provide a but the wrapper functions like please read the documentation for except in special pages derived from QueryPage It s a common pitfall for new developers to submit code containing SQL queries which examine huge numbers of rows Remember that COUNT * is(N), counting rows in atable is like counting beans in a bucket.------------------------------------------------------------------------Replication------------------------------------------------------------------------The largest installation of MediaWiki, Wikimedia, uses a large set ofslave MySQL servers replicating writes made to a master MySQL server.Itis important to understand the issues associated with this setup if youwant to write code destined for Wikipedia.It's often the case that the best algorithm to use for a given taskdepends on whether or not replication is in use.Due to our unabashedWikipedia-centrism, we often just use the replication-friendly version, but if you like, you can use wfGetLB() ->getServerCount() > 1 tocheck to see if replication is in use.===Lag===Lag primarily occurs when large write queries are sent to the master.Writes on the master are executed in parallel, but they are executed inserial when they are replicated to the slaves.The master writes thequery to the binlog when the transaction is committed.The slaves pollthe binlog and start executing the query as soon as it appears.They canservice reads while they are performing a write query, but will not readanything more from the binlog and 
thus will perform no more writes.Thismeans that if the write query runs for a long time, the slaves will lagbehind the master for the time it takes for the write query to complete.Lag can be exacerbated by high read load.MediaWiki's load balancer willstop sending reads to a slave when it is lagged by more than 30 seconds.If the load ratios are set incorrectly, or if there is too much loadgenerally, this may lead to a slave permanently hovering around 30seconds lag.If all slaves are lagged by more than 30 seconds, MediaWiki will stopwriting to the database.All edits and other write operations will berefused, with an error returned to the user.This gives the slaves achance to catch up.Before we had this mechanism, the slaves wouldregularly lag by several minutes, making review of recent editsdifficult.In addition to this, MediaWiki attempts to ensure that the user seesevents occurring on the wiki in chronological order.A few seconds of lagcan be tolerated, as long as the user sees a consistent picture fromsubsequent requests.This is done by saving the master binlog positionin the session, and then at the start of each request, waiting for theslave to catch up to that position before doing any reads from it.Ifthis wait times out, reads are allowed anyway, but the request isconsidered to be in"lagged slave mode".Lagged slave mode can bechecked by calling wfGetLB() ->getLaggedSlaveMode().The onlypractical consequence at present is a warning displayed in the pagefooter.===Lag avoidance===To avoid excessive lag, queries which write large numbers of rows shouldbe split up, generally to write one row at a time.Multi-row INSERT...SELECT queries are the worst offenders should be avoided altogether.Instead do the select first and then the insert.===Working with lag===Despite our best efforts, it's not practical to guarantee a low-lagenvironment.Lag will usually be less than one second, but mayoccasionally be up to 30 seconds.For scalability, it's very importantto keep load on 
the master low, so simply sending all your queries tothe master is not the answer.So when you have a genuine need forup-to-date data, the following approach is advised:1) Do a quick query to the master for a sequence number or timestamp 2) Run the full query on the slave and check if it matches the data you gotfrom the master 3) If it doesn't, run the full query on the masterTo avoid swamping the master every time the slaves lag, use of thisapproach should be kept to a minimum.In most cases you should just readfrom the slave and let the user deal with the delay.------------------------------------------------------------------------Lock contention------------------------------------------------------------------------Due to the high write rate on Wikipedia(and some other wikis), MediaWiki developers need to be very careful to structure their writesto avoid long-lasting locks.By default, MediaWiki opens a transactionat the first query, and commits it before the output is sent.Locks willbe held from the time when the query is done until the commit.So youcan reduce lock time by doing as much processing as possible before youdo your write queries.Often this approach is not good enough, and it becomes necessary toenclose small groups of queries in their own transaction.Use thefollowing syntax:$dbw=wfGetDB(DB_MASTER
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:3288
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1802
static isEnabled()
Definition: MWTidy.php:92
Set options of the Parser.
static tidy($text)
Interface with html tidy.
Definition: MWTidy.php:45
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:5282
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
database rows
Definition: globals.txt:10
wfHostname()
Fetch server name for use in error reporting etc.
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:854
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:189
argSubstitution($piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:4199
testSrvus($text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
strip/replaceVariables/unstrip for preprocessor regression testing
Definition: Parser.php:6252
uniqPrefix()
Accessor for mUniqPrefix.
Definition: Parser.php:740
const TOC_START
Definition: Parser.php:144
Title($x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:778
SectionProfiler $mProfiler
Definition: Parser.php:259
$sort
fetchFileNoRegister($title, $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:4121
null for the local wiki Added in
Definition: hooks.txt:1422
There are three types of nodes:
$mHeadings
Definition: Parser.php:200
$value
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:5195
const COLON_STATE_TAGSLASH
Definition: Parser.php:107
static makeSelfLinkObj($nt, $html= '', $query= '', $trail= '', $prefix= '')
Make appropriate markup for a link to the current article.
Definition: Linker.php:409
const NS_SPECIAL
Definition: Defines.php:58
clearState()
Clear Parser state.
Definition: Parser.php:345
__construct($conf=[])
Definition: Parser.php:264
const EXT_LINK_ADDR
Definition: Parser.php:92
$mFirstCall
Definition: Parser.php:159
interwikiTransclude($title, $action)
Transclude an interwiki link.
Definition: Parser.php:4140
pstPass2($text, $user)
Pre-save transform helper function.
Definition: Parser.php:4882
guessLegacySectionNameFromWikiText($text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead.
Definition: Parser.php:6202
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Options($x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:832
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2552
serializeHalfParsedText($text)
Save the parser state required to convert the given half-parsed text to HTML.
Definition: Parser.php:6350
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer. Placeholders are created in Link...
Definition: Parser.php:5319
static activeUsers()
Definition: SiteStats.php:161
$mLinkID
Definition: Parser.php:197
doQuotes($text)
Helper function for doAllQuotes()
Definition: Parser.php:1604
preprocessToDom($text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:3318
limitationWarn($limitationType, $current= '', $max= '')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:3440
static cleanUrl($url)
Definition: Sanitizer.php:1818
wfUrlencode($s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:277
$mGeneratedPPNodeCount
Definition: Parser.php:198
Represents a title within MediaWiki.
Definition: Title.php:34
static getRandomString()
Get a random string.
Definition: Parser.php:719
$mRevisionId
Definition: Parser.php:224
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1785
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
doBlockLevels($text, $linestart)
#@-
Definition: Parser.php:2560
$wgArticlePath
Definition: img_auth.php:45
OutputType($x=null)
Accessor/mutator for the output type.
Definition: Parser.php:804
const NS_TEMPLATE
Definition: Defines.php:79
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:117
const COLON_STATE_COMMENTDASHDASH
Definition: Parser.php:110
getVariableValue($index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2933
recursiveTagParse($text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:610
const NO_ARGS
magic word & $parser
Definition: hooks.txt:2325
MagicWordArray $mVariables
Definition: Parser.php:166
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:716
const SFH_NO_HASH
Definition: Parser.php:82
const COLON_STATE_COMMENTDASH
Definition: Parser.php:109
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
wfRandomString($length=32)
Get a random string containing a number of pseudo-random hex characters.
$mForceTocPosition
Definition: Parser.php:202
preprocess($text, Title $title=null, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:653
static getCacheTTL($id)
Allow external reads of TTL array.
Definition: MagicWord.php:294
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:6042
const OT_PREPROCESS
Definition: Parser.php:119
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1616
maybeDoSubpageLink($target, &$text)
Handle link to subpage if necessary.
Definition: Parser.php:2427
$mFunctionSynonyms
Definition: Parser.php:151
If you want to remove the page from your watchlist later
replaceLinkHoldersText($text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:5330
setLinkID($id)
Definition: Parser.php:846
$mOutputType
Definition: Parser.php:221
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
$mDefaultStripList
Definition: Parser.php:154
static createAssocArgs($args)
Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
Definition: Parser.php:3392
$mExtLinkBracketedRegex
Definition: Parser.php:173
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page.Return false to stop further processing of the tag $reader:XMLReader object &$pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports.&$fullInterwikiPrefix:Interwiki prefix, may contain colons.&$pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable.Can be used to lazy-load the import sources list.&$importSources:The value of $wgImportSources.Modify as necessary.See the comment in DefaultSettings.php for the detail of how to structure this array. 
'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.&$title:Title object for the current page &$request:WebRequest &$ignoreRedirect:boolean to skip redirect check &$target:Title/string of redirect target &$article:Article object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) &$article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() &$ip:IP being check &$result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 
'LanguageGetMagic':DEPRECATED!Use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language &$magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED!Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language &$specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 'LinkBegin':Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1800
if($line===false) $args
Definition: cdb.php:64
the value to return A Title object or null for latest to be modified or replaced by the hook handler or if authentication is not possible after cache objects are set for highlighting & $link
Definition: hooks.txt:2585
static getLocalInstance($ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
const COLON_STATE_TAG
Definition: Parser.php:104
static getDoubleUnderscoreArray()
Get a MagicWordArray of double-underscore entities.
Definition: MagicWord.php:307
static splitTrail($trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1818
getTemplateDom($title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3879
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1462
openList($char)
These next three functions open, continue, and close the list element appropriate to the prefix chara...
Definition: Parser.php:2481
cleanSig($text, $parsing=false)
Clean up signature text.
Definition: Parser.php:5024
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static factory($mode=false, IContextSource $context=null)
Get a new image gallery.
$wgLanguageCode
Site language code.
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:129
Class for asserting that a callback happens when a dummy object leaves scope.
$wgExtraInterlanguageLinkPrefixes
List of additional interwiki prefixes that should be treated as interlanguage links (i...
startExternalParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:5075
wfCgiToArray($query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
wfDebugLog($logGroup, $text, $dest= 'all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not...
static capturePath(Title $title, IContextSource $context)
Just like executePath() but will override global variables and execute the page in "inclusion" mode...
const NO_TEMPLATES
addTrackingCategory($msg)
Definition: Parser.php:4420
replaceInternalLinks($s)
Process [[ ]] wikilinks.
Definition: Parser.php:2049
$mVarCache
Definition: Parser.php:155
$wgStylePath
The URL path of the skins directory.
disableCache()
Set a flag in the output object indicating that the content is dynamic and shouldn't be cached...
Definition: Parser.php:5795
$mRevisionObject
Definition: Parser.php:223
static normalizeSectionNameWhitespace($section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the id's that are used for section links.
Definition: Sanitizer.php:1343
internalParse($text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1236
Title $mTitle
Definition: Parser.php:220
static delimiterReplace($startDelim, $endDelim, $replace, $subject, $flags= '')
Perform an operation equivalent to preg_replace() with flags.
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:290
wfEscapeWikiText($text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:6081
static stripOuterParagraph($html)
Strip outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6465
static register($parser)
$mRevIdForTs
Definition: Parser.php:228
static singleton()
Get an instance of this class.
Definition: LinkCache.php:61
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
static normalizeSubpageLink($contextTitle, $target, &$text)
Definition: Linker.php:1547
parseWidthParam($value)
Parse a width param of an image link, like 300px or 200x300px.
Definition: Parser.php:6412
$mStripList
Definition: Parser.php:153
$mFunctionTagHooks
Definition: Parser.php:152
fetchScaryTemplateMaybeFromCache($url)
Definition: Parser.php:4159
const OT_PLAIN
Definition: Defines.php:230
fetchCurrentRevisionOfTitle($title)
Fetch the current revision of a given title.
Definition: Parser.php:3922
$mRevisionTimestamp
Definition: Parser.php:225
$mImageParams
Definition: Parser.php:156
stripAltText($caption, $holders)
Definition: Parser.php:5772
doAllQuotes($text)
Replace single quotes with HTML markup.
Definition: Parser.php:1587
static replaceMarkup($search, $replace, $text)
More or less "markup-safe" str_replace(). Ignores any instances of the separator inside <...
static normalizeUrlComponent($component, $unsafe)
Definition: Parser.php:1967
if($limit) $timestamp
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:73
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1008
$mInPre
Definition: Parser.php:191
setHook($tag, $callback)
Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>".
Definition: Parser.php:5150
const OT_WIKI
Definition: Defines.php:227
Preprocessor $mPreprocessor
Definition: Parser.php:177
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:907
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of normal(non-web) applications--they might conflict with distributors'policies
static getInstance($ts=false)
Get a timestamp instance in GMT.
const NS_MEDIA
Definition: Defines.php:57
closeList($char)
Definition: Parser.php:2533
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:59
replaceVariables($text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3363
const RECOVER_ORIG
wfMatchesDomainList($url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
StripState $mStripState
Definition: Parser.php:189
$mDefaultSort
Definition: Parser.php:199
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:895
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
incrementIncludeSize($type, $size)
Increment an include size counter.
Definition: Parser.php:4338
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1006
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
const EXT_IMAGE_REGEX
Definition: Parser.php:95
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:5087
$params
const NS_CATEGORY
Definition: Defines.php:83
static makeHeadline($level, $attribs, $anchor, $html, $link, $legacyAnchor=false)
Create a headline for content.
Definition: Linker.php:1799
static extractTagsAndParams($elements, $text, &$matches, $uniq_prefix=null)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:936
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
doTableStuff($text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1033
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:6124
$mImageParamsMagicArray
Definition: Parser.php:157
LinkHolderArray $mLinkHolders
Definition: Parser.php:195
static register($parser)
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to and or sell copies of the and to permit persons to whom the Software is furnished to do so
Definition: LICENSE.txt:10
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
closeParagraph()
#@+ Used by doBlockLevels()
Definition: Parser.php:2437
const DB_SLAVE
Definition: Defines.php:46
preSaveTransform($text, Title $title, User $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\r\n" -> "\n" conversion, substituting signatur...
Definition: Parser.php:4850
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:867
$buffer
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:916
static hasSubpages($index)
Does the namespace allow subpages?
formatHeadings($text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4440
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:885
static tocUnindent($level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1703
nextItem($char)
TODO: document.
Definition: Parser.php:2507
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
static tocLine($anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1717
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
getExternalLinkAttribs($url=false)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:1889
$mInputSize
Definition: Parser.php:229
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:969
getUserSig(&$user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext...
Definition: Parser.php:4961
const HALF_PARSED_VERSION
Update this version number when the output of serialiseHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:79
const NS_FILE
Definition: Defines.php:75
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:325
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:34
static makeImageLink(Parser $parser, Title $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:545
const PTD_FOR_INCLUSION
Definition: Parser.php:113
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1802
armorLinks($text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2405
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1588
static splitWhitespace($s)
Return a three-element array: leading whitespace, string contents, trailing whitespace.
Definition: Parser.php:3330
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
setOutputType($ot)
Set the output type.
Definition: Parser.php:787
$mTagHooks
Definition: Parser.php:148
Class for handling an array of magic words.
const NS_MEDIAWIKI
Definition: Defines.php:77
static & get($id)
Factory: creates an object representing an ID.
Definition: MagicWord.php:257
static getModuleStyles()
Get CSS modules needed if HTML from the current driver is to be displayed.
Definition: MWTidy.php:63
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6501
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition: hooks.txt:246
fetchTemplate($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3982
maybeMakeExternalImage($url)
Make an image if it's allowed, either through the global option, through the exception, or through the on-wiki whitelist.
Definition: Parser.php:1990
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2414
const OT_HTML
Definition: Defines.php:226
static escapeId($id, $options=[])
Given a value, escape it so that it can be used in an id attribute and return it. ...
Definition: Sanitizer.php:1132
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object & $output
Definition: hooks.txt:1008
static getSubstIDs()
Get an array of parser substitution modifier IDs.
Definition: MagicWord.php:284
static images()
Definition: SiteStats.php:169
$mTransparentTagHooks
Definition: Parser.php:149
$mExpensiveFunctionCount
Definition: Parser.php:201
$mUrlProtocols
Definition: Parser.php:173
const TS_MW
MediaWiki concatenated string timestamp (YYYYMMDDHHMMSS)
$mConf
Definition: Parser.php:173
transformMsg($text, $options, $title=null)
Wrapper for preprocess()
Definition: Parser.php:5106
static newFromId($id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:99
wfUrlProtocols($includeProtocolRelative=true)
Returns a regular expression of url protocols.
static makeExternalLink($url, $text, $escape=true, $linktype= '', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:1052
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:302
static getExternalLinkRel($url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:1868
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
this hook is for auditing only $req
Definition: hooks.txt:969
this hook is for auditing only or null if authentication failed before getting that far $username
Definition: hooks.txt:766
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc next in line in page history
Definition: hooks.txt:1588
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:242
const OT_MSG
Definition: Parser.php:120
replaceTransparentTags($text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5841
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
Definition: postgres.txt:22
replaceSection($oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:6033
getLinkURL($query= '', $query2=false, $proto=PROTO_RELATIVE)
Get a URL that's the simplest URL that will be valid to link, locally, to the current Title...
Definition: Title.php:1826
doDoubleUnderscore($text)
Strip double-underscore items like NOGALLERY and NOTOC. Fills $this->mDoubleUnderscores, returns the modified text.
Definition: Parser.php:4365
$mFunctionHooks
Definition: Parser.php:150
$lines
Definition: router.php:66
testPreprocess($text, Title $title, ParserOptions $options)
Definition: Parser.php:6280
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition: deferred.txt:11
MagicWordArray $mSubstWords
Definition: Parser.php:171
const COLON_STATE_TEXT
Definition: Parser.php:103
const TOC_END
Definition: Parser.php:145
static normalizeCharReferences($text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1362
callParserFunction($frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3780
$mArgStack
Definition: Parser.php:191
$wgScriptPath
The path we should point to.
Variant of the Message class.
Definition: Message.php:1232
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:6486
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database etc For and for historical it also represents a few features of articles that don t involve their such as access rights See also title txt Article Encapsulates access to the page table of the database The object represents a an and maintains state such as etc Revision Encapsulates individual page revision data and access to the revision text blobs storage system Higher level code should never touch text storage directly
Definition: design.txt:34
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:1008
static articles()
Definition: SiteStats.php:137
const COLON_STATE_TAGSTART
Definition: Parser.php:105
$mRevisionUser
Definition: Parser.php:226
lock()
Lock the current instance of the parser.
Definition: Parser.php:6441
static pages()
Definition: SiteStats.php:145
$line
Definition: cdb.php:59
const COLON_STATE_COMMENT
Definition: Parser.php:108
const SFH_OBJECT_ARGS
Definition: Parser.php:83
static statelessFetchTemplate($title, $parser=false)
Static function to get a template. Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3995
$mDTopen
Definition: Parser.php:184
I won t presume to tell you how to I m just describing the methods I chose to use for myself If you do choose to follow these it will probably be easier for you to collaborate with others on the but if you want to contribute without by all means do which work well I also use K &R brace matching style I know that s a religious issue for so if you want to use a style that puts opening braces on the next line
Definition: design.txt:79
setFunctionHook($id, $callback, $flags=0)
Create a function, e.g.
Definition: Parser.php:5244
static setupOOUI($skinName= '', $dir= 'ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
static makeMediaLinkFile(Title $title, $file, $html= '')
Create a direct link to a given uploaded file.
Definition: Linker.php:994
$mIncludeCount
Definition: Parser.php:191
usually copyright or history_copyright This message must be in HTML not wikitext if the section is included from a template to be included in the link
Definition: hooks.txt:2719
$mMarkerIndex
Definition: Parser.php:158
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object to manipulate or replace but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok inclusive $limit
Definition: hooks.txt:1008
getTitle()
Accessor for the Title object.
Definition: Parser.php:768
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
extractSections($text, $sectionId, $mode, $newText= '')
Break wikitext input into sections, and either pull or replace some particular section's text...
Definition: Parser.php:5892
ParserOutput $mOutput
Definition: Parser.php:183
getOutput()
Get the ParserOutput object.
Definition: Parser.php:813
$wgExperimentalHtmlIds
Should we allow a broader set of characters in id attributes, per HTML5? If not, use only HTML 4-comp...
static statelessFetchRevision($title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3945
doMagicLinks($text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1410
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable or merely the Work and Derivative Works thereof Contribution shall mean any work of including the original version of the Work and any modifications or additions to that Work or Derivative Works that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner For the purposes of this submitted means any form of or written communication sent to the Licensor or its including but not limited to communication on electronic mailing source code control and issue tracking systems that are managed or on behalf the Licensor for the purpose of discussing and improving the but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as Not a Contribution 
Contributor shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work Grant of Copyright License Subject to the terms and conditions of this each Contributor hereby grants to You a non no royalty irrevocable copyright license to prepare Derivative Works publicly display
getCommon($st1, $st2)
getCommon() returns the length of the longest common substring of both arguments, starting at the beg...
Definition: Parser.php:2457
!html< table >< tr >< td > broken</td ></tr ></table >!end!test Table cell attributes
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:1008
static cleanSigInSig($text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:5061
setDefaultSort($sort)
Mutator for $mDefaultSort.
Definition: Parser.php:6145
fetchFile($title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:4085
static fixTagAttributes($text, $element)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML...
Definition: Sanitizer.php:1037
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1693
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:606
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling output() to send it all.It could be easily changed to send incrementally if that becomes useful
$wgServer
URL of the server.
We ve cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else It s much easier for someone working with this code to see what s _really_ going on
Definition: hooks.txt:86
incrementExpensiveFunctionCount()
Increment the expensive function count.
Definition: Parser.php:4352
const DB_MASTER
Definition: Defines.php:47
$mShowToc
Definition: Parser.php:202
static normalizeLinkUrl($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1931
static removeHTMLtags($text, $processCallback=null, $args=[], $extratags=[], $removetags=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:455
magicLinkCallback($m)
Definition: Parser.php:1440
const EXT_LINK_URL_CLASS
Definition: Parser.php:89
insertStripItem($text)
Add an item to the strip state. Returns the unique tag which must be inserted into the stripped text T...
Definition: Parser.php:1019
testPst($text, Title $title, ParserOptions $options)
Definition: Parser.php:6270
static factory($url, $options=null, $caller=__METHOD__)
Generate a new request object.
const TS_UNIX
Unix time - the number of seconds since 1970-01-01 00:00:00 UTC.
if(!$wgRequest->checkUrlExtension()) if(!$wgEnableAPI) $wgTitle
Definition: api.php:57
static explode($separator, $subject)
Workalike for explode() with limited memory usage.
ParserOptions $mOptions
Definition: Parser.php:215
parse($text, Title $title, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:409
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:766
static numberingroup($group)
Find the number of users in a given user group.
Definition: SiteStats.php:179
=Architecture==Two class hierarchies are used to provide the functionality associated with the different content models:*Content interface(and AbstractContent base class) define functionality that acts on the concrete content of a page, and *ContentHandler base class provides functionality specific to a content model, but not acting on concrete content.The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content.These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text.All manipulation and analysis of page content must be done via the appropriate methods of the Content object.For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers.The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id).Also Title, WikiPage and Revision now have getContentHandler() methods for convenience.ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page.ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type.However, it is recommended to instead use WikiPage::getContent() resp.Revision::getContent() to get a page's content as a Content object.These two methods should be the ONLY way in which page content is accessed.Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides().This is similar to what WikiPage::getActionOverrides() was already doing.==Serialization==With the ContentHandler facility, page content no longer has to be text based.Objects implementing the Content interface are used to represent and handle the content internally.For storage and data exchange, each content model supports at least one serialization format via 
ContentHandler::serializeContent($content).The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats().Content serialization formats are identified using MIME type like strings.The following formats are built in:*text/x-wiki-wikitext *text/javascript-for js pages *text/css-for css pages *text/plain-for future use, e.g.with plain text messages.*text/html-for future use, e.g.with plain html messages.*application/vnd.php.serialized-for future use with the api and for extensions *application/json-for future use with the api, and for use by extensions *application/xml-for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant.Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model and format of the content should always be handled explicitly.Without that information, interpretation of the provided content is not reliable.The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export.Also note that the API will provide encapsulated, serialized content-so if the API was called with format=json, and contentformat is also json(or rather, application/json), the page content is represented as a string containing an escaped json structure.Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form.==Compatibility==The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content.However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page's content model, and will now generate warnings when used.Most importantly, the following functions have been deprecated:*Revisions::getText() is deprecated in favor 
Revisions::getContent()*WikiPage::getText() is deprecated in favor WikiPage::getContent() Also, the old Article::getContent()(which returns text) is superceded by Article::getContentObject().However, both methods should be avoided since they do not provide clean access to the page's actual content.For instance, they may return a system message for non-existing pages.Use WikiPage::getContent() instead.Code that relies on a textual representation of the page content should eventually be rewritten.However, ContentHandler::getContentText() provides a stop-gap that can be used to get text for a page.Its behavior is controlled by $wgContentHandlerTextFallback it
const STRIP_COMMENTS
static getVersion($flags= '', $lang=null)
Return a string of the MediaWiki version with Git revision if available.
braceSubstitution($piece, $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:3462
setUser($user)
Set the current user.
Definition: Parser.php:730
$mHighestExpansionDepth
Definition: Parser.php:198
makeImage($title, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5557
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition: Parser.php:5812
static cascadingsources($parser, $title= '')
Returns the sources of any cascading protection acting on a specified page.
getCustomDefaultSort()
Accessor for $mDefaultSort. Unlike getDefaultSort(), will return false if none is set.
Definition: Parser.php:6174
extensionSubstitution($params, $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:4252
static makeExternalImage($url, $alt= '')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage()...
Definition: Linker.php:492
recursiveTagParseFully($text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition: Parser.php:636
setTransparentTagHook($tag, $callback)
As setHook(), but letting the contents be parsed.
Definition: Parser.php:5181
static element($element, $attribs=[], $contents= '')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:230
wfFindFile($title, $options=[])
Find a file.
$mRevisionSize
Definition: Parser.php:227
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk page
Definition: hooks.txt:2342
static users()
Definition: SiteStats.php:153
unserializeHalfParsedText($data)
Load the parser state given in the $data array, which is assumed to have been generated by serializeH...
Definition: Parser.php:6375
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2342
guessSectionNameFromWikiText($text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition: Parser.php:6187
const SFH_OBJECT_ARGS
Definition: Defines.php:240
static & makeTitle($ns, $title, $fragment= '', $interwiki= '')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:524
$wgServerName
Server name.
internalParseHalfParsed($text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1307
const OT_HTML
Definition: Parser.php:117
$mIncludeSizes
Definition: Parser.php:198
if the prop value should be in the metadata multi language array format
Definition: hooks.txt:1477
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1802
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control variable
Definition: memcached.txt:78
getOptions()
Get the ParserOptions object.
Definition: Parser.php:822
makeKnownLinkHolder($nt, $text= '', $query=[], $trail= '', $prefix= '')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2380
getDefaultSort()
Accessor for $mDefaultSort. Will use the empty string if none is set.
Definition: Parser.php:6160
For a write use something like
Definition: database.txt:26
const SFH_NO_HASH
Definition: Defines.php:239
makeFreeExternalLink($url, $numPostProto)
Make a free external link, given a user-supplied URL.
Definition: Parser.php:1496
$matches
const COLON_STATE_CLOSETAG
Definition: Parser.php:106
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:314
$mTplDomCache
Definition: Parser.php:200