MediaWiki  1.27.2
Parser.php
Go to the documentation of this file.
1 <?php
67 class Parser {
73  const VERSION = '1.6.4';
74 
80 
81  # Flags for Parser::setFunctionHook
82  const SFH_NO_HASH = 1;
83  const SFH_OBJECT_ARGS = 2;
84 
85  # Constants needed for external link processing
86  # Everything except bracket, space, or control characters
87  # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
88  # as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052
89  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
90  # Simplified expression to match an IPv4 or IPv6 address, or
91  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
92  const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
93  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
94  // @codingStandardsIgnoreStart Generic.Files.LineLength
95  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
96  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
97  // @codingStandardsIgnoreEnd
98 
99  # Regular expression for a non-newline space
100  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
101 
102  # State constants for the definition list colon extraction
103  const COLON_STATE_TEXT = 0;
104  const COLON_STATE_TAG = 1;
111 
112  # Flags for preprocessToDom
113  const PTD_FOR_INCLUSION = 1;
114 
115  # Allowed values for $this->mOutputType
116  # Parameter to startExternalParse().
117  const OT_HTML = 1; # like parse()
118  const OT_WIKI = 2; # like preSaveTransform()
120  const OT_MSG = 3;
121  const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
122 
140  const MARKER_SUFFIX = "-QINU`\"'\x7f";
141  const MARKER_PREFIX = "\x7f'\"`UNIQ-";
142 
143  # Markers used for wrapping the table of contents
144  const TOC_START = '<mw:toc>';
145  const TOC_END = '</mw:toc>';
146 
147  # Persistent:
148  public $mTagHooks = [];
150  public $mFunctionHooks = [];
151  public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
152  public $mFunctionTagHooks = [];
153  public $mStripList = [];
154  public $mDefaultStripList = [];
155  public $mVarCache = [];
156  public $mImageParams = [];
158  public $mMarkerIndex = 0;
159  public $mFirstCall = true;
160 
161  # Initialised by initialiseVariables()
162 
166  public $mVariables;
167 
171  public $mSubstWords;
172  # Initialised in constructor
174 
175  # Initialized in getPreprocessor()
176 
178 
179  # Cleared with clearState():
180 
183  public $mOutput;
185 
189  public $mStripState;
190 
196 
197  public $mLinkID;
201  public $mExpensiveFunctionCount; # number of expensive parser function calls
203 
207  public $mUser; # User object; only used when doing pre-save transform
208 
209  # Temporary
210  # These are variables reset at least once per parse regardless of $clearState
211 
215  public $mOptions;
216 
220  public $mTitle; # Title context, used for self-link rendering and similar things
221  public $mOutputType; # Output type, one of the OT_xxx constants
222  public $ot; # Shortcut alias, see setOutputType()
223  public $mRevisionObject; # The revision object of the specified revision ID
224  public $mRevisionId; # ID to display in {{REVISIONID}} tags
225  public $mRevisionTimestamp; # The timestamp of the specified revision ID
226  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
227  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
228  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
229  public $mInputSize = false; # For {{PAGESIZE}} on current page.
230 
235  public $mUniqPrefix = Parser::MARKER_PREFIX;
236 
243 
251 
256  public $mInParse = false;
257 
259  protected $mProfiler;
260 
/**
 * Build the bracketed external-link regex and choose a preprocessor class.
 *
 * @param array $conf Configuration array; only the 'preprocessorClass'
 *   key is read here. The whole array is kept in $this->mConf.
 */
public function __construct( $conf = [] ) {
	$this->mConf = $conf;
	$this->mUrlProtocols = wfUrlProtocols();

	// "[" + (protocol + address + URL characters) + optional spaces + (label) + "]"
	$linkTarget = '(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)';
	$this->mExtLinkBracketedRegex = '/\[' . $linkTarget .
		'\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';

	if ( isset( $conf['preprocessorClass'] ) ) {
		// An explicit configuration always wins
		$preprocessorClass = $conf['preprocessorClass'];
	} else {
		// Preprocessor_Hash is the fallback for every case except "dom is usable"
		$preprocessorClass = 'Preprocessor_Hash';
		# Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop,
		# so the DOM checks are skipped entirely there
		if ( !defined( 'HPHP_VERSION' ) ) {
			if ( extension_loaded( 'domxml' ) ) {
				# PECL extension that conflicts with the core DOM extension (bug 13770)
				wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
			} elseif ( extension_loaded( 'dom' ) ) {
				$preprocessorClass = 'Preprocessor_DOM';
			}
		}
	}
	$this->mPreprocessorClass = $preprocessorClass;

	wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
286 
/**
 * Drop every property of this object when it is destroyed.
 *
 * The link holder array is released first, then all remaining
 * properties visible from this scope are unset one by one.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}

	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
298 
/**
 * Reset per-parse state on the copy and detach reference-bound fields.
 *
 * Runs the 'ParserCloned' hook with the new object once the fields
 * have been detached.
 */
public function __clone() {
	// The copy is not inside a parse, whatever the original was doing
	$this->mInParse = false;

	// Bug 56226: taking a reference to an object field turns the field
	// itself into a reference (for as long as the other reference lives),
	// and clone copies reference fields as references. Both are defined
	// PHP5 behaviours, inconvenient as they are while old PHP4-era hooks
	// still receive fields by reference.
	$fieldsToDetach = [ 'mStripState', 'mVarCache' ];
	foreach ( $fieldsToDetach as $field ) {
		// Take a plain (non-reference) copy of the value, then rebind the
		// field so that it references that fresh copy instead.
		$copy = $this->$field;
		$this->$field =& $copy;
		unset( $copy );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
321 
/**
 * Perform one-time initialisation on first use of the parser.
 *
 * Does nothing if it has already run (mFirstCall is false). Otherwise it
 * registers the core tag hooks, initialises the variables and runs the
 * 'ParserFirstCallInit' hook.
 */
public function firstCallInit() {
	if ( !$this->mFirstCall ) {
		return;
	}
	$this->mFirstCall = false;

	CoreTagHooks::register( $this );
	$this->initialiseVariables();

	// Since PHP 7.1 "&$this" no longer yields a reference, so handlers that
	// declare their first parameter by reference would be skipped with a
	// warning. Pass a reference to a local alias instead.
	$parser = $this;
	Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
}
337 
/**
 * Reset all per-parse state so that a fresh parse can start.
 *
 * Triggers firstCallInit() if it has not run yet, creates a new
 * ParserOutput, StripState, LinkHolderArray and SectionProfiler, zeroes
 * the counters and finally runs the 'ParserClearState' hook.
 *
 * $this->mOptions must already be set, because the new output object is
 * registered on it as an option watcher.
 */
public function clearState() {
	if ( $this->mFirstCall ) {
		$this->firstCallInit();
	}
	$this->mOutput = new ParserOutput;
	$this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
	$this->mAutonumber = 0;
	$this->mLastSection = '';
	$this->mDTopen = false;
	$this->mIncludeCount = [];
	$this->mArgStack = false;
	$this->mInPre = false;
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;
	$this->mRevisionObject = $this->mRevisionTimestamp =
		$this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState;

	# Clear these on every parse, bug 4549
	$this->mTplRedirCache = $this->mTplDomCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor that still points at another parser
	# (the one this object was cloned from) must not be reused
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Since PHP 7.1 "&$this" no longer yields a reference, so handlers that
	// declare their first parameter by reference would be skipped with a
	// warning. Pass a reference to a local alias instead.
	$parser = $this;
	Hooks::run( 'ParserClearState', [ &$parser ] );
}
392 
/**
 * Convert wikitext to HTML and return the populated ParserOutput.
 *
 * @param string $text Wikitext to parse
 * @param Title $title Title passed to startParse() and used for title conversion
 * @param ParserOptions $options Options passed to startParse()
 * @param bool $linestart Passed through to internalParseHalfParsed()
 * @param bool $clearState Passed to startParse(); when true the input is
 *   also cleaned of U+007F and the parser is locked for the call
 * @param int|null $revid When not null, used as the revision ID for the
 *   duration of this call; the previous revision fields are restored at the end
 * @return ParserOutput The object held in $this->mOutput
 */
public function parse( $text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	global $wgShowHostnames;

	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// NOTE(review): presumably a scoped lock that is released when this
		// variable goes out of scope - confirm against lock()
		$magicScopeVariable = $this->lock();
	}

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the current revision fields so they can be restored on exit
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// Since PHP 7.1 "&$this" no longer yields a reference, so handlers that
	// declare their first parameter by reference would be skipped with a
	// warning. Pass a reference to a local alias instead.
	$parser = $this;

	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Set the converted title text unless conversion is disabled by option,
	// by a double-underscore switch, or a display title has already been set
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();

		$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuTime !== null ) {
			$this->mOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
		$this->mOutput->setLimitReportData( 'limitreport-walltime',
			sprintf( "%.3f", $wallTime )
		);

		$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
			[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
			[ $this->mIncludeSizes['post-expand'], $max ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
			[ $this->mIncludeSizes['arg'], $max ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
		);
		Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

		$limitReport = "NewPP limit report\n";
		if ( $wgShowHostnames ) {
			$limitReport .= 'Parsed by ' . wfHostname() . "\n";
		}
		$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
		$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
		$limitReport .= 'Dynamic content: ' .
			( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
			"\n";

		foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
			// A handler returning false means it formatted the entry itself
			if ( Hooks::run( 'ParserLimitReportFormat',
				[ $key, &$value, &$limitReport, false, false ]
			) ) {
				$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
				$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
					->inLanguage( 'en' )->useDatabase( false );
				if ( !$valueMsg->exists() ) {
					$valueMsg = new RawMessage( '$1' );
				}
				if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
					$valueMsg->params( $value );
					$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
				}
			}
		}
		// Since we're not really outputting HTML, decode the entities and
		// then re-encode the things that need hiding inside HTML comments.
		$limitReport = htmlspecialchars_decode( $limitReport );
		Hooks::run( 'ParserLimitReport', [ $this, &$limitReport ] );

		// Sanitize for comment. Note '‐' in the replacement is U+2010,
		// which looks much like the problematic '-'.
		$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
		$text .= "\n<!-- \n$limitReport-->\n";

		// Add on template profiling data
		$dataByFunc = $this->mProfiler->getFunctionStats();
		// The comparator must return an integer (<0, 0, >0); returning the
		// boolean result of "<" never yields a negative value and makes the
		// ordering unreliable.
		uasort( $dataByFunc, function ( $a, $b ) {
			if ( $a['real'] == $b['real'] ) {
				return 0;
			}
			return ( $a['real'] < $b['real'] ) ? 1 : -1; // descending order
		} );
		$profileReport = "Transclusion expansion time report (%,ms,calls,template)\n";
		foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
			$profileReport .= sprintf( "%6.2f%% %8.3f %6d - %s\n",
				$item['%real'], $item['real'], $item['calls'],
				htmlspecialchars( $item['name'] ) );
		}
		$text .= "\n<!-- \n$profileReport-->\n";

		if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
			wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
				$this->mTitle->getPrefixedDBkey() );
		}
	}
	$this->mOutput->setText( $text );

	// Restore the revision fields saved at the start of the call
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
581 
/**
 * Half-parse wikitext from within an ongoing parse.
 *
 * Runs the 'ParserBeforeStrip' and 'ParserAfterStrip' hooks and then
 * internalParse() with $isMain = false. The result has not been through
 * internalParseHalfParsed(); see recursiveTagParseFully() for that.
 *
 * @param string $text Wikitext to parse
 * @param bool|object $frame Frame passed to internalParse(), or false for none
 * @return string The text returned by internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Since PHP 7.1 "&$this" no longer yields a reference, so handlers that
	// declare their first parameter by reference would be skipped with a
	// warning. Pass a reference to a local alias instead.
	$parser = $this;

	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text, false, $frame );
	return $text;
}
610 
/**
 * Parse wikitext from within an ongoing parse, including the final stage.
 *
 * Equivalent to recursiveTagParse() followed by
 * internalParseHalfParsed() with $isMain = false.
 *
 * @param string $text Wikitext to parse
 * @param bool|object $frame Frame passed on to recursiveTagParse(), or false
 * @return string The text returned by internalParseHalfParsed()
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );

	return $this->internalParseHalfParsed( $halfParsed, false );
}
633 
/**
 * Expand variables in wikitext without producing HTML.
 *
 * Locks the parser, starts a parse in OT_PREPROCESS mode with state
 * clearing, runs the strip hooks, replaces variables and finally
 * unstrips the result.
 *
 * @param string $text Wikitext to preprocess
 * @param Title|null $title Title passed to startParse()
 * @param ParserOptions $options Options passed to startParse()
 * @param int|null $revid When not null, stored as the revision ID
 * @param bool|object $frame Frame passed to replaceVariables(), or false
 * @return string The text after replaceVariables() and unstripBoth()
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	// NOTE(review): presumably a scoped lock that is released when this
	// variable goes out of scope - confirm against lock()
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	// Since PHP 7.1 "&$this" no longer yields a reference, so handlers that
	// declare their first parameter by reference would be skipped with a
	// warning. Pass a reference to a local alias instead.
	$parser = $this;

	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
659 
/**
 * Replace variables in wikitext from within an ongoing parse and
 * unstrip the result.
 *
 * Unlike preprocess(), this neither locks the parser nor starts a new parse.
 *
 * @param string $text Wikitext to preprocess
 * @param bool|object $frame Frame passed to replaceVariables(), or false
 * @return string The text after replaceVariables() and unstripBoth()
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );

	return $this->mStripState->unstripBoth( $expanded );
}
674 
/**
 * Process wikitext used as preload text.
 *
 * $params are substituted into the text through a RawMessage. The text
 * is then preprocessed as for inclusion and expanded with template and
 * argument expansion switched off, and finally unstripped.
 *
 * @param string $text Wikitext to process
 * @param Title $title Title passed to startParse()
 * @param ParserOptions $options Options passed to startParse()
 * @param array $params Message parameters substituted into $text
 * @return string The processed text
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	// NOTE(review): presumably a scoped lock that is released when this
	// variable goes out of scope - confirm against lock()
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	// $flags was read below without ever being assigned. Leave templates
	// and template arguments unexpanded, as upstream MediaWiki does here.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
702 
/**
 * Get a random string of length 16 from wfRandomString().
 *
 * @return string
 * @deprecated since 1.26; calling this logs a deprecation notice
 */
public static function getRandomString() {
	wfDeprecated( __METHOD__, '1.26' );

	$random = wfRandomString( 16 );
	return $random;
}
713 
/**
 * Store the user object that getUser() will return in preference to
 * the user from the parser options.
 *
 * @param User|null $user User object, or null to fall back to the options' user
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
723 
/**
 * Get the strip marker prefix.
 *
 * @return string Always self::MARKER_PREFIX
 * @deprecated since 1.26; calling this logs a deprecation notice
 */
public function uniqPrefix() {
	wfDeprecated( __METHOD__, '1.26' );

	$prefix = self::MARKER_PREFIX;
	return $prefix;
}
734 
/**
 * Set the context title.
 *
 * A falsy argument is replaced by a Title for 'NO TITLE'. Any fragment
 * on the title is removed before it is stored.
 *
 * @param Title|null $t Title to use as context
 */
public function setTitle( $t ) {
	if ( !$t ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $t->hasFragment()
		? $t->createFragmentTarget( '' )
		: $t;
}
752 
/**
 * Get the context title stored by setTitle().
 *
 * @return Title|null The current value of $this->mTitle
 */
public function getTitle() {
	$title = $this->mTitle;
	return $title;
}
761 
/**
 * Combined accessor and mutator for the context title.
 *
 * Delegates to wfSetVar() on $this->mTitle.
 * NOTE(review): wfSetVar() presumably returns the old value and only
 * assigns when $x is not null - confirm against its definition.
 *
 * @param Title|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );
	return $result;
}
771 
/**
 * Set the output type and rebuild the $this->ot shortcut flags.
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias: one boolean per output mode
	$flags = [];
	$flags['html'] = ( $ot == self::OT_HTML );
	$flags['wiki'] = ( $ot == self::OT_WIKI );
	$flags['pre'] = ( $ot == self::OT_PREPROCESS );
	$flags['plain'] = ( $ot == self::OT_PLAIN );
	$this->ot = $flags;
}
787 
/**
 * Combined accessor and mutator for the output type.
 *
 * Delegates to wfSetVar() on $this->mOutputType. Note that, unlike
 * setOutputType(), this does not touch the $this->ot shortcut flags.
 * NOTE(review): wfSetVar() presumably returns the old value and only
 * assigns when $x is not null - confirm against its definition.
 *
 * @param int|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
797 
/**
 * Get the output object of the current parse.
 *
 * @return ParserOutput The current value of $this->mOutput
 */
public function getOutput() {
	$output = $this->mOutput;
	return $output;
}
806 
/**
 * Get the options object of the current parse.
 *
 * @return ParserOptions The current value of $this->mOptions
 */
public function getOptions() {
	$options = $this->mOptions;
	return $options;
}
815 
/**
 * Combined accessor and mutator for the parser options.
 *
 * Delegates to wfSetVar() on $this->mOptions.
 * NOTE(review): wfSetVar() presumably returns the old value and only
 * assigns when $x is not null - confirm against its definition.
 *
 * @param ParserOptions|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
825 
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The value of $this->mLinkID before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;

	return $current;
}
832 
/**
 * Overwrite the link ID counter that nextLinkID() hands out.
 *
 * @param int $id New value for $this->mLinkID
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
839 
/**
 * Get the language used by parser functions.
 *
 * Alias of getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	$lang = $this->getTargetLanguage();
	return $lang;
}
847 
/**
 * Get the target language of the content being parsed.
 *
 * Resolution order: the target language set in the options, then the
 * user language when parsing an interface message, then the page
 * language of the context title.
 *
 * @return Language
 * @throws MWException If no earlier rule applied and no title is set
 */
public function getTargetLanguage() {
	$fromOptions = $this->mOptions->getTargetLanguage();
	if ( $fromOptions !== null ) {
		return $fromOptions;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
870 
/**
 * Get the language object used for language conversion.
 *
 * Alias of getTargetLanguage().
 *
 * @return Language
 */
public function getConverterLanguage() {
	$lang = $this->getTargetLanguage();
	return $lang;
}
878 
/**
 * Get the user for this parse.
 *
 * The user set through setUser() takes precedence; when none is set the
 * user from the parser options is returned.
 *
 * @return User
 */
public function getUser() {
	if ( $this->mUser === null ) {
		return $this->mOptions->getUser();
	}

	return $this->mUser;
}
891 
/**
 * Get the preprocessor, creating it on first use.
 *
 * The instance is built from the class name chosen in the constructor
 * and receives this parser as its only constructor argument.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->mPreprocessorClass;
	$this->mPreprocessor = new $preprocessorClass( $this );

	return $this->mPreprocessor;
}
904 
/**
 * Replace the given XML-style tags, and HTML comments, with unique markers.
 *
 * Each occurrence is replaced in the returned text by a marker string,
 * and $matches is filled with one entry per marker:
 *   [ element name, content (null for self-closing tags),
 *     decoded attributes, full original text of the element ]
 *
 * An element with no closing tag runs to the end of the text.
 *
 * @param array $elements Tag names to extract; they are matched case-insensitively
 * @param string $text Source text
 * @param array &$matches Output: extracted elements keyed by marker
 * @param string|null $uniq_prefix Unused; passing a non-null value logs a
 *   deprecation notice
 * @return string The text with the elements replaced by markers
 */
public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) {
	if ( $uniq_prefix !== null ) {
		wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' );
	}
	// Shared across calls so that every marker gets a distinct number
	static $n = 1;
	$stripped = '';
	$matches = [];

	// Tag names are literal text, not patterns: quote them so that a regex
	// metacharacter in a name can neither break the expression nor match
	// unrelated tags.
	$quotedElements = array_map( function ( $name ) {
		return preg_quote( $name, '/' );
	}, $elements );
	$taglist = implode( '|', $quotedElements );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			// No further opening tag or comment
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			$element = $p[1];
			$attributes = $p[2];
			$close = $p[3];
			$inside = $p[4];
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				// $element is the name as it appeared in the text; quote it
				// for the same reason as the tag list above
				$end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				$tail = $q[1];
				$text = $q[2];
			}
		}

		$matches[$marker] = [ $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" ];
	}
	return $stripped;
}
990 
/**
 * Get the strip list.
 *
 * @return array The current value of $this->mStripList
 */
public function getStripList() {
	$list = $this->mStripList;
	return $list;
}
999 
/**
 * Register a piece of text in the strip state and get a marker for it.
 *
 * The marker embeds the current marker index, which is then incremented
 * so that the next marker differs.
 *
 * @param string $text Text stored under the marker via addGeneral()
 * @return string The generated marker
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1015 
/**
 * Convert wiki table markup ({| ... |}) to HTML table markup.
 *
 * The text is processed line by line. State is kept in five parallel
 * stacks with one entry per currently open (possibly nested) table.
 * Lines outside any table are passed through unchanged. Tables still
 * open at the end of the text are closed.
 *
 * @param string $text Text containing wiki table markup
 * @return string Text with the table markup converted to HTML
 */
public function doTableStuff( $text ) {

	$lines = StringUtils::explode( "\n", $text );
	$out = '';
	$td_history = []; # Is currently a td tag open?
	$last_tag_history = []; # Save history of last tag activated (td, th or caption)
	$tr_history = []; # Is currently a tr tag open?
	$tr_attributes = []; # history of tr attributes
	$has_opened_tr = []; # Did this table open a <tr> element?
	$indent_level = 0; # indent level of the table

	foreach ( $lines as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) { # empty line, go to next line
			$out .= $outLine . "\n";
			continue;
		}

		$first_character = $line[0];
		$first_two = substr( $line, 0, 2 );
		$matches = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
			# First check if we are starting a new table
			$indent_level = strlen( $matches[1] );

			$attributes = $this->mStripState->unstripBoth( $matches[2] );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

			$outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
			array_push( $tr_history, false );
			array_push( $tr_attributes, '' );
			array_push( $has_opened_tr, false );
		} elseif ( count( $td_history ) == 0 ) {
			# Don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( $first_two === '|}' ) {
			# We are ending a table
			$line = '</table>' . substr( $line, 2 );
			$last_tag = array_pop( $last_tag_history );

			if ( !array_pop( $has_opened_tr ) ) {
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $tr_history ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}
			array_pop( $tr_attributes );
			$outLine = $line . str_repeat( '</dd></dl>', $indent_level );
		} elseif ( $first_two === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes
			$attributes = $this->mStripState->unstripBoth( $line );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
			array_pop( $tr_attributes );
			array_push( $tr_attributes, $attributes );

			$line = '';
			$last_tag = array_pop( $last_tag_history );
			array_pop( $has_opened_tr );
			array_push( $has_opened_tr, true );

			if ( array_pop( $tr_history ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}

			$outLine = $line;
			array_push( $tr_history, false );
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
		} elseif ( $first_character === '|'
			|| $first_character === '!'
			|| $first_two === '|+'
		) {
			# This might be cell elements, td, th or captions
			if ( $first_two === '|+' ) {
				$first_character = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $first_character === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$cells = explode( '||', $line );

			$outLine = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$previous = '';
				if ( $first_character !== '+' ) {
					# Cells (but not captions) need an open row; open one,
					# using the attributes from the last |- line, if needed
					$tr_after = array_pop( $tr_attributes );
					if ( !array_pop( $tr_history ) ) {
						$previous = "<tr{$tr_after}>\n";
					}
					array_push( $tr_history, true );
					array_push( $tr_attributes, '' );
					array_pop( $has_opened_tr );
					array_push( $has_opened_tr, true );
				}

				$last_tag = array_pop( $last_tag_history );

				# Close the previous cell of this table, if one is open
				if ( array_pop( $td_history ) ) {
					$previous = "</{$last_tag}>\n{$previous}";
				}

				if ( $first_character === '|' ) {
					$last_tag = 'td';
				} elseif ( $first_character === '!' ) {
					$last_tag = 'th';
				} elseif ( $first_character === '+' ) {
					$last_tag = 'caption';
				} else {
					$last_tag = '';
				}

				array_push( $last_tag_history, $last_tag );

				# A cell could contain both parameters and data
				$cell_data = explode( '|', $cell, 2 );

				# Bug 553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				if ( strpos( $cell_data[0], '[[' ) !== false ) {
					$cell = "{$previous}<{$last_tag}>{$cell}";
				} elseif ( count( $cell_data ) == 1 ) {
					$cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
				} else {
					$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
					$attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
					$cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
				}

				$outLine .= $cell;
				array_push( $td_history, true );
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open td, tr && table
	while ( count( $td_history ) > 0 ) {
		# The open cell may be a th or a caption rather than a td, so close
		# it with the tag that actually opened it
		$last_tag = array_pop( $last_tag_history );
		if ( array_pop( $td_history ) ) {
			$out .= "</{$last_tag}>\n";
		}
		if ( array_pop( $tr_history ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $has_opened_tr ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1213 
/**
 * Run the main wikitext-to-HTML transformations, leaving block-level
 * processing and link holder replacement for internalParseHalfParsed().
 *
 * @param string $text Wikitext to transform
 * @param bool $isMain Passed through to formatHeadings()
 * @param bool|object $frame Frame used to expand the preprocessed text; when
 *   false, replaceVariables() is used instead
 * @return string The transformed text, or the text as left by the
 *   'ParserBeforeInternalParse' hook if that hook aborted processing
 */
public function internalParse( $text, $isMain = true, $frame = false ) {

	$origText = $text;

	// Since PHP 7.1 "&$this" no longer yields a reference, so handlers that
	// declare their first parameter by reference would be skipped with a
	// warning. Pass a reference to a local alias instead.
	$parser = $this;

	# Hook to suspend the parser in this state
	if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] ) ) {
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		if ( !$frame->depth ) {
			$flag = 0;
		} else {
			$flag = self::PTD_FOR_INCLUSION;
		}
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		array_keys( $this->mTransparentTagHooks )
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# A line starting with four or more dashes becomes a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );
	$text = $this->formatHeadings( $text, $origText, $isMain );

	return $text;
}
1284 
/**
 * Finish the work of internalParse(): unstrip, fix up special characters,
 * do block levels, replace link holders, run language conversion and
 * tidy the resulting HTML.
 *
 * @param string $text Half-parsed text as returned by internalParse()
 * @param bool $isMain Whether the 'ParserAfterUnstrip', 'ParserBeforeTidy'
 *   and 'ParserAfterTidy' hooks are run
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string The finished HTML
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Since PHP 7.1 "&$this" no longer yields a reference, so handlers that
	// declare their first parameter by reference would be skipped with a
	// warning. Pass a reference to a local alias instead.
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$fixtags = [
		# french spaces, last one Guillemet-left
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# french spaces, Guillemet-right
		'/(\\302\\253) /' => '\\1&#160;',
		'/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
	];
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	# Language conversion is skipped when disabled by option or by the
	# nocontentconvert double-underscore switch, and for interface messages
	if ( !( $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
	) {
		if ( !$this->mOptions->getInterfaceMessage() ) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert( $text );
		}
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( MWTidy::isEnabled() && $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
		$this->mOutput->addModuleStyles( MWTidy::getModuleStyles() );
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
			'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single nested link (bug 3695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
			'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
			'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		];

		$text = preg_replace(
			array_keys( $tidyregs ),
			array_values( $tidyregs ),
			$text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1382 
/**
 * Replace "magic" strings in the text: free (unbracketed) external URLs,
 * "RFC n" / "PMID n" references and "ISBN n" numbers.
 *
 * Existing <a>...</a> elements and other HTML tags are matched first so
 * that they are passed through untouched. Every match is handed to
 * magicLinkCallback(), which decides what the replacement is.
 *
 * @param string $text Text to scan
 * @return string Result of preg_replace_callback() on $text
 */
public function doMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$space = self::SPACE_NOT_NL; # non-newline space
	$spdash = "(?:-|$space)"; # a dash or a non-newline space
	$spaces = "$space++"; # possessive match of 1 or more spaces
	# The callback is passed as a plain [ object, method ] pair. Objects are
	# handles, so taking a reference to $this is not needed for the callback
	# to reach this instance.
	$text = preg_replace_callback(
		'!(?: # Start cases
			(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
			(<.*?>) | # m[2]: Skip stuff inside
			# HTML elements' . "
			(\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
			# m[4]: Post-protocol path
			\b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
			([0-9]+)\b |
			\bISBN $spaces ( # m[6]: ISBN, capture number
			(?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
			(?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
			[0-9Xx] # check digit
			)\b
		)!xu", [ $this, 'magicLinkCallback' ], $text );
	return $text;
}
1418 
/**
 * Callback for doMagicLinks(): turn one regex match into its replacement.
 *
 * Capture groups, as set up by doMagicLinks():
 *  - m[1] existing anchor, m[2] other HTML tag: returned unchanged
 *  - m[3] free external link (m[4] is the part after the protocol)
 *  - m[5] number following "RFC" or "PMID"
 *  - m[6] ISBN, possibly containing dashes and space characters
 *
 * @param array $m Match array from preg_replace_callback()
 * @throws MWException If m[5] is set but the match starts with neither
 *   "RFC" nor "PMID"
 * @return string Replacement HTML, or the matched text when nothing applies
 */
public function magicLinkCallback( $m ) {
	if ( isset( $m[1] ) && $m[1] !== '' ) {
		# Skip anchor
		return $m[0];
	} elseif ( isset( $m[2] ) && $m[2] !== '' ) {
		# Skip HTML element
		return $m[0];
	} elseif ( isset( $m[3] ) && $m[3] !== '' ) {
		# Free external link
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	} elseif ( isset( $m[5] ) && $m[5] !== '' ) {
		# RFC or PMID
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$id = $m[5];
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$id = $m[5];
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
	} elseif ( isset( $m[6] ) && $m[6] !== '' ) {
		# ISBN
		$isbn = $m[6];
		$space = self::SPACE_NOT_NL; # non-newline space
		# Normalise every kind of matched space to a plain ' '. The 'u'
		# modifier is required: SPACE_NOT_NL contains \p{Zs}, and without
		# UTF-8 mode that class is tested byte by byte, so a UTF-8 encoded
		# no-break space would lose only its 0xA0 byte (leaving a stray
		# 0xC2) and multi-byte spaces such as U+3000 would not match at all.
		# $m[6] comes from a /u match in doMagicLinks().
		$isbn = preg_replace( "/$space/u", ' ', $isbn );
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
		return '<a href="' .
			htmlspecialchars( $titleObj->getLocalURL() ) .
			"\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>";
	} else {
		return $m[0];
	}
}
1470 
/**
 * Build the HTML for a free (unbracketed) external link.
 *
 * Anything from the first escaped '<', '>' or no-break space onwards, and
 * any trailing punctuation, is split off the URL and re-appended after the
 * generated link as plain text.
 *
 * @param string $url Matched text, starting with the protocol
 * @param int $numPostProto Length of the part of the match that follows
 *   the protocol
 * @return string HTML: the link (or external image) followed by the trail,
 *   or the unlinked input text when only the protocol would remain
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# '<' and '>' (escaped by removeHTMLtags()) may not be part of a URL
	# per RFC 2396; &nbsp; terminates a URL as well (bug T84937).
	$hit = [];
	$foundEntity = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$hit,
		PREG_OFFSET_CAPTURE
	);
	if ( $foundEntity ) {
		$cutAt = $hit[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Characters that count as trailing punctuation. A right bracket is
	# only fair game when the URL contains no left bracket.
	$punctuation = ',;\.:!?' . ( strpos( $url, '(' ) === false ? ')' : '' );

	# Count the punctuation run at the end by scanning the reversed URL.
	$reversed = strrev( $url );
	$trailLength = strspn( $reversed, $punctuation );

	# Keep a ';' that terminates an HTML entity with the URL. This is hot
	# code: substr_compare avoids creating a string for the comparison, and
	# matching on the reversed string from an offset avoids a slow
	# $-anchored match. Un-reversed regexp: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLength
		&& substr_compare( $url, ";", -$trailLength, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $hit, 0, $trailLength )
	) {
		$trailLength--;
	}

	if ( $trailLength ) {
		$trail = substr( $url, -$trailLength ) . $trail;
		$url = substr( $url, 0, -$trailLength );
	}

	# If the trail swallowed everything after the protocol there is no
	# real URL left, so hand the text back unlinked.
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html !== false ) {
		return $html . $trail;
	}

	# Not an image, make a link
	$label = $this->getConverterLanguage()->markNoConversion( $url, true );
	$attribs = $this->getExternalLinkAttribs( $url );
	$html = Linker::makeExternalLink( $url, $label, true, 'free', $attribs );
	# Register it in the output object...
	$this->mOutput->addExternalLink( $url );

	return $html . $trail;
}
1545 
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from six equals signs down to one, so the longest
 * run of '=' is tried first.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	$level = 6;
	while ( $level > 0 ) {
		$marks = str_repeat( '=', $level );
		$pattern = '/^' . $marks . '(.+)' . $marks . '\\s*$/m';
		$replacement = '<h' . $level . '>\\1</h' . $level . '>';
		$text = preg_replace( $pattern, $replacement, $text );
		$level--;
	}
	return $text;
}
1562 
/**
 * Run doQuotes() over every line of the text.
 *
 * @param string $text Text that may span several lines
 * @return string The processed lines, joined with "\n" again
 */
public function doAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1580 
/**
 * Convert the apostrophe mark-up on a single line ('' italics, ''' bold,
 * ''''' both) into <i> and <b> tags.
 *
 * @param string $text One line of text
 * @return string The line with apostrophe runs replaced by HTML tags;
 *   returned unchanged when it contains no run of two or more apostrophes
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold the apostrophe runs, even indexes the text
	# between them.
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state names the currently open tags in opening order ('', 'i', 'b',
	// 'bi', 'ib'), or is 'both' after a ''''' whose nesting order is not
	// yet known; in that case text is collected in $buffer until the next
	// mark-up decides the order.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would treat
	// the buffered text "0" as empty and silently drop it.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1762 
/**
 * Replace bracketed external links with HTML links.
 *
 * The text is split with $this->mExtLinkBracketedRegex; after the leading
 * plain text, the pieces come in groups of four: URL, protocol, link text
 * and the text following the link.
 *
 * @param string $text
 * @throws MWException If preg_split() fails
 * @return string
 */
public function replaceExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}
	# Everything before the first link
	$s = array_shift( $bits );

	$total = count( $bits );
	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $bits[$pos];
		# $bits[$pos + 1] is the protocol, which is not needed here
		$label = $bits[$pos + 2];
		$trail = $bits[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396: move them and what follows into the label.
		$entity = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $entity[0][1];
			$label = substr( $url, $cutAt ) . ' ' . $label;
			$url = substr( $url, 0, $cutAt );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';

		# Set linktype for CSS - if URL==text, link is essentially free
		$linktype = ( $label === $url ) ? 'free' : 'text';

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$langObj = $this->getTargetLanguage();
			$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		$label = $this->getConverterLanguage()->markNoConversion( $label );

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$attribs = $this->getExternalLinkAttribs( $url );
		$s .= Linker::makeExternalLink( $url, $label, false, $linktype, $attribs )
			. $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $s;
}
1842 
/**
 * Decide the "rel" value for an external link.
 *
 * @param string|bool $url Link target, checked against
 *   $wgNoFollowDomainExceptions
 * @param object|null $title Object providing getNamespace(); its namespace
 *   is checked against $wgNoFollowNsExceptions
 * @return string|null 'nofollow', or null when nofollow does not apply
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	$ns = $title ? $title->getNamespace() : false;

	if ( !$wgNoFollowLinks ) {
		return null;
	}
	if ( in_array( $ns, $wgNoFollowNsExceptions ) ) {
		return null;
	}
	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}
	return 'nofollow';
}
1862 
/**
 * Build the attribute array ('rel' and, if configured, 'target') for an
 * external link.
 *
 * @param string|bool $url Link target, passed on to getExternalLinkRel()
 * @return array Always has a 'rel' key (its value may be null); has a
 *   'target' key when the parser options supply a link target
 */
public function getExternalLinkAttribs( $url = false ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// getExternalLinkRel() returns null when nofollow does not
			// apply, so test for null as well as '' to avoid producing a
			// value with a leading space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
1893 
/**
 * Deprecated alias of normalizeLinkUrl().
 *
 * @deprecated since 1.24 (per the wfDeprecated() call); use normalizeLinkUrl()
 * @param string $url
 * @return string
 */
public static function replaceUnusualEscapes( $url ) {
	wfDeprecated( __METHOD__, '1.24' );

	$normalized = self::normalizeLinkUrl( $url );
	return $normalized;
}
1905 
/**
 * Normalise the percent-escapes in a URL.
 *
 * Unsafe characters are percent-encoded first. The URL is then cut into
 * fragment, query and scheme-and-path parts, and each part is passed to
 * normalizeUrlComponent() with the list of characters that must stay
 * escaped in that part.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# First, make sure unsafe characters are encoded
	$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $hit ) {
			return rawurlencode( $hit[0] );
		},
		$url
	);

	$normalized = '';
	# Offset where the not-yet-processed part of the URL ends
	$boundary = strlen( $url );

	# Parts cut off the end, in processing order, each mapped to the
	# characters that stay escaped:
	#   '#' fragment part - 'fragment'
	#   '?' query part - 'query' minus &=+;
	$tailParts = [
		'#' => '"#%<>[\]^`{|}',
		'?' => '"#%<>[\]^`{|}&=+;',
	];
	foreach ( $tailParts as $delimiter => $unsafe ) {
		$offset = strpos( $url, $delimiter );
		if ( $offset === false || $offset >= $boundary ) {
			continue;
		}
		$piece = substr( $url, $offset, $boundary - $offset );
		$normalized = self::normalizeUrlComponent( $piece, $unsafe ) . $normalized;
		$boundary = $offset;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$head = substr( $url, 0, $boundary );
	return self::normalizeUrlComponent( $head, '"#%<>[\]^`{|}/?' ) . $normalized;
}
1950 
/**
 * Normalise the %XX escapes in one URL component.
 *
 * An escape is decoded when it stands for a byte with a value from 33 to
 * 126 that is not listed in $unsafe; every other escape is kept, with its
 * hex digits in upper case.
 *
 * @param string $component
 * @param string $unsafe Characters that must remain escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$keepEscaped = $code <= 32
				|| $code >= 127
				|| strpos( $unsafe, $decoded ) !== false;

			# Either leave it escaped (upper-casing a-f) or unescape it
			return $keepEscaped ? strtoupper( $escape[0] ) : $decoded;
		},
		$component
	);
}
1965 
/**
 * Make an external image for the URL if the parser options allow it.
 *
 * @param string $url
 * @return string|bool Value of Linker::makeExternalImage() when the URL is
 *   accepted as an image, otherwise false
 */
private function maybeMakeExternalImage( $url ) {
	# The "allowed from" setting is either a single prefix or a list of them
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();
	$hasPrefixRule = !empty( $allowedFrom );

	$prefixMatches = false;
	if ( $hasPrefixRule ) {
		if ( is_array( $allowedFrom ) ) {
			foreach ( $allowedFrom as $candidate ) {
				if ( strpos( $url, $candidate ) === 0 ) {
					$prefixMatches = true;
					break;
				}
			}
		} else {
			$prefixMatches = ( strpos( $url, $allowedFrom ) === 0 );
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || ( $hasPrefixRule && $prefixMatches ) )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}
	if ( $html ) {
		return $html;
	}

	# Otherwise try the on-wiki whitelist, if it is enabled
	if ( $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$entries = explode(
			"\n",
			wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
		);

		foreach ( $entries as $entry ) {
			# Ignore comments and blank entries
			if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
				continue;
			}
			# Each entry is a regex fragment: escape the delimiter and
			# match case-insensitively
			if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
				# Image matches a whitelist entry
				$html = Linker::makeExternalImage( $url );
				break;
			}
		}
	}
	return $html;
}
2023 
/**
 * Process [[ ]] wikilinks and merge the resulting link holders into
 * $this->mLinkHolders.
 *
 * @param string $s
 * @return string $s as rewritten by replaceInternalLinks2()
 */
public function replaceInternalLinks( $s ) {
	# replaceInternalLinks2() takes $s by reference and rewrites it
	$holders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $holders );
	return $s;
}
2037 
/**
 * Process [[ ]] wikilinks in $s.
 *
 * $s is rewritten in place. Language links, category links, images,
 * self-links and media links are handled here; other links are emitted
 * either as known links or as placeholders from the returned holder array.
 *
 * @param string &$s Text to process, modified in place
 * @throws MWException If $this->mTitle is null
 * @return LinkHolderArray Holders for the links found in $s
 */
public function replaceInternalLinks2( &$s ) {
	# Both globals are read further down; they have to be imported into
	# the function scope or they would be undefined local variables.
	global $wgExtraInterlanguageLinkPrefixes, $wgContLang;

	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $wgContLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = [];
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();

	// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		// @codingStandardsIgnoreEnd

		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
			# the real problem is with the $e1 regex
			# See bug 1300.
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = $m[1];

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $origLink, $text );
		} else {
			$link = $origLink;
		}

		# A leading ':' forces an ordinary link
		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		$unstrip = $this->mStripState->unstripNoWiki( $link );
		$nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
		} else {
			# Bug 4598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
				)
			) {
				# Bug 24502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				$s = rtrim( $s . $prefix );
				$s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
				continue;
			}

			if ( $ns == NS_FILE ) {
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
				} else {
					$s .= $prefix . $trail;
				}
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				$s = rtrim( $s . "\n" ); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				# Emit prefix and trail only if they hold more than newlines
				$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = [];
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				[ $this, $nt, &$options, &$descQuery ] );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, [], $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
		}
	}
	return $holders;
}
2349 
/**
 * Render a link to a page known to exist, armoured against later external
 * link processing.
 *
 * @param object $nt Link target; must provide getPrefixedText()
 * @param string $text Link text; when empty the HTML-escaped prefixed
 *   title text is used
 * @param array|string $query Query parameters; a string is converted with
 *   wfCgiToArray()
 * @param string $trail Text after the link; the part split off by
 *   Linker::splitTrail() goes inside the link
 * @param string $prefix Text placed inside the link before $text
 * @return string
 */
public function makeKnownLinkHolder( $nt, $text = '', $query = [], $trail = '', $prefix = '' ) {
	list( $inside, $trail ) = Linker::splitTrail( $trail );

	if ( is_string( $query ) ) {
		$query = wfCgiToArray( $query );
	}
	if ( $text == '' ) {
		$text = htmlspecialchars( $nt->getPrefixedText() );
	}

	$label = $prefix . $text . $inside;
	$anchor = Linker::linkKnown( $nt, $label, [], $query );

	return $this->armorLinks( $anchor ) . $trail;
}
2378 
/**
 * Insert the marker prefix followed by "NOPARSE" in front of every URL
 * protocol found in the text.
 *
 * @param string $text
 * @return string
 */
public function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$armoured = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $armoured, $text );
}
2393 
/**
 * Report whether the namespace of the current title has subpages.
 *
 * @return bool Result of MWNamespace::hasSubpages()
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$namespace = $this->mTitle->getNamespace();
	return MWNamespace::hasSubpages( $namespace );
}
2402 
/**
 * Resolve a possible subpage link relative to the current title.
 *
 * Thin wrapper around Linker::normalizeSubpageLink().
 *
 * @param string $target Link target as written
 * @param string &$text Link text, passed on by reference
 * @return string Value returned by Linker::normalizeSubpageLink()
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$normalized = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
	return $normalized;
}
2414 
/**
 * Close the block recorded in $this->mLastSection, if any, and reset the
 * paragraph state.
 *
 * @return string Closing tag followed by a newline, or '' when no section
 *   was open
 */
public function closeParagraph() {
	$closing = ( $this->mLastSection != '' )
		? '</' . $this->mLastSection . ">\n"
		: '';

	$this->mLastSection = '';
	$this->mInPre = false;

	return $closing;
}
2430 
/**
 * Length of the common prefix of two strings.
 *
 * @param string $st1
 * @param string $st2
 * @return int Number of leading characters the two strings share
 */
public function getCommon( $st1, $st2 ) {
	# Only the length of the shorter string needs to be compared
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	$matched = 0;
	while ( $matched < $limit && $st1[$matched] == $st2[$matched] ) {
		$matched++;
	}
	return $matched;
}
2455 
/**
 * Open a list for the given prefix character.
 *
 * Any open paragraph is closed first. Opening a ';' list also sets
 * $this->mDTopen.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string Closed paragraph plus the opening list tags, or an error
 *   comment for any other character
 */
public function openList( $char ) {
	$closedParagraph = $this->closeParagraph();

	$openers = [
		'*' => "<ul><li>",
		'#' => "<ol><li>",
		':' => "<dl><dd>",
		';' => "<dl><dt>",
	];

	if ( !is_string( $char ) || !isset( $openers[$char] ) ) {
		# The paragraph has already been closed, but its closing tag is
		# not part of the error result
		return '<!-- ERR 1 -->';
	}

	if ( $char === ';' ) {
		$this->mDTopen = true;
	}
	return $closedParagraph . $openers[$char];
}
2483 
/**
 * Close the current list item and open the next one.
 *
 * For definition lists, $this->mDTopen selects the closing tag and is then
 * updated to say whether the new item is a <dt>.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string Closing and opening item tags, or an error comment for
 *   any other character
 */
public function nextItem( $char ) {
	if ( $char === '*' || $char === '#' ) {
		return "</li>\n<li>";
	}
	if ( $char !== ':' && $char !== ';' ) {
		return '<!-- ERR 2 -->';
	}

	$closing = $this->mDTopen ? "</dt>\n" : "</dd>\n";
	$opensTerm = ( $char === ';' );
	$this->mDTopen = $opensTerm;

	return $closing . ( $opensTerm ? '<dt>' : '<dd>' );
}
2509 
/**
 * Close one list level for the given list prefix character.
 *
 * For ':' the closing item tag is </dt> when a definition term is still
 * open (the flag is cleared), otherwise </dd>. Note that ';' is not
 * handled here and yields the error comment.
 *
 * @param string $char One of '*', '#' or ':'
 * @return string Closing HTML, or an error comment for any other value
 */
public function closeList( $char ) {
	if ( $char === '*' ) {
		return "</li></ul>";
	}

	if ( $char === '#' ) {
		return "</li></ol>";
	}

	if ( $char === ':' ) {
		if ( !$this->mDTopen ) {
			return "</dd></dl>";
		}
		$this->mDTopen = false;
		return "</dt></dl>";
	}

	return '<!-- ERR 3 -->';
}
      /**
       * Make lists, paragraphs and indent-<pre> blocks from a block of text.
       *
       * The text is split on "\n" and processed one line at a time. A run of
       * leading '*', '#', ':' and ';' characters is the list prefix; lines
       * without a prefix go through paragraph / indent-pre handling unless they
       * contain one of the block-level tags matched below.
       *
       * @param string $text Text to process
       * @param bool $linestart Whether $text begins at the start of a line; when
       *   false the first line is copied to the output unprocessed
       * @return string Processed text
       */
2544  public function doBlockLevels( $text, $linestart ) {
2545 
2546  # Parsing through the text line by line. The main thing
2547  # happening here is handling of block-level elements p, pre,
2548  # and making lists from lines starting with * # : etc.
2549  $textLines = StringUtils::explode( "\n", $text );
2550 
      # $lastPrefix: normalised (';' => ':') list prefix of the previous line
      # $inBlockElem: true while inside a block element opened on an earlier line
      # $paragraphStack: false, or a pending '<p>' / '</p><p>' string that is
      #   only emitted once it is known what follows the blank line
      # $inBlockquote: true after an unclosed <blockquote>; suppresses indent-pre
2551  $lastPrefix = $output = '';
2552  $this->mDTopen = $inBlockElem = false;
2553  $prefixLength = 0;
2554  $paragraphStack = false;
2555  $inBlockquote = false;
2556 
2557  foreach ( $textLines as $oLine ) {
2558  # Fix up $linestart
2559  if ( !$linestart ) {
2560  $output .= $oLine;
2561  $linestart = true;
2562  continue;
2563  }
2564  # * = ul
2565  # # = ol
2566  # ; = dt
2567  # : = dd
2568 
2569  $lastPrefixLength = strlen( $lastPrefix );
2570  $preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
2571  $preOpenMatch = preg_match( '/<pre/i', $oLine );
2572  # If not in a <pre> element, scan for and figure out what prefixes are there.
2573  if ( !$this->mInPre ) {
2574  # Multiple prefixes may abut each other for nested lists.
2575  $prefixLength = strspn( $oLine, '*#:;' );
2576  $prefix = substr( $oLine, 0, $prefixLength );
2577 
2578  # eh?
2579  # ; and : are both from definition-lists, so they're equivalent
2580  # for the purposes of determining whether or not we need to open/close
2581  # elements.
2582  $prefix2 = str_replace( ';', ':', $prefix );
      # $t is the line content with the list prefix removed
2583  $t = substr( $oLine, $prefixLength );
2584  $this->mInPre = (bool)$preOpenMatch;
2585  } else {
2586  # Don't interpret any other prefixes in preformatted text
2587  $prefixLength = 0;
2588  $prefix = $prefix2 = '';
2589  $t = $oLine;
2590  }
2591 
2592  # List generation
2593  if ( $prefixLength && $lastPrefix === $prefix2 ) {
2594  # Same as the last item, so no need to deal with nesting or opening stuff
2595  $output .= $this->nextItem( substr( $prefix, -1 ) );
2596  $paragraphStack = false;
2597 
2598  if ( substr( $prefix, -1 ) === ';' ) {
2599  # The one nasty exception: definition lists work like this:
2600  # ; title : definition text
2601  # So we check for : in the remainder text to split up the
2602  # title and definition, without b0rking links.
2603  $term = $t2 = '';
2604  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2605  $t = $t2;
2606  $output .= $term . $this->nextItem( ':' );
2607  }
2608  }
2609  } elseif ( $prefixLength || $lastPrefixLength ) {
2610  # We need to open or close prefixes, or both.
2611 
2612  # Either open or close a level...
2613  $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
2614  $paragraphStack = false;
2615 
2616  # Close all the prefixes which aren't shared.
2617  while ( $commonPrefixLength < $lastPrefixLength ) {
2618  $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
2619  --$lastPrefixLength;
2620  }
2621 
2622  # Continue the current prefix if appropriate.
2623  if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
2624  $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
2625  }
2626 
2627  # Open prefixes where appropriate.
2628  if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
2629  $output .= "\n";
2630  }
2631  while ( $prefixLength > $commonPrefixLength ) {
2632  $char = substr( $prefix, $commonPrefixLength, 1 );
2633  $output .= $this->openList( $char );
2634 
2635  if ( ';' === $char ) {
2636  # @todo FIXME: This is dupe of code above
      # $term and $t2 are by-reference outputs of findColonNoLinks()
2637  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2638  $t = $t2;
2639  $output .= $term . $this->nextItem( ':' );
2640  }
2641  }
2642  ++$commonPrefixLength;
2643  }
2644  if ( !$prefixLength && $lastPrefix ) {
2645  $output .= "\n";
2646  }
2647  $lastPrefix = $prefix2;
2648  }
2649 
2650  # If we have no prefixes, go to paragraph mode.
2651  if ( 0 == $prefixLength ) {
2652  # No prefix (not in list)--go to paragraph mode
2653  # XXX: use a stack for nestable elements like span, table and div
      # $openmatch: the line contains a tag after which no paragraph is started
2654  $openmatch = preg_match(
2655  '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
2656  . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
2657  $t
2658  );
      # $closematch: the line contains a tag (or a "-pre" strip marker) after
      # which paragraph handling resumes on following lines
2659  $closematch = preg_match(
2660  '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
2661  . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
2662  . self::MARKER_PREFIX
2663  . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
2664  $t
2665  );
2666 
2667  if ( $openmatch || $closematch ) {
2668  $paragraphStack = false;
2669  # @todo bug 5718: paragraph closed
2670  $output .= $this->closeParagraph();
2671  if ( $preOpenMatch && !$preCloseMatch ) {
2672  $this->mInPre = true;
2673  }
      # Walk every <blockquote> / </blockquote> on the line; the last one
      # seen decides whether we are inside a blockquote afterwards
2674  $bqOffset = 0;
2675  while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t,
2676  $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset )
2677  ) {
2678  $inBlockquote = !$bqMatch[1][0]; // is this a close tag?
2679  $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
2680  }
2681  $inBlockElem = !$closematch;
2682  } elseif ( !$inBlockElem && !$this->mInPre ) {
      # A leading space starts or continues an indent-pre block, except
      # inside a blockquote; a whitespace-only line only continues one
2683  if ( ' ' == substr( $t, 0, 1 )
2684  && ( $this->mLastSection === 'pre' || trim( $t ) != '' )
2685  && !$inBlockquote
2686  ) {
2687  # pre
2688  if ( $this->mLastSection !== 'pre' ) {
2689  $paragraphStack = false;
2690  $output .= $this->closeParagraph() . '<pre>';
2691  $this->mLastSection = 'pre';
2692  }
2693  $t = substr( $t, 1 );
2694  } else {
2695  # paragraph
2696  if ( trim( $t ) === '' ) {
      # Blank line: a second consecutive one flushes the pending tag
      # followed by <br />; the first only records what to emit later
2697  if ( $paragraphStack ) {
2698  $output .= $paragraphStack . '<br />';
2699  $paragraphStack = false;
2700  $this->mLastSection = 'p';
2701  } else {
2702  if ( $this->mLastSection !== 'p' ) {
2703  $output .= $this->closeParagraph();
2704  $this->mLastSection = '';
2705  $paragraphStack = '<p>';
2706  } else {
2707  $paragraphStack = '</p><p>';
2708  }
2709  }
2710  } else {
      # Non-blank line: flush a pending paragraph tag, or open a new <p>
2711  if ( $paragraphStack ) {
2712  $output .= $paragraphStack;
2713  $paragraphStack = false;
2714  $this->mLastSection = 'p';
2715  } elseif ( $this->mLastSection !== 'p' ) {
2716  $output .= $this->closeParagraph() . '<p>';
2717  $this->mLastSection = 'p';
2718  }
2719  }
2720  }
2721  }
2722  }
2723  # somewhere above we forget to get out of pre block (bug 785)
2724  if ( $preCloseMatch && $this->mInPre ) {
2725  $this->mInPre = false;
2726  }
      # The line content is only emitted when no paragraph tag is pending,
      # i.e. a blank line that queued a tag contributes nothing itself
2727  if ( $paragraphStack === false ) {
2728  $output .= $t;
2729  if ( $prefixLength === 0 ) {
2730  $output .= "\n";
2731  }
2732  }
2733  }
      # Close whatever list levels the final line left open
2734  while ( $prefixLength ) {
2735  $output .= $this->closeList( $prefix2[$prefixLength - 1] );
2736  --$prefixLength;
2737  if ( !$prefixLength ) {
2738  $output .= "\n";
2739  }
2740  }
      # Close a trailing paragraph or indent-pre section
2741  if ( $this->mLastSection != '' ) {
2742  $output .= '</' . $this->mLastSection . '>';
2743  $this->mLastSection = '';
2744  }
2745 
2746  return $output;
2747  }
2748 
/**
 * Split a string at the first ':' that is not inside an HTML tag, an HTML
 * comment, or the content of an open element.
 *
 * Used for definition lists written as "; term : definition", where a
 * colon inside markup (for example inside a rendered link) must not be
 * treated as the separator.
 *
 * @param string $str Text to search
 * @param string &$before Set to the text before the colon when one is found
 * @param string &$after Set to the text after the colon when one is found
 * @return int|bool Byte offset of the colon, or false if no usable colon
 *   exists; $before and $after are left untouched in that case
 * @throws MWException If the state machine reaches an unknown state
 */
public function findColonNoLinks( $str, &$before, &$after ) {
	$pos = strpos( $str, ':' );
	if ( $pos === false ) {
		# Nothing to find!
		return false;
	}

	$lt = strpos( $str, '<' );
	if ( $lt === false || $lt > $pos ) {
		# Easy; no tag nesting to worry about
		$before = substr( $str, 0, $pos );
		$after = substr( $str, $pos + 1 );
		return $pos;
	}

	# Ugly state machine to walk through avoiding tags.
	# $stack counts elements that have been opened but not yet closed.
	$state = self::COLON_STATE_TEXT;
	$stack = 0;
	$len = strlen( $str );
	for ( $i = 0; $i < $len; $i++ ) {
		$c = $str[$i];

		switch ( $state ) {
			# (Using the number is a performance hack for common cases)
			case 0: # self::COLON_STATE_TEXT:
				switch ( $c ) {
					case "<":
						# Could be either a <start> tag or an </end> tag
						$state = self::COLON_STATE_TAGSTART;
						break;
					case ":":
						if ( $stack == 0 ) {
							# We found it!
							$before = substr( $str, 0, $i );
							$after = substr( $str, $i + 1 );
							return $i;
						}
						# Embedded in a tag; don't break it.
						break;
					default:
						# Skip ahead looking for something interesting
						$colon = strpos( $str, ':', $i );
						if ( $colon === false ) {
							# Nothing else interesting
							return false;
						}
						$lt = strpos( $str, '<', $i );
						if ( $stack === 0 ) {
							if ( $lt === false || $colon < $lt ) {
								# We found it! Report the colon's own offset, as
								# the other two success paths do, rather than
								# the offset the skip-ahead scan started from.
								$before = substr( $str, 0, $colon );
								$after = substr( $str, $colon + 1 );
								return $colon;
							}
						}
						if ( $lt === false ) {
							# Nothing else interesting to find; abort!
							# We're nested, but there's no close tags left. Abort!
							break 2;
						}
						# Skip ahead to next tag start
						$i = $lt;
						$state = self::COLON_STATE_TAGSTART;
				}
				break;
			case 1: # self::COLON_STATE_TAG:
				# In a <tag>
				switch ( $c ) {
					case ">":
						$stack++;
						$state = self::COLON_STATE_TEXT;
						break;
					case "/":
						# Slash may be followed by >?
						$state = self::COLON_STATE_TAGSLASH;
						break;
					default:
						# ignore
				}
				break;
			case 2: # self::COLON_STATE_TAGSTART:
				switch ( $c ) {
					case "/":
						$state = self::COLON_STATE_CLOSETAG;
						break;
					case "!":
						$state = self::COLON_STATE_COMMENT;
						break;
					case ">":
						# Illegal early close? This shouldn't happen D:
						$state = self::COLON_STATE_TEXT;
						break;
					default:
						$state = self::COLON_STATE_TAG;
				}
				break;
			case 3: # self::COLON_STATE_CLOSETAG:
				# In a </tag>
				if ( $c === ">" ) {
					$stack--;
					if ( $stack < 0 ) {
						wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
						return false;
					}
					$state = self::COLON_STATE_TEXT;
				}
				break;
			case self::COLON_STATE_TAGSLASH:
				if ( $c === ">" ) {
					# Yes, a self-closed tag <blah/>
					$state = self::COLON_STATE_TEXT;
				} else {
					# Probably we're jumping the gun, and this is an attribute
					$state = self::COLON_STATE_TAG;
				}
				break;
			case 5: # self::COLON_STATE_COMMENT:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASH;
				}
				break;
			case self::COLON_STATE_COMMENTDASH:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASHDASH;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			case self::COLON_STATE_COMMENTDASHDASH:
				if ( $c === ">" ) {
					$state = self::COLON_STATE_TEXT;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			default:
				throw new MWException( "State machine error in " . __METHOD__ );
		}
	}
	if ( $stack > 0 ) {
		wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
		return false;
	}
	return false;
}
2905 
/**
 * Return the value of a magic variable (such as PAGENAME or CURRENTDAY).
 *
 * Values computed by the built-in cases that end in `break` are stored in
 * $this->mVarCache and served from there on later calls (unless the
 * ParserGetVariableValueVarCache hook vetoes the lookup). Cases that
 * `return` directly, and ids resolved by the
 * ParserGetVariableValueSwitch hook, are not cached here.
 *
 * @param string $index Magic variable identifier, e.g. 'currentmonth'
 * @param bool|PPFrame $frame Frame handed to the ParserGetVariableValueSwitch hook
 * @throws MWException If no title has been set on the parser
 * @return string|int|null Value of the variable; null for an unknown id
 *   that no hook handles, and for 'pageid' on a page that does not exist
 */
public function getVariableValue( $index, $frame = false ) {
	# The site-configuration globals read by the cases below must be imported
	# explicitly; without this they are undefined local variables.
	global $wgContLang, $wgSitename, $wgServer, $wgServerName;
	global $wgArticlePath, $wgScriptPath, $wgStylePath;

	if ( is_null( $this->mTitle ) ) {
		// If no title set, bad things are going to happen
		// later. Title should always be set since this
		// should only be called in the middle of a parse
		// operation (but the unit-tests do funky stuff)
		throw new MWException( __METHOD__ . ' Should only be '
			. ' called while parsing (no title set)' );
	}

	# Hook handlers take the parser by reference. PHP 7.1+ rejects `&$this`,
	# so hand them a reference to a local alias of the same object instead.
	$parser = $this;

	if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) ) {
		if ( isset( $this->mVarCache[$index] ) ) {
			return $this->mVarCache[$index];
		}
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );

	$pageLang = $this->getFunctionLang();

	switch ( $index ) {
		case '!':
			$value = '|';
			break;
		case 'currentmonth':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
			break;
		case 'currentmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentday':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
			break;
		case 'currentday2':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
			break;
		case 'localmonth':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
			break;
		case 'localmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localday':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
			break;
		case 'localday2':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
			break;
		case 'pagename':
			$value = wfEscapeWikiText( $this->mTitle->getText() );
			break;
		case 'pagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
			break;
		case 'fullpagename':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
			break;
		case 'subpagename':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
			break;
		case 'subpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$value = wfEscapeWikiText( $this->mTitle->getRootText() );
			break;
		case 'rootpagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getRootText()
			) ) );
			break;
		case 'basepagename':
			$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
			break;
		case 'basepagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getBaseText()
			) ) );
			break;
		case 'talkpagename':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
			} else {
				$value = '';
			}
			break;
		case 'talkpagenamee':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
			} else {
				$value = '';
			}
			break;
		case 'subjectpagename':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
			break;
		case 'pageid': // requested in bug 23427
			$pageid = $this->getTitle()->getArticleID();
			if ( $pageid == 0 ) {
				# 0 means the page doesn't exist in the database,
				# which means the user is previewing a new page.
				# The vary-revision flag must be set, because the magic word
				# will have a different value once the page is saved.
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
			}
			$value = $pageid ? $pageid : null;
			break;
		case 'revisionid':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
			$value = $this->mRevisionId;
			break;
		case 'revisionday':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
			break;
		case 'revisionday2':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 6, 2 );
			break;
		case 'revisionmonth':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 4, 2 );
			break;
		case 'revisionmonth1':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
			break;
		case 'revisionyear':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 0, 4 );
			break;
		case 'revisiontimestamp':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
			$value = $this->getRevisionTimestamp();
			break;
		case 'revisionuser':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
			$value = $this->getRevisionUser();
			break;
		case 'revisionsize':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
			$value = $this->getRevisionSize();
			break;
		case 'namespace':
			$value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacee':
			$value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacenumber':
			$value = $this->mTitle->getNamespace();
			break;
		case 'talkspace':
			$value = $this->mTitle->canTalk()
				? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
				: '';
			break;
		case 'talkspacee':
			$value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
			break;
		case 'subjectspace':
			$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
			break;
		case 'currentdayname':
			$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
			break;
		case 'currentyear':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
			break;
		case 'currenttime':
			$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
			break;
		case 'currentweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
			break;
		case 'currentdow':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
			break;
		case 'localdayname':
			$value = $pageLang->getWeekdayName(
				(int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
			);
			break;
		case 'localyear':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
			break;
		case 'localtime':
			$value = $pageLang->time(
				MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
				false,
				false
			);
			break;
		case 'localhour':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
			break;
		case 'localweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
			break;
		case 'localdow':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
			break;
		case 'numberofarticles':
			$value = $pageLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $pageLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $pageLang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$value = $pageLang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$value = $pageLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$value = $pageLang->formatNum( SiteStats::edits() );
			break;
		case 'currenttimestamp':
			$value = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
			break;
		case 'currentversion':
			# NOTE(review): assignment restored to match upstream MediaWiki
			# 1.27; without it $value is undefined when the switch ends.
			$value = SpecialVersion::getVersion();
			break;
		# The configuration-backed cases below return directly, so their
		# values are never written to $this->mVarCache.
		case 'articlepath':
			return $wgArticlePath;
		case 'sitename':
			return $wgSitename;
		case 'server':
			return $wgServer;
		case 'servername':
			return $wgServerName;
		case 'scriptpath':
			return $wgScriptPath;
		case 'stylepath':
			return $wgStylePath;
		case 'directionmark':
			return $pageLang->getDirMark();
		case 'contentlanguage':
			global $wgLanguageCode;
			return $wgLanguageCode;
		case 'cascadingsources':
			# NOTE(review): assignment restored to match upstream MediaWiki
			# 1.27; without it $value is undefined when the switch ends.
			$value = CoreParserFunctions::cascadingsources( $this );
			break;
		default:
			# Unknown id: let extensions supply a value through $ret
			$ret = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				[ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
			);

			return $ret;
	}

	if ( $index ) {
		$this->mVarCache[$index] = $value;
	}

	return $value;
}
3263 
/**
 * Build the magic-word matchers for variables and for subst-style prefixes.
 *
 * Populates $this->mVariables and $this->mSubstWords from the id lists
 * that MagicWord provides.
 */
public function initialiseVariables() {
	$this->mVariables = new MagicWordArray( MagicWord::getVariableIDs() );
	$this->mSubstWords = new MagicWordArray( MagicWord::getSubstIDs() );
}
3276 
/**
 * Run the preprocessor over wikitext and return the resulting tree.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Bit field passed straight to the preprocessor
 *   (Parser::PTD_FOR_INCLUSION is the flag declared for this purpose)
 * @return mixed Whatever the preprocessor's preprocessToObj() returns
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3303 
/**
 * Split a string into its leading whitespace, trimmed core and trailing
 * whitespace.
 *
 * "Whitespace" is whatever ltrim()/rtrim() strip by default. For a
 * string consisting only of whitespace, all of it is reported as
 * leading whitespace.
 *
 * @param string $s
 * @return array [ leading whitespace, trimmed text, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	$withoutLead = ltrim( $s );
	$leadLength = strlen( $s ) - strlen( $withoutLead );

	$core = rtrim( $withoutLead );
	$tailLength = strlen( $withoutLead ) - strlen( $core );

	$lead = substr( $s, 0, $leadLength );
	# substr() with a negative offset of 0 would return the whole string,
	# so the no-trailing-whitespace case is handled separately
	$tail = ( $tailLength > 0 ) ? substr( $withoutLead, -$tailLength ) : '';

	return [ $lead, $core, $tail ];
}
3323 
/**
 * Expand templates, variables and template arguments in wikitext.
 *
 * Empty text, and text longer than the configured maximum include size,
 * is returned unchanged.
 *
 * @param string $text Wikitext to expand
 * @param bool|PPFrame|array $frame Frame to expand in. False creates a new
 *   top-level frame; any other non-PPFrame value is passed to
 *   newCustomFrame() to build a frame from it.
 * @param bool $argsOnly Expand with PPFrame::NO_TEMPLATES when true
 * @return string Expanded text
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text?
	$length = strlen( $text );
	if ( $length < 1 ) {
		return $text;
	}
	# Prevent too big inclusions!
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			wfDebug( __METHOD__ . " called using plain parameters instead of "
				. "a PPFrame instance. Creating custom frame.\n" );
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$dom = $this->preprocessToDom( $text );
	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $dom, $expandFlags );
}
3365 
/**
 * Turn a list of raw template arguments into an associative array.
 *
 * Arguments without '=' are numbered from 1 in order of appearance and
 * stored untrimmed. Arguments of the form "name=value" are split at the
 * first '=' and stored under the trimmed name with the trimmed value; a
 * later argument with the same key overwrites an earlier one.
 *
 * @param array $args List of argument strings
 * @return array Map of parameter name or position to value
 */
public static function createAssocArgs( $args ) {
	$assoc = [];
	$position = 1;

	foreach ( $args as $arg ) {
		$pieces = explode( '=', $arg, 2 );

		if ( count( $pieces ) < 2 ) {
			# Positional argument
			$assoc[$position] = $arg;
			$position++;
			continue;
		}

		# Named argument
		$assoc[trim( $pieces[0] )] = trim( $pieces[1] );
	}

	return $assoc;
}
3394 
/**
 * Record that a parser limit was hit: add a warning to the output and
 * put the page in the matching tracking category.
 *
 * The warning uses the message "<limitationType>-warning" and the
 * category key "<limitationType>-category".
 *
 * @param string $limitationType Base name of the limit
 * @param string|int $current Current value, passed as the first numeric
 *   message parameter
 * @param string|int $max Maximum allowed value, passed as the second
 *   numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	$warningKey = "$limitationType-warning";
	$categoryKey = "$limitationType-category";

	# does no harm if $current and $max are present but are unnecessary for the message
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$warningText = wfMessage( $warningKey )->numParams( $current, $max )->text();

	$this->mOutput->addWarning( $warningText );
	$this->addTrackingCategory( $categoryKey );
}
3430 
3443  public function braceSubstitution( $piece, $frame ) {
3444 
3445  // Flags
3446 
3447  // $text has been filled
3448  $found = false;
3449  // wiki markup in $text should be escaped
3450  $nowiki = false;
3451  // $text is HTML, armour it against wikitext transformation
3452  $isHTML = false;
3453  // Force interwiki transclusion to be done in raw mode not rendered
3454  $forceRawInterwiki = false;
3455  // $text is a DOM node needing expansion in a child frame
3456  $isChildObj = false;
3457  // $text is a DOM node needing expansion in the current frame
3458  $isLocalObj = false;
3459 
3460  # Title object, where $text came from
3461  $title = false;
3462 
3463  # $part1 is the bit before the first |, and must contain only title characters.
3464  # Various prefixes will be stripped from it later.
3465  $titleWithSpaces = $frame->expand( $piece['title'] );
3466  $part1 = trim( $titleWithSpaces );
3467  $titleText = false;
3468 
3469  # Original title text preserved for various purposes
3470  $originalTitle = $part1;
3471 
3472  # $args is a list of argument nodes, starting from index 0, not including $part1
3473  # @todo FIXME: If piece['parts'] is null then the call to getLength()
3474  # below won't work b/c this $args isn't an object
3475  $args = ( null == $piece['parts'] ) ? [] : $piece['parts'];
3476 
3477  $profileSection = null; // profile templates
3478 
3479  # SUBST
3480  if ( !$found ) {
3481  $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3482 
3483  # Possibilities for substMatch: "subst", "safesubst" or FALSE
3484  # Decide whether to expand template or keep wikitext as-is.
3485  if ( $this->ot['wiki'] ) {
3486  if ( $substMatch === false ) {
3487  $literal = true; # literal when in PST with no prefix
3488  } else {
3489  $literal = false; # expand when in PST with subst: or safesubst:
3490  }
3491  } else {
3492  if ( $substMatch == 'subst' ) {
3493  $literal = true; # literal when not in PST with plain subst:
3494  } else {
3495  $literal = false; # expand when not in PST with safesubst: or no prefix
3496  }
3497  }
3498  if ( $literal ) {
3499  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3500  $isLocalObj = true;
3501  $found = true;
3502  }
3503  }
3504 
3505  # Variables
3506  if ( !$found && $args->getLength() == 0 ) {
3507  $id = $this->mVariables->matchStartToEnd( $part1 );
3508  if ( $id !== false ) {
3509  $text = $this->getVariableValue( $id, $frame );
3510  if ( MagicWord::getCacheTTL( $id ) > -1 ) {
3511  $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
3512  }
3513  $found = true;
3514  }
3515  }
3516 
3517  # MSG, MSGNW and RAW
3518  if ( !$found ) {
3519  # Check for MSGNW:
3520  $mwMsgnw = MagicWord::get( 'msgnw' );
3521  if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3522  $nowiki = true;
3523  } else {
3524  # Remove obsolete MSG:
3525  $mwMsg = MagicWord::get( 'msg' );
3526  $mwMsg->matchStartAndRemove( $part1 );
3527  }
3528 
3529  # Check for RAW:
3530  $mwRaw = MagicWord::get( 'raw' );
3531  if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3532  $forceRawInterwiki = true;
3533  }
3534  }
3535 
3536  # Parser functions
3537  if ( !$found ) {
3538  $colonPos = strpos( $part1, ':' );
3539  if ( $colonPos !== false ) {
3540  $func = substr( $part1, 0, $colonPos );
3541  $funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
3542  $argsLength = $args->getLength();
3543  for ( $i = 0; $i < $argsLength; $i++ ) {
3544  $funcArgs[] = $args->item( $i );
3545  }
3546  try {
3547  $result = $this->callParserFunction( $frame, $func, $funcArgs );
3548  } catch ( Exception $ex ) {
3549  throw $ex;
3550  }
3551 
3552  # The interface for parser functions allows for extracting
3553  # flags into the local scope. Extract any forwarded flags
3554  # here.
3555  extract( $result );
3556  }
3557  }
3558 
3559  # Finish mangling title and then check for loops.
3560  # Set $title to a Title object and $titleText to the PDBK
3561  if ( !$found ) {
3562  $ns = NS_TEMPLATE;
3563  # Split the title into page and subpage
3564  $subpage = '';
3565  $relative = $this->maybeDoSubpageLink( $part1, $subpage );
3566  if ( $part1 !== $relative ) {
3567  $part1 = $relative;
3568  $ns = $this->mTitle->getNamespace();
3569  }
3570  $title = Title::newFromText( $part1, $ns );
3571  if ( $title ) {
3572  $titleText = $title->getPrefixedText();
3573  # Check for language variants if the template is not found
3574  if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
3575  $this->getConverterLanguage()->findVariantLink( $part1, $title, true );
3576  }
3577  # Do recursion depth check
3578  $limit = $this->mOptions->getMaxTemplateDepth();
3579  if ( $frame->depth >= $limit ) {
3580  $found = true;
3581  $text = '<span class="error">'
3582  . wfMessage( 'parser-template-recursion-depth-warning' )
3583  ->numParams( $limit )->inContentLanguage()->text()
3584  . '</span>';
3585  }
3586  }
3587  }
3588 
3589  # Load from database
3590  if ( !$found && $title ) {
3591  $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3592  if ( !$title->isExternal() ) {
3593  if ( $title->isSpecialPage()
3594  && $this->mOptions->getAllowSpecialInclusion()
3595  && $this->ot['html']
3596  ) {
3597  // Pass the template arguments as URL parameters.
3598  // "uselang" will have no effect since the Language object
3599  // is forced to the one defined in ParserOptions.
3600  $pageArgs = [];
3601  $argsLength = $args->getLength();
3602  for ( $i = 0; $i < $argsLength; $i++ ) {
3603  $bits = $args->item( $i )->splitArg();
3604  if ( strval( $bits['index'] ) === '' ) {
3605  $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3606  $value = trim( $frame->expand( $bits['value'] ) );
3607  $pageArgs[$name] = $value;
3608  }
3609  }
3610 
3611  // Create a new context to execute the special page
3612  $context = new RequestContext;
3613  $context->setTitle( $title );
3614  $context->setRequest( new FauxRequest( $pageArgs ) );
3615  $context->setUser( $this->getUser() );
3616  $context->setLanguage( $this->mOptions->getUserLangObj() );
3618  if ( $ret ) {
3619  $text = $context->getOutput()->getHTML();
3620  $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3621  $found = true;
3622  $isHTML = true;
3623  $this->disableCache();
3624  }
3625  } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
3626  $found = false; # access denied
3627  wfDebug( __METHOD__ . ": template inclusion denied for " .
3628  $title->getPrefixedDBkey() . "\n" );
3629  } else {
3630  list( $text, $title ) = $this->getTemplateDom( $title );
3631  if ( $text !== false ) {
3632  $found = true;
3633  $isChildObj = true;
3634  }
3635  }
3636 
3637  # If the title is valid but undisplayable, make a link to it
3638  if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3639  $text = "[[:$titleText]]";
3640  $found = true;
3641  }
3642  } elseif ( $title->isTrans() ) {
3643  # Interwiki transclusion
3644  if ( $this->ot['html'] && !$forceRawInterwiki ) {
3645  $text = $this->interwikiTransclude( $title, 'render' );
3646  $isHTML = true;
3647  } else {
3648  $text = $this->interwikiTransclude( $title, 'raw' );
3649  # Preprocess it like a template
3650  $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3651  $isChildObj = true;
3652  }
3653  $found = true;
3654  }
3655 
3656  # Do infinite loop check
3657  # This has to be done after redirect resolution to avoid infinite loops via redirects
3658  if ( !$frame->loopCheck( $title ) ) {
3659  $found = true;
3660  $text = '<span class="error">'
3661  . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3662  . '</span>';
3663  wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
3664  }
3665  }
3666 
3667  # If we haven't found text to substitute by now, we're done
3668  # Recover the source wikitext and return it
3669  if ( !$found ) {
3670  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3671  if ( $profileSection ) {
3672  $this->mProfiler->scopedProfileOut( $profileSection );
3673  }
3674  return [ 'object' => $text ];
3675  }
3676 
3677  # Expand DOM-style return values in a child frame
3678  if ( $isChildObj ) {
3679  # Clean up argument array
3680  $newFrame = $frame->newChild( $args, $title );
3681 
3682  if ( $nowiki ) {
3683  $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3684  } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3685  # Expansion is eligible for the empty-frame cache
3686  $text = $newFrame->cachedExpand( $titleText, $text );
3687  } else {
3688  # Uncached expansion
3689  $text = $newFrame->expand( $text );
3690  }
3691  }
3692  if ( $isLocalObj && $nowiki ) {
3693  $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3694  $isLocalObj = false;
3695  }
3696 
3697  if ( $profileSection ) {
3698  $this->mProfiler->scopedProfileOut( $profileSection );
3699  }
3700 
3701  # Replace raw HTML by a placeholder
3702  if ( $isHTML ) {
3703  $text = $this->insertStripItem( $text );
3704  } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3705  # Escape nowiki-style return values
3706  $text = wfEscapeWikiText( $text );
3707  } elseif ( is_string( $text )
3708  && !$piece['lineStart']
3709  && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3710  ) {
3711  # Bug 529: if the template begins with a table or block-level
3712  # element, it should be treated as beginning a new line.
3713  # This behavior is somewhat controversial.
3714  $text = "\n" . $text;
3715  }
3716 
3717  if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3718  # Error, oversize inclusion
3719  if ( $titleText !== false ) {
3720  # Make a working, properly escaped link if possible (bug 23588)
3721  $text = "[[:$titleText]]";
3722  } else {
3723  # This will probably not be a working link, but at least it may
3724  # provide some hint of where the problem is
3725  preg_replace( '/^:/', '', $originalTitle );
3726  $text = "[[:$originalTitle]]";
3727  }
3728  $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3729  . 'post-expand include size too large -->' );
3730  $this->limitationWarn( 'post-expand-template-inclusion' );
3731  }
3732 
3733  if ( $isLocalObj ) {
3734  $ret = [ 'object' => $text ];
3735  } else {
3736  $ret = [ 'text' => $text ];
3737  }
3738 
3739  return $ret;
3740  }
3741 
/**
 * Call a parser function and return an array with text and flags.
 *
 * The returned array always has a boolean 'found' element. If no function
 * hook is registered under $function, only [ 'found' => false ] is
 * returned. Otherwise the hook's return value is normalised into an array
 * with 'found' => true and 'text', plus whatever other flags the hook
 * returned. When the hook sets 'noparse' to a false value, 'text' is run
 * through preprocessToDom() and 'isChildObj' is set to true.
 *
 * @param PPFrame $frame The frame used to expand PPNode arguments
 * @param string $function Function name as written by the caller
 * @param array $args Arguments to the function. Keys and values are
 *  converted to PPNodes or to trimmed strings depending on whether the
 *  hook was registered with SFH_OBJECT_ARGS.
 * @throws MWException If the registered hook is not callable
 * @return array
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	# Needed for the case-insensitive lookup below; without this
	# declaration $wgContLang would be an undefined local.
	global $wgContLang;

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		throw new MWException( "Tag hook for $function is not callable\n" );
	}

	# The parser itself is always the first argument of a function hook
	$allArgs = [ &$this ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				# Already a node, or the first argument, which is passed as-is
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				# Named argument: rebuild the "name=value" form
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# Array union: a 'found' value supplied by the hook wins
		$result += [
			'found' => true,
		];
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3848 
/**
 * Get the preprocessed DOM of a template together with the title it was
 * finally loaded from. Results are cached in $this->mTplDomCache, and
 * titles that resolved to a different title are remembered in
 * $this->mTplRedirCache.
 *
 * @param Title $title
 * @return array Two elements: the value returned by preprocessToDom()
 *  (or false if the template text could not be fetched), and the Title
 *  the text was fetched from.
 */
public function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	# If an earlier fetch of this title ended up at a different title,
	# continue with that title instead
	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return [ $this->mTplDomCache[$titleText], $title ];
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		# Store the miss under the requested key. isset() is still true
		# for a stored false, so later calls return it from the cache.
		$this->mTplDomCache[$titleText] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	# Remember where the requested title ended up
	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			[ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3888 
/**
 * Fetch the current revision of a given title, memoised per parser
 * instance in an LRU map keyed by the prefixed DB key.
 *
 * The lookup itself is delegated to the callback configured in the
 * parser options.
 *
 * @param Title $title
 * @return mixed Whatever the current-revision callback returned for $title
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$cacheKey = $title->getPrefixedDBkey();
	# Create the cache lazily on first use
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$this->currentRevisionCache->set( $cacheKey,
			// Defaults to Parser::statelessFetchRevision()
			call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
		);
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3913 
/**
 * Look up a revision for a title without touching any parser state.
 *
 * @param Title $title
 * @param Parser|bool $parser Accepted for callback compatibility; unused here
 * @return mixed Result of Revision::newFromTitle()
 */
public static function statelessFetchRevision( $title, $parser = false ) {
	$revision = Revision::newFromTitle( $title );

	return $revision;
}
3926 
/**
 * Fetch the unparsed text of a template through the configured template
 * callback and register every dependency it reports on the parser output.
 *
 * @param Title $title
 * @return array Two elements: the template text (with U+007F replaced
 *  when it is a string) and the title the text finally came from
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$callback = $this->mOptions->getTemplateCallback();
	$fetched = call_user_func( $callback, $title, $this );

	// U+007F DELETE is reserved for strip markers, so it must not
	// survive in ordinary template text.
	$wikitext = $fetched['text'];
	if ( is_string( $fetched['text'] ) ) {
		$wikitext = strtr( $wikitext, "\x7f", "?" );
	}

	if ( isset( $fetched['finalTitle'] ) ) {
		$resolvedTitle = $fetched['finalTitle'];
	} else {
		$resolvedTitle = $title;
	}

	$dependencies = isset( $fetched['deps'] ) ? $fetched['deps'] : [];
	foreach ( $dependencies as $dependency ) {
		$this->mOutput->addTemplate(
			$dependency['title'],
			$dependency['page_id'],
			$dependency['rev_id']
		);
		if ( $dependency['title']->equals( $this->getTitle() ) ) {
			// Self-transclusion: the rendered result depends on the
			// revision of the page being parsed.
			$this->mOutput->setFlag( 'vary-revision' );
		}
	}

	return [ $wikitext, $resolvedTitle ];
}
3954 
/**
 * Fetch the unparsed text of a template, discarding the final title.
 *
 * @param Title $title
 * @return string|bool First element of fetchTemplateAndTitle()'s result
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );

	return $text;
}
3963 
/**
 * Fetch the text of a template without relying on parser state, following
 * at most one redirect, and collect the pages the result depends on.
 *
 * @param Title $title
 * @param Parser|bool $parser When given, the current revision is fetched
 *  through $parser->fetchCurrentRevisionOfTitle(); otherwise it is loaded
 *  directly with Revision::newFromTitle().
 * @return array With keys:
 *  - 'text': the template text, or false if none could be loaded
 *  - 'finalTitle': the last title content was looked up for
 *  - 'deps': list of [ 'title', 'page_id', 'rev_id' ] arrays
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	# Needed for the NS_MEDIAWIKI message fallback below
	global $wgContLang;

	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];

	# Loop to fetch the article, with up to 1 redirect
	// @codingStandardsIgnoreStart Generic.CodeAnalysis.ForLoopWithTestFunctionCall.NotAllowed
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		// @codingStandardsIgnoreEnd
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			# No parser to go through: load the revision directly so that
			# $rev is always defined below
			$rev = Revision::newFromTitle( $title );
		}
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = LinkCache::singleton();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id ];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id ];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but the title may still name an interface message
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		# A non-object result here ends the loop via the is_object() test
		$title = $content->getRedirectTarget();
	}
	return [
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps ];
}
4055 
/**
 * Fetch a file and register it as a dependency, discarding the final title.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed First element of fetchFileAndTitle()'s result
 */
public function fetchFile( $title, $options = [] ) {
	list( $file ) = $this->fetchFileAndTitle( $title, $options );

	return $file;
}
4066 
/**
 * Fetch a file and the title it was finally found under, registering the
 * image on the parser output under every title involved.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (or a false value when not found)
 *  and the possibly updated Title
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	$foundUnderOtherTitle = $file && !$title->equals( $file->getTitle() );
	if ( $foundUnderOtherTitle ) {
		# Update fetched file title
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}

	return [ $file, $title ];
}
4088 
/**
 * Look up a file without registering it on the parser output.
 *
 * @param Title $title
 * @param array $options Recognised keys: 'broken' forces a false result,
 *  'sha1' switches to a lookup by key; the whole array is also handed to
 *  the underlying lookup function.
 * @return mixed The lookup result, or false when 'broken' is set
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return wfFindFile( $title, $options );
}
4109 
/**
 * Transclude a page from another wiki by fetching it over HTTP.
 *
 * Returns an error message instead of the page text when the feature is
 * switched off via $wgEnableScaryTranscluding, or when the request URL
 * is longer than 255 bytes.
 *
 * @param Title $title
 * @param string $action Value of the 'action' URL parameter
 * @return string
 */
public function interwikiTransclude( $title, $action ) {
	global $wgEnableScaryTranscluding;

	if ( $wgEnableScaryTranscluding ) {
		$url = $title->getFullURL( [ 'action' => $action ] );
		if ( strlen( $url ) <= 255 ) {
			return $this->fetchScaryTemplateMaybeFromCache( $url );
		}
		$errorKey = 'scarytranscludetoolong';
	} else {
		$errorKey = 'scarytranscludedisabled';
	}

	return wfMessage( $errorKey )->inContentLanguage()->text();
}
4132 
/**
 * Return the content behind a URL, preferring a sufficiently recent copy
 * from the 'transcache' table and otherwise fetching it over HTTP and
 * storing the result there.
 *
 * @param string $url
 * @return string Page content, or an error message if the fetch failed
 */
public function fetchScaryTemplateMaybeFromCache( $url ) {
	global $wgTranscludeCacheExpiry;

	$dbr = wfGetDB( DB_SLAVE );
	// Oldest timestamp that still counts as fresh
	$tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
	$conds = [
		'tc_url' => $url,
		"tc_time >= " . $dbr->addQuotes( $tsCond ),
	];
	$cached = $dbr->selectRow( 'transcache', [ 'tc_time', 'tc_contents' ], $conds );
	if ( $cached ) {
		return $cached->tc_contents;
	}

	$req = MWHttpRequest::factory( $url, [], __METHOD__ );
	$status = $req->execute(); // Status object
	if ( !$status->isOK() ) {
		if ( $req->getStatus() != 200 ) {
			// Though we failed to fetch the content, this status is useless.
			return wfMessage( 'scarytranscludefailed-httpstatus' )
				->params( $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
		}
		return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
	}
	$text = $req->getContent();

	$dbw = wfGetDB( DB_MASTER );
	$row = [
		'tc_url' => $url,
		'tc_time' => $dbw->timestamp( time() ),
		'tc_contents' => $text
	];
	$dbw->replace( 'transcache', [ 'tc_url' ], $row );

	return $text;
}
4167 
/**
 * Triple brace replacement -- used for template arguments.
 *
 * Looks the argument up in the frame. If it is not set, the first part
 * of the {{{...}}} construct is used as default in the html and pre
 * output types (and in wiki output when the frame is a template); with
 * no default either, the original source is reconstructed.
 *
 * @param array $piece Must contain 'title' and 'parts'
 * @param PPFrame $frame
 * @return array Either [ 'object' => ... ] or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$sizeWarning = false;
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$object = false;
	$text = $frame->getArgument( trim( $nameWithSpaces ) );

	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultApplies = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultApplies ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	# Charge the argument length against the include size limit
	$fits = $this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( !$fits ) {
		$sizeWarning = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $object !== false ) {
		return [ 'object' => $object ];
	}

	# The warning is only ever attached to a text result
	if ( $sizeWarning !== false ) {
		$text .= $sizeWarning;
	}

	return [ 'text' => $text ];
}
4214 
/**
 * Return the text to be used for a given extension tag: either the
 * hook output itself (marker type 'none') or a strip marker under which
 * the output has been stored in the strip state.
 *
 * NOTE: a hook may return an array whose non-zero keys are extract()ed
 * into this method's scope (e.g. to override $markerType), so the local
 * variable names used here are part of that contract.
 *
 * @param array $params Must contain 'name'; 'attr', 'inner', 'attributes'
 *  and 'close' are consulted as shown below
 * @param PPFrame $frame
 * @throws MWException If a registered hook is not callable, or the final
 *  marker type is not one of 'none', 'nowiki', 'general'
 * @return string
 */
public function extensionSubstitution( $params, $frame ) {
	$name = $frame->expand( $params['name'] );
	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : null;
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;
	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	# Function tag hooks only take effect in the html and pre output types
	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			# Array union: attributes parsed from the tag text take precedence
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $this->mTagHooks[$name] ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}
			$output = call_user_func_array(
				$this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ]
			);
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}

			$output = call_user_func_array(
				$callback,
				[ &$this, $frame, $content, $attributes ]
			);
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			# Extract flags to local scope (to override $markerType)
			$flags = $output;
			$output = $flags[0];
			unset( $flags[0] );
			extract( $flags );
		}
	} else {
		# Not rendering the tag: rebuild its source form instead
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content !== null ) {
			$close = $params['close'] === null ? '' : $frame->expand( $params['close'] );
			$output = "<$name$attrText>$content$close";
		} else {
			$output = "<$name$attrText/>";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	}

	if ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}

	return $marker;
}
4306 
/**
 * Add $size to the running total kept for $type, unless that would push
 * the total above the maximum include size from the parser options.
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Amount to add
 * @return bool True if the total was increased, false if the limit
 *  would be exceeded (the total is then left untouched)
 */
public function incrementIncludeSize( $type, $size ) {
	$overLimit = $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize();

	if ( !$overLimit ) {
		$this->mIncludeSizes[$type] += $size;
	}

	return !$overLimit;
}
4322 
/**
 * Increment the expensive function count.
 *
 * The counter is incremented unconditionally, including on calls that
 * end up over the limit.
 *
 * @return bool False if the expensive parser function limit from the
 *  parser options has been exceeded
 */
public function incrementExpensiveFunctionCount() {
	$this->mExpensiveFunctionCount++;
	return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
}
4332 
/**
 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__ from
 * the text, recording which ones were present in
 * $this->mDoubleUnderscores and applying their effects to the parser
 * state and output.
 *
 * @param string $text
 * @return string The text with the magic words removed; the first
 *  __TOC__ is replaced by the placeholder '<!--MWTOC-->'
 */
public function doDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$mw = MagicWord::get( 'toc' );
	if ( $mw->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $mw->replace( '<!--MWTOC-->', $text, 1 );

		# Only keep the first one.
		$text = $mw->replace( '', $text );
	}

	# Now match and remove the rest of them
	$mwa = MagicWord::getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $mwa->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	# An explicit __TOC__ wins over __NOTOC__
	if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}
	# (bug 8068) Allow control over whether robots index a page.
	# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
	# is not desirable, the last one on the page should win.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4390 
/**
 * Add a tracking category for the page being parsed by delegating to
 * the parser output.
 *
 * @param string $msg Passed through as the first argument
 * @return mixed Whatever the parser output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$result = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );

	return $result;
}
4399 
4416  public function formatHeadings( $text, $origText, $isMain = true ) {
4417  global $wgMaxTocLevel, $wgExperimentalHtmlIds;
4418 
4419  # Inhibit editsection links if requested in the page
4420  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4421  $maybeShowEditLink = $showEditLink = false;
4422  } else {
4423  $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
4424  $showEditLink = $this->mOptions->getEditSection();
4425  }
4426  if ( $showEditLink ) {
4427  $this->mOutput->setEditSectionTokens( true );
4428  }
4429 
4430  # Get all headlines for numbering them and adding funky stuff like [edit]
4431  # links - this is for later, but we need the number of headlines right now
4432  $matches = [];
4433  $numMatches = preg_match_all(
4434  '/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
4435  $text,
4436  $matches
4437  );
4438 
4439  # if there are fewer than 4 headlines in the article, do not show TOC
4440  # unless it's been explicitly enabled.
4441  $enoughToc = $this->mShowToc &&
4442  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4443 
4444  # Allow user to stipulate that a page should have a "new section"
4445  # link added via __NEWSECTIONLINK__
4446  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4447  $this->mOutput->setNewSection( true );
4448  }
4449 
4450  # Allow user to remove the "new section"
4451  # link via __NONEWSECTIONLINK__
4452  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4453  $this->mOutput->hideNewSection( true );
4454  }
4455 
4456  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4457  # override above conditions and always show TOC above first header
4458  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4459  $this->mShowToc = true;
4460  $enoughToc = true;
4461  }
4462 
4463  # headline counter
4464  $headlineCount = 0;
4465  $numVisible = 0;
4466 
4467  # Ugh .. the TOC should have neat indentation levels which can be
4468  # passed to the skin functions. These are determined here
4469  $toc = '';
4470  $full = '';
4471  $head = [];
4472  $sublevelCount = [];
4473  $levelCount = [];
4474  $level = 0;
4475  $prevlevel = 0;
4476  $toclevel = 0;
4477  $prevtoclevel = 0;
4478  $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4479  $baseTitleText = $this->mTitle->getPrefixedDBkey();
4480  $oldType = $this->mOutputType;
4481  $this->setOutputType( self::OT_WIKI );
4482  $frame = $this->getPreprocessor()->newFrame();
4483  $root = $this->preprocessToDom( $origText );
4484  $node = $root->getFirstChild();
4485  $byteOffset = 0;
4486  $tocraw = [];
4487  $refers = [];
4488 
4489  $headlines = $numMatches !== false ? $matches[3] : [];
4490 
4491  foreach ( $headlines as $headline ) {
4492  $isTemplate = false;
4493  $titleText = false;
4494  $sectionIndex = false;
4495  $numbering = '';
4496  $markerMatches = [];
4497  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4498  $serial = $markerMatches[1];
4499  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4500  $isTemplate = ( $titleText != $baseTitleText );
4501  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4502  }
4503 
4504  if ( $toclevel ) {
4505  $prevlevel = $level;
4506  }
4507  $level = $matches[1][$headlineCount];
4508 
4509  if ( $level > $prevlevel ) {
4510  # Increase TOC level
4511  $toclevel++;
4512  $sublevelCount[$toclevel] = 0;
4513  if ( $toclevel < $wgMaxTocLevel ) {
4514  $prevtoclevel = $toclevel;
4515  $toc .= Linker::tocIndent();
4516  $numVisible++;
4517  }
4518  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4519  # Decrease TOC level, find level to jump to
4520 
4521  for ( $i = $toclevel; $i > 0; $i-- ) {
4522  if ( $levelCount[$i] == $level ) {
4523  # Found last matching level
4524  $toclevel = $i;
4525  break;
4526  } elseif ( $levelCount[$i] < $level ) {
4527  # Found first matching level below current level
4528  $toclevel = $i + 1;
4529  break;
4530  }
4531  }
4532  if ( $i == 0 ) {
4533  $toclevel = 1;
4534  }
4535  if ( $toclevel < $wgMaxTocLevel ) {
4536  if ( $prevtoclevel < $wgMaxTocLevel ) {
4537  # Unindent only if the previous toc level was shown :p
4538  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4539  $prevtoclevel = $toclevel;
4540  } else {
4541  $toc .= Linker::tocLineEnd();
4542  }
4543  }
4544  } else {
4545  # No change in level, end TOC line
4546  if ( $toclevel < $wgMaxTocLevel ) {
4547  $toc .= Linker::tocLineEnd();
4548  }
4549  }
4550 
4551  $levelCount[$toclevel] = $level;
4552 
4553  # count number of headlines for each level
4554  $sublevelCount[$toclevel]++;
4555  $dot = 0;
4556  for ( $i = 1; $i <= $toclevel; $i++ ) {
4557  if ( !empty( $sublevelCount[$i] ) ) {
4558  if ( $dot ) {
4559  $numbering .= '.';
4560  }
4561  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4562  $dot = 1;
4563  }
4564  }
4565 
4566  # The safe header is a version of the header text safe to use for links
4567 
4568  # Remove link placeholders by the link text.
4569  # <!--LINK number-->
4570  # turns into
4571  # link text with suffix
4572  # Do this before unstrip since link text can contain strip markers
4573  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4574 
4575  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4576  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4577 
4578  # Strip out HTML (first regex removes any tag not allowed)
4579  # Allowed tags are:
4580  # * <sup> and <sub> (bug 8393)
4581  # * <i> (bug 26375)
4582  # * <b> (r105284)
4583  # * <bdi> (bug 72884)
4584  # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
4585  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4586  # to allow setting directionality in toc items.
4587  $tocline = preg_replace(
4588  [
4589  '#<(?!/?(span|sup|sub|bdi|i|b)(?: [^>]*)?>).*?>#',
4590  '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b))(?: .*?)?>#'
4591  ],
4592  [ '', '<$1>' ],
4593  $safeHeadline
4594  );
4595 
4596  # Strip '<span></span>', which is the result from the above if
4597  # <span id="foo"></span> is used to produce an additional anchor
4598  # for a section.
4599  $tocline = str_replace( '<span></span>', '', $tocline );
4600 
4601  $tocline = trim( $tocline );
4602 
4603  # For the anchor, strip out HTML-y stuff period
4604  $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4605  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4606 
4607  # Save headline for section edit hint before it's escaped
4608  $headlineHint = $safeHeadline;
4609 
4610  if ( $wgExperimentalHtmlIds ) {
4611  # For reverse compatibility, provide an id that's
4612  # HTML4-compatible, like we used to.
4613  # It may be worth noting, academically, that it's possible for
4614  # the legacy anchor to conflict with a non-legacy headline
4615  # anchor on the page. In this case likely the "correct" thing
4616  # would be to either drop the legacy anchors or make sure
4617  # they're numbered first. However, this would require people
4618  # to type in section names like "abc_.D7.93.D7.90.D7.A4"
4619  # manually, so let's not bother worrying about it.
4620  $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
4621  [ 'noninitial', 'legacy' ] );
4622  $safeHeadline = Sanitizer::escapeId( $safeHeadline );
4623 
4624  if ( $legacyHeadline == $safeHeadline ) {
4625  # No reason to have both (in fact, we can't)
4626  $legacyHeadline = false;
4627  }
4628  } else {
4629  $legacyHeadline = false;
4630  $safeHeadline = Sanitizer::escapeId( $safeHeadline,
4631  'noninitial' );
4632  }
4633 
4634  # HTML names must be case-insensitively unique (bug 10721).
4635  # This does not apply to Unicode characters per
4636  # http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
4637  # @todo FIXME: We may be changing them depending on the current locale.
4638  $arrayKey = strtolower( $safeHeadline );
4639  if ( $legacyHeadline === false ) {
4640  $legacyArrayKey = false;
4641  } else {
4642  $legacyArrayKey = strtolower( $legacyHeadline );
4643  }
4644 
4645  # Create the anchor for linking from the TOC to the section
4646  $anchor = $safeHeadline;
4647  $legacyAnchor = $legacyHeadline;
4648  if ( isset( $refers[$arrayKey] ) ) {
4649  // @codingStandardsIgnoreStart
4650  for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4651  // @codingStandardsIgnoreEnd
4652  $anchor .= "_$i";
4653  $refers["${arrayKey}_$i"] = true;
4654  } else {
4655  $refers[$arrayKey] = true;
4656  }
4657  if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
4658  // @codingStandardsIgnoreStart
4659  for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
4660  // @codingStandardsIgnoreEnd
4661  $legacyAnchor .= "_$i";
4662  $refers["${legacyArrayKey}_$i"] = true;
4663  } else {
4664  $refers[$legacyArrayKey] = true;
4665  }
4666 
4667  # Don't number the heading if it is the only one (looks silly)
4668  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4669  # the two are different if the line contains a link
4670  $headline = Html::element(
4671  'span',
4672  [ 'class' => 'mw-headline-number' ],
4673  $numbering
4674  ) . ' ' . $headline;
4675  }
4676 
4677  if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
4678  $toc .= Linker::tocLine( $anchor, $tocline,
4679  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4680  }
4681 
4682  # Add the section to the section tree
4683  # Find the DOM node for this header
4684  $noOffset = ( $isTemplate || $sectionIndex === false );
4685  while ( $node && !$noOffset ) {
4686  if ( $node->getName() === 'h' ) {
4687  $bits = $node->splitHeading();
4688  if ( $bits['i'] == $sectionIndex ) {
4689  break;
4690  }
4691  }
4692  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4693  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4694  $node = $node->getNextSibling();
4695  }
4696  $tocraw[] = [
4697  'toclevel' => $toclevel,
4698  'level' => $level,
4699  'line' => $tocline,
4700  'number' => $numbering,
4701  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4702  'fromtitle' => $titleText,
4703  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4704  'anchor' => $anchor,
4705  ];
4706 
4707  # give headline the correct <h#> tag
4708  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4709  // Output edit section links as markers with styles that can be customized by skins
4710  if ( $isTemplate ) {
4711  # Put a T flag in the section identifier, to indicate to extractSections()
4712  # that sections inside <includeonly> should be counted.
4713  $editsectionPage = $titleText;
4714  $editsectionSection = "T-$sectionIndex";
4715  $editsectionContent = null;
4716  } else {
4717  $editsectionPage = $this->mTitle->getPrefixedText();
4718  $editsectionSection = $sectionIndex;
4719  $editsectionContent = $headlineHint;
4720  }
4721  // We use a bit of pseudo-xml for editsection markers. The
4722  // language converter is run later on. Using a UNIQ style marker
4723  // leads to the converter screwing up the tokens when it
4724  // converts stuff. And trying to insert strip tags fails too. At
4725  // this point all real inputted tags have already been escaped,
4726  // so we don't have to worry about a user trying to input one of
4727  // these markers directly. We use a page and section attribute
4728  // to stop the language converter from converting these
4729  // important bits of data, but put the headline hint inside a
4730  // content block because the language converter is supposed to
4731  // be able to convert that piece of data.
4732  // Gets replaced with html in ParserOutput::getText
4733  $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4734  $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4735  if ( $editsectionContent !== null ) {
4736  $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4737  } else {
4738  $editlink .= '/>';
4739  }
4740  } else {
4741  $editlink = '';
4742  }
4743  $head[$headlineCount] = Linker::makeHeadline( $level,
4744  $matches['attrib'][$headlineCount], $anchor, $headline,
4745  $editlink, $legacyAnchor );
4746 
4747  $headlineCount++;
4748  }
4749 
4750  $this->setOutputType( $oldType );
4751 
4752  # Never ever show TOC if no headers
4753  if ( $numVisible < 1 ) {
4754  $enoughToc = false;
4755  }
4756 
4757  if ( $enoughToc ) {
4758  if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
4759  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4760  }
4761  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4762  $this->mOutput->setTOCHTML( $toc );
4763  $toc = self::TOC_START . $toc . self::TOC_END;
4764  $this->mOutput->addModules( 'mediawiki.toc' );
4765  }
4766 
4767  if ( $isMain ) {
4768  $this->mOutput->setSections( $tocraw );
4769  }
4770 
4771  # split up and insert constructed headlines
4772  $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4773  $i = 0;
4774 
4775  // build an array of document sections
4776  $sections = [];
4777  foreach ( $blocks as $block ) {
4778  // $head is zero-based, sections aren't.
4779  if ( empty( $head[$i - 1] ) ) {
4780  $sections[$i] = $block;
4781  } else {
4782  $sections[$i] = $head[$i - 1] . $block;
4783  }
4784 
4795  Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $showEditLink ] );
4796 
4797  $i++;
4798  }
4799 
4800  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4801  // append the TOC at the beginning
4802  // Top anchor now in skin
4803  $sections[0] = $sections[0] . $toc . "\n";
4804  }
4805 
4806  $full .= implode( '', $sections );
4807 
4808  if ( $this->mForceTocPosition ) {
4809  return str_replace( '<!--MWTOC-->', $toc, $full );
4810  } else {
4811  return $full;
4812  }
4813  }
4814 
/**
 * Transform wikitext before it is saved.
 *
 * Starts a parse in OT_WIKI mode, converts CRLF / CR line endings to LF,
 * optionally runs the pre-save pass (pstPass2) and finally unstrips the text.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title passed to startParse()
 * @param User $user User set on the parser while the transform runs
 * @param ParserOptions $options Options; getPreSaveTransform() decides
 *   whether pstPass2() is applied
 * @param bool $clearState Whether to lock the parser and clear its state first
 * @return string The transformed wikitext
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	if ( $clearState ) {
		// Keep the value returned by lock() in a local until the method returns
		$scopedLock = $this->lock();
	}
	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Normalise line endings: "\r\n" first, then any remaining lone "\r"
	$text = str_replace( [ "\r\n", "\r" ], "\n", $text );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$result = $this->mStripState->unstripBoth( $text );

	// Reset the user that was set above
	$this->setUser( null );

	return $result;
}
4849 
/**
 * Pre-save transform helper: substitutes variables, expands signature
 * tildes and applies the "pipe trick" link shortcuts.
 *
 * @param string $text Wikitext being saved
 * @param User $user User whose signature replaces the tilde sequences
 * @return string Transformed wikitext with trailing whitespace removed
 */
private function pstPass2( $text, $user ) {
	global $wgContLang;

	# Note: This is the timestamp saved as hardcoded wikitext to
	# the database, we use $wgContLang here in order to give
	# everyone the same signature and use the default one rather
	# than the one selected in each user's preferences.
	# (see also bug 12815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the
	# getUserSig(), which may corrupt this parser instance via its
	# wfMessage()->text() call.

	# Signatures
	$sigText = $this->getUserSig( $user );
	# strtr() tries the longest key first, so five tildes win over four and three
	$text = strtr( $text, [
		'~~~~~' => $d,
		'~~~~' => "$sigText $d",
		'~~~' => $sigText
	] );

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets U+FF08 / U+FF09, added in r40257)
	// These must be the literal full-width characters: ASCII parentheses here
	// would form a capture group and make the pattern match any "[[xy|]]".
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma U+FF0C)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，)$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	# Reverse pipe trick: borrow namespace / context from the current title
	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	# Trim trailing whitespace
	$text = rtrim( $text );

	return $text;
}
4922 
/**
 * Build the wikitext signature for a user.
 *
 * @param User &$user User to sign for
 * @param string|bool $nickname Nickname to use; false reads the user's
 *   'nickname' option
 * @param bool|null $fancySig Whether the nickname is to be treated as markup;
 *   null reads the user's 'fancysig' option
 * @return string Signature text
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	global $wgMaxSigChars;

	$username = $user->getName();

	# Fill in whatever the caller left unspecified from the user object
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# Loose comparison on purpose: any value equal to null falls back to the name
	if ( $nickname == null ) {
		$nickname = $username;
	}

	if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
	} elseif ( $fancySig !== false ) {
		# The signature may contain markup, so it has to validate
		if ( $this->validateSig( $nickname ) !== false ) {
			# Valid: return the cleaned-up signature as is
			return $this->cleanSig( $nickname, true );
		}
		# Invalid: use the plain user name instead
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
	}

	# Remove tilde runs so the nickname cannot itself contain a signature
	$nickname = self::cleanSigInSig( $nickname );

	# Default signature: built from the 'signature' / 'signature-anon' message
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	$msgName = 'signature';
	if ( $user->isAnon() ) {
		$msgName = 'signature-anon';
	}

	$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();
	return $message->title( $this->getTitle() )->text();
}
4979 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|bool The unchanged text when well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}
	return false;
}
4989 
/**
 * Clean up signature text: force substitution on template calls, remove
 * nested signature tildes and expand the result through the preprocessor.
 *
 * @param string $text Signature text
 * @param bool $parsing True when called from inside a running parse; when
 *   false the parser is locked and a fresh OT_PREPROCESS parse is started
 * @return string Cleaned signature text (returned untouched when the
 *   clean-signatures option is off)
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		global $wgTitle;
		$scopedLock = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# The feature can be switched off through the parser options
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = MagicWord::get( 'subst' );
	# Matches every "{{" that is not already followed by the subst magic word
	$notSubstedRegex = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$substPrefix = '{{' . $subst->getSynonym( 0 );

	$text = self::cleanSigInSig(
		preg_replace( $notSubstedRegex, $substPrefix, $text )
	);

	$dom = $this->preprocessToDom( $text );
	$frame = $this->getPreprocessor()->newFrame();
	$expanded = $frame->expand( $dom );

	# Only unstrip when this method started the parse itself
	return $parsing
		? $expanded
		: $this->mStripState->unstripBoth( $expanded );
}
5030 
/**
 * Remove every run of three to five tildes from the given text.
 *
 * @param string $text Text to strip
 * @return string Text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
5041 
/**
 * Public entry point that forwards all its arguments to startParse().
 *
 * @param Title|null $title Title to parse for
 * @param ParserOptions $options Parser options to use
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to clear the parser state
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->startParse( $title, $options, $outputType, $clearState );
}
5056 
/**
 * Prepare the parser for a run: store title, options and output type and,
 * on request, clear the parser state.
 *
 * @param Title|null $title Title handed to setTitle()
 * @param ParserOptions $options Stored in mOptions
 * @param int $outputType Handed to setOutputType()
 * @param bool $clearState Whether clearState() is called
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
5073 
/**
 * Run a text through preprocess(), guarding against re-entrant calls.
 *
 * @param string $text Text to transform
 * @param ParserOptions $options Options passed to preprocess()
 * @param Title|null $title Title to use; the global $wgTitle when omitted
 * @return string Preprocessed text, or the unchanged input when this method
 *   is already running further up the call stack
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Always release the guard; otherwise an exception thrown while
		# preprocessing would leave it set and every later call in this
		# request would return its input untransformed.
		$executing = false;
	}
}
5101 
/**
 * Register a callback for an extension tag and add the tag to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously stored for the tag, or null
 */
public function setHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	# Strict comparison: a loose in_array() would treat numeric-looking tag
	# names such as "1e1" and "10" as equal and skip the new tag.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
5139 
/**
 * Register a callback for a transparent tag. Unlike setHook(), the tag is
 * not added to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously stored for the tag, or null
 */
public function setTransparentTagHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# Name the method that was actually called, as the sibling setters do
		throw new MWException(
			"Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call"
		);
	}
	$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
5167 
/**
 * Drop all tag hooks and function tag hooks and restore the strip list to
 * its default. Transparent tag hooks are left untouched.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
5176 
/**
 * Register a parser function callback under a magic word ID and index all
 * synonyms of that magic word for lookup.
 *
 * @param string $id Magic word ID
 * @param callable $callback Callback stored for the function
 * @param int $flags Bit field; with self::SFH_NO_HASH set, no leading '#'
 *   is added to the synonyms
 * @throws MWException If MagicWord::get() yields nothing for $id
 * @return callable|null The callback previously registered for $id, or null
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
	global $wgContLang;

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$mw = MagicWord::get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $mw->getSynonyms();
	# 0 or 1; used as the first-level key of mFunctionSynonyms
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case-insensitive synonyms are stored lower-cased
		if ( !$sensitive ) {
			$syn = $wgContLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
5252 
/**
 * List the IDs of all registered function hooks.
 *
 * @return array Keys of mFunctionHooks
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}
5261 
/**
 * Register a function tag hook and add the tag to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @param int $flags Flags stored alongside the callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return array|null The [ callback, flags ] pair previously stored, or null
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}
	$old = isset( $this->mFunctionTagHooks[$tag] ) ?
		$this->mFunctionTagHooks[$tag] : null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	# Strict comparison: a loose in_array() would treat numeric-looking tag
	# names such as "1e1" and "10" as equal and skip the new tag.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
5287 
/**
 * Replace link placeholders in the text, in place, by delegating to the
 * parser's link holder array.
 *
 * @param string &$text Text to process; modified through the reference
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$linkHolders = $this->mLinkHolders;
	$linkHolders->replace( $text );
}
5298 
/**
 * Return the result of the link holder array's replaceText() for the text.
 *
 * @param string $text Text containing link placeholders
 * @return string Whatever mLinkHolders->replaceText() returns
 */
public function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );
	return $replaced;
}
5309 
/**
 * Render a gallery: configure an ImageGalleryBase instance from the tag
 * attributes, add one entry per input line and return the gallery HTML.
 *
 * @param string $text Gallery body, one "File|options|caption" entry per line
 * @param array $params Tag attributes (mode, showfilename, caption, perrow,
 *   widths, heights are read here; the full array is also passed on)
 * @return string HTML
 */
public function renderImageGallery( $text, $params ) {

	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

	if ( isset( $params['showfilename'] ) ) {
		$ig->setShowFilename( true );
	} else {
		$ig->setShowFilename( false );
	}
	if ( isset( $params['caption'] ) ) {
		$caption = $params['caption'];
		$caption = htmlspecialchars( $caption );
		$caption = $this->replaceInternalLinks( $caption );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Pass a copy of the object handle instead of &$this: taking a
	// reference to $this triggers a warning on PHP 7.1 and later.
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# Resolve the file name; without this assignment $title would be
		# undefined (or stale from the previous line) in everything below.
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as ImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap = $paramMap + $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = new MagicWordArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			$parameterMatches = StringUtils::explode( '|', $matches[3] );

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							// check to see if link matches an absolute url, if not then it must be a wiki link.
							if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
								$link = $linkValue;
								$this->mOutput->addExternalLink( $link );
							} else {
								$localLinkTitle = Title::newFromText( $linkValue );
								if ( $localLinkTitle !== null ) {
									$this->mOutput->addLink( $localLinkTitle );
									$link = $localLinkTitle->getLinkURL();
								}
							}
							break;
						default:
							// Must be a handler specific parameter. (Only reachable
							// when $handler is set: the two internal names are
							// handled above.)
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label = '|' . $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = '|' . $parameterMatch;
				}
			}
			// Remove the pipe.
			$label = substr( $label, 1 );
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5478 
/**
 * Get the image parameter map and matching MagicWordArray for a media
 * handler, building and caching them per handler class on first use.
 *
 * @param MediaHandler|bool $handler Handler object, or a falsy value for none
 * @return array Two elements: the parameter map (magic word ID =>
 *   [ type, name ]) and the MagicWordArray built from its keys
 */
public function getImageParams( $handler ) {
	# Cache key: the handler's class name, or '' when there is no handler
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$handlerClass] ) ) {
		# Initialise static lists
		static $internalParamNames = [
			'horizAlign' => [ 'left', 'right', 'center', 'none' ],
			'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
				'bottom', 'text-bottom' ],
			'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
				'upright', 'border', 'link', 'alt', 'class' ],
		];
		static $internalParamMap;
		if ( !$internalParamMap ) {
			# Built once: "img_<name>" with dashes turned into underscores
			$internalParamMap = [];
			foreach ( $internalParamNames as $group => $memberNames ) {
				foreach ( $memberNames as $member ) {
					$magicId = str_replace( '-', '_', 'img_' . $member );
					$internalParamMap[$magicId] = [ $group, $member ];
				}
			}
		}

		# Start from the internal map, then add the handler's own parameters
		$combined = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
				$combined[$magicId] = [ 'handler', $handlerParam ];
			}
		}
		$this->mImageParams[$handlerClass] = $combined;
		$this->mImageParamsMagicArray[$handlerClass] =
			new MagicWordArray( array_keys( $combined ) );
	}

	return [
		$this->mImageParams[$handlerClass],
		$this->mImageParamsMagicArray[$handlerClass],
	];
}
5522 
/**
 * Parse the options of an image link and hand the result to
 * Linker::makeImageLink().
 *
 * The option text has the form "option|option|...|caption". Recognised
 * options:
 *  - thumbnail    thumbnail with enlarge icon and caption
 *  - left, right, none, center    horizontal alignment
 *  - ___px        scale to the given pixel width
 *  - frame/framed keep original size, no magnify button
 *  - frameless    like thumb, but without a frame
 *  - upright      reduced width for upright images
 *  - border       1px border around the image
 *  - alt          text for the HTML alt attribute
 *  - class        class for the img node
 *  - link         link target (external, interwiki or local)
 *  - baseline, sub, super, top, text-top, middle, bottom, text-bottom
 *                 vertical alignment
 *
 * @param Title $title Title of the file
 * @param string $options Pipe-separated option text
 * @param LinkHolderArray|bool $holders Forwarded to stripAltText()
 * @return string Whatever Linker::makeImageLink() returns
 */
public function makeImage( $title, $options, $holders = false ) {
	$parts = StringUtils::explode( "|", $options );

	# Extensions may choose the file revision through this hook
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file; hooks may have swapped the title
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Parameter map for this file's handler (false when there is no file)
	$handler = $file ? $file->getHandler() : false;
	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Walk over the pipe-separated parts
	$caption = '';
	$params = [
		'frame' => [],
		'handler' => [],
		'horizAlign' => [],
		'vertAlign' => [],
	];
	$formatSeen = false;
	foreach ( $parts as $rawPart ) {
		$part = trim( $rawPart );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;

		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];
			$isHandlerParam = ( $type === 'handler' );

			if ( $isHandlerParam && $paramName === 'width' ) {
				# Width and height arrive together in one value; each is
				# validated and stored on its own.
				$sizes = $this->parseWidthParam( $value );
				foreach ( [ 'width', 'height' ] as $dimension ) {
					if ( isset( $sizes[$dimension] )
						&& $handler->validateParam( $dimension, $sizes[$dimension] )
					) {
						$params[$type][$dimension] = $sizes[$dimension];
						$validated = true;
					}
				}
				# else no validation -- bug 13436
			} else {
				if ( $isHandlerParam ) {
					# Handler parameters are validated by the handler
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							if ( $value === '' ) {
								# Empty link: no link at all
								$paramName = 'no-link';
								$value = true;
								$validated = true;
							} elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
								# Starts with a URL protocol: must be a complete URL
								if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
									$paramName = 'link-url';
									$this->mOutput->addExternalLink( $value );
									if ( $this->mOptions->getExternalLinkTarget() ) {
										$params[$type]['link-target'] =
											$this->mOptions->getExternalLinkTarget();
									}
									$validated = true;
								}
							} else {
								# Anything else is tried as a page title
								$linkTitle = Title::newFromText( $value );
								if ( $linkTitle ) {
									$paramName = 'link-title';
									$value = $linkTitle;
									$this->mOutput->addLink( $linkTitle );
									$validated = true;
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							# Only the first format option counts
							$validated = !$formatSeen;
							$formatSeen = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}

		# Any part that did not validate becomes the caption (last one wins)
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Alignment: the first key present in each group is used
	foreach ( [ 'horizAlign' => 'align', 'vertAlign' => 'valign' ] as $group => $frameKey ) {
		if ( $params[$group] ) {
			$params['frame'][$frameKey] = key( $params[$group] );
		}
	}

	$params['frame']['caption'] = $caption;

	# Is the image shown in a frame with the caption below it?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# Historically the unnamed parameter of [[Image:Foo|text...]] became the
	# alt text, later also the caption and the title attribute. Repeating
	# the caption as alt text only duplicates it for screen readers, so for
	# framed images the alt text is not taken from the caption. For inline
	# images the unnamed parameter still serves as alt text when no alt
	# parameter was given.
	if ( $imageIsFramed ) {
		# Framed image: the file name is the alt text of last resort.
		# No title is set here because tooltips don't make sense for
		# framed images.
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			$params['frame']['alt'] = $title->getText();
		}
	} else {
		# Inline image: alt falls back to the caption, then to the file name
		if ( !isset( $params['frame']['alt'] ) ) {
			$params['frame']['alt'] = ( $caption !== '' )
				? $this->stripAltText( $caption, $holders )
				: $title->getText();
		}
		# The caption doubles as the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = false;
	if ( isset( $options['time'] ) ) {
		$time = $options['time'];
	}
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5740 
/**
 * Turn caption wikitext into plain text usable in an attribute: replace
 * link holders, unstrip strip markers and remove all tags.
 *
 * @param string $caption Caption text
 * @param LinkHolderArray|bool $holders Holder array to use; when falsy the
 *   parser's own link holders are used
 * @return string Tag-free text
 */
protected function stripAltText( $caption, $holders ) {
	# replaceLinkHoldersText() alone is not enough here: when called from
	# replaceInternalLinks2(), mLinkHolders won't be up-to-date, so an
	# explicitly supplied holder array takes precedence.
	$resolved = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# Expand placeholders now, so none are left to be turned into HTML
	# later on, and then drop the tags.
	$unstripped = $this->mStripState->unstripBoth( $resolved );

	return Sanitizer::stripAllTags( $unstripped );
}
5764 
/**
 * Mark the current parser output as uncacheable by setting its cache
 * expiry to 0.
 *
 * @throws MWException If there is no parser output object
 */
public function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	if ( $this->mOutput ) {
		$this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
		return;
	}

	throw new MWException( __METHOD__ .
		" can only be called when actually parsing something" );
}
5777 
/**
 * Replace variables in the text and unstrip it. The text is changed through
 * the reference and also returned.
 *
 * @param string &$text Text to process
 * @param PPFrame|bool $frame Frame passed on to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	# Step 1: variable replacement
	$text = $this->replaceVariables( $text, $frame );
	# Step 2: unstrip
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5791 
/**
 * List the names of all registered tags: transparent tag hooks first, then
 * tag hooks, then function tag hooks.
 *
 * @return array Tag names
 */
public function getTags() {
	$tags = array_keys( $this->mTransparentTagHooks );
	foreach ( [ $this->mTagHooks, $this->mFunctionTagHooks ] as $hooks ) {
		$tags = array_merge( $tags, array_keys( $hooks ) );
	}
	return $tags;
}
5804 
/**
 * Replace every transparent tag in $text with the output of its hook.
 *
 * Tags are first pulled out of the text by extractTagsAndParams(), which
 * leaves markers behind; each marker is then swapped for the hook's return
 * value, or for the original tag text when no hook is registered under the
 * lower-cased element name.
 *
 * @param string $text
 * @return string Text with the markers substituted
 */
public function replaceTransparentTags( $text ) {
	$hookedElements = array_keys( $this->mTransparentTagHooks );
	$found = [];
	$text = self::extractTagsAndParams( $hookedElements, $text, $found );

	$swaps = [];
	foreach ( $found as $marker => $info ) {
		list( $element, $content, $params, $tag ) = $info;
		$hookKey = strtolower( $element );

		$swaps[$marker] = isset( $this->mTransparentTagHooks[$hookKey] )
			? call_user_func_array(
				$this->mTransparentTagHooks[$hookKey],
				[ $content, $params, $this ]
			)
			: $tag;
	}

	return strtr( $text, $swaps );
}
5836 
/**
 * Shared worker for getSection() and replaceSection().
 *
 * Walks the top-level nodes of the preprocessed document, locates the
 * heading whose index matches the requested section, and either collects
 * that section's original text ('get') or rebuilds the whole document with
 * the section swapped for $newText ('replace').
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section index, optionally preceded by
 *   '-'-separated flags; a 'T' flag turns on PTD_FOR_INCLUSION
 * @param string $mode Either 'get' or 'replace'
 * @param string $newText Replacement text in 'replace' mode; in 'get' mode,
 *   the value returned when the section does not exist
 * @return string
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Held only for its lifetime: the parse lock is released when it goes out of scope
	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$collected = '';
	$frame = $this->getPreprocessor()->newFrame();

	$getting = ( $mode === 'get' );
	$replacing = ( $mode === 'replace' );

	# Everything before the last '-' is a flag; the last piece is the index
	$idPieces = explode( '-', $sectionId );
	$sectionIndex = array_pop( $idPieces );
	$flags = in_array( 'T', $idPieces, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Empty input: only sections 0 and T-0 exist in an empty document
	if ( strval( $text ) === '' ) {
		if ( $sectionIndex == 0 ) {
			return $getting ? '' : $newText;
		}
		return $getting ? $newText : $text;
	}

	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks. They can only occur at the top
	# level, so iterating the root's children is enough to find them.
	$node = $root->getFirstChild();

	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		# Scan forward to the target heading; in replace mode everything
		# ahead of it is copied through unchanged. On break, $node is left
		# pointing at the heading itself.
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			if ( $replacing ) {
				$collected .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Section not found
		return $getting ? $newText : $text;
	}

	# Advance to the end of the section, including nested sections: stop at
	# the first other heading whose level is not deeper than the target's.
	do {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			if ( $heading['i'] != $sectionIndex && $heading['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $getting ) {
			$collected .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	if ( $replacing ) {
		# Trailing whitespace in $newText is conventionally stripped by the
		# editor, so two newlines are needed to restore the paragraph gap.
		# Nothing is appended when there is no new text.
		if ( $newText != "" ) {
			$collected .= $newText . "\n\n";
		}

		# Copy through whatever follows the replaced section
		for ( ; $node; $node = $node->getNextSibling() ) {
			$collected .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $collected ) ) {
		# Re-insert stripped tags
		$collected = rtrim( $this->mStripState->unstripBoth( $collected ) );
	}

	return $collected;
}
5976 
/**
 * Return the text of one section of a page.
 *
 * Thin wrapper that runs extractSections() in 'get' mode.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier as accepted by extractSections()
 * @param string $defaultText Returned when the section does not exist
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';

	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5994 
/**
 * Return the page text with one section replaced.
 *
 * Thin wrapper that runs extractSections() in 'replace' mode.
 *
 * @param string $oldText Current page wikitext
 * @param string|int $sectionId Section identifier as accepted by extractSections()
 * @param string $newText Text to put in place of the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';

	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
6010 
/**
 * Get the ID of the revision being parsed.
 *
 * @return int|null Value of mRevisionId; getRevisionObject() treats null as "no revision"
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;

	return $revisionId;
}
6019 
/**
 * Get the revision object for $this->mRevisionId.
 *
 * The result is cached in mRevisionObject after the first lookup.
 *
 * @return Revision|null|bool The cached or freshly looked-up revision; null
 *   when no revision ID is set. Otherwise whatever the callback or
 *   Revision::newFromId() returned.
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject !== null ) {
		return $this->mRevisionObject;
	}
	if ( $this->mRevisionId === null ) {
		return null;
	}

	$lookup = $this->mOptions->getCurrentRevisionCallback();
	$rev = call_user_func( $lookup, $this->getTitle(), $this );

	# If the parse is for a new revision, the callback should already have
	# been set up to force the object, and it should match mRevisionId.
	# When it does not, fall back to fetching by mRevisionId for sanity.
	$mismatch = $rev && $rev->getId() != $this->mRevisionId;
	if ( $mismatch ) {
		$rev = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $rev;

	return $this->mRevisionObject;
}
6049 
/**
 * Get the timestamp associated with the current revision, adjusted to the
 * site-default timezone offset.
 *
 * The value is computed once and cached in mRevisionTimestamp. When no
 * revision object is available, the current time is used instead.
 *
 * @return string
 */
public function getRevisionTimestamp() {
	if ( is_null( $this->mRevisionTimestamp ) ) {
		# $wgContLang lives in the global scope; without this declaration it
		# is an undefined local and the userAdjust() call below is a fatal
		# "member function on null" error.
		global $wgContLang;

		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6072 
/**
 * Get the name of the user that edited the last revision.
 *
 * The value is cached in mRevisionUser once it has been determined.
 *
 * @return string|null User name; stays null when there is no revision
 *   object and the parse is neither a pre-save transform nor a preview
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revObject = $this->getRevisionObject();

	if ( $revObject ) {
		$this->mRevisionUser = $revObject->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this template is subst: the revision id will be blank,
		# so just use the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6092 
/**
 * Get the size of the revision being parsed.
 *
 * The value is cached in mRevisionSize once it has been determined.
 *
 * @return int|null Size; stays null when there is no revision object and
 *   the parse is neither a pre-save transform nor a preview
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revObject = $this->getRevisionObject();

	if ( $revObject ) {
		$this->mRevisionSize = $revObject->getSize();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this variable is subst: the revision id will be blank, so just
		# use the parser input size, because the substitution itself will
		# change the size.
		$this->mRevisionSize = $this->mInputSize;
	}

	return $this->mRevisionSize;
}
6113 
/**
 * Set the default sort key, both on the parser and on the current output
 * (as the 'defaultsort' property).
 *
 * @param string $sort
 */
public function setDefaultSort( $sort ) {
	$propertyName = 'defaultsort';

	$this->mDefaultSort = $sort;
	$this->mOutput->setProperty( $propertyName, $sort );
}
6123 
/**
 * Get the default sort key, or the empty string when none has been set.
 *
 * Use getCustomDefaultSort() to tell "not set" (false) apart from a value.
 *
 * @return string
 */
public function getDefaultSort() {
	# false is the "not set" sentinel; any other value is returned as-is
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6141 
/**
 * Get the raw default sort value, without the empty-string fallback that
 * getDefaultSort() applies.
 *
 * @return string|bool The stored value; false means no default sort was set
 */
public function getCustomDefaultSort() {
	$stored = $this->mDefaultSort;

	return $stored;
}
6151 
/**
 * Guess the anchor ('#'-prefixed fragment) of a section from its heading wikitext.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped ID
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$plainName = $this->stripSectionName( $text );

	# NOTE(review): the listing this chunk was taken from skips one source
	# line at this point - confirm against the real file that no processing
	# step is missing between stripping and escaping.
	return '#' . Sanitizer::escapeId( $plainName, 'noninitial' );
}
6167 
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead (escapeId() is called with the additional 'legacy' option).
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped ID
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$plainName = $this->stripSectionName( $text );

	# NOTE(review): the listing this chunk was taken from skips one source
	# line at this point - confirm against the real file that no processing
	# step is missing between stripping and escaping.
	$escapeOptions = [ 'noninitial', 'legacy' ];

	return '#' . Sanitizer::escapeId( $plainName, $escapeOptions );
}
6182 
/**
 * Strip wikitext markup from a string for use in a section anchor.
 *
 * Internal and external link markup is reduced to its visible text, quote
 * markup is run through doQuotes(), and anything between '<' and '>' is
 * removed.
 *
 * @param string $text
 * @return string
 */
public function stripSectionName( $text ) {
	# Internal links, applied in order: piped links keep their label,
	# then unpiped links keep their target.
	$text = preg_replace(
		[
			'/\[\[:?([^[|]+)\|([^[]+)\]\]/',
			'/\[\[:?([^[]+)\|?\]\]/',
		],
		[ '$2', '$1' ],
		$text
	);

	# External links keep their label.
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6215 
/**
 * strip/replaceVariables/unstrip for preprocessor regression testing.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @return string
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	# Held only for its lifetime: the parse lock is released when it goes out of scope
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6237 
/**
 * Testing helper: run preSaveTransform() on $text, using the user carried
 * by $options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of preSaveTransform()
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
6247 
/**
 * Testing helper: run testSrvus() with the output type fixed to OT_PREPROCESS.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;

	return $this->testSrvus( $text, $title, $options, $outputType );
}
6257 
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and assemble the result.
 *
 * Marker regions (MARKER_PREFIX through MARKER_SUFFIX) are copied through
 * untouched. A prefix with no matching suffix is copied through, together
 * with everything after it, without invoking the callback.
 *
 * @param string $s Text to process
 * @param callable $callback Invoked with one string argument per
 *   non-marker region; its return value is appended to the output
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$cursor = 0;
	$out = '';

	while ( $cursor < $length ) {
		$markerStart = strpos( $s, self::MARKER_PREFIX, $cursor );
		if ( $markerStart === false ) {
			# No more markers: the rest of the string is plain text
			$out .= call_user_func( $callback, substr( $s, $cursor ) );
			break;
		}

		# Plain text leading up to the marker
		$out .= call_user_func( $callback, substr( $s, $cursor, $markerStart - $cursor ) );

		$suffixAt = strpos( $s, self::MARKER_SUFFIX, $markerStart );
		if ( $suffixAt === false ) {
			# Unterminated marker: pass the remainder through verbatim
			$out .= substr( $s, $markerStart );
			break;
		}

		# Copy the whole marker verbatim and resume right after it
		$markerEnd = $suffixAt + $suffixLength;
		$out .= substr( $s, $markerStart, $markerEnd - $markerStart );
		$cursor = $markerEnd;
	}

	return $out;
}
6297 
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the strip state's killMarkers().
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6307 
/**
 * Save the parser state required to convert the given half-parsed text to
 * HTML.
 *
 * @param string $text Half-parsed text
 * @return array With keys 'text', 'version' (HALF_PARSED_VERSION),
 *   'stripState' and 'linkHolders'; the last two are the sub-states
 *   returned for $text by the strip state and the link holder array
 */
public function serializeHalfParsedText( $text ) {
	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
6333 
/**
 * Load the parser state saved by serializeHalfParsedText() into this parser
 * and return the text, adjusted for the merged state.
 *
 * @param array $data Array in the format produced by serializeHalfParsedText()
 * @return string
 * @throws MWException When 'version' is missing or differs from HALF_PARSED_VERSION
 */
public function unserializeHalfParsedText( $data ) {
	$versionOk = isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionOk ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	$texts = [ $data['text'] ];

	# First, extract the strip state.
	$texts = $this->mStripState->merge( $data['stripState'], $texts );

	# Now renumber links
	$texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );

	# Should be good to go.
	return $texts[0];
}
6364 
/**
 * Returns true if the given array, presumed to be generated by
 * serializeHalfParsedText(), is compatible with the current version of the
 * parser.
 *
 * @param array $data
 * @return bool
 */
public function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
6377 
/**
 * Parse the value of a width parameter such as "100px", "100x200px" or "x200".
 *
 * @param string $value
 * @return array Empty when $value is '' or matches neither form; otherwise
 *   'width' (and, for the "WxH" form, 'height') as integers. A missing
 *   number in the "WxH" form comes out as 0.
 */
public function parseWidthParam( $value ) {
	if ( $value === '' ) {
		return [];
	}

	# (bug 13500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$m = [];
	if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
		return [
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6405 
/**
 * Mark the parser as busy and return a guard object that clears the mark.
 *
 * The returned ScopedCallback is built with a callback that resets mInParse
 * to false; callers keep it in a local variable for the duration of the parse.
 *
 * @return ScopedCallback
 * @throws MWException If the parser is already marked as being in a parse
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively?" );
	}

	$this->mInParse = true;

	return new ScopedCallback( function() {
		$this->mInParse = false;
	} );
}
6428 
/**
 * Strip a single outer <p></p> wrapper from a piece of HTML.
 *
 * The wrapper is removed only when the whole input is one paragraph, that
 * is, when the captured inner text contains no further '</p>'. Anything
 * else is returned unchanged.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );

	# A '</p>' inside the capture means there is more than one paragraph
	if ( $isWrapped && strpos( $m[1], '</p>' ) === false ) {
		return $m[1];
	}

	return $html;
}
6449 
/**
 * Return a parser that is not currently in the middle of a parse.
 *
 * @return Parser $this when idle; otherwise a new instance of the class
 *   named in $wgParserConf['class'], constructed with $wgParserConf
 */
public function getFreshParser() {
	global $wgParserConf;

	if ( !$this->mInParse ) {
		return $this;
	}

	# This instance is busy, so hand out a separate one instead
	return new $wgParserConf['class']( $wgParserConf );
}
6468 
/**
 * Flag the current parser output as needing OOUI.
 */
public function enableOOUI() {
	# NOTE(review): the listing this chunk was taken from skips one source
	# line at the top of this body - confirm against the real file that no
	# setup call is missing before the output flag is set.
	$output = $this->mOutput;
	$output->setEnableOOUI( true );
}
6479 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:6026
setTitle($t)
Set the context title.
Definition: Parser.php:740
$mAutonumber
Definition: Parser.php:184
markerSkipCallback($s, $callback)
Call a callback function on all regions of the given text that are not inside strip markers...
Definition: Parser.php:6274
#define the
table suitable for use with IDatabase::select()
$mPPNodeCount
Definition: Parser.php:198
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2046
static getVariableIDs()
Get an array of parser variable IDs.
Definition: MagicWord.php:271
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:117
const MARKER_PREFIX
Definition: Parser.php:141
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
Definition: deferred.txt:11
external whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2558
isValidHalfParsedText($data)
Returns true if the given array, presumed to be generated by serializeHalfParsedText(), is compatible with the current version of the parser.
Definition: Parser.php:6374
null means default in associative array form
Definition: hooks.txt:1798
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1798
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1734
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
getSection($text, $sectionId, $defaultText= '')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:5991
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1249
$mTplRedirCache
Definition: Parser.php:200
killMarkers($text)
Remove any strip markers found in the given text.
Definition: Parser.php:6304
wfGetDB($db, $groups=[], $wiki=false)
Get a Database object.
static tocList($toc, $lang=false)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1745
fetchTemplateAndTitle($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3932
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:762
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:6078
setFunctionTagHook($tag, $callback, $flags)
Create a tag function, e.g.
Definition: Parser.php:5272
the array() calling protocol came about after MediaWiki 1.4rc1.
stripSectionName($text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6197
null for the local wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1418
const OT_PREPROCESS
Definition: Defines.php:228
$mLastSection
Definition: Parser.php:191
static linkKnown($target, $html=null, $customAttribs=[], $query=[], $options=[ 'known', 'noclasses'])
Identical to link(), except $options defaults to 'known'.
Definition: Linker.php:264
$mDoubleUnderscores
Definition: Parser.php:200
magic word the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2321
Group all the pieces relevant to the context of a request into one instance.
getPreloadText($text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:688
$context
Definition: load.php:44
validateSig($text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4986
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:250
$wgSitename
Name of the site.
renderImageGallery($text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5323
recursivePreprocess($text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:669
replaceExternalLinks($text)
Replace external links (REL)
Definition: Parser.php:1776
static isNonincludable($index)
It is not possible to use pages from this namespace as template?
nextLinkID()
Definition: Parser.php:829
const SPACE_NOT_NL
Definition: Parser.php:100
static replaceUnusualEscapes($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1901
getImageParams($handler)
Definition: Parser.php:5483
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
doHeadings($text)
Parse headers and return html.
Definition: Parser.php:1555
static getTitleFor($name, $subpage=false, $fragment= '')
Get a localised Title object for a specified special page name.
Definition: SpecialPage.php:75
const OT_PLAIN
Definition: Parser.php:121
getTags()
Accessor.
Definition: Parser.php:5797
findColonNoLinks($str, &$before, &$after)
Split up a string on ':', ignoring any occurrences inside tags to prevent illegal overlapping...
Definition: Parser.php:2759
static isWellFormedXmlFragment($text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:735
const OT_WIKI
Definition: Parser.php:118
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:1932
fetchFileAndTitle($title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:4074
User $mUser
Definition: Parser.php:207
We use the convention $dbr for read and $dbw for write to help you keep track of whether the database object is a the world will explode Or to be a subsequent write query which succeeded on the master may fail when replicated to the slave due to a unique key collision Replication on the slave will stop and it may take hours to repair the database and get it back online Setting read_only in my cnf on the slave will avoid this but given the dire we prefer to have as many checks as possible We provide a but the wrapper functions like please read the documentation for except in special pages derived from QueryPage It s a common pitfall for new developers to submit code containing SQL queries which examine huge numbers of rows Remember that COUNT * is(N), counting rows in atable is like counting beans in a bucket.------------------------------------------------------------------------Replication------------------------------------------------------------------------The largest installation of MediaWiki, Wikimedia, uses a large set ofslave MySQL servers replicating writes made to a master MySQL server.Itis important to understand the issues associated with this setup if youwant to write code destined for Wikipedia.It's often the case that the best algorithm to use for a given taskdepends on whether or not replication is in use.Due to our unabashedWikipedia-centrism, we often just use the replication-friendly version, but if you like, you can use wfGetLB() ->getServerCount() > 1 tocheck to see if replication is in use.===Lag===Lag primarily occurs when large write queries are sent to the master.Writes on the master are executed in parallel, but they are executed inserial when they are replicated to the slaves.The master writes thequery to the binlog when the transaction is committed.The slaves pollthe binlog and start executing the query as soon as it appears.They canservice reads while they are performing a write query, but will not readanything more from the binlog and 
thus will perform no more writes.Thismeans that if the write query runs for a long time, the slaves will lagbehind the master for the time it takes for the write query to complete.Lag can be exacerbated by high read load.MediaWiki's load balancer willstop sending reads to a slave when it is lagged by more than 30 seconds.If the load ratios are set incorrectly, or if there is too much loadgenerally, this may lead to a slave permanently hovering around 30seconds lag.If all slaves are lagged by more than 30 seconds, MediaWiki will stopwriting to the database.All edits and other write operations will berefused, with an error returned to the user.This gives the slaves achance to catch up.Before we had this mechanism, the slaves wouldregularly lag by several minutes, making review of recent editsdifficult.In addition to this, MediaWiki attempts to ensure that the user seesevents occurring on the wiki in chronological order.A few seconds of lagcan be tolerated, as long as the user sees a consistent picture fromsubsequent requests.This is done by saving the master binlog positionin the session, and then at the start of each request, waiting for theslave to catch up to that position before doing any reads from it.Ifthis wait times out, reads are allowed anyway, but the request isconsidered to be in"lagged slave mode".Lagged slave mode can bechecked by calling wfGetLB() ->getLaggedSlaveMode().The onlypractical consequence at present is a warning displayed in the pagefooter.===Lag avoidance===To avoid excessive lag, queries which write large numbers of rows shouldbe split up, generally to write one row at a time.Multi-row INSERT...SELECT queries are the worst offenders should be avoided altogether.Instead do the select first and then the insert.===Working with lag===Despite our best efforts, it's not practical to guarantee a low-lagenvironment.Lag will usually be less than one second, but mayoccasionally be up to 30 seconds.For scalability, it's very importantto keep load on 
the master low, so simply sending all your queries tothe master is not the answer.So when you have a genuine need forup-to-date data, the following approach is advised:1) Do a quick query to the master for a sequence number or timestamp 2) Run the full query on the slave and check if it matches the data you gotfrom the master 3) If it doesn't, run the full query on the masterTo avoid swamping the master every time the slaves lag, use of thisapproach should be kept to a minimum.In most cases you should just readfrom the slave and let the user deal with the delay.------------------------------------------------------------------------Lock contention------------------------------------------------------------------------Due to the high write rate on Wikipedia(and some other wikis), MediaWiki developers need to be very careful to structure their writesto avoid long-lasting locks.By default, MediaWiki opens a transactionat the first query, and commits it before the output is sent.Locks willbe held from the time when the query is done until the commit.So youcan reduce lock time by doing as much processing as possible before youdo your write queries.Often this approach is not good enough, and it becomes necessary toenclose small groups of queries in their own transaction.Use thefollowing syntax:$dbw=wfGetDB(DB_MASTER
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:3269
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1798
static isEnabled()
Definition: MWTidy.php:92
Set options of the Parser.
static tidy($text)
Interface with html tidy.
Definition: MWTidy.php:45
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:5258
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
database rows
Definition: globals.txt:10
wfHostname()
Fetch server name for use in error reporting etc.
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:844
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:189
argSubstitution($piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:4177
testSrvus($text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
strip/replaceVariables/unstrip for preprocessor regression testing
Definition: Parser.php:6226
uniqPrefix()
Accessor for mUniqPrefix.
Definition: Parser.php:730
const TOC_START
Definition: Parser.php:144
Title($x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:768
SectionProfiler $mProfiler
Definition: Parser.php:259
$sort
fetchFileNoRegister($title, $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:4099
null for the local wiki Added in
Definition: hooks.txt:1418
There are three types of nodes:
$mHeadings
Definition: Parser.php:200
$value
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:5171
const COLON_STATE_TAGSLASH
Definition: Parser.php:107
static makeSelfLinkObj($nt, $html= '', $query= '', $trail= '', $prefix= '')
Make appropriate markup for a link to the current article.
Definition: Linker.php:409
const NS_SPECIAL
Definition: Defines.php:58
clearState()
Clear Parser state.
Definition: Parser.php:343
__construct($conf=[])
Definition: Parser.php:264
const EXT_LINK_ADDR
Definition: Parser.php:92
$mFirstCall
Definition: Parser.php:159
interwikiTransclude($title, $action)
Transclude an interwiki link.
Definition: Parser.php:4118
pstPass2($text, $user)
Pre-save transform helper function.
Definition: Parser.php:4858
guessLegacySectionNameFromWikiText($text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead.
Definition: Parser.php:6176
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Options($x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:822
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2548
serializeHalfParsedText($text)
Save the parser state required to convert the given half-parsed text to HTML.
Definition: Parser.php:6324
replaceLinkHolders(&$text, $options=0)
Replace "" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:5295
static activeUsers()
Definition: SiteStats.php:161
$mLinkID
Definition: Parser.php:197
doQuotes($text)
Helper function for doAllQuotes()
Definition: Parser.php:1588
preprocessToDom($text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:3299
limitationWarn($limitationType, $current= '', $max= '')
Warn the user when a parser limitation is reached Will warn at most once the user per limitation type...
Definition: Parser.php:3421
static cleanUrl($url)
Definition: Sanitizer.php:1818
wfUrlencode($s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:277
$mGeneratedPPNodeCount
Definition: Parser.php:198
Represents a title within MediaWiki.
Definition: Title.php:34
static getRandomString()
Get a random string.
Definition: Parser.php:709
$mRevisionId
Definition: Parser.php:224
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1785
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
doBlockLevels($text, $linestart)
#@-
Definition: Parser.php:2544
$wgArticlePath
Definition: img_auth.php:45
OutputType($x=null)
Accessor/mutator for the output type.
Definition: Parser.php:794
const NS_TEMPLATE
Definition: Defines.php:79
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:117
const COLON_STATE_COMMENTDASHDASH
Definition: Parser.php:110
getVariableValue($index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2917
recursiveTagParse($text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:604
const NO_ARGS
magic word & $parser
Definition: hooks.txt:2321
MagicWordArray $mVariables
Definition: Parser.php:166
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:716
const SFH_NO_HASH
Definition: Parser.php:82
const COLON_STATE_COMMENTDASH
Definition: Parser.php:109
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
wfRandomString($length=32)
Get a random string containing a number of pseudo-random hex characters.
$mForceTocPosition
Definition: Parser.php:202
preprocess($text, Title $title=null, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:645
static getCacheTTL($id)
Allow external reads of TTL array.
Definition: MagicWord.php:294
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:6016
const OT_PREPROCESS
Definition: Parser.php:119
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1612
maybeDoSubpageLink($target, &$text)
Handle link to subpage if necessary.
Definition: Parser.php:2411
$mFunctionSynonyms
Definition: Parser.php:151
If you want to remove the page from your watchlist later
replaceLinkHoldersText($text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:5306
setLinkID($id)
Definition: Parser.php:836
$mOutputType
Definition: Parser.php:221
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
$mDefaultStripList
Definition: Parser.php:154
static createAssocArgs($args)
Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
Definition: Parser.php:3373
$mExtLinkBracketedRegex
Definition: Parser.php:173
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page.Return false to stop further processing of the tag $reader:XMLReader object &$pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports.&$fullInterwikiPrefix:Interwiki prefix, may contain colons.&$pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable.Can be used to lazy-load the import sources list.&$importSources:The value of $wgImportSources.Modify as necessary.See the comment in DefaultSettings.php for the detail of how to structure this array. 
'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.&$title:Title object for the current page &$request:WebRequest &$ignoreRedirect:boolean to skip redirect check &$target:Title/string of redirect target &$article:Article object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) &$article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() &$ip:IP being check &$result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 
'LanguageGetMagic':DEPRECATED!Use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language &$magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED!Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language &$specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 'LinkBegin':Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1796
if($line===false) $args
Definition: cdb.php:64
the value to return A Title object or null for latest to be modified or replaced by the hook handler or if authentication is not possible after cache objects are set for highlighting & $link
Definition: hooks.txt:2581
static getLocalInstance($ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
const COLON_STATE_TAG
Definition: Parser.php:104
static getDoubleUnderscoreArray()
Get a MagicWordArray of double-underscore entities.
Definition: MagicWord.php:307
static splitTrail($trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1818
getTemplateDom($title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3857
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1462
openList($char)
These next three functions open, continue, and close the list element appropriate to the prefix chara...
Definition: Parser.php:2465
cleanSig($text, $parsing=false)
Clean up signature text.
Definition: Parser.php:5000
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static factory($mode=false, IContextSource $context=null)
Get a new image gallery.
$wgLanguageCode
Site language code.
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:129
Class for asserting that a callback happens when a dummy object leaves scope.
$wgExtraInterlanguageLinkPrefixes
List of additional interwiki prefixes that should be treated as interlanguage links (i...
startExternalParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:5051
wfCgiToArray($query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
wfDebugLog($logGroup, $text, $dest= 'all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not...
static capturePath(Title $title, IContextSource $context)
Just like executePath() but will override global variables and execute the page in "inclusion" mode...
const NO_TEMPLATES
addTrackingCategory($msg)
Definition: Parser.php:4396
replaceInternalLinks($s)
Process [[ ]] wikilinks.
Definition: Parser.php:2033
$mVarCache
Definition: Parser.php:155
$wgStylePath
The URL path of the skins directory.
disableCache()
Set a flag in the output object indicating that the content is dynamic and shouldn't be cached...
Definition: Parser.php:5769
$mRevisionObject
Definition: Parser.php:223
static normalizeSectionNameWhitespace($section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the ids that are used for section links.
Definition: Sanitizer.php:1343
internalParse($text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1226
Title $mTitle
Definition: Parser.php:220
static delimiterReplace($startDelim, $endDelim, $replace, $subject, $flags= '')
Perform an operation equivalent to preg_replace() with flags.
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:290
wfEscapeWikiText($text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:6055
static stripOuterParagraph($html)
Strip the outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6439
static register($parser)
$mRevIdForTs
Definition: Parser.php:228
static singleton()
Get an instance of this class.
Definition: LinkCache.php:61
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
static normalizeSubpageLink($contextTitle, $target, &$text)
Definition: Linker.php:1547
parseWidthParam($value)
Parse a width param of an image link like 300px or 200x300px.
Definition: Parser.php:6386
$mStripList
Definition: Parser.php:153
$mFunctionTagHooks
Definition: Parser.php:152
fetchScaryTemplateMaybeFromCache($url)
Definition: Parser.php:4137
const OT_PLAIN
Definition: Defines.php:230
fetchCurrentRevisionOfTitle($title)
Fetch the current revision of a given title.
Definition: Parser.php:3900
$mRevisionTimestamp
Definition: Parser.php:225
$mImageParams
Definition: Parser.php:156
stripAltText($caption, $holders)
Definition: Parser.php:5746
doAllQuotes($text)
Replace single quotes with HTML markup.
Definition: Parser.php:1571
static replaceMarkup($search, $replace, $text)
More or less "markup-safe" str_replace(). Ignores any instances of the separator inside <...
static normalizeUrlComponent($component, $unsafe)
Definition: Parser.php:1951
if($limit) $timestamp
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:73
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1004
$mInPre
Definition: Parser.php:191
setHook($tag, $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:5126
const OT_WIKI
Definition: Defines.php:227
Preprocessor $mPreprocessor
Definition: Parser.php:177
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:897
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of normal(non-web) applications--they might conflict with distributors'policies
static getInstance($ts=false)
Get a timestamp instance in GMT.
const NS_MEDIA
Definition: Defines.php:57
closeList($char)
Definition: Parser.php:2517
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:59
replaceVariables($text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3344
const RECOVER_ORIG
wfMatchesDomainList($url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
StripState $mStripState
Definition: Parser.php:189
$mDefaultSort
Definition: Parser.php:199
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:885
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
incrementIncludeSize($type, $size)
Increment an include size counter.
Definition: Parser.php:4314
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:996
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
const EXT_IMAGE_REGEX
Definition: Parser.php:95
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:5063
$params
const NS_CATEGORY
Definition: Defines.php:83
static makeHeadline($level, $attribs, $anchor, $html, $link, $legacyAnchor=false)
Create a headline for content.
Definition: Linker.php:1799
static extractTagsAndParams($elements, $text, &$matches, $uniq_prefix=null)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:926
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
doTableStuff($text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1023
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:6098
$mImageParamsMagicArray
Definition: Parser.php:157
LinkHolderArray $mLinkHolders
Definition: Parser.php:195
static register($parser)
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to and or sell copies of the and to permit persons to whom the Software is furnished to do so
Definition: LICENSE.txt:10
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
closeParagraph()
#@+ Used by doBlockLevels()
Definition: Parser.php:2421
const DB_SLAVE
Definition: Defines.php:46
preSaveTransform($text, Title $title, User $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\\r\\n" -> "\\n" conversion, substituting signatur...
Definition: Parser.php:4826
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:857
$buffer
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:912
static hasSubpages($index)
Does the namespace allow subpages?
formatHeadings($text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4416
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:875
static tocUnindent($level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1703
nextItem($char)
TODO: document.
Definition: Parser.php:2491
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
static tocLine($anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1717
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
getExternalLinkAttribs($url=false)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:1873
$mInputSize
Definition: Parser.php:229
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:965
getUserSig(&$user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext...
Definition: Parser.php:4937
const HALF_PARSED_VERSION
Update this version number when the output of serialiseHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:79
const NS_FILE
Definition: Defines.php:75
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:325
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:34
static makeImageLink(Parser $parser, Title $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:545
const PTD_FOR_INCLUSION
Definition: Parser.php:113
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1798
armorLinks($text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2389
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1584
static splitWhitespace($s)
Return a three-element array: leading whitespace, string contents, trailing whitespace.
Definition: Parser.php:3311
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
setOutputType($ot)
Set the output type.
Definition: Parser.php:777
$mTagHooks
Definition: Parser.php:148
Class for handling an array of magic words.
const NS_MEDIAWIKI
Definition: Defines.php:77
static & get($id)
Factory: creates an object representing an ID.
Definition: MagicWord.php:257
static getModuleStyles()
Get CSS modules needed if HTML from the current driver is to be displayed.
Definition: MWTidy.php:63
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6475
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition: hooks.txt:242
fetchTemplate($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3960
maybeMakeExternalImage($url)
Make an image if it's allowed, either through the global option, through the exception, or through the on-wiki whitelist.
Definition: Parser.php:1974
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2398
const OT_HTML
Definition: Defines.php:226
static escapeId($id, $options=[])
Given a value, escape it so that it can be used in an id attribute and return it. ...
Definition: Sanitizer.php:1132
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object & $output
Definition: hooks.txt:1004
static getSubstIDs()
Get an array of parser substitution modifier IDs.
Definition: MagicWord.php:284
static images()
Definition: SiteStats.php:169
$mTransparentTagHooks
Definition: Parser.php:149
$mExpensiveFunctionCount
Definition: Parser.php:201
$mUrlProtocols
Definition: Parser.php:173
const TS_MW
MediaWiki concatenated string timestamp (YYYYMMDDHHMMSS)
$mConf
Definition: Parser.php:173
transformMsg($text, $options, $title=null)
Wrapper for preprocess()
Definition: Parser.php:5082
static newFromId($id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:99
wfUrlProtocols($includeProtocolRelative=true)
Returns a regular expression of url protocols.
static makeExternalLink($url, $text, $escape=true, $linktype= '', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:1052
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:302
static getExternalLinkRel($url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:1852
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
this hook is for auditing only $req
Definition: hooks.txt:965
this hook is for auditing only or null if authentication failed before getting that far $username
Definition: hooks.txt:762
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc next in line in page history
Definition: hooks.txt:1584
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:242
const OT_MSG
Definition: Parser.php:120
replaceTransparentTags($text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5815
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
Definition: postgres.txt:22
replaceSection($oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:6007
getLinkURL($query= '', $query2=false, $proto=PROTO_RELATIVE)
Get a URL that's the simplest URL that will be valid to link, locally, to the current Title...
Definition: Title.php:1826
doDoubleUnderscore($text)
Strip double-underscore items like __NOGALLERY__ and __NOTOC__. Fills $this->mDoubleUnderscores, returns the modified text.
Definition: Parser.php:4341
$mFunctionHooks
Definition: Parser.php:150
$lines
Definition: router.php:66
testPreprocess($text, Title $title, ParserOptions $options)
Definition: Parser.php:6254
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition: deferred.txt:11
MagicWordArray $mSubstWords
Definition: Parser.php:171
const COLON_STATE_TEXT
Definition: Parser.php:103
const TOC_END
Definition: Parser.php:145
static normalizeCharReferences($text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1362
callParserFunction($frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3761
$mArgStack
Definition: Parser.php:191
$wgScriptPath
The path we should point to.
Variant of the Message class.
Definition: Message.php:1232
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:6460
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database etc For and for historical it also represents a few features of articles that don t involve their such as access rights See also title txt Article Encapsulates access to the page table of the database The object represents a an and maintains state such as etc Revision Encapsulates individual page revision data and access to the revision text blobs storage system Higher level code should never touch text storage directly
Definition: design.txt:34
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:1004
static articles()
Definition: SiteStats.php:137
const COLON_STATE_TAGSTART
Definition: Parser.php:105
$mRevisionUser
Definition: Parser.php:226
lock()
Lock the current instance of the parser.
Definition: Parser.php:6415
static pages()
Definition: SiteStats.php:145
$line
Definition: cdb.php:59
const COLON_STATE_COMMENT
Definition: Parser.php:108
const SFH_OBJECT_ARGS
Definition: Parser.php:83
static statelessFetchTemplate($title, $parser=false)
Static function to get a template. Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3973
$mDTopen
Definition: Parser.php:184
I won t presume to tell you how to I m just describing the methods I chose to use for myself If you do choose to follow these it will probably be easier for you to collaborate with others on the but if you want to contribute without by all means do which work well I also use K &R brace matching style I know that s a religious issue for so if you want to use a style that puts opening braces on the next line
Definition: design.txt:79
setFunctionHook($id, $callback, $flags=0)
Create a function, e.g.
Definition: Parser.php:5220
static setupOOUI($skinName= '', $dir= 'ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
static makeMediaLinkFile(Title $title, $file, $html= '')
Create a direct link to a given uploaded file.
Definition: Linker.php:994
$mIncludeCount
Definition: Parser.php:191
usually copyright or history_copyright This message must be in HTML not wikitext if the section is included from a template to be included in the link
Definition: hooks.txt:2715
$mMarkerIndex
Definition: Parser.php:158
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object to manipulate or replace but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok inclusive $limit
Definition: hooks.txt:1004
getTitle()
Accessor for the Title object.
Definition: Parser.php:758
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
extractSections($text, $sectionId, $mode, $newText= '')
Break wikitext input into sections, and either pull or replace some particular section's text...
Definition: Parser.php:5866
ParserOutput $mOutput
Definition: Parser.php:183
getOutput()
Get the ParserOutput object.
Definition: Parser.php:803
$wgExperimentalHtmlIds
Should we allow a broader set of characters in id attributes, per HTML5? If not, use only HTML 4-comp...
static statelessFetchRevision($title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3923
doMagicLinks($text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1394
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable or merely the Work and Derivative Works thereof Contribution shall mean any work of including the original version of the Work and any modifications or additions to that Work or Derivative Works that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner For the purposes of this submitted means any form of or written communication sent to the Licensor or its including but not limited to communication on electronic mailing source code control and issue tracking systems that are managed or on behalf the Licensor for the purpose of discussing and improving the but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as Not a Contribution 
Contributor shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work Grant of Copyright License Subject to the terms and conditions of this each Contributor hereby grants to You a non no royalty irrevocable copyright license to prepare Derivative Works publicly display
getCommon($st1, $st2)
getCommon() returns the length of the longest common substring of both arguments, starting at the beg...
Definition: Parser.php:2441
!html< table >< tr >< td > broken</td ></tr ></table >!end!test Table cell attributes
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:1004
static cleanSigInSig($text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:5037
setDefaultSort($sort)
Mutator for $mDefaultSort.
Definition: Parser.php:6119
fetchFile($title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:4063
static fixTagAttributes($text, $element)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML...
Definition: Sanitizer.php:1037
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1693
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:606
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling output() to send it all.It could be easily changed to send incrementally if that becomes useful
$wgServer
URL of the server.
We ve cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else It s much easier for someone working with this code to see what s _really_ going on
Definition: hooks.txt:86
incrementExpensiveFunctionCount()
Increment the expensive function count.
Definition: Parser.php:4328
const DB_MASTER
Definition: Defines.php:47
$mShowToc
Definition: Parser.php:202
static normalizeLinkUrl($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1915
static removeHTMLtags($text, $processCallback=null, $args=[], $extratags=[], $removetags=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:455
magicLinkCallback($m)
Definition: Parser.php:1424
const EXT_LINK_URL_CLASS
Definition: Parser.php:89
insertStripItem($text)
Add an item to the strip state. Returns the unique tag which must be inserted into the stripped text T...
Definition: Parser.php:1009
testPst($text, Title $title, ParserOptions $options)
Definition: Parser.php:6244
static factory($url, $options=null, $caller=__METHOD__)
Generate a new request object.
const TS_UNIX
Unix time - the number of seconds since 1970-01-01 00:00:00 UTC.
if(!$wgRequest->checkUrlExtension()) if(!$wgEnableAPI) $wgTitle
Definition: api.php:57
static explode($separator, $subject)
Workalike for explode() with limited memory usage.
ParserOptions $mOptions
Definition: Parser.php:215
parse($text, Title $title, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:405
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:762
static numberingroup($group)
Find the number of users in a given user group.
Definition: SiteStats.php:179
=Architecture==Two class hierarchies are used to provide the functionality associated with the different content models:*Content interface(and AbstractContent base class) define functionality that acts on the concrete content of a page, and *ContentHandler base class provides functionality specific to a content model, but not acting on concrete content.The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content.These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text.All manipulation and analysis of page content must be done via the appropriate methods of the Content object.For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers.The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id).Also Title, WikiPage and Revision now have getContentHandler() methods for convenience.ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page.ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type.However, it is recommended to instead use WikiPage::getContent() resp.Revision::getContent() to get a page's content as a Content object.These two methods should be the ONLY way in which page content is accessed.Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides().This is similar to what WikiPage::getActionOverrides() was already doing.==Serialization==With the ContentHandler facility, page content no longer has to be text based.Objects implementing the Content interface are used to represent and handle the content internally.For storage and data exchange, each content model supports at least one serialization format via 
ContentHandler::serializeContent($content).The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats().Content serialization formats are identified using MIME type like strings.The following formats are built in:*text/x-wiki-wikitext *text/javascript-for js pages *text/css-for css pages *text/plain-for future use, e.g.with plain text messages.*text/html-for future use, e.g.with plain html messages.*application/vnd.php.serialized-for future use with the api and for extensions *application/json-for future use with the api, and for use by extensions *application/xml-for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant.Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model and format of the content should always be handled explicitly.Without that information, interpretation of the provided content is not reliable.The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export.Also note that the API will provide encapsulated, serialized content-so if the API was called with format=json, and contentformat is also json(or rather, application/json), the page content is represented as a string containing an escaped json structure.Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form.==Compatibility==The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content.However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page's content model, and will now generate warnings when used.Most importantly, the following functions have been deprecated:*Revisions::getText() is deprecated in favor 
of Revisions::getContent() *WikiPage::getText() is deprecated in favor of WikiPage::getContent(). Also, the old Article::getContent() (which returns text) is superseded by Article::getContentObject(). However, both methods should be avoided since they do not provide clean access to the page's actual content. For instance, they may return a system message for non-existing pages. Use WikiPage::getContent() instead. Code that relies on a textual representation of the page content should eventually be rewritten. However, ContentHandler::getContentText() provides a stop-gap that can be used to get text for a page. Its behavior is controlled by $wgContentHandlerTextFallback it
const STRIP_COMMENTS
static getVersion($flags= '', $lang=null)
Return a string of the MediaWiki version with Git revision if available.
braceSubstitution($piece, $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:3443
setUser($user)
Set the current user.
Definition: Parser.php:720
$mHighestExpansionDepth
Definition: Parser.php:198
makeImage($title, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5531
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition: Parser.php:5786
static cascadingsources($parser, $title= '')
Returns the sources of any cascading protection acting on a specified page.
getCustomDefaultSort()
Accessor for $mDefaultSort. Unlike getDefaultSort(), will return false if none is set.
Definition: Parser.php:6148
extensionSubstitution($params, $frame)
Return the text to be used for a given extension tag.
Definition: Parser.php:4230
static makeExternalImage($url, $alt= '')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage()...
Definition: Linker.php:492
recursiveTagParseFully($text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition: Parser.php:628
setTransparentTagHook($tag, $callback)
As setHook(), but letting the contents be parsed.
Definition: Parser.php:5157
static element($element, $attribs=[], $contents= '')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:230
wfFindFile($title, $options=[])
Find a file.
$mRevisionSize
Definition: Parser.php:227
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk page
Definition: hooks.txt:2338
static users()
Definition: SiteStats.php:153
unserializeHalfParsedText($data)
Load the parser state given in the $data array, which is assumed to have been generated by serializeH...
Definition: Parser.php:6349
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2338
guessSectionNameFromWikiText($text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition: Parser.php:6161
const SFH_OBJECT_ARGS
Definition: Defines.php:240
static & makeTitle($ns, $title, $fragment= '', $interwiki= '')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:524
$wgServerName
Server name.
internalParseHalfParsed($text, $isMain=true, $linestart=true)
Helper function for parse() that transforms half-parsed HTML into fully parsed HTML.
Definition: Parser.php:1294
const OT_HTML
Definition: Parser.php:117
$mIncludeSizes
Definition: Parser.php:198
if the prop value should be in the metadata multi language array format
Definition: hooks.txt:1473
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1798
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control variable
Definition: memcached.txt:78
getOptions()
Get the ParserOptions object.
Definition: Parser.php:812
makeKnownLinkHolder($nt, $text= '', $query=[], $trail= '', $prefix= '')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2364
getDefaultSort()
Accessor for $mDefaultSort. Will use the empty string if none is set.
Definition: Parser.php:6134
For a write use something like
Definition: database.txt:26
const SFH_NO_HASH
Definition: Defines.php:239
makeFreeExternalLink($url, $numPostProto)
Make a free external link, given a user-supplied URL.
Definition: Parser.php:1480
$matches
const COLON_STATE_CLOSETAG
Definition: Parser.php:106
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:310
$mTplDomCache
Definition: Parser.php:200