MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
25 
69 class Parser {
75  const VERSION = '1.6.4';
76 
82 
83  # Flags for Parser::setFunctionHook
84  const SFH_NO_HASH = 1;
85  const SFH_OBJECT_ARGS = 2;
86 
87  # Constants needed for external link processing
88  # Everything except bracket, space, or control characters
89  # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
90  # as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052
91  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
92  # Simplified expression to match an IPv4 or IPv6 address, or
93  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
94  const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
95  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
96  // @codingStandardsIgnoreStart Generic.Files.LineLength
97  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
98  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
99  // @codingStandardsIgnoreEnd
100 
101  # Regular expression for a non-newline space
102  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
103 
104  # Flags for preprocessToDom
105  const PTD_FOR_INCLUSION = 1;
106 
107  # Allowed values for $this->mOutputType
108  # Parameter to startExternalParse().
109  const OT_HTML = 1; # like parse()
110  const OT_WIKI = 2; # like preSaveTransform()
112  const OT_MSG = 3;
113  const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
114 
132  const MARKER_SUFFIX = "-QINU`\"'\x7f";
133  const MARKER_PREFIX = "\x7f'\"`UNIQ-";
134 
135  # Markers used for wrapping the table of contents
136  const TOC_START = '<mw:toc>';
137  const TOC_END = '</mw:toc>';
138 
139  # Persistent:
140  public $mTagHooks = [];
142  public $mFunctionHooks = [];
143  public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
144  public $mFunctionTagHooks = [];
145  public $mStripList = [];
146  public $mDefaultStripList = [];
147  public $mVarCache = [];
148  public $mImageParams = [];
150  public $mMarkerIndex = 0;
151  public $mFirstCall = true;
152 
153  # Initialised by initialiseVariables()
154 
158  public $mVariables;
159 
163  public $mSubstWords;
164  # Initialised in constructor
166 
167  # Initialized in getPreprocessor()
168 
170 
171  # Cleared with clearState():
172 
175  public $mOutput;
176  public $mAutonumber;
177 
181  public $mStripState;
182 
188 
189  public $mLinkID;
193  public $mExpensiveFunctionCount; # number of expensive parser function calls
195 
199  public $mUser; # User object; only used when doing pre-save transform
200 
201  # Temporary
202  # These are variables reset at least once per parse regardless of $clearState
203 
207  public $mOptions;
208 
212  public $mTitle; # Title context, used for self-link rendering and similar things
213  public $mOutputType; # Output type, one of the OT_xxx constants
214  public $ot; # Shortcut alias, see setOutputType()
215  public $mRevisionObject; # The revision object of the specified revision ID
216  public $mRevisionId; # ID to display in {{REVISIONID}} tags
217  public $mRevisionTimestamp; # The timestamp of the specified revision ID
218  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
219  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
220  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
221  public $mInputSize = false; # For {{PAGESIZE}} on current page.
222 
228 
235 
243 
248  public $mInParse = false;
249 
251  protected $mProfiler;
252 
256  protected $mLinkRenderer;
257 
261  public function __construct( $conf = [] ) {
262  $this->mConf = $conf;
263  $this->mUrlProtocols = wfUrlProtocols();
264  $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
265  self::EXT_LINK_ADDR .
266  self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
267  if ( isset( $conf['preprocessorClass'] ) ) {
268  $this->mPreprocessorClass = $conf['preprocessorClass'];
269  } elseif ( defined( 'HPHP_VERSION' ) ) {
270  # Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop
271  $this->mPreprocessorClass = 'Preprocessor_Hash';
272  } elseif ( extension_loaded( 'domxml' ) ) {
273  # PECL extension that conflicts with the core DOM extension (bug 13770)
274  wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
275  $this->mPreprocessorClass = 'Preprocessor_Hash';
276  } elseif ( extension_loaded( 'dom' ) ) {
277  $this->mPreprocessorClass = 'Preprocessor_DOM';
278  } else {
279  $this->mPreprocessorClass = 'Preprocessor_Hash';
280  }
281  wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
282  }
283 
287  public function __destruct() {
288  if ( isset( $this->mLinkHolders ) ) {
289  unset( $this->mLinkHolders );
290  }
291  foreach ( $this as $name => $value ) {
292  unset( $this->$name );
293  }
294  }
295 
299  public function __clone() {
300  $this->mInParse = false;
301 
302  // Bug 56226: When you create a reference "to" an object field, that
303  // makes the object field itself be a reference too (until the other
304  // reference goes out of scope). When cloning, any field that's a
305  // reference is copied as a reference in the new object. Both of these
306  // are defined PHP5 behaviors, as inconvenient as it is for us when old
307  // hooks from PHP4 days are passing fields by reference.
308  foreach ( [ 'mStripState', 'mVarCache' ] as $k ) {
309  // Make a non-reference copy of the field, then rebind the field to
310  // reference the new copy.
311  $tmp = $this->$k;
312  $this->$k =& $tmp;
313  unset( $tmp );
314  }
315 
316  Hooks::run( 'ParserCloned', [ $this ] );
317  }
318 
322  public function firstCallInit() {
323  if ( !$this->mFirstCall ) {
324  return;
325  }
326  $this->mFirstCall = false;
327 
329  CoreTagHooks::register( $this );
330  $this->initialiseVariables();
331 
332  Hooks::run( 'ParserFirstCallInit', [ &$this ] );
333  }
334 
340  public function clearState() {
341  if ( $this->mFirstCall ) {
342  $this->firstCallInit();
343  }
344  $this->mOutput = new ParserOutput;
345  $this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
346  $this->mAutonumber = 0;
347  $this->mIncludeCount = [];
348  $this->mLinkHolders = new LinkHolderArray( $this );
349  $this->mLinkID = 0;
350  $this->mRevisionObject = $this->mRevisionTimestamp =
351  $this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
352  $this->mVarCache = [];
353  $this->mUser = null;
354  $this->mLangLinkLanguages = [];
355  $this->currentRevisionCache = null;
356 
357  $this->mStripState = new StripState;
358 
359  # Clear these on every parse, bug 4549
360  $this->mTplRedirCache = $this->mTplDomCache = [];
361 
362  $this->mShowToc = true;
363  $this->mForceTocPosition = false;
364  $this->mIncludeSizes = [
365  'post-expand' => 0,
366  'arg' => 0,
367  ];
368  $this->mPPNodeCount = 0;
369  $this->mGeneratedPPNodeCount = 0;
370  $this->mHighestExpansionDepth = 0;
371  $this->mDefaultSort = false;
372  $this->mHeadings = [];
373  $this->mDoubleUnderscores = [];
374  $this->mExpensiveFunctionCount = 0;
375 
376  # Fix cloning
377  if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
378  $this->mPreprocessor = null;
379  }
380 
381  $this->mProfiler = new SectionProfiler();
382 
383  Hooks::run( 'ParserClearState', [ &$this ] );
384  }
385 
398  public function parse( $text, Title $title, ParserOptions $options,
399  $linestart = true, $clearState = true, $revid = null
400  ) {
407 
408  if ( $clearState ) {
409  // We use U+007F DELETE to construct strip markers, so we have to make
410  // sure that this character does not occur in the input text.
411  $text = strtr( $text, "\x7f", "?" );
412  $magicScopeVariable = $this->lock();
413  }
414 
415  $this->startParse( $title, $options, self::OT_HTML, $clearState );
416 
417  $this->currentRevisionCache = null;
418  $this->mInputSize = strlen( $text );
419  if ( $this->mOptions->getEnableLimitReport() ) {
420  $this->mOutput->resetParseStartTime();
421  }
422 
423  $oldRevisionId = $this->mRevisionId;
424  $oldRevisionObject = $this->mRevisionObject;
425  $oldRevisionTimestamp = $this->mRevisionTimestamp;
426  $oldRevisionUser = $this->mRevisionUser;
427  $oldRevisionSize = $this->mRevisionSize;
428  if ( $revid !== null ) {
429  $this->mRevisionId = $revid;
430  $this->mRevisionObject = null;
431  $this->mRevisionTimestamp = null;
432  $this->mRevisionUser = null;
433  $this->mRevisionSize = null;
434  }
435 
436  Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] );
437  # No more strip!
438  Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] );
439  $text = $this->internalParse( $text );
440  Hooks::run( 'ParserAfterParse', [ &$this, &$text, &$this->mStripState ] );
441 
442  $text = $this->internalParseHalfParsed( $text, true, $linestart );
443 
451  if ( !( $options->getDisableTitleConversion()
452  || isset( $this->mDoubleUnderscores['nocontentconvert'] )
453  || isset( $this->mDoubleUnderscores['notitleconvert'] )
454  || $this->mOutput->getDisplayTitle() !== false )
455  ) {
456  $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
457  if ( $convruletitle ) {
458  $this->mOutput->setTitleText( $convruletitle );
459  } else {
460  $titleText = $this->getConverterLanguage()->convertTitle( $title );
461  $this->mOutput->setTitleText( $titleText );
462  }
463  }
464 
465  if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
466  $this->limitationWarn( 'expensive-parserfunction',
467  $this->mExpensiveFunctionCount,
468  $this->mOptions->getExpensiveParserFunctionLimit()
469  );
470  }
471 
472  # Information on include size limits, for the benefit of users who try to skirt them
473  if ( $this->mOptions->getEnableLimitReport() ) {
474  $max = $this->mOptions->getMaxIncludeSize();
475 
476  $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
477  if ( $cpuTime !== null ) {
478  $this->mOutput->setLimitReportData( 'limitreport-cputime',
479  sprintf( "%.3f", $cpuTime )
480  );
481  }
482 
483  $wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
484  $this->mOutput->setLimitReportData( 'limitreport-walltime',
485  sprintf( "%.3f", $wallTime )
486  );
487 
488  $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
489  [ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
490  );
491  $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
492  [ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
493  );
494  $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
495  [ $this->mIncludeSizes['post-expand'], $max ]
496  );
497  $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
498  [ $this->mIncludeSizes['arg'], $max ]
499  );
500  $this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
501  [ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
502  );
503  $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
504  [ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
505  );
506  Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );
507 
508  $limitReport = "NewPP limit report\n";
509  if ( $wgShowHostnames ) {
510  $limitReport .= 'Parsed by ' . wfHostname() . "\n";
511  }
512  $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
513  $limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
514  $limitReport .= 'Dynamic content: ' .
515  ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
516  "\n";
517 
518  foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
519  if ( Hooks::run( 'ParserLimitReportFormat',
520  [ $key, &$value, &$limitReport, false, false ]
521  ) ) {
522  $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
523  $valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
524  ->inLanguage( 'en' )->useDatabase( false );
525  if ( !$valueMsg->exists() ) {
526  $valueMsg = new RawMessage( '$1' );
527  }
528  if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
529  $valueMsg->params( $value );
530  $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
531  }
532  }
533  }
534  // Since we're not really outputting HTML, decode the entities and
535  // then re-encode the things that need hiding inside HTML comments.
536  $limitReport = htmlspecialchars_decode( $limitReport );
537  Hooks::run( 'ParserLimitReport', [ $this, &$limitReport ] );
538 
539  // Sanitize for comment. Note '‐' in the replacement is U+2010,
540  // which looks much like the problematic '-'.
541  $limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
542  $text .= "\n<!-- \n$limitReport-->\n";
543 
544  // Add on template profiling data
545  $dataByFunc = $this->mProfiler->getFunctionStats();
546  uasort( $dataByFunc, function ( $a, $b ) {
547  return $a['real'] < $b['real']; // descending order
548  } );
549  $profileReport = "Transclusion expansion time report (%,ms,calls,template)\n";
550  foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
551  $profileReport .= sprintf( "%6.2f%% %8.3f %6d - %s\n",
552  $item['%real'], $item['real'], $item['calls'],
553  htmlspecialchars( $item['name'] ) );
554  }
555  $text .= "\n<!-- \n$profileReport-->\n";
556 
557  if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
558  wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
559  $this->mTitle->getPrefixedDBkey() );
560  }
561  }
562  $this->mOutput->setText( $text );
563 
564  $this->mRevisionId = $oldRevisionId;
565  $this->mRevisionObject = $oldRevisionObject;
566  $this->mRevisionTimestamp = $oldRevisionTimestamp;
567  $this->mRevisionUser = $oldRevisionUser;
568  $this->mRevisionSize = $oldRevisionSize;
569  $this->mInputSize = false;
570  $this->currentRevisionCache = null;
571 
572  return $this->mOutput;
573  }
574 
597  public function recursiveTagParse( $text, $frame = false ) {
598  Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] );
599  Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] );
600  $text = $this->internalParse( $text, false, $frame );
601  return $text;
602  }
603 
621  public function recursiveTagParseFully( $text, $frame = false ) {
622  $text = $this->recursiveTagParse( $text, $frame );
623  $text = $this->internalParseHalfParsed( $text, false );
624  return $text;
625  }
626 
638  public function preprocess( $text, Title $title = null,
639  ParserOptions $options, $revid = null, $frame = false
640  ) {
641  $magicScopeVariable = $this->lock();
642  $this->startParse( $title, $options, self::OT_PREPROCESS, true );
643  if ( $revid !== null ) {
644  $this->mRevisionId = $revid;
645  }
646  Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] );
647  Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] );
648  $text = $this->replaceVariables( $text, $frame );
649  $text = $this->mStripState->unstripBoth( $text );
650  return $text;
651  }
652 
662  public function recursivePreprocess( $text, $frame = false ) {
663  $text = $this->replaceVariables( $text, $frame );
664  $text = $this->mStripState->unstripBoth( $text );
665  return $text;
666  }
667 
681  public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
682  $msg = new RawMessage( $text );
683  $text = $msg->params( $params )->plain();
684 
685  # Parser (re)initialisation
686  $magicScopeVariable = $this->lock();
687  $this->startParse( $title, $options, self::OT_PLAIN, true );
688 
690  $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
691  $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
692  $text = $this->mStripState->unstripBoth( $text );
693  return $text;
694  }
695 
702  public static function getRandomString() {
703  wfDeprecated( __METHOD__, '1.26' );
704  return wfRandomString( 16 );
705  }
706 
713  public function setUser( $user ) {
714  $this->mUser = $user;
715  }
716 
723  public function uniqPrefix() {
724  wfDeprecated( __METHOD__, '1.26' );
725  return self::MARKER_PREFIX;
726  }
727 
733  public function setTitle( $t ) {
734  if ( !$t ) {
735  $t = Title::newFromText( 'NO TITLE' );
736  }
737 
738  if ( $t->hasFragment() ) {
739  # Strip the fragment to avoid various odd effects
740  $this->mTitle = $t->createFragmentTarget( '' );
741  } else {
742  $this->mTitle = $t;
743  }
744  }
745 
751  public function getTitle() {
752  return $this->mTitle;
753  }
754 
761  public function Title( $x = null ) {
762  return wfSetVar( $this->mTitle, $x );
763  }
764 
770  public function setOutputType( $ot ) {
771  $this->mOutputType = $ot;
772  # Shortcut alias
773  $this->ot = [
774  'html' => $ot == self::OT_HTML,
775  'wiki' => $ot == self::OT_WIKI,
776  'pre' => $ot == self::OT_PREPROCESS,
777  'plain' => $ot == self::OT_PLAIN,
778  ];
779  }
780 
787  public function OutputType( $x = null ) {
788  return wfSetVar( $this->mOutputType, $x );
789  }
790 
796  public function getOutput() {
797  return $this->mOutput;
798  }
799 
805  public function getOptions() {
806  return $this->mOptions;
807  }
808 
815  public function Options( $x = null ) {
816  return wfSetVar( $this->mOptions, $x );
817  }
818 
822  public function nextLinkID() {
823  return $this->mLinkID++;
824  }
825 
829  public function setLinkID( $id ) {
830  $this->mLinkID = $id;
831  }
832 
837  public function getFunctionLang() {
838  return $this->getTargetLanguage();
839  }
840 
850  public function getTargetLanguage() {
851  $target = $this->mOptions->getTargetLanguage();
852 
853  if ( $target !== null ) {
854  return $target;
855  } elseif ( $this->mOptions->getInterfaceMessage() ) {
856  return $this->mOptions->getUserLangObj();
857  } elseif ( is_null( $this->mTitle ) ) {
858  throw new MWException( __METHOD__ . ': $this->mTitle is null' );
859  }
860 
861  return $this->mTitle->getPageLanguage();
862  }
863 
868  public function getConverterLanguage() {
869  return $this->getTargetLanguage();
870  }
871 
878  public function getUser() {
879  if ( !is_null( $this->mUser ) ) {
880  return $this->mUser;
881  }
882  return $this->mOptions->getUser();
883  }
884 
890  public function getPreprocessor() {
891  if ( !isset( $this->mPreprocessor ) ) {
892  $class = $this->mPreprocessorClass;
893  $this->mPreprocessor = new $class( $this );
894  }
895  return $this->mPreprocessor;
896  }
897 
904  public function getLinkRenderer() {
905  if ( !$this->mLinkRenderer ) {
906  $this->mLinkRenderer = MediaWikiServices::getInstance()
907  ->getLinkRendererFactory()->create();
908  $this->mLinkRenderer->setStubThreshold(
909  $this->getOptions()->getStubThreshold()
910  );
911  }
912 
913  return $this->mLinkRenderer;
914  }
915 
937  public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) {
938  if ( $uniq_prefix !== null ) {
939  wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' );
940  }
941  static $n = 1;
942  $stripped = '';
943  $matches = [];
944 
945  $taglist = implode( '|', $elements );
946  $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";
947 
948  while ( $text != '' ) {
949  $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
950  $stripped .= $p[0];
951  if ( count( $p ) < 5 ) {
952  break;
953  }
954  if ( count( $p ) > 5 ) {
955  # comment
956  $element = $p[4];
957  $attributes = '';
958  $close = '';
959  $inside = $p[5];
960  } else {
961  # tag
962  $element = $p[1];
963  $attributes = $p[2];
964  $close = $p[3];
965  $inside = $p[4];
966  }
967 
968  $marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
969  $stripped .= $marker;
970 
971  if ( $close === '/>' ) {
972  # Empty element tag, <tag />
973  $content = null;
974  $text = $inside;
975  $tail = null;
976  } else {
977  if ( $element === '!--' ) {
978  $end = '/(-->)/';
979  } else {
980  $end = "/(<\\/$element\\s*>)/i";
981  }
982  $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
983  $content = $q[0];
984  if ( count( $q ) < 3 ) {
985  # No end tag -- let it run out to the end of the text.
986  $tail = '';
987  $text = '';
988  } else {
989  $tail = $q[1];
990  $text = $q[2];
991  }
992  }
993 
994  $matches[$marker] = [ $element,
995  $content,
996  Sanitizer::decodeTagAttributes( $attributes ),
997  "<$element$attributes$close$content$tail" ];
998  }
999  return $stripped;
1000  }
1001 
1007  public function getStripList() {
1008  return $this->mStripList;
1009  }
1010 
1020  public function insertStripItem( $text ) {
1021  $marker = self::MARKER_PREFIX . "-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
1022  $this->mMarkerIndex++;
1023  $this->mStripState->addGeneral( $marker, $text );
1024  return $marker;
1025  }
1026 
 /**
  * Convert wiki table markup ({| ... |}, |-, |, !, |+) in the text to HTML
  * table elements, line by line.
  *
  * The function keeps one stack entry per currently open table in each of
  * the *_history / $tr_attributes / $has_opened_tr arrays, so nested tables
  * are handled by pushing on "{|" and popping on "|}". Lines outside of any
  * table are copied through unchanged.
  *
  * @param string $text Text to process
  * @return string Text with table markup replaced by HTML; an input that
  *   yields only an empty table returns an empty string
  */
1034  public function doTableStuff( $text ) {
1035 
1036  $lines = StringUtils::explode( "\n", $text );
1037  $out = '';
1038  $td_history = []; # Is currently a td tag open?
1039  $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1040  $tr_history = []; # Is currently a tr tag open?
1041  $tr_attributes = []; # history of tr attributes
1042  $has_opened_tr = []; # Did this table open a <tr> element?
1043  $indent_level = 0; # indent level of the table
1044 
1045  foreach ( $lines as $outLine ) {
1046  $line = trim( $outLine );
1047 
1048  if ( $line === '' ) { # empty line, go to next line
1049  $out .= $outLine . "\n";
1050  continue;
1051  }
1052 
1053  $first_character = $line[0];
1054  $first_two = substr( $line, 0, 2 );
1055  $matches = [];
1056 
1057  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1058  # First check if we are starting a new table
  # The number of leading colons gives the indent level
1059  $indent_level = strlen( $matches[1] );
1060 
1061  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1062  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1063 
1064  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
1065  array_push( $td_history, false );
1066  array_push( $last_tag_history, '' );
1067  array_push( $tr_history, false );
1068  array_push( $tr_attributes, '' );
1069  array_push( $has_opened_tr, false );
1070  } elseif ( count( $td_history ) == 0 ) {
1071  # Don't do any of the following
  # (no table is open, so the line is copied through unchanged)
1072  $out .= $outLine . "\n";
1073  continue;
1074  } elseif ( $first_two === '|}' ) {
1075  # We are ending a table
1076  $line = '</table>' . substr( $line, 2 );
1077  $last_tag = array_pop( $last_tag_history );
1078 
  # A table that never opened a row gets an empty one
1079  if ( !array_pop( $has_opened_tr ) ) {
1080  $line = "<tr><td></td></tr>{$line}";
1081  }
1082 
1083  if ( array_pop( $tr_history ) ) {
1084  $line = "</tr>{$line}";
1085  }
1086 
1087  if ( array_pop( $td_history ) ) {
1088  $line = "</{$last_tag}>{$line}";
1089  }
1090  array_pop( $tr_attributes );
1091  $outLine = $line . str_repeat( '</dd></dl>', $indent_level );
1092  } elseif ( $first_two === '|-' ) {
1093  # Now we have a table row
1094  $line = preg_replace( '#^\|-+#', '', $line );
1095 
1096  # Whats after the tag is now only attributes
1097  $attributes = $this->mStripState->unstripBoth( $line );
1098  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
1099  array_pop( $tr_attributes );
1100  array_push( $tr_attributes, $attributes );
1101 
1102  $line = '';
1103  $last_tag = array_pop( $last_tag_history );
1104  array_pop( $has_opened_tr );
1105  array_push( $has_opened_tr, true );
1106 
1107  if ( array_pop( $tr_history ) ) {
1108  $line = '</tr>';
1109  }
1110 
1111  if ( array_pop( $td_history ) ) {
1112  $line = "</{$last_tag}>{$line}";
1113  }
1114 
  # The <tr> itself is only emitted once the first cell of the row is seen
1115  $outLine = $line;
1116  array_push( $tr_history, false );
1117  array_push( $td_history, false );
1118  array_push( $last_tag_history, '' );
1119  } elseif ( $first_character === '|'
1120  || $first_character === '!'
1121  || $first_two === '|+'
1122  ) {
1123  # This might be cell elements, td, th or captions
1124  if ( $first_two === '|+' ) {
1125  $first_character = '+';
1126  $line = substr( $line, 2 );
1127  } else {
1128  $line = substr( $line, 1 );
1129  }
1130 
1131  // Implies both are valid for table headings.
1132  if ( $first_character === '!' ) {
1133  $line = StringUtils::replaceMarkup( '!!', '||', $line );
1134  }
1135 
1136  # Split up multiple cells on the same line.
1137  # FIXME : This can result in improper nesting of tags processed
1138  # by earlier parser steps.
1139  $cells = explode( '||', $line );
1140 
1141  $outLine = '';
1142 
1143  # Loop through each table cell
1144  foreach ( $cells as $cell ) {
1145  $previous = '';
  # Captions live outside of rows; anything else opens a row if needed
1146  if ( $first_character !== '+' ) {
1147  $tr_after = array_pop( $tr_attributes );
1148  if ( !array_pop( $tr_history ) ) {
1149  $previous = "<tr{$tr_after}>\n";
1150  }
1151  array_push( $tr_history, true );
1152  array_push( $tr_attributes, '' );
1153  array_pop( $has_opened_tr );
1154  array_push( $has_opened_tr, true );
1155  }
1156 
1157  $last_tag = array_pop( $last_tag_history );
1158 
1159  if ( array_pop( $td_history ) ) {
1160  $previous = "</{$last_tag}>\n{$previous}";
1161  }
1162 
1163  if ( $first_character === '|' ) {
1164  $last_tag = 'td';
1165  } elseif ( $first_character === '!' ) {
1166  $last_tag = 'th';
1167  } elseif ( $first_character === '+' ) {
1168  $last_tag = 'caption';
1169  } else {
1170  $last_tag = '';
1171  }
1172 
1173  array_push( $last_tag_history, $last_tag );
1174 
1175  # A cell could contain both parameters and data
1176  $cell_data = explode( '|', $cell, 2 );
1177 
1178  # Bug 553: Note that a '|' inside an invalid link should not
1179  # be mistaken as delimiting cell parameters
1180  if ( strpos( $cell_data[0], '[[' ) !== false ) {
1181  $cell = "{$previous}<{$last_tag}>{$cell}";
1182  } elseif ( count( $cell_data ) == 1 ) {
1183  $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
1184  } else {
1185  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1186  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1187  $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
1188  }
1189 
1190  $outLine .= $cell;
1191  array_push( $td_history, true );
1192  }
1193  }
1194  $out .= $outLine . "\n";
1195  }
1196 
1197  # Closing open td, tr && table
  # NOTE(review): an open cell is always closed as </td> here, whatever the
  # last tag was, and the <dl><dd> indent wrappers are not closed.
1198  while ( count( $td_history ) > 0 ) {
1199  if ( array_pop( $td_history ) ) {
1200  $out .= "</td>\n";
1201  }
1202  if ( array_pop( $tr_history ) ) {
1203  $out .= "</tr>\n";
1204  }
1205  if ( !array_pop( $has_opened_tr ) ) {
1206  $out .= "<tr><td></td></tr>\n";
1207  }
1208 
1209  $out .= "</table>\n";
1210  }
1211 
1212  # Remove trailing line-ending (b/c)
1213  if ( substr( $out, -1 ) === "\n" ) {
1214  $out = substr( $out, 0, -1 );
1215  }
1216 
1217  # special case: don't return empty table
1218  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1219  $out = '';
1220  }
1221 
1222  return $out;
1223  }
1224 
1237  public function internalParse( $text, $isMain = true, $frame = false ) {
1238 
1239  $origText = $text;
1240 
1241  # Hook to suspend the parser in this state
1242  if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$this, &$text, &$this->mStripState ] ) ) {
1243  return $text;
1244  }
1245 
1246  # if $frame is provided, then use $frame for replacing any variables
1247  if ( $frame ) {
1248  # use frame depth to infer how include/noinclude tags should be handled
1249  # depth=0 means this is the top-level document; otherwise it's an included document
1250  if ( !$frame->depth ) {
1251  $flag = 0;
1252  } else {
1253  $flag = Parser::PTD_FOR_INCLUSION;
1254  }
1255  $dom = $this->preprocessToDom( $text, $flag );
1256  $text = $frame->expand( $dom );
1257  } else {
1258  # if $frame is not provided, then use old-style replaceVariables
1259  $text = $this->replaceVariables( $text );
1260  }
1261 
1262  Hooks::run( 'InternalParseBeforeSanitize', [ &$this, &$text, &$this->mStripState ] );
1263  $text = Sanitizer::removeHTMLtags(
1264  $text,
1265  [ &$this, 'attributeStripCallback' ],
1266  false,
1267  array_keys( $this->mTransparentTagHooks )
1268  );
1269  Hooks::run( 'InternalParseBeforeLinks', [ &$this, &$text, &$this->mStripState ] );
1270 
1271  # Tables need to come after variable replacement for things to work
1272  # properly; putting them before other transformations should keep
1273  # exciting things like link expansions from showing up in surprising
1274  # places.
1275  $text = $this->doTableStuff( $text );
1276 
1277  $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
1278 
1279  $text = $this->doDoubleUnderscore( $text );
1280 
1281  $text = $this->doHeadings( $text );
1282  $text = $this->replaceInternalLinks( $text );
1283  $text = $this->doAllQuotes( $text );
1284  $text = $this->replaceExternalLinks( $text );
1285 
1286  # replaceInternalLinks may sometimes leave behind
1287  # absolute URLs, which have to be masked to hide them from replaceExternalLinks
1288  $text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );
1289 
1290  $text = $this->doMagicLinks( $text );
1291  $text = $this->formatHeadings( $text, $origText, $isMain );
1292 
1293  return $text;
1294  }
1295 
1305  private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
1306  $text = $this->mStripState->unstripGeneral( $text );
1307 
1308  if ( $isMain ) {
1309  Hooks::run( 'ParserAfterUnstrip', [ &$this, &$text ] );
1310  }
1311 
1312  # Clean up special characters, only run once, next-to-last before doBlockLevels
1313  $fixtags = [
1314  # french spaces, last one Guillemet-left
1315  # only if there is something before the space
1316  '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
1317  # french spaces, Guillemet-right
1318  '/(\\302\\253) /' => '\\1&#160;',
1319  '/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
1320  ];
1321  $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
1322 
1323  $text = $this->doBlockLevels( $text, $linestart );
1324 
1325  $this->replaceLinkHolders( $text );
1326 
1334  if ( !( $this->mOptions->getDisableContentConversion()
1335  || isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
1336  ) {
1337  if ( !$this->mOptions->getInterfaceMessage() ) {
1338  # The position of the convert() call should not be changed. it
1339  # assumes that the links are all replaced and the only thing left
1340  # is the <nowiki> mark.
1341  $text = $this->getConverterLanguage()->convert( $text );
1342  }
1343  }
1344 
1345  $text = $this->mStripState->unstripNoWiki( $text );
1346 
1347  if ( $isMain ) {
1348  Hooks::run( 'ParserBeforeTidy', [ &$this, &$text ] );
1349  }
1350 
1351  $text = $this->replaceTransparentTags( $text );
1352  $text = $this->mStripState->unstripGeneral( $text );
1353 
1354  $text = Sanitizer::normalizeCharReferences( $text );
1355 
1356  if ( MWTidy::isEnabled() && $this->mOptions->getTidy() ) {
1357  $text = MWTidy::tidy( $text );
1358  $this->mOutput->addModuleStyles( MWTidy::getModuleStyles() );
1359  } else {
1360  # attempt to sanitize at least some nesting problems
1361  # (bug #2702 and quite a few others)
1362  $tidyregs = [
1363  # ''Something [http://www.cool.com cool''] -->
1364  # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
1365  '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
1366  '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
1367  # fix up an anchor inside another anchor, only
1368  # at least for a single single nested link (bug 3695)
1369  '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
1370  '\\1\\2</a>\\3</a>\\1\\4</a>',
1371  # fix div inside inline elements- doBlockLevels won't wrap a line which
1372  # contains a div, so fix it up here; replace
1373  # div with escaped text
1374  '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
1375  '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
1376  # remove empty italic or bold tag pairs, some
1377  # introduced by rules above
1378  '/<([bi])><\/\\1>/' => '',
1379  ];
1380 
1381  $text = preg_replace(
1382  array_keys( $tidyregs ),
1383  array_values( $tidyregs ),
1384  $text );
1385  }
1386 
1387  if ( $isMain ) {
1388  Hooks::run( 'ParserAfterTidy', [ &$this, &$text ] );
1389  }
1390 
1391  return $text;
1392  }
1393 
1405  public function doMagicLinks( $text ) {
1406  $prots = wfUrlProtocolsWithoutProtRel();
1407  $urlChar = self::EXT_LINK_URL_CLASS;
1408  $addr = self::EXT_LINK_ADDR;
1409  $space = self::SPACE_NOT_NL; # non-newline space
1410  $spdash = "(?:-|$space)"; # a dash or a non-newline space
1411  $spaces = "$space++"; # possessive match of 1 or more spaces
1412  $text = preg_replace_callback(
1413  '!(?: # Start cases
1414  (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
1415  (<.*?>) | # m[2]: Skip stuff inside
1416  # HTML elements' . "
1417  (\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
1418  # m[4]: Post-protocol path
1419  \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
1420  ([0-9]+)\b |
1421  \bISBN $spaces ( # m[6]: ISBN, capture number
1422  (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
1423  (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
1424  [0-9Xx] # check digit
1425  )\b
1426  )!xu", [ &$this, 'magicLinkCallback' ], $text );
1427  return $text;
1428  }
1429 
1435  public function magicLinkCallback( $m ) {
1436  if ( isset( $m[1] ) && $m[1] !== '' ) {
1437  # Skip anchor
1438  return $m[0];
1439  } elseif ( isset( $m[2] ) && $m[2] !== '' ) {
1440  # Skip HTML element
1441  return $m[0];
1442  } elseif ( isset( $m[3] ) && $m[3] !== '' ) {
1443  # Free external link
1444  return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
1445  } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
1446  # RFC or PMID
1447  if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
1448  $keyword = 'RFC';
1449  $urlmsg = 'rfcurl';
1450  $cssClass = 'mw-magiclink-rfc';
1451  $id = $m[5];
1452  } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
1453  $keyword = 'PMID';
1454  $urlmsg = 'pubmedurl';
1455  $cssClass = 'mw-magiclink-pmid';
1456  $id = $m[5];
1457  } else {
1458  throw new MWException( __METHOD__ . ': unrecognised match type "' .
1459  substr( $m[0], 0, 20 ) . '"' );
1460  }
1461  $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
1462  return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
1463  } elseif ( isset( $m[6] ) && $m[6] !== '' ) {
1464  # ISBN
1465  $isbn = $m[6];
1466  $space = self::SPACE_NOT_NL; # non-newline space
1467  $isbn = preg_replace( "/$space/", ' ', $isbn );
1468  $num = strtr( $isbn, [
1469  '-' => '',
1470  ' ' => '',
1471  'x' => 'X',
1472  ] );
1473  $titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
1474  return '<a href="' .
1475  htmlspecialchars( $titleObj->getLocalURL() ) .
1476  "\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>";
1477  } else {
1478  return $m[0];
1479  }
1480  }
1481 
1491  public function makeFreeExternalLink( $url, $numPostProto ) {
1492  $trail = '';
1493 
1494  # The characters '<' and '>' (which were escaped by
1495  # removeHTMLtags()) should not be included in
1496  # URLs, per RFC 2396.
1497  # Make &nbsp; terminate a URL as well (bug T84937)
1498  $m2 = [];
1499  if ( preg_match(
1500  '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
1501  $url,
1502  $m2,
1503  PREG_OFFSET_CAPTURE
1504  ) ) {
1505  $trail = substr( $url, $m2[0][1] ) . $trail;
1506  $url = substr( $url, 0, $m2[0][1] );
1507  }
1508 
1509  # Move trailing punctuation to $trail
1510  $sep = ',;\.:!?';
1511  # If there is no left bracket, then consider right brackets fair game too
1512  if ( strpos( $url, '(' ) === false ) {
1513  $sep .= ')';
1514  }
1515 
1516  $urlRev = strrev( $url );
1517  $numSepChars = strspn( $urlRev, $sep );
1518  # Don't break a trailing HTML entity by moving the ; into $trail
1519  # This is in hot code, so use substr_compare to avoid having to
1520  # create a new string object for the comparison
1521  if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0 ) {
1522  # more optimization: instead of running preg_match with a $
1523  # anchor, which can be slow, do the match on the reversed
1524  # string starting at the desired offset.
1525  # un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
1526  if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars ) ) {
1527  $numSepChars--;
1528  }
1529  }
1530  if ( $numSepChars ) {
1531  $trail = substr( $url, -$numSepChars ) . $trail;
1532  $url = substr( $url, 0, -$numSepChars );
1533  }
1534 
1535  # Verify that we still have a real URL after trail removal, and
1536  # not just lone protocol
1537  if ( strlen( $trail ) >= $numPostProto ) {
1538  return $url . $trail;
1539  }
1540 
1541  $url = Sanitizer::cleanUrl( $url );
1542 
1543  # Is this an external image?
1544  $text = $this->maybeMakeExternalImage( $url );
1545  if ( $text === false ) {
1546  # Not an image, make a link
1547  $text = Linker::makeExternalLink( $url,
1548  $this->getConverterLanguage()->markNoConversion( $url, true ),
1549  true, 'free',
1550  $this->getExternalLinkAttribs( $url ) );
1551  # Register it in the output object...
1552  # Replace unnecessary URL escape codes with their equivalent characters
1553  $pasteurized = self::normalizeLinkUrl( $url );
1554  $this->mOutput->addExternalLink( $pasteurized );
1555  }
1556  return $text . $trail;
1557  }
1558 
1568  public function doHeadings( $text ) {
1569  for ( $i = 6; $i >= 1; --$i ) {
1570  $h = str_repeat( '=', $i );
1571  $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text );
1572  }
1573  return $text;
1574  }
1575 
1584  public function doAllQuotes( $text ) {
1585  $outtext = '';
1586  $lines = StringUtils::explode( "\n", $text );
1587  foreach ( $lines as $line ) {
1588  $outtext .= $this->doQuotes( $line ) . "\n";
1589  }
1590  $outtext = substr( $outtext, 0, -1 );
1591  return $outtext;
1592  }
1593 
1601  public function doQuotes( $text ) {
1602  $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1603  $countarr = count( $arr );
1604  if ( $countarr == 1 ) {
1605  return $text;
1606  }
1607 
1608  // First, do some preliminary work. This may shift some apostrophes from
1609  // being mark-up to being text. It also counts the number of occurrences
1610  // of bold and italics mark-ups.
1611  $numbold = 0;
1612  $numitalics = 0;
1613  for ( $i = 1; $i < $countarr; $i += 2 ) {
1614  $thislen = strlen( $arr[$i] );
1615  // If there are ever four apostrophes, assume the first is supposed to
1616  // be text, and the remaining three constitute mark-up for bold text.
1617  // (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
1618  if ( $thislen == 4 ) {
1619  $arr[$i - 1] .= "'";
1620  $arr[$i] = "'''";
1621  $thislen = 3;
1622  } elseif ( $thislen > 5 ) {
1623  // If there are more than 5 apostrophes in a row, assume they're all
1624  // text except for the last 5.
1625  // (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
1626  $arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
1627  $arr[$i] = "'''''";
1628  $thislen = 5;
1629  }
1630  // Count the number of occurrences of bold and italics mark-ups.
1631  if ( $thislen == 2 ) {
1632  $numitalics++;
1633  } elseif ( $thislen == 3 ) {
1634  $numbold++;
1635  } elseif ( $thislen == 5 ) {
1636  $numitalics++;
1637  $numbold++;
1638  }
1639  }
1640 
1641  // If there is an odd number of both bold and italics, it is likely
1642  // that one of the bold ones was meant to be an apostrophe followed
1643  // by italics. Which one we cannot know for certain, but it is more
1644  // likely to be one that has a single-letter word before it.
1645  if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
1646  $firstsingleletterword = -1;
1647  $firstmultiletterword = -1;
1648  $firstspace = -1;
1649  for ( $i = 1; $i < $countarr; $i += 2 ) {
1650  if ( strlen( $arr[$i] ) == 3 ) {
1651  $x1 = substr( $arr[$i - 1], -1 );
1652  $x2 = substr( $arr[$i - 1], -2, 1 );
1653  if ( $x1 === ' ' ) {
1654  if ( $firstspace == -1 ) {
1655  $firstspace = $i;
1656  }
1657  } elseif ( $x2 === ' ' ) {
1658  $firstsingleletterword = $i;
1659  // if $firstsingleletterword is set, we don't
1660  // look at the other options, so we can bail early.
1661  break;
1662  } else {
1663  if ( $firstmultiletterword == -1 ) {
1664  $firstmultiletterword = $i;
1665  }
1666  }
1667  }
1668  }
1669 
1670  // If there is a single-letter word, use it!
1671  if ( $firstsingleletterword > -1 ) {
1672  $arr[$firstsingleletterword] = "''";
1673  $arr[$firstsingleletterword - 1] .= "'";
1674  } elseif ( $firstmultiletterword > -1 ) {
1675  // If not, but there's a multi-letter word, use that one.
1676  $arr[$firstmultiletterword] = "''";
1677  $arr[$firstmultiletterword - 1] .= "'";
1678  } elseif ( $firstspace > -1 ) {
1679  // ... otherwise use the first one that has neither.
1680  // (notice that it is possible for all three to be -1 if, for example,
1681  // there is only one pentuple-apostrophe in the line)
1682  $arr[$firstspace] = "''";
1683  $arr[$firstspace - 1] .= "'";
1684  }
1685  }
1686 
1687  // Now let's actually convert our apostrophic mush to HTML!
1688  $output = '';
1689  $buffer = '';
1690  $state = '';
1691  $i = 0;
1692  foreach ( $arr as $r ) {
1693  if ( ( $i % 2 ) == 0 ) {
1694  if ( $state === 'both' ) {
1695  $buffer .= $r;
1696  } else {
1697  $output .= $r;
1698  }
1699  } else {
1700  $thislen = strlen( $r );
1701  if ( $thislen == 2 ) {
1702  if ( $state === 'i' ) {
1703  $output .= '</i>';
1704  $state = '';
1705  } elseif ( $state === 'bi' ) {
1706  $output .= '</i>';
1707  $state = 'b';
1708  } elseif ( $state === 'ib' ) {
1709  $output .= '</b></i><b>';
1710  $state = 'b';
1711  } elseif ( $state === 'both' ) {
1712  $output .= '<b><i>' . $buffer . '</i>';
1713  $state = 'b';
1714  } else { // $state can be 'b' or ''
1715  $output .= '<i>';
1716  $state .= 'i';
1717  }
1718  } elseif ( $thislen == 3 ) {
1719  if ( $state === 'b' ) {
1720  $output .= '</b>';
1721  $state = '';
1722  } elseif ( $state === 'bi' ) {
1723  $output .= '</i></b><i>';
1724  $state = 'i';
1725  } elseif ( $state === 'ib' ) {
1726  $output .= '</b>';
1727  $state = 'i';
1728  } elseif ( $state === 'both' ) {
1729  $output .= '<i><b>' . $buffer . '</b>';
1730  $state = 'i';
1731  } else { // $state can be 'i' or ''
1732  $output .= '<b>';
1733  $state .= 'b';
1734  }
1735  } elseif ( $thislen == 5 ) {
1736  if ( $state === 'b' ) {
1737  $output .= '</b><i>';
1738  $state = 'i';
1739  } elseif ( $state === 'i' ) {
1740  $output .= '</i><b>';
1741  $state = 'b';
1742  } elseif ( $state === 'bi' ) {
1743  $output .= '</i></b>';
1744  $state = '';
1745  } elseif ( $state === 'ib' ) {
1746  $output .= '</b></i>';
1747  $state = '';
1748  } elseif ( $state === 'both' ) {
1749  $output .= '<i><b>' . $buffer . '</b></i>';
1750  $state = '';
1751  } else { // ($state == '')
1752  $buffer = '';
1753  $state = 'both';
1754  }
1755  }
1756  }
1757  $i++;
1758  }
1759  // Now close all remaining tags. Notice that the order is important.
1760  if ( $state === 'b' || $state === 'ib' ) {
1761  $output .= '</b>';
1762  }
1763  if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
1764  $output .= '</i>';
1765  }
1766  if ( $state === 'bi' ) {
1767  $output .= '</b>';
1768  }
1769  // There might be lonely ''''', so make sure we have a buffer
1770  if ( $state === 'both' && $buffer ) {
1771  $output .= '<b><i>' . $buffer . '</i></b>';
1772  }
1773  return $output;
1774  }
1775 
1789  public function replaceExternalLinks( $text ) {
1790 
1791  $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1792  if ( $bits === false ) {
1793  throw new MWException( "PCRE needs to be compiled with "
1794  . "--enable-unicode-properties in order for MediaWiki to function" );
1795  }
1796  $s = array_shift( $bits );
1797 
1798  $i = 0;
1799  while ( $i < count( $bits ) ) {
1800  $url = $bits[$i++];
1801  $i++; // protocol
1802  $text = $bits[$i++];
1803  $trail = $bits[$i++];
1804 
1805  # The characters '<' and '>' (which were escaped by
1806  # removeHTMLtags()) should not be included in
1807  # URLs, per RFC 2396.
1808  $m2 = [];
1809  if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
1810  $text = substr( $url, $m2[0][1] ) . ' ' . $text;
1811  $url = substr( $url, 0, $m2[0][1] );
1812  }
1813 
1814  # If the link text is an image URL, replace it with an <img> tag
1815  # This happened by accident in the original parser, but some people used it extensively
1816  $img = $this->maybeMakeExternalImage( $text );
1817  if ( $img !== false ) {
1818  $text = $img;
1819  }
1820 
1821  $dtrail = '';
1822 
1823  # Set linktype for CSS - if URL==text, link is essentially free
1824  $linktype = ( $text === $url ) ? 'free' : 'text';
1825 
1826  # No link text, e.g. [http://domain.tld/some.link]
1827  if ( $text == '' ) {
1828  # Autonumber
1829  $langObj = $this->getTargetLanguage();
1830  $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
1831  $linktype = 'autonumber';
1832  } else {
1833  # Have link text, e.g. [http://domain.tld/some.link text]s
1834  # Check for trail
1835  list( $dtrail, $trail ) = Linker::splitTrail( $trail );
1836  }
1837 
1838  $text = $this->getConverterLanguage()->markNoConversion( $text );
1839 
1840  $url = Sanitizer::cleanUrl( $url );
1841 
1842  # Use the encoded URL
1843  # This means that users can paste URLs directly into the text
1844  # Funny characters like ö aren't valid in URLs anyway
1845  # This was changed in August 2004
1846  $s .= Linker::makeExternalLink( $url, $text, false, $linktype,
1847  $this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail;
1848 
1849  # Register link in the output object.
1850  # Replace unnecessary URL escape codes with the referenced character
1851  # This prevents spammers from hiding links from the filters
1852  $pasteurized = self::normalizeLinkUrl( $url );
1853  $this->mOutput->addExternalLink( $pasteurized );
1854  }
1855 
1856  return $s;
1857  }
1858 
1868  public static function getExternalLinkRel( $url = false, $title = null ) {
1870  $ns = $title ? $title->getNamespace() : false;
1871  if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
1872  && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
1873  ) {
1874  return 'nofollow';
1875  }
1876  return null;
1877  }
1878 
1889  public function getExternalLinkAttribs( $url = false ) {
1890  $attribs = [];
1891  $rel = self::getExternalLinkRel( $url, $this->mTitle );
1892 
1893  $target = $this->mOptions->getExternalLinkTarget();
1894  if ( $target ) {
1895  $attribs['target'] = $target;
1896  if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
1897  // T133507. New windows can navigate parent cross-origin.
1898  // Including noreferrer due to lacking browser
1899  // support of noopener. Eventually noreferrer should be removed.
1900  if ( $rel !== '' ) {
1901  $rel .= ' ';
1902  }
1903  $rel .= 'noreferrer noopener';
1904  }
1905  }
1906  $attribs['rel'] = $rel;
1907  return $attribs;
1908  }
1909 
1917  public static function replaceUnusualEscapes( $url ) {
1918  wfDeprecated( __METHOD__, '1.24' );
1919  return self::normalizeLinkUrl( $url );
1920  }
1921 
1931  public static function normalizeLinkUrl( $url ) {
1932  # First, make sure unsafe characters are encoded
1933  $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
1934  function ( $m ) {
1935  return rawurlencode( $m[0] );
1936  },
1937  $url
1938  );
1939 
1940  $ret = '';
1941  $end = strlen( $url );
1942 
1943  # Fragment part - 'fragment'
1944  $start = strpos( $url, '#' );
1945  if ( $start !== false && $start < $end ) {
1946  $ret = self::normalizeUrlComponent(
1947  substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
1948  $end = $start;
1949  }
1950 
1951  # Query part - 'query' minus &=+;
1952  $start = strpos( $url, '?' );
1953  if ( $start !== false && $start < $end ) {
1954  $ret = self::normalizeUrlComponent(
1955  substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
1956  $end = $start;
1957  }
1958 
1959  # Scheme and path part - 'pchar'
1960  # (we assume no userinfo or encoded colons in the host)
1961  $ret = self::normalizeUrlComponent(
1962  substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
1963 
1964  return $ret;
1965  }
1966 
1967  private static function normalizeUrlComponent( $component, $unsafe ) {
1968  $callback = function ( $matches ) use ( $unsafe ) {
1969  $char = urldecode( $matches[0] );
1970  $ord = ord( $char );
1971  if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
1972  # Unescape it
1973  return $char;
1974  } else {
1975  # Leave it escaped, but use uppercase for a-f
1976  return strtoupper( $matches[0] );
1977  }
1978  };
1979  return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
1980  }
1981 
1990  private function maybeMakeExternalImage( $url ) {
1991  $imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
1992  $imagesexception = !empty( $imagesfrom );
1993  $text = false;
1994  # $imagesfrom could be either a single string or an array of strings, parse out the latter
1995  if ( $imagesexception && is_array( $imagesfrom ) ) {
1996  $imagematch = false;
1997  foreach ( $imagesfrom as $match ) {
1998  if ( strpos( $url, $match ) === 0 ) {
1999  $imagematch = true;
2000  break;
2001  }
2002  }
2003  } elseif ( $imagesexception ) {
2004  $imagematch = ( strpos( $url, $imagesfrom ) === 0 );
2005  } else {
2006  $imagematch = false;
2007  }
2008 
2009  if ( $this->mOptions->getAllowExternalImages()
2010  || ( $imagesexception && $imagematch )
2011  ) {
2012  if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
2013  # Image found
2014  $text = Linker::makeExternalImage( $url );
2015  }
2016  }
2017  if ( !$text && $this->mOptions->getEnableImageWhitelist()
2018  && preg_match( self::EXT_IMAGE_REGEX, $url )
2019  ) {
2020  $whitelist = explode(
2021  "\n",
2022  wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
2023  );
2024 
2025  foreach ( $whitelist as $entry ) {
2026  # Sanitize the regex fragment, make it case-insensitive, ignore blank entries/comments
2027  if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
2028  continue;
2029  }
2030  if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
2031  # Image matches a whitelist entry
2032  $text = Linker::makeExternalImage( $url );
2033  break;
2034  }
2035  }
2036  }
2037  return $text;
2038  }
2039 
2049  public function replaceInternalLinks( $s ) {
2050  $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) );
2051  return $s;
2052  }
2053 
2062  public function replaceInternalLinks2( &$s ) {
2064 
2065  static $tc = false, $e1, $e1_img;
2066  # the % is needed to support urlencoded titles as well
2067  if ( !$tc ) {
2068  $tc = Title::legalChars() . '#%';
2069  # Match a link having the form [[namespace:link|alternate]]trail
2070  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2071  # Match cases where there is no "]]", which might still be images
2072  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2073  }
2074 
2075  $holders = new LinkHolderArray( $this );
2076 
2077  # split the entire text string on occurrences of [[
2078  $a = StringUtils::explode( '[[', ' ' . $s );
2079  # get the first element (all text up to first [[), and remove the space we added
2080  $s = $a->current();
2081  $a->next();
2082  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2083  $s = substr( $s, 1 );
2084 
2085  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2086  $e2 = null;
2087  if ( $useLinkPrefixExtension ) {
2088  # Match the end of a line for a word that's not followed by whitespace,
2089  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2091  $charset = $wgContLang->linkPrefixCharset();
2092  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2093  }
2094 
2095  if ( is_null( $this->mTitle ) ) {
2096  throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2097  }
2098  $nottalk = !$this->mTitle->isTalkPage();
2099 
2100  if ( $useLinkPrefixExtension ) {
2101  $m = [];
2102  if ( preg_match( $e2, $s, $m ) ) {
2103  $first_prefix = $m[2];
2104  } else {
2105  $first_prefix = false;
2106  }
2107  } else {
2108  $prefix = '';
2109  }
2110 
2111  $useSubpages = $this->areSubpagesAllowed();
2112 
2113  // @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
2114  # Loop for each link
2115  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2116  // @codingStandardsIgnoreEnd
2117 
2118  # Check for excessive memory usage
2119  if ( $holders->isBig() ) {
2120  # Too big
2121  # Do the existence check, replace the link holders and clear the array
2122  $holders->replace( $s );
2123  $holders->clear();
2124  }
2125 
2126  if ( $useLinkPrefixExtension ) {
2127  if ( preg_match( $e2, $s, $m ) ) {
2128  $prefix = $m[2];
2129  $s = $m[1];
2130  } else {
2131  $prefix = '';
2132  }
2133  # first link
2134  if ( $first_prefix ) {
2135  $prefix = $first_prefix;
2136  $first_prefix = false;
2137  }
2138  }
2139 
2140  $might_be_img = false;
2141 
2142  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2143  $text = $m[2];
2144  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2145  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2146  # the real problem is with the $e1 regex
2147  # See bug 1300.
2148  # Still some problems for cases where the ] is meant to be outside punctuation,
2149  # and no image is in sight. See bug 2095.
2150  if ( $text !== ''
2151  && substr( $m[3], 0, 1 ) === ']'
2152  && strpos( $text, '[' ) !== false
2153  ) {
2154  $text .= ']'; # so that replaceExternalLinks($text) works later
2155  $m[3] = substr( $m[3], 1 );
2156  }
2157  # fix up urlencoded title texts
2158  if ( strpos( $m[1], '%' ) !== false ) {
2159  # Should anchors '#' also be rejected?
2160  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2161  }
2162  $trail = $m[3];
2163  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2164  # Invalid, but might be an image with a link in its caption
2165  $might_be_img = true;
2166  $text = $m[2];
2167  if ( strpos( $m[1], '%' ) !== false ) {
2168  $m[1] = rawurldecode( $m[1] );
2169  }
2170  $trail = "";
2171  } else { # Invalid form; output directly
2172  $s .= $prefix . '[[' . $line;
2173  continue;
2174  }
2175 
2176  $origLink = $m[1];
2177 
2178  # Don't allow internal links to pages containing
2179  # PROTO: where PROTO is a valid URL protocol; these
2180  # should be external links.
2181  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2182  $s .= $prefix . '[[' . $line;
2183  continue;
2184  }
2185 
2186  # Make subpage if necessary
2187  if ( $useSubpages ) {
2188  $link = $this->maybeDoSubpageLink( $origLink, $text );
2189  } else {
2190  $link = $origLink;
2191  }
2192 
2193  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2194  if ( !$noforce ) {
2195  # Strip off leading ':'
2196  $link = substr( $link, 1 );
2197  }
2198 
2199  $unstrip = $this->mStripState->unstripNoWiki( $link );
2200  $nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
2201  if ( $nt === null ) {
2202  $s .= $prefix . '[[' . $line;
2203  continue;
2204  }
2205 
2206  $ns = $nt->getNamespace();
2207  $iw = $nt->getInterwiki();
2208 
2209  if ( $might_be_img ) { # if this is actually an invalid link
2210  if ( $ns == NS_FILE && $noforce ) { # but might be an image
2211  $found = false;
2212  while ( true ) {
2213  # look at the next 'line' to see if we can close it there
2214  $a->next();
2215  $next_line = $a->current();
2216  if ( $next_line === false || $next_line === null ) {
2217  break;
2218  }
2219  $m = explode( ']]', $next_line, 3 );
2220  if ( count( $m ) == 3 ) {
2221  # the first ]] closes the inner link, the second the image
2222  $found = true;
2223  $text .= "[[{$m[0]}]]{$m[1]}";
2224  $trail = $m[2];
2225  break;
2226  } elseif ( count( $m ) == 2 ) {
2227  # if there's exactly one ]] that's fine, we'll keep looking
2228  $text .= "[[{$m[0]}]]{$m[1]}";
2229  } else {
2230  # if $next_line is invalid too, we need look no further
2231  $text .= '[[' . $next_line;
2232  break;
2233  }
2234  }
2235  if ( !$found ) {
2236  # we couldn't find the end of this imageLink, so output it raw
2237  # but don't ignore what might be perfectly normal links in the text we've examined
2238  $holders->merge( $this->replaceInternalLinks2( $text ) );
2239  $s .= "{$prefix}[[$link|$text";
2240  # note: no $trail, because without an end, there *is* no trail
2241  continue;
2242  }
2243  } else { # it's not an image, so output it raw
2244  $s .= "{$prefix}[[$link|$text";
2245  # note: no $trail, because without an end, there *is* no trail
2246  continue;
2247  }
2248  }
2249 
2250  $wasblank = ( $text == '' );
2251  if ( $wasblank ) {
2252  $text = $link;
2253  } else {
2254  # Bug 4598 madness. Handle the quotes only if they come from the alternate part
2255  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2256  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2257  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2258  $text = $this->doQuotes( $text );
2259  }
2260 
2261  # Link not escaped by : , create the various objects
2262  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2263  # Interwikis
2264  if (
2265  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2266  Language::fetchLanguageName( $iw, null, 'mw' ) ||
2267  in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
2268  )
2269  ) {
2270  # Bug 24502: filter duplicates
2271  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2272  $this->mLangLinkLanguages[$iw] = true;
2273  $this->mOutput->addLanguageLink( $nt->getFullText() );
2274  }
2275 
2276  $s = rtrim( $s . $prefix );
2277  $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
2278  continue;
2279  }
2280 
2281  if ( $ns == NS_FILE ) {
2282  if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
2283  if ( $wasblank ) {
2284  # if no parameters were passed, $text
2285  # becomes something like "File:Foo.png",
2286  # which we don't want to pass on to the
2287  # image generator
2288  $text = '';
2289  } else {
2290  # recursively parse links inside the image caption
2291  # actually, this will parse them in any other parameters, too,
2292  # but it might be hard to fix that, and it doesn't matter ATM
2293  $text = $this->replaceExternalLinks( $text );
2294  $holders->merge( $this->replaceInternalLinks2( $text ) );
2295  }
2296  # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2297  $s .= $prefix . $this->armorLinks(
2298  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2299  } else {
2300  $s .= $prefix . $trail;
2301  }
2302  continue;
2303  }
2304 
2305  if ( $ns == NS_CATEGORY ) {
2306  $s = rtrim( $s . "\n" ); # bug 87
2307 
2308  if ( $wasblank ) {
2309  $sortkey = $this->getDefaultSort();
2310  } else {
2311  $sortkey = $text;
2312  }
2313  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2314  $sortkey = str_replace( "\n", '', $sortkey );
2315  $sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
2316  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2317 
2321  $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;
2322 
2323  continue;
2324  }
2325  }
2326 
2327  # Self-link checking. For some languages, variants of the title are checked in
2328  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2329  # for linking to a different variant.
2330  if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2331  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2332  continue;
2333  }
2334 
2335  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2336  # @todo FIXME: Should do batch file existence checks, see comment below
2337  if ( $ns == NS_MEDIA ) {
2338  # Give extensions a chance to select the file revision for us
2339  $options = [];
2340  $descQuery = false;
2341  Hooks::run( 'BeforeParserFetchFileAndTitle',
2342  [ $this, $nt, &$options, &$descQuery ] );
2343  # Fetch and register the file (file title may be different via hooks)
2344  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2345  # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2346  $s .= $prefix . $this->armorLinks(
2347  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2348  continue;
2349  }
2350 
2351  # Some titles, such as valid special pages or files in foreign repos, should
2352  # be shown as bluelinks even though they're not included in the page table
2353  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2354  # batch file existence checks for NS_FILE and NS_MEDIA
2355  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2356  $this->mOutput->addLink( $nt );
2357  $s .= $this->makeKnownLinkHolder( $nt, $text, [], $trail, $prefix );
2358  } else {
2359  # Links will be added to the output link list after checking
2360  $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2361  }
2362  }
2363  return $holders;
2364  }
2365 
2380  public function makeKnownLinkHolder( $nt, $text = '', $query = [], $trail = '', $prefix = '' ) {
2381  list( $inside, $trail ) = Linker::splitTrail( $trail );
2382 
2383  if ( is_string( $query ) ) {
2384  $query = wfCgiToArray( $query );
2385  }
2386  if ( $text == '' ) {
2387  $text = htmlspecialchars( $nt->getPrefixedText() );
2388  }
2389 
2390  $link = $this->getLinkRenderer()->makeKnownLink(
2391  $nt, new HtmlArmor( "$prefix$text$inside" ), [], $query
2392  );
2393 
2394  return $this->armorLinks( $link ) . $trail;
2395  }
2396 
2407  public function armorLinks( $text ) {
2408  return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/',
2409  self::MARKER_PREFIX . "NOPARSE$1", $text );
2410  }
2411 
2416  public function areSubpagesAllowed() {
2417  # Some namespaces don't allow subpages
2418  return MWNamespace::hasSubpages( $this->mTitle->getNamespace() );
2419  }
2420 
2429  public function maybeDoSubpageLink( $target, &$text ) {
2430  return Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
2431  }
2432 
2441  public function doBlockLevels( $text, $linestart ) {
2442  return BlockLevelPass::doBlockLevels( $text, $linestart );
2443  }
2444 
/**
 * Return the value of a magic variable such as PAGENAME or CURRENTDAY.
 *
 * Values computed in the switch are stored in $this->mVarCache; the
 * cases that return directly (site configuration values, direction
 * mark, hook-provided values) bypass that cache.
 *
 * @param string $index Magic variable identifier, e.g. 'pagename'
 * @param bool|PPFrame $frame Passed on to the ParserGetVariableValueSwitch hook
 * @throws MWException When no title is set on the parser
 * @return string|int|null
 */
public function getVariableValue( $index, $frame = false ) {
	global $wgContLang, $wgSitename, $wgServer, $wgServerName;
	global $wgArticlePath, $wgScriptPath, $wgStylePath, $wgLanguageCode;

	if ( is_null( $this->mTitle ) ) {
		// If no title set, bad things are going to happen
		// later. Title should always be set since this
		// should only be called in the middle of a parse
		// operation (but the unit-tests do funky stuff)
		throw new MWException( __METHOD__ . ' Should only be '
			. ' called while parsing (no title set)' );
	}

	// Serve from the per-parse cache unless a hook handler vetoes it
	if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$this, &$this->mVarCache ] ) ) {
		if ( isset( $this->mVarCache[$index] ) ) {
			return $this->mVarCache[$index];
		}
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', [ &$this, &$ts ] );

	$pageLang = $this->getFunctionLang();

	switch ( $index ) {
		case '!':
			$value = '|';
			break;
		case 'currentmonth':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
			break;
		case 'currentmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentday':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
			break;
		case 'currentday2':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
			break;
		case 'localmonth':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
			break;
		case 'localmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localday':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
			break;
		case 'localday2':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
			break;
		case 'pagename':
			$value = wfEscapeWikiText( $this->mTitle->getText() );
			break;
		case 'pagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
			break;
		case 'fullpagename':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
			break;
		case 'subpagename':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
			break;
		case 'subpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$value = wfEscapeWikiText( $this->mTitle->getRootText() );
			break;
		case 'rootpagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getRootText()
			) ) );
			break;
		case 'basepagename':
			$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
			break;
		case 'basepagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getBaseText()
			) ) );
			break;
		case 'talkpagename':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
			} else {
				$value = '';
			}
			break;
		case 'talkpagenamee':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
			} else {
				$value = '';
			}
			break;
		case 'subjectpagename':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
			break;
		case 'pageid': // requested in bug 23427
			$pageid = $this->getTitle()->getArticleID();
			if ( $pageid == 0 ) {
				# 0 means the page doesn't exist in the database,
				# which means the user is previewing a new page.
				# The vary-revision flag must be set, because the magic word
				# will have a different value once the page is saved.
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
			}
			$value = $pageid ? $pageid : null;
			break;
		case 'revisionid':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
			$value = $this->mRevisionId;
			break;
		case 'revisionday':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
			break;
		case 'revisionday2':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 6, 2 );
			break;
		case 'revisionmonth':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 4, 2 );
			break;
		case 'revisionmonth1':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
			break;
		case 'revisionyear':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 0, 4 );
			break;
		case 'revisiontimestamp':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
			$value = $this->getRevisionTimestamp();
			break;
		case 'revisionuser':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
			$value = $this->getRevisionUser();
			break;
		case 'revisionsize':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
			$value = $this->getRevisionSize();
			break;
		case 'namespace':
			$value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacee':
			$value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacenumber':
			$value = $this->mTitle->getNamespace();
			break;
		case 'talkspace':
			$value = $this->mTitle->canTalk()
				? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
				: '';
			break;
		case 'talkspacee':
			$value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
			break;
		case 'subjectspace':
			$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
			break;
		case 'currentdayname':
			$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
			break;
		case 'currentyear':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
			break;
		case 'currenttime':
			$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
			break;
		case 'currentweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
			break;
		case 'currentdow':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
			break;
		case 'localdayname':
			$value = $pageLang->getWeekdayName(
				(int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
			);
			break;
		case 'localyear':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
			break;
		case 'localtime':
			$value = $pageLang->time(
				MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
				false,
				false
			);
			break;
		case 'localhour':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
			break;
		case 'localweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
			break;
		case 'localdow':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
			break;
		case 'numberofarticles':
			$value = $pageLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $pageLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $pageLang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$value = $pageLang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$value = $pageLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$value = $pageLang->formatNum( SiteStats::edits() );
			break;
		case 'currenttimestamp':
			$value = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
			break;
		case 'currentversion':
			$value = SpecialVersion::getVersion();
			break;
		case 'articlepath':
			return $wgArticlePath;
		case 'sitename':
			return $wgSitename;
		case 'server':
			return $wgServer;
		case 'servername':
			return $wgServerName;
		case 'scriptpath':
			return $wgScriptPath;
		case 'stylepath':
			return $wgStylePath;
		case 'directionmark':
			return $pageLang->getDirMark();
		case 'contentlanguage':
			return $wgLanguageCode;
		case 'cascadingsources':
			$value = CoreParserFunctions::cascadingsources( $this );
			break;
		default:
			// Unknown identifier: let hook handlers supply the value
			$ret = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				[ &$this, &$this->mVarCache, &$index, &$ret, &$frame ]
			);

			return $ret;
	}

	if ( $index ) {
		$this->mVarCache[$index] = $value;
	}

	return $value;
}
2802 
/**
 * (Re)build the MagicWordArray objects for variable IDs and subst IDs
 * and store them in $this->mVariables and $this->mSubstWords.
 */
public function initialiseVariables() {
	$this->mVariables = new MagicWordArray( MagicWord::getVariableIDs() );
	$this->mSubstWords = new MagicWordArray( MagicWord::getSubstIDs() );
}
2815 
/**
 * Hand wikitext to the preprocessor and return the object it produces.
 *
 * @param string $text Wikitext
 * @param int $flags Preprocessor flags, e.g. self::PTD_FOR_INCLUSION
 * @return PPNode Result of Preprocessor::preprocessToObj()
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
2842 
/**
 * Split a string into its leading whitespace, its trimmed core and its
 * trailing whitespace, using the default character set of ltrim()/rtrim().
 *
 * @param string $s
 * @return array [ leading whitespace, trimmed text, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	$withoutLead = ltrim( $s );
	$leading = substr( $s, 0, strlen( $s ) - strlen( $withoutLead ) );

	$core = rtrim( $withoutLead );
	$trailLength = strlen( $withoutLead ) - strlen( $core );
	# substr() with offset -0 would return the whole string, hence the guard
	$trailing = $trailLength > 0 ? substr( $withoutLead, -$trailLength ) : '';

	return [ $leading, $core, $trailing ];
}
2862 
/**
 * Preprocess $text and expand it in the given frame.
 *
 * Empty text, and text longer than the max include size from the parser
 * options, is returned unchanged.
 *
 * @param string $text Wikitext to expand
 * @param bool|PPFrame|array $frame Frame to expand in; false creates a
 *   fresh frame, any other non-PPFrame value is wrapped in a custom frame
 * @param bool $argsOnly When true, expand with PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Nothing to expand in empty text
	$length = strlen( $text );
	if ( $length < 1 ) {
		return $text;
	}
	# Prevent too big inclusions
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			wfDebug( __METHOD__ . " called using plain parameters instead of "
				. "a PPFrame instance. Creating custom frame.\n" );
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$root = $this->preprocessToDom( $text );
	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $root, $expandFlags );
}
2904 
/**
 * Turn a flat list of template-style arguments into an associative array.
 *
 * Arguments without "=" are positional and get consecutive integer keys
 * starting at 1 (stored untrimmed). Arguments with "=" are split at the
 * first "="; both the name and the value are trimmed.
 *
 * @param string[] $args
 * @return array
 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	$nextIndex = 1;

	foreach ( $args as $arg ) {
		$pieces = explode( '=', $arg, 2 );
		if ( count( $pieces ) === 1 ) {
			# No "=": positional argument
			$assocArgs[$nextIndex++] = $arg;
		} else {
			# trim() always yields a string, so no false-checks are needed
			$assocArgs[trim( $pieces[0] )] = trim( $pieces[1] );
		}
	}

	return $assocArgs;
}
2933 
/**
 * Add a parser-limit warning to the output and put the page into the
 * matching tracking category.
 *
 * The message key is "$limitationType-warning" and the category key is
 * "$limitationType-category".
 *
 * @param string $limitationType Key prefix shared by message and category
 * @param string|int $current Current value, passed as a numeric message parameter
 * @param string|int $max Limit value, passed as a numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max is harmless even for messages that do not use them.
	# ->inLanguage( $this->mOptions->getUserLangObj() ) is deliberately not used:
	# the warning is shown only during preview, and it would split the parser
	# cache unnecessarily.
	$message = wfMessage( $limitationType . '-warning' )->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( $limitationType . '-category' );
}
2969 
/**
 * Return the text to substitute for a double-brace expression, trying in
 * order: subst handling, variables, parser functions, special page
 * transclusion, template transclusion and interwiki transclusion.
 *
 * @param array $piece Parts of the brace expression; the keys read here
 *   are 'title', 'parts' and 'lineStart'
 * @param PPFrame $frame Frame the expression is expanded in
 * @return array Either [ 'object' => ... ] (to be expanded further by the
 *   caller's frame) or [ 'text' => ... ]
 */
public function braceSubstitution( $piece, $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	$args = ( null == $piece['parts'] ) ? [] : $piece['parts'];
	# When piece['parts'] is null, $args is a plain (empty) array rather than
	# a node list, so getLength() must not be called on it.
	$argsLength = is_array( $args ) ? count( $args ) : $args->getLength();

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $argsLength == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->getVariableValue( $id, $frame );
			if ( MagicWord::getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = MagicWord::get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = MagicWord::get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = MagicWord::get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}
			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			# The interface for parser functions allows for extracting
			# flags into the local scope. Extract any forwarded flags
			# here. Note that this can overwrite any local variable
			# whose name matches a key of $result.
			extract( $result );
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = $this->maybeDoSubpageLink( $part1, $subpage );
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getConverterLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				$context->setUser( $this->getUser() );
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				// Run the special page inside that context; a truthy result
				// means its output is available from the context
				$ret = SpecialPageFactory::capturePath( $title, $context );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					$this->disableCache();
				}
			} elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				wfDebug( __METHOD__ . ": template inclusion denied for " .
					$title->getPrefixedDBkey() . "\n" );
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# Bug 529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (bug 23588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# Strip a leading colon so the link below does not start with "::".
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3280 
/**
 * Look up a parser function by name and call its registered hook.
 *
 * The name is first matched against the case-sensitive synonyms, then,
 * lower-cased with the content language, against the case-insensitive
 * ones.
 *
 * @param PPFrame $frame Frame used to expand node arguments
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments; element 0 is the text after the colon
 * @throws MWException If the registered hook is not callable
 * @return array Always contains 'found' (bool). When the function was
 *   found, contains 'text' plus any flags returned by the hook; when the
 *   hook asked for parsing ('noparse' => false), 'text' is a preprocessor
 *   node and 'isChildObj' is true.
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	global $wgContLang;

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		throw new MWException( "Tag hook for $function is not callable\n" );
	}

	$allArgs = [ &$this ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				# Go through getPreprocessor(), as the other methods of this
				# class do, instead of reading the property directly
				$funcArgs[] = $this->getPreprocessor()->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	}

	# Hooks opt in to further parsing with 'noparse' => false
	$noparse = isset( $result['noparse'] ) ? $result['noparse'] : true;
	$preprocessFlags = isset( $result['preprocessFlags'] ) ? $result['preprocessFlags'] : 0;

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3387 
/**
 * Get the preprocessed DOM of a template, using the per-parser DOM cache
 * and the redirect cache.
 *
 * @param Title $title Template title as requested
 * @return array [ DOM or false when no text could be fetched, Title the
 *   text was finally loaded from ]
 */
public function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	# Follow a redirect recorded by an earlier call
	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return [ $this->mTplDomCache[$titleText], $title ];
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		# Stored under the requested key; note that isset() above does not
		# distinguish a stored false from a missing entry
		$this->mTplDomCache[$titleText] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	# Remember where the requested title ended up, if it differs
	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			[ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3427 
/**
 * Fetch the current revision of a title through the callback configured
 * in the parser options, memoising the result per prefixed DB key in an
 * LRU cache of 100 entries.
 *
 * @param Title $title
 * @return mixed Whatever the current-revision callback returned for
 *   this title (Revision::newFromTitle() result with the default callback)
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$cacheKey = $title->getPrefixedDBkey();
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$this->currentRevisionCache->set( $cacheKey,
			// Defaults to Parser::statelessFetchRevision()
			call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
		);
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3452 
/**
 * Stateless revision lookup: returns Revision::newFromTitle( $title ).
 *
 * @param Title $title
 * @param Parser|bool $parser Not used by this implementation
 * @return mixed Result of Revision::newFromTitle()
 */
public static function statelessFetchRevision( $title, $parser = false ) {
	$revision = Revision::newFromTitle( $title );

	return $revision;
}
3465 
/**
 * Fetch template text through the template callback from the parser
 * options and register every reported dependency on the parser output.
 *
 * @param Title $title
 * @return array [ text (DELETE characters replaced by "?" when it is a
 *   string), final Title ]
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$callback = $this->mOptions->getTemplateCallback();
	$fetched = call_user_func( $callback, $title, $this );

	// We use U+007F DELETE to distinguish strip markers from regular text.
	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		$text = strtr( $text, "\x7f", "?" );
	}

	if ( isset( $fetched['deps'] ) ) {
		foreach ( $fetched['deps'] as $dep ) {
			$depTitle = $dep['title'];
			$this->mOutput->addTemplate( $depTitle, $dep['page_id'], $dep['rev_id'] );
			if ( $depTitle->equals( $this->getTitle() ) ) {
				// If we transclude ourselves, the final result
				// will change based on the new version of the page
				$this->mOutput->setFlag( 'vary-revision' );
			}
		}
	}

	$finalTitle = isset( $fetched['finalTitle'] ) ? $fetched['finalTitle'] : $title;

	return [ $text, $finalTitle ];
}
3493 
/**
 * Fetch only the text of a template; see fetchTemplateAndTitle().
 *
 * @param Title $title
 * @return string|bool First element of fetchTemplateAndTitle()'s result
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );

	return $text;
}
3502 
/**
 * Fetch the wikitext of a template, following at most one redirect, and
 * collect the titles/revisions the result depends on.
 *
 * @param Title $title
 * @param Parser|bool $parser When given, the current revision is fetched
 *   via $parser->fetchCurrentRevisionOfTitle(); otherwise directly
 * @return array With keys 'text' (string, or false when unavailable),
 *   'finalTitle' (Title) and 'deps' (list of arrays with 'title',
 *   'page_id' and 'rev_id')
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];

	# Loop to fetch the article, with up to 1 redirect
	// @codingStandardsIgnoreStart Generic.CodeAnalysis.ForLoopWithTestFunctionCall.NotAllowed
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		// @codingStandardsIgnoreEnd
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			# No parser to go through: look the current revision up directly
			$rev = Revision::newFromTitle( $title );
		}
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = LinkCache::singleton();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id ];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id ];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but the title is in the MediaWiki namespace:
			# fall back to the interface message of that name
			global $wgContLang;
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}
	return [
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps ];
}
3594 
/**
 * Fetch a file and register it as a dependency, returning only the file.
 *
 * @param Title $title Title of the file
 * @param array $options Lookup options forwarded to fetchFileAndTitle()
 * @return mixed First element of the fetchFileAndTitle() result
 */
public function fetchFile( $title, $options = [] ) {
	list( $file ) = $this->fetchFileAndTitle( $title, $options );

	return $file;
}
3605 
/**
 * Fetch a file and register it on the parser output as an image
 * dependency, under the requested title and, when it differs, under the
 * title of the file that was actually found.
 *
 * @param Title $title Title of the file
 * @param array $options Lookup options forwarded to fetchFileNoRegister()
 * @return array Two-element list: the file (falsy when not found) and
 *  the title it was finally registered under
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( $file && !$title->equals( $file->getTitle() ) ) {
		# The lookup resolved to another title: report and register that one too
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}

	return [ $file, $title ];
}
3627 
/**
 * Look up a file without registering it as a dependency.
 *
 * @param Title $title Title of the file
 * @param array $options Lookup options; 'broken' forces a miss and
 *  'sha1' switches to a lookup by key instead of by name
 * @return mixed The file found, or false when 'broken' is set
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return wfFindFile( $title, $options );
}
3648 
/**
 * Transclude the content behind an interwiki title.
 *
 * Returns a localized error message instead of content when the feature
 * is disabled or when the resulting URL is longer than 255 bytes.
 *
 * @param Title $title Interwiki title to transclude
 * @param string $action Value used for the 'action' query parameter
 * @return string Fetched text or an error message
 */
public function interwikiTransclude( $title, $action ) {
	global $wgEnableScaryTranscluding;

	if ( !$wgEnableScaryTranscluding ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );

	if ( strlen( $url ) > 255 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}
	return $this->fetchScaryTemplateMaybeFromCache( $url );
}
3671 
/**
 * Return the content behind a URL, served from the 'transcache' table
 * when a sufficiently recent row exists and fetched over HTTP otherwise.
 * A successful fetch is written back to the table.
 *
 * @param string $url URL to fetch
 * @return string Cached or freshly fetched content, or a localized error
 *  message when the HTTP request failed
 */
public function fetchScaryTemplateMaybeFromCache( $url ) {
	global $wgTranscludeCacheExpiry;

	$dbr = wfGetDB( DB_SLAVE );
	# Oldest row timestamp that still counts as fresh
	$tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
	$obj = $dbr->selectRow( 'transcache', [ 'tc_time', 'tc_contents' ],
		[ 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ] );
	if ( $obj ) {
		return $obj->tc_contents;
	}

	$req = MWHttpRequest::factory( $url, [], __METHOD__ );
	$status = $req->execute(); // Status object
	if ( $status->isOK() ) {
		$text = $req->getContent();
	} elseif ( $req->getStatus() != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
	} else {
		return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
	}

	$dbw = wfGetDB( DB_MASTER );
	$dbw->replace( 'transcache', [ 'tc_url' ], [
		'tc_url' => $url,
		'tc_time' => $dbw->timestamp( time() ),
		'tc_contents' => $text
	] );
	return $text;
}
3706 
/**
 * Resolve a triple-brace argument ({{{name|default}}}) against a frame.
 *
 * The result is the frame's argument text when there is one; otherwise
 * the supplied default (only in html/pre output, or in wiki output inside
 * a template frame); otherwise the literal {{{...}}} construct rebuilt.
 *
 * @param array $piece Map with 'title' (name node) and 'parts' (default nodes)
 * @param PPFrame $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );
	$object = false;

	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultApplies = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultApplies ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	# Account for the argument size before deciding what to return
	$withinLimit = $this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( !$withinLimit ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( !$withinLimit ) {
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	if ( $object !== false ) {
		return [ 'object' => $object ];
	}

	return [ 'text' => $text ];
}
3753 
/**
 * Produce the replacement for an extension tag: run its hook (or, when
 * hooks are not run for the current output type, rebuild the tag as
 * text) and, unless the marker type is 'none', store the result in the
 * strip state and return the strip marker standing in for it.
 *
 * @param array $params Map with 'name' and optionally 'attr', 'inner',
 *  'close' (preprocessor nodes) and 'attributes' (extra name => value pairs)
 * @param PPFrame $frame Frame used to expand the nodes
 * @throws MWException When a registered hook is not callable or the
 *  resulting marker type is not 'none', 'nowiki' or 'general'
 * @return string Hook output (marker type 'none') or the strip marker
 */
public function extensionSubstitution( $params, $frame ) {
	$name = $frame->expand( $params['name'] );
	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	if ( $isFunctionTag ) {
		$markerType = 'none';
	} else {
		$markerType = 'general';
	}
	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			$attributes = $attributes + $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $this->mTagHooks[$name] ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}

			$output = call_user_func_array( $callback, [ &$this, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			# A hook may return [ 0 => output, 'markerType' => ... ].
			# Only the marker type is read back: extract() would let the
			# hook overwrite any local variable of this method.
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		if ( is_null( $attrText ) ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			# isset() treats a missing 'close' like a null one, without a notice
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
3845 
/**
 * Add to one of the include-size counters, unless doing so would push
 * it past the maximum include size from the parser options.
 *
 * @param string $type Key of the counter in mIncludeSizes
 * @param int $size Amount to add
 * @return bool False when the limit would be exceeded (counter left
 *  untouched), true when the counter was increased
 */
public function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
3861 
/**
 * Count one more expensive parser function call.
 *
 * @return bool True while the running count is still within the
 *  expensive parser function limit from the parser options
 */
public function incrementExpensiveFunctionCount() {
	$this->mExpensiveFunctionCount++;
	return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
}
3871 
/**
 * Strip double-underscore magic words (__TOC__, __NOTOC__, ...) from the
 * text and apply their effects to the parser state and parser output.
 *
 * The first __TOC__ is replaced by a '<!--MWTOC-->' placeholder; every
 * other double-underscore word is removed. Each word found is also
 * stored as a property on the parser output.
 *
 * @param string $text Text to process
 * @return string Text with the magic words removed
 */
public function doDoubleUnderscore( $text ) {

	# The position of __TOC__ needs to be recorded
	$mw = MagicWord::get( 'toc' );
	if ( $mw->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $mw->replace( '<!--MWTOC-->', $text, 1 );

		# Only keep the first one.
		$text = $mw->replace( '', $text );
	}

	# Now match and remove the rest of them
	$mwa = MagicWord::getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $mwa->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}
	# (bug 8068) Allow control over whether robots index a page.
	# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
	# is not desirable, the last one on the page should win.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( $this->mDoubleUnderscores as $key => $val ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
3929 
/**
 * Add a tracking category to the parser output for the current title.
 *
 * @param string $msg Message key identifying the tracking category
 * @return mixed Result of ParserOutput::addTrackingCategory()
 */
public function addTrackingCategory( $msg ) {
	$output = $this->mOutput;

	return $output->addTrackingCategory( $msg, $this->mTitle );
}
3938 
/**
 * Rewrite the <h1>..<h6> headings found in rendered text: assign unique
 * anchors, optionally number them, add edit-section markers, and build
 * the table of contents and the section tree.
 *
 * Side effects on the parser output: edit section tokens, new-section
 * flags, TOC HTML and its module, and (when $isMain is true) the section
 * tree. The parser's output type is switched to OT_WIKI while headings
 * are processed and restored afterwards.
 *
 * @param string $text Rendered text containing the headings
 * @param string $origText Original wikitext; preprocessed to a DOM that is
 *  walked to compute the offset of each section
 * @param bool $isMain When true, the section tree is stored on the parser
 *  output and the TOC may be inserted in front of the first heading
 * @return string Text with the headings rewritten and the TOC inserted
 */
public function formatHeadings( $text, $origText, $isMain = true ) {
	global $wgMaxTocLevel, $wgExperimentalHtmlIds;

	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = $showEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
		$showEditLink = $this->mOptions->getEditSection();
	}
	if ( $showEditLink ) {
		$this->mOutput->setEditSectionTokens( true );
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$full = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->mTitle->getPrefixedDBkey();
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Anchors already handed out, keyed by lower-cased anchor
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $wgMaxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $wgMaxTocLevel ) {
				if ( $prevtoclevel < $wgMaxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $wgMaxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (bug 8393)
		# * <i> (bug 26375)
		# * <b> (r105284)
		# * <bdi> (bug 72884)
		# * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		if ( $wgExperimentalHtmlIds ) {
			# For reverse compatibility, provide an id that's
			# HTML4-compatible, like we used to.
			# It may be worth noting, academically, that it's possible for
			# the legacy anchor to conflict with a non-legacy headline
			# anchor on the page. In this case likely the "correct" thing
			# would be to either drop the legacy anchors or make sure
			# they're numbered first. However, this would require people
			# to type in section names like "abc_.D7.93.D7.90.D7.A4"
			# manually, so let's not bother worrying about it.
			$legacyHeadline = Sanitizer::escapeId( $safeHeadline,
				[ 'noninitial', 'legacy' ] );
			$safeHeadline = Sanitizer::escapeId( $safeHeadline );

			if ( $legacyHeadline == $safeHeadline ) {
				# No reason to have both (in fact, we can't)
				$legacyHeadline = false;
			}
		} else {
			$legacyHeadline = false;
			$safeHeadline = Sanitizer::escapeId( $safeHeadline,
				'noninitial' );
		}

		# HTML names must be case-insensitively unique (bug 10721).
		# This does not apply to Unicode characters per
		# http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $legacyHeadline === false ) {
			$legacyArrayKey = false;
		} else {
			$legacyArrayKey = strtolower( $legacyHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$legacyAnchor = $legacyHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			// @codingStandardsIgnoreStart
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			// @codingStandardsIgnoreEnd
			$anchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only track the legacy anchor when there is one. Recording a
		# false key would set $refers[0] (PHP casts false to 0) and make
		# a later heading whose anchor is "0" look like a duplicate.
		if ( $legacyHeadline !== false ) {
			if ( isset( $refers[$legacyArrayKey] ) ) {
				// @codingStandardsIgnoreStart
				for ( $i = 2; isset( $refers["{$legacyArrayKey}_$i"] ); ++$i );
				// @codingStandardsIgnoreEnd
				$legacyAnchor .= "_$i";
				$refers["{$legacyArrayKey}_$i"] = true;
			} else {
				$refers[$legacyArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
			$toc .= Linker::tocLine( $anchor, $tocline,
				$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
				$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->mTitle->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pesudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline( $level,
			$matches['attrib'][$headlineCount], $anchor, $headline,
			$editlink, $legacyAnchor );

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
		$this->mOutput->addModules( 'mediawiki.toc' );
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		// One hook call per section; the section text is passed by
		// reference so handlers can modify it.
		Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $showEditLink ] );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] = $sections[0] . $toc . "\n";
	}

	$full .= implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		return str_replace( '<!--MWTOC-->', $toc, $full );
	} else {
		return $full;
	}
}
4353 
/**
 * Transform wikitext before it is saved: normalise line endings, run the
 * pre-save pass when the options ask for it, and unstrip the result.
 *
 * @param string $text Wikitext being saved
 * @param Title $title Title the text belongs to
 * @param User $user User performing the save
 * @param ParserOptions $options Options controlling the transform
 * @param bool $clearState Whether to lock the parser and reset its state
 * @return string Transformed wikitext
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	if ( $clearState ) {
		// Held in a local so it stays alive until this method returns
		$scopedLock = $this->lock();
	}
	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// CRLF is replaced first, then any remaining lone CR
	$text = str_replace( [ "\r\n", "\r" ], "\n", $text );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$result = $this->mStripState->unstripBoth( $text );

	# Reset
	$this->setUser( null );

	return $result;
}
4388 
/**
 * Pre-save transform pass: expand {{subst:}} constructs, replace the
 * signature tildes (~~~, ~~~~, ~~~~~), apply the pipe trick to
 * [[name (context)|]] and [[|name]] links, and trim trailing whitespace.
 *
 * @param string $text Wikitext to transform
 * @param User $user User whose signature replaces the tildes
 * @return string Transformed wikitext
 */
private function pstPass2( $text, $user ) {
	global $wgContLang;

	# Note: This is the timestamp saved as hardcoded wikitext to
	# the database, we use $wgContLang here in order to give
	# everyone the same signature and use the default one rather
	# than the one selected in each user's preferences.
	# (see also bug 12815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	$sigText = $this->getUserSig( $user );
	$text = strtr( $text, [
		'~~~~~' => $d,
		'~~~~' => "$sigText $d",
		'~~~' => $sigText
	] );

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，)$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	# Trim trailing whitespace
	$text = rtrim( $text );

	return $text;
}
4461 
/**
 * Build the wikitext signature for a user.
 *
 * A nickname longer than $wgMaxSigChars characters, or a fancy signature
 * that is not a well-formed XML fragment, falls back to the user name. A
 * valid fancy signature is returned after cleanSig(); anything else is
 * turned into the 'signature' / 'signature-anon' message.
 *
 * @param User &$user User to sign for
 * @param string|bool $nickname Nickname to use; false reads the user's
 *  'nickname' option
 * @param bool|null $fancySig Whether the nickname is raw signature
 *  wikitext; null reads the user's 'fancysig' option
 * @return string Signature wikitext
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	global $wgMaxSigChars;

	$username = $user->getName();

	# If not given, retrieve from the user object.
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}

	if ( is_null( $fancySig ) ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	$nickname = $nickname == null ? $username : $nickname;

	if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		if ( $this->validateSig( $nickname ) !== false ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		} else {
			# Failed to validate; fall back to the default
			$nickname = $username;
			wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
		}
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	$msgName = $user->isAnon() ? 'signature-anon' : 'signature';

	return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()
		->title( $this->getTitle() )->text();
}
4518 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|bool The unchanged text when well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4528 
/**
 * Clean up signature text: prefix every template call that is not
 * already a subst with the subst keyword, drop nested signature tildes,
 * and expand the result through the preprocessor.
 *
 * @param string $text Signature text
 * @param bool $parsing True when called from within an ongoing parse;
 *  when false the parser is locked and started on $wgTitle first, and
 *  the result is unstripped before being returned
 * @return string Cleaned signature, or the unchanged text when the
 *  clean-signatures option is off
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		global $wgTitle;
		// Held in a local so it stays alive until this method returns
		$scopedLock = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = MagicWord::get( 'subst' );
	$notYetSubst = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$substOpening = '{{' . $subst->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $notYetSubst, $substOpening, $text ) );

	$dom = $this->preprocessToDom( $text );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom );

	if ( $standalone ) {
		$text = $this->mStripState->unstripBoth( $text );
	}

	return $text;
}
4569 
/**
 * Remove every run of three to five tildes from the text, so that a
 * signature cannot itself contain a signature.
 *
 * @param string $text Text to clean
 * @return string Text without the tilde runs
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4580 
/**
 * Public entry point for setting up a parse; forwards to startParse().
 *
 * @param Title|null $title Title the parse is for
 * @param ParserOptions $options Options for the parse
 * @param int $outputType One of the Parser::OT_* constants
 * @param bool $clearState Whether to reset the parser state
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->startParse( $title, $options, $outputType, $clearState );
}
4595 
/**
 * Set the title, options and output type for a parse and, when asked,
 * reset the parser state.
 *
 * @param Title|null $title Title the parse is for
 * @param ParserOptions $options Options for the parse
 * @param int $outputType One of the Parser::OT_* constants
 * @param bool $clearState Whether to call clearState()
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4612 
/**
 * Run message text through preprocess(), guarding against re-entry: a
 * call made while another transformMsg() call is in progress returns
 * the text unchanged.
 *
 * @param string $text Message text
 * @param ParserOptions $options Options handed to preprocess()
 * @param Title|null $title Title to use; $wgTitle when not given
 * @return string Preprocessed text, or $text unchanged on a re-entrant call
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	if ( !$title ) {
		global $wgTitle;
		$title = $wgTitle;
	}

	try {
		$text = $this->preprocess( $text, $title, $options );
	} catch ( Exception $e ) {
		# Release the guard before propagating, otherwise every later
		# call in this request would return its text untransformed.
		$executing = false;
		throw $e;
	}

	$executing = false;
	return $text;
}
4640 
/**
 * Register a callback for an extension tag and add the tag to the strip list.
 *
 * @param string $tag Tag name (lowercased before use); must not contain
 *   "<", ">", CR or LF
 * @param callable $callback
 * @throws MWException If the tag name contains a forbidden character
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	# Strict comparison: a loose in_array() compares numeric-looking strings
	# by value, so e.g. "1e1" would be considered already present if "10" is.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4678 
/**
 * Register a callback for a transparent tag, i.e. one handled by
 * replaceTransparentTags().
 *
 * @param string $tag Tag name (lowercased before use); must not contain
 *   "<", ">", CR or LF
 * @param callable $callback
 * @throws MWException If the tag name contains a forbidden character
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setTransparentTagHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# The message names this method, matching the sibling set*Hook() methods
		throw new MWException(
			"Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call"
		);
	}
	$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
4706 
/**
 * Drop all registered tag hooks and function tag hooks, and restore the
 * strip list to its default value. Transparent tag hooks are not touched.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
4715 
/**
 * Register a parser function callback under a magic word id and index all
 * of the magic word's synonyms for lookup.
 *
 * @param string $id Magic word id
 * @param callable $callback
 * @param int $flags Bitfield; when self::SFH_NO_HASH is set the synonyms are
 *   registered without a leading "#"
 * @throws MWException If MagicWord::get() returns nothing for $id
 * @return callable|null The callback previously registered under $id, if any
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
	# Needed for the case-folding below; without this declaration the
	# variable is undefined in this scope.
	global $wgContLang;

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$mw = MagicWord::get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $mw->getSynonyms();
	# 0 = case-insensitive bucket, 1 = case-sensitive bucket
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $wgContLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
4791 
/**
 * List the ids under which parser function hooks are registered.
 *
 * @return array Keys of $this->mFunctionHooks
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
4800 
/**
 * Register a function tag hook (callback plus flags) for a tag and add the
 * tag to the strip list.
 *
 * @param string $tag Tag name (lowercased before use); must not contain
 *   "<", ">", CR or LF
 * @param callable $callback
 * @param int $flags Stored alongside the callback
 * @throws MWException If the tag name contains a forbidden character
 * @return array|null The [ callback, flags ] pair previously registered, if any
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}
	$old = isset( $this->mFunctionTagHooks[$tag] ) ?
		$this->mFunctionTagHooks[$tag] : null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	# Strict comparison: a loose in_array() compares numeric-looking strings
	# by value, so e.g. "1e1" would be considered already present if "10" is.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
4826 
/**
 * Replace link placeholders in $text by delegating to the link holder array.
 *
 * @param string &$text Text to process; handed to mLinkHolders->replace()
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
4837 
/**
 * Run $text through the link holder array's replaceText().
 *
 * @param string $text
 * @return string Whatever mLinkHolders->replaceText() returns
 */
public function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );

	return $replaced;
}
4848 
/**
 * Render the contents of a gallery tag: build an ImageGalleryBase object
 * from the tag attributes, add one entry per input line, and return its HTML.
 *
 * Each non-empty line has the form "Title|param|param|...". Parameters that
 * match an image magic word (alt, link, or a handler-specific parameter) are
 * interpreted; any other piece becomes the caption, the last one winning.
 *
 * @param string $text Body of the gallery tag, one image per line
 * @param array $params Tag attributes (mode, showfilename, caption, perrow,
 *   widths, heights, ...)
 * @return string HTML from the gallery's toHTML(), possibly altered by the
 *   AfterParserFetchFileAndTitle hook
 */
public function renderImageGallery( $text, $params ) {
	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );
	// File names are shown only when the attribute is present
	$ig->setShowFilename( isset( $params['showfilename'] ) );

	if ( isset( $params['caption'] ) ) {
		$caption = $params['caption'];
		$caption = htmlspecialchars( $caption );
		$caption = $this->replaceInternalLinks( $caption );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// $this cannot be passed by reference on newer PHP versions, so hand the
	// hook a by-reference alias instead.
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# NOTE(review): this assignment was missing from this copy of the file,
		# leaving $title undefined below. Restored with the File namespace as
		# the default for unprefixed names -- confirm against upstream.
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as ImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap = $paramMap + $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = new MagicWordArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			$parameterMatches = StringUtils::explode( '|', $matches[3] );

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							// check to see if link matches an absolute url, if not then it must be a wiki link.
							if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
								$link = $linkValue;
							} else {
								$localLinkTitle = Title::newFromText( $linkValue );
								if ( $localLinkTitle !== null ) {
									$link = $localLinkTitle->getLinkURL();
								}
							}
							break;
						default:
							// Must be a handler specific parameter.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label = '|' . $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = '|' . $parameterMatch;
				}
			}
			// Remove the pipe.
			$label = substr( $label, 1 );
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5015 
/**
 * Build (and cache per handler class) the map of image parameter magic words.
 *
 * @param object|bool $handler Media handler, or a false value for none
 * @return array Two elements: the map of magic word id => [ type, name ],
 *   and a MagicWordArray over the same ids. Type is 'horizAlign',
 *   'vertAlign', 'frame' or 'handler'.
 */
public function getImageParams( $handler ) {
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$handlerClass] ) ) {
		# Already computed for this handler class
		return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
	}

	# Initialise static lists
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;
	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $group => $groupNames ) {
			foreach ( $groupNames as $paramName ) {
				# Magic word ids use underscores: text-top => img_text_top
				$magicId = str_replace( '-', '_', "img_$paramName" );
				$internalParamMap[$magicId] = [ $group, $paramName ];
			}
		}
	}

	# Layer the handler's own parameters over the built-in ones
	$combined = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
			$combined[$magicId] = [ 'handler', $handlerParam ];
		}
	}

	$this->mImageParams[$handlerClass] = $combined;
	$this->mImageParamsMagicArray[$handlerClass] = new MagicWordArray( array_keys( $combined ) );

	return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
}
5059 
/**
 * Parse image options text and use it to make an image link.
 *
 * Splits $options on "|", matches each part against the image magic words
 * from getImageParams(), validates it, and hands the collected frame and
 * handler parameters to Linker::makeImageLink(). A part that does not
 * validate as a parameter is taken as the caption.
 *
 * @param Title $title Title of the file (presumably a Title object; only
 *   getText() is called on it here)
 * @param string $options Pipe-separated option text
 * @param object|bool $holders Object whose replaceText() stripAltText() should
 *   use, or false to use this parser's own link holders
 * @return string Return value of Linker::makeImageLink()
 */
5068  public function makeImage( $title, $options, $holders = false ) {
5069  # Check if the options text is of the form "options|alt text"
5070  # Options are:
5071  # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
5072  # * left no resizing, just left align. label is used for alt= only
5073  # * right same, but right aligned
5074  # * none same, but not aligned
5075  # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
5076  # * center center the image
5077  # * frame Keep original image size, no magnify-button.
5078  # * framed Same as "frame"
5079  # * frameless like 'thumb' but without a frame. Keeps user preferences for width
5080  # * upright reduce width for upright images, rounded to full __0 px
5081  # * border draw a 1px border around the image
5082  # * alt Text for HTML alt attribute (defaults to empty)
5083  # * class Set a class for img node
5084  # * link Set the target of the image link. Can be external, interwiki, or local
5085  # vertical-align values (no % or length right now):
5086  # * baseline
5087  # * sub
5088  # * super
5089  # * top
5090  # * text-top
5091  # * middle
5092  # * bottom
5093  # * text-bottom
5094 
5095  $parts = StringUtils::explode( "|", $options );
5096 
5097  # Give extensions a chance to select the file revision for us
# Note that $options is reused here: from now on it is the hook's option array,
# not the original option text (already split into $parts above).
5098  $options = [];
5099  $descQuery = false;
5100  Hooks::run( 'BeforeParserFetchFileAndTitle',
5101  [ $this, $title, &$options, &$descQuery ] );
5102  # Fetch and register the file (file title may be different via hooks)
5103  list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );
5104 
5105  # Get parameter map
5106  $handler = $file ? $file->getHandler() : false;
5107 
5108  list( $paramMap, $mwArray ) = $this->getImageParams( $handler );
5109 
5110  if ( !$file ) {
5111  $this->addTrackingCategory( 'broken-file-category' );
5112  }
5113 
5114  # Process the input parameters
5115  $caption = '';
5116  $params = [ 'frame' => [], 'handler' => [],
5117  'horizAlign' => [], 'vertAlign' => [] ];
5118  $seenformat = false;
5119  foreach ( $parts as $part ) {
5120  $part = trim( $part );
5121  list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
5122  $validated = false;
5123  if ( isset( $paramMap[$magicName] ) ) {
5124  list( $type, $paramName ) = $paramMap[$magicName];
5125 
5126  # Special case; width and height come in one variable together
5127  if ( $type === 'handler' && $paramName === 'width' ) {
5128  $parsedWidthParam = $this->parseWidthParam( $value );
5129  if ( isset( $parsedWidthParam['width'] ) ) {
5130  $width = $parsedWidthParam['width'];
5131  if ( $handler->validateParam( 'width', $width ) ) {
5132  $params[$type]['width'] = $width;
5133  $validated = true;
5134  }
5135  }
5136  if ( isset( $parsedWidthParam['height'] ) ) {
5137  $height = $parsedWidthParam['height'];
5138  if ( $handler->validateParam( 'height', $height ) ) {
5139  $params[$type]['height'] = $height;
5140  $validated = true;
5141  }
5142  }
5143  # else no validation -- bug 13436
5144  } else {
5145  if ( $type === 'handler' ) {
5146  # Validate handler parameter
5147  $validated = $handler->validateParam( $paramName, $value );
5148  } else {
5149  # Validate internal parameters
5150  switch ( $paramName ) {
5151  case 'manualthumb':
5152  case 'alt':
5153  case 'class':
5154  # @todo FIXME: Possibly check validity here for
5155  # manualthumb? downstream behavior seems odd with
5156  # missing manual thumbs.
5157  $validated = true;
5158  $value = $this->stripAltText( $value, $holders );
5159  break;
5160  case 'link':
# Three outcomes: empty value => 'no-link'; value starting with a known
# protocol => 'link-url' (only if the whole value is a valid URL);
# anything else is tried as a page title => 'link-title'.
5161  $chars = self::EXT_LINK_URL_CLASS;
5162  $addr = self::EXT_LINK_ADDR;
5163  $prots = $this->mUrlProtocols;
5164  if ( $value === '' ) {
5165  $paramName = 'no-link';
5166  $value = true;
5167  $validated = true;
5168  } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
5169  if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
5170  $paramName = 'link-url';
5171  $this->mOutput->addExternalLink( $value );
5172  if ( $this->mOptions->getExternalLinkTarget() ) {
5173  $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
5174  }
5175  $validated = true;
5176  }
5177  } else {
5178  $linkTitle = Title::newFromText( $value );
5179  if ( $linkTitle ) {
5180  $paramName = 'link-title';
5181  $value = $linkTitle;
5182  $this->mOutput->addLink( $linkTitle );
5183  $validated = true;
5184  }
5185  }
5186  break;
5187  case 'frameless':
5188  case 'framed':
5189  case 'thumbnail':
5190  // use first appearing option, discard others.
5191  $validated = ! $seenformat;
5192  $seenformat = true;
5193  break;
5194  default:
5195  # Most other things appear to be empty or numeric...
5196  $validated = ( $value === false || is_numeric( trim( $value ) ) );
5197  }
5198  }
5199 
5200  if ( $validated ) {
5201  $params[$type][$paramName] = $value;
5202  }
5203  }
5204  }
# Anything that did not validate as a parameter is treated as the caption;
# a later such part overwrites an earlier one.
5205  if ( !$validated ) {
5206  $caption = $part;
5207  }
5208  }
5209 
5210  # Process alignment parameters
# key() yields the first alignment name that was stored in each group
5211  if ( $params['horizAlign'] ) {
5212  $params['frame']['align'] = key( $params['horizAlign'] );
5213  }
5214  if ( $params['vertAlign'] ) {
5215  $params['frame']['valign'] = key( $params['vertAlign'] );
5216  }
5217 
5218  $params['frame']['caption'] = $caption;
5219 
5220  # Will the image be presented in a frame, with the caption below?
5221  $imageIsFramed = isset( $params['frame']['frame'] )
5222  || isset( $params['frame']['framed'] )
5223  || isset( $params['frame']['thumbnail'] )
5224  || isset( $params['frame']['manualthumb'] );
5225 
5226  # In the old days, [[Image:Foo|text...]] would set alt text. Later it
5227  # came to also set the caption, ordinary text after the image -- which
5228  # makes no sense, because that just repeats the text multiple times in
5229  # screen readers. It *also* came to set the title attribute.
5230  # Now that we have an alt attribute, we should not set the alt text to
5231  # equal the caption: that's worse than useless, it just repeats the
5232  # text. This is the framed/thumbnail case. If there's no caption, we
5233  # use the unnamed parameter for alt text as well, just for the time be-
5234  # ing, if the unnamed param is set and the alt param is not.
5235  # For the future, we need to figure out if we want to tweak this more,
5236  # e.g., introducing a title= parameter for the title; ignoring the un-
5237  # named parameter entirely for images without a caption; adding an ex-
5238  # plicit caption= parameter and preserving the old magic unnamed para-
5239  # meter for BC; ...
5240  if ( $imageIsFramed ) { # Framed image
5241  if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
5242  # No caption or alt text, add the filename as the alt text so
5243  # that screen readers at least get some description of the image
5244  $params['frame']['alt'] = $title->getText();
5245  }
5246  # Do not set $params['frame']['title'] because tooltips don't make sense
5247  # for framed images
5248  } else { # Inline image
5249  if ( !isset( $params['frame']['alt'] ) ) {
5250  # No alt text, use the "caption" for the alt text
5251  if ( $caption !== '' ) {
5252  $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5253  } else {
5254  # No caption, fall back to using the filename for the
5255  # alt text
5256  $params['frame']['alt'] = $title->getText();
5257  }
5258  }
5259  # Use the "caption" for the tooltip text
5260  $params['frame']['title'] = $this->stripAltText( $caption, $holders );
5261  }
5262 
5263  Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );
5264 
5265  # Linker does the rest
5266  $time = isset( $options['time'] ) ? $options['time'] : false;
5267  $ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
5268  $time, $descQuery, $this->mOptions->getThumbSize() );
5269 
5270  # Give the handler a chance to modify the parser object
5271  if ( $handler ) {
5272  $handler->parserTransformHook( $this, $file );
5273  }
5274 
5275  return $ret;
5276  }
5277 
/**
 * Reduce caption wikitext to plain text suitable for an alt or title
 * attribute: link placeholders are replaced by their text, strip markers are
 * expanded, and all tags are removed.
 *
 * @param string $caption
 * @param object|bool $holders Link holder object to use, or a false value to
 *   use this parser's own link holders
 * @return string
 */
protected function stripAltText( $caption, $holders ) {
	# We can't always use replaceLinkHoldersText() here: when called from
	# replaceInternalLinks2(), mLinkHolders isn't up to date, so the caller
	# passes its own holder array.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# Expand any strip markers now, so that nothing is left to be expanded
	# into HTML inside an attribute later, then drop the tags.
	$unstripped = $this->mStripState->unstripBoth( $plain );

	return Sanitizer::stripAllTags( $unstripped );
}
5301 
/**
 * Mark the current parser output as uncacheable by setting its cache expiry
 * to zero.
 *
 * @throws MWException If there is no parser output, i.e. no parse is running
 */
public function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	if ( $this->mOutput ) {
		$this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
		return;
	}

	throw new MWException( __METHOD__ .
		" can only be called when actually parsing something" );
}
5314 
/**
 * Expand variables in an attribute value and then expand its strip markers.
 * The result is both written back through the reference and returned.
 *
 * @param string &$text Attribute text, modified in place
 * @param object|bool $frame Frame handed to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	# Step 1: variables / templates
	$text = $this->replaceVariables( $text, $frame );

	# Step 2: strip markers
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5328 
/**
 * List every tag name that has a hook registered: transparent tag hooks
 * first, then tag hooks, then function tag hooks.
 *
 * @return array
 */
public function getTags() {
	$hookSets = [
		$this->mTransparentTagHooks,
		$this->mTagHooks,
		$this->mFunctionTagHooks,
	];

	$names = [];
	foreach ( $hookSets as $hooks ) {
		$names = array_merge( $names, array_keys( $hooks ) );
	}

	return $names;
}
5341 
/**
 * Replace transparent tags in the text with the output of their hooks.
 *
 * The tags are first swapped for markers by extractTagsAndParams(); each
 * marker is then substituted with the hook result, or with the original tag
 * text when no hook is registered under the lowercased element name.
 *
 * @param string $text
 * @return string
 */
public function replaceTransparentTags( $text ) {
	$extracted = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $extracted );

	$substitutions = [];
	foreach ( $extracted as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;
		$hookKey = strtolower( $element );

		$substitutions[$marker] = isset( $this->mTransparentTagHooks[$hookKey] )
			? call_user_func_array(
				$this->mTransparentTagHooks[$hookKey],
				[ $content, $params, $this ]
			)
			: $tag;
	}

	return strtr( $text, $substitutions );
}
5373 
/**
 * Break wikitext input into sections, and either return or replace the text
 * of one particular section.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section index, optionally preceded by
 *   dash-separated flags; a "T" flag makes the text be preprocessed with
 *   self::PTD_FOR_INCLUSION
 * @param string $mode 'get' or 'replace'
 * @param string $newText In 'replace' mode, the replacement section text; in
 *   'get' mode, the value returned when the section does not exist
 * @return string In 'get' mode the section text (or $newText if not found);
 *   in 'replace' mode the whole text with the section replaced (or the
 *   original text if not found)
 */
5403  private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
5404  global $wgTitle; # not generally used but removes an ugly failure mode
5405 
5406  $magicScopeVariable = $this->lock();
5407  $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
5408  $outText = '';
5409  $frame = $this->getPreprocessor()->newFrame();
5410 
5411  # Process section extraction flags
# The last dash-separated piece is the section index; the pieces before it are flags
5412  $flags = 0;
5413  $sectionParts = explode( '-', $sectionId );
5414  $sectionIndex = array_pop( $sectionParts );
5415  foreach ( $sectionParts as $part ) {
5416  if ( $part === 'T' ) {
5417  $flags |= self::PTD_FOR_INCLUSION;
5418  }
5419  }
5420 
5421  # Check for empty input
5422  if ( strval( $text ) === '' ) {
5423  # Only sections 0 and T-0 exist in an empty document
5424  if ( $sectionIndex == 0 ) {
5425  if ( $mode === 'get' ) {
5426  return '';
5427  } else {
5428  return $newText;
5429  }
5430  } else {
5431  if ( $mode === 'get' ) {
5432  return $newText;
5433  } else {
5434  return $text;
5435  }
5436  }
5437  }
5438 
5439  # Preprocess the text
5440  $root = $this->preprocessToDom( $text, $flags );
5441 
5442  # <h> nodes indicate section breaks
5443  # They can only occur at the top level, so we can find them by iterating the root's children
5444  $node = $root->getFirstChild();
5445 
5446  # Find the target section
5447  if ( $sectionIndex == 0 ) {
5448  # Section zero doesn't nest, level=big
5449  $targetLevel = 1000;
5450  } else {
5451  while ( $node ) {
5452  if ( $node->getName() === 'h' ) {
5453  $bits = $node->splitHeading();
5454  if ( $bits['i'] == $sectionIndex ) {
5455  $targetLevel = $bits['level'];
5456  break;
5457  }
5458  }
# In replace mode everything before the target section is copied through unchanged
5459  if ( $mode === 'replace' ) {
5460  $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5461  }
5462  $node = $node->getNextSibling();
5463  }
5464  }
5465 
5466  if ( !$node ) {
5467  # Not found
5468  if ( $mode === 'get' ) {
5469  return $newText;
5470  } else {
5471  return $text;
5472  }
5473  }
5474 
5475  # Find the end of the section, including nested sections
# Stops at the first other heading whose level is less than or equal to the target's
5476  do {
5477  if ( $node->getName() === 'h' ) {
5478  $bits = $node->splitHeading();
5479  $curLevel = $bits['level'];
5480  if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
5481  break;
5482  }
5483  }
5484  if ( $mode === 'get' ) {
5485  $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5486  }
5487  $node = $node->getNextSibling();
5488  } while ( $node );
5489 
5490  # Write out the remainder (in replace mode only)
5491  if ( $mode === 'replace' ) {
5492  # Output the replacement text
5493  # Add two newlines on -- trailing whitespace in $newText is conventionally
5494  # stripped by the editor, so we need both newlines to restore the paragraph gap
5495  # Only add trailing whitespace if there is newText
5496  if ( $newText != "" ) {
5497  $outText .= $newText . "\n\n";
5498  }
5499 
5500  while ( $node ) {
5501  $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5502  $node = $node->getNextSibling();
5503  }
5504  }
5505 
5506  if ( is_string( $outText ) ) {
5507  # Re-insert stripped tags
5508  $outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
5509  }
5510 
5511  return $outText;
5512  }
5513 
/**
 * Return the wikitext of one section of a page.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier as understood by
 *   extractSections()
 * @param string $defaultText Returned when the section does not exist
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$sectionText = $this->extractSections( $text, $sectionId, 'get', $defaultText );

	return $sectionText;
}
5531 
/**
 * Return the page wikitext with one section replaced by new text.
 *
 * @param string $oldText Current page wikitext
 * @param string|int $sectionId Section identifier as understood by
 *   extractSections()
 * @param string $newText Replacement text for the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$merged = $this->extractSections( $oldText, $sectionId, 'replace', $newText );

	return $merged;
}
5547 
/**
 * Get the id of the revision being parsed.
 *
 * @return int|null Value of $this->mRevisionId
 */
public function getRevisionId() {
	$revId = $this->mRevisionId;

	return $revId;
}
5556 
/**
 * Get the revision object for $this->mRevisionId, caching it on first use.
 *
 * @return object|null The cached or freshly looked-up revision; null when no
 *   revision id is set
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject !== null ) {
		return $this->mRevisionObject;
	}
	if ( $this->mRevisionId === null ) {
		return null;
	}

	$lookup = $this->mOptions->getCurrentRevisionCallback();
	$rev = call_user_func( $lookup, $this->getTitle(), $this );

	# When parsing a new revision the callback should already have been set
	# up to return that revision. If what it returns doesn't match
	# mRevisionId, load the revision by id instead.
	$mismatch = $rev && $rev->getId() != $this->mRevisionId;
	if ( $mismatch ) {
		$rev = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $rev;

	return $this->mRevisionObject;
}
5586 
/**
 * Get the timestamp associated with the current revision, adjusted for the
 * site-default timezone offset. Computed once and cached.
 *
 * @return string
 */
public function getRevisionTimestamp() {
	if ( is_null( $this->mRevisionTimestamp ) ) {
		# Needed for userAdjust() below; without this declaration the
		# variable is undefined in this scope.
		global $wgContLang;

		# Fall back to the current time when there is no revision object
		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );

	}
	return $this->mRevisionTimestamp;
}
5609 
/**
 * Get the name of the user that edited the current revision, cached after
 * the first lookup.
 *
 * @return string|null User name; stays null when there is no revision object
 *   and this is neither a wiki-output (pre-save) parse nor a preview
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revObject = $this->getRevisionObject();
	if ( $revObject ) {
		$this->mRevisionUser = $revObject->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# When this is being subst:'ed there is no revision yet, so fall
		# back to the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
5629 
/**
 * Get the size of the current revision, cached after the first lookup.
 *
 * @return int|null Size; stays null when there is no revision object and
 *   this is neither a wiki-output (pre-save) parse nor a preview
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revObject = $this->getRevisionObject();
	if ( $revObject ) {
		$this->mRevisionSize = $revObject->getSize();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# When this is being subst:'ed there is no revision yet, and the
		# substitution itself will change the size, so use the parser
		# input size instead
		$this->mRevisionSize = $this->mInputSize;
	}

	return $this->mRevisionSize;
}
5650 
/**
 * Set the default sort key, both on the parser and as the 'defaultsort'
 * property of the parser output.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	# Remember it locally first ...
	$this->mDefaultSort = $sort;

	# ... then publish it on the output object
	$output = $this->mOutput;
	$output->setProperty( 'defaultsort', $sort );
}
5660 
/**
 * Get the default sort key, with an empty string standing in for "not set".
 *
 * @return string $this->mDefaultSort, or '' when that is exactly false
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
5678 
/**
 * Get the raw default sort key without the empty-string fallback that
 * getDefaultSort() applies.
 *
 * @return string|bool Value of $this->mDefaultSort
 */
public function getCustomDefaultSort() {
	$sortKey = $this->mDefaultSort;

	return $sortKey;
}
5688 
/**
 * Guess the anchor ("#...") a section heading would get, from its wikitext.
 *
 * @param string $text Heading wikitext
 * @return string "#" followed by the escaped id
 */
public function guessSectionNameFromWikiText( $text ) {
	# Link markup would break the anchor, so reduce it to its text first
	$sectionName = $this->stripSectionName( $text );
	$anchor = Sanitizer::escapeId( $sectionName, 'noninitial' );

	return '#' . $anchor;
}
5704 
/**
 * Same as guessSectionNameFromWikiText(), but escapes the id with the
 * additional 'legacy' option.
 *
 * @param string $text Heading wikitext
 * @return string "#" followed by the escaped id
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Link markup would break the anchor, so reduce it to its text first
	$sectionName = $this->stripSectionName( $text );
	$anchor = Sanitizer::escapeId( $sectionName, [ 'noninitial', 'legacy' ] );

	return '#' . $anchor;
}
5719 
/**
 * Strip or convert wikitext markup in a section name: internal and external
 * links are reduced to their text, quote markup is run through doQuotes(),
 * and anything between "<" and ">" is removed.
 *
 * @param string $text Section name wikitext
 * @return string
 */
public function stripSectionName( $text ) {
	# Internal links: piped form keeps the label, plain form keeps the target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# External links: keep the label
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
5752 
/**
 * Test helper: start a locked parse, expand variables, expand strip markers
 * and remove disallowed HTML tags.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @return string
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	# Hold the lock in a local so it is released when this method returns
	$scopedLock = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
5774 
/**
 * Test helper: run preSaveTransform() using the user from the given options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
5784 
/**
 * Test helper: same as testSrvus(), but in preprocess output mode.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$result = $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );

	return $result;
}
5794 
/**
 * Apply a callback to every region of the text that lies outside strip
 * markers, leaving the markers themselves untouched.
 *
 * A marker runs from self::MARKER_PREFIX to the next self::MARKER_SUFFIX.
 * A prefix with no following suffix is copied through verbatim together with
 * the rest of the string.
 *
 * @param string $s Text to process
 * @param callable $callback Receives one string argument, returns a string
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$result = '';
	$pos = 0;
	$suffixLength = strlen( self::MARKER_SUFFIX );

	while ( $pos < strlen( $s ) ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			# No more markers: transform the tail and stop
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Transform the text leading up to the marker
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			# Unterminated marker: copy the remainder untouched
			$result .= substr( $s, $start );
			break;
		}

		# Copy the complete marker and continue after it
		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
5834 
/**
 * Remove strip markers from the text by delegating to the strip state.
 *
 * @param string $text
 * @return string Whatever mStripState->killMarkers() returns
 */
public function killMarkers( $text ) {
	$cleaned = $this->mStripState->killMarkers( $text );

	return $cleaned;
}
5844 
/**
 * Bundle half-parsed text with the parts of the strip state and link holder
 * array that it refers to, tagged with the current half-parsed version.
 *
 * @param string $text
 * @return array With keys 'text', 'version', 'stripState' and 'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
5870 
/**
 * Load data produced by serializeHalfParsedText() into this parser: merge
 * its strip state and link holders into the current ones and return the text.
 *
 * @param array $data Array as produced by serializeHalfParsedText()
 * @throws MWException If the version is missing or does not match
 * @return string The text, as returned by the two merge steps
 */
public function unserializeHalfParsedText( $data ) {
	$versionOk = isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionOk ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# Merge in the strip state first ...
	$merged = $this->mStripState->merge( $data['stripState'], [ $data['text'] ] );

	# ... then the link holders, which renumbers the links
	$merged = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $merged );

	# Only one text was passed in, so only one comes out
	return $merged[0];
}
5901 
/**
 * Check whether an array carries the half-parsed version this parser uses.
 *
 * @param array $data Array presumed to come from serializeHalfParsedText()
 * @return bool
 */
public function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
5914 
/**
 * Parse the value of an image width parameter such as "100x200px" or "300px".
 *
 * @param string $value
 * @return array Empty when the value is '' or matches neither form;
 *   [ 'width' => int, 'height' => int ] for the "WxH" form;
 *   [ 'width' => int ] for the width-only form
 */
public function parseWidthParam( $value ) {
	if ( $value === '' ) {
		return [];
	}

	# (bug 13500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$m = [];
	if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
		return [
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
5942 
/**
 * Mark the parser as being in a parse and return a ScopedCallback that
 * clears the mark again when it is invoked.
 *
 * @throws MWException If the parser is already marked as in a parse
 * @return ScopedCallback Keep this in a local variable for as long as the
 *   lock should be held
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively?" );
	}
	$this->mInParse = true;

	return new ScopedCallback( function() {
		$this->mInParse = false;
	} );
}
5965 
/**
 * Strip a single outer <p></p> wrapper from the HTML, if there is one.
 * Input containing more than one paragraph is returned unchanged.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );

	# A closing tag inside the captured content means there are several
	# paragraphs, in which case nothing is stripped.
	if ( $isWrapped && strpos( $m[1], '</p>' ) === false ) {
		return $m[1];
	}

	return $html;
}
5986 
/**
 * Return a parser that is safe to use right now: this instance when it is
 * idle, or a new instance of the configured parser class when this one is in
 * the middle of a parse.
 *
 * @return Parser
 */
public function getFreshParser() {
	global $wgParserConf;

	return $this->mInParse
		? new $wgParserConf['class']( $wgParserConf )
		: $this;
}
6005 
/**
 * Flag the parser output as needing OOUI.
 */
public function enableOOUI() {
	$output = $this->mOutput;
	$output->setEnableOOUI( true );
}
6016 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:5563
setTitle($t)
Set the context title.
Definition: Parser.php:733
$mAutonumber
Definition: Parser.php:176
markerSkipCallback($s, $callback)
Call a callback function on all regions of the given text that are not inside strip markers...
Definition: Parser.php:5811
#define the
table suitable for use with IDatabase::select()
$mPPNodeCount
Definition: Parser.php:190
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2062
static getVariableIDs()
Get an array of parser variable IDs.
Definition: MagicWord.php:271
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:117
const MARKER_PREFIX
Definition: Parser.php:133
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
Definition: deferred.txt:11
isValidHalfParsedText($data)
Returns true if the given array, presumed to be generated by serializeHalfParsedText(), is compatible with the current version of the parser.
Definition: Parser.php:5911
null means default in associative array form
Definition: hooks.txt:1795
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1795
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1630
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
getSection($text, $sectionId, $defaultText= '')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:5528
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1248
$mTplRedirCache
Definition: Parser.php:192
killMarkers($text)
Remove any strip markers found in the given text.
Definition: Parser.php:5841
wfGetDB($db, $groups=[], $wiki=false)
Get a Database object.
static tocList($toc, $lang=false)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1642
LinkRenderer $mLinkRenderer
Definition: Parser.php:256
fetchTemplateAndTitle($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3471
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:758
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:5615
setFunctionTagHook($tag, $callback, $flags)
Create a tag function, e.g.
Definition: Parser.php:4811
the array() calling protocol came about after MediaWiki 1.4rc1.
stripSectionName($text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:5734
null for the local wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1414
const OT_PREPROCESS
Definition: Defines.php:228
$mDoubleUnderscores
Definition: Parser.php:192
magic word the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2350
Group all the pieces relevant to the context of a request into one instance.
getPreloadText($text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:681
$context
Definition: load.php:43
validateSig($text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4525
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:242
$wgSitename
Name of the site.
renderImageGallery($text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:4862
recursivePreprocess($text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:662
replaceExternalLinks($text)
Replace external links (REL)
Definition: Parser.php:1789
static isNonincludable($index)
Is it impossible to use pages from this namespace as templates?
nextLinkID()
Definition: Parser.php:822
const SPACE_NOT_NL
Definition: Parser.php:102
static replaceUnusualEscapes($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1917
getImageParams($handler)
Definition: Parser.php:5020
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
doHeadings($text)
Parse headers and return html.
Definition: Parser.php:1568
static getTitleFor($name, $subpage=false, $fragment= '')
Get a localised Title object for a specified special page name.
Definition: SpecialPage.php:75
const OT_PLAIN
Definition: Parser.php:113
getTags()
Accessor.
Definition: Parser.php:5334
static isWellFormedXmlFragment($text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:735
const OT_WIKI
Definition: Parser.php:110
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:1959
fetchFileAndTitle($title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3613
User $mUser
Definition: Parser.php:199
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:2808
static isEnabled()
Definition: MWTidy.php:92
Set options of the Parser.
static tidy($text)
Interface with html tidy.
Definition: MWTidy.php:45
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:4797
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
database rows
Definition: globals.txt:10
wfHostname()
Fetch server name for use in error reporting etc.
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:837
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:189
argSubstitution($piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:3716
testSrvus($text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
strip/replaceVariables/unstrip for preprocessor regression testing
Definition: Parser.php:5763
uniqPrefix()
Accessor for mUniqPrefix.
Definition: Parser.php:723
const TOC_START
Definition: Parser.php:136
Title($x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:761
SectionProfiler $mProfiler
Definition: Parser.php:251
$wgEnableScaryTranscluding
Enable interwiki transcluding.
$sort
fetchFileNoRegister($title, $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3638
null for the local wiki Added in
Definition: hooks.txt:1414
There are three types of nodes:
$mHeadings
Definition: Parser.php:192
$value
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:4710
static makeSelfLinkObj($nt, $html= '', $query= '', $trail= '', $prefix= '')
Make appropriate markup for a link to the current article.
Definition: Linker.php:273
const NS_SPECIAL
Definition: Defines.php:58
clearState()
Clear Parser state.
Definition: Parser.php:340
__construct($conf=[])
Definition: Parser.php:261
const EXT_LINK_ADDR
Definition: Parser.php:94
$mFirstCall
Definition: Parser.php:151
interwikiTransclude($title, $action)
Transclude an interwiki link.
Definition: Parser.php:3657
pstPass2($text, $user)
Pre-save transform helper function.
Definition: Parser.php:4397
guessLegacySectionNameFromWikiText($text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead.
Definition: Parser.php:5713
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Options($x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:815
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2577
serializeHalfParsedText($text)
Save the parser state required to convert the given half-parsed text to HTML.
Definition: Parser.php:5861
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4834
static activeUsers()
Definition: SiteStats.php:161
$mLinkID
Definition: Parser.php:189
doQuotes($text)
Helper function for doAllQuotes()
Definition: Parser.php:1601
preprocessToDom($text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:2838
limitationWarn($limitationType, $current= '', $max= '')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:2960
static cleanUrl($url)
Definition: Sanitizer.php:1817
wfUrlencode($s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:256
$mGeneratedPPNodeCount
Definition: Parser.php:190
Represents a title within MediaWiki.
Definition: Title.php:36
static getRandomString()
Get a random string.
Definition: Parser.php:702
$mRevisionId
Definition: Parser.php:216
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1784
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
doBlockLevels($text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: Parser.php:2441
$wgArticlePath
Definition: img_auth.php:45
OutputType($x=null)
Accessor/mutator for the output type.
Definition: Parser.php:787
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:904
const NS_TEMPLATE
Definition: Defines.php:79
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:117
getVariableValue($index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2456
recursiveTagParse($text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:597
const NO_ARGS
magic word & $parser
Definition: hooks.txt:2350
MagicWordArray $mVariables
Definition: Parser.php:158
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:716
const SFH_NO_HASH
Definition: Parser.php:84
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
wfRandomString($length=32)
Get a random string containing a number of pseudo-random hex characters.
$mForceTocPosition
Definition: Parser.php:194
preprocess($text, Title $title=null, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:638
static getCacheTTL($id)
Allow external reads of TTL array.
Definition: MagicWord.php:294
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5553
const OT_PREPROCESS
Definition: Parser.php:111
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1608
maybeDoSubpageLink($target, &$text)
Handle link to subpage if necessary.
Definition: Parser.php:2429
$mFunctionSynonyms
Definition: Parser.php:143
If you want to remove the page from your watchlist later
replaceLinkHoldersText($text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:4845
setLinkID($id)
Definition: Parser.php:829
$mOutputType
Definition: Parser.php:213
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
$mDefaultStripList
Definition: Parser.php:146
static createAssocArgs($args)
Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
Definition: Parser.php:2912
$mExtLinkBracketedRegex
Definition: Parser.php:165
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page.Return false to stop further processing of the tag $reader:XMLReader object &$pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports.&$fullInterwikiPrefix:Interwiki prefix, may contain colons.&$pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable.Can be used to lazy-load the import sources list.&$importSources:The value of $wgImportSources.Modify as necessary.See the comment in DefaultSettings.php for the detail of how to structure this array. 
'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.&$title:Title object for the current page &$request:WebRequest &$ignoreRedirect:boolean to skip redirect check &$target:Title/string of redirect target &$article:Article object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) &$article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() &$ip:IP being check &$result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 
'LanguageGetMagic':DEPRECATED!Use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language &$magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED!Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language &$specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED!Use HtmlPageLinkRendererBegin instead.Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1793
if($line===false) $args
Definition: cdb.php:64
the value to return A Title object or null for latest to be modified or replaced by the hook handler or if authentication is not possible after cache objects are set for highlighting & $link
Definition: hooks.txt:2610
static getLocalInstance($ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
$wgMaxSigChars
Maximum number of Unicode characters in signature.
static getDoubleUnderscoreArray()
Get a MagicWordArray of double-underscore entities.
Definition: MagicWord.php:307
static splitTrail($trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1717
getTemplateDom($title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3396
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).
Definition: User.php:47
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1461
cleanSig($text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4539
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
static factory($mode=false, IContextSource $context=null)
Get a new image gallery.
$wgLanguageCode
Site language code.
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:129
Class for asserting that a callback happens when a dummy object leaves scope.
$wgExtraInterlanguageLinkPrefixes
List of additional interwiki prefixes that should be treated as interlanguage links (i...
startExternalParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4590
wfCgiToArray($query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
wfDebugLog($logGroup, $text, $dest= 'all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not...
static capturePath(Title $title, IContextSource $context)
Just like executePath() but will override global variables and execute the page in "inclusion" mode...
const NO_TEMPLATES
addTrackingCategory($msg)
Definition: Parser.php:3935
replaceInternalLinks($s)
Process [[ ]] wikilinks.
Definition: Parser.php:2049
$mVarCache
Definition: Parser.php:147
$wgStylePath
The URL path of the skins directory.
disableCache()
Set a flag in the output object indicating that the content is dynamic and shouldn't be cached...
Definition: Parser.php:5306
$mRevisionObject
Definition: Parser.php:215
static normalizeSectionNameWhitespace($section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the id's that are used for section links.
Definition: Sanitizer.php:1342
internalParse($text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1237
Title $mTitle
Definition: Parser.php:212
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:287
wfEscapeWikiText($text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
bool $mInParse
Recursive call protection.
Definition: Parser.php:248
Some quick notes on the file repository architecture Functionality is
Definition: README:3
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:5592
Class that generates HTML links for pages.
static stripOuterParagraph($html)
Strip the outer <p></p> tags from HTML that consists of a single paragraph.
Definition: Parser.php:5976
static register($parser)
$mRevIdForTs
Definition: Parser.php:220
static singleton()
Get an instance of this class.
Definition: LinkCache.php:65
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
static normalizeSubpageLink($contextTitle, $target, &$text)
Definition: Linker.php:1436
parseWidthParam($value)
Parse a width parameter of an image link, such as 300px or 200x300px.
Definition: Parser.php:5923
$mStripList
Definition: Parser.php:145
$mFunctionTagHooks
Definition: Parser.php:144
fetchScaryTemplateMaybeFromCache($url)
Definition: Parser.php:3676
const OT_PLAIN
Definition: Defines.php:230
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
fetchCurrentRevisionOfTitle($title)
Fetch the current revision of a given title.
Definition: Parser.php:3439
$mRevisionTimestamp
Definition: Parser.php:217
$mImageParams
Definition: Parser.php:148
stripAltText($caption, $holders)
Definition: Parser.php:5283
doAllQuotes($text)
Replace single quotes with HTML markup.
Definition: Parser.php:1584
static normalizeUrlComponent($component, $unsafe)
Definition: Parser.php:1967
if($limit) $timestamp
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:75
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1000
setHook($tag, $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4665
const OT_WIKI
Definition: Defines.php:227
Preprocessor $mPreprocessor
Definition: Parser.php:169
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:890
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of normal(non-web) applications--they might conflict with distributors'policies
static getInstance($ts=false)
Get a timestamp instance in GMT.
const NS_MEDIA
Definition: Defines.php:57
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:59
replaceVariables($text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:2883
const RECOVER_ORIG
wfMatchesDomainList($url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
MediaWiki exception.
Definition: MWException.php:26
StripState $mStripState
Definition: Parser.php:181
$mDefaultSort
Definition: Parser.php:191
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:878
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
incrementIncludeSize($type, $size)
Increment an include size counter.
Definition: Parser.php:3853
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1007
const EXT_IMAGE_REGEX
Definition: Parser.php:97
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4602
$params
const NS_CATEGORY
Definition: Defines.php:83
static makeHeadline($level, $attribs, $anchor, $html, $link, $legacyAnchor=false)
Create a headline for content.
Definition: Linker.php:1698
static extractTagsAndParams($elements, $text, &$matches, $uniq_prefix=null)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:937
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
doTableStuff($text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1034
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:5635
$mImageParamsMagicArray
Definition: Parser.php:149
LinkHolderArray $mLinkHolders
Definition: Parser.php:187
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
static register($parser)
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to and or sell copies of the and to permit persons to whom the Software is furnished to do so
Definition: LICENSE.txt:10
$wgTranscludeCacheExpiry
Expiry time for transcluded templates cached in transcache database table.
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
const DB_SLAVE
Definition: Defines.php:46
preSaveTransform($text, Title $title, User $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\\r\\n" -> "\\n" conversion, substituting signatur...
Definition: Parser.php:4365
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:850
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned after processing after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
$buffer
Allows to change the fields on the form that will be generated are created Can be used to omit specific feeds from being outputted You must not use this hook to add use OutputPage::addFeedLink() instead.&$feedLinks conditions will AND in the final query as a Content object as a Content object $title
Definition: hooks.txt:312
static hasSubpages($index)
Does the namespace allow subpages?
formatHeadings($text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:3955
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:868
static tocUnindent($level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1597
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
static tocLine($anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1612
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
getExternalLinkAttribs($url=false)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:1889
$mInputSize
Definition: Parser.php:221
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:961
getUserSig(&$user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext...
Definition: Parser.php:4476
const HALF_PARSED_VERSION
Update this version number when the output of serialiseHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:81
const NS_FILE
Definition: Defines.php:75
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:322
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:34
static makeImageLink(Parser $parser, Title $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:411
const PTD_FOR_INCLUSION
Definition: Parser.php:105
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1795
armorLinks($text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2407
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1580
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1795
static splitWhitespace($s)
Return a three-element array: leading whitespace, string contents, trailing whitespace.
Definition: Parser.php:2850
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
setOutputType($ot)
Set the output type.
Definition: Parser.php:770
$mTagHooks
Definition: Parser.php:140
Class for handling an array of magic words.
const NS_MEDIAWIKI
Definition: Defines.php:77
static & get($id)
Factory: creates an object representing an ID.
Definition: MagicWord.php:257
static getModuleStyles()
Get CSS modules needed if HTML from the current driver is to be displayed.
Definition: MWTidy.php:63
equals(Content $that=null)
Returns true if this Content object is conceptually equivalent to the given Content object...
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6012
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition: hooks.txt:242
fetchTemplate($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3499
maybeMakeExternalImage($url)
Make an image if it's allowed, either through the global option, through the exception, or through the on-wiki whitelist.
Definition: Parser.php:1990
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2416
const OT_HTML
Definition: Defines.php:226
static escapeId($id, $options=[])
Given a value, escape it so that it can be used in an id attribute and return it. ...
Definition: Sanitizer.php:1131
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object & $output
Definition: hooks.txt:1000
static getSubstIDs()
Get an array of parser substitution modifier IDs.
Definition: MagicWord.php:284
static images()
Definition: SiteStats.php:169
$mTransparentTagHooks
Definition: Parser.php:141
$mExpensiveFunctionCount
Definition: Parser.php:193
$mUrlProtocols
Definition: Parser.php:165
const TS_MW
MediaWiki concatenated string timestamp (YYYYMMDDHHMMSS)
$mConf
Definition: Parser.php:165
transformMsg($text, $options, $title=null)
Wrapper for preprocess()
Definition: Parser.php:4621
static newFromId($id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:99
wfUrlProtocols($includeProtocolRelative=true)
Returns a regular expression of url protocols.
static makeExternalLink($url, $text, $escape=true, $linktype= '', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:932
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:299
static getExternalLinkRel($url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:1868
string $mUniqPrefix
Deprecated accessor for the strip marker prefix.
Definition: Parser.php:227
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
this hook is for auditing only $req
Definition: hooks.txt:961
this hook is for auditing only or null if authentication failed before getting that far $username
Definition: hooks.txt:758
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc next in line in page history
Definition: hooks.txt:1580
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:234
const OT_MSG
Definition: Parser.php:112
replaceTransparentTags($text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5352
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
Definition: postgres.txt:22
replaceSection($oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:5544
getLinkURL($query= '', $query2=false, $proto=PROTO_RELATIVE)
Get a URL that's the simplest URL that will be valid to link, locally, to the current Title...
Definition: Title.php:1780
doDoubleUnderscore($text)
Strip double-underscore items like NOGALLERY and NOTOC. Fills $this->mDoubleUnderscores, returns the modified text.
Definition: Parser.php:3880
$mFunctionHooks
Definition: Parser.php:142
$lines
Definition: router.php:66
testPreprocess($text, Title $title, ParserOptions $options)
Definition: Parser.php:5791
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition: deferred.txt:11
MagicWordArray $mSubstWords
Definition: Parser.php:163
const TOC_END
Definition: Parser.php:137
static normalizeCharReferences($text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1361
callParserFunction($frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3300
$wgScriptPath
The path we should point to.
Variant of the Message class.
Definition: Message.php:1225
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:5997
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database etc For and for historical it also represents a few features of articles that don t involve their such as access rights See also title txt Article Encapsulates access to the page table of the database The object represents a an and maintains state such as etc Revision Encapsulates individual page revision data and access to the revision text blobs storage system Higher level code should never touch text storage directly
Definition: design.txt:34
WebRequest clone which takes values from a provided array.
Definition: FauxRequest.php:33
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:1000
static articles()
Definition: SiteStats.php:137
$mRevisionUser
Definition: Parser.php:218
lock()
Lock the current instance of the parser.
Definition: Parser.php:5952
static pages()
Definition: SiteStats.php:145
$line
Definition: cdb.php:59
const SFH_OBJECT_ARGS
Definition: Parser.php:85
static statelessFetchTemplate($title, $parser=false)
Static function to get a template. Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3512
I won t presume to tell you how to I m just describing the methods I chose to use for myself If you do choose to follow these it will probably be easier for you to collaborate with others on the but if you want to contribute without by all means do which work well I also use K &R brace matching style I know that s a religious issue for so if you want to use a style that puts opening braces on the next line
Definition: design.txt:79
setFunctionHook($id, $callback, $flags=0)
Create a function, e.g.
Definition: Parser.php:4759
static setupOOUI($skinName= '', $dir= 'ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
static makeMediaLinkFile(Title $title, $file, $html= '')
Create a direct link to a given uploaded file.
Definition: Linker.php:872
$mIncludeCount
Definition: Parser.php:183
usually copyright or history_copyright This message must be in HTML not wikitext if the section is included from a template to be included in the link
Definition: hooks.txt:2744
$mMarkerIndex
Definition: Parser.php:150
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object to manipulate or replace but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok inclusive $limit
Definition: hooks.txt:1000
getTitle()
Accessor for the Title object.
Definition: Parser.php:751
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
extractSections($text, $sectionId, $mode, $newText= '')
Break wikitext input into sections, and either pull or replace some particular section's text...
Definition: Parser.php:5403
ParserOutput $mOutput
Definition: Parser.php:175
getOutput()
Get the ParserOutput object.
Definition: Parser.php:796
$wgExperimentalHtmlIds
Should we allow a broader set of characters in id attributes, per HTML5? If not, use only HTML 4-comp...
static statelessFetchRevision($title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3462
doMagicLinks($text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1405
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable or merely the Work and Derivative Works thereof Contribution shall mean any work of including the original version of the Work and any modifications or additions to that Work or Derivative Works that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner For the purposes of this submitted means any form of or written communication sent to the Licensor or its including but not limited to communication on electronic mailing source code control and issue tracking systems that are managed or on behalf the Licensor for the purpose of discussing and improving the but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as Not a Contribution 
Contributor shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work Grant of Copyright License Subject to the terms and conditions of this each Contributor hereby grants to You a non no royalty irrevocable copyright license to prepare Derivative Works publicly display
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:1000
static cleanSigInSig($text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4576
setDefaultSort($sort)
Mutator for $mDefaultSort.
Definition: Parser.php:5656
fetchFile($title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3602
static fixTagAttributes($text, $element)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML...
Definition: Sanitizer.php:1036
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1586
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:585
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling output() to send it all.It could be easily changed to send incrementally if that becomes useful
static doBlockLevels($text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
$wgServer
URL of the server.
We ve cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else It s much easier for someone working with this code to see what s _really_ going on
Definition: hooks.txt:86
incrementExpensiveFunctionCount()
Increment the expensive function count.
Definition: Parser.php:3867
const DB_MASTER
Definition: Defines.php:47
$mShowToc
Definition: Parser.php:194
static normalizeLinkUrl($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1931
static removeHTMLtags($text, $processCallback=null, $args=[], $extratags=[], $removetags=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:455
magicLinkCallback($m)
Definition: Parser.php:1435
const EXT_LINK_URL_CLASS
Definition: Parser.php:91
insertStripItem($text)
Add an item to the strip state. Returns the unique tag which must be inserted into the stripped text T...
Definition: Parser.php:1020
testPst($text, Title $title, ParserOptions $options)
Definition: Parser.php:5781
static factory($url, $options=null, $caller=__METHOD__)
Generate a new request object.
const TS_UNIX
Unix time - the number of seconds since 1970-01-01 00:00:00 UTC.
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1795
if(!$wgRequest->checkUrlExtension()) if(!$wgEnableAPI) $wgTitle
Definition: api.php:57
ParserOptions $mOptions
Definition: Parser.php:207
parse($text, Title $title, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:398
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:758
static numberingroup($group)
Find the number of users in a given user group.
Definition: SiteStats.php:179
=Architecture==Two class hierarchies are used to provide the functionality associated with the different content models:*Content interface(and AbstractContent base class) define functionality that acts on the concrete content of a page, and *ContentHandler base class provides functionality specific to a content model, but not acting on concrete content.The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content.These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text.All manipulation and analysis of page content must be done via the appropriate methods of the Content object.For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers.The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id).Also Title, WikiPage and Revision now have getContentHandler() methods for convenience.ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page.ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type.However, it is recommended to instead use WikiPage::getContent() resp.Revision::getContent() to get a page's content as a Content object.These two methods should be the ONLY way in which page content is accessed.Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides().This is similar to what WikiPage::getActionOverrides() was already doing.==Serialization==With the ContentHandler facility, page content no longer has to be text based.Objects implementing the Content interface are used to represent and handle the content internally.For storage and data exchange, each content model supports at least one serialization format via 
ContentHandler::serializeContent($content).The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats().Content serialization formats are identified using MIME type like strings.The following formats are built in:*text/x-wiki-wikitext *text/javascript-for js pages *text/css-for css pages *text/plain-for future use, e.g.with plain text messages.*text/html-for future use, e.g.with plain html messages.*application/vnd.php.serialized-for future use with the api and for extensions *application/json-for future use with the api, and for use by extensions *application/xml-for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant.Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model and format of the content should always be handled explicitly.Without that information, interpretation of the provided content is not reliable.The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export.Also note that the API will provide encapsulated, serialized content-so if the API was called with format=json, and contentformat is also json(or rather, application/json), the page content is represented as a string containing an escaped json structure.Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form.==Compatibility==The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content.However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page's content model, and will now generate warnings when used.Most importantly, the following functions have been deprecated:*Revisions::getText() is deprecated in favor 
Revisions::getContent()*WikiPage::getText() is deprecated in favor of WikiPage::getContent() Also, the old Article::getContent()(which returns text) is superseded by Article::getContentObject().However, both methods should be avoided since they do not provide clean access to the page's actual content.For instance, they may return a system message for non-existing pages.Use WikiPage::getContent() instead.Code that relies on a textual representation of the page content should eventually be rewritten.However, ContentHandler::getContentText() provides a stop-gap that can be used to get text for a page.Its behavior is controlled by $wgContentHandlerTextFallback it
const STRIP_COMMENTS
Marks HTML that shouldn't be escaped.
Definition: HtmlArmor.php:28
static getVersion($flags= '', $lang=null)
Return a string of the MediaWiki version with Git revision if available.
braceSubstitution($piece, $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition: Parser.php:2982
setUser($user)
Set the current user.
Definition: Parser.php:713
$mHighestExpansionDepth
Definition: Parser.php:190
makeImage($title, $options, $holders=false)
Parse image options text and use it to make an image.
Definition: Parser.php:5068
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition: Parser.php:5323
static cascadingsources($parser, $title= '')
Returns the sources of any cascading protection acting on a specified page.
getCustomDefaultSort()
<