MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
26 
70 class Parser {
# Version identifier of this parser implementation
const VERSION = '1.6.4';

# Flags for Parser::setFunctionHook
const SFH_NO_HASH = 1;
const SFH_OBJECT_ARGS = 2;

# Constants needed for external link processing
# Everything except bracket, space, or control characters
# \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
# as well as U+3000 IDEOGRAPHIC SPACE (T21052)
const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
# Simplified expression to match an IPv4 or IPv6 address, or
# at least one character of a host name (embeds EXT_LINK_URL_CLASS)
const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
# RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR).
# The pattern uses the /x modifier, so the line break and indentation inside
# the literal are not part of what is matched.
// @codingStandardsIgnoreStart Generic.Files.LineLength
const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
	\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
// @codingStandardsIgnoreEnd

# Regular expression for a non-newline space
const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';

# Flags for preprocessToDom
const PTD_FOR_INCLUSION = 1;

# Allowed values for $this->mOutputType
# Parameter to startExternalParse().
const OT_HTML = 1; # like parse()
const OT_WIKI = 2; # like preSaveTransform()
# Referenced by preprocess() and setOutputType(); shares its value with OT_MSG.
const OT_PREPROCESS = 3; # like preprocess()
const OT_MSG = 3;
const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.

# Strip markers are built as MARKER_PREFIX . <payload> . MARKER_SUFFIX.
# Both ends contain U+007F DELETE, which parse() replaces in the input text
# so that user-supplied wikitext cannot contain a marker.
const MARKER_SUFFIX = "-QINU`\"'\x7f";
const MARKER_PREFIX = "\x7f'\"`UNIQ-";

# Markers used for wrapping the table of contents
const TOC_START = '<mw:toc>';
const TOC_END = '</mw:toc>';
139 
# Persistent:
public $mTagHooks = [];
# Read by internalParse() via array_keys(), so it must always be an array.
public $mTransparentTagHooks = [];
public $mFunctionHooks = [];
public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
public $mFunctionTagHooks = [];
public $mStripList = [];
public $mDefaultStripList = [];
public $mVarCache = [];
public $mImageParams = [];
# Counter used by insertStripItem() to build unique marker names
public $mMarkerIndex = 0;
# True until firstCallInit() has run
public $mFirstCall = true;

# Initialised by initialiseVariables()

public $mVariables;

public $mSubstWords;

# Initialised in constructor
public $mConf, $mExtLinkBracketedRegex, $mUrlProtocols, $mPreprocessorClass;

# Initialized in getPreprocessor()
public $mPreprocessor;

# Cleared with clearState():

/**
 * @var ParserOutput
 */
public $mOutput;
public $mAutonumber;

/**
 * @var StripState
 */
public $mStripState;

public $mIncludeCount;

/**
 * @var LinkHolderArray
 */
public $mLinkHolders;

public $mLinkID;
public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
public $mDefaultSort;
public $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
public $mExpensiveFunctionCount; # number of expensive parser function calls
public $mShowToc, $mForceTocPosition;

public $mUser; # User object; only used when doing pre-save transform

# Temporary
# These are variables reset at least once per parse regardless of $clearState

public $mOptions;

public $mTitle; # Title context, used for self-link rendering and similar things
public $mOutputType; # Output type, one of the OT_xxx constants
public $ot; # Shortcut alias, see setOutputType()
public $mRevisionObject; # The revision object of the specified revision ID
public $mRevisionId; # ID to display in {{REVISIONID}} tags
public $mRevisionTimestamp; # The timestamp of the specified revision ID
public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
public $mInputSize = false; # For {{PAGESIZE}} on current page.

public $mUniqPrefix = Parser::MARKER_PREFIX;

# Reset to an empty array by clearState()
public $mLangLinkLanguages;

# Reset to null by clearState() and around every parse()
public $currentRevisionCache;

# Cleared by __clone(). NOTE(review): presumably the re-entrancy flag
# maintained by lock() -- confirm against that method.
public $mInParse = false;

/** @var SectionProfiler */
protected $mProfiler;

# Lazily created by getLinkRenderer()
protected $mLinkRenderer;
258 
/**
 * Store the configuration, build the bracketed external link regex and
 * choose which preprocessor implementation to use.
 *
 * @param array $conf Configuration; the optional 'preprocessorClass' key
 *   overrides the automatic preprocessor selection.
 */
public function __construct( $conf = [] ) {
	$this->mConf = $conf;
	$this->mUrlProtocols = wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')'
		. self::EXT_LINK_ADDR
		. self::EXT_LINK_URL_CLASS
		. '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';

	if ( isset( $conf['preprocessorClass'] ) ) {
		# An explicit configuration always wins
		$chosenClass = $conf['preprocessorClass'];
	} else {
		# Preprocessor_Hash is the fallback; it is also much faster than
		# Preprocessor_DOM under HipHop, so nothing else is probed there.
		$chosenClass = 'Preprocessor_Hash';
		if ( !defined( 'HPHP_VERSION' ) ) {
			if ( extension_loaded( 'domxml' ) ) {
				# PECL extension that conflicts with the core DOM extension (T15770)
				wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
			} elseif ( extension_loaded( 'dom' ) ) {
				$chosenClass = 'Preprocessor_DOM';
			}
		}
	}

	$this->mPreprocessorClass = $chosenClass;
	wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
284 
/**
 * Drop every property of the parser when it is destroyed, starting with
 * the link holder array.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}
	# Work from a snapshot of the property names and unset each in turn
	foreach ( array_keys( get_object_vars( $this ) ) as $property ) {
		unset( $this->$property );
	}
}
296 
/**
 * Fix up a freshly cloned parser: clear the in-parse flag, detach fields
 * that may have been copied as references, and notify hooks.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: Taking a reference to an object field turns the field itself
	// into a reference (for as long as the other reference lives), and a
	// clone copies such a field as a reference. Both are defined PHP5
	// behaviors, and old PHP4-era hooks pass these fields by reference,
	// so the clone could otherwise share them with the original.
	foreach ( [ 'mStripState', 'mVarCache' ] as $field ) {
		// Take a plain copy of the value, rebind the field to that copy,
		// then drop the local name so only the field refers to it.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
319 
/**
 * One-time initialisation performed before the first parse; later calls
 * do nothing.
 */
public function firstCallInit() {
	if ( $this->mFirstCall ) {
		$this->mFirstCall = false;

		# NOTE(review): only the core tag hooks are registered here; confirm
		# whether core parser functions are meant to be registered as well.
		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		// Hand the hook a local alias: passing $this by reference triggers
		// a warning on PHP 7.1
		$self = $this;
		Hooks::run( 'ParserFirstCallInit', [ &$self ] );
	}
}
337 
/**
 * Reset all per-parse state to its initial values, running the one-time
 * initialisation first if it has not happened yet.
 */
public function clearState() {
	if ( $this->mFirstCall ) {
		$this->firstCallInit();
	}

	$output = new ParserOutput;
	$this->mOutput = $output;
	$this->mOptions->registerWatcher( [ $output, 'recordOption' ] );
	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	# Forget everything known about the revision being parsed
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState;

	# Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor still bound to another parser must not be reused
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Hand the hook a local alias: passing $this by reference triggers
	// a warning on PHP 7.1
	$self = $this;
	Hooks::run( 'ParserClearState', [ &$self ] );
}
390 
/**
 * Convert wikitext to HTML and return the resulting ParserOutput.
 *
 * The revision-related fields are saved on entry and restored before
 * returning, so a nested parse with a different $revid does not leak its
 * revision into the caller's state.
 *
 * @param string $text Wikitext to parse
 * @param Title $title Title passed to startParse() and used for title conversion
 * @param ParserOptions $options
 * @param bool $linestart Passed through to internalParseHalfParsed()
 * @param bool $clearState When true, U+007F is replaced in the input, the
 *   parser is locked, and startParse() is asked to clear state
 * @param int|null $revid Revision ID to use for this parse, or null to keep
 *   the current revision fields
 * @return ParserOutput
 */
public function parse(
	$text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	global $wgShowHostnames;

	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held in a local for the rest of the method, i.e. for the whole parse
		$magicScopeVariable = $this->lock();
	}

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	# Set the converted title text unless title conversion is disabled,
	# suppressed by a double-underscore switch, or a display title is
	# already set on the output.
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	# Done parsing! Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();

		$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuTime !== null ) {
			$this->mOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
		$this->mOutput->setLimitReportData( 'limitreport-walltime',
			sprintf( "%.3f", $wallTime )
		);

		$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
			[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
			[ $this->mIncludeSizes['post-expand'], $max ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
			[ $this->mIncludeSizes['arg'], $max ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
		);
		Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

		$limitReport = "NewPP limit report\n";
		if ( $wgShowHostnames ) {
			$limitReport .= 'Parsed by ' . wfHostname() . "\n";
		}
		$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
		$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
		$limitReport .= 'Dynamic content: ' .
			( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
			"\n";

		foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
			if ( Hooks::run( 'ParserLimitReportFormat',
				[ $key, &$value, &$limitReport, false, false ]
			) ) {
				$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
				$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
					->inLanguage( 'en' )->useDatabase( false );
				if ( !$valueMsg->exists() ) {
					$valueMsg = new RawMessage( '$1' );
				}
				if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
					$valueMsg->params( $value );
					$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
				}
			}
		}
		// Since we're not really outputting HTML, decode the entities and
		// then re-encode the things that need hiding inside HTML comments.
		$limitReport = htmlspecialchars_decode( $limitReport );
		Hooks::run( 'ParserLimitReport', [ $this, &$limitReport ] );

		// Sanitize for comment. Note '‐' in the replacement is U+2010,
		// which looks much like the problematic '-'.
		$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
		$text .= "\n<!-- \n$limitReport-->\n";

		// Add on template profiling data in human/machine readable way
		$dataByFunc = $this->mProfiler->getFunctionStats();
		uasort( $dataByFunc, function ( $a, $b ) {
			// Descending by 'real' time. The callback must return an integer
			// less than, equal to or greater than zero; a boolean result
			// cannot express "a sorts before b" and gives an unreliable order.
			if ( $a['real'] == $b['real'] ) {
				return 0;
			}
			return ( $a['real'] < $b['real'] ) ? 1 : -1;
		} );
		$profileReport = [];
		foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
			$profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
				$item['%real'], $item['real'], $item['calls'],
				htmlspecialchars( $item['name'] ) );
		}
		$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
		$text .= implode( "\n", $profileReport ) . "\n-->\n";

		$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

		// Add other cache related metadata
		if ( $wgShowHostnames ) {
			$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
		}
		$this->mOutput->setLimitReportData( 'cachereport-timestamp',
			$this->mOutput->getCacheTime() );
		$this->mOutput->setLimitReportData( 'cachereport-ttl',
			$this->mOutput->getCacheExpiry() );
		$this->mOutput->setLimitReportData( 'cachereport-transientcontent',
			$this->mOutput->hasDynamicContent() );

		if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
			wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
				$this->mTitle->getPrefixedDBkey() );
		}
	}
	$this->mOutput->setText( $text );

	# Restore the revision fields saved on entry
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
600 
/**
 * Half-parse wikitext from inside an ongoing parse: run the strip hooks,
 * then internalParse() in non-main mode.
 *
 * @param string $text Wikitext to parse
 * @param bool|object $frame Frame handed to internalParse(); false for none
 * @return string Result of internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Hand the hooks a local alias: passing $this by reference triggers
	// a warning on PHP 7.1
	$self = $this;
	foreach ( [ 'ParserBeforeStrip', 'ParserAfterStrip' ] as $hookName ) {
		Hooks::run( $hookName, [ &$self, &$text, &$this->mStripState ] );
	}
	return $this->internalParse( $text, false, $frame );
}
631 
/**
 * Like recursiveTagParse(), but also runs the second parsing stage
 * (internalParseHalfParsed() in non-main mode) on the result.
 *
 * @param string $text Wikitext to parse
 * @param bool|object $frame Frame handed to recursiveTagParse(); false for none
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
654 
/**
 * Expand variables in wikitext without producing HTML: the parser is
 * locked and restarted in OT_PREPROCESS mode, variables are replaced and
 * strip markers are put back.
 *
 * @param string $text Wikitext to preprocess
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to store on the parser, if not null
 * @param bool|object $frame Frame handed to replaceVariables(); false for none
 * @return string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Kept in a local so that it lives until this method returns
	$scopedLock = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	// Hand the hooks a local alias: passing $this by reference triggers
	// a warning on PHP 7.1
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );

	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
682 
/**
 * Replace variables and restore strip markers in a piece of wikitext,
 * without locking or restarting the parser.
 *
 * @param string $text Wikitext to preprocess
 * @param bool|object $frame Frame handed to replaceVariables(); false for none
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
697 
/**
 * Process wikitext for preloading: substitute message-style parameters,
 * then preprocess it as for inclusion while leaving template arguments
 * and templates unexpanded.
 *
 * @param string $text Wikitext to process
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Values substituted for $1, $2, ... in $text
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	# The expansion flags were never defined, so expand() received an
	# undefined variable. Arguments and templates stay untouched.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
725 
/**
 * Return a random string of 16 characters from wfRandomString().
 *
 * @deprecated since 1.26
 * @return string
 */
public static function getRandomString() {
	wfDeprecated( __METHOD__, '1.26' );
	$random = wfRandomString( 16 );
	return $random;
}
736 
/**
 * Set the user returned by getUser() in preference to the user from the
 * parser options.
 *
 * @param mixed $user User object, or null to fall back to the options' user
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
746 
/**
 * Accessor for the strip marker prefix.
 *
 * @deprecated since 1.26; use self::MARKER_PREFIX
 * @return string
 */
public function uniqPrefix() {
	wfDeprecated( __METHOD__, '1.26' );
	$prefix = self::MARKER_PREFIX;
	return $prefix;
}
757 
/**
 * Set the context title. A falsy argument is replaced by a placeholder
 * title built from the text 'NO TITLE'.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	$title = $t ?: Title::newFromText( 'NO TITLE' );

	# Strip the fragment, if any, to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
775 
/**
 * Accessor for the context title stored by setTitle().
 *
 * @return Title|null
 */
public function getTitle() {
	$title = $this->mTitle;
	return $title;
}
784 
/**
 * Combined accessor/mutator for the context title, delegating to
 * wfSetVar() on $this->mTitle.
 *
 * @param Title|null $x Passed to wfSetVar() as the new value
 * @return mixed Whatever wfSetVar() returns
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );
	return $result;
}
794 
/**
 * Set the output type and rebuild the $this->ot shortcut array, which
 * maps each mode name to whether it is the active one.
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$modes = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$shortcut = [];
	foreach ( $modes as $label => $constant ) {
		$shortcut[$label] = ( $ot == $constant );
	}
	$this->ot = $shortcut;
}
810 
/**
 * Combined accessor/mutator for the output type, delegating to
 * wfSetVar() on $this->mOutputType.
 *
 * @param int|null $x Passed to wfSetVar() as the new value
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
820 
/**
 * Accessor for the ParserOutput object of the current parse.
 *
 * @return ParserOutput
 */
public function getOutput() {
	$output = $this->mOutput;
	return $output;
}
829 
/**
 * Accessor for the parser options in use.
 *
 * @return ParserOptions
 */
public function getOptions() {
	$options = $this->mOptions;
	return $options;
}
838 
/**
 * Combined accessor/mutator for the parser options, delegating to
 * wfSetVar() on $this->mOptions.
 *
 * @param ParserOptions|null $x Passed to wfSetVar() as the new value
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
848 
/**
 * Return the current link ID and advance the counter by one.
 *
 * @return int The ID as it was before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
855 
/**
 * Overwrite the link ID counter used by nextLinkID().
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
862 
/**
 * Alias of getTargetLanguage().
 *
 * @return mixed Whatever getTargetLanguage() returns
 */
public function getFunctionLang() {
	$language = $this->getTargetLanguage();
	return $language;
}
870 
/**
 * Determine the language being parsed into. In order of preference: the
 * target language set in the options, the user language when parsing an
 * interface message, and finally the page language of the context title.
 *
 * @return mixed Language as returned by the options or the title
 * @throws MWException If no language can be derived because mTitle is null
 */
public function getTargetLanguage() {
	$language = $this->mOptions->getTargetLanguage();

	if ( $language === null ) {
		if ( $this->mOptions->getInterfaceMessage() ) {
			return $this->mOptions->getUserLangObj();
		}
		if ( is_null( $this->mTitle ) ) {
			throw new MWException( __METHOD__ . ': $this->mTitle is null' );
		}
		$language = $this->mTitle->getPageLanguage();
	}

	return $language;
}
893 
/**
 * Language object used for conversion; currently the same as
 * getTargetLanguage().
 *
 * @return mixed Whatever getTargetLanguage() returns
 */
public function getConverterLanguage() {
	$language = $this->getTargetLanguage();
	return $language;
}
901 
/**
 * Return the user set through setUser(), falling back to the user of the
 * parser options when none was set.
 *
 * @return mixed User object
 */
public function getUser() {
	if ( is_null( $this->mUser ) ) {
		return $this->mOptions->getUser();
	}
	return $this->mUser;
}
914 
/**
 * Return the preprocessor, instantiating the class chosen in the
 * constructor on first use.
 *
 * @return object Instance of $this->mPreprocessorClass
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->mPreprocessorClass;
	$this->mPreprocessor = new $preprocessorClass( $this );
	return $this->mPreprocessor;
}
927 
/**
 * Return the link renderer, creating it on first use and configuring its
 * stub threshold from the parser options.
 *
 * @return object Link renderer created by the link renderer factory
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	$factory = MediaWikiServices::getInstance()->getLinkRendererFactory();
	# Store the renderer before configuring it
	$this->mLinkRenderer = $factory->create();
	$threshold = $this->getOptions()->getStubThreshold();
	$this->mLinkRenderer->setStubThreshold( $threshold );

	return $this->mLinkRenderer;
}
945 
/**
 * Replace every occurrence of the given tags, and every HTML comment, in
 * $text with a unique marker, collecting the removed pieces in $matches.
 *
 * Each entry of $matches is keyed by its marker and holds:
 *   [ element name, inner content (null for <tag />), decoded attributes,
 *     the complete original text of the element ]
 * An element with no closing tag extends to the end of the text.
 *
 * @param array $elements Tag names to extract; joined into a regex alternation
 * @param string $text Source text
 * @param array &$matches Out parameter, reset and filled as described above
 * @param string|null $uniq_prefix Unused; passing a non-null value is deprecated
 * @return string $text with the extracted elements replaced by markers
 */
public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) {
	if ( $uniq_prefix !== null ) {
		wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' );
	}
	# Marker serial number, shared by all calls
	static $serial = 1;
	$matches = [];
	$result = '';

	$alternation = implode( '|', $elements );
	$openPattern = "/<($alternation)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

	while ( $text != '' ) {
		$parts = preg_split( $openPattern, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$result .= $parts[0];
		$partCount = count( $parts );
		if ( $partCount < 5 ) {
			# Nothing more to extract
			break;
		}

		if ( $partCount > 5 ) {
			# comment: matched by the second alternative of the pattern
			$tagName = $parts[4];
			$rawAttribs = '';
			$closer = '';
			$remainder = $parts[5];
		} else {
			# tag
			$tagName = $parts[1];
			$rawAttribs = $parts[2];
			$closer = $parts[3];
			$remainder = $parts[4];
		}

		$marker = sprintf( '%s-%s-%08X%s',
			self::MARKER_PREFIX, $tagName, $serial++, self::MARKER_SUFFIX );
		$result .= $marker;

		if ( $closer === '/>' ) {
			# Empty element tag, <tag />
			$inner = null;
			$closingTag = null;
			$text = $remainder;
		} else {
			$endPattern = ( $tagName === '!--' )
				? '/(-->)/'
				: "/(<\\/$tagName\\s*>)/i";
			$pieces = preg_split( $endPattern, $remainder, 2, PREG_SPLIT_DELIM_CAPTURE );
			$inner = $pieces[0];
			if ( count( $pieces ) >= 3 ) {
				$closingTag = $pieces[1];
				$text = $pieces[2];
			} else {
				# No end tag -- let it run out to the end of the text.
				$closingTag = '';
				$text = '';
			}
		}

		$matches[$marker] = [
			$tagName,
			$inner,
			Sanitizer::decodeTagAttributes( $rawAttribs ),
			"<$tagName$rawAttribs$closer$inner$closingTag",
		];
	}

	return $result;
}
1031 
/**
 * Accessor for $this->mStripList.
 *
 * @return array
 */
public function getStripList() {
	$list = $this->mStripList;
	return $list;
}
1040 
/**
 * Register $text with the strip state under a newly generated marker and
 * return that marker.
 *
 * @param string $text Text stored as a "general" strip item
 * @return string The marker standing in for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mMarkerIndex++;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1056 
/**
 * Convert wikitext table markup ({| ... |}) into HTML table markup,
 * working line by line.
 *
 * Nested tables are tracked with parallel stacks whose top entry belongs
 * to the innermost open table. Tables still open at the end of the text
 * are closed automatically.
 *
 * @param string $text Wikitext
 * @return string Text with table markup replaced by HTML
 */
public function doTableStuff( $text ) {
	$out = '';
	$cellOpen = []; # Is a td/th/caption currently open?
	$lastTags = []; # Last tag activated (td, th or caption)
	$rowOpen = []; # Is a tr currently open?
	$rowAttribs = []; # Attributes waiting for the next <tr>
	$rowSeen = []; # Did this table open a <tr> element?
	$indentLevel = 0; # indent level of the table

	# Which element each line-start character opens
	$tagForChar = [ '|' => 'td', '!' => 'th', '+' => 'caption' ];

	foreach ( StringUtils::explode( "\n", $text ) as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) {
			# Empty lines pass through untouched
			$out .= $outLine . "\n";
			continue;
		}

		$firstChar = $line[0];
		$firstTwo = substr( $line, 0, 2 );
		$m = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $m ) ) {
			# Start of a new table, possibly indented with leading colons
			$indentLevel = strlen( $m[1] );

			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $m[2] ),
				'table'
			);

			$outLine = str_repeat( '<dl><dd>', $indentLevel ) . "<table{$attributes}>";
			$cellOpen[] = false;
			$lastTags[] = '';
			$rowOpen[] = false;
			$rowAttribs[] = '';
			$rowSeen[] = false;
		} elseif ( count( $cellOpen ) == 0 ) {
			# Not inside any table: leave the line alone
			$out .= $outLine . "\n";
			continue;
		} elseif ( $firstTwo === '|}' ) {
			# End of the current table; close whatever is still open inside it
			$line = '</table>' . substr( $line, 2 );
			$lastTag = array_pop( $lastTags );

			if ( !array_pop( $rowSeen ) ) {
				# A table without any row gets an empty one
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $rowOpen ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $cellOpen ) ) {
				$line = "</{$lastTag}>{$line}";
			}
			array_pop( $rowAttribs );
			$outLine = $line . str_repeat( '</dd></dl>', $indentLevel );
		} elseif ( $firstTwo === '|-' ) {
			# Row separator: everything after the dashes is attributes
			$line = preg_replace( '#^\|-+#', '', $line );

			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $line ),
				'tr'
			);
			# Replace the pending row attributes of this table
			array_pop( $rowAttribs );
			$rowAttribs[] = $attributes;

			$line = '';
			$lastTag = array_pop( $lastTags );
			array_pop( $rowSeen );
			$rowSeen[] = true;

			if ( array_pop( $rowOpen ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $cellOpen ) ) {
				$line = "</{$lastTag}>{$line}";
			}

			$outLine = $line;
			$rowOpen[] = false;
			$cellOpen[] = false;
			$lastTags[] = '';
		} elseif ( $firstChar === '|' || $firstChar === '!' ) {
			# Cell, header cell or caption line ('|+' also starts with '|')
			if ( $firstTwo === '|+' ) {
				$firstChar = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $firstChar === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$outLine = '';

			foreach ( explode( '||', $line ) as $cell ) {
				$previous = '';
				if ( $firstChar !== '+' ) {
					# Cells live in a row: open one if none is open yet
					$pendingRowAttribs = array_pop( $rowAttribs );
					if ( !array_pop( $rowOpen ) ) {
						$previous = "<tr{$pendingRowAttribs}>\n";
					}
					$rowOpen[] = true;
					$rowAttribs[] = '';
					array_pop( $rowSeen );
					$rowSeen[] = true;
				}

				$lastTag = array_pop( $lastTags );

				if ( array_pop( $cellOpen ) ) {
					# Close the previous cell before opening the next
					$previous = "</{$lastTag}>\n{$previous}";
				}

				$lastTag = isset( $tagForChar[$firstChar] ) ? $tagForChar[$firstChar] : '';
				$lastTags[] = $lastTag;

				# A cell could contain both parameters and data
				$cellParts = explode( '|', $cell, 2 );

				# T2553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				# Bug T153140: Neither should language converter markup.
				if ( preg_match( '/\[\[|-\{/', $cellParts[0] ) === 1 ) {
					$cell = "{$previous}<{$lastTag}>{$cell}";
				} elseif ( count( $cellParts ) == 1 ) {
					$cell = "{$previous}<{$lastTag}>{$cellParts[0]}";
				} else {
					$attributes = Sanitizer::fixTagAttributes(
						$this->mStripState->unstripBoth( $cellParts[0] ),
						$lastTag
					);
					$cell = "{$previous}<{$lastTag}{$attributes}>{$cellParts[1]}";
				}

				$outLine .= $cell;
				$cellOpen[] = true;
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open td, tr && table
	while ( count( $cellOpen ) > 0 ) {
		if ( array_pop( $cellOpen ) ) {
			$out .= "</td>\n";
		}
		if ( array_pop( $rowOpen ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $rowSeen ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		return '';
	}

	return $out;
}
1255 
/**
 * First parsing stage: expand variables/templates, sanitize HTML, then
 * apply the inline wikitext transformations (tables, horizontal rules,
 * double underscores, headings, links, quotes, magic links).
 *
 * @param string $text Wikitext
 * @param bool $isMain Passed through to formatHeadings()
 * @param bool|object $frame Frame used to expand the text; when false the
 *   old-style replaceVariables() is used instead
 * @return string Half-parsed text
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	# Keep the untouched input for formatHeadings()
	$origText = $text;

	// Hand the hooks a local alias: passing $this by reference triggers
	// a warning on PHP 7.1
	$self = $this;

	# Hook to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$self, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( !$frame ) {
		# No frame provided: use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# Use frame depth to infer how include/noinclude tags should be handled:
		# depth=0 means this is the top-level document; otherwise it's an
		# included document
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$self, &$text, &$this->mStripState ] );
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		$transparentTags,
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$self, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$noParseMarker = self::MARKER_PREFIX . 'NOPARSE';
	$text = str_replace( $noParseMarker, '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $origText, $isMain );
}
1331 
/**
 * Run the closing stages of a parse on text that still contains strip
 * markers and link holders.
 *
 * In order: general unstrip, 'ParserAfterUnstrip' hook, French-spacing
 * fixes, block levels, link holder replacement, optional language variant
 * conversion, nowiki unstrip, 'ParserBeforeTidy' hook, transparent tags, a
 * second general unstrip, character reference normalisation, tidy (or a
 * regex-based fallback when MWTidy is not enabled) and finally the
 * 'ParserAfterTidy' hook.
 *
 * @param string $text
 * @param bool $isMain Whether to run the three hooks named above
 * @param bool $linestart Passed straight through to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	// Hooks take the parser by reference; going through an alias avoids the
	// PHP 7.1 warning about passing $this by reference.
	$parser = $this;

	$text = $this->mStripState->unstripGeneral( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels.
	# The patterns and replacements below are paired by position.
	$spacingPatterns = [
		# French spaces before ? : ; ! % and a right guillemet, but only
		# if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/',
		# French space after a left guillemet
		'/(\\302\\253) /',
		# Beware of CSS magic word !important, T13874.
		'/&#160;(!\s*important)/',
	];
	$spacingReplacements = [
		'\\1&#160;',
		'\\1&#160;',
		' \\1',
	];
	$text = preg_replace( $spacingPatterns, $spacingReplacements, $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	$skipConversion = $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| $this->mOptions->getInterfaceMessage();
	if ( !$skipConversion ) {
		# The position of the convert() call should not be changed. It
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $this->getConverterLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( !MWTidy::isEnabled() ) {
		# No tidy available: attempt to sanitize at least some nesting
		# problems (T4702 and quite a few others). Patterns and
		# replacements are paired by position.
		$nestingPatterns = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (T5695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/',
		];
		$nestingReplacements = [
			'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			'\\1\\2</a>\\3</a>\\1\\4</a>',
			'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			'',
		];

		$text = preg_replace( $nestingPatterns, $nestingReplacements, $text );
	} elseif ( $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1433 
/**
 * Find "magic links" in the text (free external URLs plus RFC, PMID and
 * ISBN references) and replace each match with whatever
 * magicLinkCallback() returns for it.
 *
 * Complete <a>...</a> elements and other HTML tags are matched as well
 * (groups 1 and 2) so that they are consumed whole and passed to the
 * callback instead of being scanned for magic links.
 *
 * @param string $text
 * @return string
 */
public function doMagicLinks( $text ) {
	$protocols = wfUrlProtocolsWithoutProtRel();
	$urlCharClass = self::EXT_LINK_URL_CLASS;
	$hostStart = self::EXT_LINK_ADDR;
	$nlFreeSpace = self::SPACE_NOT_NL; # non-newline space
	$dashOrSpace = "(?:-|{$nlFreeSpace})"; # a dash or a non-newline space
	$spaceRun = "{$nlFreeSpace}++"; # possessive match of 1 or more spaces

	// Extended-mode (x) pattern: whitespace outside character classes and
	// everything after '#' up to the end of a line is ignored by PCRE.
	$pattern = '!(?: # Start cases
		(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
		(<.*?>) | # m[2]: Skip stuff inside
		# HTML elements' . "
		(\b(?i:$protocols)($hostStart$urlCharClass*)) | # m[3]: Free external links
		# m[4]: Post-protocol path
		\b(?:RFC|PMID) $spaceRun # m[5]: RFC or PMID, capture number
		([0-9]+)\b |
		\bISBN $spaceRun ( # m[6]: ISBN, capture number
		(?: 97[89] $dashOrSpace? )? # optional 13-digit ISBN prefix
		(?: [0-9] $dashOrSpace? ){9} # 9 digits with opt. delimiters
		[0-9Xx] # check digit
		)\b
		)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1469 
/**
 * Callback for doMagicLinks(): build the replacement for one match.
 *
 * @param array $m Match groups of the doMagicLinks() pattern: [1] a whole
 *   <a> element, [2] another HTML tag, [3] a free external link (with [4]
 *   the part after the protocol), [5] an RFC/PMID number, [6] an ISBN
 * @throws MWException If group 5 matched but the matched text starts with
 *   neither "RFC" nor "PMID"
 * @return string Replacement text; the unchanged match when nothing applies
 *   or when the relevant magic link type is disabled in the parser options
 */
public function magicLinkCallback( $m ) {
	if ( isset( $m[1] ) && $m[1] !== '' ) {
		# Skip anchor
		return $m[0];
	} elseif ( isset( $m[2] ) && $m[2] !== '' ) {
		# Skip HTML element
		return $m[0];
	} elseif ( isset( $m[3] ) && $m[3] !== '' ) {
		# Free external link
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	} elseif ( isset( $m[5] ) && $m[5] !== '' ) {
		# RFC or PMID
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $m[0];
			}
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$trackingCat = 'magiclink-tracking-rfc';
			$id = $m[5];
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $m[0];
			}
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$trackingCat = 'magiclink-tracking-pmid';
			$id = $m[5];
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
	} elseif ( isset( $m[6] ) && $m[6] !== ''
		&& $this->mOptions->getMagicISBNLinks()
	) {
		# ISBN
		$isbn = $m[6];
		$space = self::SPACE_NOT_NL; # non-newline space
		# Normalise every kind of separator space to a plain space. The "u"
		# modifier is required: without it \p{Zs} is tested byte by byte,
		# which rewrites only the second byte of a UTF-8 no-break space
		# (leaving invalid UTF-8) and never matches other multi-byte spaces.
		# The text was matched by a /u pattern in doMagicLinks(), so it is
		# valid UTF-8 here.
		$isbn = preg_replace( "/$space/u", ' ', $isbn );
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	} else {
		return $m[0];
	}
}
1537 
/**
 * Turn a free (unbracketed) URL into a link or an external image, after
 * moving trailing text that should not belong to the URL out of it.
 *
 * @param string $url The matched text, starting with the protocol
 * @param int $numPostProto Number of bytes matched after the protocol; if
 *   at least that many bytes end up in the trail, nothing but the protocol
 *   is left and the input is returned as plain text
 * @return string HTML, or the input text when no real URL remains
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$found = [];
	$terminatorRegex = '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/';
	if ( preg_match( $terminatorRegex, $url, $found, PREG_OFFSET_CAPTURE ) ) {
		$cutAt = $found[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Trailing punctuation goes to $trail. A right bracket only counts as
	# punctuation when the URL contains no left bracket.
	$punctuation = ',;\.:!?' . ( strpos( $url, '(' ) === false ? ')' : '' );

	$reversed = strrev( $url );
	$trailLen = strspn( $reversed, $punctuation );
	# Don't break a trailing HTML entity by moving its ';' into $trail.
	# This is hot code: substr_compare avoids creating a new string, and the
	# entity check runs on the reversed string from a fixed offset instead of
	# using a slow $-anchored match.
	# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLen
		&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $found, 0, $trailLen )
	) {
		$trailLen--;
	}
	if ( $trailLen ) {
		$trail = substr( $url, -$trailLen ) . $trail;
		$url = substr( $url, 0, -$trailLen );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html !== false ) {
		return $html . $trail;
	}

	# Not an image, make a link
	$html = Linker::makeExternalLink( $url,
		$this->getConverterLanguage()->markNoConversion( $url, true ),
		true, 'free',
		$this->getExternalLinkAttribs( $url ), $this->mTitle );
	# Register it in the output object...
	# Replace unnecessary URL escape codes with their equivalent characters
	$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );

	return $html . $trail;
}
1614 
/**
 * Convert lines of the form "== Title ==" into <hN> elements.
 *
 * Levels are handled from six equals signs down to one, so longer runs
 * are consumed before shorter ones get a chance to match.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$marker = str_repeat( '=', $level );
		$pattern = "/^{$marker}(.+){$marker}\\s*$/m";
		$text = preg_replace( $pattern, "<h{$level}>\\1</h{$level}>", $text );
	}
	return $text;
}
1631 
/**
 * Apply doQuotes() to every line of the text separately.
 *
 * @param string $text
 * @return string The processed lines, joined with newlines again
 */
public function doAllQuotes( $text ) {
	$processed = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$processed[] = $this->doQuotes( $line );
	}
	return implode( "\n", $processed );
}
1649 
/**
 * Convert apostrophe mark-up in a single line to HTML: '' toggles italics,
 * ''' toggles bold and ''''' toggles both.
 *
 * @param string $text One line of text (callers split on newlines first)
 * @return string The line with <i>/<b> tags; tags still open at the end of
 *   the line are closed
 */
public function doQuotes( $text ) {
	// Odd indexes of $arr hold the apostrophe runs, even indexes the text
	// between them.
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last byte of the text before the run
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that is preceded by a space.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state is one of '', 'i', 'b', 'ib', 'bi' (tags open, in opening
	// order) or 'both' (a ''''' was seen and the tag order is still
	// undecided; text is collected in $buffer until it is known).
	$output = '';
	$buffer = '';
	$state = '';
	foreach ( $arr as $i => $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		$thislen = strlen( $r );
		if ( $thislen == 2 ) {
			if ( $state === 'i' ) {
				$output .= '</i>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i>';
				$state = 'b';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i><b>';
				$state = 'b';
			} elseif ( $state === 'both' ) {
				$output .= '<b><i>' . $buffer . '</i>';
				$state = 'b';
			} else { // $state can be 'b' or ''
				$output .= '<i>';
				$state .= 'i';
			}
		} elseif ( $thislen == 3 ) {
			if ( $state === 'b' ) {
				$output .= '</b>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b><i>';
				$state = 'i';
			} elseif ( $state === 'ib' ) {
				$output .= '</b>';
				$state = 'i';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b>';
				$state = 'i';
			} else { // $state can be 'i' or ''
				$output .= '<b>';
				$state .= 'b';
			}
		} elseif ( $thislen == 5 ) {
			if ( $state === 'b' ) {
				$output .= '</b><i>';
				$state = 'i';
			} elseif ( $state === 'i' ) {
				$output .= '</i><b>';
				$state = 'b';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b>';
				$state = '';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i>';
				$state = '';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b></i>';
				$state = '';
			} else { // ($state == '')
				$buffer = '';
				$state = 'both';
			}
		}
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a truthiness test would treat the
	// text "0" as empty and silently drop it from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1831 
/**
 * Replace bracketed external links ([url] and [url text]) with HTML links
 * and register each target with the parser output.
 *
 * @param string $text
 * @throws MWException If splitting on the bracketed link regex fails
 * @return string
 */
public function replaceExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	// Everything before the first link
	$result = array_shift( $bits );

	// The rest comes in groups of four: URL, protocol (unused here), link
	// text, and the text following the link.
	for ( $pos = 0; $pos < count( $bits ); $pos += 4 ) {
		$url = $bits[$pos];
		$label = $bits[$pos + 2];
		$trail = $bits[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$entity = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $entity[0][1];
			$label = substr( $url, $cutAt ) . ' ' . $label;
			$url = substr( $url, 0, $cutAt );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		# Set linktype for CSS - if URL==text, link is essentially free
		$linktype = ( $label === $url ) ? 'free' : 'text';

		$dtrail = '';
		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$number = $this->getTargetLanguage()->formatNum( ++$this->mAutonumber );
			$label = '[' . $number . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		$label = $this->getConverterLanguage()->markNoConversion( $label );

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$link = Linker::makeExternalLink( $url, $label, false, $linktype,
			$this->getExternalLinkAttribs( $url ), $this->mTitle );
		$result .= $link . $dtrail . $trail;

		# Register link in the output object.
		# Replace unnecessary URL escape codes with the referenced character
		# This prevents spammers from hiding links from the filters
		$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
	}

	return $result;
}
1914 
/**
 * Work out the rel value for an external link.
 *
 * @param string|bool $url Target URL, checked against
 *   $wgNoFollowDomainExceptions
 * @param Title|null $title Title whose namespace is checked against
 *   $wgNoFollowNsExceptions; without one, false is used as the namespace
 * @return string|null 'nofollow', or null when $wgNoFollowLinks is off or
 *   one of the two exception lists applies
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	$ns = $title ? $title->getNamespace() : false;

	if ( !$wgNoFollowLinks ) {
		return null;
	}
	if ( in_array( $ns, $wgNoFollowNsExceptions ) ) {
		return null;
	}
	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}
	return 'nofollow';
}
1934 
/**
 * Build the attributes for an external link to $url.
 *
 * @param string $url
 * @return array Always has a 'rel' key (null when no rel value applies);
 *   has a 'target' key when the parser options define a link target
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	// Either 'nofollow' or null
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// Only add a separating space when there is already a value:
			// $rel may be null, and a plain "!== ''" test would then
			// produce a value with a leading space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
1965 
/**
 * Bring a URL into a canonical escaped form: unsafe raw bytes are
 * percent-encoded, and percent-escapes that stand for harmless printable
 * ASCII characters are decoded again. Which characters must stay escaped
 * depends on whether they sit in the path, the query or the fragment.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# First, make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $m ) {
			return rawurlencode( $m[0] );
		},
		$url
	);

	// Split from the right: fragment first, then query. Each entry maps the
	// character that starts the part to the characters that must stay
	// escaped inside it.
	$trailingParts = [
		# Fragment part - 'fragment'
		'#' => '"#%<>[\]^`{|}',
		# Query part - 'query' minus &=+;
		'?' => '"#%<>[\]^`{|}&=+;',
	];

	$normalized = '';
	$end = strlen( $url );
	foreach ( $trailingParts as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start === false || $start >= $end ) {
			continue;
		}
		$part = substr( $url, $start, $end - $start );
		$normalized = self::normalizeUrlComponent( $part, $unsafe ) . $normalized;
		$end = $start;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$head = self::normalizeUrlComponent( substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' );

	return $head . $normalized;
}
2010 
/**
 * Normalise the percent-escapes in one part of a URL.
 *
 * An escape is decoded when it stands for a printable ASCII character
 * (codes 33 to 126) that is not listed in $unsafe; every other escape is
 * kept, with its hex digits upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must stay escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$mustStayEscaped = $code <= 32
				|| $code >= 127
				|| strpos( $unsafe, $decoded ) !== false;

			return $mustStayEscaped ? strtoupper( $escape[0] ) : $decoded;
		},
		$component
	);
}
2025 
/**
 * Make an external image for $url if the parser options allow it.
 *
 * An image is produced when the URL matches EXT_IMAGE_REGEX and either
 * external images are allowed in general or the URL starts with one of
 * the allowed prefixes. Failing that, the on-wiki whitelist is consulted
 * if it is enabled.
 *
 * @param string $url
 * @return string|bool Image HTML, or false if no image was made
 */
private function maybeMakeExternalImage( $url ) {
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();

	# $imagesfrom may be a single prefix or an array of prefixes
	$prefixMatched = false;
	if ( !empty( $imagesfrom ) ) {
		if ( is_array( $imagesfrom ) ) {
			foreach ( $imagesfrom as $allowedPrefix ) {
				if ( strpos( $url, $allowedPrefix ) === 0 ) {
					$prefixMatched = true;
					break;
				}
			}
		} else {
			$prefixMatched = ( strpos( $url, $imagesfrom ) === 0 );
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( $html
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $html;
	}

	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $whitelist as $entry ) {
		# Ignore blank entries and comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		# Each entry is a regex fragment: escape the delimiter and match
		# case-insensitively
		$entryRegex = '/' . str_replace( '/', '\\/', $entry ) . '/i';
		if ( preg_match( $entryRegex, $url ) ) {
			# Image matches a whitelist entry
			$html = Linker::makeExternalImage( $url );
			break;
		}
	}

	return $html;
}
2083 
/**
 * Process [[ ]] wikilinks in the text and merge the resulting link
 * holders into $this->mLinkHolders.
 *
 * @param string $s
 * @return string The text as rewritten by replaceInternalLinks2()
 */
public function replaceInternalLinks( $s ) {
	// replaceInternalLinks2() rewrites $s in place and hands back the holders
	$newHolders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $newHolders );

	return $s;
}
2097 
/**
 * Process [[ ]] wikilinks in $s, rewriting $s in place, and return the
 * LinkHolderArray that collected the links turned into holders.
 *
 * The text is split on "[[" and each piece is handled in turn. Pieces
 * that do not form a usable link are appended back unchanged. Language
 * links, files, categories, self-links and media links are each handled
 * on the spot; everything else becomes a known link or a link holder.
 *
 * NOTE(review): $wgExtraInterlanguageLinkPrefixes and $wgContLang are used
 * below, but no `global` declaration for them is visible in this listing
 * (the embedded line numbers skip 2107 and 2134) -- confirm against the file.
 *
 * @param string &$s Text to process; modified in place
 * @throws MWException If $this->mTitle is null
 * @return LinkHolderArray
 */
2106  public function replaceInternalLinks2( &$s ) {
2108 
 # The title character class and both regexes are built once and then
 # reused by every later call (static locals).
2109  static $tc = false, $e1, $e1_img;
2110  # the % is needed to support urlencoded titles as well
2111  if ( !$tc ) {
2112  $tc = Title::legalChars() . '#%';
2113  # Match a link having the form [[namespace:link|alternate]]trail
2114  $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2115  # Match cases where there is no "]]", which might still be images
2116  $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2117  }
2118 
2119  $holders = new LinkHolderArray( $this );
2120 
2121  # split the entire text string on occurrences of [[
2122  $a = StringUtils::explode( '[[', ' ' . $s );
2123  # get the first element (all text up to first [[), and remove the space we added
2124  $s = $a->current();
2125  $a->next();
2126  $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2127  $s = substr( $s, 1 );
2128 
2129  $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2130  $e2 = null;
2131  if ( $useLinkPrefixExtension ) {
2132  # Match the end of a line for a word that's not followed by whitespace,
2133  # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2135  $charset = $wgContLang->linkPrefixCharset();
2136  $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2137  }
2138 
2139  if ( is_null( $this->mTitle ) ) {
2140  throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2141  }
2142  $nottalk = !$this->mTitle->isTalkPage();
2143 
 # With link prefixes enabled, remember the prefix found in the text before
 # the first "[["; it is applied to the first link inside the loop.
2144  if ( $useLinkPrefixExtension ) {
2145  $m = [];
2146  if ( preg_match( $e2, $s, $m ) ) {
2147  $first_prefix = $m[2];
2148  } else {
2149  $first_prefix = false;
2150  }
2151  } else {
2152  $prefix = '';
2153  }
2154 
2155  $useSubpages = $this->areSubpagesAllowed();
2156 
2157  // @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
2158  # Loop for each link
2159  for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2160  // @codingStandardsIgnoreEnd
2161 
2162  # Check for excessive memory usage
2163  if ( $holders->isBig() ) {
2164  # Too big
2165  # Do the existence check, replace the link holders and clear the array
2166  $holders->replace( $s );
2167  $holders->clear();
2168  }
2169 
 # Move a trailing word from the text built so far into $prefix
2170  if ( $useLinkPrefixExtension ) {
2171  if ( preg_match( $e2, $s, $m ) ) {
2172  $prefix = $m[2];
2173  $s = $m[1];
2174  } else {
2175  $prefix = '';
2176  }
2177  # first link
2178  if ( $first_prefix ) {
2179  $prefix = $first_prefix;
2180  $first_prefix = false;
2181  }
2182  }
2183 
2184  $might_be_img = false;
2185 
2186  if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2187  $text = $m[2];
2188  # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2189  # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
2190  # the real problem is with the $e1 regex
2191  # See T1500.
2192  # Still some problems for cases where the ] is meant to be outside punctuation,
2193  # and no image is in sight. See T4095.
2194  if ( $text !== ''
2195  && substr( $m[3], 0, 1 ) === ']'
2196  && strpos( $text, '[' ) !== false
2197  ) {
2198  $text .= ']'; # so that replaceExternalLinks($text) works later
2199  $m[3] = substr( $m[3], 1 );
2200  }
2201  # fix up urlencoded title texts
2202  if ( strpos( $m[1], '%' ) !== false ) {
2203  # Should anchors '#' also be rejected?
2204  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2205  }
2206  $trail = $m[3];
2207  } elseif ( preg_match( $e1_img, $line, $m ) ) {
2208  # Invalid, but might be an image with a link in its caption
2209  $might_be_img = true;
2210  $text = $m[2];
2211  if ( strpos( $m[1], '%' ) !== false ) {
2212  $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2213  }
2214  $trail = "";
2215  } else { # Invalid form; output directly
2216  $s .= $prefix . '[[' . $line;
2217  continue;
2218  }
2219 
2220  $origLink = ltrim( $m[1], ' ' );
2221 
2222  # Don't allow internal links to pages containing
2223  # PROTO: where PROTO is a valid URL protocol; these
2224  # should be external links.
2225  if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2226  $s .= $prefix . '[[' . $line;
2227  continue;
2228  }
2229 
2230  # Make subpage if necessary
2231  if ( $useSubpages ) {
2232  $link = $this->maybeDoSubpageLink( $origLink, $text );
2233  } else {
2234  $link = $origLink;
2235  }
2236 
 # $noforce is false when the target starts with ':'. Such links skip the
 # language link, file and category handling below.
2237  $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2238  if ( !$noforce ) {
2239  # Strip off leading ':'
2240  $link = substr( $link, 1 );
2241  }
2242 
2243  $unstrip = $this->mStripState->unstripNoWiki( $link );
2244  $nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
2245  if ( $nt === null ) {
2246  $s .= $prefix . '[[' . $line;
2247  continue;
2248  }
2249 
2250  $ns = $nt->getNamespace();
2251  $iw = $nt->getInterwiki();
2252 
2253  if ( $might_be_img ) { # if this is actually an invalid link
2254  if ( $ns == NS_FILE && $noforce ) { # but might be an image
2255  $found = false;
 # This inner loop advances the shared iterator $a, so the pieces it
 # consumes are not visited again by the outer loop.
2256  while ( true ) {
2257  # look at the next 'line' to see if we can close it there
2258  $a->next();
2259  $next_line = $a->current();
2260  if ( $next_line === false || $next_line === null ) {
2261  break;
2262  }
2263  $m = explode( ']]', $next_line, 3 );
2264  if ( count( $m ) == 3 ) {
2265  # the first ]] closes the inner link, the second the image
2266  $found = true;
2267  $text .= "[[{$m[0]}]]{$m[1]}";
2268  $trail = $m[2];
2269  break;
2270  } elseif ( count( $m ) == 2 ) {
2271  # if there's exactly one ]] that's fine, we'll keep looking
2272  $text .= "[[{$m[0]}]]{$m[1]}";
2273  } else {
2274  # if $next_line is invalid too, we need look no further
2275  $text .= '[[' . $next_line;
2276  break;
2277  }
2278  }
2279  if ( !$found ) {
2280  # we couldn't find the end of this imageLink, so output it raw
2281  # but don't ignore what might be perfectly normal links in the text we've examined
2282  $holders->merge( $this->replaceInternalLinks2( $text ) );
2283  $s .= "{$prefix}[[$link|$text";
2284  # note: no $trail, because without an end, there *is* no trail
2285  continue;
2286  }
2287  } else { # it's not an image, so output it raw
2288  $s .= "{$prefix}[[$link|$text";
2289  # note: no $trail, because without an end, there *is* no trail
2290  continue;
2291  }
2292  }
2293 
2294  $wasblank = ( $text == '' );
2295  if ( $wasblank ) {
2296  $text = $link;
2297  } else {
2298  # T6598 madness. Handle the quotes only if they come from the alternate part
2299  # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2300  # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2301  # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2302  $text = $this->doQuotes( $text );
2303  }
2304 
2305  # Link not escaped by : , create the various objects
2306  if ( $noforce && !$nt->wasLocalInterwiki() ) {
2307  # Interwikis
2308  if (
2309  $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2310  Language::fetchLanguageName( $iw, null, 'mw' ) ||
2311  in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
2312  )
2313  ) {
2314  # T26502: filter duplicates
2315  if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2316  $this->mLangLinkLanguages[$iw] = true;
2317  $this->mOutput->addLanguageLink( $nt->getFullText() );
2318  }
2319 
 # The language link itself produces no output at this position
2320  $s = rtrim( $s . $prefix );
2321  $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
2322  continue;
2323  }
2324 
2325  if ( $ns == NS_FILE ) {
2326  if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
2327  if ( $wasblank ) {
2328  # if no parameters were passed, $text
2329  # becomes something like "File:Foo.png",
2330  # which we don't want to pass on to the
2331  # image generator
2332  $text = '';
2333  } else {
2334  # recursively parse links inside the image caption
2335  # actually, this will parse them in any other parameters, too,
2336  # but it might be hard to fix that, and it doesn't matter ATM
2337  $text = $this->replaceExternalLinks( $text );
2338  $holders->merge( $this->replaceInternalLinks2( $text ) );
2339  }
2340  # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2341  $s .= $prefix . $this->armorLinks(
2342  $this->makeImage( $nt, $text, $holders ) ) . $trail;
2343  continue;
2344  }
2345  } elseif ( $ns == NS_CATEGORY ) {
2346  $s = rtrim( $s . "\n" ); # T2087
2347 
 # The link text, if any, is used as the sort key
2348  if ( $wasblank ) {
2349  $sortkey = $this->getDefaultSort();
2350  } else {
2351  $sortkey = $text;
2352  }
2353  $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2354  $sortkey = str_replace( "\n", '', $sortkey );
2355  $sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
2356  $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2357 
2361  $s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;
2362 
2363  continue;
2364  }
2365  }
2366 
2367  # Self-link checking. For some languages, variants of the title are checked in
2368  # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2369  # for linking to a different variant.
2370  if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2371  $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2372  continue;
2373  }
2374 
2375  # NS_MEDIA is a pseudo-namespace for linking directly to a file
2376  # @todo FIXME: Should do batch file existence checks, see comment below
2377  if ( $ns == NS_MEDIA ) {
2378  # Give extensions a chance to select the file revision for us
2379  $options = [];
2380  $descQuery = false;
2381  Hooks::run( 'BeforeParserFetchFileAndTitle',
2382  [ $this, $nt, &$options, &$descQuery ] );
2383  # Fetch and register the file (file title may be different via hooks)
2384  list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2385  # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2386  $s .= $prefix . $this->armorLinks(
2387  Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2388  continue;
2389  }
2390 
2391  # Some titles, such as valid special pages or files in foreign repos, should
2392  # be shown as bluelinks even though they're not included in the page table
2393  # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2394  # batch file existence checks for NS_FILE and NS_MEDIA
2395  if ( $iw == '' && $nt->isAlwaysKnown() ) {
2396  $this->mOutput->addLink( $nt );
2397  $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2398  } else {
2399  # Links will be added to the output link list after checking
2400  $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2401  }
2402  }
2403  return $holders;
2404  }
2405 
/**
 * Build the HTML for a link to a page that is treated as known, with
 * URL protocols in the result armoured against later external-link
 * replacement.
 *
 * The trail is split with Linker::splitTrail(); the first part is placed
 * inside the link caption, the second is appended after the link.
 *
 * @param Title $nt Link target
 * @param string $text Caption HTML; when empty the escaped prefixed title is used
 * @param string $trail Text following the link
 * @param string $prefix Text placed at the start of the caption
 * @return string Armoured link HTML followed by the remaining trail
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	$trailParts = Linker::splitTrail( $trail );
	$linkedTrail = $trailParts[0];
	$plainTrail = $trailParts[1];

	// Fall back to the escaped page name when no caption was supplied
	$caption = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$renderer = $this->getLinkRenderer();
	$html = $renderer->makeKnownLink(
		$nt,
		new HtmlArmor( $prefix . $caption . $linkedTrail )
	);

	return $this->armorLinks( $html ) . $plainTrail;
}
2432 
/**
 * Insert a NOPARSE marker in front of every URL protocol found in the
 * text, so that the protocol no longer starts at a word boundary match
 * for later link processing.
 *
 * @param string $text
 * @return string Text with MARKER_PREFIX . "NOPARSE" inserted before each protocol
 */
public function armorLinks( $text ) {
	// Protocols are matched case-insensitively at a word boundary
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$armoured = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $armoured, $text );
}
2447 
/**
 * Report whether the namespace of the title being parsed has subpages
 * enabled, as decided by MWNamespace::hasSubpages().
 *
 * @return bool
 */
public function areSubpagesAllowed() {
	$currentNamespace = $this->mTitle->getNamespace();

	return MWNamespace::hasSubpages( $currentNamespace );
}
2456 
/**
 * Normalise a possibly relative subpage link against the title being
 * parsed. Delegates entirely to Linker::normalizeSubpageLink().
 *
 * @param string $target Link target as written
 * @param string &$text Link text, passed on by reference
 * @return string Whatever Linker::normalizeSubpageLink() returns
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$contextTitle = $this->mTitle;

	return Linker::normalizeSubpageLink( $contextTitle, $target, $text );
}
2468 
/**
 * Run the block-level pass over the text. Thin wrapper around
 * BlockLevelPass::doBlockLevels().
 *
 * @param string $text
 * @param bool $linestart Passed through unchanged to the block-level pass
 * @return string
 */
public function doBlockLevels( $text, $linestart ) {
	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $processed;
}
2480 
/**
 * Return the value of a magic variable identified by $index.
 *
 * Flow, as visible below:
 *  - throws if no title is set;
 *  - runs the 'ParserGetVariableValueVarCache' hook and, unless the hook
 *    vetoes it, returns a value already stored in $this->mVarCache;
 *  - computes the timestamp from the parser options and lets the
 *    'ParserGetVariableValueTs' hook adjust it;
 *  - computes the value in the switch; unknown indexes are handed to the
 *    'ParserGetVariableValueSwitch' hook;
 *  - stores the computed value in $this->mVarCache before returning it.
 *
 * Several cases (articlepath, sitename, server, servername, scriptpath,
 * stylepath, directionmark, contentlanguage) and the default case return
 * directly, so their results are not written to the cache here.
 *
 * NOTE(review): $wgContLang, $wgArticlePath, $wgSitename, $wgServer,
 * $wgServerName, $wgScriptPath, $wgStylePath and $wgLanguageCode are read
 * below, but no `global` declaration for them is visible in this listing.
 * Confirm against the full source.
 *
 * @param string $index Magic variable ID
 * @param bool|PPFrame $frame Only passed on to the 'ParserGetVariableValueSwitch' hook
 * @throws MWException When no title is set on the parser
 * @return mixed Value of the variable; null is possible (for example 'pageid' on a missing page)
 */
2492  public function getVariableValue( $index, $frame = false ) {
2495 
2496  if ( is_null( $this->mTitle ) ) {
2497  // If no title set, bad things are going to happen
2498  // later. Title should always be set since this
2499  // should only be called in the middle of a parse
2500  // operation (but the unit-tests do funky stuff)
2501  throw new MWException( __METHOD__ . ' Should only be '
2502  . ' called while parsing (no title set)' );
2503  }
2504 
2505  // Avoid PHP 7.1 warning from passing $this by reference
2506  $parser = $this;
2507 
// The hook receives the cache by reference; a falsy hook result skips the cache lookup.
2512  if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) ) {
2513  if ( isset( $this->mVarCache[$index] ) ) {
2514  return $this->mVarCache[$index];
2515  }
2516  }
2517 
// $ts is passed by reference, so the hook may replace the timestamp used below.
2518  $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2519  Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );
2520 
2521  $pageLang = $this->getFunctionLang();
2522 
2523  switch ( $index ) {
2524  case '!':
2525  $value = '|';
2526  break;
2527  case 'currentmonth':
2528  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
2529  break;
2530  case 'currentmonth1':
2531  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2532  break;
2533  case 'currentmonthname':
2534  $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2535  break;
2536  case 'currentmonthnamegen':
2537  $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2538  break;
2539  case 'currentmonthabbrev':
2540  $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2541  break;
2542  case 'currentday':
2543  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
2544  break;
2545  case 'currentday2':
2546  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
2547  break;
2548  case 'localmonth':
2549  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
2550  break;
2551  case 'localmonth1':
2552  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2553  break;
2554  case 'localmonthname':
2555  $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2556  break;
2557  case 'localmonthnamegen':
2558  $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2559  break;
2560  case 'localmonthabbrev':
2561  $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2562  break;
2563  case 'localday':
2564  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
2565  break;
2566  case 'localday2':
2567  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
2568  break;
2569  case 'pagename':
2570  $value = wfEscapeWikiText( $this->mTitle->getText() );
2571  break;
2572  case 'pagenamee':
2573  $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2574  break;
2575  case 'fullpagename':
2576  $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2577  break;
2578  case 'fullpagenamee':
2579  $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2580  break;
2581  case 'subpagename':
2582  $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2583  break;
2584  case 'subpagenamee':
2585  $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2586  break;
2587  case 'rootpagename':
2588  $value = wfEscapeWikiText( $this->mTitle->getRootText() );
2589  break;
2590  case 'rootpagenamee':
2591  $value = wfEscapeWikiText( wfUrlencode( str_replace(
2592  ' ',
2593  '_',
2594  $this->mTitle->getRootText()
2595  ) ) );
2596  break;
2597  case 'basepagename':
2598  $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2599  break;
2600  case 'basepagenamee':
2601  $value = wfEscapeWikiText( wfUrlencode( str_replace(
2602  ' ',
2603  '_',
2604  $this->mTitle->getBaseText()
2605  ) ) );
2606  break;
2607  case 'talkpagename':
2608  if ( $this->mTitle->canTalk() ) {
2609  $talkPage = $this->mTitle->getTalkPage();
2610  $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2611  } else {
2612  $value = '';
2613  }
2614  break;
2615  case 'talkpagenamee':
2616  if ( $this->mTitle->canTalk() ) {
2617  $talkPage = $this->mTitle->getTalkPage();
2618  $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
2619  } else {
2620  $value = '';
2621  }
2622  break;
2623  case 'subjectpagename':
2624  $subjPage = $this->mTitle->getSubjectPage();
2625  $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
2626  break;
2627  case 'subjectpagenamee':
2628  $subjPage = $this->mTitle->getSubjectPage();
2629  $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
2630  break;
2631  case 'pageid': // requested in T25427
2632  $pageid = $this->getTitle()->getArticleID();
2633  if ( $pageid == 0 ) {
2634  # 0 means the page doesn't exist in the database,
2635  # which means the user is previewing a new page.
2636  # The vary-revision flag must be set, because the magic word
2637  # will have a different value once the page is saved.
2638  $this->mOutput->setFlag( 'vary-revision' );
2639  wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
2640  }
// A null value stored in the cache is not seen by the isset() lookup above,
// so this branch is recomputed on every call while the page ID is 0.
2641  $value = $pageid ? $pageid : null;
2642  break;
2643  case 'revisionid':
2644  # Let the edit saving system know we should parse the page
2645  # *after* a revision ID has been assigned.
2646  $this->mOutput->setFlag( 'vary-revision-id' );
2647  wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision-id...\n" );
2648  $value = $this->mRevisionId;
// No revision ID yet: ask the options' callback for one and record on the output that it was used.
2649  if ( !$value && $this->mOptions->getSpeculativeRevIdCallback() ) {
2650  $value = call_user_func( $this->mOptions->getSpeculativeRevIdCallback() );
2651  $this->mOutput->setSpeculativeRevIdUsed( $value );
2652  }
2653  break;
2654  case 'revisionday':
2655  # Let the edit saving system know we should parse the page
2656  # *after* a revision ID has been assigned. This is for null edits.
2657  $this->mOutput->setFlag( 'vary-revision' );
2658  wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
2659  $value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
2660  break;
2661  case 'revisionday2':
2662  # Let the edit saving system know we should parse the page
2663  # *after* a revision ID has been assigned. This is for null edits.
2664  $this->mOutput->setFlag( 'vary-revision' );
2665  wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
2666  $value = substr( $this->getRevisionTimestamp(), 6, 2 );
2667  break;
2668  case 'revisionmonth':
2669  # Let the edit saving system know we should parse the page
2670  # *after* a revision ID has been assigned. This is for null edits.
2671  $this->mOutput->setFlag( 'vary-revision' );
2672  wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
2673  $value = substr( $this->getRevisionTimestamp(), 4, 2 );
2674  break;
2675  case 'revisionmonth1':
2676  # Let the edit saving system know we should parse the page
2677  # *after* a revision ID has been assigned. This is for null edits.
2678  $this->mOutput->setFlag( 'vary-revision' );
2679  wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
2680  $value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
2681  break;
2682  case 'revisionyear':
2683  # Let the edit saving system know we should parse the page
2684  # *after* a revision ID has been assigned. This is for null edits.
2685  $this->mOutput->setFlag( 'vary-revision' );
2686  wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
2687  $value = substr( $this->getRevisionTimestamp(), 0, 4 );
2688  break;
2689  case 'revisiontimestamp':
2690  # Let the edit saving system know we should parse the page
2691  # *after* a revision ID has been assigned. This is for null edits.
2692  $this->mOutput->setFlag( 'vary-revision' );
2693  wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
2694  $value = $this->getRevisionTimestamp();
2695  break;
2696  case 'revisionuser':
2697  # Let the edit saving system know we should parse the page
2698  # *after* a revision ID has been assigned for null edits.
2699  $this->mOutput->setFlag( 'vary-user' );
2700  wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-user...\n" );
2701  $value = $this->getRevisionUser();
2702  break;
2703  case 'revisionsize':
2704  $value = $this->getRevisionSize();
2705  break;
2706  case 'namespace':
2707  $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
2708  break;
2709  case 'namespacee':
2710  $value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
2711  break;
2712  case 'namespacenumber':
2713  $value = $this->mTitle->getNamespace();
2714  break;
2715  case 'talkspace':
2716  $value = $this->mTitle->canTalk()
2717  ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
2718  : '';
2719  break;
2720  case 'talkspacee':
2721  $value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
2722  break;
2723  case 'subjectspace':
2724  $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
2725  break;
2726  case 'subjectspacee':
2727  $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
2728  break;
2729  case 'currentdayname':
// format( 'w' ) is 0-based; one is added before the weekday name lookup.
2730  $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
2731  break;
2732  case 'currentyear':
2733  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
2734  break;
2735  case 'currenttime':
2736  $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
2737  break;
2738  case 'currenthour':
2739  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
2740  break;
2741  case 'currentweek':
2742  # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2743  # int to remove the padding
2744  $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
2745  break;
2746  case 'currentdow':
2747  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
2748  break;
2749  case 'localdayname':
2750  $value = $pageLang->getWeekdayName(
2751  (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
2752  );
2753  break;
2754  case 'localyear':
2755  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
2756  break;
2757  case 'localtime':
2758  $value = $pageLang->time(
2759  MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
2760  false,
2761  false
2762  );
2763  break;
2764  case 'localhour':
2765  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
2766  break;
2767  case 'localweek':
2768  # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2769  # int to remove the padding
2770  $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
2771  break;
2772  case 'localdow':
2773  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
2774  break;
2775  case 'numberofarticles':
2776  $value = $pageLang->formatNum( SiteStats::articles() );
2777  break;
2778  case 'numberoffiles':
2779  $value = $pageLang->formatNum( SiteStats::images() );
2780  break;
2781  case 'numberofusers':
2782  $value = $pageLang->formatNum( SiteStats::users() );
2783  break;
2784  case 'numberofactiveusers':
2785  $value = $pageLang->formatNum( SiteStats::activeUsers() );
2786  break;
2787  case 'numberofpages':
2788  $value = $pageLang->formatNum( SiteStats::pages() );
2789  break;
2790  case 'numberofadmins':
2791  $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
2792  break;
2793  case 'numberofedits':
2794  $value = $pageLang->formatNum( SiteStats::edits() );
2795  break;
2796  case 'currenttimestamp':
2797  $value = wfTimestamp( TS_MW, $ts );
2798  break;
2799  case 'localtimestamp':
2800  $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
2801  break;
2802  case 'currentversion':
// NOTE(review): no assignment to $value is visible for this case in this
// listing (the numbering skips a line) — confirm against the full source.
2804  break;
// The cases below return directly and therefore bypass the cache write at the end.
2805  case 'articlepath':
2806  return $wgArticlePath;
2807  case 'sitename':
2808  return $wgSitename;
2809  case 'server':
2810  return $wgServer;
2811  case 'servername':
2812  return $wgServerName;
2813  case 'scriptpath':
2814  return $wgScriptPath;
2815  case 'stylepath':
2816  return $wgStylePath;
2817  case 'directionmark':
2818  return $pageLang->getDirMark();
2819  case 'contentlanguage':
2821  return $wgLanguageCode;
2822  case 'pagelanguage':
2823  $value = $pageLang->getCode();
2824  break;
2825  case 'cascadingsources':
// NOTE(review): no assignment to $value is visible for this case in this
// listing (the numbering skips a line) — confirm against the full source.
2827  break;
2828  default:
// Unknown index: let extensions supply the value through $ret (passed by reference).
2829  $ret = null;
2830  Hooks::run(
2831  'ParserGetVariableValueSwitch',
2832  [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
2833  );
2834 
2835  return $ret;
2836  }
2837 
// Remember the computed value for later lookups of the same index.
2838  if ( $index ) {
2839  $this->mVarCache[$index] = $value;
2840  }
2841 
2842  return $value;
2843  }
2844 
/**
 * (Re)build the magic word arrays used for variable and subst matching
 * from the ID lists supplied by MagicWord.
 */
public function initialiseVariables() {
	// Fetch both ID lists before constructing either array
	$idLists = [
		MagicWord::getVariableIDs(),
		MagicWord::getSubstIDs(),
	];

	$this->mVariables = new MagicWordArray( $idLists[0] );
	$this->mSubstWords = new MagicWordArray( $idLists[1] );
}
2857 
/**
 * Preprocess wikitext into the object form produced by the parser's
 * preprocessor.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Passed through to the preprocessor; self::PTD_FOR_INCLUSION
 *   is the flag used by callers in this class
 * @return mixed Whatever the preprocessor's preprocessToObj() returns
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
2884 
/**
 * Split a string into its leading whitespace, trimmed middle and
 * trailing whitespace. "Whitespace" is whatever trim() strips by default.
 *
 * All three elements are always strings. substr() can return false
 * instead of '' for empty or out-of-range slices on PHP versions before
 * 8.0, so the slices are cast explicitly.
 *
 * @param string $s
 * @return string[] [ leading whitespace, trimmed text, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	$trimmed = trim( $s );
	$leadLength = strlen( $s ) - strlen( ltrim( $s ) );

	$leading = (string)substr( $s, 0, $leadLength );
	// Everything after the leading whitespace and the trimmed core is trailing whitespace
	$trailing = (string)substr( $s, $leadLength + strlen( $trimmed ) );

	return [ $leading, $trimmed, $trailing ];
}
2904 
/**
 * Preprocess the text and expand it in the given frame.
 *
 * Empty text, and text longer than the maximum include size from the
 * parser options, is returned unchanged.
 *
 * @param string $text Wikitext to expand
 * @param bool|PPFrame|array $frame False for a fresh frame, a PPFrame to expand
 *   in, or anything else to be wrapped in a custom frame
 * @param bool $argsOnly When true, expansion uses PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Nothing to do for empty text
	$length = strlen( $text );
	if ( $length < 1 ) {
		return $text;
	}
	# Prevent too big inclusions
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			wfDebug( __METHOD__ . " called using plain parameters instead of "
				. "a PPFrame instance. Creating custom frame.\n" );
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$root = $this->preprocessToDom( $text );
	$expandFlags = 0;
	if ( $argsOnly ) {
		$expandFlags = PPFrame::NO_TEMPLATES;
	}

	return $frame->expand( $root, $expandFlags );
}
2946 
/**
 * Turn a flat list of template-style arguments into an associative array.
 *
 * Arguments without "=" are stored under consecutive integer keys starting
 * at 1, untrimmed. Arguments with "=" are split at the first "=" and stored
 * as trimmed name => trimmed value.
 *
 * The former checks for a false name or value were unreachable, because
 * trim() always returns a string, and have been removed.
 *
 * @param array $args List of argument strings
 * @return array
 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	$nextPosition = 1;
	foreach ( $args as $arg ) {
		// Limit of 2 keeps any further "=" characters inside the value
		$pair = explode( '=', $arg, 2 );
		if ( count( $pair ) < 2 ) {
			$assocArgs[$nextPosition++] = $arg;
		} else {
			$assocArgs[trim( $pair[0] )] = trim( $pair[1] );
		}
	}

	return $assocArgs;
}
2975 
/**
 * Record that a parser limitation was hit: add the "<type>-warning"
 * message as an output warning and add the "<type>-category" tracking
 * category.
 *
 * @param string $limitationType Message key prefix for the limitation
 * @param string|int $current Passed as the first numeric message parameter
 * @param string|int $max Passed as the second numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max does no harm when the message does not use them.
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( $limitationType . '-warning' )->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( $limitationType . '-category' );
}
3011 
/**
 * Expand a single double-brace construct: a variable, a parser function
 * call or a template / special page / interwiki transclusion.
 *
 * The stages are tried in order until one of them sets $found:
 * subst handling, variables, msg/msgnw/raw prefixes, parser functions,
 * title resolution with depth check, and finally loading the target.
 * When nothing matches, the original wikitext is rebuilt and returned.
 *
 * Changes compared with the previous revision of this method:
 *  - the leading colon is now really stripped from $originalTitle in the
 *    oversize-inclusion fallback (the preg_replace() result used to be
 *    discarded);
 *  - getLength() is only called when $args is an object, since $args is a
 *    plain empty array when the piece has no parts;
 *  - a catch block that only rethrew the exception was removed.
 *
 * @param array $piece Keys read here: 'title', 'parts' and 'lineStart'
 * @param PPFrame $frame Frame in which the piece is being expanded
 * @return array [ 'object' => ... ] for a node to be expanded by the caller
 *   in the current frame, otherwise [ 'text' => ... ]
 */
public function braceSubstitution( $piece, $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1.
	# When the piece has no parts it is a plain empty array instead of a node list,
	# so every getLength() call below is guarded with is_object().
	$args = ( null == $piece['parts'] ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			# In PST: literal with no prefix, expand with subst: or safesubst:
			$literal = ( $substMatch === false );
		} else {
			# Not in PST: literal with plain subst:, expand with safesubst: or no prefix
			$literal = ( $substMatch == 'subst' );
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && ( !is_object( $args ) || $args->getLength() == 0 ) ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->getVariableValue( $id, $frame );
			if ( MagicWord::getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = MagicWord::get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = MagicWord::get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = MagicWord::get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = is_object( $args ) ? $args->getLength() : 0;
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			# The interface for parser functions allows for extracting
			# flags into the local scope. Extract any forwarded flags
			# here.
			# NOTE(review): extract() lets a function hook overwrite any local
			# variable of this method, not only the flags declared above.
			extract( $result );
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = $this->maybeDoSubpageLink( $part1, $subpage );
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getConverterLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = SpecialPageFactory::getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = is_object( $args ) ? $args->getLength() : 0;
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = SpecialPageFactory::capturePath(
					$title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				wfDebug( __METHOD__ . ": template inclusion denied for " .
					$title->getPrefixedDBkey() . "\n" );
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# Strip a leading colon so the link does not start with "[[::".
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3333 
/**
 * Look up a parser function by name and call its registered hook.
 *
 * The name is first looked up in the case-sensitive synonyms, then,
 * lower-cased with the content language, in the case-insensitive ones.
 * Depending on the SFH_OBJECT_ARGS flag the callback receives either
 * ( $parser, $frame, $nodeArgs ) or ( $parser, ...$expandedTrimmedStrings ).
 *
 * The hook may return a plain value or an array; both are normalised to
 * an array with at least 'found' and, when supplied, 'text'. Unless the
 * hook sets 'noparse' to a falsy value the text is returned as is;
 * otherwise it is preprocessed and 'isChildObj' is set.
 *
 * @param PPFrame $frame Frame used to expand node arguments
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments; PPNode objects or plain values
 * @throws MWException When the registered hook is not callable
 * @return array [ 'found' => false ] when no such function is registered,
 *   otherwise the normalised hook result
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	global $wgContLang;

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		throw new MWException( "Function hook for $function is not callable\n" );
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	$allArgs = [ &$parser ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				# The first argument is passed through untouched, even as a string
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->getPreprocessor()->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# Array union: only adds 'found' when the hook did not set it
		$result += [
			'found' => true,
		];
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3443 
/**
 * Get the preprocessed DOM for a template, using the per-parser redirect
 * and DOM caches.
 *
 * If a redirect is recorded for the requested title, the lookup continues
 * with the recorded target. On a cache miss the text is fetched through
 * fetchTemplateAndTitle(), preprocessed for inclusion and cached; a failed
 * fetch is cached as false.
 *
 * @param Title $title
 * @return array Two elements: the DOM (or false when no text could be
 *   fetched) and the Title the content finally came from
 */
public function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return [ $this->mTplDomCache[$titleText], $title ];
	}

	# Cache miss, fetch the template text; $title may come back changed
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		$this->mTplDomCache[$titleText] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	# Remember where the requested title ended up, for later lookups
	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			[ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3483 
/**
 * Fetch the current revision of a title through the callback configured in
 * the parser options, memoising the result per prefixed DB key.
 *
 * The method signature was missing from this listing; name and parameter are
 * taken from the call site in statelessFetchTemplate().
 *
 * @param Title $title
 * @return mixed Whatever the current-revision callback returned for $title
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$cacheKey = $title->getPrefixedDBkey();
	# Lazily create the LRU cache (capped at 100 entries) on first use
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$this->currentRevisionCache->set( $cacheKey,
			// Defaults to Parser::statelessFetchRevision()
			call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
		);
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3508 
/**
 * Default callback for fetching the current revision of a title without
 * relying on parser state.
 *
 * @param Title $title
 * @param Parser|bool $parser Unused by this implementation
 * @return Revision|bool The revision with its title set, or the falsy value
 *   returned by the lookup when nothing was found
 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	$pageId = $title->getArticleID();
	$revId = $title->getLatestRevID();

	# NOTE(review): this lookup line was absent from the listing, leaving $rev
	# undefined; restored from upstream MediaWiki - confirm against the real file.
	$rev = Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $pageId, $revId );
	if ( $rev ) {
		$rev->setTitle( $title );
	}

	return $rev;
}
3529 
/**
 * Fetch the unparsed text of a template via the configured template callback
 * and register every reported dependency with the parser output.
 *
 * @param Title $title
 * @return array Two-element list [ $text, $finalTitle ]
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

	// We use U+007F DELETE to distinguish strip markers from regular text.
	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		$text = strtr( $text, "\x7f", "?" );
	}

	if ( isset( $fetched['finalTitle'] ) ) {
		$finalTitle = $fetched['finalTitle'];
	} else {
		$finalTitle = $title;
	}

	if ( isset( $fetched['deps'] ) ) {
		foreach ( $fetched['deps'] as $dependency ) {
			$this->mOutput->addTemplate(
				$dependency['title'],
				$dependency['page_id'],
				$dependency['rev_id']
			);
			if ( $dependency['title']->equals( $this->getTitle() ) ) {
				// If we transclude ourselves, the final result
				// will change based on the new version of the page
				$this->mOutput->setFlag( 'vary-revision' );
			}
		}
	}

	return [ $text, $finalTitle ];
}
3557 
/**
 * Fetch only the text of a template, discarding the final title.
 *
 * @param Title $title
 * @return string|bool First element of fetchTemplateAndTitle()'s result
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );
	return $text;
}
3566 
/**
 * Default template callback: load the text of a template, following at most
 * one redirect, and collect the titles/revisions the result depends on.
 *
 * @param Title $title
 * @param Parser|bool $parser Parser to fetch current revisions through, or
 *   false to look the revision up directly
 * @return array Map with keys 'text' (string or false), 'finalTitle' and
 *   'deps' (list of [ 'title', 'page_id', 'rev_id' ] maps)
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];

	# Loop to fetch the article, with up to 1 redirect
	// @codingStandardsIgnoreStart Generic.CodeAnalysis.ForLoopWithTestFunctionCall.NotAllowed
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		// @codingStandardsIgnoreEnd
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			# NOTE(review): this branch was empty in the listing, leaving $rev
			# undefined when no parser is passed; restored from upstream
			# MediaWiki - confirm against the real file.
			$rev = Revision::newFromTitle( $title );
		}
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = LinkCache::singleton();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id ];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id ];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			Hooks::run( 'ParserFetchTemplate',
				[ $parser, $title, $rev, &$text, &$deps ] );

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# The global declaration was missing from the listing; without it
			# $wgContLang is an undefined local here.
			global $wgContLang;
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}
	return [
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps ];
}
3661 
/**
 * Fetch a file, discarding the (possibly updated) title.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return File|bool First element of fetchFileAndTitle()'s result
 */
public function fetchFile( $title, $options = [] ) {
	list( $file ) = $this->fetchFileAndTitle( $title, $options );
	return $file;
}
3672 
/**
 * Fetch a file and register it (under the requested title and, if different,
 * under the file's own title) as an image dependency of the output.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two-element list [ $file, $title ]
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	$titleChanged = $file && !$title->equals( $file->getTitle() );
	if ( $titleChanged ) {
		# Update fetched file title
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}

	return [ $file, $title ];
}
3694 
/**
 * Look a file up without registering it as a dependency.
 *
 * @param Title $title
 * @param array $options May contain 'broken' (force a miss) or 'sha1'
 *   (look up by key); the whole array is forwarded to the lookup
 * @return File|bool
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return wfFindFile( $title, $options );
}
3715 
/**
 * Transclude a page from another wiki, if scary transclusion is enabled.
 *
 * @param Title $title
 * @param string $action Value for the 'action' query parameter of the URL
 * @return string Fetched text, or a message explaining why nothing was fetched
 */
public function interwikiTransclude( $title, $action ) {
	global $wgEnableScaryTranscluding;

	if ( $wgEnableScaryTranscluding ) {
		$url = $title->getFullURL( [ 'action' => $action ] );
		if ( strlen( $url ) <= 255 ) {
			return $this->fetchScaryTemplateMaybeFromCache( $url );
		}
		# URLs longer than 255 bytes are refused
		$refusal = 'scarytranscludetoolong';
	} else {
		$refusal = 'scarytranscludedisabled';
	}

	return wfMessage( $refusal )->inContentLanguage()->text();
}
3738 
/**
 * Return the contents of a remote URL, served from the 'transcache' table
 * when a sufficiently recent row exists, otherwise fetched over HTTP and
 * written back to the table.
 *
 * @param string $url
 * @return string Contents, or an error message when the request failed
 */
public function fetchScaryTemplateMaybeFromCache( $url ) {
	global $wgTranscludeCacheExpiry;

	$replica = wfGetDB( DB_REPLICA );
	$oldestAllowed = $replica->timestamp( time() - $wgTranscludeCacheExpiry );
	$cached = $replica->selectRow(
		'transcache',
		[ 'tc_time', 'tc_contents' ],
		[ 'tc_url' => $url, "tc_time >= " . $replica->addQuotes( $oldestAllowed ) ]
	);
	if ( $cached ) {
		return $cached->tc_contents;
	}

	$req = MWHttpRequest::factory( $url, [], __METHOD__ );
	$status = $req->execute(); // Status object
	if ( !$status->isOK() ) {
		if ( $req->getStatus() != 200 ) {
			// Though we failed to fetch the content, this status is useless.
			return wfMessage( 'scarytranscludefailed-httpstatus' )
				->params( $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
		}
		return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
	}
	$text = $req->getContent();

	# Store the fresh copy, keyed on the URL
	$master = wfGetDB( DB_MASTER );
	$row = [
		'tc_url' => $url,
		'tc_time' => $master->timestamp( time() ),
		'tc_contents' => $text
	];
	$master->replace( 'transcache', [ 'tc_url' ], $row );

	return $text;
}
3773 
/**
 * Expand a triple-brace argument ({{{name|default}}}) against a frame.
 *
 * @param array $piece Map with 'title' (argument name node) and 'parts'
 *   (node list holding the default, if any)
 * @param PPFrame $frame
 * @return array Either [ 'object' => ... ] or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );

	# Remember the lookup result now; $text may get a warning appended below
	$unmatched = ( $text === false );
	$object = false;

	if ( $unmatched && $parts->getLength() > 0 ) {
		$defaultsApply = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultsApply ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	$withinLimit = $this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( !$withinLimit ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $unmatched && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( !$withinLimit ) {
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	if ( $object === false ) {
		return [ 'text' => $text ];
	}
	return [ 'object' => $object ];
}
3820 
/**
 * Expand an extension tag: either call the registered tag hook (HTML output
 * or function-tag hooks) or rebuild the tag source, then return the result
 * directly or hide it behind a strip marker.
 *
 * @param array $params Map that may contain 'name', 'attr', 'inner', 'close'
 *   (each expanded through $frame) and 'attributes' (extra attribute map)
 * @param PPFrame $frame
 * @throws MWException When a registered hook is not callable, or when the
 *   resulting marker type is not 'none', 'nowiki' or 'general'
 * @return string The hook output (marker type 'none'), an expansion error
 *   string passed through unchanged, or the strip marker
 */
3836  public function extensionSubstitution( $params, $frame ) {
      # Expansion errors are recognised by this prefix; 20 is its length.
3837  static $errorStr = '<span class="error">';
3838  static $errorLen = 20;
3839 
3840  $name = $frame->expand( $params['name'] );
3841  if ( substr( $name, 0, $errorLen ) === $errorStr ) {
3842  // Probably expansion depth or node count exceeded. Just punt the
3843  // error up.
3844  return $name;
3845  }
3846 
3847  $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
3848  if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
3849  // See above
3850  return $attrText;
3851  }
3852 
3853  // We can't safely check if the expansion for $content resulted in an
3854  // error, because the content could happen to be the error string
3855  // (T149622).
3856  $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
3857 
      # The marker index is consumed here even if no marker ends up being used.
3858  $marker = self::MARKER_PREFIX . "-$name-"
3859  . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;
3860 
3861  $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
3862  ( $this->ot['html'] || $this->ot['pre'] );
3863  if ( $isFunctionTag ) {
3864  $markerType = 'none';
3865  } else {
3866  $markerType = 'general';
3867  }
3868  if ( $this->ot['html'] || $isFunctionTag ) {
3869  $name = strtolower( $name );
3870  $attributes = Sanitizer::decodeTagAttributes( $attrText );
3871  if ( isset( $params['attributes'] ) ) {
      # Array union: attributes parsed from the tag text win over the extra ones.
3872  $attributes = $attributes + $params['attributes'];
3873  }
3874 
3875  if ( isset( $this->mTagHooks[$name] ) ) {
3876  # Workaround for PHP bug 35229 and similar
3877  if ( !is_callable( $this->mTagHooks[$name] ) ) {
3878  throw new MWException( "Tag hook for $name is not callable\n" );
3879  }
3880  $output = call_user_func_array( $this->mTagHooks[$name],
3881  [ $content, $attributes, $this, $frame ] );
3882  } elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
3883  list( $callback, ) = $this->mFunctionTagHooks[$name];
3884  if ( !is_callable( $callback ) ) {
3885  throw new MWException( "Tag hook for $name is not callable\n" );
3886  }
3887 
3888  // Avoid PHP 7.1 warning from passing $this by reference
3889  $parser = $this;
3890  $output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
3891  } else {
3892  $output = '<span class="error">Invalid tag extension name: ' .
3893  htmlspecialchars( $name ) . '</span>';
3894  }
3895 
3896  if ( is_array( $output ) ) {
3897  # Extract flags to local scope (to override $markerType)
      # NOTE(review): extract() can overwrite ANY local here (e.g. $marker,
      # $output), not just $markerType, so local names in this method are
      # effectively part of the hook contract - do not rename them.
3898  $flags = $output;
3899  $output = $flags[0];
3900  unset( $flags[0] );
3901  extract( $flags );
3902  }
3903  } else {
      # No hook is run in this output mode: rebuild the tag source text instead.
3904  if ( is_null( $attrText ) ) {
3905  $attrText = '';
3906  }
3907  if ( isset( $params['attributes'] ) ) {
3908  foreach ( $params['attributes'] as $attrName => $attrValue ) {
3909  $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
3910  htmlspecialchars( $attrValue ) . '"';
3911  }
3912  }
3913  if ( $content === null ) {
3914  $output = "<$name$attrText/>";
3915  } else {
      # NOTE(review): 'close' is read without isset(), unlike 'attr' and
      # 'inner' above - presumably callers always set the key; confirm.
3916  $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
3917  if ( substr( $close, 0, $errorLen ) === $errorStr ) {
3918  // See above
3919  return $close;
3920  }
3921  $output = "<$name$attrText>$content$close";
3922  }
3923  }
3924 
3925  if ( $markerType === 'none' ) {
3926  return $output;
3927  } elseif ( $markerType === 'nowiki' ) {
3928  $this->mStripState->addNoWiki( $marker, $output );
3929  } elseif ( $markerType === 'general' ) {
3930  $this->mStripState->addGeneral( $marker, $output );
3931  } else {
3932  throw new MWException( __METHOD__ . ': invalid marker type' );
3933  }
3934  return $marker;
3935  }
3936 
/**
 * Add to one of the include-size counters unless that would exceed the
 * configured maximum include size.
 *
 * @param string $type Key into the include-size counters
 * @param int $size Amount to add
 * @return bool False (counter untouched) if the limit would be exceeded,
 *   true after the counter has been increased
 */
public function incrementIncludeSize( $type, $size ) {
	if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] += $size;
	return true;
}
3952 
/**
 * Increment the expensive function count and report whether the configured
 * limit has been respected.
 *
 * NOTE(review): the signature line was missing from this listing; the name
 * below follows upstream MediaWiki and cannot be confirmed from the visible
 * source - verify against the real file.
 *
 * @return bool False once the count exceeds the expensive parser function limit
 */
public function incrementExpensiveFunctionCount() {
	$this->mExpensiveFunctionCount++;
	return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
}
3962 
/**
 * Strip double-underscore magic words (__TOC__, __NOTOC__, ...) from the
 * text and record their effects on the parser state and output.
 *
 * @param string $text
 * @return string Text with the magic words removed; the first __TOC__ is
 *   replaced by a '<!--MWTOC-->' placeholder
 */
public function doDoubleUnderscore( $text ) {

	# The position of __TOC__ needs to be recorded
	$mw = MagicWord::get( 'toc' );
	if ( $mw->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $mw->replace( '<!--MWTOC-->', $text, 1 );

		# Only keep the first one.
		$text = $mw->replace( '', $text );
	}

	# Now match and remove the rest of them
	# NOTE(review): the line defining $mwa was missing from this listing;
	# restored from upstream MediaWiki - confirm against the real file.
	$mwa = MagicWord::getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $mwa->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}
	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ always overrides __NOINDEX__, see T16899
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( $this->mDoubleUnderscores as $key => $val ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4019 
/**
 * Add a tracking category for the current title, delegating to the parser
 * output object.
 *
 * @param string $msg Message key identifying the tracking category
 * @return mixed Whatever ParserOutput::addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$result = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );
	return $result;
}
4028 
/**
 * Post-process the <h1>..<h6> headings found in rendered text: number them,
 * give each a unique anchor, attach edit-section markers, build the table of
 * contents and the section list, and splice the rebuilt headings back in.
 *
 * @param string $text Rendered text containing the heading elements
 * @param string $origText Original text; preprocessed to a DOM here so an
 *   offset can be recorded for each section
 * @param bool $isMain When true, the section list is stored on the output
 *   and the TOC may be inserted before the first heading
 * @return string $text with rebuilt headings and, where applicable, the TOC
 */
4045  public function formatHeadings( $text, $origText, $isMain = true ) {
4046  global $wgMaxTocLevel, $wgExperimentalHtmlIds;
4047 
4048  # Inhibit editsection links if requested in the page
4049  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4050  $maybeShowEditLink = $showEditLink = false;
4051  } else {
4052  $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
4053  $showEditLink = $this->mOptions->getEditSection();
4054  }
4055  if ( $showEditLink ) {
4056  $this->mOutput->setEditSectionTokens( true );
4057  }
4058 
4059  # Get all headlines for numbering them and adding funky stuff like [edit]
4060  # links - this is for later, but we need the number of headlines right now
4061  $matches = [];
4062  $numMatches = preg_match_all(
4063  '/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
4064  $text,
4065  $matches
4066  );
4067 
4068  # if there are fewer than 4 headlines in the article, do not show TOC
4069  # unless it's been explicitly enabled.
4070  $enoughToc = $this->mShowToc &&
4071  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4072 
4073  # Allow user to stipulate that a page should have a "new section"
4074  # link added via __NEWSECTIONLINK__
4075  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4076  $this->mOutput->setNewSection( true );
4077  }
4078 
4079  # Allow user to remove the "new section"
4080  # link via __NONEWSECTIONLINK__
4081  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4082  $this->mOutput->hideNewSection( true );
4083  }
4084 
4085  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4086  # override above conditions and always show TOC above first header
4087  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4088  $this->mShowToc = true;
4089  $enoughToc = true;
4090  }
4091 
4092  # headline counter
4093  $headlineCount = 0;
4094  $numVisible = 0;
4095 
4096  # Ugh .. the TOC should have neat indentation levels which can be
4097  # passed to the skin functions. These are determined here
4098  $toc = '';
4099  $full = '';
4100  $head = [];
4101  $sublevelCount = [];
4102  $levelCount = [];
4103  $level = 0;
4104  $prevlevel = 0;
4105  $toclevel = 0;
4106  $prevtoclevel = 0;
4107  $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4108  $baseTitleText = $this->mTitle->getPrefixedDBkey();
      # The output type is switched to OT_WIKI for the duration of the loop
      # and restored from $oldType once the loop has finished.
4109  $oldType = $this->mOutputType;
4110  $this->setOutputType( self::OT_WIKI );
4111  $frame = $this->getPreprocessor()->newFrame();
4112  $root = $this->preprocessToDom( $origText );
4113  $node = $root->getFirstChild();
4114  $byteOffset = 0;
4115  $tocraw = [];
4116  $refers = [];
4117 
      # Index 3 is the positional alias of the named group 'header'
      # (1 = 'level', 2 = 'attrib').
4118  $headlines = $numMatches !== false ? $matches[3] : [];
4119 
4120  foreach ( $headlines as $headline ) {
4121  $isTemplate = false;
4122  $titleText = false;
4123  $sectionIndex = false;
4124  $numbering = '';
4125  $markerMatches = [];
      # A leading heading marker carries the serial used to look up the
      # originating title and section index in $this->mHeadings.
4126  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4127  $serial = $markerMatches[1];
4128  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4129  $isTemplate = ( $titleText != $baseTitleText );
4130  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4131  }
4132 
4133  if ( $toclevel ) {
4134  $prevlevel = $level;
4135  }
4136  $level = $matches[1][$headlineCount];
4137 
4138  if ( $level > $prevlevel ) {
4139  # Increase TOC level
4140  $toclevel++;
4141  $sublevelCount[$toclevel] = 0;
4142  if ( $toclevel < $wgMaxTocLevel ) {
4143  $prevtoclevel = $toclevel;
4144  $toc .= Linker::tocIndent();
4145  $numVisible++;
4146  }
4147  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4148  # Decrease TOC level, find level to jump to
4149 
4150  for ( $i = $toclevel; $i > 0; $i-- ) {
4151  if ( $levelCount[$i] == $level ) {
4152  # Found last matching level
4153  $toclevel = $i;
4154  break;
4155  } elseif ( $levelCount[$i] < $level ) {
4156  # Found first matching level below current level
4157  $toclevel = $i + 1;
4158  break;
4159  }
4160  }
4161  if ( $i == 0 ) {
4162  $toclevel = 1;
4163  }
4164  if ( $toclevel < $wgMaxTocLevel ) {
4165  if ( $prevtoclevel < $wgMaxTocLevel ) {
4166  # Unindent only if the previous toc level was shown :p
4167  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4168  $prevtoclevel = $toclevel;
4169  } else {
4170  $toc .= Linker::tocLineEnd();
4171  }
4172  }
4173  } else {
4174  # No change in level, end TOC line
4175  if ( $toclevel < $wgMaxTocLevel ) {
4176  $toc .= Linker::tocLineEnd();
4177  }
4178  }
4179 
4180  $levelCount[$toclevel] = $level;
4181 
4182  # count number of headlines for each level
4183  $sublevelCount[$toclevel]++;
4184  $dot = 0;
4185  for ( $i = 1; $i <= $toclevel; $i++ ) {
4186  if ( !empty( $sublevelCount[$i] ) ) {
4187  if ( $dot ) {
4188  $numbering .= '.';
4189  }
4190  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4191  $dot = 1;
4192  }
4193  }
4194 
4195  # The safe header is a version of the header text safe to use for links
4196 
4197  # Remove link placeholders by the link text.
4198  # <!--LINK number-->
4199  # turns into
4200  # link text with suffix
4201  # Do this before unstrip since link text can contain strip markers
4202  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4203 
4204  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4205  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4206 
4207  # Strip out HTML (first regex removes any tag not allowed)
4208  # Allowed tags are:
4209  # * <sup> and <sub> (T10393)
4210  # * <i> (T28375)
4211  # * <b> (r105284)
4212  # * <bdi> (T74884)
4213  # * <span dir="rtl"> and <span dir="ltr"> (T37167)
4214  # * <s> and <strike> (T35715)
4215  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4216  # to allow setting directionality in toc items.
4217  $tocline = preg_replace(
4218  [
4219  '#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
4220  '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
4221  ],
4222  [ '', '<$1>' ],
4223  $safeHeadline
4224  );
4225 
4226  # Strip '<span></span>', which is the result from the above if
4227  # <span id="foo"></span> is used to produce an additional anchor
4228  # for a section.
4229  $tocline = str_replace( '<span></span>', '', $tocline );
4230 
4231  $tocline = trim( $tocline );
4232 
4233  # For the anchor, strip out HTML-y stuff period
4234  $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4235  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4236 
4237  # Save headline for section edit hint before it's escaped
4238  $headlineHint = $safeHeadline;
4239 
4240  if ( $wgExperimentalHtmlIds ) {
4241  # For reverse compatibility, provide an id that's
4242  # HTML4-compatible, like we used to.
4243  # It may be worth noting, academically, that it's possible for
4244  # the legacy anchor to conflict with a non-legacy headline
4245  # anchor on the page. In this case likely the "correct" thing
4246  # would be to either drop the legacy anchors or make sure
4247  # they're numbered first. However, this would require people
4248  # to type in section names like "abc_.D7.93.D7.90.D7.A4"
4249  # manually, so let's not bother worrying about it.
4250  $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
4251  [ 'noninitial', 'legacy' ] );
4252  $safeHeadline = Sanitizer::escapeId( $safeHeadline );
4253 
4254  if ( $legacyHeadline == $safeHeadline ) {
4255  # No reason to have both (in fact, we can't)
4256  $legacyHeadline = false;
4257  }
4258  } else {
4259  $legacyHeadline = false;
4260  $safeHeadline = Sanitizer::escapeId( $safeHeadline,
4261  'noninitial' );
4262  }
4263 
4264  # HTML names must be case-insensitively unique (T12721).
4265  # This does not apply to Unicode characters per
4266  # https://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
4267  # @todo FIXME: We may be changing them depending on the current locale.
4268  $arrayKey = strtolower( $safeHeadline );
4269  if ( $legacyHeadline === false ) {
4270  $legacyArrayKey = false;
4271  } else {
4272  $legacyArrayKey = strtolower( $legacyHeadline );
4273  }
4274 
4275  # Create the anchor for linking from the TOC to the section
4276  $anchor = $safeHeadline;
4277  $legacyAnchor = $legacyHeadline;
      # Duplicate anchors get the first free "_N" suffix, starting at 2.
4278  if ( isset( $refers[$arrayKey] ) ) {
4279  // @codingStandardsIgnoreStart
4280  for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4281  // @codingStandardsIgnoreEnd
4282  $anchor .= "_$i";
4283  $refers["${arrayKey}_$i"] = true;
4284  } else {
4285  $refers[$arrayKey] = true;
4286  }
4287  if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
4288  // @codingStandardsIgnoreStart
4289  for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
4290  // @codingStandardsIgnoreEnd
4291  $legacyAnchor .= "_$i";
4292  $refers["${legacyArrayKey}_$i"] = true;
4293  } else {
      # NOTE(review): when there is no legacy headline, $legacyArrayKey is
      # false here, so this records an entry under the key false (stored as
      # index 0) - looks harmless but confirm it is intended.
4294  $refers[$legacyArrayKey] = true;
4295  }
4296 
4297  # Don't number the heading if it is the only one (looks silly)
4298  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4299  # the two are different if the line contains a link
4300  $headline = Html::element(
4301  'span',
4302  [ 'class' => 'mw-headline-number' ],
4303  $numbering
4304  ) . ' ' . $headline;
4305  }
4306 
4307  if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
4308  $toc .= Linker::tocLine( $anchor, $tocline,
4309  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4310  }
4311 
4312  # Add the section to the section tree
4313  # Find the DOM node for this header
4314  $noOffset = ( $isTemplate || $sectionIndex === false );
4315  while ( $node && !$noOffset ) {
4316  if ( $node->getName() === 'h' ) {
4317  $bits = $node->splitHeading();
4318  if ( $bits['i'] == $sectionIndex ) {
4319  break;
4320  }
4321  }
      # NOTE(review): the value is named and published as a byte offset but
      # is accumulated with mb_strlen(), which counts characters under a
      # multibyte internal encoding - confirm the intended unit.
4322  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4323  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4324  $node = $node->getNextSibling();
4325  }
4326  $tocraw[] = [
4327  'toclevel' => $toclevel,
4328  'level' => $level,
4329  'line' => $tocline,
4330  'number' => $numbering,
4331  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4332  'fromtitle' => $titleText,
4333  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4334  'anchor' => $anchor,
4335  ];
4336 
4337  # give headline the correct <h#> tag
4338  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4339  // Output edit section links as markers with styles that can be customized by skins
4340  if ( $isTemplate ) {
4341  # Put a T flag in the section identifier, to indicate to extractSections()
4342  # that sections inside <includeonly> should be counted.
4343  $editsectionPage = $titleText;
4344  $editsectionSection = "T-$sectionIndex";
4345  $editsectionContent = null;
4346  } else {
4347  $editsectionPage = $this->mTitle->getPrefixedText();
4348  $editsectionSection = $sectionIndex;
4349  $editsectionContent = $headlineHint;
4350  }
4351  // We use a bit of pseudo-xml for editsection markers. The
4352  // language converter is run later on. Using a UNIQ style marker
4353  // leads to the converter screwing up the tokens when it
4354  // converts stuff. And trying to insert strip tags fails too. At
4355  // this point all real inputted tags have already been escaped,
4356  // so we don't have to worry about a user trying to input one of
4357  // these markers directly. We use a page and section attribute
4358  // to stop the language converter from converting these
4359  // important bits of data, but put the headline hint inside a
4360  // content block because the language converter is supposed to
4361  // be able to convert that piece of data.
4362  // Gets replaced with html in ParserOutput::getText
4363  $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4364  $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4365  if ( $editsectionContent !== null ) {
4366  $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4367  } else {
4368  $editlink .= '/>';
4369  }
4370  } else {
4371  $editlink = '';
4372  }
4373  $head[$headlineCount] = Linker::makeHeadline( $level,
4374  $matches['attrib'][$headlineCount], $anchor, $headline,
4375  $editlink, $legacyAnchor );
4376 
4377  $headlineCount++;
4378  }
4379 
4380  $this->setOutputType( $oldType );
4381 
4382  # Never ever show TOC if no headers
4383  if ( $numVisible < 1 ) {
4384  $enoughToc = false;
4385  }
4386 
4387  if ( $enoughToc ) {
4388  if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
4389  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4390  }
4391  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4392  $this->mOutput->setTOCHTML( $toc );
4393  $toc = self::TOC_START . $toc . self::TOC_END;
4394  $this->mOutput->addModules( 'mediawiki.toc' );
4395  }
4396 
4397  if ( $isMain ) {
4398  $this->mOutput->setSections( $tocraw );
4399  }
4400 
4401  # split up and insert constructed headlines
      # NOTE(review): this pattern requires "</Hn>" with no space before ">",
      # while the matching pattern near the top allows "</Hn  >" - the two
      # could disagree on such input; confirm whether that can occur.
4402  $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4403  $i = 0;
4404 
4405  // build an array of document sections
4406  $sections = [];
4407  foreach ( $blocks as $block ) {
4408  // $head is zero-based, sections aren't.
4409  if ( empty( $head[$i - 1] ) ) {
4410  $sections[$i] = $block;
4411  } else {
4412  $sections[$i] = $head[$i - 1] . $block;
4413  }
4414 
4425  Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $showEditLink ] );
4426 
4427  $i++;
4428  }
4429 
4430  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4431  // append the TOC at the beginning
4432  // Top anchor now in skin
4433  $sections[0] = $sections[0] . $toc . "\n";
4434  }
4435 
4436  $full .= implode( '', $sections );
4437 
      # With a forced TOC position the '<!--MWTOC-->' placeholder is replaced
      # by the TOC (an empty string if no TOC was built).
4438  if ( $this->mForceTocPosition ) {
4439  return str_replace( '<!--MWTOC-->', $toc, $full );
4440  } else {
4441  return $full;
4442  }
4443  }
4444 
/**
 * Transform wikitext for storage: normalise line endings, optionally run the
 * pre-save pass (signatures, subst:, pipe tricks) and unstrip the result.
 *
 * @param string $text The text to transform
 * @param Title $title The title the text belongs to
 * @param User $user The user doing the edit
 * @param ParserOptions $options Parsing options
 * @param bool $clearState Whether to lock the parser and clear its state
 * @return string The transformed text
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	# Held in a local so the lock lives until this method returns
	$scopedLock = $clearState ? $this->lock() : null;

	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// We still normalize line endings for backwards-compatibility
	// with other code that just calls PST, but this should already
	// be handled in TextContent subclasses
	$normalized = TextContent::normalizeLineEndings( $text );

	$transformed = $options->getPreSaveTransform()
		? $this->pstPass2( $normalized, $user )
		: $normalized;
	$result = $this->mStripState->unstripBoth( $transformed );

	$this->setUser( null ); # Reset

	return $result;
}
4479 
/**
 * Pre-save transform helper: substitute variables, expand signature tildes
 * and resolve pipe tricks.
 *
 * @param string $text
 * @param User $user
 * @return string
 */
private function pstPass2( $text, $user ) {
	# The global declaration was missing from this listing; without it
	# $wgContLang is an undefined local below.
	global $wgContLang;

	# Note: This is the timestamp saved as hardcoded wikitext to
	# the database, we use $wgContLang here in order to give
	# everyone the same signature and use the default one rather
	# than the one selected in each user's preferences.
	# (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	$sigText = $this->getUserSig( $user );
	$text = strtr( $text, [
		'~~~~~' => $d,
		'~~~~' => "$sigText $d",
		'~~~' => $sigText
	] );

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# The double-width characters are written as UTF-8 byte escapes so they
	# survive re-encoding. In this listing they had been flattened to ASCII
	# "(", ")" and ",", which turned $p4 into a pattern matching ANY
	# "[[Foobar|]]" and rewriting it to "[[Foobar|F]]".
	$fwOpen = "\xEF\xBC\x88"; # U+FF08 FULLWIDTH LEFT PARENTHESIS
	$fwClose = "\xEF\xBC\x89"; # U+FF09 FULLWIDTH RIGHT PARENTHESIS
	$fwComma = "\xEF\xBC\x8C"; # U+FF0C FULLWIDTH COMMA

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page(context)|]] with double-width brackets (added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?$fwOpen$tc+$fwClose)\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |$fwComma)$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4549 
/**
 * Build the wikitext that stands in for the "name" part of a signature.
 *
 * A custom ("fancy") signature that is well-formed is cleaned and returned
 * directly. In every other case the nickname (or the user name, when the
 * nickname is empty, too long or malformed) is escaped and wrapped in the
 * 'signature' / 'signature-anon' message.
 *
 * @param object &$user User-like object; getName(), getOption(),
 *   getBoolOption() and isAnon() are called on it
 * @param string|bool $nickname Nickname to use, or false to read the
 *   'nickname' user option
 * @param bool|null $fancySig Whether the nickname is raw signature markup,
 *   or null to read the 'fancysig' user option
 * @return string Signature wikitext
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	global $wgMaxSigChars;

	$username = $user->getName();

	# Fall back to the stored preferences for anything the caller left out
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# Loose comparison on purpose: an empty nickname also means "use the name"
	if ( $nickname == null ) {
		$nickname = $username;
	}

	if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
	} elseif ( $fancySig !== false ) {
		# The signature may contain markup, so it has to be well-formed
		if ( $this->validateSig( $nickname ) !== false ) {
			# Good to go; tidy it up if needed and hand it back
			return $this->cleanSig( $nickname, true );
		}

		# Not well-formed; use the plain user name instead
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
	}

	# A nickname must not itself contain signature tildes
	$nickname = self::cleanSigInSig( $nickname );

	# Plain signature: link the nickname to the user page via the message
	$escapedUser = wfEscapeWikiText( $username );
	$escapedNick = wfEscapeWikiText( $nickname );
	$messageKey = $user->isAnon() ? 'signature-anon' : 'signature';

	$message = wfMessage( $messageKey, $escapedUser, $escapedNick )->inContentLanguage();
	return $message->title( $this->getTitle() )->text();
}
4606 
/**
 * Check that a custom signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|bool The unchanged text when well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4616 
/**
 * Clean up signature text: force template calls to be substituted, remove
 * nested signature tildes and run the result through the preprocessor.
 *
 * When not called from inside a parse ($parsing false), the parser is
 * locked and started on $wgTitle first, and strip markers are expanded
 * in the result.
 *
 * @param string $text Signature text
 * @param bool $parsing True if a parse is already in progress on this object
 * @return string Cleaned signature text (unchanged when signature cleaning
 *   is disabled in the parser options)
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		global $wgTitle;
		# Held in a local so the lock lasts until this method returns
		$magicScopeVariable = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = MagicWord::get( 'subst' );
	$notSubstRegex = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$substPrefix = '{{' . $subst->getSynonym( 0 );

	# Turn every "{{" that is not already a subst: call into one
	$text = preg_replace( $notSubstRegex, $substPrefix, $text );
	$text = self::cleanSigInSig( $text );

	$dom = $this->preprocessToDom( $text );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom );

	if ( $standalone ) {
		$text = $this->mStripState->unstripBoth( $text );
	}

	return $text;
}
4657 
/**
 * Remove every run of three to five tildes, so that a signature cannot
 * itself contain signature markup.
 *
 * @param string $text
 * @return string Text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4668 
/**
 * Public entry point that forwards to the private startParse(): sets the
 * title, options and output type, and optionally clears parser state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the Parser::OT_* constants
 * @param bool $clearState Whether to reset the parser state
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->startParse( $title, $options, $outputType, $clearState );
}
4683 
/**
 * Prepare this parser for a parse operation: store the title, the options
 * and the output type, then reset the state unless told otherwise.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the Parser::OT_* constants
 * @param bool $clearState Whether to call clearState()
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4700 
/**
 * Run message text through preprocess().
 *
 * A static flag prevents re-entry: if this method is called again while a
 * transformation is in progress, the text is returned untouched. The flag
 * is reset in a finally block so that an exception thrown by preprocess()
 * cannot leave the guard set and silently disable every later call.
 *
 * @param string $text Text to transform
 * @param ParserOptions $options Options passed on to preprocess()
 * @param Title|null $title Title context; $wgTitle is used when empty
 * @return string Transformed text, or the input text on re-entry
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Always release the guard, even when preprocess() throws
		$executing = false;
	}
}
4728 
/**
 * Register a callback for an extension tag and make sure the tag is on the
 * strip list.
 *
 * The tag name is lower-cased before use.
 *
 * @param string $tag Tag name; must not contain "<", ">", CR or LF
 * @param callable $callback Stored as the hook for this tag
 * @throws MWException If the tag name contains a forbidden character
 * @return callable|null The previously registered callback, or null
 */
public function setHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;

	# Strict comparison: a loose in_array() compares numeric-looking names
	# numerically (e.g. "1e1" == "10") and could skip adding a new tag
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4766 
/**
 * Register a callback for a transparent tag (see replaceTransparentTags()).
 *
 * The tag name is lower-cased before use. Unlike setHook(), the strip list
 * is not touched.
 *
 * @param string $tag Tag name; must not contain "<", ">", CR or LF
 * @param callable $callback Stored as the hook for this tag
 * @throws MWException If the tag name contains a forbidden character
 * @return callable|null The previously registered callback, or null
 */
public function setTransparentTagHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException(
			"Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call"
		);
	}
	$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
4794 
/**
 * Forget all tag hooks and function tag hooks, and restore the strip list
 * to its default. Transparent tag hooks are left as they are.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
4803 
/**
 * Register a parser function callback under a magic word ID and index all
 * of the magic word's synonyms for lookup.
 *
 * Synonyms of case-insensitive magic words are lower-cased with the content
 * language; a leading "#" is added unless SFH_NO_HASH is set; a trailing
 * colon is removed.
 *
 * @param string $id Magic word ID
 * @param callable $callback Stored together with $flags
 * @param int $flags Bit field of Parser::SFH_* constants
 * @throws MWException If $id does not resolve to a magic word
 * @return callable|null The previously registered callback, or null
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
	global $wgContLang;

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$mw = MagicWord::get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $mw->getSynonyms();
	# 0 or 1; used as the first-level key of mFunctionSynonyms
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $wgContLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
4879 
/**
 * List the IDs of all registered function hooks.
 *
 * @return array Keys of the function hook registry
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}
4888 
/**
 * Register a function tag hook and make sure the tag is on the strip list.
 *
 * The tag name is lower-cased before use.
 *
 * @param string $tag Tag name; must not contain "<", ">", CR or LF
 * @param callable $callback Stored together with $flags
 * @param int $flags Stored alongside the callback
 * @throws MWException If the tag name contains a forbidden character
 * @return array|null The previous [ callback, flags ] pair, or null
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}
	$old = isset( $this->mFunctionTagHooks[$tag] ) ?
		$this->mFunctionTagHooks[$tag] : null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	# Strict comparison: a loose in_array() compares numeric-looking names
	# numerically (e.g. "1e1" == "10") and could skip adding a new tag
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
4914 
/**
 * Replace link placeholders in $text, in place, using the parser's link
 * holder array.
 *
 * @param string &$text Text to process; modified by reference
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
4925 
/**
 * Run $text through the link holder array's replaceText() and return the
 * result.
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;
	return $holders->replaceText( $text );
}
4936 
/**
 * Render the contents of a gallery tag as HTML.
 *
 * Each non-empty line of $text is "Title|param|param|...". The part before
 * the first pipe is taken as a file title; the remaining parts are matched
 * against the alt/link magic words and the file handler's own parameters,
 * and whatever does not match becomes the caption (last one wins).
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Tag attributes; 'mode', 'showfilename', 'caption',
 *   'perrow', 'widths' and 'heights' are interpreted here, and the whole
 *   array is passed on as additional options
 * @return string HTML produced by the gallery object
 */
public function renderImageGallery( $text, $params ) {
	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

	if ( isset( $params['showfilename'] ) ) {
		$ig->setShowFilename( true );
	} else {
		$ig->setShowFilename( false );
	}
	if ( isset( $params['caption'] ) ) {
		$caption = $params['caption'];
		$caption = htmlspecialchars( $caption );
		$caption = $this->replaceInternalLinks( $caption );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# Default to the file namespace when the line has no prefix
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap = $paramMap + $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = new MagicWordArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							// check to see if link matches an absolute url, if not then it must be a wiki link.
							// The pipe and braces are escaped so that only a value wrapped
							// exactly in "-{R|" ... "}-" matches; unescaped, "|" is an
							// alternation and unrelated values would be truncated below.
							if ( preg_match( '/^-\{R\|(.*)\}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								// Drop the 4-byte "-{R|" prefix and the 2-byte "}-" suffix.
								$linkValue = substr( $linkValue, 4, -2 );
							}
							if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
								$link = $linkValue;
							} else {
								$localLinkTitle = Title::newFromText( $linkValue );
								if ( $localLinkTitle !== null ) {
									$link = $localLinkTitle->getLinkURL();
								}
							}
							break;
						default:
							// Must be a handler specific parameter.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label = '|' . $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = '|' . $parameterMatch;
				}
			}
			// Remove the pipe.
			$label = substr( $label, 1 );
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5113 
/**
 * Get the image parameter map and the matching MagicWordArray for a media
 * handler, building and caching them per handler class on first use.
 *
 * The map is keyed by magic word ID ("img_..."); each value is a
 * [ type, name ] pair where type is 'horizAlign', 'vertAlign', 'frame' or
 * 'handler'.
 *
 * @param object|bool $handler Media handler, or a false value for none
 * @return array [ parameter map, MagicWordArray ]
 */
public function getImageParams( $handler ) {
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$handlerClass] ) ) {
		# Built-in options, grouped by the kind of setting they control
		static $internalParamNames = [
			'horizAlign' => [ 'left', 'right', 'center', 'none' ],
			'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
				'bottom', 'text-bottom' ],
			'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
				'upright', 'border', 'link', 'alt', 'class' ],
		];
		# Shared across calls; filled once
		static $internalParamMap;
		if ( !$internalParamMap ) {
			$internalParamMap = [];
			foreach ( $internalParamNames as $group => $optionNames ) {
				foreach ( $optionNames as $optionName ) {
					# Magic word IDs use underscores where option names use hyphens
					$magicId = str_replace( '-', '_', "img_$optionName" );
					$internalParamMap[$magicId] = [ $group, $optionName ];
				}
			}
		}

		# Start from the built-ins and layer the handler's own parameters on top
		$combined = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
				$combined[$magicId] = [ 'handler', $handlerParam ];
			}
		}

		$this->mImageParams[$handlerClass] = $combined;
		$this->mImageParamsMagicArray[$handlerClass] = new MagicWordArray( array_keys( $combined ) );
	}

	return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
}
5157 
/**
 * Parse image options text and hand the result to Linker::makeImageLink().
 *
 * The options string is split on "|" (leaving "-{ ... }-" LanguageConverter
 * markup intact). Each part is matched against the image magic words; parts
 * that match and validate are stored as frame or handler parameters, and
 * the last part that does not validate becomes the caption.
 *
 * @param Title $title Title of the file to render
 * @param string $options Pipe-separated option text
 * @param object|bool $holders Object providing replaceText(), passed on to
 *   stripAltText(); false to use the parser's own link holders
 * @return string Result of Linker::makeImageLink()
 */
public function makeImage( $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Protect LanguageConverter markup when splitting into parts
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	# ($options is reused from here on as the file-fetch options array)
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = $this->parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							if ( $value === '' ) {
								# Empty link= means "do not link the image at all"
								$paramName = 'no-link';
								$value = true;
								$validated = true;
							} elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
								# Looks like a URL; only accept it if the whole value is one
								if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
									$paramName = 'link-url';
									$this->mOutput->addExternalLink( $value );
									if ( $this->mOptions->getExternalLinkTarget() ) {
										$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
									}
									$validated = true;
								}
							} else {
								$linkTitle = Title::newFromText( $value );
								if ( $linkTitle ) {
									$paramName = 'link-title';
									$value = $linkTitle;
									$this->mOutput->addLink( $linkTitle );
									$validated = true;
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		if ( !$validated ) {
			# Anything unrecognised or invalid is the caption; the last one wins
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = isset( $options['time'] ) ? $options['time'] : false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5378 
/**
 * Turn caption wikitext into plain text suitable for an HTML attribute:
 * link placeholders are replaced, strip markers are expanded and all tags
 * are removed.
 *
 * @param string $caption
 * @param object|bool $holders Object providing replaceText(), or a false
 *   value to use the parser's own link holders
 * @return string
 */
protected function stripAltText( $caption, $holders ) {
	# replaceLinkHoldersText() alone is not enough here: when called from
	# replaceInternalLinks2(), mLinkHolders won't be up-to-date, so the
	# caller-supplied holders take precedence.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# Placeholders must not survive into attributes, where they would later
	# be expanded to HTML; expand them now, then drop the tags.
	$plain = $this->mStripState->unstripBoth( $plain );

	return Sanitizer::stripAllTags( $plain );
}
5402 
/**
 * Mark the current parser output as uncacheable by setting its cache
 * expiry to zero.
 *
 * @throws MWException If there is no parser output, i.e. when called
 *   outside of a parse
 */
public function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	$output = $this->mOutput;
	if ( !$output ) {
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}

	$output->updateCacheExpiry( 0 ); // new style, for consistency
}
5416 
/**
 * Expand variables and then strip markers in attribute text.
 *
 * The text is updated through the reference and also returned.
 *
 * @param string &$text Text to process; modified by reference
 * @param object|bool $frame Frame passed on to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	$text = $this->replaceVariables( $text, $frame );
	return $text = $this->mStripState->unstripBoth( $text );
}
5430 
/**
 * List every tag name that has a hook registered: transparent tags first,
 * then ordinary tag hooks, then function tag hooks.
 *
 * @return array
 */
public function getTags() {
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$plainTags = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparentTags, $plainTags, $functionTags );
}
5443 
/**
 * Replace transparent tags in $text with the output of their hooks.
 *
 * The tags are first swapped for markers by extractTagsAndParams(); each
 * marker is then replaced by the hook's return value, or by the original
 * tag text if no hook is registered under the lower-cased tag name.
 *
 * @param string $text
 * @return string
 */
public function replaceTransparentTags( $text ) {
	$extracted = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $extracted );

	$markerToOutput = [];
	foreach ( $extracted as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;
		$hookKey = strtolower( $element );

		if ( !isset( $this->mTransparentTagHooks[$hookKey] ) ) {
			# No hook: put the tag back exactly as it was written
			$markerToOutput[$marker] = $tag;
			continue;
		}

		$markerToOutput[$marker] = call_user_func_array(
			$this->mTransparentTagHooks[$hookKey],
			[ $content, $params, $this ]
		);
	}

	return strtr( $text, $markerToOutput );
}
5475 
/**
 * Extract or replace one section of a page's wikitext.
 *
 * $sectionId is split on "-": the last piece is the section index, and a
 * preceding "T" piece makes the text be preprocessed for inclusion. A
 * section extends from its heading up to the next heading of the same or a
 * higher level; index 0 is the text before the first heading.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier, e.g. "2" or "T-2"
 * @param string $mode 'get' to return the section, 'replace' to return the
 *   whole text with the section swapped for $newText
 * @param string $newText Replacement text in 'replace' mode; value returned
 *   when the section is not found in 'get' mode
 * @return string
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Held in a local so the lock lasts until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	$getting = ( $mode === 'get' );
	$replacing = ( $mode === 'replace' );

	# Process section extraction flags
	$flags = 0;
	$sectionParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $sectionParts );
	if ( in_array( 'T', $sectionParts, true ) ) {
		$flags |= self::PTD_FOR_INCLUSION;
	}

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			return $getting ? '' : $newText;
		}
		return $getting ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			# Everything before the target is copied through when replacing
			if ( $replacing ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $getting ? $newText : $text;
	}

	# Find the end of the section, including nested sections
	do {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			$isOtherHeading = ( $heading['i'] != $sectionIndex );
			if ( $isOtherHeading && $heading['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $getting ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	# Write out the remainder (in replace mode only)
	if ( $replacing ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $outText ) ) {
		# Re-insert stripped tags
		$outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
	}

	return $outText;
}
5615 
/**
 * Return the wikitext of one section (extractSections() in 'get' mode).
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier
 * @param string $defaultText Returned when the section does not exist
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$section = $this->extractSections( $text, $sectionId, 'get', $defaultText );
	return $section;
}
5633 
/**
 * Return the page wikitext with one section replaced (extractSections()
 * in 'replace' mode).
 *
 * @param string $oldText Current page wikitext
 * @param string|int $sectionId Section identifier
 * @param string $newText Replacement text for the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$merged = $this->extractSections( $oldText, $sectionId, 'replace', $newText );
	return $merged;
}
5649 
/**
 * Get the revision ID stored on this parser.
 *
 * @return int|null
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;
	return $revisionId;
}
5658 
/**
 * Get the revision object for the stored revision ID, loading and caching
 * it on first use.
 *
 * @return object|null The cached or freshly loaded revision; null when no
 *   revision ID is set
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject !== null ) {
		return $this->mRevisionObject;
	}
	if ( $this->mRevisionId === null ) {
		return null;
	}

	$fetchCurrent = $this->mOptions->getCurrentRevisionCallback();
	$revision = call_user_func( $fetchCurrent, $this->getTitle(), $this );

	# If the parse is for a new revision, then the callback should have
	# already been set to force the object and should match mRevisionId.
	# If not, try to fetch by mRevisionId for sanity.
	$mismatch = $revision && $revision->getId() != $this->mRevisionId;
	if ( $mismatch ) {
		$revision = Revision::newFromId( $this->mRevisionId );
	}

	return $this->mRevisionObject = $revision;
}
5688 
/**
 * Get the timestamp of the revision being parsed, adjusted by the content
 * language, computing and caching it on first use.
 *
 * Falls back to the current time when no revision object is available.
 *
 * @return string Adjusted timestamp
 */
public function getRevisionTimestamp() {
	if ( is_null( $this->mRevisionTimestamp ) ) {
		global $wgContLang;

		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );

	}
	return $this->mRevisionTimestamp;
}
5711 
/**
 * Get the name of the user who made the revision being parsed, caching the
 * answer once it is known.
 *
 * Without a revision object, the current parser user is used, but only
 * during a pre-save transform or a preview; otherwise the value stays
 * unset.
 *
 * @return string|null
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionObject();

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $revision ) {
		$this->mRevisionUser = $revision->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
5731 
/**
 * Get the size of the revision being parsed, caching it on first use.
 *
 * @return int|null Size reported by the revision object, or the parser
 *   input size when there is no revision object
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revision = $this->getRevisionObject();

	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the own substitution
	# will change the size.
	$this->mRevisionSize = $revision ? $revision->getSize() : $this->mInputSize;

	return $this->mRevisionSize;
}
5752 
/**
 * Set the default sort key and record it as the 'defaultsort' property on
 * the parser output.
 *
 * @param string $sort
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$output = $this->mOutput;
	$output->setProperty( 'defaultsort', $sort );
}
5762 
/**
 * Get the default sort key, or an empty string when none has been set
 * (i.e. the stored value is exactly false).
 *
 * @return string
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
5780 
/**
 * Get the stored default sort key as-is, without the empty-string
 * fallback that getDefaultSort() applies.
 *
 * @return string|bool
 */
public function getCustomDefaultSort() {
	$sortKey = $this->mDefaultSort;
	return $sortKey;
}
5790 
/**
 * Guess the fragment ("#anchor") for a section heading given as wikitext.
 *
 * Link and formatting markup is stripped, whitespace is normalised and the
 * result is escaped as a non-initial ID.
 *
 * @param string $text Heading wikitext
 * @return string "#" followed by the escaped section name
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$text = $this->stripSectionName( $text );
	# NOTE(review): presumably collapses runs of spaces/underscores so the
	# anchor matches the one generated for the heading -- confirm against
	# Sanitizer::normalizeSectionNameWhitespace()
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );
	return '#' . Sanitizer::escapeId( $text, 'noninitial' );
}
5806 
/**
 * Like guessSectionNameFromWikiText(), but escapes the name with the
 * 'legacy' option in addition to 'noninitial'.
 *
 * @param string $text Heading wikitext
 * @return string "#" followed by the escaped section name
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$text = $this->stripSectionName( $text );
	# NOTE(review): presumably collapses runs of spaces/underscores so the
	# anchor matches the one generated for the heading -- confirm against
	# Sanitizer::normalizeSectionNameWhitespace()
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );
	return '#' . Sanitizer::escapeId( $text, [ 'noninitial', 'legacy' ] );
}
5821 
/**
 * Reduce a section heading to its text: internal and external link markup
 * is replaced by the link text, quote markup is processed by doQuotes(),
 * and anything between "<" and ">" is removed.
 *
 * @param string $text Heading wikitext
 * @return string
 */
public function stripSectionName( $text ) {
	# preg_replace() applies the pattern list in order, each on the result
	# of the previous one
	$patterns = [
		# Internal links with a label: keep the label
		'/\[\[:?([^[|]+)\|([^[]+)\]\]/',
		# Internal links without a label: keep the target
		'/\[\[:?([^[]+)\|?\]\]/',
		# External links with a label: keep the label
		# @todo FIXME: Not tolerant to blank link text
		# I.E. [https://www.mediawiki.org] will render as [1] or something depending
		# on how many empty links there are on the page - need to figure that out.
		'/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/',
	];
	$replacements = [ '$2', '$1', '$2' ];
	$text = preg_replace( $patterns, $replacements, $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
5854 
/**
 * strip/replaceVariables/unstrip for preprocessor regression testing.
 *
 * @param string $text Wikitext to process
 * @param Title $title Title passed to startParse()
 * @param ParserOptions $options Options passed to startParse()
 * @param int $outputType One of the self::OT_* constants
 * @return string Text after variable replacement, unstripping and
 *  Sanitizer::removeHTMLtags()
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	# Must stay assigned until the method returns; it holds the parse lock
	$lockGuard = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );

	return Sanitizer::removeHTMLtags(
		$this->mStripState->unstripBoth( $expanded )
	);
}
5876 
/**
 * Run preSaveTransform() on the text, acting as the user from $options.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title passed to preSaveTransform()
 * @param ParserOptions $options Options; also supplies the user
 * @return string Result of preSaveTransform()
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $title, $user, $options );
}
5886 
/**
 * Run testSrvus() with the output type fixed to self::OT_PREPROCESS.
 *
 * @param string $text Wikitext to process
 * @param Title $title Title passed through to testSrvus()
 * @param ParserOptions $options Options passed through to testSrvus()
 * @return string Result of testSrvus()
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$processed = $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
	return $processed;
}
5896 
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return values
 * of the callback.
 *
 * Markers (self::MARKER_PREFIX ... self::MARKER_SUFFIX) are copied to the
 * result unchanged. A marker prefix with no matching suffix causes the
 * remainder of the string to be copied unchanged as well.
 *
 * @param string $s Text to process
 * @param callable $callback Receives one plain-text region, returns its
 *  replacement
 * @return string Processed text
 */
public function markerSkipCallback( $s, $callback ) {
	$result = '';
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$pos = 0;

	while ( $pos < $length ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			# No marker ahead: everything left is plain text
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Plain text in front of the marker goes through the callback
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			# Unterminated marker: copy the remainder untouched
			$result .= substr( $s, $start );
			break;
		}

		# Copy the complete marker verbatim and continue right after it
		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
5936 
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to killMarkers() on $this->mStripState.
 *
 * @param string $text Input text
 * @return string Text with the markers removed
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
5946 
/**
 * Save the parser state required to convert the given half-parsed text to
 * HTML.
 *
 * @param string $text Half-parsed text
 * @return array With the keys 'text', 'version' (self::HALF_PARSED_VERSION),
 *  'stripState' and 'linkHolders'; the last two are the parts of the strip
 *  state and link holders selected for $text
 */
public function serializeHalfParsedText( $text ) {
	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
5972 
/**
 * Load the parser state given in the $data array, which is assumed to
 * have been generated by serializeHalfParsedText(), and return the text
 * adapted to the current parser state.
 *
 * @param array $data Array with 'version', 'text', 'stripState' and
 *  'linkHolders' keys
 * @throws MWException If 'version' is missing or differs from
 *  self::HALF_PARSED_VERSION
 * @return string The text after merging strip state and link holders
 */
public function unserializeHalfParsedText( $data ) {
	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# Both merge steps take and return a list of texts; only one is needed

	# First, extract the strip state.
	$merged = $this->mStripState->merge( $data['stripState'], [ $data['text'] ] );

	# Now renumber links
	$merged = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $merged );

	# Should be good to go.
	return $merged[0];
}
6003 
/**
 * Returns true if the given array, presumed to be generated by
 * serializeHalfParsedText(), is compatible with the current version of the
 * parser.
 *
 * @param array $data Serialized half-parsed text
 * @return bool True when 'version' is set and loosely equals
 *  self::HALF_PARSED_VERSION
 */
public function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}
	return $data['version'] == self::HALF_PARSED_VERSION;
}
6016 
/**
 * Parse a size value such as "100x200px", "x50", "300px" or "300".
 *
 * (T15500) In both forms (width/height and width only) a trailing "px",
 * optionally surrounded by whitespace, is permitted for backward
 * compatibility.
 *
 * @param string $value Size specification to parse
 * @param bool $parseHeight Whether the "{width}x{height}" form is accepted.
 *  Defaults to true, which is the historical behaviour. Pass false when
 *  only a width is meaningful; a "WxH" value then yields an empty array.
 * @return array Empty when $value is '' or not recognised. Otherwise
 *  contains an integer 'width', plus an integer 'height' when the
 *  "{width}x{height}" form matched. A missing number is reported as 0.
 */
public function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	$m = [];
	if ( $parseHeight
		&& preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m )
	) {
		return [
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		# intval() reads the leading digits and ignores the "px" suffix
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6044 
/**
 * Mark this parser instance as busy and return a guard object for that
 * state.
 *
 * $this->mInParse is set to true. The returned ScopedCallback wraps a
 * closure that sets it back to false (presumably run when the guard goes
 * out of scope — confirm against ScopedCallback).
 *
 * @throws MWException If $this->mInParse is already set
 * @return ScopedCallback Guard whose callback clears $this->mInParse
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively?" );
	}
	$this->mInParse = true;

	return new ScopedCallback( function() {
		$this->mInParse = false;
	} );
}
6067 
/**
 * Strip a single outer <p></p> wrapper from a piece of HTML.
 *
 * The wrapper is removed only when the text starts with "<p>", ends with
 * "</p>" (each optionally followed by one newline), and the content in
 * between holds no further "</p>". Anything else is returned unchanged.
 *
 * @param string $html HTML fragment
 * @return string The inner content, or $html itself when nothing is stripped
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	$wrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );

	# A closing tag inside the capture means more than one paragraph
	if ( $wrapped && strpos( $m[1], '</p>' ) === false ) {
		return $m[1];
	}

	return $html;
}
6088 
/**
 * Return this parser if it is not currently in a parse, otherwise a new
 * instance built from the global $wgParserConf.
 *
 * @return Parser $this when $this->mInParse is false; otherwise a new
 *  object of class $wgParserConf['class'] constructed with $wgParserConf
 */
public function getFreshParser() {
	global $wgParserConf;

	if ( !$this->mInParse ) {
		return $this;
	}

	# This instance is busy, so hand out a separate one
	$parserClass = $wgParserConf['class'];
	return new $parserClass( $wgParserConf );
}
6107 
/**
 * Flag the current parser output as needing OOUI.
 *
 * Calls setEnableOOUI( true ) on $this->mOutput.
 */
6114  public function enableOOUI() {
# NOTE(review): the embedded listing numbers jump from 6114 to 6116, so a
# statement may be missing from this extract — confirm against the
# original file before changing this method.
6116  $this->mOutput->setEnableOOUI( true );
6117  }
6118 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:5665
static newFromName($name, $validate= 'valid')
Static factory method for creation from username.
Definition: User.php:544
setTitle($t)
Set the context title.
Definition: Parser.php:763
$mAutonumber
Definition: Parser.php:177
getLatestRevID($flags=0)
What is the page_latest field for this page?
Definition: Title.php:3254
markerSkipCallback($s, $callback)
Call a callback function on all regions of the given text that are not inside strip markers...
Definition: Parser.php:5913
$mPPNodeCount
Definition: Parser.php:191
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2106
static getVariableIDs()
Get an array of parser variable IDs.
Definition: MagicWord.php:272
getExternalLinkAttribs($url)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:1945
const MARKER_PREFIX
Definition: Parser.php:134
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
Definition: deferred.txt:11
isValidHalfParsedText($data)
Returns true if the given array, presumed to be generated by serializeHalfParsedText(), is compatible with the current version of the parser.
Definition: Parser.php:6013
null means default in associative array form
Definition: hooks.txt:1923
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1923
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1547
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
getSection($text, $sectionId, $defaultText= '')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:5630
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1289
$mTplRedirCache
Definition: Parser.php:193
killMarkers($text)
Remove any strip markers found in the given text.
Definition: Parser.php:5943
wfGetDB($db, $groups=[], $wiki=false)
Get a Database object.
static tocList($toc, $lang=false)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1559
LinkRenderer $mLinkRenderer
Definition: Parser.php:257
fetchTemplateAndTitle($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3535
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:781
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:5717
setFunctionTagHook($tag, $callback, $flags)
Create a tag function, e.g.
Definition: Parser.php:4899
the array() calling protocol came about after MediaWiki 1.4rc1.
stripSectionName($text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:5836
const OT_PREPROCESS
Definition: Defines.php:183
either a plain
Definition: hooks.txt:1974
$mDoubleUnderscores
Definition: Parser.php:193
magic word the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2482
Group all the pieces relevant to the context of a request into one instance.
getPreloadText($text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:711
$context
Definition: load.php:50
validateSig($text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4613
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:243
getArticleID($flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition: Title.php:3165
$wgSitename
Name of the site.
renderImageGallery($text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:4950
recursivePreprocess($text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:692
replaceExternalLinks($text)
Replace external links (REL)
Definition: Parser.php:1845
static isNonincludable($index)
Is it not possible to use pages from this namespace as a template?
nextLinkID()
Definition: Parser.php:852
const SPACE_NOT_NL
Definition: Parser.php:103
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1923
getImageParams($handler)
Definition: Parser.php:5118
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
doHeadings($text)
Parse headers and return html.
Definition: Parser.php:1624
static getTitleFor($name, $subpage=false, $fragment= '')
Get a localised Title object for a specified special page name If you don't need a full Title object...
Definition: SpecialPage.php:82
const OT_PLAIN
Definition: Parser.php:114
getTags()
Accessor.
Definition: Parser.php:5436
static isWellFormedXmlFragment($text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:728
const OT_WIKI
Definition: Parser.php:111
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:2089
fetchFileAndTitle($title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3680
User $mUser
Definition: Parser.php:200
We use the convention $dbr for read and $dbw for write to help you keep track of whether the database object is a the world will explode Or to be a subsequent write query which succeeded on the master may fail when replicated to the slave due to a unique key collision Replication on the slave will stop and it may take hours to repair the database and get it back online Setting read_only in my cnf on the slave will avoid this but given the dire we prefer to have as many checks as possible We provide a but the wrapper functions like please read the documentation for except in special pages derived from QueryPage It s a common pitfall for new developers to submit code containing SQL queries which examine huge numbers of rows Remember that COUNT * is(N), counting rows in atable is like counting beans in a bucket.------------------------------------------------------------------------Replication------------------------------------------------------------------------The largest installation of MediaWiki, Wikimedia, uses a large set ofslave MySQL servers replicating writes made to a master MySQL server.Itis important to understand the issues associated with this setup if youwant to write code destined for Wikipedia.It's often the case that the best algorithm to use for a given taskdepends on whether or not replication is in use.Due to our unabashedWikipedia-centrism, we often just use the replication-friendly version, but if you like, you can use wfGetLB() ->getServerCount() > 1 tocheck to see if replication is in use.===Lag===Lag primarily occurs when large write queries are sent to the master.Writes on the master are executed in parallel, but they are executed inserial when they are replicated to the slaves.The master writes thequery to the binlog when the transaction is committed.The slaves pollthe binlog and start executing the query as soon as it appears.They canservice reads while they are performing a write query, but will not readanything more from the binlog and 
thus will perform no more writes.Thismeans that if the write query runs for a long time, the slaves will lagbehind the master for the time it takes for the write query to complete.Lag can be exacerbated by high read load.MediaWiki's load balancer willstop sending reads to a slave when it is lagged by more than 30 seconds.If the load ratios are set incorrectly, or if there is too much loadgenerally, this may lead to a slave permanently hovering around 30seconds lag.If all slaves are lagged by more than 30 seconds, MediaWiki will stopwriting to the database.All edits and other write operations will berefused, with an error returned to the user.This gives the slaves achance to catch up.Before we had this mechanism, the slaves wouldregularly lag by several minutes, making review of recent editsdifficult.In addition to this, MediaWiki attempts to ensure that the user seesevents occurring on the wiki in chronological order.A few seconds of lagcan be tolerated, as long as the user sees a consistent picture fromsubsequent requests.This is done by saving the master binlog positionin the session, and then at the start of each request, waiting for theslave to catch up to that position before doing any reads from it.Ifthis wait times out, reads are allowed anyway, but the request isconsidered to be in"lagged slave mode".Lagged slave mode can bechecked by calling wfGetLB() ->getLaggedSlaveMode().The onlypractical consequence at present is a warning displayed in the pagefooter.===Lag avoidance===To avoid excessive lag, queries which write large numbers of rows shouldbe split up, generally to write one row at a time.Multi-row INSERT...SELECT queries are the worst offenders should be avoided altogether.Instead do the select first and then the insert.===Working with lag===Despite our best efforts, it's not practical to guarantee a low-lagenvironment.Lag will usually be less than one second, but mayoccasionally be up to 30 seconds.For scalability, it's very importantto keep load on 
the master low, so simply sending all your queries tothe master is not the answer.So when you have a genuine need forup-to-date data, the following approach is advised:1) Do a quick query to the master for a sequence number or timestamp 2) Run the full query on the slave and check if it matches the data you gotfrom the master 3) If it doesn't, run the full query on the masterTo avoid swamping the master every time the slaves lag, use of thisapproach should be kept to a minimum.In most cases you should just readfrom the slave and let the user deal with the delay.------------------------------------------------------------------------Lock contention------------------------------------------------------------------------Due to the high write rate on Wikipedia(and some other wikis), MediaWiki developers need to be very careful to structure their writesto avoid long-lasting locks.By default, MediaWiki opens a transactionat the first query, and commits it before the output is sent.Locks willbe held from the time when the query is done until the commit.So youcan reduce lock time by doing as much processing as possible before youdo your write queries.Often this approach is not good enough, and it becomes necessary toenclose small groups of queries in their own transaction.Use thefollowing syntax:$dbw=wfGetDB(DB_MASTER
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:2850
static isEnabled()
Definition: MWTidy.php:79
Set options of the Parser.
static tidy($text)
Interface with html tidy.
Definition: MWTidy.php:46
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:4885
static fixTagAttributes($text, $element, $sorted=false)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML...
Definition: Sanitizer.php:1071
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
database rows
Definition: globals.txt:10
wfHostname()
Fetch server name for use in error reporting etc.
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:867
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:189
argSubstitution($piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:3783
testSrvus($text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
strip/replaceVariables/unstrip for preprocessor regression testing
Definition: Parser.php:5865
uniqPrefix()
Accessor for mUniqPrefix.
Definition: Parser.php:753
const TOC_START
Definition: Parser.php:137
Title($x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:791
SectionProfiler $mProfiler
Definition: Parser.php:252
$sort
fetchFileNoRegister($title, $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3705
null for the local wiki Added in
Definition: hooks.txt:1539
There are three types of nodes:
$mHeadings
Definition: Parser.php:193
$value
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:4798
static makeSelfLinkObj($nt, $html= '', $query= '', $trail= '', $prefix= '')
Make appropriate markup for a link to the current article.
Definition: Linker.php:181
const NS_SPECIAL
Definition: Defines.php:50
clearState()
Clear Parser state.
Definition: Parser.php:343
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context $revId
Definition: hooks.txt:1025
__construct($conf=[])
Definition: Parser.php:262
const EXT_LINK_ADDR
Definition: Parser.php:95
$mFirstCall
Definition: Parser.php:152
interwikiTransclude($title, $action)
Transclude an interwiki link.
Definition: Parser.php:3724
pstPass2($text, $user)
Pre-save transform helper function.
Definition: Parser.php:4488
guessLegacySectionNameFromWikiText($text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead.
Definition: Parser.php:5815
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Options($x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:845
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2705
serializeHalfParsedText($text)
Save the parser state required to convert the given half-parsed text to HTML.
Definition: Parser.php:5963
replaceLinkHolders(&$text, $options=0)
Replace "" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4922
static statelessFetchRevision(Title $title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3518
static activeUsers()
Definition: SiteStats.php:165
$mLinkID
Definition: Parser.php:190
doQuotes($text)
Helper function for doAllQuotes()
Definition: Parser.php:1657
preprocessToDom($text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:2880
limitationWarn($limitationType, $current= '', $max= '')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:3002
static cleanUrl($url)
Definition: Sanitizer.php:1858
wfUrlencode($s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:262
$mGeneratedPPNodeCount
Definition: Parser.php:191
static getRandomString()
Get a random string.
Definition: Parser.php:732
$mRevisionId
Definition: Parser.php:217
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1825
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
doBlockLevels($text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: Parser.php:2477
$wgArticlePath
Definition: img_auth.php:45
OutputType($x=null)
Accessor/mutator for the output type.
Definition: Parser.php:817
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:934
const NS_TEMPLATE
Definition: Defines.php:71
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:129
getVariableValue($index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2492
recursiveTagParse($text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:623
const NO_ARGS
magic word & $parser
Definition: hooks.txt:2482
MagicWordArray $mVariables
Definition: Parser.php:159
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:748
const SFH_NO_HASH
Definition: Parser.php:85
const DB_MASTER
Definition: defines.php:25
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
wfRandomString($length=32)
Get a random string containing a number of pseudo-random hex characters.
$mForceTocPosition
Definition: Parser.php:195
preprocess($text, Title $title=null, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:666
static getCacheTTL($id)
Allow external reads of TTL array.
Definition: MagicWord.php:295
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5655
const OT_PREPROCESS
Definition: Parser.php:112
maybeDoSubpageLink($target, &$text)
Handle link to subpage if necessary.
Definition: Parser.php:2465
$mFunctionSynonyms
Definition: Parser.php:144
If you want to remove the page from your watchlist later
replaceLinkHoldersText($text)
Replace "" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:4933
setLinkID($id)
Definition: Parser.php:859
$mOutputType
Definition: Parser.php:214
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
$mDefaultStripList
Definition: Parser.php:147
static createAssocArgs($args)
Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
Definition: Parser.php:2954
$mExtLinkBracketedRegex
Definition: Parser.php:166
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page.Return false to stop further processing of the tag $reader:XMLReader object &$pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports.&$fullInterwikiPrefix:Interwiki prefix, may contain colons.&$pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable.Can be used to lazy-load the import sources list.&$importSources:The value of $wgImportSources.Modify as necessary.See the comment in DefaultSettings.php for the detail of how to structure this array. 
'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.&$title:Title object for the current page &$request:WebRequest &$ignoreRedirect:boolean to skip redirect check &$target:Title/string of redirect target &$article:Article object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) &$article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() &$ip:IP being check &$result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 
'LanguageGetMagic':DEPRECATED!Use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language &$magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED!Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language &$specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Array with elements of the form"language:title"in the order that they will be output.&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED!Use HtmlPageLinkRendererBegin instead.Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1921
const TS_UNIX
Unix time - the number of seconds since 1970-01-01 00:00:00 UTC.
Definition: defines.php:6
if($line===false) $args
Definition: cdb.php:63
static getLocalInstance($ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
static getDoubleUnderscoreArray()
Get a MagicWordArray of double-underscore entities.
Definition: MagicWord.php:308
static splitTrail($trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1634
getTemplateDom($title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3452
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition: hooks.txt:2885
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1502
cleanSig($text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4627
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static factory($mode=false, IContextSource $context=null)
Get a new image gallery.
$wgLanguageCode
Site language code.
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static getPage($name)
Find the object with a given name and return it (or NULL)
static edits()
Definition: SiteStats.php:133
$wgExtraInterlanguageLinkPrefixes
List of additional interwiki prefixes that should be treated as interlanguage links (i...
startExternalParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4678
wfDebugLog($logGroup, $text, $dest= 'all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not...
const NO_TEMPLATES
addTrackingCategory($msg)
Definition: Parser.php:4025
replaceInternalLinks($s)
Process [[ ]] wikilinks.
Definition: Parser.php:2093
$mVarCache
Definition: Parser.php:148
$wgStylePath
The URL path of the skins directory.
disableCache()
Set a flag in the output object indicating that the content is dynamic and shouldn't be cached...
Definition: Parser.php:5408
$mRevisionObject
Definition: Parser.php:216
static normalizeSectionNameWhitespace($section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the ids that are used for section links.
Definition: Sanitizer.php:1383
internalParse($text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1268
Title $mTitle
Definition: Parser.php:213
static delimiterReplace($startDelim, $endDelim, $replace, $subject, $flags= '')
Perform an operation equivalent to preg_replace() with flags.
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:288
wfEscapeWikiText($text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:5694
static stripOuterParagraph($html)
Strip the outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6078
static register($parser)
$mRevIdForTs
Definition: Parser.php:221
static singleton()
Get an instance of this class.
Definition: LinkCache.php:64
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
static normalizeSubpageLink($contextTitle, $target, &$text)
Definition: Linker.php:1353
parseWidthParam($value)
Parse a width param of an image link like 300px or 200x300px.
Definition: Parser.php:6025
$mStripList
Definition: Parser.php:146
$mFunctionTagHooks
Definition: Parser.php:145
fetchScaryTemplateMaybeFromCache($url)
Definition: Parser.php:3743
const OT_PLAIN
Definition: Defines.php:185
fetchCurrentRevisionOfTitle($title)
Fetch the current revision of a given title.
Definition: Parser.php:3495
$mRevisionTimestamp
Definition: Parser.php:218
$mImageParams
Definition: Parser.php:149
stripAltText($caption, $holders)
Definition: Parser.php:5384
doAllQuotes($text)
Replace single quotes with HTML markup.
Definition: Parser.php:1640
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
static replaceMarkup($search, $replace, $text)
More or less "markup-safe" str_replace(). Ignores any instances of the separator inside <...
static normalizeUrlComponent($component, $unsafe)
Definition: Parser.php:2011
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:76
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1025
setHook($tag, $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4753
const OT_WIKI
Definition: Defines.php:182
Preprocessor $mPreprocessor
Definition: Parser.php:170
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:920
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of normal(non-web) applications--they might conflict with distributors'policies
static getInstance($ts=false)
Get a timestamp instance in GMT.
Definition: MWTimestamp.php:38
const NS_MEDIA
Definition: Defines.php:49
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:59
replaceVariables($text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:2925
const RECOVER_ORIG
wfMatchesDomainList($url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
StripState $mStripState
Definition: Parser.php:182
$mDefaultSort
Definition: Parser.php:192
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:908
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
incrementIncludeSize($type, $size)
Increment an include size counter.
Definition: Parser.php:3944
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:1037
const EXT_IMAGE_REGEX
Definition: Parser.php:98
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4690
$params
const NS_CATEGORY
Definition: Defines.php:75
static makeHeadline($level, $attribs, $anchor, $html, $link, $legacyAnchor=false)
Create a headline for content.
Definition: Linker.php:1615
static extractTagsAndParams($elements, $text, &$matches, $uniq_prefix=null)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:967
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
doTableStuff($text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1064
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:5737
$mImageParamsMagicArray
Definition: Parser.php:150
LinkHolderArray $mLinkHolders
Definition: Parser.php:188
const TS_MW
MediaWiki concatenated string timestamp (YYYYMMDDHHMMSS)
Definition: defines.php:11
static register($parser)
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1923
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to and or sell copies of the and to permit persons to whom the Software is furnished to do so
Definition: LICENSE.txt:10
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:117
preSaveTransform($text, Title $title, User $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\\r\\n" -> "\\n" conversion, substituting signatur...
Definition: Parser.php:4456
static capturePath(Title $title, IContextSource $context, LinkRenderer $linkRenderer=null)
Just like executePath() but will override global variables and execute the page in "inclusion" mode...
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:880
$buffer
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:932
static newKnownCurrent(IDatabase $db, $pageId, $revId)
Load a revision based on a known page ID and current revision ID from the DB.
Definition: Revision.php:1886
static hasSubpages($index)
Does the namespace allow subpages?
formatHeadings($text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4045
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:898
static tocUnindent($level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1514
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
static tocLine($anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1529
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
$mInputSize
Definition: Parser.php:222
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:986
getUserSig(&$user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext...
Definition: Parser.php:4564
const HALF_PARSED_VERSION
Update this version number when the output of serialiseHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:82
const NS_FILE
Definition: Defines.php:67
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:323
static makeImageLink(Parser $parser, Title $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:319
const PTD_FOR_INCLUSION
Definition: Parser.php:106
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1923
armorLinks($text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2443
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1708
static splitWhitespace($s)
Return a three-element array: leading whitespace, string contents, trailing whitespace.
Definition: Parser.php:2892
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
setOutputType($ot)
Set the output type.
Definition: Parser.php:800
$mTagHooks
Definition: Parser.php:141
Class for handling an array of magic words.
const NS_MEDIAWIKI
Definition: Defines.php:69
static & get($id)
Factory: creates an object representing an ID.
Definition: MagicWord.php:258
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6114
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition: hooks.txt:246
fetchTemplate($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3563
maybeMakeExternalImage($url)
Make an image if it's allowed, either through the global option, through the exception, or through the on-wiki whitelist.
Definition: Parser.php:2034
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2452
const OT_HTML
Definition: Defines.php:181
static escapeId($id, $options=[])
Given a value, escape it so that it can be used in an id attribute and return it. ...
Definition: Sanitizer.php:1171
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object & $output
Definition: hooks.txt:1025
static getSubstIDs()
Get an array of parser substitution modifier IDs.
Definition: MagicWord.php:285
static images()
Definition: SiteStats.php:173
$mTransparentTagHooks
Definition: Parser.php:142
$mExpensiveFunctionCount
Definition: Parser.php:194
$mUrlProtocols
Definition: Parser.php:166
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2499
$mConf
Definition: Parser.php:166
transformMsg($text, $options, $title=null)
Wrapper for preprocess()
Definition: Parser.php:4709
static newFromId($id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:111
wfUrlProtocols($includeProtocolRelative=true)
Returns a regular expression of url protocols.
static makeExternalLink($url, $text, $escape=true, $linktype= '', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:838
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:300
static getExternalLinkRel($url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:1924
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
this hook is for auditing only $req
Definition: hooks.txt:986
this hook is for auditing only or null if authentication failed before getting that far $username
Definition: hooks.txt:781
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc next in line in page history
Definition: hooks.txt:1708
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:235
const OT_MSG
Definition: Parser.php:113
replaceTransparentTags($text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5454
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
Definition: postgres.txt:22
replaceSection($oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:5646
doDoubleUnderscore($text)
Strip double-underscore items like __NOGALLERY__ and __NOTOC__. Fills $this->mDoubleUnderscores, returns the modified text.
Definition: Parser.php:3971
$mFunctionHooks
Definition: Parser.php:143
static removeHTMLtags($text, $processCallback=null, $args=[], $extratags=[], $removetags=[], $warnCallback=null)
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:462
$lines
Definition: router.php:67
testPreprocess($text, Title $title, ParserOptions $options)
Definition: Parser.php:5893
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition: deferred.txt:11
MagicWordArray $mSubstWords
Definition: Parser.php:164
const TOC_END
Definition: Parser.php:138
static normalizeCharReferences($text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1402
callParserFunction($frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3353
$wgScriptPath
The path we should point to.
Variant of the Message class.
Definition: Message.php:1361
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:6099
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database etc For and for historical it also represents a few features of articles that don t involve their such as access rights See also title txt Article Encapsulates access to the page table of the database The object represents a an and maintains state such as etc Revision Encapsulates individual page revision data and access to the revision text blobs storage system Higher level code should never touch text storage directly
Definition: design.txt:34
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:1025
static articles()
Definition: SiteStats.php:141
$mRevisionUser
Definition: Parser.php:219
lock()
Lock the current instance of the parser.
Definition: Parser.php:6054
static pages()
Definition: SiteStats.php:149
$line
Definition: cdb.php:58
const SFH_OBJECT_ARGS
Definition: Parser.php:86
static delimiterExplode($startDelim, $endDelim, $separator, $subject, $nested=false)
Explode a string, but ignore any instances of the separator inside the given start and end delimiters...
Definition: StringUtils.php:68
makeKnownLinkHolder($nt, $text= '', $trail= '', $prefix= '')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2419
static statelessFetchTemplate($title, $parser=false)
Static function to get a template. Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3576
I won t presume to tell you how to I m just describing the methods I chose to use for myself If you do choose to follow these it will probably be easier for you to collaborate with others on the but if you want to contribute without by all means do which work well I also use K &R brace matching style I know that s a religious issue for so if you want to use a style that puts opening braces on the next line
Definition: design.txt:79
setFunctionHook($id, $callback, $flags=0)
Create a function, e.g.
Definition: Parser.php:4847
static setupOOUI($skinName= '', $dir= 'ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
static makeMediaLinkFile(Title $title, $file, $html= '')
Create a direct link to a given uploaded file.
Definition: Linker.php:778
$mIncludeCount
Definition: Parser.php:184
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object to manipulate or replace but no entry for that model exists in $wgContentHandlers please use GetContentModels hook to make them known to core if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok inclusive $limit
Definition: hooks.txt:1025
usually copyright or history_copyright This message must be in HTML not wikitext if the section is included from a template to be included in the link
Definition: hooks.txt:2885
$mMarkerIndex
Definition: Parser.php:151
getTitle()
Accessor for the Title object.
Definition: Parser.php:781
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
extractSections($text, $sectionId, $mode, $newText= '')
Break wikitext input into sections, and either pull or replace some particular section's text...
Definition: Parser.php:5505
ParserOutput $mOutput
Definition: Parser.php:176
getOutput()
Get the ParserOutput object.
Definition: Parser.php:826
$wgExperimentalHtmlIds
Should we allow a broader set of characters in id attributes, per HTML5? If not, use only HTML 4-comp...
doMagicLinks($text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1445
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable or merely the Work and Derivative Works thereof Contribution shall mean any work of including the original version of the Work and any modifications or additions to that Work or Derivative Works that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner For the purposes of this submitted means any form of or written communication sent to the Licensor or its including but not limited to communication on electronic mailing source code control and issue tracking systems that are managed or on behalf the Licensor for the purpose of discussing and improving the but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as Not a Contribution 
Contributor shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work Grant of Copyright License Subject to the terms and conditions of this each Contributor hereby grants to You a non no royalty irrevocable copyright license to prepare Derivative Works publicly display
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:1025
static cleanSigInSig($text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4664
setDefaultSort($sort)
Mutator for $mDefaultSort.
Definition: Parser.php:5758
fetchFile($title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3669
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1503
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:593
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling output() to send it all.It could be easily changed to send incrementally if that becomes useful
static doBlockLevels($text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
$wgServer
URL of the server.
We've cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else. It's much easier for someone working with this code to see what's _really_ going on
Definition: hooks.txt:86
incrementExpensiveFunctionCount()
Increment the expensive function count.
Definition: Parser.php:3958
$mShowToc
Definition: Parser.php:195
static normalizeLinkUrl($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1975
const DB_REPLICA
Definition: defines.php:24
magicLinkCallback($m)
Definition: Parser.php:1475
const EXT_LINK_URL_CLASS
Definition: Parser.php:92
insertStripItem($text)
Add an item to the strip state. Returns the unique tag which must be inserted into the stripped text. T...
Definition: Parser.php:1050
testPst($text, Title $title, ParserOptions $options)
Definition: Parser.php:5883
static factory($url, $options=null, $caller=__METHOD__)
Generate a new request object.
if(!$wgRequest->checkUrlExtension()) if(!$wgEnableAPI) $wgTitle
Definition: api.php:57
static explode($separator, $subject)
Workalike for explode() with limited memory usage.
ParserOptions $mOptions
Definition: Parser.php:208
parse($text, Title $title, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:403
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:781
static numberingroup($group)
Find the number of users in a given user group.
Definition: SiteStats.php:183
==Architecture== Two class hierarchies are used to provide the functionality associated with the different content models: * Content interface (and AbstractContent base class) define functionality that acts on the concrete content of a page, and * ContentHandler base class provides functionality specific to a content model, but not acting on concrete content. The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content. These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text. All manipulation and analysis of page content must be done via the appropriate methods of the Content object. For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers. The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id). Also Title, WikiPage and Revision now have getContentHandler() methods for convenience. ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page. ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type. However, it is recommended to instead use WikiPage::getContent() resp. Revision::getContent() to get a page's content as a Content object. These two methods should be the ONLY way in which page content is accessed. Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides(). This is similar to what WikiPage::getActionOverrides() was already doing. ==Serialization== With the ContentHandler facility, page content no longer has to be text based. Objects implementing the Content interface are used to represent and handle the content internally. For storage and data exchange, each content model supports at least one serialization format via 
ContentHandler::serializeContent($content). The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats(). Content serialization formats are identified using MIME type like strings. The following formats are built in: * text/x-wiki - wikitext * text/javascript - for js pages * text/css - for css pages * text/plain - for future use, e.g. with plain text messages. * text/html - for future use, e.g. with plain html messages. * application/vnd.php.serialized - for future use with the api and for extensions * application/json - for future use with the api, and for use by extensions * application/xml - for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant. Note that when using the API to access page content, especially action=edit, action=parse and action=query&prop=revisions, the model and format of the content should always be handled explicitly. Without that information, interpretation of the provided content is not reliable. The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export. Also note that the API will provide encapsulated, serialized content - so if the API was called with format=json, and contentformat is also json (or rather, application/json), the page content is represented as a string containing an escaped json structure. Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form. ==Compatibility== The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content. However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page's content model, and will now generate warnings when used. Most importantly, the following functions have been deprecated: * Revisions::getText() is deprecated in favor 
Revisions::getContent()*WikiPage::getText() is deprecated in favor WikiPage::getContent() Also, the old Article::getContent()(which returns t