MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
25 
69 class Parser {
75  const VERSION = '1.6.4';
76 
82 
83  # Flags for Parser::setFunctionHook
84  const SFH_NO_HASH = 1;
85  const SFH_OBJECT_ARGS = 2;
86 
87  # Constants needed for external link processing
88  # Everything except bracket, space, or control characters
89  # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
90  # as well as U+3000 (IDEOGRAPHIC SPACE), see bug 19052
91  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
92  # Simplified expression to match an IPv4 or IPv6 address, or
93  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
94  const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
95  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
96  // @codingStandardsIgnoreStart Generic.Files.LineLength
97  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
98  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
99  // @codingStandardsIgnoreEnd
100 
101  # Regular expression for a non-newline space
102  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
103 
104  # Flags for preprocessToDom
105  const PTD_FOR_INCLUSION = 1;
106 
107  # Allowed values for $this->mOutputType
108  # Parameter to startExternalParse().
109  const OT_HTML = 1; # like parse()
110  const OT_WIKI = 2; # like preSaveTransform()
112  const OT_MSG = 3;
113  const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
114 
132  const MARKER_SUFFIX = "-QINU`\"'\x7f";
133  const MARKER_PREFIX = "\x7f'\"`UNIQ-";
134 
135  # Markers used for wrapping the table of contents
136  const TOC_START = '<mw:toc>';
137  const TOC_END = '</mw:toc>';
138 
139  # Persistent:
140  public $mTagHooks = [];
142  public $mFunctionHooks = [];
143  public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
144  public $mFunctionTagHooks = [];
145  public $mStripList = [];
146  public $mDefaultStripList = [];
147  public $mVarCache = [];
148  public $mImageParams = [];
150  public $mMarkerIndex = 0;
151  public $mFirstCall = true;
152 
153  # Initialised by initialiseVariables()
154 
158  public $mVariables;
159 
163  public $mSubstWords;
164  # Initialised in constructor
166 
167  # Initialized in getPreprocessor()
168 
170 
171  # Cleared with clearState():
172 
175  public $mOutput;
176  public $mAutonumber;
177 
181  public $mStripState;
182 
188 
189  public $mLinkID;
193  public $mExpensiveFunctionCount; # number of expensive parser function calls
195 
199  public $mUser; # User object; only used when doing pre-save transform
200 
201  # Temporary
202  # These are variables reset at least once per parse regardless of $clearState
203 
207  public $mOptions;
208 
212  public $mTitle; # Title context, used for self-link rendering and similar things
213  public $mOutputType; # Output type, one of the OT_xxx constants
214  public $ot; # Shortcut alias, see setOutputType()
215  public $mRevisionObject; # The revision object of the specified revision ID
216  public $mRevisionId; # ID to display in {{REVISIONID}} tags
217  public $mRevisionTimestamp; # The timestamp of the specified revision ID
218  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
219  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
220  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
221  public $mInputSize = false; # For {{PAGESIZE}} on current page.
222 
228 
235 
243 
248  public $mInParse = false;
249 
251  protected $mProfiler;
252 
256  protected $mLinkRenderer;
257 
/**
 * Create a parser instance.
 *
 * Stores the configuration, builds the regex used for bracketed external
 * links and decides which preprocessor class getPreprocessor() will
 * instantiate.
 *
 * @param array $conf Configuration; when the 'preprocessorClass' key is set
 *   it overrides the automatic preprocessor selection
 */
public function __construct( $conf = [] ) {
    $this->mConf = $conf;
    $this->mUrlProtocols = wfUrlProtocols();

    $bracketedLink = '/\[(((?i)' . $this->mUrlProtocols . ')';
    $bracketedLink .= self::EXT_LINK_ADDR;
    $bracketedLink .= self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
    $this->mExtLinkBracketedRegex = $bracketedLink;

    if ( isset( $conf['preprocessorClass'] ) ) {
        # Explicit configuration always wins
        $chosen = $conf['preprocessorClass'];
    } elseif ( defined( 'HPHP_VERSION' ) ) {
        # Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop
        $chosen = 'Preprocessor_Hash';
    } else {
        $hasObsoleteDomXml = extension_loaded( 'domxml' );
        if ( $hasObsoleteDomXml ) {
            # PECL extension that conflicts with the core DOM extension (bug 13770)
            wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
        }
        # The DOM preprocessor is only used when the core DOM extension is
        # loaded and the conflicting domxml extension is not.
        $chosen = ( !$hasObsoleteDomXml && extension_loaded( 'dom' ) )
            ? 'Preprocessor_DOM'
            : 'Preprocessor_Hash';
    }
    $this->mPreprocessorClass = $chosen;

    wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
283 
/**
 * Unset mLinkHolders (when set) and then every remaining property that is
 * visible from this scope.
 */
public function __destruct() {
    if ( isset( $this->mLinkHolders ) ) {
        unset( $this->mLinkHolders );
    }

    $propertyNames = array_keys( get_object_vars( $this ) );
    foreach ( $propertyNames as $property ) {
        unset( $this->$property );
    }
}
295 
/**
 * Adjust the copy after cloning: mark it as not being inside a parse,
 * detach fields that may still be bound by reference to the original, and
 * notify the 'ParserCloned' hook.
 */
public function __clone() {
    $this->mInParse = false;

    // Bug 56226: taking a reference "to" an object field turns the field
    // itself into a reference (until the other reference goes out of
    // scope), and clone copies reference fields as references. Both are
    // defined PHP5 behaviours, however inconvenient when old PHP4-era
    // hooks pass fields by reference.
    $referenceProneFields = [ 'mStripState', 'mVarCache' ];
    foreach ( $referenceProneFields as $field ) {
        // Take a plain (non-reference) copy of the value, then rebind the
        // field so that it references only that fresh copy.
        $detached = $this->$field;
        $this->$field =& $detached;
        unset( $detached );
    }

    Hooks::run( 'ParserCloned', [ $this ] );
}
318 
/**
 * Perform one-time initialisation.
 *
 * Does nothing when mFirstCall is already false. Otherwise clears the flag,
 * registers the core tag hooks, calls initialiseVariables() and finally runs
 * the 'ParserFirstCallInit' hook.
 */
322  public function firstCallInit() {
323  if ( !$this->mFirstCall ) {
324  return;
325  }
     # Clear the flag before doing any work so the body runs at most once
326  $this->mFirstCall = false;
327 
     # NOTE(review): the listing's embedded numbering jumps from 327 to 329, so
     # a statement may be missing from this copy at this point -- confirm
     # against the upstream file.
329  CoreTagHooks::register( $this );
330  $this->initialiseVariables();
331 
332  Hooks::run( 'ParserFirstCallInit', [ &$this ] );
333  }
334 
/**
 * Reset the per-parse state of the parser to its initial values.
 *
 * Triggers firstCallInit() when it has not run yet, and runs the
 * 'ParserClearState' hook after everything has been reset.
 */
public function clearState() {
    if ( $this->mFirstCall ) {
        $this->firstCallInit();
    }

    # Fresh output object, registered as a watcher on the options
    $this->mOutput = new ParserOutput;
    $this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );

    $this->mAutonumber = 0;
    $this->mIncludeCount = [];
    $this->mLinkHolders = new LinkHolderArray( $this );
    $this->mLinkID = 0;

    # Forget everything known about the revision being parsed
    $this->mRevisionSize = null;
    $this->mRevisionUser = null;
    $this->mRevisionId = null;
    $this->mRevisionTimestamp = null;
    $this->mRevisionObject = null;

    $this->mVarCache = [];
    $this->mUser = null;
    $this->mLangLinkLanguages = [];
    $this->currentRevisionCache = null;

    $this->mStripState = new StripState;

    # Clear these on every parse, bug 4549
    $this->mTplDomCache = [];
    $this->mTplRedirCache = [];

    $this->mShowToc = true;
    $this->mForceTocPosition = false;

    # Counters and limits bookkeeping
    $this->mIncludeSizes = [ 'post-expand' => 0, 'arg' => 0 ];
    $this->mPPNodeCount = 0;
    $this->mGeneratedPPNodeCount = 0;
    $this->mHighestExpansionDepth = 0;
    $this->mDefaultSort = false;
    $this->mHeadings = [];
    $this->mDoubleUnderscores = [];
    $this->mExpensiveFunctionCount = 0;

    # Fix cloning: drop a preprocessor that still points at another parser
    if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
        $this->mPreprocessor = null;
    }

    $this->mProfiler = new SectionProfiler();

    Hooks::run( 'ParserClearState', [ &$this ] );
}
385 
/**
 * Convert wikitext to HTML and return the parser's output object.
 *
 * Steps visible in this method: optionally take the parser lock and replace
 * U+007F in the input, start the parse in OT_HTML mode, temporarily override
 * the revision fields when $revid is given, run the strip hooks,
 * internalParse() and internalParseHalfParsed(), set the (converted) title
 * text, record the limit report, store the text on the output object and
 * restore the saved revision fields.
 *
 * NOTE(review): the body uses $text, $title, $options and $wgShowHostnames,
 * none of which are declared in this copy of the method. The listing's
 * embedded numbering skips 399 and 402-407, so a parameter line and
 * presumably a `global` statement were lost in extraction -- confirm against
 * the upstream file before relying on this signature.
 *
 * @param bool $linestart Forwarded to internalParseHalfParsed()
 * @param bool $clearState When true, U+007F is replaced in the input and the
 *   parser lock is taken; the value is also forwarded to startParse()
 * @param int|null $revid When not null, used as mRevisionId for the duration
 *   of this parse (integer type presumed -- TODO confirm)
 * @return ParserOutput $this->mOutput (clearState() creates it as a ParserOutput)
 */
398  public function parse(
400  $linestart = true, $clearState = true, $revid = null
401  ) {
408 
409  if ( $clearState ) {
410  // We use U+007F DELETE to construct strip markers, so we have to make
411  // sure that this character does not occur in the input text.
412  $text = strtr( $text, "\x7f", "?" );
     // The return value is held in a local that is never read again;
     // presumably a scope guard that releases the lock when it is destroyed -- TODO confirm
413  $magicScopeVariable = $this->lock();
414  }
415 
416  $this->startParse( $title, $options, self::OT_HTML, $clearState );
417 
418  $this->currentRevisionCache = null;
419  $this->mInputSize = strlen( $text );
420  if ( $this->mOptions->getEnableLimitReport() ) {
421  $this->mOutput->resetParseStartTime();
422  }
423 
     // Remember the current revision fields; they are restored before returning
424  $oldRevisionId = $this->mRevisionId;
425  $oldRevisionObject = $this->mRevisionObject;
426  $oldRevisionTimestamp = $this->mRevisionTimestamp;
427  $oldRevisionUser = $this->mRevisionUser;
428  $oldRevisionSize = $this->mRevisionSize;
429  if ( $revid !== null ) {
     // A specific revision was requested: set its ID and clear the other revision fields
430  $this->mRevisionId = $revid;
431  $this->mRevisionObject = null;
432  $this->mRevisionTimestamp = null;
433  $this->mRevisionUser = null;
434  $this->mRevisionSize = null;
435  }
436 
437  Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] );
438  # No more strip!
439  Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] );
440  $text = $this->internalParse( $text );
441  Hooks::run( 'ParserAfterParse', [ &$this, &$text, &$this->mStripState ] );
442 
443  $text = $this->internalParseHalfParsed( $text, true, $linestart );
444 
     # Set the title text from the language converter unless title conversion
     # is disabled, suppressed by a double-underscore switch, or a display
     # title has already been set on the output.
452  if ( !( $options->getDisableTitleConversion()
453  || isset( $this->mDoubleUnderscores['nocontentconvert'] )
454  || isset( $this->mDoubleUnderscores['notitleconvert'] )
455  || $this->mOutput->getDisplayTitle() !== false )
456  ) {
457  $convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
458  if ( $convruletitle ) {
459  $this->mOutput->setTitleText( $convruletitle );
460  } else {
461  $titleText = $this->getConverterLanguage()->convertTitle( $title );
462  $this->mOutput->setTitleText( $titleText );
463  }
464  }
465 
466  # Done parsing! Compute runtime adaptive expiry if set
467  $this->mOutput->finalizeAdaptiveCacheExpiry();
468 
469  # Warn if too many heavyweight parser functions were used
470  if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
471  $this->limitationWarn( 'expensive-parserfunction',
472  $this->mExpensiveFunctionCount,
473  $this->mOptions->getExpensiveParserFunctionLimit()
474  );
475  }
476 
477  # Information on include size limits, for the benefit of users who try to skirt them
478  if ( $this->mOptions->getEnableLimitReport() ) {
479  $max = $this->mOptions->getMaxIncludeSize();
480 
481  $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
     # CPU time is only reported when a value is available
482  if ( $cpuTime !== null ) {
483  $this->mOutput->setLimitReportData( 'limitreport-cputime',
484  sprintf( "%.3f", $cpuTime )
485  );
486  }
487 
488  $wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
489  $this->mOutput->setLimitReportData( 'limitreport-walltime',
490  sprintf( "%.3f", $wallTime )
491  );
492 
     # Each of the following entries is a [ value used, configured limit ] pair
493  $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
494  [ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
495  );
496  $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
497  [ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
498  );
499  $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
500  [ $this->mIncludeSizes['post-expand'], $max ]
501  );
502  $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
503  [ $this->mIncludeSizes['arg'], $max ]
504  );
505  $this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
506  [ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
507  );
508  $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
509  [ $this->mExpensiveFunctionCount,
510  $this->mOptions->getExpensiveParserFunctionLimit() ]
511  );
512  Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );
513 
514  $limitReport = '';
515  Hooks::run( 'ParserLimitReport', [ $this, &$limitReport ] );
516  if ( $limitReport != '' ) {
517  // Sanitize for comment. Note '‐' in the replacement is U+2010,
518  // which looks much like the problematic '-'.
519  $limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
520  $text .= "\n<!-- \nNewPP limit report\n$limitReport-->\n";
521  }
522 
523  // Add on template profiling data in human/machine readable way
524  $dataByFunc = $this->mProfiler->getFunctionStats();
     // NOTE(review): uasort() expects the callback to return an integer
     // (negative, zero or positive). This closure returns a boolean, so it can
     // never report "less than"; consider returning an integer comparison.
525  uasort( $dataByFunc, function ( $a, $b ) {
526  return $a['real'] < $b['real']; // descending order
527  } );
528  $profileReport = [];
     // Only the first ten entries after sorting are reported
529  foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
530  $profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
531  $item['%real'], $item['real'], $item['calls'], $item['name'] );
532  }
533  $this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );
534 
535  // Add other cache related metadata
536  if ( $wgShowHostnames ) {
537  $this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
538  }
539  $this->mOutput->setLimitReportData( 'cachereport-timestamp',
540  $this->mOutput->getCacheTime() );
541  $this->mOutput->setLimitReportData( 'cachereport-ttl',
542  $this->mOutput->getCacheExpiry() );
543  $this->mOutput->setLimitReportData( 'cachereport-transientcontent',
544  $this->mOutput->hasDynamicContent() );
545 
     // Log pages whose generated node count exceeds a tenth of the configured maximum
546  if ( $this->mGeneratedPPNodeCount
547  > $this->mOptions->getMaxGeneratedPPNodeCount() / 10
548  ) {
549  wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
550  $this->mTitle->getPrefixedDBkey() );
551  }
552  }
553  $this->mOutput->setText( $text );
554 
     # Restore the revision fields saved at the start and reset per-parse values
555  $this->mRevisionId = $oldRevisionId;
556  $this->mRevisionObject = $oldRevisionObject;
557  $this->mRevisionTimestamp = $oldRevisionTimestamp;
558  $this->mRevisionUser = $oldRevisionUser;
559  $this->mRevisionSize = $oldRevisionSize;
560  $this->mInputSize = false;
561  $this->currentRevisionCache = null;
562 
563  return $this->mOutput;
564  }
565 
/**
 * Run the strip hooks and internalParse() on a piece of text, without the
 * finishing steps of internalParseHalfParsed().
 *
 * @param string $text Text to parse
 * @param PPFrame|bool $frame Passed to internalParse(); false when no frame
 *   is available (class name presumed -- TODO confirm)
 * @return string Result of internalParse() called with $isMain = false
 */
public function recursiveTagParse( $text, $frame = false ) {
    Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] );
    Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] );

    return $this->internalParse( $text, false, $frame );
}
594 
/**
 * Like recursiveTagParse(), but additionally passes the result through
 * internalParseHalfParsed() (with $isMain = false).
 *
 * @param string $text Text to parse
 * @param PPFrame|bool $frame Forwarded to recursiveTagParse()
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
    $halfParsed = $this->recursiveTagParse( $text, $frame );

    return $this->internalParseHalfParsed( $halfParsed, false );
}
617 
/**
 * Start a fresh parse in OT_PREPROCESS mode, run the strip hooks and
 * replaceVariables() on the text, and return it with strip markers expanded.
 *
 * @param string $text Text to process
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid When not null, stored as mRevisionId
 * @param PPFrame|bool $frame Forwarded to replaceVariables()
 * @return string
 */
public function preprocess( $text, Title $title = null,
    ParserOptions $options, $revid = null, $frame = false
) {
    # Keep the lock() result in a local for the whole call; it is never read
    $lockHandle = $this->lock();
    $this->startParse( $title, $options, self::OT_PREPROCESS, true );

    if ( $revid !== null ) {
        $this->mRevisionId = $revid;
    }

    Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] );
    Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] );

    $expanded = $this->replaceVariables( $text, $frame );

    return $this->mStripState->unstripBoth( $expanded );
}
643 
/**
 * Run replaceVariables() on the text and expand its strip markers, without
 * starting a new parse.
 *
 * @param string $text Text to process
 * @param PPFrame|bool $frame Forwarded to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
    $expanded = $this->replaceVariables( $text, $frame );

    return $this->mStripState->unstripBoth( $expanded );
}
658 
/**
 * Substitute $params into $text as a raw message, then start a parse in
 * OT_PLAIN mode, preprocess the text for inclusion, expand it on a new
 * frame and return the result with strip markers expanded.
 *
 * @param string $text Text to process
 * @param Title $title Forwarded to startParse()
 * @param ParserOptions $options Forwarded to startParse()
 * @param array $params Message parameters applied to $text via RawMessage
 * @return string
 */
672  public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
673  $msg = new RawMessage( $text );
674  $text = $msg->params( $params )->plain();
675 
676  # Parser (re)initialisation
     # The lock() result is kept in a local for the rest of the call; it is never read
677  $magicScopeVariable = $this->lock();
678  $this->startParse( $title, $options, self::OT_PLAIN, true );
679 
     # NOTE(review): $flags is passed to expand() below but is never assigned
     # in this copy. The listing's embedded numbering skips 680, so the
     # assignment was presumably lost in extraction -- confirm against the
     # upstream file.
681  $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
682  $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
683  $text = $this->mStripState->unstripBoth( $text );
684  return $text;
685  }
686 
/**
 * Return wfRandomString( 16 ).
 *
 * @deprecated since 1.26
 * @return string
 */
public static function getRandomString() {
    wfDeprecated( __METHOD__, '1.26' );

    $random = wfRandomString( 16 );
    return $random;
}
697 
/**
 * Set the user that getUser() returns in preference to the one from the
 * parser options.
 *
 * @param User|null $user Value to store in mUser; null means "no override"
 */
public function setUser( $user ) {
    $this->mUser = $user;
}
707 
/**
 * Return the strip marker prefix.
 *
 * @deprecated since 1.26; use the MARKER_PREFIX constant directly
 * @return string
 */
public function uniqPrefix() {
    wfDeprecated( __METHOD__, '1.26' );

    $prefix = self::MARKER_PREFIX;
    return $prefix;
}
718 
/**
 * Set the title used as context for this parser.
 *
 * A falsy argument is replaced by a title built from the text 'NO TITLE'.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
    if ( !$t ) {
        $t = Title::newFromText( 'NO TITLE' );
    }

    # Strip the fragment, if any, to avoid various odd effects
    $this->mTitle = $t->hasFragment()
        ? $t->createFragmentTarget( '' )
        : $t;
}
736 
/**
 * Get the title set through setTitle().
 *
 * @return Title
 */
public function getTitle() {
    $title = $this->mTitle;
    return $title;
}
745 
/**
 * Combined accessor/mutator for mTitle; delegates to wfSetVar().
 *
 * @param Title|null $x Forwarded to wfSetVar() as the new value
 * @return Title Whatever wfSetVar() returns (presumably the previous value -- TODO confirm)
 */
public function Title( $x = null ) {
    $result = wfSetVar( $this->mTitle, $x );
    return $result;
}
755 
/**
 * Set the output type and rebuild the $this->ot shortcut array, which maps
 * 'html', 'wiki', 'pre' and 'plain' to booleans.
 *
 * @param int $ot One of the OT_xxx constants
 */
public function setOutputType( $ot ) {
    $this->mOutputType = $ot;

    # Shortcut alias
    $shortcuts = [];
    $shortcuts['html'] = ( $ot == self::OT_HTML );
    $shortcuts['wiki'] = ( $ot == self::OT_WIKI );
    $shortcuts['pre'] = ( $ot == self::OT_PREPROCESS );
    $shortcuts['plain'] = ( $ot == self::OT_PLAIN );
    $this->ot = $shortcuts;
}
771 
/**
 * Combined accessor/mutator for mOutputType; delegates to wfSetVar().
 *
 * Note that, unlike setOutputType(), this does not touch $this->ot.
 *
 * @param int|null $x Forwarded to wfSetVar() as the new value
 * @return int Whatever wfSetVar() returns (presumably the previous value -- TODO confirm)
 */
public function OutputType( $x = null ) {
    $result = wfSetVar( $this->mOutputType, $x );
    return $result;
}
781 
/**
 * Get the current output object.
 *
 * @return ParserOutput
 */
public function getOutput() {
    $output = $this->mOutput;
    return $output;
}
790 
/**
 * Get the current parser options object.
 *
 * @return ParserOptions
 */
public function getOptions() {
    $options = $this->mOptions;
    return $options;
}
799 
/**
 * Combined accessor/mutator for mOptions; delegates to wfSetVar().
 *
 * @param ParserOptions|null $x Forwarded to wfSetVar() as the new value
 * @return ParserOptions Whatever wfSetVar() returns (presumably the previous value -- TODO confirm)
 */
public function Options( $x = null ) {
    $result = wfSetVar( $this->mOptions, $x );
    return $result;
}
809 
/**
 * Return the current link ID and advance the counter by one.
 *
 * @return int The value of mLinkID before it was incremented
 */
public function nextLinkID() {
    $current = $this->mLinkID;
    $this->mLinkID++;

    return $current;
}
816 
/**
 * Set the value that the next call to nextLinkID() will return.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
    $this->mLinkID = $id;
}
823 
/**
 * Alias of getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
    $language = $this->getTargetLanguage();
    return $language;
}
831 
/**
 * Determine the language the output is produced in.
 *
 * Checked in this order: the target language from the parser options, the
 * user language when parsing an interface message, and finally the page
 * language of the current title.
 *
 * @throws MWException When none of the first two apply and mTitle is null
 * @return Language
 */
public function getTargetLanguage() {
    $explicitTarget = $this->mOptions->getTargetLanguage();
    if ( $explicitTarget !== null ) {
        return $explicitTarget;
    }

    if ( $this->mOptions->getInterfaceMessage() ) {
        return $this->mOptions->getUserLangObj();
    }

    if ( $this->mTitle === null ) {
        throw new MWException( __METHOD__ . ': $this->mTitle is null' );
    }

    return $this->mTitle->getPageLanguage();
}
854 
/**
 * Get the language object used for variant conversion; currently the same
 * as getTargetLanguage().
 *
 * @return Language
 */
public function getConverterLanguage() {
    $language = $this->getTargetLanguage();
    return $language;
}
862 
/**
 * Get the user for this parse: the one set with setUser() when present,
 * otherwise the user from the parser options.
 *
 * @return User
 */
public function getUser() {
    if ( $this->mUser === null ) {
        return $this->mOptions->getUser();
    }

    return $this->mUser;
}
875 
/**
 * Get the preprocessor, instantiating the class chosen in the constructor
 * on first use.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
    if ( isset( $this->mPreprocessor ) ) {
        return $this->mPreprocessor;
    }

    $preprocessorClass = $this->mPreprocessorClass;
    $this->mPreprocessor = new $preprocessorClass( $this );

    return $this->mPreprocessor;
}
888 
/**
 * Get the link renderer, creating it on first use and configuring it with
 * the stub threshold from the parser options.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
    if ( $this->mLinkRenderer ) {
        return $this->mLinkRenderer;
    }

    $factory = MediaWikiServices::getInstance()->getLinkRendererFactory();
    $this->mLinkRenderer = $factory->create();

    $stubThreshold = $this->getOptions()->getStubThreshold();
    $this->mLinkRenderer->setStubThreshold( $stubThreshold );

    return $this->mLinkRenderer;
}
906 
/**
 * Replace the given tags, and HTML comments, in $text with unique markers
 * and record what each marker stands for.
 *
 * For every match, $matches[$marker] is set to a list of:
 *   [ element name, content (null for a self-closing tag),
 *     decoded attributes, reconstructed source text of the element ]
 * An element without a closing tag runs to the end of the text.
 *
 * @param array $elements Tag names to look for
 * @param string $text Source text
 * @param array &$matches Overwritten with the marker => details map
 * @param string|null $uniq_prefix Deprecated since 1.26; a non-null value
 *   only triggers a deprecation notice
 * @return string The text with every matched element replaced by its marker
 */
public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) {
    if ( $uniq_prefix !== null ) {
        wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' );
    }

    # Running marker number; static, so it keeps counting across calls
    static $serial = 1;

    $matches = [];
    $output = '';

    $tagAlternation = implode( '|', $elements );
    $openRegex = "/<($tagAlternation)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

    while ( $text != '' ) {
        $parts = preg_split( $openRegex, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
        $output .= $parts[0];

        $partCount = count( $parts );
        if ( $partCount < 5 ) {
            # Nothing (more) to extract
            break;
        }

        if ( $partCount > 5 ) {
            # comment
            $element = $parts[4];
            $attributes = '';
            $close = '';
            $inside = $parts[5];
        } else {
            # tag
            $element = $parts[1];
            $attributes = $parts[2];
            $close = $parts[3];
            $inside = $parts[4];
        }

        $marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $serial++ ) . self::MARKER_SUFFIX;
        $output .= $marker;

        if ( $close === '/>' ) {
            # Empty element tag, <tag />
            $content = null;
            $tail = null;
            $text = $inside;
        } else {
            $closeRegex = ( $element === '!--' )
                ? '/(-->)/'
                : "/(<\\/$element\\s*>)/i";
            $pieces = preg_split( $closeRegex, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );

            $content = $pieces[0];
            # Without an end tag the element runs out to the end of the text
            $hasEndTag = count( $pieces ) >= 3;
            $tail = $hasEndTag ? $pieces[1] : '';
            $text = $hasEndTag ? $pieces[2] : '';
        }

        $matches[$marker] = [
            $element,
            $content,
            Sanitizer::decodeTagAttributes( $attributes ),
            "<$element$attributes$close$content$tail",
        ];
    }

    return $output;
}
992 
/**
 * Get the strip list (the mStripList property).
 *
 * @return array
 */
public function getStripList() {
    $stripList = $this->mStripList;
    return $stripList;
}
1001 
/**
 * Register $text in the strip state under a newly generated marker and
 * return that marker.
 *
 * @param string $text Text to store
 * @return string The marker that stands for $text
 */
public function insertStripItem( $text ) {
    $index = $this->mMarkerIndex;
    $this->mMarkerIndex++;

    $marker = self::MARKER_PREFIX . '-item-' . $index . '-' . self::MARKER_SUFFIX;
    $this->mStripState->addGeneral( $marker, $text );

    return $marker;
}
1017 
/**
 * Convert wikitext table markup ("{|" ... "|}") into HTML table markup.
 *
 * The text is processed line by line. The state of each open table is kept
 * on parallel stacks (one entry per open table), so a "{|" found while a
 * table is already open starts a nested table. Lines outside any table are
 * copied through unchanged. Tables still open at the end of the text are
 * closed automatically.
 *
 * @param string $text
 * @return string
 */
1025  public function doTableStuff( $text ) {
1026 
1027  $lines = StringUtils::explode( "\n", $text );
1028  $out = '';
1029  $td_history = []; # Is currently a td tag open?
1030  $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1031  $tr_history = []; # Is currently a tr tag open?
1032  $tr_attributes = []; # history of tr attributes
1033  $has_opened_tr = []; # Did this table open a <tr> element?
      # NOTE(review): unlike the stacks above this is a single value, so a
      # nested table overwrites the indent level of its parent -- confirm this is intended
1034  $indent_level = 0; # indent level of the table
1035 
1036  foreach ( $lines as $outLine ) {
1037  $line = trim( $outLine );
1038 
1039  if ( $line === '' ) { # empty line, go to next line
1040  $out .= $outLine . "\n";
1041  continue;
1042  }
1043 
1044  $first_character = $line[0];
1045  $first_two = substr( $line, 0, 2 );
1046  $matches = [];
1047 
1048  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1049  # First check if we are starting a new table
      # The number of leading colons becomes the indent level
1050  $indent_level = strlen( $matches[1] );
1051 
1052  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1053  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1054 
1055  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
      # Push a fresh entry for the new table onto every state stack
1056  array_push( $td_history, false );
1057  array_push( $last_tag_history, '' );
1058  array_push( $tr_history, false );
1059  array_push( $tr_attributes, '' );
1060  array_push( $has_opened_tr, false );
1061  } elseif ( count( $td_history ) == 0 ) {
1062  # Don't do any of the following
      # (no table is open, so the line is passed through untouched)
1063  $out .= $outLine . "\n";
1064  continue;
1065  } elseif ( $first_two === '|}' ) {
1066  # We are ending a table
1067  $line = '</table>' . substr( $line, 2 );
1068  $last_tag = array_pop( $last_tag_history );
1069 
      # A table that never opened a row gets one empty row with an empty cell
1070  if ( !array_pop( $has_opened_tr ) ) {
1071  $line = "<tr><td></td></tr>{$line}";
1072  }
1073 
1074  if ( array_pop( $tr_history ) ) {
1075  $line = "</tr>{$line}";
1076  }
1077 
1078  if ( array_pop( $td_history ) ) {
1079  $line = "</{$last_tag}>{$line}";
1080  }
1081  array_pop( $tr_attributes );
1082  $outLine = $line . str_repeat( '</dd></dl>', $indent_level );
1083  } elseif ( $first_two === '|-' ) {
1084  # Now we have a table row
1085  $line = preg_replace( '#^\|-+#', '', $line );
1086 
1087  # Whats after the tag is now only attributes
1088  $attributes = $this->mStripState->unstripBoth( $line );
1089  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
      # Replace the pending row attributes; the <tr> itself is only emitted
      # when the first cell of the row is seen
1090  array_pop( $tr_attributes );
1091  array_push( $tr_attributes, $attributes );
1092 
1093  $line = '';
1094  $last_tag = array_pop( $last_tag_history );
1095  array_pop( $has_opened_tr );
1096  array_push( $has_opened_tr, true );
1097 
1098  if ( array_pop( $tr_history ) ) {
1099  $line = '</tr>';
1100  }
1101 
1102  if ( array_pop( $td_history ) ) {
1103  $line = "</{$last_tag}>{$line}";
1104  }
1105 
1106  $outLine = $line;
1107  array_push( $tr_history, false );
1108  array_push( $td_history, false );
1109  array_push( $last_tag_history, '' );
1110  } elseif ( $first_character === '|'
1111  || $first_character === '!'
1112  || $first_two === '|+'
1113  ) {
1114  # This might be cell elements, td, th or captions
1115  if ( $first_two === '|+' ) {
1116  $first_character = '+';
1117  $line = substr( $line, 2 );
1118  } else {
1119  $line = substr( $line, 1 );
1120  }
1121 
1122  // Implies both are valid for table headings.
1123  if ( $first_character === '!' ) {
1124  $line = StringUtils::replaceMarkup( '!!', '||', $line );
1125  }
1126 
1127  # Split up multiple cells on the same line.
1128  # FIXME : This can result in improper nesting of tags processed
1129  # by earlier parser steps.
1130  $cells = explode( '||', $line );
1131 
1132  $outLine = '';
1133 
1134  # Loop through each table cell
1135  foreach ( $cells as $cell ) {
1136  $previous = '';
      # Captions do not live inside a row; everything else opens one if needed
1137  if ( $first_character !== '+' ) {
1138  $tr_after = array_pop( $tr_attributes );
1139  if ( !array_pop( $tr_history ) ) {
1140  $previous = "<tr{$tr_after}>\n";
1141  }
1142  array_push( $tr_history, true );
1143  array_push( $tr_attributes, '' );
1144  array_pop( $has_opened_tr );
1145  array_push( $has_opened_tr, true );
1146  }
1147 
1148  $last_tag = array_pop( $last_tag_history );
1149 
      # Close the previous cell, using the tag name it was opened with
1150  if ( array_pop( $td_history ) ) {
1151  $previous = "</{$last_tag}>\n{$previous}";
1152  }
1153 
1154  if ( $first_character === '|' ) {
1155  $last_tag = 'td';
1156  } elseif ( $first_character === '!' ) {
1157  $last_tag = 'th';
1158  } elseif ( $first_character === '+' ) {
1159  $last_tag = 'caption';
1160  } else {
1161  $last_tag = '';
1162  }
1163 
1164  array_push( $last_tag_history, $last_tag );
1165 
1166  # A cell could contain both parameters and data
1167  $cell_data = explode( '|', $cell, 2 );
1168 
1169  # Bug 553: Note that a '|' inside an invalid link should not
1170  # be mistaken as delimiting cell parameters
1171  if ( strpos( $cell_data[0], '[[' ) !== false ) {
1172  $cell = "{$previous}<{$last_tag}>{$cell}";
1173  } elseif ( count( $cell_data ) == 1 ) {
1174  $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
1175  } else {
1176  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1177  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1178  $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
1179  }
1180 
1181  $outLine .= $cell;
1182  array_push( $td_history, true );
1183  }
1184  }
1185  $out .= $outLine . "\n";
1186  }
1187 
1188  # Closing open td, tr && table
      # NOTE(review): this always emits "</td>" and never consults
      # $last_tag_history, so an unterminated table whose last open cell is a
      # <th> or <caption> gets a mismatched closing tag -- confirm whether
      # this is relied upon (e.g. cleaned up by a later tidy step).
1189  while ( count( $td_history ) > 0 ) {
1190  if ( array_pop( $td_history ) ) {
1191  $out .= "</td>\n";
1192  }
1193  if ( array_pop( $tr_history ) ) {
1194  $out .= "</tr>\n";
1195  }
1196  if ( !array_pop( $has_opened_tr ) ) {
1197  $out .= "<tr><td></td></tr>\n";
1198  }
1199 
1200  $out .= "</table>\n";
1201  }
1202 
1203  # Remove trailing line-ending (b/c)
1204  if ( substr( $out, -1 ) === "\n" ) {
1205  $out = substr( $out, 0, -1 );
1206  }
1207 
1208  # special case: don't return empty table
1209  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1210  $out = '';
1211  }
1212 
1213  return $out;
1214  }
1215 
/**
 * Run the main wikitext transformation steps on $text: variable/template
 * expansion, HTML sanitisation, tables, horizontal rules, double-underscore
 * switches, headings, internal links, quotes, external links, magic links
 * and heading formatting.
 *
 * @param string $text Text to transform
 * @param bool $isMain Forwarded to formatHeadings()
 * @param PPFrame|bool $frame When given, used to expand the preprocessed
 *   text; otherwise replaceVariables() is used
 * @return string The transformed text, or the text as left by the
 *   'ParserBeforeInternalParse' hook when that hook returns false
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
    $origText = $text;

    # Hook to suspend the parser in this state
    $proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$this, &$text, &$this->mStripState ] );
    if ( !$proceed ) {
        return $text;
    }

    if ( !$frame ) {
        # No frame was provided: use old-style replaceVariables
        $text = $this->replaceVariables( $text );
    } else {
        # Use the frame depth to infer how include/noinclude tags should be
        # handled: depth 0 is the top-level document, anything else is an
        # included document.
        $ptdFlags = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
        $dom = $this->preprocessToDom( $text, $ptdFlags );
        $text = $frame->expand( $dom );
    }

    Hooks::run( 'InternalParseBeforeSanitize', [ &$this, &$text, &$this->mStripState ] );
    $transparentTags = array_keys( $this->mTransparentTagHooks );
    $text = Sanitizer::removeHTMLtags(
        $text,
        [ &$this, 'attributeStripCallback' ],
        false,
        $transparentTags,
        [],
        [ &$this, 'addTrackingCategory' ]
    );
    Hooks::run( 'InternalParseBeforeLinks', [ &$this, &$text, &$this->mStripState ] );

    # Tables have to come after variable replacement for things to work
    # properly; doing them before the other transformations should keep
    # exciting things like link expansions from showing up in surprising
    # places.
    $text = $this->doTableStuff( $text );

    # Four or more dashes at the start of a line become a horizontal rule
    $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

    $text = $this->doDoubleUnderscore( $text );

    $text = $this->doHeadings( $text );
    $text = $this->replaceInternalLinks( $text );
    $text = $this->doAllQuotes( $text );
    $text = $this->replaceExternalLinks( $text );

    # replaceInternalLinks may sometimes leave behind absolute URLs, which
    # have to be masked to hide them from replaceExternalLinks; remove the
    # mask again now.
    $noParseMarker = self::MARKER_PREFIX . 'NOPARSE';
    $text = str_replace( $noParseMarker, '', $text );

    $text = $this->doMagicLinks( $text );

    return $this->formatHeadings( $text, $origText, $isMain );
}
1288 
/**
 * Finish a text that has already been through internalParse(): unstrip,
 * fix up special characters, build block-level structure, replace link
 * holders, run language conversion, and tidy the result.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain When true, the ParserAfterUnstrip, ParserBeforeTidy
 *   and ParserAfterTidy hooks are run
 * @param bool $linestart Forwarded to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
    $text = $this->mStripState->unstripGeneral( $text );

    if ( $isMain ) {
        Hooks::run( 'ParserAfterUnstrip', [ &$this, &$text ] );
    }

    # Clean up special characters, only run once, next-to-last before doBlockLevels
    $spacePatterns = [
        # french spaces, last one Guillemet-left
        # only if there is something before the space
        '/(.) (?=\\?|:|;|!|%|\\302\\273)/',
        # french spaces, Guillemet-right
        '/(\\302\\253) /',
        # Beware of CSS magic word !important, bug #11874.
        '/&#160;(!\s*important)/',
    ];
    $spaceReplacements = [
        '\\1&#160;',
        '\\1&#160;',
        ' \\1',
    ];
    $text = preg_replace( $spacePatterns, $spaceReplacements, $text );

    $text = $this->doBlockLevels( $text, $linestart );

    $this->replaceLinkHolders( $text );

    # Language conversion is skipped when it is disabled in the options,
    # switched off by a double-underscore switch, or when parsing an
    # interface message.
    if ( !$this->mOptions->getDisableContentConversion()
        && !isset( $this->mDoubleUnderscores['nocontentconvert'] )
        && !$this->mOptions->getInterfaceMessage()
    ) {
        # The position of the convert() call should not be changed. It
        # assumes that the links are all replaced and the only thing left
        # is the <nowiki> mark.
        $text = $this->getConverterLanguage()->convert( $text );
    }

    $text = $this->mStripState->unstripNoWiki( $text );

    if ( $isMain ) {
        Hooks::run( 'ParserBeforeTidy', [ &$this, &$text ] );
    }

    $text = $this->replaceTransparentTags( $text );
    $text = $this->mStripState->unstripGeneral( $text );

    $text = Sanitizer::normalizeCharReferences( $text );

    if ( !MWTidy::isEnabled() ) {
        # attempt to sanitize at least some nesting problems
        # (bug #2702 and quite a few others)
        $nestingPatterns = [
            # ''Something [http://www.cool.com cool''] -->
            # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
            '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/',
            # fix up an anchor inside another anchor, only
            # at least for a single single nested link (bug 3695)
            '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/',
            # fix div inside inline elements- doBlockLevels won't wrap a line which
            # contains a div, so fix it up here; replace
            # div with escaped text
            '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/',
            # remove empty italic or bold tag pairs, some
            # introduced by rules above
            '/<([bi])><\/\\1>/',
        ];
        $nestingReplacements = [
            '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
            '\\1\\2</a>\\3</a>\\1\\4</a>',
            '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
            '',
        ];

        $text = preg_replace( $nestingPatterns, $nestingReplacements, $text );
    } elseif ( $this->mOptions->getTidy() ) {
        $text = MWTidy::tidy( $text );
    }

    if ( $isMain ) {
        Hooks::run( 'ParserAfterTidy', [ &$this, &$text ] );
    }

    return $text;
}
1387 
/**
 * Replace "magic" strings in the text with links: free-standing external
 * URLs, "RFC <n>", "PMID <n>" and "ISBN <n>".
 *
 * Existing anchors (<a ...>...</a>) and the insides of HTML tags are matched
 * first so that they are skipped rather than linked. The actual replacement
 * for every match is produced by magicLinkCallback().
 *
 * @param string $text Text to process
 * @return string Text with the magic strings replaced
 */
public function doMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$space = self::SPACE_NOT_NL; # non-newline space
	$spdash = "(?:-|$space)"; # a dash or a non-newline space
	$spaces = "$space++"; # possessive match of 1 or more spaces
	# The pattern uses the /x modifier, so the layout whitespace and the
	# '#' comments inside the string are ignored by PCRE.
	# The callable uses a plain $this: objects are passed by handle, so
	# taking a reference to $this is unnecessary.
	$text = preg_replace_callback(
		'!(?: # Start cases
			(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
			(<.*?>) | # m[2]: Skip stuff inside
			# HTML elements' . "
			(\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
			# m[4]: Post-protocol path
			\b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN $spaces ( # m[6]: ISBN, capture number
				(?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
				(?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
				[0-9Xx] # check digit
			)\b
		)!xu", [ $this, 'magicLinkCallback' ], $text );
	return $text;
}
1423 
/**
 * Callback for the pattern built in doMagicLinks(): turn one match into
 * its replacement text.
 *
 * Group layout of $m (as labelled in the doMagicLinks() pattern):
 *  1 = existing anchor, 2 = HTML tag, 3 = free external link,
 *  4 = post-protocol part of that link, 5 = RFC/PMID number, 6 = ISBN.
 *
 * @throws MWException If group 5 matched but the match starts with
 *   neither "RFC" nor "PMID"
 * @param array $m Match array from preg_replace_callback()
 * @return string Replacement for $m[0]
 */
public function magicLinkCallback( $m ) {
	// True when capture group $idx took part in the match
	$matched = function ( $idx ) use ( $m ) {
		return isset( $m[$idx] ) && $m[$idx] !== '';
	};

	// Anchors and HTML elements are passed through untouched
	if ( $matched( 1 ) || $matched( 2 ) ) {
		return $m[0];
	}

	// Free external link
	if ( $matched( 3 ) ) {
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	// RFC or PMID
	if ( $matched( 5 ) ) {
		if ( strncmp( $m[0], 'RFC', 3 ) === 0 ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $m[0];
			}
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
		} elseif ( strncmp( $m[0], 'PMID', 4 ) === 0 ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $m[0];
			}
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
	}

	// ISBN, only when the option is enabled
	if ( $matched( 6 ) && $this->mOptions->getMagicISBNLinks() ) {
		// Normalise every kind of non-newline space to a plain space for display
		$isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
		// The book-sources target takes the bare number: no dashes or
		// spaces, and an upper-case check digit
		$num = str_replace( [ '-', ' ', 'x' ], [ '', '', 'X' ], $isbn );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $m[0];
}
1487 
/**
 * Build the HTML for a free (unbracketed) external link, moving trailing
 * punctuation and anything from the first &lt; / &gt; / &nbsp; entity
 * onwards out of the URL and into the text that follows the link.
 *
 * @param string $url Matched URL text, including the protocol
 * @param int $numPostProto Number of characters after the protocol; if the
 *   stripped trail is at least this long nothing but the protocol is left,
 *   and the text is returned unlinked
 * @return string HTML
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$suffix = '';

	# '<' and '>' (escaped by removeHTMLtags()) should not be included in
	# URLs, per RFC 2396. &nbsp; terminates a URL as well (bug T84937).
	$hit = [];
	$entityFound = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$hit,
		PREG_OFFSET_CAPTURE
	);
	if ( $entityFound ) {
		$cut = $hit[0][1];
		$suffix = substr( $url, $cut ) . $suffix;
		$url = substr( $url, 0, $cut );
	}

	# Characters treated as trailing punctuation.
	# NOTE(review): the string is single-quoted, so the mask also contains
	# a literal backslash - confirm that is intended.
	$punctuation = ',;\.:!?';
	# Without a left bracket in the URL, right brackets are fair game too
	if ( strpos( $url, '(' ) === false ) {
		$punctuation .= ')';
	}

	$reversed = strrev( $url );
	$trailLen = strspn( $reversed, $punctuation );
	# Don't break a trailing HTML entity by moving its ';' into the trail.
	# This is hot code: substr_compare avoids creating a new string, and
	# the entity test runs on the reversed string from the given offset
	# instead of using a slow $-anchored match.
	# Un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLen
		&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $hit, 0, $trailLen )
	) {
		$trailLen--;
	}
	if ( $trailLen ) {
		$suffix = substr( $url, -$trailLen ) . $suffix;
		$url = substr( $url, 0, -$trailLen );
	}

	# Verify that a real URL is left after trail removal, not a lone protocol
	if ( strlen( $suffix ) >= $numPostProto ) {
		return $url . $suffix;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html !== false ) {
		return $html . $suffix;
	}

	# Not an image, make a link
	$label = $this->getConverterLanguage()->markNoConversion( $url, true );
	$attribs = $this->getExternalLinkAttribs( $url );
	$html = Linker::makeExternalLink( $url, $label, true, 'free', $attribs, $this->mTitle );
	# Register it in the output object, with unnecessary URL escape codes
	# replaced by their equivalent characters
	$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );

	return $html . $suffix;
}
1564 
/**
 * Convert wikitext heading lines (= Heading = through ====== Heading ======)
 * into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that the longest run of '='
 * signs is consumed first.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( range( 6, 1 ) as $level ) {
		$equals = str_repeat( '=', $level );
		$text = preg_replace(
			"/^{$equals}(.+){$equals}\\s*$/m",
			"<h{$level}>\\1</h{$level}>",
			$text
		);
	}
	return $text;
}
1581 
/**
 * Apply doQuotes() to every line of the text.
 *
 * @param string $text
 * @return string Text with each line processed, joined by newlines again
 */
public function doAllQuotes( $text ) {
	$joined = '';
	foreach ( StringUtils::explode( "\n", $text ) as $row ) {
		$joined .= $this->doQuotes( $row ) . "\n";
	}
	// Drop the newline that was appended after the last line
	return substr( $joined, 0, -1 );
}
1599 
/**
 * Convert the apostrophe mark-up of a single line ('' italics, ''' bold,
 * ''''' bold italics) into <i>/<b> HTML.
 *
 * The line is split on runs of two or more apostrophes; odd array indexes
 * hold the runs, even indexes the text between them. Runs of 4 or of more
 * than 5 apostrophes are reduced to 3 or 5, with the surplus moved into
 * the preceding text. Tags still open at the end of the line are closed.
 *
 * @param string $text A single line of wikitext
 * @return string The line with quote mark-up replaced by HTML
 */
public function doQuotes( $text ) {
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		// No apostrophe runs at all
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that is preceded by a space.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state records the currently open tags in opening order ('', 'i',
	// 'b', 'ib', 'bi'), or 'both' after a ''''' whose nesting order is not
	// known yet; in that state the text is collected in $buffer.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			// Plain text between apostrophe runs
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would treat
	// the text "0" as empty and silently drop it from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1781 
/**
 * Replace bracketed external links ([http://example.org text]) with HTML
 * links and register each target URL in the parser output.
 *
 * The text is split with $this->mExtLinkBracketedRegex keeping the captured
 * groups, so after the leading plain text the pieces come in groups of
 * four: URL, protocol, link text, trailing text.
 *
 * @param string $text
 * @throws MWException If the split fails
 * @return string
 */
public function replaceExternalLinks( $text ) {
	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $pieces === false ) {
		throw new MWException(
			"PCRE needs to be compiled with --enable-unicode-properties in order for MediaWiki to function"
		);
	}
	// Everything before the first link
	$out = array_shift( $pieces );

	$total = count( $pieces );
	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $pieces[$pos];
		// $pieces[$pos + 1] holds the protocol, which is not needed here
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$hit = [];
		if ( preg_match( '/&(lt|gt);/', $url, $hit, PREG_OFFSET_CAPTURE ) ) {
			$cut = $hit[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';
		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$langObj = $this->getTargetLanguage();
			$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Set linktype for CSS - if URL==text, link is essentially free
			$linktype = ( $label === $url ) ? 'free' : 'text';
			# Check for trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		$label = $this->getConverterLanguage()->markNoConversion( $label );

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$attribs = $this->getExternalLinkAttribs( $url );
		$out .= Linker::makeExternalLink( $url, $label, false, $linktype, $attribs, $this->mTitle )
			. $dtrail . $trail;

		# Register link in the output object.
		# Replace unnecessary URL escape codes with the referenced character
		# This prevents spammers from hiding links from the filters
		$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
	}

	return $out;
}
1864 
/**
 * Get the rel attribute value for an external link, based on the
 * $wgNoFollowLinks, $wgNoFollowNsExceptions and $wgNoFollowDomainExceptions
 * settings.
 *
 * @param string|bool $url Target URL, checked against the domain exceptions
 * @param Title|null $title Title whose namespace is checked against the
 *   namespace exceptions; optional
 * @return string|null 'nofollow' when nofollow applies, otherwise null
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	// Without this declaration the three settings would be undefined
	// local variables and 'nofollow' could never be returned.
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	$ns = $title ? $title->getNamespace() : false;
	if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
1884 
/**
 * Get an associative array of additional HTML attributes ('rel' and,
 * when configured, 'target') for an external link to the given URL.
 *
 * @param string $url Target URL of the link
 * @return array Attribute name => value; 'rel' is null when no rel value
 *   applies
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	// Either 'nofollow' or null
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ], true ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// Only add a separating space when there is an existing value:
			// $rel may be null, and null !== '' would otherwise produce
			// " noreferrer noopener" with a leading space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
1915 
/**
 * Deprecated alias of normalizeLinkUrl(); emits a deprecation notice
 * (since 1.24) and delegates.
 *
 * @param string $url
 * @return string
 */
public static function replaceUnusualEscapes( $url ) {
	wfDeprecated( __METHOD__, '1.24' );
	$normalized = self::normalizeLinkUrl( $url );
	return $normalized;
}
1927 
/**
 * Normalise the percent-encoding of a URL: unsafe characters are encoded,
 * and existing escapes are either decoded or upper-cased, with a different
 * set of characters kept escaped in the fragment, the query and the
 * scheme-and-path part.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# First, make sure unsafe characters are encoded
	$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $m ) {
			return rawurlencode( $m[0] );
		},
		$url
	);

	# Characters that stay escaped in every component
	$alwaysUnsafe = '"#%<>[\]^`{|}';
	# Components are cut off the end of the URL, fragment first:
	#  '#' => fragment part - 'fragment'
	#  '?' => query part - 'query' minus &=+;
	$components = [
		'#' => $alwaysUnsafe,
		'?' => $alwaysUnsafe . '&=+;',
	];

	$tail = '';
	$limit = strlen( $url );
	foreach ( $components as $delimiter => $unsafe ) {
		$pos = strpos( $url, $delimiter );
		if ( $pos !== false && $pos < $limit ) {
			$piece = substr( $url, $pos, $limit - $pos );
			$tail = self::normalizeUrlComponent( $piece, $unsafe ) . $tail;
			$limit = $pos;
		}
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$head = self::normalizeUrlComponent( substr( $url, 0, $limit ), $alwaysUnsafe . '/?' );

	return $head . $tail;
}
1972 
/**
 * Normalise the %XX escapes inside one URL component.
 *
 * An escape is decoded when it stands for a byte with an ordinal strictly
 * between 32 and 127 that is not listed in $unsafe; every other escape is
 * kept, with its hex digits upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must remain escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $hit ) use ( $unsafe ) {
			$escape = $hit[0];
			$decoded = urldecode( $escape );
			$code = ord( $decoded );
			$keepEscaped = $code <= 32 || $code >= 127 || strpos( $unsafe, $decoded ) !== false;
			# Either leave it escaped (upper-case a-f) or unescape it
			return $keepEscaped ? strtoupper( $escape ) : $decoded;
		},
		$component
	);
}
1987 
/**
 * Make an <img> tag for the URL if it looks like an image and external
 * images are permitted for it, either through the parser options or
 * through the on-wiki 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|bool HTML of the image, or false if none was made
 */
private function maybeMakeExternalImage( $url ) {
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();
	$hasAllowedFrom = !empty( $allowedFrom );

	# The setting may be a single string or an array of strings;
	# the URL has to start with one of them
	$prefixMatched = false;
	if ( $hasAllowedFrom ) {
		$candidates = is_array( $allowedFrom ) ? $allowedFrom : [ $allowedFrom ];
		foreach ( $candidates as $candidate ) {
			if ( strpos( $url, $candidate ) === 0 ) {
				$prefixMatched = true;
				break;
			}
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( !$html && $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$whitelistText = wfMessage( 'external_image_whitelist' )->inContentLanguage()->text();

		foreach ( explode( "\n", $whitelistText ) as $entry ) {
			# Ignore blank entries and comments
			if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
				continue;
			}
			# Escape the delimiter in the regex fragment and match case-insensitively
			$pattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
			if ( preg_match( $pattern, $url ) ) {
				# Image matches a whitelist entry
				$html = Linker::makeExternalImage( $url );
				break;
			}
		}
	}

	return $html;
}
2045 
/**
 * Process [[ ]] wikilinks in the text and merge the resulting link
 * holders into $this->mLinkHolders.
 *
 * @param string $s
 * @return string The text as rewritten by replaceInternalLinks2()
 */
public function replaceInternalLinks( $s ) {
	// $s is passed by reference and rewritten in place
	$holders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $holders );
	return $s;
}
2059 
/**
 * Process [[ ]] wikilinks.
 *
 * The text is split on '[[' and each piece is examined in turn: language
 * links, images and categories are handled directly, self-links and
 * always-known titles are rendered at once, and everything else is
 * replaced by a placeholder in the returned LinkHolderArray.
 *
 * @param string &$s Text to process; rewritten in place
 * @throws MWException If $this->mTitle is null
 * @return LinkHolderArray Holders for the links that were deferred
 */
public function replaceInternalLinks2( &$s ) {
	// Read further down for the interlanguage check; without the
	// declaration it would be an undefined local variable.
	global $wgExtraInterlanguageLinkPrefixes;

	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		// Declared here because it is only needed on this path; without
		// the declaration the call below would be made on null.
		global $wgContLang;
		$charset = $wgContLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = [];
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();

	// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		// @codingStandardsIgnoreEnd

		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
			# the real problem is with the $e1 regex
			# See bug 1300.
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = $m[1];

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $origLink, $text );
		} else {
			$link = $origLink;
		}

		# A leading ':' forces a plain link instead of the special handling
		# of language links, images and categories
		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		$unstrip = $this->mStripState->unstripNoWiki( $link );
		$nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
		} else {
			# Bug 4598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
				)
			) {
				# Bug 24502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				$s = rtrim( $s . $prefix );
				$s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
				continue;
			}

			if ( $ns == NS_FILE ) {
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
					continue;
				}
			} elseif ( $ns == NS_CATEGORY ) {
				$s = rtrim( $s . "\n" ); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				# Strip the whitespace Category links produce, see bug 87
				$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = [];
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				[ $this, $nt, &$options, &$descQuery ] );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
		}
	}
	return $holders;
}
2367 
/**
 * Render a link to a title that is known to exist and armour any URLs in
 * the result so that replaceExternalLinks() will not touch them.
 *
 * @param Title $nt Link target
 * @param string $text Link text, already HTML; when empty the escaped
 *   prefixed title text is used
 * @param string $trail Text following the link; the part returned first by
 *   Linker::splitTrail() is pulled inside the link
 * @param string $prefix Text placed inside the link before $text
 * @return string HTML of the link followed by the rest of the trail
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	$split = Linker::splitTrail( $trail );
	$inside = $split[0];
	$remainder = $split[1];

	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$renderer = $this->getLinkRenderer();
	$html = $renderer->makeKnownLink( $nt, new HtmlArmor( $prefix . $label . $inside ) );

	return $this->armorLinks( $html ) . $remainder;
}
2394 
/**
 * Insert MARKER_PREFIX followed by "NOPARSE" in front of every URL
 * protocol that starts at a word boundary in the text (matched
 * case-insensitively against $this->mUrlProtocols).
 *
 * @param string $text
 * @return string
 */
public function armorLinks( $text ) {
	$pattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$replacement = self::MARKER_PREFIX . 'NOPARSE$1';
	return preg_replace( $pattern, $replacement, $text );
}
2409 
/**
 * Report whether the namespace of the current title allows subpages.
 *
 * @return bool Result of MWNamespace::hasSubpages() for that namespace
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$namespace = $this->mTitle->getNamespace();
	return MWNamespace::hasSubpages( $namespace );
}
2418 
/**
 * Normalise a possible subpage link relative to the current title by
 * delegating to Linker::normalizeSubpageLink().
 *
 * @param string $target Link target as written
 * @param string &$text Link text, handed on to the Linker call
 * @return string Value returned by Linker::normalizeSubpageLink()
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$contextTitle = $this->mTitle;
	return Linker::normalizeSubpageLink( $contextTitle, $target, $text );
}
2430 
/**
 * Thin wrapper around BlockLevelPass::doBlockLevels().
 *
 * @param string $text
 * @param bool $linestart Passed through unchanged to BlockLevelPass
 * @return string Value returned by BlockLevelPass::doBlockLevels()
 */
public function doBlockLevels( $text, $linestart ) {
	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );
	return $processed;
}
2442 
/**
 * Return the value of a built-in variable (magic word) for the page being parsed.
 *
 * Cases that end in `break` store their result in $this->mVarCache[$index]
 * before returning it. Cases that `return` directly, and the hook-driven
 * default case, bypass that store.
 *
 * @param string $index Magic word ID, matched against the case labels below
 * @param mixed $frame Defaults to false; only forwarded (by reference) to the
 *   ParserGetVariableValueSwitch hook
 * @throws MWException When $this->mTitle is null
 * @return mixed String, integer or null, depending on the case taken
 */
2454  public function getVariableValue( $index, $frame = false ) {
// NOTE(review): $wgContLang, $wgArticlePath, $wgSitename, $wgServer, $wgServerName,
// $wgScriptPath, $wgStylePath and $wgLanguageCode are used below, but no "global"
// declaration is visible in this listing. Presumably elided; confirm against the full file.
2457 
2458  if ( is_null( $this->mTitle ) ) {
2459  // If no title set, bad things are going to happen
2460  // later. Title should always be set since this
2461  // should only be called in the middle of a parse
2462  // operation (but the unit-tests do funky stuff)
2463  throw new MWException( __METHOD__ . ' Should only be '
2464  . ' called while parsing (no title set)' );
2465  }
2466 
// The cache is consulted only when the hook returns a truthy value; the hook
// receives the cache by reference.
2471  if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$this, &$this->mVarCache ] ) ) {
2472  if ( isset( $this->mVarCache[$index] ) ) {
2473  return $this->mVarCache[$index];
2474  }
2475  }
2476 
// $ts is passed to the hook by reference, so a handler can replace the
// timestamp used by the date/time cases below.
2477  $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2478  Hooks::run( 'ParserGetVariableValueTs', [ &$this, &$ts ] );
2479 
2480  $pageLang = $this->getFunctionLang();
2481 
2482  switch ( $index ) {
2483  case '!':
2484  $value = '|';
2485  break;
// "current*" cases use MWTimestamp::getInstance(); "local*" cases use
// MWTimestamp::getLocalInstance().
2486  case 'currentmonth':
2487  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
2488  break;
2489  case 'currentmonth1':
2490  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2491  break;
2492  case 'currentmonthname':
2493  $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2494  break;
2495  case 'currentmonthnamegen':
2496  $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2497  break;
2498  case 'currentmonthabbrev':
2499  $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2500  break;
2501  case 'currentday':
2502  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
2503  break;
2504  case 'currentday2':
2505  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
2506  break;
2507  case 'localmonth':
2508  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
2509  break;
2510  case 'localmonth1':
2511  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2512  break;
2513  case 'localmonthname':
2514  $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2515  break;
2516  case 'localmonthnamegen':
2517  $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2518  break;
2519  case 'localmonthabbrev':
2520  $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2521  break;
2522  case 'localday':
2523  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
2524  break;
2525  case 'localday2':
2526  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
2527  break;
// Page-name cases: every value is passed through wfEscapeWikiText().
2528  case 'pagename':
2529  $value = wfEscapeWikiText( $this->mTitle->getText() );
2530  break;
2531  case 'pagenamee':
2532  $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2533  break;
2534  case 'fullpagename':
2535  $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2536  break;
2537  case 'fullpagenamee':
2538  $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2539  break;
2540  case 'subpagename':
2541  $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2542  break;
2543  case 'subpagenamee':
2544  $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2545  break;
2546  case 'rootpagename':
2547  $value = wfEscapeWikiText( $this->mTitle->getRootText() );
2548  break;
2549  case 'rootpagenamee':
2550  $value = wfEscapeWikiText( wfUrlencode( str_replace(
2551  ' ',
2552  '_',
2553  $this->mTitle->getRootText()
2554  ) ) );
2555  break;
2556  case 'basepagename':
2557  $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2558  break;
2559  case 'basepagenamee':
2560  $value = wfEscapeWikiText( wfUrlencode( str_replace(
2561  ' ',
2562  '_',
2563  $this->mTitle->getBaseText()
2564  ) ) );
2565  break;
// The talk-page cases yield '' when the title reports canTalk() as false.
2566  case 'talkpagename':
2567  if ( $this->mTitle->canTalk() ) {
2568  $talkPage = $this->mTitle->getTalkPage();
2569  $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2570  } else {
2571  $value = '';
2572  }
2573  break;
2574  case 'talkpagenamee':
2575  if ( $this->mTitle->canTalk() ) {
2576  $talkPage = $this->mTitle->getTalkPage();
2577  $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
2578  } else {
2579  $value = '';
2580  }
2581  break;
2582  case 'subjectpagename':
2583  $subjPage = $this->mTitle->getSubjectPage();
2584  $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
2585  break;
2586  case 'subjectpagenamee':
2587  $subjPage = $this->mTitle->getSubjectPage();
2588  $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
2589  break;
2590  case 'pageid': // requested in bug 23427
2591  $pageid = $this->getTitle()->getArticleID();
2592  if ( $pageid == 0 ) {
2593  # 0 means the page doesn't exist in the database,
2594  # which means the user is previewing a new page.
2595  # The vary-revision flag must be set, because the magic word
2596  # will have a different value once the page is saved.
2597  $this->mOutput->setFlag( 'vary-revision' );
2598  wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
2599  }
// An ID of 0 is reported as null rather than 0.
2600  $value = $pageid ? $pageid : null;
2601  break;
2602  case 'revisionid':
2603  # Let the edit saving system know we should parse the page
2604  # *after* a revision ID has been assigned.
2605  $this->mOutput->setFlag( 'vary-revision-id' );
2606  wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision-id...\n" );
2607  $value = $this->mRevisionId;
// With no revision ID, fall back to the speculative-ID callback when the
// options provide one, and record the value on the output.
2608  if ( !$value && $this->mOptions->getSpeculativeRevIdCallback() ) {
2609  $value = call_user_func( $this->mOptions->getSpeculativeRevIdCallback() );
2610  $this->mOutput->setSpeculativeRevIdUsed( $value );
2611  }
2612  break;
// The revision date cases slice getRevisionTimestamp() by fixed offsets:
// year at 0-3, month at 4-5, day at 6-7. The intval() variants drop the leading zero.
2613  case 'revisionday':
2614  # Let the edit saving system know we should parse the page
2615  # *after* a revision ID has been assigned. This is for null edits.
2616  $this->mOutput->setFlag( 'vary-revision' );
2617  wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
2618  $value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
2619  break;
2620  case 'revisionday2':
2621  # Let the edit saving system know we should parse the page
2622  # *after* a revision ID has been assigned. This is for null edits.
2623  $this->mOutput->setFlag( 'vary-revision' );
2624  wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
2625  $value = substr( $this->getRevisionTimestamp(), 6, 2 );
2626  break;
2627  case 'revisionmonth':
2628  # Let the edit saving system know we should parse the page
2629  # *after* a revision ID has been assigned. This is for null edits.
2630  $this->mOutput->setFlag( 'vary-revision' );
2631  wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
2632  $value = substr( $this->getRevisionTimestamp(), 4, 2 );
2633  break;
2634  case 'revisionmonth1':
2635  # Let the edit saving system know we should parse the page
2636  # *after* a revision ID has been assigned. This is for null edits.
2637  $this->mOutput->setFlag( 'vary-revision' );
2638  wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
2639  $value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
2640  break;
2641  case 'revisionyear':
2642  # Let the edit saving system know we should parse the page
2643  # *after* a revision ID has been assigned. This is for null edits.
2644  $this->mOutput->setFlag( 'vary-revision' );
2645  wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
2646  $value = substr( $this->getRevisionTimestamp(), 0, 4 );
2647  break;
2648  case 'revisiontimestamp':
2649  # Let the edit saving system know we should parse the page
2650  # *after* a revision ID has been assigned. This is for null edits.
2651  $this->mOutput->setFlag( 'vary-revision' );
2652  wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
2653  $value = $this->getRevisionTimestamp();
2654  break;
2655  case 'revisionuser':
2656  # Let the edit saving system know we should parse the page
2657  # *after* a revision ID has been assigned for null edits.
2658  $this->mOutput->setFlag( 'vary-user' );
2659  wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-user...\n" );
2660  $value = $this->getRevisionUser();
2661  break;
2662  case 'revisionsize':
2663  $value = $this->getRevisionSize();
2664  break;
2665  case 'namespace':
2666  $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
2667  break;
2668  case 'namespacee':
2669  $value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
2670  break;
2671  case 'namespacenumber':
2672  $value = $this->mTitle->getNamespace();
2673  break;
2674  case 'talkspace':
2675  $value = $this->mTitle->canTalk()
2676  ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
2677  : '';
2678  break;
2679  case 'talkspacee':
2680  $value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
2681  break;
2682  case 'subjectspace':
2683  $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
2684  break;
2685  case 'subjectspacee':
2686  $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
2687  break;
// format( 'w' ) is 0-based; getWeekdayName() is called with that value plus one.
2688  case 'currentdayname':
2689  $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
2690  break;
2691  case 'currentyear':
2692  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
2693  break;
2694  case 'currenttime':
2695  $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
2696  break;
2697  case 'currenthour':
2698  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
2699  break;
2700  case 'currentweek':
2701  # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
2702  # int to remove the padding
2703  $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
2704  break;
2705  case 'currentdow':
2706  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
2707  break;
2708  case 'localdayname':
2709  $value = $pageLang->getWeekdayName(
2710  (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
2711  );
2712  break;
2713  case 'localyear':
2714  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
2715  break;
2716  case 'localtime':
2717  $value = $pageLang->time(
2718  MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
2719  false,
2720  false
2721  );
2722  break;
2723  case 'localhour':
2724  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
2725  break;
2726  case 'localweek':
2727  # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
2728  # int to remove the padding
2729  $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
2730  break;
2731  case 'localdow':
2732  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
2733  break;
// Site statistics, formatted with the function language's formatNum().
2734  case 'numberofarticles':
2735  $value = $pageLang->formatNum( SiteStats::articles() );
2736  break;
2737  case 'numberoffiles':
2738  $value = $pageLang->formatNum( SiteStats::images() );
2739  break;
2740  case 'numberofusers':
2741  $value = $pageLang->formatNum( SiteStats::users() );
2742  break;
2743  case 'numberofactiveusers':
2744  $value = $pageLang->formatNum( SiteStats::activeUsers() );
2745  break;
2746  case 'numberofpages':
2747  $value = $pageLang->formatNum( SiteStats::pages() );
2748  break;
2749  case 'numberofadmins':
2750  $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
2751  break;
2752  case 'numberofedits':
2753  $value = $pageLang->formatNum( SiteStats::edits() );
2754  break;
2755  case 'currenttimestamp':
2756  $value = wfTimestamp( TS_MW, $ts );
2757  break;
2758  case 'localtimestamp':
2759  $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
2760  break;
2761  case 'currentversion':
// NOTE(review): no assignment to $value is visible for this case. A line appears to be
// missing from this listing; confirm against the full file.
2763  break;
// The cases below return directly, so their values are never written to $this->mVarCache.
2764  case 'articlepath':
2765  return $wgArticlePath;
2766  case 'sitename':
2767  return $wgSitename;
2768  case 'server':
2769  return $wgServer;
2770  case 'servername':
2771  return $wgServerName;
2772  case 'scriptpath':
2773  return $wgScriptPath;
2774  case 'stylepath':
2775  return $wgStylePath;
2776  case 'directionmark':
2777  return $pageLang->getDirMark();
2778  case 'contentlanguage':
2780  return $wgLanguageCode;
2781  case 'cascadingsources':
// NOTE(review): no assignment to $value is visible for this case either. Presumably
// elided from the listing; confirm against the full file.
2783  break;
2784  default:
// Unknown IDs are delegated to extensions; $ret stays null unless a hook handler sets it.
2785  $ret = null;
2786  Hooks::run(
2787  'ParserGetVariableValueSwitch',
2788  [ &$this, &$this->mVarCache, &$index, &$ret, &$frame ]
2789  );
2790 
2791  return $ret;
2792  }
2793 
// Only truthy indexes are cached.
2794  if ( $index ) {
2795  $this->mVarCache[$index] = $value;
2796  }
2797 
2798  return $value;
2799  }
2800 
/**
 * Build the MagicWordArray objects for variable IDs and subst IDs and
 * store them in $this->mVariables and $this->mSubstWords.
 */
public function initialiseVariables() {
	$idsForVariables = MagicWord::getVariableIDs();
	$idsForSubst = MagicWord::getSubstIDs();

	$this->mVariables = new MagicWordArray( $idsForVariables );
	$this->mSubstWords = new MagicWordArray( $idsForSubst );
}
2813 
/**
 * Run the preprocessor over $text and return the resulting object.
 *
 * @param string $text
 * @param int $flags Forwarded to the preprocessor (see self::PTD_FOR_INCLUSION)
 * @return mixed Return value of the preprocessor's preprocessToObj()
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
2840 
/**
 * Split a string into its leading whitespace, its trimmed core and its
 * trailing whitespace, using ltrim()/rtrim() default character sets.
 *
 * @param string $s
 * @return array Three elements: [ leading whitespace, trimmed text, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	$withoutLead = ltrim( $s );
	$leading = substr( $s, 0, strlen( $s ) - strlen( $withoutLead ) );

	$core = rtrim( $withoutLead );
	$trailingLength = strlen( $withoutLead ) - strlen( $core );
	# substr() with a negative zero offset would return the whole string,
	# so the "no trailing whitespace" case is handled explicitly
	$trailing = $trailingLength > 0 ? substr( $withoutLead, -$trailingLength ) : '';

	return [ $leading, $core, $trailing ];
}
2860 
/**
 * Preprocess $text and expand it in the given frame.
 *
 * Empty text, and text longer than the configured maximum include size,
 * is returned unchanged.
 *
 * @param string $text
 * @param bool|PPFrame|mixed $frame False to use a fresh frame from the
 *   preprocessor; a PPFrame to use as-is; anything else is handed to the
 *   preprocessor's newCustomFrame()
 * @param bool $argsOnly When true, expansion runs with PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text? Also, Prevent too big inclusions!
	$length = strlen( $text );
	if ( $length < 1 ) {
		return $text;
	}
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			wfDebug( __METHOD__ . " called using plain parameters instead of "
				. "a PPFrame instance. Creating custom frame.\n" );
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$tree = $this->preprocessToDom( $text );
	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $tree, $expandFlags );
}
2902 
/**
 * Turn a list of "name=value" / "value" strings into an associative array.
 *
 * Arguments without an "=" are numbered sequentially starting at 1.
 * Arguments with an "=" are split at the first "=", and both sides are
 * trimmed. A later argument overwrites an earlier one with the same key.
 *
 * @param string[] $args
 * @return array Map of parameter name (or 1-based position) to value
 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	$index = 1;
	foreach ( $args as $arg ) {
		$eqpos = strpos( $arg, '=' );
		if ( $eqpos === false ) {
			$assocArgs[$index++] = $arg;
			continue;
		}

		# trim() always returns a string, so no false-checks are needed here:
		# an empty name or value simply becomes ''
		$name = trim( substr( $arg, 0, $eqpos ) );
		$value = trim( substr( $arg, $eqpos + 1 ) );
		$assocArgs[$name] = $value;
	}

	return $assocArgs;
}
2931 
/**
 * Add a limitation warning to the parser output and tag the page with
 * the matching tracking category.
 *
 * The message key is "$limitationType-warning" and the category key is
 * "$limitationType-category".
 *
 * @param string $limitationType
 * @param string|int $current Passed as the first numeric message parameter
 * @param string|int $max Passed as the second numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# does no harm if $current and $max are present but are unnecessary for the message
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( "$limitationType-warning" );
	$message = $message->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( "$limitationType-category" );
}
2967 
/**
 * Work out what a double-brace construct ({{...}}) expands to.
 *
 * The possibilities are tried in this order, stopping at the first that
 * produces text: subst handling, variables, parser functions, then
 * template / special page / interwiki transclusion. If nothing matches,
 * the original wikitext is reassembled and returned.
 *
 * @param array $piece The parts of the construct; the keys read here are
 *   'title', 'parts' and 'lineStart'
 * @param PPFrame $frame The frame to expand in
 * @return array Either [ 'object' => ... ] for a result that still has to
 *   be expanded in the current frame, or [ 'text' => ... ]
 */
public function braceSubstitution( $piece, $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	# @todo FIXME: If piece['parts'] is null then the call to getLength()
	# below won't work b/c this $args isn't an object
	$args = ( null == $piece['parts'] ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->getVariableValue( $id, $frame );
			if ( MagicWord::getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = MagicWord::get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = MagicWord::get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = MagicWord::get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}
			# Exceptions thrown by the parser function propagate to the caller
			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			# The interface for parser functions allows for extracting
			# flags into the local scope. Extract any forwarded flags
			# here. This may overwrite $found, $text, $nowiki, $isHTML,
			# $isChildObj and similar locals declared above.
			extract( $result );
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = $this->maybeDoSubpageLink( $part1, $subpage );
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getConverterLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = SpecialPageFactory::getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				// NOTE(review): the head of this call was not visible in the listing
				// this was reviewed from; restored as SpecialPageFactory::capturePath().
				// Confirm against the full file.
				$ret = SpecialPageFactory::capturePath(
					$title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				wfDebug( __METHOD__ . ": template inclusion denied for " .
					$title->getPrefixedDBkey() . "\n" );
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# Bug 529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (bug 23588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# preg_replace() returns the new string, so the result has to be
			# assigned back; otherwise a leading colon would be doubled below.
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3289 
/**
 * Look up a parser function by name and call its registered hook.
 *
 * The name is first looked up among the case-sensitive synonyms, then,
 * lower-cased with the content language, among the case-insensitive ones.
 *
 * @param PPFrame $frame Frame used to expand node arguments; also passed
 *   to hooks registered with self::SFH_OBJECT_ARGS
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments; elements may be PPNode objects or plain
 *   values. Non-integer or negative keys are rendered as "key=value" for
 *   hooks that take plain-text arguments.
 * @throws MWException When the registered callback is not callable
 * @return array Always contains 'found'. When the function exists it also
 *   carries whatever the hook returned, with element 0 moved to 'text'.
 *   If the hook set 'noparse' to a false value, 'text' is a preprocessed
 *   DOM and 'isChildObj' is true.
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	global $wgContLang;

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		throw new MWException( "Tag hook for $function is not callable\n" );
	}

	# A reference to $this cannot be taken reliably on PHP 7.1+, so hand
	# hooks a reference to a local alias instead
	$parser = $this;
	$allArgs = [ &$parser ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	}

	# Hook output is left unparsed unless the hook explicitly says otherwise
	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3396 
/**
 * Get the preprocessed DOM of a template, using the per-parser caches.
 *
 * $this->mTplRedirCache maps a requested title to the title that was
 * actually fetched. $this->mTplDomCache maps a prefixed DB key to a DOM,
 * or to false when no text could be fetched.
 *
 * @param Title $title
 * @return array Two elements: [ DOM or false, Title the result belongs to ]
 */
public function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	# Follow a previously recorded redirect before looking at the DOM cache
	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return [ $this->mTplDomCache[$titleText], $title ];
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		$this->mTplDomCache[$titleText] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	# Remember where the requested title ended up, if it is a different page
	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			[ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3436 
3449  $cacheKey = $title->getPrefixedDBkey();
3450  if ( !$this->currentRevisionCache ) {
3451  $this->currentRevisionCache = new MapCacheLRU( 100 );
3452  }
3453  if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
3454  $this->currentRevisionCache->set( $cacheKey,
3455  // Defaults to Parser::statelessFetchRevision()
3456  call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
3457  );
3458  }
3459  return $this->currentRevisionCache->get( $cacheKey );
3460  }
3461 
/**
 * Fetch a revision object for a title without using any parser state.
 *
 * When a revision is found, its title is set to $title before it is returned.
 *
 * @param Title $title
 * @param Parser|bool $parser Not used in the lines visible here
 * @return mixed The value of $rev; falsy when no revision was found
 */
3471  public static function statelessFetchRevision( Title $title, $parser = false ) {
3472  $pageId = $title->getArticleID();
3473  $revId = $title->getLatestRevID();
3474 
// NOTE(review): no assignment to $rev is visible in this listing. The line that
// presumably builds it from $pageId and $revId appears to be missing; confirm
// against the full file.
3476  if ( $rev ) {
3477  $rev->setTitle( $title );
3478  }
3479 
3480  return $rev;
3481  }
3482 
/**
 * Fetch the text of a template through the configured template callback
 * and register every dependency it reports with the parser output.
 *
 * @param Title $title
 * @return array Two elements: [ text (DELETE characters replaced by "?")
 *   or the callback's non-string value, final Title ]
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$callback = $this->mOptions->getTemplateCallback();
	$fetched = call_user_func( $callback, $title, $this );

	// We use U+007F DELETE to distinguish strip markers from regular text.
	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		$text = strtr( $text, "\x7f", "?" );
	}

	if ( isset( $fetched['finalTitle'] ) ) {
		$finalTitle = $fetched['finalTitle'];
	} else {
		$finalTitle = $title;
	}

	if ( isset( $fetched['deps'] ) ) {
		foreach ( $fetched['deps'] as $dependency ) {
			$this->mOutput->addTemplate(
				$dependency['title'],
				$dependency['page_id'],
				$dependency['rev_id']
			);
			if ( $dependency['title']->equals( $this->getTitle() ) ) {
				// If we transclude ourselves, the final result
				// will change based on the new version of the page
				$this->mOutput->setFlag( 'vary-revision' );
			}
		}
	}

	return [ $text, $finalTitle ];
}
3510 
/**
 * Fetch only the text of a template; the final title is discarded.
 *
 * @param Title $title
 * @return mixed First element of fetchTemplateAndTitle()'s result
 */
public function fetchTemplate( $title ) {
	$textAndTitle = $this->fetchTemplateAndTitle( $title );

	return $textAndTitle[0];
}
3519 
/**
 * Fetch the text of a template, following at most one redirect, and
 * collect the pages the result depends on.
 *
 * @param Title $title
 * @param Parser|bool $parser When truthy, used to look up the current revision
 *   and passed to the BeforeParserFetchTemplateAndtitle hook
 * @return array With keys 'text' (string, or false when nothing was found),
 *   'finalTitle' and 'deps' (list of arrays with 'title', 'page_id', 'rev_id')
 */
3529  public static function statelessFetchTemplate( $title, $parser = false ) {
3530  $text = $skip = false;
3531  $finalTitle = $title;
3532  $deps = [];
3533 
3534  # Loop to fetch the article, with up to 1 redirect
3535  // @codingStandardsIgnoreStart Generic.CodeAnalysis.ForLoopWithTestFunctionCall.NotAllowed
3536  for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
3537  // @codingStandardsIgnoreEnd
3538  # Give extensions a chance to select the revision instead
3539  $id = false; # Assume current
3540  Hooks::run( 'BeforeParserFetchTemplateAndtitle',
3541  [ $parser, $title, &$skip, &$id ] );
3542 
// A hook can veto the fetch by setting $skip; the title is still recorded as a
// dependency, with a null revision ID.
3543  if ( $skip ) {
3544  $text = false;
3545  $deps[] = [
3546  'title' => $title,
3547  'page_id' => $title->getArticleID(),
3548  'rev_id' => null
3549  ];
3550  break;
3551  }
3552  # Get the revision
3553  if ( $id ) {
3554  $rev = Revision::newFromId( $id );
3555  } elseif ( $parser ) {
3556  $rev = $parser->fetchCurrentRevisionOfTitle( $title );
3557  } else {
// NOTE(review): this branch is empty in the listing, so $rev would be unset when
// neither $id nor $parser is given. A line appears to be missing; confirm against
// the full file.
3559  }
3560  $rev_id = $rev ? $rev->getId() : 0;
3561  # If there is no current revision, there is no page
3562  if ( $id === false && !$rev ) {
3563  $linkCache = LinkCache::singleton();
3564  $linkCache->addBadLinkObj( $title );
3565  }
3566 
3567  $deps[] = [
3568  'title' => $title,
3569  'page_id' => $title->getArticleID(),
3570  'rev_id' => $rev_id ];
3571  if ( $rev && !$title->equals( $rev->getTitle() ) ) {
3572  # We fetched a rev from a different title; register it too...
3573  $deps[] = [
3574  'title' => $rev->getTitle(),
3575  'page_id' => $rev->getPage(),
3576  'rev_id' => $rev_id ];
3577  }
3578 
3579  if ( $rev ) {
3580  $content = $rev->getContent();
3581  $text = $content ? $content->getWikitextForTransclusion() : null;
3582 
// Both false and null are normalised to false before leaving the loop.
3583  if ( $text === false || $text === null ) {
3584  $text = false;
3585  break;
3586  }
3587  } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
// NOTE(review): $wgContLang is used on the next line without a visible "global"
// declaration. Presumably elided from this listing; confirm.
3589  $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
3590  if ( !$message->exists() ) {
3591  $text = false;
3592  break;
3593  }
3594  $content = $message->content();
3595  $text = $message->plain();
3596  } else {
3597  break;
3598  }
3599  if ( !$content ) {
3600  break;
3601  }
3602  # Redirect?
// $title becomes the redirect target; the loop condition ends the loop when
// that is not an object.
3603  $finalTitle = $title;
3604  $title = $content->getRedirectTarget();
3605  }
3606  return [
3607  'text' => $text,
3608  'finalTitle' => $finalTitle,
3609  'deps' => $deps ];
3610  }
3611 
/**
 * Fetch a file, discarding the resolved title.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed First element of fetchFileAndTitle()'s result
 */
public function fetchFile( $title, $options = [] ) {
	list( $fetchedFile ) = $this->fetchFileAndTitle( $title, $options );
	return $fetchedFile;
}
3622 
/**
 * Fetch a file and register it as an image dependency of the output.
 *
 * The requested title is always registered. When the file found lives under
 * a different title, that title is registered too and returned instead.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (or a falsy value) and the title
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$found = $this->fetchFileNoRegister( $title, $options );

	if ( $found ) {
		$timestamp = $found->getTimestamp();
		$hash = $found->getSha1();
	} else {
		$timestamp = false;
		$hash = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );

	if ( $found ) {
		if ( !$title->equals( $found->getTitle() ) ) {
			# Update fetched file title
			$title = $found->getTitle();
			$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );
		}
	}

	return [ $found, $title ];
}
3644 
/**
 * Look up a file without registering it as a dependency.
 *
 * @param Title $title
 * @param array $options 'broken' forces a false result; 'sha1' selects
 *   lookup by key; otherwise the file is looked up by title
 * @return mixed The file found, or false when 'broken' is set
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return wfFindFile( $title, $options );
}
3665 
/**
 * Transclude a page by URL: builds the full URL of $title for the given
 * action and hands it to fetchScaryTemplateMaybeFromCache().
 *
 * Returns a content-language message text instead when the feature flag is
 * off or when the URL is longer than 255 characters.
 *
 * NOTE(review): $wgEnableScaryTranscluding is read without a visible "global"
 * declaration; listing line 3675 is absent from this copy and presumably
 * held it -- confirm against the upstream file.
 *
 * @param Title $title
 * @param string $action Value for the 'action' URL parameter
 * @return string
 */
3674  public function interwikiTransclude( $title, $action ) {
3676 
3677  if ( !$wgEnableScaryTranscluding ) {
3678  return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
3679  }
3680 
3681  $url = $title->getFullURL( [ 'action' => $action ] );
3682 
3683  if ( strlen( $url ) > 255 ) {
3684  return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
3685  }
3686  return $this->fetchScaryTemplateMaybeFromCache( $url );
3687  }
3688 
/**
 * Return the contents of $url, served from the 'transcache' table when a
 * sufficiently recent row exists, otherwise fetched over HTTP and stored.
 *
 * NOTE(review): $wgTranscludeCacheExpiry is read without a visible "global"
 * declaration; listing line 3694 is absent from this copy and presumably
 * held it -- confirm against the upstream file.
 *
 * @param string $url
 * @return string Cached or fetched contents, or a content-language error
 *   message text when the HTTP request fails
 */
3693  public function fetchScaryTemplateMaybeFromCache( $url ) {
3695  $dbr = wfGetDB( DB_REPLICA );
// Only rows written within the last $wgTranscludeCacheExpiry seconds count.
3696  $tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
3697  $obj = $dbr->selectRow( 'transcache', [ 'tc_time', 'tc_contents' ],
3698  [ 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ] );
3699  if ( $obj ) {
3700  return $obj->tc_contents;
3701  }
3702 
3703  $req = MWHttpRequest::factory( $url, [], __METHOD__ );
3704  $status = $req->execute(); // Status object
3705  if ( $status->isOK() ) {
3706  $text = $req->getContent();
3707  } elseif ( $req->getStatus() != 200 ) {
3708  // Though we failed to fetch the content, this status is useless.
3709  return wfMessage( 'scarytranscludefailed-httpstatus' )
3710  ->params( $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
3711  } else {
3712  return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
3713  }
3714 
// Successful fetch: insert or overwrite the cache row keyed on tc_url.
3715  $dbw = wfGetDB( DB_MASTER );
3716  $dbw->replace( 'transcache', [ 'tc_url' ], [
3717  'tc_url' => $url,
3718  'tc_time' => $dbw->timestamp( time() ),
3719  'tc_contents' => $text
3720  ] );
3721  return $text;
3722  }
3723 
/**
 * Resolve a triple-brace argument against the given frame.
 *
 * Resolution order: the frame's own value; otherwise the first default
 * part (in html/pre output, or in wiki output inside a template);
 * otherwise the argument is rebuilt as a literal {{{...}}} node.
 *
 * @param array $piece Uses keys 'title' and 'parts'
 * @param PPFrame $frame
 * @return array Either [ 'object' => ... ] or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$defaults = $piece['parts'];
	$rawName = $frame->expand( $piece['title'] );
	$node = false;
	$value = $frame->getArgument( trim( $rawName ) );

	if ( $value === false && $defaults->getLength() > 0 ) {
		$defaultApplies = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultApplies ) {
			# No match in frame, use the supplied default
			$node = $defaults->item( 0 )->getChildren();
		}
	}

	$sizeWarning = false;
	if ( !$this->incrementIncludeSize( 'arg', strlen( $value ) ) ) {
		$sizeWarning = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $value === false && $node === false ) {
		# No match anywhere
		$node = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $rawName, $defaults );
	}

	if ( $sizeWarning !== false ) {
		$value .= $sizeWarning;
	}

	return $node !== false ? [ 'object' => $node ] : [ 'text' => $value ];
}
3770 
/**
 * Produce the output for an extension tag and, unless the marker type is
 * 'none', stash it in the strip state and return a strip marker instead.
 *
 * In html output (or for function tag hooks in html/pre output) the
 * registered hook for the tag is called. In every other case the tag is
 * rebuilt as literal markup from its name, attributes, content and close.
 *
 * @param array $params Reads keys 'name', 'attr', 'inner', 'close' and 'attributes'
 * @param PPFrame $frame
 * @throws MWException When a registered hook is not callable, or when the
 *   final marker type is not 'none', 'nowiki' or 'general'
 * @return string The hook output itself (marker type 'none') or a strip marker
 */
3786  public function extensionSubstitution( $params, $frame ) {
3787  $name = $frame->expand( $params['name'] );
3788  $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
3789  $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
3790  $marker = self::MARKER_PREFIX . "-$name-"
3791  . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;
3792 
3793  $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
3794  ( $this->ot['html'] || $this->ot['pre'] );
3795  if ( $isFunctionTag ) {
3796  $markerType = 'none';
3797  } else {
3798  $markerType = 'general';
3799  }
3800  if ( $this->ot['html'] || $isFunctionTag ) {
3801  $name = strtolower( $name );
3802  $attributes = Sanitizer::decodeTagAttributes( $attrText );
3803  if ( isset( $params['attributes'] ) ) {
// Array union: keys already decoded from $attrText win over $params['attributes'].
3804  $attributes = $attributes + $params['attributes'];
3805  }
3806 
3807  if ( isset( $this->mTagHooks[$name] ) ) {
3808  # Workaround for PHP bug 35229 and similar
3809  if ( !is_callable( $this->mTagHooks[$name] ) ) {
3810  throw new MWException( "Tag hook for $name is not callable\n" );
3811  }
3812  $output = call_user_func_array( $this->mTagHooks[$name],
3813  [ $content, $attributes, $this, $frame ] );
3814  } elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
3815  list( $callback, ) = $this->mFunctionTagHooks[$name];
3816  if ( !is_callable( $callback ) ) {
3817  throw new MWException( "Tag hook for $name is not callable\n" );
3818  }
3819 
3820  $output = call_user_func_array( $callback, [ &$this, $frame, $content, $attributes ] );
3821  } else {
3822  $output = '<span class="error">Invalid tag extension name: ' .
3823  htmlspecialchars( $name ) . '</span>';
3824  }
3825 
3826  if ( is_array( $output ) ) {
3827  # Extract flags to local scope (to override $markerType)
// NOTE(review): extract() imports every remaining key of the hook's return
// array as a local variable, so it can overwrite ANY local here by name,
// not only $markerType. Renaming locals in this method changes which keys
// collide -- treat local names as part of the hook contract.
3828  $flags = $output;
3829  $output = $flags[0];
3830  unset( $flags[0] );
3831  extract( $flags );
3832  }
3833  } else {
// Not rendering through a hook: rebuild the tag as literal text.
3834  if ( is_null( $attrText ) ) {
3835  $attrText = '';
3836  }
3837  if ( isset( $params['attributes'] ) ) {
3838  foreach ( $params['attributes'] as $attrName => $attrValue ) {
3839  $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
3840  htmlspecialchars( $attrValue ) . '"';
3841  }
3842  }
3843  if ( $content === null ) {
3844  $output = "<$name$attrText/>";
3845  } else {
3846  $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
3847  $output = "<$name$attrText>$content$close";
3848  }
3849  }
3850 
3851  if ( $markerType === 'none' ) {
3852  return $output;
3853  } elseif ( $markerType === 'nowiki' ) {
3854  $this->mStripState->addNoWiki( $marker, $output );
3855  } elseif ( $markerType === 'general' ) {
3856  $this->mStripState->addGeneral( $marker, $output );
3857  } else {
3858  throw new MWException( __METHOD__ . ': invalid marker type' );
3859  }
3860  return $marker;
3861  }
3862 
/**
 * Add $size to the include-size counter for $type, unless doing so would
 * exceed the maximum include size from the parser options.
 *
 * @param string $type Key into mIncludeSizes
 * @param int $size Amount to add
 * @return bool False (counter untouched) when the limit would be exceeded,
 *   true otherwise
 */
public function incrementIncludeSize( $type, $size ) {
	$projected = $this->mIncludeSizes[$type] + $size;
	if ( $projected > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] += $size;
	return true;
}
3878 
// NOTE(review): the declaration line of this method is absent from this copy
// (the embedded listing numbers jump from 3878 to 3885); only the body is
// visible. Confirm the signature against the upstream file.
//
// Increments the expensive-function counter, then reports whether the new
// count is still within the limit taken from the parser options.
3885  $this->mExpensiveFunctionCount++;
3886  return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
3887  }
3888 
/**
 * Strip double-underscore magic words from $text and apply their effects.
 *
 * The first TOC word is replaced with the '<!--MWTOC-->' placeholder and any
 * further ones are removed. All other matched words are removed and recorded
 * in mDoubleUnderscores, then applied to the parser output (gallery, index
 * policy, tracking categories) and stored as page properties.
 *
 * NOTE(review): $mwa is used below without being assigned in the visible
 * text; listing line 3913 is absent from this copy and presumably created
 * it -- confirm against the upstream file.
 *
 * @param string $text
 * @return string $text with the double-underscore words handled
 */
3897  public function doDoubleUnderscore( $text ) {
3898 
3899  # The position of __TOC__ needs to be recorded
3900  $mw = MagicWord::get( 'toc' );
3901  if ( $mw->match( $text ) ) {
3902  $this->mShowToc = true;
3903  $this->mForceTocPosition = true;
3904 
3905  # Set a placeholder. At the end we'll fill it in with the TOC.
3906  $text = $mw->replace( '<!--MWTOC-->', $text, 1 );
3907 
3908  # Only keep the first one.
3909  $text = $mw->replace( '', $text );
3910  }
3911 
3912  # Now match and remove the rest of them
3914  $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
3915 
3916  if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
3917  $this->mOutput->mNoGallery = true;
3918  }
// A 'notoc' word only hides the TOC when no explicit TOC position was set above.
3919  if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
3920  $this->mShowToc = false;
3921  }
3922  if ( isset( $this->mDoubleUnderscores['hiddencat'] )
3923  && $this->mTitle->getNamespace() == NS_CATEGORY
3924  ) {
3925  $this->addTrackingCategory( 'hidden-category-category' );
3926  }
3927  # (bug 8068) Allow control over whether robots index a page.
3928  # @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
3929  # is not desirable, the last one on the page should win.
3930  if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
3931  $this->mOutput->setIndexPolicy( 'noindex' );
3932  $this->addTrackingCategory( 'noindex-category' );
3933  }
3934  if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
3935  $this->mOutput->setIndexPolicy( 'index' );
3936  $this->addTrackingCategory( 'index-category' );
3937  }
3938 
3939  # Cache all double underscores in the database
3940  foreach ( $this->mDoubleUnderscores as $key => $val ) {
3941  $this->mOutput->setProperty( $key, '' );
3942  }
3943 
3944  return $text;
3945  }
3946 
/**
 * Add a tracking category to the output for the title being parsed.
 *
 * @param string $msg Passed unchanged to the output's addTrackingCategory()
 * @return mixed Whatever the output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$outcome = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );
	return $outcome;
}
3955 
/**
 * Rebuild every <H1>-<H6> heading found in $text and assemble the table of
 * contents.
 *
 * For each heading matched in $text this computes a TOC nesting level and a
 * numbering string, derives an anchor that is unique within the page,
 * optionally prefixes the heading number, and builds an <mw:editsection>
 * placeholder. The heading is then re-rendered via Linker::makeHeadline().
 * A record per heading is collected in $tocraw and, when $isMain is true,
 * stored with $this->mOutput->setSections().
 *
 * The output type is switched to OT_WIKI while headings are processed and
 * restored afterwards.
 *
 * @param string $text Text containing the <H#> headings to process
 * @param string $origText Text passed to preprocessToDom() so the DOM node
 *   of each heading can be located for offset calculation
 * @param bool $isMain When true, section data is stored on the output and
 *   the TOC may be attached to the block before the first heading
 * @return string $text with headings replaced; when mForceTocPosition is set
 *   the TOC replaces the '<!--MWTOC-->' placeholder
 */
3972  public function formatHeadings( $text, $origText, $isMain = true ) {
3973  global $wgMaxTocLevel, $wgExperimentalHtmlIds;
3974 
3975  # Inhibit editsection links if requested in the page
3976  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
3977  $maybeShowEditLink = $showEditLink = false;
3978  } else {
3979  $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
3980  $showEditLink = $this->mOptions->getEditSection();
3981  }
3982  if ( $showEditLink ) {
3983  $this->mOutput->setEditSectionTokens( true );
3984  }
3985 
3986  # Get all headlines for numbering them and adding funky stuff like [edit]
3987  # links - this is for later, but we need the number of headlines right now
3988  $matches = [];
3989  $numMatches = preg_match_all(
3990  '/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
3991  $text,
3992  $matches
3993  );
3994 
3995  # if there are fewer than 4 headlines in the article, do not show TOC
3996  # unless it's been explicitly enabled.
3997  $enoughToc = $this->mShowToc &&
3998  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
3999 
4000  # Allow user to stipulate that a page should have a "new section"
4001  # link added via __NEWSECTIONLINK__
4002  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4003  $this->mOutput->setNewSection( true );
4004  }
4005 
4006  # Allow user to remove the "new section"
4007  # link via __NONEWSECTIONLINK__
4008  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4009  $this->mOutput->hideNewSection( true );
4010  }
4011 
4012  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4013  # override above conditions and always show TOC above first header
4014  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4015  $this->mShowToc = true;
4016  $enoughToc = true;
4017  }
4018 
4019  # headline counter
4020  $headlineCount = 0;
4021  $numVisible = 0;
4022 
4023  # Ugh .. the TOC should have neat indentation levels which can be
4024  # passed to the skin functions. These are determined here
4025  $toc = '';
4026  $full = '';
4027  $head = [];
4028  $sublevelCount = [];
4029  $levelCount = [];
4030  $level = 0;
4031  $prevlevel = 0;
4032  $toclevel = 0;
4033  $prevtoclevel = 0;
4034  $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4035  $baseTitleText = $this->mTitle->getPrefixedDBkey();
4036  $oldType = $this->mOutputType;
4037  $this->setOutputType( self::OT_WIKI );
4038  $frame = $this->getPreprocessor()->newFrame();
4039  $root = $this->preprocessToDom( $origText );
4040  $node = $root->getFirstChild();
4041  $byteOffset = 0;
4042  $tocraw = [];
4043  $refers = [];
4044 
// Index 3 is the numeric alias of the named group "header", the third
// capture group in the pattern above.
4045  $headlines = $numMatches !== false ? $matches[3] : [];
4046 
4047  foreach ( $headlines as $headline ) {
4048  $isTemplate = false;
4049  $titleText = false;
4050  $sectionIndex = false;
4051  $numbering = '';
4052  $markerMatches = [];
// A leading heading marker carries the serial used to look up the source
// title and section index recorded in mHeadings.
4053  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4054  $serial = $markerMatches[1];
4055  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4056  $isTemplate = ( $titleText != $baseTitleText );
4057  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4058  }
4059 
4060  if ( $toclevel ) {
4061  $prevlevel = $level;
4062  }
4063  $level = $matches[1][$headlineCount];
4064 
4065  if ( $level > $prevlevel ) {
4066  # Increase TOC level
4067  $toclevel++;
4068  $sublevelCount[$toclevel] = 0;
4069  if ( $toclevel < $wgMaxTocLevel ) {
4070  $prevtoclevel = $toclevel;
4071  $toc .= Linker::tocIndent();
4072  $numVisible++;
4073  }
4074  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4075  # Decrease TOC level, find level to jump to
4076 
4077  for ( $i = $toclevel; $i > 0; $i-- ) {
4078  if ( $levelCount[$i] == $level ) {
4079  # Found last matching level
4080  $toclevel = $i;
4081  break;
4082  } elseif ( $levelCount[$i] < $level ) {
4083  # Found first matching level below current level
4084  $toclevel = $i + 1;
4085  break;
4086  }
4087  }
4088  if ( $i == 0 ) {
4089  $toclevel = 1;
4090  }
4091  if ( $toclevel < $wgMaxTocLevel ) {
4092  if ( $prevtoclevel < $wgMaxTocLevel ) {
4093  # Unindent only if the previous toc level was shown :p
4094  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4095  $prevtoclevel = $toclevel;
4096  } else {
4097  $toc .= Linker::tocLineEnd();
4098  }
4099  }
4100  } else {
4101  # No change in level, end TOC line
4102  if ( $toclevel < $wgMaxTocLevel ) {
4103  $toc .= Linker::tocLineEnd();
4104  }
4105  }
4106 
4107  $levelCount[$toclevel] = $level;
4108 
4109  # count number of headlines for each level
4110  $sublevelCount[$toclevel]++;
4111  $dot = 0;
4112  for ( $i = 1; $i <= $toclevel; $i++ ) {
4113  if ( !empty( $sublevelCount[$i] ) ) {
4114  if ( $dot ) {
4115  $numbering .= '.';
4116  }
4117  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4118  $dot = 1;
4119  }
4120  }
4121 
4122  # The safe header is a version of the header text safe to use for links
4123 
4124  # Remove link placeholders by the link text.
4125  # <!--LINK number-->
4126  # turns into
4127  # link text with suffix
4128  # Do this before unstrip since link text can contain strip markers
4129  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4130 
4131  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4132  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4133 
4134  # Strip out HTML (first regex removes any tag not allowed)
4135  # Allowed tags are:
4136  # * <sup> and <sub> (bug 8393)
4137  # * <i> (bug 26375)
4138  # * <b> (r105284)
4139  # * <bdi> (bug 72884)
4140  # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
4141  # * <s> and <strike> (T35715)
4142  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4143  # to allow setting directionality in toc items.
4144  $tocline = preg_replace(
4145  [
4146  '#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
4147  '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
4148  ],
4149  [ '', '<$1>' ],
4150  $safeHeadline
4151  );
4152 
4153  # Strip '<span></span>', which is the result from the above if
4154  # <span id="foo"></span> is used to produce an additional anchor
4155  # for a section.
4156  $tocline = str_replace( '<span></span>', '', $tocline );
4157 
4158  $tocline = trim( $tocline );
4159 
4160  # For the anchor, strip out HTML-y stuff period
4161  $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4162  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4163 
4164  # Save headline for section edit hint before it's escaped
4165  $headlineHint = $safeHeadline;
4166 
4167  if ( $wgExperimentalHtmlIds ) {
4168  # For reverse compatibility, provide an id that's
4169  # HTML4-compatible, like we used to.
4170  # It may be worth noting, academically, that it's possible for
4171  # the legacy anchor to conflict with a non-legacy headline
4172  # anchor on the page. In this case likely the "correct" thing
4173  # would be to either drop the legacy anchors or make sure
4174  # they're numbered first. However, this would require people
4175  # to type in section names like "abc_.D7.93.D7.90.D7.A4"
4176  # manually, so let's not bother worrying about it.
4177  $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
4178  [ 'noninitial', 'legacy' ] );
4179  $safeHeadline = Sanitizer::escapeId( $safeHeadline );
4180 
4181  if ( $legacyHeadline == $safeHeadline ) {
4182  # No reason to have both (in fact, we can't)
4183  $legacyHeadline = false;
4184  }
4185  } else {
4186  $legacyHeadline = false;
4187  $safeHeadline = Sanitizer::escapeId( $safeHeadline,
4188  'noninitial' );
4189  }
4190 
4191  # HTML names must be case-insensitively unique (bug 10721).
4192  # This does not apply to Unicode characters per
4193  # http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
4194  # @todo FIXME: We may be changing them depending on the current locale.
4195  $arrayKey = strtolower( $safeHeadline );
4196  if ( $legacyHeadline === false ) {
4197  $legacyArrayKey = false;
4198  } else {
4199  $legacyArrayKey = strtolower( $legacyHeadline );
4200  }
4201 
4202  # Create the anchor for linking from the TOC to the section
4203  $anchor = $safeHeadline;
4204  $legacyAnchor = $legacyHeadline;
// On a collision, append the first free "_N" suffix (N starting at 2).
4205  if ( isset( $refers[$arrayKey] ) ) {
4206  // @codingStandardsIgnoreStart
4207  for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4208  // @codingStandardsIgnoreEnd
4209  $anchor .= "_$i";
4210  $refers["${arrayKey}_$i"] = true;
4211  } else {
4212  $refers[$arrayKey] = true;
4213  }
4214  if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
4215  // @codingStandardsIgnoreStart
4216  for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
4217  // @codingStandardsIgnoreEnd
4218  $legacyAnchor .= "_$i";
4219  $refers["${legacyArrayKey}_$i"] = true;
4220  } else {
4221  $refers[$legacyArrayKey] = true;
4222  }
4223 
4224  # Don't number the heading if it is the only one (looks silly)
4225  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4226  # the two are different if the line contains a link
4227  $headline = Html::element(
4228  'span',
4229  [ 'class' => 'mw-headline-number' ],
4230  $numbering
4231  ) . ' ' . $headline;
4232  }
4233 
4234  if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
4235  $toc .= Linker::tocLine( $anchor, $tocline,
4236  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4237  }
4238 
4239  # Add the section to the section tree
4240  # Find the DOM node for this header
4241  $noOffset = ( $isTemplate || $sectionIndex === false );
// NOTE(review): despite the name, $byteOffset accumulates mb_strlen()
// results, which count characters rather than raw bytes -- confirm what
// consumers of 'byteoffset' expect.
4242  while ( $node && !$noOffset ) {
4243  if ( $node->getName() === 'h' ) {
4244  $bits = $node->splitHeading();
4245  if ( $bits['i'] == $sectionIndex ) {
4246  break;
4247  }
4248  }
4249  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4250  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4251  $node = $node->getNextSibling();
4252  }
4253  $tocraw[] = [
4254  'toclevel' => $toclevel,
4255  'level' => $level,
4256  'line' => $tocline,
4257  'number' => $numbering,
4258  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4259  'fromtitle' => $titleText,
4260  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4261  'anchor' => $anchor,
4262  ];
4263 
4264  # give headline the correct <h#> tag
4265  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4266  // Output edit section links as markers with styles that can be customized by skins
4267  if ( $isTemplate ) {
4268  # Put a T flag in the section identifier, to indicate to extractSections()
4269  # that sections inside <includeonly> should be counted.
4270  $editsectionPage = $titleText;
4271  $editsectionSection = "T-$sectionIndex";
4272  $editsectionContent = null;
4273  } else {
4274  $editsectionPage = $this->mTitle->getPrefixedText();
4275  $editsectionSection = $sectionIndex;
4276  $editsectionContent = $headlineHint;
4277  }
4278  // We use a bit of pseudo-xml for editsection markers. The
4279  // language converter is run later on. Using a UNIQ style marker
4280  // leads to the converter screwing up the tokens when it
4281  // converts stuff. And trying to insert strip tags fails too. At
4282  // this point all real inputted tags have already been escaped,
4283  // so we don't have to worry about a user trying to input one of
4284  // these markers directly. We use a page and section attribute
4285  // to stop the language converter from converting these
4286  // important bits of data, but put the headline hint inside a
4287  // content block because the language converter is supposed to
4288  // be able to convert that piece of data.
4289  // Gets replaced with html in ParserOutput::getText
4290  $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4291  $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4292  if ( $editsectionContent !== null ) {
4293  $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4294  } else {
4295  $editlink .= '/>';
4296  }
4297  } else {
4298  $editlink = '';
4299  }
4300  $head[$headlineCount] = Linker::makeHeadline( $level,
4301  $matches['attrib'][$headlineCount], $anchor, $headline,
4302  $editlink, $legacyAnchor );
4303 
4304  $headlineCount++;
4305  }
4306 
4307  $this->setOutputType( $oldType );
4308 
4309  # Never ever show TOC if no headers
4310  if ( $numVisible < 1 ) {
4311  $enoughToc = false;
4312  }
4313 
4314  if ( $enoughToc ) {
4315  if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
4316  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4317  }
4318  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4319  $this->mOutput->setTOCHTML( $toc );
4320  $toc = self::TOC_START . $toc . self::TOC_END;
4321  $this->mOutput->addModules( 'mediawiki.toc' );
4322  }
4323 
4324  if ( $isMain ) {
4325  $this->mOutput->setSections( $tocraw );
4326  }
4327 
4328  # split up and insert constructed headlines
4329  $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4330  $i = 0;
4331 
4332  // build an array of document sections
4333  $sections = [];
4334  foreach ( $blocks as $block ) {
4335  // $head is zero-based, sections aren't.
4336  if ( empty( $head[$i - 1] ) ) {
4337  $sections[$i] = $block;
4338  } else {
4339  $sections[$i] = $head[$i - 1] . $block;
4340  }
4341 
4352  Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $showEditLink ] );
4353 
4354  $i++;
4355  }
4356 
4357  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4358  // append the TOC at the beginning
4359  // Top anchor now in skin
4360  $sections[0] = $sections[0] . $toc . "\n";
4361  }
4362 
4363  $full .= implode( '', $sections );
4364 
4365  if ( $this->mForceTocPosition ) {
4366  return str_replace( '<!--MWTOC-->', $toc, $full );
4367  } else {
4368  return $full;
4369  }
4370  }
4371 
/**
 * Transform wikitext before it is saved.
 *
 * Starts a parse in OT_WIKI mode, normalizes line endings, runs pstPass2()
 * when the options enable the pre-save transform, and unstrips the result.
 * The parser's user is set for the duration of the call and reset to null
 * before returning.
 *
 * @param string $text Wikitext to transform
 * @param Title $title
 * @param User $user
 * @param ParserOptions $options
 * @param bool $clearState When true, the parser is locked and its state cleared
 * @return string The transformed wikitext
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	// Held in a local so the lock lives until this method returns.
	$scopedLock = $clearState ? $this->lock() : null;

	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// We still normalize line endings for backwards-compatibility
	// with other code that just calls PST, but this should already
	// be handled in TextContent subclasses
	$normalized = TextContent::normalizeLineEndings( $text );

	$transformed = $options->getPreSaveTransform()
		? $this->pstPass2( $normalized, $user )
		: $normalized;
	$result = $this->mStripState->unstripBoth( $transformed );

	# Reset
	$this->setUser( null );

	return $result;
}
4406 
/**
 * Pre-save transform pass: expands variables, replaces ~~~ / ~~~~ / ~~~~~
 * with the signature and/or timestamp, and expands pipe-trick links.
 *
 * NOTE(review): the embedded listing numbers skip 4416 and 4424. $wgContLang
 * and $timestamp are therefore used below without any visible declaration or
 * assignment -- both statements appear to be missing from this copy; confirm
 * against the upstream file.
 *
 * @param string $text
 * @param User $user User whose signature is inserted
 * @return string
 */
4415  private function pstPass2( $text, $user ) {
4417 
4418  # Note: This is the timestamp saved as hardcoded wikitext to
4419  # the database, we use $wgContLang here in order to give
4420  # everyone the same signature and use the default one rather
4421  # than the one selected in each user's preferences.
4422  # (see also bug 12815)
4423  $ts = $this->mOptions->getTimestamp();
4425  $ts = $timestamp->format( 'YmdHis' );
4426  $tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();
4427 
4428  $d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";
4429 
4430  # Variable replacement
4431  # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
4432  $text = $this->replaceVariables( $text );
4433 
4434  # This works almost by chance, as the replaceVariables are done before the getUserSig(),
4435  # which may corrupt this parser instance via its wfMessage()->text() call-
4436 
4437  # Signatures
4438  $sigText = $this->getUserSig( $user );
4439  $text = strtr( $text, [
4440  '~~~~~' => $d,
4441  '~~~~' => "$sigText $d",
4442  '~~~' => $sigText
4443  ] );
4444 
4445  # Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
4446  $tc = '[' . Title::legalChars() . ']';
4447  $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!
4448 
// NOTE(review): the comments on $p4 and $p3 mention double-width brackets and
// commas, but the patterns in this copy contain only ASCII "(" ")" and ",".
// The full-width characters may have been normalized away in this listing --
// confirm against the upstream file before touching these patterns.
4449  // [[ns:page (context)|]]
4450  $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
4451  // [[ns:page(context)|]] (double-width brackets, added in r40257)
4452  $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?($tc+))\\|]]/";
4453  // [[ns:page (context), context|]] (using either single or double-width comma)
4454  $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |,)$tc+|)\\|]]/";
4455  // [[|page]] (reverse pipe trick: add context from page title)
4456  $p2 = "/\[\[\\|($tc+)]]/";
4457 
4458  # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
4459  $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
4460  $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
4461  $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );
4462 
// Reverse pipe trick: borrow the namespace prefix and the "(context)" or
// ", context" suffix from the current page title, if it has one.
4463  $t = $this->mTitle->getText();
4464  $m = [];
4465  if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
4466  $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
4467  } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
4468  $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
4469  } else {
4470  # if there's no context, don't bother duplicating the title
4471  $text = preg_replace( $p2, '[[\\1]]', $text );
4472  }
4473 
4474  return $text;
4475  }
4476 
/**
 * Build the wikitext signature for a user.
 *
 * A "fancy" signature that validates is cleaned with cleanSig() and returned
 * as is. Otherwise the nickname (or the user name, if the nickname is empty,
 * too long or invalid) is escaped and wrapped in the 'signature' or
 * 'signature-anon' message.
 *
 * NOTE(review): $wgMaxSigChars is read without a visible "global"
 * declaration; listing line 4492 is absent from this copy and presumably
 * held it -- confirm against the upstream file.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use; false reads the user's 'nickname' option
 * @param bool|null $fancySig Whether the nickname is raw signature markup;
 *   null reads the user's 'fancysig' option
 * @return string
 */
4491  public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
4493 
4494  $username = $user->getName();
4495 
4496  # If not given, retrieve from the user object.
4497  if ( $nickname === false ) {
4498  $nickname = $user->getOption( 'nickname' );
4499  }
4500 
4501  if ( is_null( $fancySig ) ) {
4502  $fancySig = $user->getBoolOption( 'fancysig' );
4503  }
4504 
// Loose comparison: null and '' both fall back to the user name.
4505  $nickname = $nickname == null ? $username : $nickname;
4506 
4507  if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
4508  $nickname = $username;
4509  wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
4510  } elseif ( $fancySig !== false ) {
4511  # Sig. might contain markup; validate this
4512  if ( $this->validateSig( $nickname ) !== false ) {
4513  # Validated; clean up (if needed) and return it
4514  return $this->cleanSig( $nickname, true );
4515  } else {
4516  # Failed to validate; fall back to the default
4517  $nickname = $username;
4518  wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
4519  }
4520  }
4521 
4522  # Make sure nickname doesn't get a sig in a sig
4523  $nickname = self::cleanSigInSig( $nickname );
4524 
4525  # If we're still here, make it a link to the user page
4526  $userText = wfEscapeWikiText( $username );
4527  $nickText = wfEscapeWikiText( $nickname );
4528  $msgName = $user->isAnon() ? 'signature-anon' : 'signature';
4529 
4530  return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()
4531  ->title( $this->getTitle() )->text();
4532  }
4533 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|bool $text unchanged when well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}
	return false;
}
4543 
/**
 * Clean up signature text: force substitution on template calls, remove
 * nested signature tildes, and expand the result through the preprocessor.
 *
 * When called outside a parse ($parsing false) the parser is locked and a
 * fresh OT_PREPROCESS parse is started against $wgTitle, and the result is
 * unstripped before returning. Returns $text untouched when the options
 * disable signature cleaning.
 *
 * @param string $text
 * @param bool $parsing Whether this is being called from within a parse
 * @return string
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		global $wgTitle;
		// Held in a local so the lock lives until this method returns.
		$scopedLock = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( $this->mOptions->getCleanSignatures() ) {
		# @todo FIXME: Regex doesn't respect extension tags or nowiki
		# => Move this logic to braceSubstitution()
		$subst = MagicWord::get( 'subst' );
		$pattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
		$replacement = '{{' . $subst->getSynonym( 0 );

		$text = self::cleanSigInSig( preg_replace( $pattern, $replacement, $text ) );
		$tree = $this->preprocessToDom( $text );
		$text = $this->getPreprocessor()->newFrame()->expand( $tree );

		if ( $standalone ) {
			$text = $this->mStripState->unstripBoth( $text );
		}
	}

	return $text;
}
4584 
/**
 * Remove every run of three to five tildes from the text.
 *
 * @param string $text
 * @return string $text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4595 
// NOTE(review): the first line of this method's signature is absent from this
// copy (the embedded listing numbers jump from 4595 to 4606); only the tail of
// the parameter list and the body are visible. Confirm the declaration
// against the upstream file.
//
// The visible body forwards $title, $options, $outputType and $clearState
// unchanged to the private startParse().
4606  $outputType, $clearState = true
4607  ) {
4608  $this->startParse( $title, $options, $outputType, $clearState );
4609  }
4610 
/**
 * Prepare the parser for a run: set the title, options and output type,
 * and optionally clear the parser state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState Whether to call clearState() after the setup
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4627 
/**
 * Wrapper for preprocess()
 *
 * A static flag guards against re-entrant calls: while one call is running,
 * nested calls return their input unchanged.
 *
 * @param string $text The text to preprocess
 * @param ParserOptions $options
 * @param Title|null $title Title object or null to use $wgTitle
 * @return string
 * @throws Exception Anything thrown by preprocess() is propagated
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}

	if ( !$title ) {
		global $wgTitle;
		$title = $wgTitle;
	}

	$executing = true;
	try {
		$text = $this->preprocess( $text, $title, $options );
	} catch ( Exception $e ) {
		# Release the guard before propagating; otherwise every later call
		# in this request would silently return its input untransformed.
		$executing = false;
		throw $e;
	}
	$executing = false;

	return $text;
}
4655 
/**
 * Register a callback for an extension tag.
 *
 * The tag name is lower-cased, stored in mTagHooks and appended to the
 * strip list if it is not there yet.
 *
 * @param string $tag Tag name; must not contain '<', '>', CR or LF
 * @param callable $callback
 * @throws MWException When the tag name contains a forbidden character
 * @return callable|null The previously registered callback, or null
 */
public function setHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mTagHooks[$tag] ) ) {
		$previous = $this->mTagHooks[$tag];
	}
	$this->mTagHooks[$tag] = $callback;

	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
4693 
/**
 * Register a callback for a transparent tag.
 *
 * The tag name is lower-cased and stored in mTransparentTagHooks; unlike
 * setHook(), the strip list is not modified.
 *
 * @param string $tag Tag name; must not contain '<', '>', CR or LF
 * @param callable $callback
 * @throws MWException When the tag name contains a forbidden character
 * @return callable|null The previously registered callback, or null
 */
public function setTransparentTagHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# Name the method that was actually called, so the message can be searched for
		throw new MWException(
			"Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call"
		);
	}
	$oldVal = isset( $this->mTransparentTagHooks[$tag] )
		? $this->mTransparentTagHooks[$tag]
		: null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
4721 
/**
 * Remove all tag hooks.
 *
 * Empties mTagHooks and mFunctionTagHooks and restores the strip list to
 * its default. mTransparentTagHooks is not touched here.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
4730 
/**
 * Register a parser function callback under a magic word ID.
 *
 * Besides storing the callback, every synonym of the magic word is added
 * to mFunctionSynonyms so that it resolves to $id. Synonyms of
 * case-insensitive magic words are lower-cased with the content language,
 * a leading '#' is added unless SFH_NO_HASH is set, and one trailing colon
 * is dropped.
 *
 * @param string $id The magic word ID
 * @param callable $callback
 * @param int $flags A combination of the self::SFH_* constants
 * @throws MWException When $id does not resolve to a magic word
 * @return callable|null The previously registered callback, or null
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
	# Needed for the lower-casing of synonyms below
	global $wgContLang;

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$mw = MagicWord::get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $mw->getSynonyms();
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $wgContLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
4806 
/**
 * Get the IDs of all registered parser function hooks.
 *
 * @return array Keys of mFunctionHooks
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
4815 
/**
 * Register a callback, together with its flags, for a function tag.
 *
 * The tag name is lower-cased, stored in mFunctionTagHooks and appended to
 * the strip list if it is not there yet.
 *
 * @param string $tag Tag name; must not contain '<', '>', CR or LF
 * @param callable $callback
 * @param int $flags Stored alongside the callback
 * @throws MWException When the tag name contains a forbidden character
 * @return array|null The previous [ callback, flags ] entry, or null
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mFunctionTagHooks[$tag] ) ) {
		$previous = $this->mFunctionTagHooks[$tag];
	}
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
4841 
/**
 * Replace link placeholders in $text, in place, by delegating to the
 * current link holder array.
 *
 * @param string &$text Text to process; modified by reference
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
4852 
/**
 * Return the result of the link holder array's replaceText() for $text.
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );

	return $replaced;
}
4863 
/**
 * Renders an image gallery from a text with one line per image.
 * Text labels may be given by using |-style alternative text. E.g.
 *   Image:one.jpg|The number "1"
 *   Image:tree.jpg|A tree
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Attributes of the gallery tag; 'mode',
 *   'showfilename', 'caption', 'perrow', 'widths' and 'heights' are read
 *   here and the whole array is handed on to the gallery object
 * @return string HTML
 */
public function renderImageGallery( $text, $params ) {
	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

	// File names are shown only when the attribute is present at all.
	$ig->setShowFilename( isset( $params['showfilename'] ) );

	if ( isset( $params['caption'] ) ) {
		$caption = $params['caption'];
		$caption = htmlspecialchars( $caption );
		$caption = $this->replaceInternalLinks( $caption );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Pass a local alias: taking $this itself by reference is rejected by PHP 7.1+.
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}

		# Resolve the file name part of the line; unprefixed names default
		# to the File namespace.
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as ImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap = $paramMap + $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = new MagicWordArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			$parameterMatches = StringUtils::explode( '|', $matches[3] );

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							// check to see if link matches an absolute url, if not then it must be a wiki link.
							if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
								$link = $linkValue;
							} else {
								$localLinkTitle = Title::newFromText( $linkValue );
								if ( $localLinkTitle !== null ) {
									$link = $localLinkTitle->getLinkURL();
								}
							}
							break;
						default:
							// Must be a handler specific parameter.
							// (Names other than alt/link only enter $paramMap when a handler exists.)
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label = '|' . $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = '|' . $parameterMatch;
				}
			}
			// Remove the pipe.
			$label = substr( $label, 1 );
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5030 
/**
 * Get the image parameter map and the matching MagicWordArray for a media
 * handler, building and caching them per handler class on first use.
 *
 * The map is keyed by magic word name ("img_*"); each value is a
 * [ type, name ] pair whose type is 'horizAlign', 'vertAlign', 'frame' or
 * 'handler'.
 *
 * @param object|bool $handler Media handler, or a false value for none
 * @return array [ parameter map, MagicWordArray ]
 */
public function getImageParams( $handler ) {
	# Initialise static lists
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;

	$cacheKey = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$cacheKey] ) ) {
		if ( !$internalParamMap ) {
			$internalParamMap = [];
			foreach ( $internalParamNames as $type => $names ) {
				foreach ( $names as $name ) {
					# Magic word names use underscores where the option has a dash
					$internalParamMap[ str_replace( '-', '_', "img_$name" ) ] = [ $type, $name ];
				}
			}
		}

		# Add handler params
		$map = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magic => $paramName ) {
				$map[$magic] = [ 'handler', $paramName ];
			}
		}

		$this->mImageParams[$cacheKey] = $map;
		$this->mImageParamsMagicArray[$cacheKey] = new MagicWordArray( array_keys( $map ) );
	}

	return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
}
5074 
/**
 * Parse image options text and use it to make an image.
 *
 * @param Title $title Title of the file
 * @param string $options Pipe-separated option text, "options|alt text"
 * @param object|bool $holders Object whose replaceText() is used by
 *   stripAltText(), or false to use the parser's own link holders
 * @return string HTML produced by Linker::makeImageLink()
 */
public function makeImage( $title, $options, $holders = false ) {
	# The options text has the form "options|alt text". Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline, sub, super, top, text-top, middle, bottom, text-bottom

	$parts = StringUtils::explode( "|", $options );

	# Give extensions a chance to select the file revision for us.
	# From here on $options is the hook's option array, not the option text.
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;
	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [
		'frame' => [],
		'handler' => [],
		'horizAlign' => [],
		'vertAlign' => [],
	];
	$formatSeen = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;

		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			if ( $type === 'handler' && $paramName === 'width' ) {
				# Special case; width and height come in one variable together
				$dimensions = $this->parseWidthParam( $value );
				if ( isset( $dimensions['width'] ) ) {
					$width = $dimensions['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $dimensions['height'] ) ) {
					$height = $dimensions['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- bug 13436
			} else {
				if ( $type !== 'handler' ) {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							if ( $value === '' ) {
								# An empty link= turns linking off
								$paramName = 'no-link';
								$value = true;
								$validated = true;
							} elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
								# Starts with a URL protocol: only a complete URL validates
								if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
									$paramName = 'link-url';
									$this->mOutput->addExternalLink( $value );
									if ( $this->mOptions->getExternalLinkTarget() ) {
										$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
									}
									$validated = true;
								}
							} else {
								$linkTitle = Title::newFromText( $value );
								if ( $linkTitle ) {
									$paramName = 'link-title';
									$value = $linkTitle;
									$this->mOutput->addLink( $linkTitle );
									$validated = true;
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$formatSeen;
							$formatSeen = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				} else {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}

		# Anything that did not validate as a parameter becomes the caption; last one wins
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$frameParams = $params['frame'];
	$hasFrame = isset( $frameParams['frame'] )
		|| isset( $frameParams['framed'] )
		|| isset( $frameParams['thumbnail'] )
		|| isset( $frameParams['manualthumb'] );
	$hasAlt = isset( $frameParams['alt'] );

	# In the old days, [[Image:Foo|text...]] would set alt text. Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers. It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text. This is the framed/thumbnail case. If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time
	# being, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the
	# unnamed parameter entirely for images without a caption; adding an
	# explicit caption= parameter and preserving the old magic unnamed
	# parameter for BC; ...
	if ( $hasFrame ) {
		# Framed image: with neither caption nor alt text, use the filename
		# as the alt text so that screen readers at least get some
		# description of the image.
		if ( $caption === '' && !$hasAlt ) {
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else {
		# Inline image: without alt text, use the "caption" for it, falling
		# back to the filename when there is no caption either.
		if ( !$hasAlt ) {
			$params['frame']['alt'] = ( $caption !== '' )
				? $this->stripAltText( $caption, $holders )
				: $title->getText();
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = isset( $options['time'] ) ? $options['time'] : false;
	$html = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $html;
}
5292 
/**
 * Turn caption wikitext into plain text usable in an alt or title
 * attribute.
 *
 * @param string $caption
 * @param object|bool $holders Object whose replaceText() should be used, or
 *   a false value to use the parser's own link holders
 * @return string Caption with link placeholders replaced, strip markers
 *   expanded and all tags removed
 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$plain = $this->mStripState->unstripBoth( $plain );

	return Sanitizer::stripAllTags( $plain );
}
5316 
/**
 * Mark the current parser output as uncacheable by setting its cache
 * expiry to zero.
 *
 * @throws MWException When there is no output object, i.e. the parser is
 *   not parsing anything
 */
public function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	$output = $this->mOutput;
	if ( !$output ) {
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}

	$output->updateCacheExpiry( 0 ); // new style, for consistency
}
5330 
/**
 * Expand variables in $text and then unstrip strip markers.
 *
 * The result is both written back through the reference and returned.
 *
 * @param string &$text
 * @param bool|object $frame Passed straight on to replaceVariables()
 * @return string
 */
public function attributeStripCallback( &$text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $expanded );

	return $text;
}
5344 
/**
 * List every tag name that has a hook registered: transparent tag hooks
 * first, then tag hooks, then function tag hooks.
 *
 * @return array
 */
public function getTags() {
	$transparent = array_keys( $this->mTransparentTagHooks );
	$regular = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparent, $regular, $functionTags );
}
5357 
/**
 * Replace transparent tags in $text with the output of their hooks.
 *
 * The tags are extracted with extractTagsAndParams(), which leaves markers
 * in the text; each marker is then swapped for the return value of the
 * registered hook, called with ( content, params, parser ). A tag with no
 * hook registered is put back verbatim.
 *
 * @param string $text
 * @return string
 */
public function replaceTransparentTags( $text ) {
	$found = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $found );

	$substitutions = [];
	foreach ( $found as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;
		$hookKey = strtolower( $element );

		if ( !isset( $this->mTransparentTagHooks[$hookKey] ) ) {
			$substitutions[$marker] = $tag;
			continue;
		}

		$substitutions[$marker] = call_user_func_array(
			$this->mTransparentTagHooks[$hookKey],
			[ $content, $params, $this ]
		);
	}

	return strtr( $text, $substitutions );
}
5389 
/**
 * Break wikitext input into sections, and either pull or replace
 * some particular section's text.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section index, optionally preceded by
 *   dash-separated flags; the flag "T" makes the text be preprocessed
 *   with PTD_FOR_INCLUSION (e.g. "T-2")
 * @param string $mode 'get' or 'replace'
 * @param string $newText Replacement text in 'replace' mode; the value
 *   returned for a missing section in 'get' mode
 * @return string In 'get' mode the section text ($newText if the section
 *   does not exist); otherwise the full text with the section replaced
 *   (the input text if the section does not exist)
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Hold the lock in a local so that it is released when this method returns
	$parseLock = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	$isGet = ( $mode === 'get' );
	$isReplace = ( $mode === 'replace' );

	# Process section extraction flags
	$sectionParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $sectionParts );
	$flags = in_array( 'T', $sectionParts, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			return $isGet ? '' : $newText;
		}
		return $isGet ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			# In replace mode everything before the target is copied through
			if ( $isReplace ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $isGet ? $newText : $text;
	}

	# Find the end of the section, including nested sections.
	# $node is known to be set here, so the body runs at least once.
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			$curLevel = $bits['level'];
			if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		if ( $isGet ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Write out the remainder (in replace mode only)
	if ( $isReplace ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $outText ) ) {
		# Re-insert stripped tags
		$outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
	}

	return $outText;
}
5529 
/**
 * Return the wikitext of one section of $text.
 *
 * Delegates to extractSections() in 'get' mode.
 *
 * @param string $text Text to look in
 * @param string|int $sectionId Section identifier as accepted by
 *   extractSections()
 * @param string $defaultText Value to return if the section is not found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$section = $this->extractSections( $text, $sectionId, 'get', $defaultText );

	return $section;
}
5547 
/**
 * Return $oldText with one section replaced by $newText.
 *
 * Delegates to extractSections() in 'replace' mode.
 *
 * @param string $oldText Former text of the page
 * @param string|int $sectionId Section identifier as accepted by
 *   extractSections()
 * @param string $newText Replacement text for the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$merged = $this->extractSections( $oldText, $sectionId, 'replace', $newText );

	return $merged;
}
5563 
/**
 * Get the ID of the revision being parsed, as stored in mRevisionId.
 *
 * @return int|null
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;

	return $revisionId;
}
5572 
/**
 * Get the revision object for $this->mRevisionId.
 *
 * The object is obtained from the parser options' current-revision
 * callback and cached in mRevisionObject, including a failed lookup.
 *
 * @return Revision|null Null when no revision ID is set; otherwise
 *   whatever the lookup produced
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject !== null ) {
		return $this->mRevisionObject;
	}
	if ( $this->mRevisionId === null ) {
		return null;
	}

	$revision = call_user_func(
		$this->mOptions->getCurrentRevisionCallback(), $this->getTitle(), $this
	);

	# If the parse is for a new revision, then the callback should have
	# already been set to force the object and should match mRevisionId.
	# If not, try to fetch by mRevisionId for sanity.
	$mismatch = $revision && $revision->getId() != $this->mRevisionId;
	if ( $mismatch ) {
		$revision = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $revision;

	return $this->mRevisionObject;
}
5602 
/**
 * Get the timestamp associated with the current revision, adjusted for
 * the default server-local timestamp.
 *
 * The value is computed once and cached in mRevisionTimestamp. When no
 * revision object is available the current time is used.
 *
 * @return string
 */
public function getRevisionTimestamp() {
	if ( is_null( $this->mRevisionTimestamp ) ) {
		# Needed for the timezone adjustment below
		global $wgContLang;

		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
	}
	return $this->mRevisionTimestamp;
}
5625 
/**
 * Get the name of the user that edited the current revision.
 *
 * The value is cached in mRevisionUser once it is known.
 *
 * @return string|null User name; null when there is no revision object and
 *   the parse is neither a pre-save transform nor a preview
 */
public function getRevisionUser() {
	if ( !is_null( $this->mRevisionUser ) ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionObject();

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $revision ) {
		$this->mRevisionUser = $revision->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
5645 
/**
 * Get the size of the current revision.
 *
 * The value is computed once and cached in mRevisionSize.
 *
 * @return int|null Size reported by the revision object, or the parser
 *   input size when no revision object is available
 */
public function getRevisionSize() {
	if ( !is_null( $this->mRevisionSize ) ) {
		return $this->mRevisionSize;
	}

	$revision = $this->getRevisionObject();

	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the substitution itself
	# will change the size.
	$this->mRevisionSize = $revision ? $revision->getSize() : $this->mInputSize;

	return $this->mRevisionSize;
}
5666 
/**
 * Set the default sort key and record it as the 'defaultsort' property of
 * the parser output.
 *
 * @param string $sort
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$output = $this->mOutput;
	$output->setProperty( 'defaultsort', $sort );
}
5676 
/**
 * Get the default sort key.
 *
 * @return string The stored sort key, or the empty string when the stored
 *   value is false
 */
public function getDefaultSort() {
	return ( $this->mDefaultSort !== false ) ? $this->mDefaultSort : '';
}
5694 
/**
 * Get the raw stored default sort key, without the empty-string fallback
 * that getDefaultSort() applies.
 *
 * @return string|bool
 */
public function getCustomDefaultSort() {
	$sortKey = $this->mDefaultSort;

	return $sortKey;
}
5704 
/**
 * Try to guess the section anchor name based on a wikitext fragment
 * presumably extracted from a heading, for example "Header" from
 * "== Header ==".
 *
 * @param string $text
 * @return string Anchor, including the leading '#'
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$text = $this->stripSectionName( $text );
	# Normalise whitespace before escaping the name into an ID
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );
	return '#' . Sanitizer::escapeId( $text, 'noninitial' );
}
5720 
/**
 * Same as guessSectionNameFromWikiText(), but escapes the name with the
 * additional 'legacy' option.
 *
 * @param string $text The section name
 * @return string Anchor, including the leading '#'
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$text = $this->stripSectionName( $text );
	# Normalise whitespace before escaping the name into an ID
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );
	return '#' . Sanitizer::escapeId( $text, [ 'noninitial', 'legacy' ] );
}
5735 
/**
 * Strips a text string of wikitext for use in a section anchor.
 *
 * Internal and external link markup is reduced to the link text,
 * bold/italic quote markup is processed by doQuotes(), and finally
 * everything between '<' and '>' is removed.
 *
 * @param string $text Text string to be stripped of wikitext
 * @return string
 */
public function stripSectionName( $text ) {
	# Strip internal link markup: piped links keep their label,
	# unpiped links keep their target
	$text = preg_replace(
		[ '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '/\[\[:?([^[]+)\|?\]\]/' ],
		[ '$2', '$1' ],
		$text
	);

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
5768 
/**
 * Strip/replaceVariables/unstrip for preprocessor regression testing.
 *
 * Starts a fresh parse, expands variables, unstrips the strip markers and
 * finally passes the text through Sanitizer::removeHTMLtags().
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @return string
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	# Hold the lock in a local so that it is released when this method returns
	$parseLock = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
5790 
/**
 * Run the pre-save transform on $text, using the user taken from $options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
5800 
/**
 * Run testSrvus() with the OT_PREPROCESS output type.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$result = $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );

	return $result;
}
5810 
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return value
 * of the callback. Strip markers themselves are copied through untouched.
 *
 * If a marker prefix has no matching suffix, the rest of the string is
 * copied through as is.
 *
 * @param string $s
 * @param callable $callback Receives one string argument, returns a string
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$result = '';
	$total = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	$pos = 0;
	while ( $pos < $total ) {
		$markerStart = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $markerStart === false ) {
			# No more markers: the whole remainder goes through the callback
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Text between the previous position and this marker
		$result .= call_user_func( $callback, substr( $s, $pos, $markerStart - $pos ) );

		$markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
		if ( $markerEnd === false ) {
			$result .= substr( $s, $markerStart );
			break;
		}

		$markerEnd += $suffixLength;
		$result .= substr( $s, $markerStart, $markerEnd - $markerStart );
		$pos = $markerEnd;
	}

	return $result;
}
5850 
/**
 * Return the result of the strip state's killMarkers() for $text.
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
5860 
/**
 * Save the parser state required to convert the given half-parsed text to
 * HTML later.
 *
 * The returned array has the keys 'text', 'version', 'stripState' and
 * 'linkHolders'; it is the input format of unserializeHalfParsedText().
 *
 * @param string $text
 * @return array
 */
public function serializeHalfParsedText( $text ) {
	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
5886 
/**
 * Load the parser state given in the $data array, which is assumed to
 * have been generated by serializeHalfParsedText(). The text contents are
 * extracted from the array and returned after the saved strip state and
 * link holders have been merged into this parser.
 *
 * @param array $data Serialized data
 * @throws MWException When the version is missing or does not match
 * @return string
 */
public function unserializeHalfParsedText( $data ) {
	$versionOk = isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionOk ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# First, extract the strip state.
	$merged = $this->mStripState->merge( $data['stripState'], [ $data['text'] ] );

	# Now renumber links
	$merged = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $merged );

	# Should be good to go.
	return $merged[0];
}
5917 
/**
 * Returns true if the given array, presumed to be generated by
 * serializeHalfParsedText(), is compatible with the current version of the
 * parser.
 *
 * @param array $data
 * @return bool
 */
public function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
5930 
/**
 * Parse the value of a width parameter of the form "WxH" or "W", each
 * with an optional trailing "px".
 *
 * @param string $value
 * @return array Possibly with the keys 'width' and 'height' (integers);
 *   empty when $value is the empty string or matches neither form
 */
public function parseWidthParam( $value ) {
	if ( $value === '' ) {
		return [];
	}

	# (bug 13500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$m = [];
	if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
		return [
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
5958 
/**
 * Lock the current instance of the parser.
 *
 * Sets mInParse and returns a ScopedCallback that clears it again; callers
 * in this class keep the return value in a local variable.
 *
 * @throws MWException If the parser is already marked as parsing
 * @return ScopedCallback
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively?" );
	}
	$this->mInParse = true;

	return new ScopedCallback( function() {
		$this->mInParse = false;
	} );
}
5981 
/**
 * Strip outer <p></p> tag from the HTML source of a single paragraph.
 *
 * Returns the original text if the <p></p> tag cannot be found, or if
 * there is more than one <p></p> pair in the text.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	$wrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );

	# A closing tag inside the capture means more than one paragraph
	if ( $wrapped && strpos( $m[1], '</p>' ) === false ) {
		return $m[1];
	}

	return $html;
}
6002 
/**
 * Return this parser if it is not doing anything, otherwise a new
 * instance of the class configured in $wgParserConf['class'].
 *
 * @return Parser
 */
public function getFreshParser() {
	global $wgParserConf;

	if ( !$this->mInParse ) {
		return $this;
	}

	return new $wgParserConf['class']( $wgParserConf );
}
6021 
/**
 * Set up the PHP implementation of OOUI for use in this request and
 * instruct the parser output to enable OOUI for itself.
 */
public function enableOOUI() {
	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
6032 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:5579
static newFromName($name, $validate= 'valid')
Static factory method for creation from username.
Definition: User.php:524
setTitle($t)
Set the context title.
Definition: Parser.php:724
$mAutonumber
Definition: Parser.php:176
getLatestRevID($flags=0)
What is the page_latest field for this page?
Definition: Title.php:3260
markerSkipCallback($s, $callback)
Call a callback function on all regions of the given text that are not inside strip markers...
Definition: Parser.php:5827
#define the
table suitable for use with IDatabase::select()
$mPPNodeCount
Definition: Parser.php:190
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2068
static getVariableIDs()
Get an array of parser variable IDs.
Definition: MagicWord.php:271
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:117
getExternalLinkAttribs($url)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:1895
const MARKER_PREFIX
Definition: Parser.php:133
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
Definition: deferred.txt:11
isValidHalfParsedText($data)
Returns true if the given array, presumed to be generated by serializeHalfParsedText(), is compatible with the current version of the parser.
Definition: Parser.php:5927
null means default in associative array form
Definition: hooks.txt:1901
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1901
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1633
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
getSection($text, $sectionId, $defaultText= '')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:5544
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1287
$mTplRedirCache
Definition: Parser.php:192
killMarkers($text)
Remove any strip markers found in the given text.
Definition: Parser.php:5857
wfGetDB($db, $groups=[], $wiki=false)
Get a Database object.
static tocList($toc, $lang=false)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1645
LinkRenderer $mLinkRenderer
Definition: Parser.php:256
fetchTemplateAndTitle($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3488
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:767
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:5631
setFunctionTagHook($tag, $callback, $flags)
Create a tag function, e.g.
Definition: Parser.php:4826
the array() calling protocol came about after MediaWiki 1.4rc1.
stripSectionName($text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:5750
const OT_PREPROCESS
Definition: Defines.php:215
either a plain
Definition: hooks.txt:1952
$mDoubleUnderscores
Definition: Parser.php:192
Group all the pieces relevant to the context of a request into one instance.
getPreloadText($text, Title $title, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:672
$context
Definition: load.php:50
validateSig($text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4540
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:242
getArticleID($flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition: Title.php:3171
$wgSitename
Name of the site.
renderImageGallery($text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:4877
recursivePreprocess($text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:653
replaceExternalLinks($text)
Replace external links (REL)
Definition: Parser.php:1795
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:189
static isNonincludable($index)
It is not possible to use pages from this namespace as template?
nextLinkID()
Definition: Parser.php:813
const SPACE_NOT_NL
Definition: Parser.php:102
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1901
static replaceUnusualEscapes($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1923
getImageParams($handler)
Definition: Parser.php:5035
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
doHeadings($text)
Parse headers and return html.
Definition: Parser.php:1574
static getTitleFor($name, $subpage=false, $fragment= '')
Get a localised Title object for a specified special page name If you don't need a full Title object...
Definition: SpecialPage.php:82
const OT_PLAIN
Definition: Parser.php:113
getTags()
Accessor.
Definition: Parser.php:5350
static isWellFormedXmlFragment($text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:735
const OT_WIKI
Definition: Parser.php:110
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:2067
fetchFileAndTitle($title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3630
User $mUser
Definition: Parser.php:199
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:2806
static isEnabled()
Definition: MWTidy.php:79
Set options of the Parser.
static tidy($text)
Interface with html tidy.
Definition: MWTidy.php:46
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:4812
static fixTagAttributes($text, $element, $sorted=false)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML...
Definition: Sanitizer.php:1071
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
database rows
Definition: globals.txt:10
wfHostname()
Fetch server name for use in error reporting etc.
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:828
argSubstitution($piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:3733
testSrvus($text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
strip/replaceVariables/unstrip for preprocessor regression testing
Definition: Parser.php:5779
uniqPrefix()
Accessor for mUniqPrefix.
Definition: Parser.php:714
const TOC_START
Definition: Parser.php:136
Title($x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:752
SectionProfiler $mProfiler
Definition: Parser.php:251
$wgEnableScaryTranscluding
Enable interwiki transcluding.
$sort
fetchFileNoRegister($title, $options=[])
Helper function for fetchFileAndTitle.
Definition: Parser.php:3655
null for the local wiki Added in
Definition: hooks.txt:1520
There are three types of nodes:
$mHeadings
Definition: Parser.php:192
$value
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:4725
static makeSelfLinkObj($nt, $html= '', $query= '', $trail= '', $prefix= '')
Make appropriate markup for a link to the current article.
Definition: Linker.php:277
const NS_SPECIAL
Definition: Defines.php:45
clearState()
Clear Parser state.
Definition: Parser.php:340
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context $revId
Definition: hooks.txt:1011
__construct($conf=[])
Definition: Parser.php:261
const EXT_LINK_ADDR
Definition: Parser.php:94
$mFirstCall
Definition: Parser.php:151
interwikiTransclude($title, $action)
Transclude an interwiki link.
Definition: Parser.php:3674
pstPass2($text, $user)
Pre-save transform helper function.
Definition: Parser.php:4415
guessLegacySectionNameFromWikiText($text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead.
Definition: Parser.php:5729
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Options($x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:806
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2668
serializeHalfParsedText($text)
Save the parser state required to convert the given half-parsed text to HTML.
Definition: Parser.php:5877
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition: Parser.php:4849
static statelessFetchRevision(Title $title, $parser=false)
Wrapper around Revision::newFromTitle to allow passing additional parameters without passing them on ...
Definition: Parser.php:3471
static activeUsers()
Definition: SiteStats.php:165
$mLinkID
Definition: Parser.php:189
doQuotes($text)
Helper function for doAllQuotes()
Definition: Parser.php:1607
preprocessToDom($text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:2836
limitationWarn($limitationType, $current= '', $max= '')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:2958
static cleanUrl($url)
Definition: Sanitizer.php:1856
wfUrlencode($s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:262
$mGeneratedPPNodeCount
Definition: Parser.php:190
Represents a title within MediaWiki.
Definition: Title.php:36
static getRandomString()
Get a random string.
Definition: Parser.php:693
$mRevisionId
Definition: Parser.php:216
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1823
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
doBlockLevels($text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition: Parser.php:2439
$wgArticlePath
Definition: img_auth.php:45
OutputType($x=null)
Accessor/mutator for the output type.
Definition: Parser.php:778
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition: Parser.php:895
const NS_TEMPLATE
Definition: Defines.php:66
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:127
getVariableValue($index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2454
recursiveTagParse($text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:588
const NO_ARGS
magic word & $parser
Definition: hooks.txt:2452
MagicWordArray $mVariables
Definition: Parser.php:158
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object to manipulate or replace but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok inclusive false for true for descending in case the handler function wants to provide a converted Content object Note that $result getContentModel() must return $toModel. 'CustomEditor'$rcid is used in generating this variable which contains information about the new revision
Definition: hooks.txt:1121
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:748
const SFH_NO_HASH
Definition: Parser.php:84
const DB_MASTER
Definition: defines.php:23
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
wfRandomString($length=32)
Get a random string containing a number of pseudo-random hex characters.
$mForceTocPosition
Definition: Parser.php:194
preprocess($text, Title $title=null, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:629
static getCacheTTL($id)
Allow external reads of TTL array.
Definition: MagicWord.php:294
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:5569
const OT_PREPROCESS
Definition: Parser.php:111
maybeDoSubpageLink($target, &$text)
Handle link to subpage if necessary.
Definition: Parser.php:2427
$mFunctionSynonyms
Definition: Parser.php:143
If you want to remove the page from your watchlist later
replaceLinkHoldersText($text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:4860
setLinkID($id)
Definition: Parser.php:820
$mOutputType
Definition: Parser.php:213
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
$mDefaultStripList
Definition: Parser.php:146
static createAssocArgs($args)
Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
Definition: Parser.php:2910
$mExtLinkBracketedRegex
Definition: Parser.php:165
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page.Return false to stop further processing of the tag $reader:XMLReader object &$pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports.&$fullInterwikiPrefix:Interwiki prefix, may contain colons.&$pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable.Can be used to lazy-load the import sources list.&$importSources:The value of $wgImportSources.Modify as necessary.See the comment in DefaultSettings.php for the detail of how to structure this array. 
'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.&$title:Title object for the current page &$request:WebRequest &$ignoreRedirect:boolean to skip redirect check &$target:Title/string of redirect target &$article:Article object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) &$article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() &$ip:IP being check &$result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 
'LanguageGetMagic':DEPRECATED!Use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language &$magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED!Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language &$specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED!Use HtmlPageLinkRendererBegin instead.Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1899
const TS_UNIX
Unix time - the number of seconds since 1970-01-01 00:00:00 UTC.
Definition: defines.php:6
if($line===false) $args
Definition: cdb.php:64
static getLocalInstance($ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
$wgMaxSigChars
Maximum number of Unicode characters in signature.
static getDoubleUnderscoreArray()
Get a MagicWordArray of double-underscore entities.
Definition: MagicWord.php:307
static splitTrail($trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1720
getTemplateDom($title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3405
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition: hooks.txt:2854
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).
Definition: User.php:47
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1500
cleanSig($text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4554
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
static factory($mode=false, IContextSource $context=null)
Get a new image gallery.
$wgLanguageCode
Site language code.
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static getPage($name)
Find the object with a given name and return it (or NULL)
static edits()
Definition: SiteStats.php:133
$wgExtraInterlanguageLinkPrefixes
List of additional interwiki prefixes that should be treated as interlanguage links (i...
startExternalParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:4605
wfDebugLog($logGroup, $text, $dest= 'all', array $context=[])
Send a line to a supplementary debug log file, if configured, or main debug log if not...
const NO_TEMPLATES
addTrackingCategory($msg)
Definition: Parser.php:3952
replaceInternalLinks($s)
Process [[ ]] wikilinks.
Definition: Parser.php:2055
$mVarCache
Definition: Parser.php:147
$wgStylePath
The URL path of the skins directory.
disableCache()
Set a flag in the output object indicating that the content is dynamic and shouldn't be cached...
Definition: Parser.php:5322
$mRevisionObject
Definition: Parser.php:215
static normalizeSectionNameWhitespace($section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the id's that are used for section links.
Definition: Sanitizer.php:1381
internalParse($text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1228
Title $mTitle
Definition: Parser.php:212
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:287
wfEscapeWikiText($text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
bool $mInParse
Recursive call protection.
Definition: Parser.php:248
Some quick notes on the file repository architecture Functionality is
Definition: README:3
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:5608
Class that generates HTML links for pages.
static stripOuterParagraph($html)
Strip the outer <p></p> tag from an HTML fragment that consists of a single paragraph.
Definition: Parser.php:5992
static register($parser)
$mRevIdForTs
Definition: Parser.php:220
static singleton()
Get an instance of this class.
Definition: LinkCache.php:64
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
static normalizeSubpageLink($contextTitle, $target, &$text)
Definition: Linker.php:1439
parseWidthParam($value)
Parse a width parameter of an image link, such as 300px or 200x300px.
Definition: Parser.php:5939
$mStripList
Definition: Parser.php:145
$mFunctionTagHooks
Definition: Parser.php:144
fetchScaryTemplateMaybeFromCache($url)
Definition: Parser.php:3693
const OT_PLAIN
Definition: Defines.php:217
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
fetchCurrentRevisionOfTitle($title)
Fetch the current revision of a given title.
Definition: Parser.php:3448
$mRevisionTimestamp
Definition: Parser.php:217
$mImageParams
Definition: Parser.php:148
stripAltText($caption, $holders)
Definition: Parser.php:5298
doAllQuotes($text)
Replace single quotes with HTML markup.
Definition: Parser.php:1590
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
static normalizeUrlComponent($component, $unsafe)
Definition: Parser.php:1973
if($limit) $timestamp
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:75
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:1011
setHook($tag, $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:4680
const OT_WIKI
Definition: Defines.php:214
Preprocessor $mPreprocessor
Definition: Parser.php:169
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:881
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of normal(non-web) applications--they might conflict with distributors'policies
static getInstance($ts=false)
Get a timestamp instance in GMT.
Definition: MWTimestamp.php:38
const NS_MEDIA
Definition: Defines.php:44
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:59
replaceVariables($text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:2881
const RECOVER_ORIG
wfMatchesDomainList($url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
MediaWiki exception.
Definition: MWException.php:26
StripState $mStripState
Definition: Parser.php:181
$mDefaultSort
Definition: Parser.php:191
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:869
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
incrementIncludeSize($type, $size)
Increment an include size counter.
Definition: Parser.php:3870
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:998
const EXT_IMAGE_REGEX
Definition: Parser.php:97
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4617
$params
const NS_CATEGORY
Definition: Defines.php:70
static makeHeadline($level, $attribs, $anchor, $html, $link, $legacyAnchor=false)
Create a headline for content.
Definition: Linker.php:1701
static extractTagsAndParams($elements, $text, &$matches, $uniq_prefix=null)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:928
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
doTableStuff($text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:1025
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
getRevisionSize()
Get the size of the revision.
Definition: Parser.php:5651
$mImageParamsMagicArray
Definition: Parser.php:149
LinkHolderArray $mLinkHolders
Definition: Parser.php:187
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
const TS_MW
MediaWiki concatenated string timestamp (YYYYMMDDHHMMSS)
Definition: defines.php:11
static register($parser)
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1901
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to and or sell copies of the and to permit persons to whom the Software is furnished to do so
Definition: LICENSE.txt:10
$wgTranscludeCacheExpiry
Expiry time for transcluded templates cached in transcache database table.
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
preSaveTransform($text, Title $title, User $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\r\n" -> "\n" conversion, substituting signatur...
Definition: Parser.php:4383
static capturePath(Title $title, IContextSource $context, LinkRenderer $linkRenderer=null)
Just like executePath() but will override global variables and execute the page in "inclusion" mode...
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:841
$buffer
Allows to change the fields on the form that will be generated are created Can be used to omit specific feeds from being outputted You must not use this hook to add use OutputPage::addFeedLink() instead.&$feedLinks conditions will AND in the final query as a Content object as a Content object $title
Definition: hooks.txt:302
static newKnownCurrent(IDatabase $db, $pageId, $revId)
Load a revision based on a known page ID and current revision ID from the DB.
Definition: Revision.php:1900
static hasSubpages($index)
Does the namespace allow subpages?
formatHeadings($text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:3972
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:859
static tocUnindent($level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1600
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
static tocLine($anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
The level parameter defines whether we are on an indentation level.
Definition: Linker.php:1615
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
$mInputSize
Definition: Parser.php:221
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:972
getUserSig(&$user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext...
Definition: Parser.php:4491
const HALF_PARSED_VERSION
Update this version number when the output of serialiseHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:81
const NS_FILE
Definition: Defines.php:62
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:322
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:34
static makeImageLink(Parser $parser, Title $title, $file, $frameParams=[], $handlerParams=[], $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:415
const PTD_FOR_INCLUSION
Definition: Parser.php:105
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1901
armorLinks($text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2405
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1686
static splitWhitespace($s)
Return a three-element array: leading whitespace, string contents, trailing whitespace.
Definition: Parser.php:2848
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
setOutputType($ot)
Set the output type.
Definition: Parser.php:761
$mTagHooks
Definition: Parser.php:140
Class for handling an array of magic words.
const NS_MEDIAWIKI
Definition: Defines.php:64
static & get($id)
Factory: creates an object representing an ID.
Definition: MagicWord.php:257
equals(Content $that=null)
Returns true if this Content object is conceptually equivalent to the given Content object...
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition: Parser.php:6028
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition: hooks.txt:242
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object to manipulate or replace but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok inclusive false for true for descending in case the handler function wants to provide a converted Content object Note that $result getContentModel() must return $toModel. 'CustomEditor'$rcid is used in generating this variable which contains information about the new such as the revision s whether the revision was marked as a minor edit or not
Definition: hooks.txt:1121
fetchTemplate($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3516
maybeMakeExternalImage($url)
Make an image if it's allowed, either through the global option, through the exception, or through the on-wiki whitelist.
Definition: Parser.php:1996
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2414
const OT_HTML
Definition: Defines.php:213
static escapeId($id, $options=[])
Given a value, escape it so that it can be used in an id attribute and return it. ...
Definition: Sanitizer.php:1170
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object & $output
Definition: hooks.txt:1011
static getSubstIDs()
Get an array of parser substitution modifier IDs.
Definition: MagicWord.php:284
static images()
Definition: SiteStats.php:173
$mTransparentTagHooks
Definition: Parser.php:141
$mExpensiveFunctionCount
Definition: Parser.php:193
$mUrlProtocols
Definition: Parser.php:165
$mConf
Definition: Parser.php:165
transformMsg($text, $options, $title=null)
Wrapper for preprocess()
Definition: Parser.php:4636
static newFromId($id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:109
wfUrlProtocols($includeProtocolRelative=true)
Returns a regular expression of url protocols.
static makeExternalLink($url, $text, $escape=true, $linktype= '', $attribs=[], $title=null)
Make an external link.
Definition: Linker.php:934
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:299
static getExternalLinkRel($url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:1874
string $mUniqPrefix
Deprecated accessor for the strip marker prefix.
Definition: Parser.php:227
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
this hook is for auditing only $req
Definition: hooks.txt:972
this hook is for auditing only or null if authentication failed before getting that far $username
Definition: hooks.txt:767
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc next in line in page history
Definition: hooks.txt:1686
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:234
const OT_MSG
Definition: Parser.php:112
replaceTransparentTags($text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5368
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
Definition: postgres.txt:22
replaceSection($oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition: Parser.php:5560
doDoubleUnderscore($text)
Strip double-underscore items like NOGALLERY and NOTOC. Fills $this->mDoubleUnderscores, returns the modified text.
Definition: Parser.php:3897
$mFunctionHooks
Definition: Parser.php:142
static removeHTMLtags($text, $processCallback=null, $args=[], $extratags=[], $removetags=[], $warnCallback=null)
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:462
$lines
Definition: router.php:66
testPreprocess($text, Title $title, ParserOptions $options)
Definition: Parser.php:5807
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition: deferred.txt:11
MagicWordArray $mSubstWords
Definition: Parser.php:163
const TOC_END
Definition: Parser.php:137
static normalizeCharReferences($text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1400
callParserFunction($frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition: Parser.php:3309
$wgScriptPath
The path we should point to.
Variant of the Message class.
Definition: Message.php:1242
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition: Parser.php:6013
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database etc For and for historical it also represents a few features of articles that don t involve their such as access rights See also title txt Article Encapsulates access to the page table of the database The object represents a an and maintains state such as etc Revision Encapsulates individual page revision data and access to the revision text blobs storage system Higher level code should never touch text storage directly
Definition: design.txt:34
WebRequest clone which takes values from a provided array.
Definition: FauxRequest.php:33
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:1011
static articles()
Definition: SiteStats.php:141
$mRevisionUser
Definition: Parser.php:218
lock()
Lock the current instance of the parser.
Definition: Parser.php:5968
static pages()
Definition: SiteStats.php:149
$line
Definition: cdb.php:59
const SFH_OBJECT_ARGS
Definition: Parser.php:85
makeKnownLinkHolder($nt, $text= '', $trail= '', $prefix= '')
Render a forced-blue link inline; protect against double expansion of URLs if we're in a mode that pr...
Definition: Parser.php:2381
static statelessFetchTemplate($title, $parser=false)
Static function to get a template. Can be overridden via ParserOptions::setTemplateCallback().
Definition: Parser.php:3529
I won t presume to tell you how to I m just describing the methods I chose to use for myself If you do choose to follow these it will probably be easier for you to collaborate with others on the but if you want to contribute without by all means do which work well I also use K &R brace matching style I know that s a religious issue for so if you want to use a style that puts opening braces on the next line
Definition: design.txt:79
setFunctionHook($id, $callback, $flags=0)
Create a function, e.g.
Definition: Parser.php:4774
static setupOOUI($skinName= '', $dir= 'ltr')
Helper function to setup the PHP implementation of OOUI to use in this request.
static makeMediaLinkFile(Title $title, $file, $html= '')
Create a direct link to a given uploaded file.
Definition: Linker.php:874
$mIncludeCount
Definition: Parser.php:183
usually copyright or history_copyright This message must be in HTML not wikitext if the section is included from a template to be included in the link
Definition: hooks.txt:2854
$mMarkerIndex
Definition: Parser.php:150
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object to manipulate or replace but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok inclusive $limit
Definition: hooks.txt:1011
getTitle()
Accessor for the Title object.
Definition: Parser.php:742
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
extractSections($text, $sectionId, $mode, $newText= '')
Break wikitext input into sections, and either pull or replace some particular section's text...
Definition: Parser.php:5419
ParserOutput $mOutput
Definition: Parser.php:175
getOutput()
Get the ParserOutput object.
Definition: Parser.php:787
$wgExperimentalHtmlIds
Should we allow a broader set of characters in id attributes, per HTML5? If not, use only HTML 4-comp...
doMagicLinks($text)
Replace special strings like "ISBN xxx" and "RFC xxx" with magic external links.
Definition: Parser.php:1399
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable or merely the Work and Derivative Works thereof Contribution shall mean any work of including the original version of the Work and any modifications or additions to that Work or Derivative Works that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner For the purposes of this submitted means any form of or written communication sent to the Licensor or its including but not limited to communication on electronic mailing source code control and issue tracking systems that are managed or on behalf the Licensor for the purpose of discussing and improving the but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as Not a Contribution 
Contributor shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work Grant of Copyright License Subject to the terms and conditions of this each Contributor hereby grants to You a non no royalty irrevocable copyright license to prepare Derivative Works publicly display
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:1011
static cleanSigInSig($text)
Strip 3, 4 or 5 tildes out of signatures.
Definition: Parser.php:4591
setDefaultSort($sort)
Mutator for $mDefaultSort.
Definition: Parser.php:5672
fetchFile($title, $options=[])
Fetch a file and its title and register a reference to it.
Definition: Parser.php:3619
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1589
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:593
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling output() to send it all.It could be easily changed to send incrementally if that becomes useful
static doBlockLevels($text, $lineStart)
Make lists from lines starting with ':', '*', '#', etc.
$wgServer
URL of the server.
We ve cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else It s much easier for someone working with this code to see what s _really_ going on
Definition: hooks.txt:86
incrementExpensiveFunctionCount()
Increment the expensive function count.
Definition: Parser.php:3884
$mShowToc
Definition: Parser.php:194
static normalizeLinkUrl($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1937
const DB_REPLICA
Definition: defines.php:22
magicLinkCallback($m)
Definition: Parser.php:1429
const EXT_LINK_URL_CLASS
Definition: Parser.php:91
insertStripItem($text)
Add an item to the strip state. Returns the unique tag which must be inserted into the stripped text. T...
Definition: Parser.php:1011
testPst($text, Title $title, ParserOptions $options)
Definition: Parser.php:5797
static factory($url, $options=null, $caller=__METHOD__)
Generate a new request object.
if(!$wgRequest->checkUrlExtension()) if(!$wgEnableAPI) $wgTitle
Definition: api.php:57
ParserOptions $mOptions
Definition: Parser.php:207
parse($text, Title $title, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition: Parser.php:398
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition: hooks.txt:767
static numberingroup($group)
Find the number of users in a given user group.
Definition: SiteStats.php:183
== Architecture == Two class hierarchies are used to provide the functionality associated with the different content models: * Content interface (and AbstractContent base class) define functionality that acts on the concrete content of a page, and * ContentHandler base class provides functionality specific to a content model, but not acting on concrete content. The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content. These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text. All manipulation and analysis of page content must be done via the appropriate methods of the Content object. For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers. The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id). Also Title, WikiPage and Revision now have getContentHandler() methods for convenience. ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page. ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type. However, it is recommended to instead use WikiPage::getContent() resp. Revision::getContent() to get a page's content as a Content object. These two methods should be the ONLY way in which page content is accessed. Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides(). This is similar to what WikiPage::getActionOverrides() was already doing. == Serialization == With the ContentHandler facility, page content no longer has to be text based. Objects implementing the Content interface are used to represent and handle the content internally. For storage and data exchange, each content model supports at least one serialization format via 
ContentHandler::serializeContent($content).The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats().Content serialization formats are identified using MIME type like strings.The following formats are built in:*text/x-wiki-wikitext *text/javascript-for js pages *text/css-for css pages *text/plain-for future use, e.g.with plain text messages.*text/html-for future use, e.g.with plain html messages.*application/vnd.php.serialized-for future use with the api and for extensions *application/json-for future use with the api, and for use by extensions *application/xml-for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant.Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model an