MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
67 class Parser {
73  const VERSION = '1.6.4';
74 
80 
81  # Flags for Parser::setFunctionHook
82  const SFH_NO_HASH = 1;
83  const SFH_OBJECT_ARGS = 2;
84 
85  # Constants needed for external link processing
86  # Everything except bracket, space, or control characters
87  # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
88  # as well as U+3000 IDEOGRAPHIC SPACE (bug 19052)
89  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
90  # Simplified expression to match an IPv4 or IPv6 address, or
91  # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
92  const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
93  # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
94  // @codingStandardsIgnoreStart Generic.Files.LineLength
95  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
96  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
97  // @codingStandardsIgnoreEnd
98 
99  # Regular expression for a non-newline space
100  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
101 
102  # State constants for the definition list colon extraction
103  const COLON_STATE_TEXT = 0;
104  const COLON_STATE_TAG = 1;
111 
112  # Flags for preprocessToDom
113  const PTD_FOR_INCLUSION = 1;
114 
115  # Allowed values for $this->mOutputType
116  # Parameter to startExternalParse().
117  const OT_HTML = 1; # like parse()
118  const OT_WIKI = 2; # like preSaveTransform()
120  const OT_MSG = 3;
121  const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
122 
135  const MARKER_SUFFIX = "-QINU\x7f";
136  const MARKER_PREFIX = "\x7fUNIQ-";
137 
138  # Markers used for wrapping the table of contents
139  const TOC_START = '<mw:toc>';
140  const TOC_END = '</mw:toc>';
141 
142  # Persistent:
143  public $mTagHooks = array();
145  public $mFunctionHooks = array();
146  public $mFunctionSynonyms = array( 0 => array(), 1 => array() );
148  public $mStripList = array();
150  public $mVarCache = array();
151  public $mImageParams = array();
153  public $mMarkerIndex = 0;
154  public $mFirstCall = true;
155 
156  # Initialised by initialiseVariables()
157 
161  public $mVariables;
162 
166  public $mSubstWords;
167  # Initialised in constructor
169 
170  # Initialized in getPreprocessor()
171 
173 
174  # Cleared with clearState():
175 
178  public $mOutput;
180 
184  public $mStripState;
185 
191 
192  public $mLinkID;
196  public $mExpensiveFunctionCount; # number of expensive parser function calls
198 
202  public $mUser; # User object; only used when doing pre-save transform
203 
204  # Temporary
205  # These are variables reset at least once per parse regardless of $clearState
206 
210  public $mOptions;
211 
215  public $mTitle; # Title context, used for self-link rendering and similar things
216  public $mOutputType; # Output type, one of the OT_xxx constants
217  public $ot; # Shortcut alias, see setOutputType()
218  public $mRevisionObject; # The revision object of the specified revision ID
219  public $mRevisionId; # ID to display in {{REVISIONID}} tags
220  public $mRevisionTimestamp; # The timestamp of the specified revision ID
221  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
222  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
223  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
224  public $mInputSize = false; # For {{PAGESIZE}} on current page.
225 
231 
238 
246 
251  public $mInParse = false;
252 
254  protected $mProfiler;
255 
/**
 * Set up the parser: store the configuration, build the bracketed
 * external-link regex and decide which preprocessor class to use.
 *
 * @param array $conf Configuration; the only key read here is
 *   'preprocessorClass', which overrides the automatic choice.
 */
public function __construct( $conf = array() ) {
	$this->mConf = $conf;
	$this->mUrlProtocols = wfUrlProtocols();

	# "[" + protocol + address + URL characters, optional spaces, link text, "]"
	$linkTarget = '(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR . self::EXT_LINK_URL_CLASS . '*)';
	$this->mExtLinkBracketedRegex = '/\[' . $linkTarget .
		'\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';

	if ( isset( $conf['preprocessorClass'] ) ) {
		$preprocessorClass = $conf['preprocessorClass'];
	} else {
		# Preprocessor_Hash is the fallback for every case except a plain
		# (non-HipHop) PHP with the core DOM extension and no domxml.
		$preprocessorClass = 'Preprocessor_Hash';
		if ( !defined( 'HPHP_VERSION' ) ) {
			if ( extension_loaded( 'domxml' ) ) {
				# PECL extension that conflicts with the core DOM extension (bug 13770)
				wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
			} elseif ( extension_loaded( 'dom' ) ) {
				$preprocessorClass = 'Preprocessor_DOM';
			}
		}
	}
	$this->mPreprocessorClass = $preprocessorClass;

	wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
281 
/**
 * Drop every property of this object on destruction, starting with the
 * link holder array.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}
	# Take a snapshot of the remaining property names, then unset each one
	$fieldNames = array_keys( get_object_vars( $this ) );
	foreach ( $fieldNames as $fieldName ) {
		unset( $this->$fieldName );
	}
}
293 
/**
 * Fix up the copy after cloning: mark it as not parsing, detach the
 * fields that may still be PHP references shared with the source
 * object, and notify 'ParserCloned' hook handlers.
 */
public function __clone() {
	$this->mInParse = false;

	// Bug 56226: taking a reference to an object field turns the field
	// itself into a reference, and clone copies reference fields as
	// references. Old hooks pass these fields by reference, so the clone
	// would otherwise keep sharing them with the original object.
	$sharedFields = array( 'mStripState', 'mVarCache' );
	foreach ( $sharedFields as $field ) {
		// Copy the value out, rebind the field to that fresh copy, then
		// drop the local name so only the field holds it.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', array( $this ) );
}
316 
/**
 * One-time initialisation, run on the first call only.
 *
 * Registers the core tag hooks, initialises the variables and runs the
 * 'ParserFirstCallInit' hook. Later calls return immediately because
 * mFirstCall has been cleared.
 */
public function firstCallInit() {
	if ( !$this->mFirstCall ) {
		return;
	}
	$this->mFirstCall = false;

	CoreTagHooks::register( $this );
	$this->initialiseVariables();

	// Since PHP 7.1 "&$this" no longer produces a real reference, so hook
	// handlers declaring a by-reference parameter would be rejected.
	// Pass a local alias by reference instead.
	$parser = $this;
	Hooks::run( 'ParserFirstCallInit', array( &$parser ) );
}
332 
/**
 * Reset all per-parse state so the parser can be used for a new parse.
 *
 * Runs firstCallInit() if it has not run yet, creates a fresh
 * ParserOutput, StripState, LinkHolderArray and SectionProfiler, zeroes
 * the counters and finally runs the 'ParserClearState' hook.
 *
 * $this->mOptions must already be set: the new output object is
 * registered on it as an option watcher.
 */
public function clearState() {
	if ( $this->mFirstCall ) {
		$this->firstCallInit();
	}
	$this->mOutput = new ParserOutput;
	$this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );
	$this->mAutonumber = 0;
	$this->mLastSection = '';
	$this->mDTopen = false;
	$this->mIncludeCount = array();
	$this->mArgStack = false;
	$this->mInPre = false;
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;
	$this->mRevisionObject = $this->mRevisionTimestamp =
		$this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
	$this->mVarCache = array();
	$this->mUser = null;
	$this->mLangLinkLanguages = array();
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState;

	# Clear these on every parse, bug 4549
	$this->mTplRedirCache = $this->mTplDomCache = array();

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = array(
		'post-expand' => 0,
		'arg' => 0,
	);
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = array();
	$this->mDoubleUnderscores = array();
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor that still points at another parser
	# (the one this object was cloned from) must not be reused.
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Since PHP 7.1 "&$this" no longer produces a real reference, so hook
	// handlers declaring a by-reference parameter would be rejected.
	// Pass a local alias by reference instead.
	$parser = $this;
	Hooks::run( 'ParserClearState', array( &$parser ) );
}
387 
/**
 * Convert wikitext to HTML and return the populated ParserOutput.
 *
 * Steps visible here: run the strip hooks, internalParse(), the
 * 'ParserAfterParse' hook and internalParseHalfParsed(); set the
 * converted title text unless conversion is disabled; warn when the
 * expensive-function limit is exceeded; optionally append the limit and
 * profiling reports as HTML comments; store the text in the output.
 * Revision fields changed for this call are restored before returning.
 *
 * @param string $text Wikitext to parse
 * @param Title $title Title passed on to startParse()
 * @param ParserOptions $options Options passed on to startParse()
 * @param bool $linestart Passed through to internalParseHalfParsed()
 * @param bool $clearState When true, U+007F is stripped from the input and
 *   the parser is locked for the duration of the call
 * @param int|null $revid When not null, used as the revision ID for this
 *   parse (the other revision fields are reset to null)
 * @return ParserOutput $this->mOutput
 */
public function parse( $text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	// Read further down for the limit report; without this declaration the
	// variable is an undefined local and the host name is never reported.
	global $wgShowHostnames;

	// Since PHP 7.1 "&$this" no longer produces a real reference, so hook
	// handlers declaring a by-reference parameter would be rejected.
	// Pass a local alias by reference instead.
	$parser = $this;

	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Never read: kept only so the value returned by lock() stays alive
		// until this method returns (presumably that releases the lock).
		$magicScopeVariable = $this->lock();
	}

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	# Remember the revision fields so they can be restored at the end
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	Hooks::run( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', array( &$parser, &$text, &$this->mStripState ) );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	# Set the converted title text, unless title conversion is disabled by
	# option or magic word, or a display title has already been set.
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();

		$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuTime !== null ) {
			$this->mOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
		$this->mOutput->setLimitReportData( 'limitreport-walltime',
			sprintf( "%.3f", $wallTime )
		);

		$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
			array( $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
			array( $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
			array( $this->mIncludeSizes['post-expand'], $max )
		);
		$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
			array( $this->mIncludeSizes['arg'], $max )
		);
		$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
			array( $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
			array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() )
		);
		Hooks::run( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) );

		$limitReport = "NewPP limit report\n";
		if ( $wgShowHostnames ) {
			$limitReport .= 'Parsed by ' . wfHostname() . "\n";
		}
		$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
		$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
		$limitReport .= 'Dynamic content: ' .
			( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
			"\n";

		foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
			# A handler returning false suppresses the default formatting below
			if ( Hooks::run( 'ParserLimitReportFormat',
				array( $key, &$value, &$limitReport, false, false )
			) ) {
				$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
				$valueMsg = wfMessage( array( "$key-value-text", "$key-value" ) )
					->inLanguage( 'en' )->useDatabase( false );
				if ( !$valueMsg->exists() ) {
					$valueMsg = new RawMessage( '$1' );
				}
				if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
					$valueMsg->params( $value );
					$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
				}
			}
		}
		// Since we're not really outputting HTML, decode the entities and
		// then re-encode the things that need hiding inside HTML comments.
		$limitReport = htmlspecialchars_decode( $limitReport );
		Hooks::run( 'ParserLimitReport', array( $this, &$limitReport ) );

		// Sanitize for comment. The replacement for '-' is U+2010 HYPHEN
		// (UTF-8 bytes E2 80 90), which looks much like the problematic '-'.
		$limitReport = str_replace(
			array( '-', '&' ),
			array( "\xE2\x80\x90", '&amp;' ),
			$limitReport
		);
		$text .= "\n<!-- \n$limitReport-->\n";

		// Add on template profiling data
		$dataByFunc = $this->mProfiler->getFunctionStats();
		uasort( $dataByFunc, function ( $a, $b ) {
			// Descending order by 'real'. The comparator must return an
			// integer; returning the boolean result of "<" reports every
			// "greater than" pair as equal and yields an unreliable order.
			if ( $a['real'] == $b['real'] ) {
				return 0;
			}
			return ( $a['real'] < $b['real'] ) ? 1 : -1;
		} );
		$profileReport = "Transclusion expansion time report (%,ms,calls,template)\n";
		foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
			$profileReport .= sprintf( "%6.2f%% %8.3f %6d - %s\n",
				$item['%real'], $item['real'], $item['calls'],
				htmlspecialchars( $item['name'] ) );
		}
		$text .= "\n<!-- \n$profileReport-->\n";

		# Log pages that generate more than a tenth of the allowed node count
		if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
			wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
				$this->mTitle->getPrefixedDBkey() );
		}
	}
	$this->mOutput->setText( $text );

	# Restore the revision fields saved above
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
576 
/**
 * Half-parse wikitext from inside an ongoing parse: run the strip hooks
 * and internalParse() with $isMain = false. The result has not been
 * through internalParseHalfParsed().
 *
 * @param string $text Wikitext to parse
 * @param PPFrame|bool $frame Frame handed to internalParse(), or false
 * @return string Half-parsed text
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Since PHP 7.1 "&$this" no longer produces a real reference, so hook
	// handlers declaring a by-reference parameter would be rejected.
	// Pass a local alias by reference instead.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
	Hooks::run( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
	$text = $this->internalParse( $text, false, $frame );
	return $text;
}
605 
/**
 * Like recursiveTagParse(), but also runs the result through
 * internalParseHalfParsed() with $isMain = false.
 *
 * @param string $text Wikitext to parse
 * @param PPFrame|bool $frame Frame handed to recursiveTagParse(), or false
 * @return string Parsed text
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
628 
/**
 * Expand variables in wikitext without producing HTML: lock the parser,
 * start a parse in OT_PREPROCESS mode, run the strip hooks, replace
 * variables and unstrip the result.
 *
 * @param string $text Wikitext to preprocess
 * @param Title|null $title Title passed on to startParse()
 * @param ParserOptions $options Options passed on to startParse()
 * @param int|null $revid When not null, stored as the revision ID
 * @param PPFrame|bool $frame Frame handed to replaceVariables(), or false
 * @return string Preprocessed text
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Never read: kept only so the value returned by lock() stays alive
	// until this method returns (presumably that releases the lock).
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	// Since PHP 7.1 "&$this" no longer produces a real reference, so hook
	// handlers declaring a by-reference parameter would be rejected.
	// Pass a local alias by reference instead.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
	Hooks::run( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
654 
/**
 * Replace variables in $text and unstrip the result, without locking
 * the parser or starting a new parse.
 *
 * @param string $text Wikitext to preprocess
 * @param PPFrame|bool $frame Frame handed to replaceVariables(), or false
 * @return string Preprocessed text
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
669 
/**
 * Process wikitext for preloading: substitute $params into the text as
 * message parameters, then expand it as an included document in
 * OT_PLAIN mode and unstrip the result.
 *
 * @param string $text Wikitext containing $1-style parameter placeholders
 * @param Title $title Title passed on to startParse()
 * @param ParserOptions $options Options passed on to startParse()
 * @param array $params Values substituted for the placeholders
 * @return string Processed text
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = array() ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	// Never read: kept only so the value returned by lock() stays alive
	// until this method returns (presumably that releases the lock).
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	# $flags was passed to expand() without ever being defined. Expand
	# with neither template arguments nor template transclusions.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
697 
/**
 * Return a random string from wfRandomString( 16 ).
 *
 * @deprecated since 1.26; a deprecation warning is emitted on every call
 * @return string
 */
public static function getRandomString() {
	wfDeprecated( __METHOD__, '1.26' );
	$random = wfRandomString( 16 );
	return $random;
}
708 
/**
 * Set the user that getUser() returns in preference to the user of the
 * parser options.
 *
 * @param User|null $user User object, or null to fall back to the options
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
718 
/**
 * Return the strip marker prefix.
 *
 * @deprecated since 1.26; a deprecation warning is emitted on every call.
 *   The value is the MARKER_PREFIX class constant.
 * @return string
 */
public function uniqPrefix() {
	wfDeprecated( __METHOD__, '1.26' );
	$prefix = self::MARKER_PREFIX;
	return $prefix;
}
729 
/**
 * Set the context title.
 *
 * A falsy argument is replaced by the title 'NO TITLE'. A title that
 * carries a fragment is cloned and stored with the fragment removed, so
 * the caller's object is left untouched.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	if ( !$t ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	if ( !$t->hasFragment() ) {
		$this->mTitle = $t;
		return;
	}

	# Strip the fragment to avoid various odd effects
	$this->mTitle = clone $t;
	$this->mTitle->setFragment( '' );
}
748 
/**
 * Get the context title as stored by setTitle().
 *
 * @return Title|null
 */
public function getTitle() {
	return $this->mTitle;
}
757 
/**
 * Combined accessor/mutator for the context title, delegating to
 * wfSetVar() on $this->mTitle.
 *
 * @param Title|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );
	return $result;
}
767 
/**
 * Set the output type and rebuild the $this->ot shortcut array, which
 * holds one boolean per output type ('html', 'wiki', 'pre', 'plain').
 *
 * @param int $ot One of the OT_xxx constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias: flag name => constant it stands for
	$typeByFlag = array(
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	);
	$flags = array();
	foreach ( $typeByFlag as $flagName => $type ) {
		$flags[$flagName] = ( $ot == $type );
	}
	$this->ot = $flags;
}
783 
/**
 * Combined accessor/mutator for the output type, delegating to
 * wfSetVar() on $this->mOutputType. Unlike setOutputType(), this does
 * not touch the $this->ot shortcut array.
 *
 * @param int|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
793 
/**
 * Get the output object of the current parse (created in clearState()).
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
802 
/**
 * Get the parser options currently stored in $this->mOptions.
 *
 * @return ParserOptions
 */
public function getOptions() {
	return $this->mOptions;
}
811 
/**
 * Combined accessor/mutator for the parser options, delegating to
 * wfSetVar() on $this->mOptions.
 *
 * @param ParserOptions|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
821 
/**
 * Return the current link ID, then advance the counter by one.
 *
 * @return int The ID before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
828 
/**
 * Overwrite the link ID counter that nextLinkID() hands out.
 *
 * @param int $id New counter value
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
835 
/**
 * Get the language used by parser functions; this is simply the target
 * language from getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	$lang = $this->getTargetLanguage();
	return $lang;
}
843 
/**
 * Get the target language for the content being parsed.
 *
 * Resolution order: the target language set in the parser options; the
 * user language when parsing an interface message; otherwise the page
 * language of the context title.
 *
 * @throws MWException When the title is needed but has not been set
 * @return Language
 */
public function getTargetLanguage() {
	$optionsTarget = $this->mOptions->getTargetLanguage();
	if ( $optionsTarget !== null ) {
		return $optionsTarget;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
866 
/**
 * Get the language object used for language conversion; this is simply
 * the target language from getTargetLanguage().
 *
 * @return Language
 */
public function getConverterLanguage() {
	$lang = $this->getTargetLanguage();
	return $lang;
}
874 
/**
 * Get the user for this parse: the one set through setUser() if any,
 * otherwise the user of the parser options.
 *
 * @return User
 */
public function getUser() {
	return is_null( $this->mUser )
		? $this->mOptions->getUser()
		: $this->mUser;
}
887 
/**
 * Get the preprocessor, creating it on first use from the class name
 * chosen in the constructor.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->mPreprocessorClass;
	$this->mPreprocessor = new $preprocessorClass( $this );
	return $this->mPreprocessor;
}
900 
/**
 * Replace the given XML-style tags and HTML comments in $text with
 * unique markers and collect what was removed.
 *
 * Each $matches entry is keyed by its marker and holds:
 *   array( element name, content (null for <tag />), decoded attributes,
 *          the complete original source of the tag )
 * A tag without a matching end tag swallows the rest of the text.
 *
 * @param array $elements Tag names to extract; joined into a regex
 *   alternation as-is
 * @param string $text Source text
 * @param array &$matches Filled with the extracted tags; any previous
 *   content is discarded
 * @param string|null $uniq_prefix Unused; passing a non-null value only
 *   triggers a deprecation warning
 * @return string $text with every extracted tag replaced by its marker
 */
public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) {
	if ( $uniq_prefix !== null ) {
		wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' );
	}
	# Marker counter, shared by all calls
	static $n = 1;

	$output = '';
	$matches = array();

	$alternation = implode( '|', $elements );
	$openRegex = "/<($alternation)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

	while ( $text != '' ) {
		$parts = preg_split( $openRegex, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$partCount = count( $parts );
		$output .= $parts[0];
		if ( $partCount < 5 ) {
			# No further opening tag or comment
			break;
		}

		if ( $partCount > 5 ) {
			# Comment: the three tag groups came back empty before "!--"
			list( $element, $attributes, $close, $inside ) =
				array( $parts[4], '', '', $parts[5] );
		} else {
			# Tag
			list( $element, $attributes, $close, $inside ) =
				array( $parts[1], $parts[2], $parts[3], $parts[4] );
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$output .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$tail = null;
			$text = $inside;
		} else {
			$closeRegex = ( $element === '!--' )
				? '/(-->)/'
				: "/(<\\/$element\\s*>)/i";
			$pieces = preg_split( $closeRegex, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $pieces[0];
			if ( count( $pieces ) >= 3 ) {
				$tail = $pieces[1];
				$text = $pieces[2];
			} else {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			}
		}

		$matches[$marker] = array(
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail"
		);
	}

	return $output;
}
986 
/**
 * Get the strip list stored in $this->mStripList.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
995 
/**
 * Register $text in the strip state under a freshly generated marker
 * and return that marker.
 *
 * @param string $text Text to store under the marker
 * @return string The marker standing in for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1011 
/**
 * Convert wikitext table markup ({| ... |}) in $text into HTML table tags.
 *
 * The text is processed line by line. Five parallel stacks, with one entry
 * per currently open (possibly nested) table, track whether a cell is open,
 * which cell tag was opened last, whether a row is open, the attributes
 * waiting for the next row, and whether the table has opened any row yet.
 * Tables still open at the end of the text are closed automatically.
 *
 * @param string $text Text to process
 * @return string Text with table markup replaced by HTML, without the
 *   trailing newline; '' when the whole result would be one empty table
 */
1019  public function doTableStuff( $text ) {
1020 
1021  $lines = StringUtils::explode( "\n", $text );
1022  $out = '';
1023  $td_history = array(); # Is currently a td tag open?
1024  $last_tag_history = array(); # Save history of last tag activated (td, th or caption)
1025  $tr_history = array(); # Is currently a tr tag open?
1026  $tr_attributes = array(); # history of tr attributes
1027  $has_opened_tr = array(); # Did this table open a <tr> element?
1028  $indent_level = 0; # indent level of the table
1029 
1030  foreach ( $lines as $outLine ) {
1031  $line = trim( $outLine );
1032 
1033  if ( $line === '' ) { # empty line, go to next line
1034  $out .= $outLine . "\n";
1035  continue;
1036  }
1037 
1038  $first_character = $line[0];
1039  $first_two = substr( $line, 0, 2 );
1040  $matches = array();
1041 
1042  if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1043  # First check if we are starting a new table
1044  $indent_level = strlen( $matches[1] );
1045 
1046  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1047  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1048 
1049  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
# Push a fresh entry for the new table onto every stack
1050  array_push( $td_history, false );
1051  array_push( $last_tag_history, '' );
1052  array_push( $tr_history, false );
1053  array_push( $tr_attributes, '' );
1054  array_push( $has_opened_tr, false );
1055  } elseif ( count( $td_history ) == 0 ) {
1056  # Don't do any of the following
# (we are not inside a table: pass the line through unchanged)
1057  $out .= $outLine . "\n";
1058  continue;
1059  } elseif ( $first_two === '|}' ) {
1060  # We are ending a table
1061  $line = '</table>' . substr( $line, 2 );
1062  $last_tag = array_pop( $last_tag_history );
1063 
# A table that never opened a row gets one empty row with one empty cell
1064  if ( !array_pop( $has_opened_tr ) ) {
1065  $line = "<tr><td></td></tr>{$line}";
1066  }
1067 
1068  if ( array_pop( $tr_history ) ) {
1069  $line = "</tr>{$line}";
1070  }
1071 
1072  if ( array_pop( $td_history ) ) {
1073  $line = "</{$last_tag}>{$line}";
1074  }
1075  array_pop( $tr_attributes );
1076  $outLine = $line . str_repeat( '</dd></dl>', $indent_level );
1077  } elseif ( $first_two === '|-' ) {
1078  # Now we have a table row
1079  $line = preg_replace( '#^\|-+#', '', $line );
1080 
1081  # What's after the tag is now only attributes
1082  $attributes = $this->mStripState->unstripBoth( $line );
1083  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
# Replace the pending row attributes; the <tr> itself is emitted by the first cell of the row
1084  array_pop( $tr_attributes );
1085  array_push( $tr_attributes, $attributes );
1086 
1087  $line = '';
1088  $last_tag = array_pop( $last_tag_history );
1089  array_pop( $has_opened_tr );
1090  array_push( $has_opened_tr, true );
1091 
1092  if ( array_pop( $tr_history ) ) {
1093  $line = '</tr>';
1094  }
1095 
1096  if ( array_pop( $td_history ) ) {
1097  $line = "</{$last_tag}>{$line}";
1098  }
1099 
1100  $outLine = $line;
1101  array_push( $tr_history, false );
1102  array_push( $td_history, false );
1103  array_push( $last_tag_history, '' );
1104  } elseif ( $first_character === '|'
1105  || $first_character === '!'
1106  || $first_two === '|+'
1107  ) {
1108  # This might be cell elements, td, th or captions
1109  if ( $first_two === '|+' ) {
1110  $first_character = '+';
1111  $line = substr( $line, 2 );
1112  } else {
1113  $line = substr( $line, 1 );
1114  }
1115 
1116  // Implies both are valid for table headings.
1117  if ( $first_character === '!' ) {
1118  $line = str_replace( '!!', '||', $line );
1119  }
1120 
1121  # Split up multiple cells on the same line.
1122  # FIXME : This can result in improper nesting of tags processed
1123  # by earlier parser steps.
1124  $cells = explode( '||', $line );
1125 
1126  $outLine = '';
1127 
1128  # Loop through each table cell
1129  foreach ( $cells as $cell ) {
1130  $previous = '';
# Captions do not open a row; td and th cells open one if none is open
1131  if ( $first_character !== '+' ) {
1132  $tr_after = array_pop( $tr_attributes );
1133  if ( !array_pop( $tr_history ) ) {
1134  $previous = "<tr{$tr_after}>\n";
1135  }
1136  array_push( $tr_history, true );
1137  array_push( $tr_attributes, '' );
1138  array_pop( $has_opened_tr );
1139  array_push( $has_opened_tr, true );
1140  }
1141 
1142  $last_tag = array_pop( $last_tag_history );
1143 
# Close the previous cell of this table, if one is still open
1144  if ( array_pop( $td_history ) ) {
1145  $previous = "</{$last_tag}>\n{$previous}";
1146  }
1147 
1148  if ( $first_character === '|' ) {
1149  $last_tag = 'td';
1150  } elseif ( $first_character === '!' ) {
1151  $last_tag = 'th';
1152  } elseif ( $first_character === '+' ) {
1153  $last_tag = 'caption';
1154  } else {
1155  $last_tag = '';
1156  }
1157 
1158  array_push( $last_tag_history, $last_tag );
1159 
1160  # A cell could contain both parameters and data
1161  $cell_data = explode( '|', $cell, 2 );
1162 
1163  # Bug 553: Note that a '|' inside an invalid link should not
1164  # be mistaken as delimiting cell parameters
1165  if ( strpos( $cell_data[0], '[[' ) !== false ) {
1166  $cell = "{$previous}<{$last_tag}>{$cell}";
1167  } elseif ( count( $cell_data ) == 1 ) {
1168  $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
1169  } else {
1170  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1171  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1172  $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
1173  }
1174 
1175  $outLine .= $cell;
1176  array_push( $td_history, true );
1177  }
1178  }
1179  $out .= $outLine . "\n";
1180  }
1181 
1182  # Closing open td, tr && table
# NOTE(review): this always emits </td>, without consulting $last_tag_history,
# even when the cell left open was a th or a caption - confirm this is intended.
1183  while ( count( $td_history ) > 0 ) {
1184  if ( array_pop( $td_history ) ) {
1185  $out .= "</td>\n";
1186  }
1187  if ( array_pop( $tr_history ) ) {
1188  $out .= "</tr>\n";
1189  }
1190  if ( !array_pop( $has_opened_tr ) ) {
1191  $out .= "<tr><td></td></tr>\n";
1192  }
1193 
1194  $out .= "</table>\n";
1195  }
1196 
1197  # Remove trailing line-ending (b/c)
1198  if ( substr( $out, -1 ) === "\n" ) {
1199  $out = substr( $out, 0, -1 );
1200  }
1201 
1202  # special case: don't return empty table
1203  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1204  $out = '';
1205  }
1206 
1207  return $out;
1208  }
1209 
/**
 * Helper for parse() that transforms wikitext into half-parsed HTML:
 * expands variables/templates, removes disallowed HTML, then handles
 * tables, horizontal rules, double-underscore words, headings, internal
 * links, quotes, external links, magic links and heading formatting.
 * Link holders and strip markers are still present in the result.
 *
 * @param string $text Wikitext to transform
 * @param bool $isMain Passed through to formatHeadings()
 * @param PPFrame|bool $frame When set, its depth decides how the text is
 *   preprocessed (0: top-level document, otherwise: included document)
 *   and it is used for the expansion; when false, replaceVariables()
 *   is used instead
 * @return string Half-parsed text; the input text as left by the hook
 *   when a 'ParserBeforeInternalParse' handler returns false
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	// Since PHP 7.1 "&$this" no longer produces a real reference, so hook
	// handlers declaring a by-reference parameter would be rejected.
	// Pass a local alias by reference instead.
	$parser = $this;

	# Hook to suspend the parser in this state
	if ( !Hooks::run( 'ParserBeforeInternalParse', array( &$parser, &$text, &$this->mStripState ) ) ) {
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		if ( !$frame->depth ) {
			$flag = 0;
		} else {
			$flag = self::PTD_FOR_INCLUSION;
		}
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	Hooks::run( 'InternalParseBeforeSanitize', array( &$parser, &$text, &$this->mStripState ) );
	$text = Sanitizer::removeHTMLtags(
		$text,
		# A callable does not need its object passed by reference
		array( $this, 'attributeStripCallback' ),
		false,
		array_keys( $this->mTransparentTagHooks )
	);
	Hooks::run( 'InternalParseBeforeLinks', array( &$parser, &$text, &$this->mStripState ) );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );
	$text = $this->formatHeadings( $text, $origText, $isMain );

	return $text;
}
1280 
/**
 * Helper for parse() that turns half-parsed HTML into fully parsed HTML:
 * unstrips general markers, applies the French-spacing fixes, builds
 * block-level structure, replaces link holders, runs language
 * conversion, unstrips nowiki markers, replaces transparent tags,
 * normalises character references and finally tidies the markup.
 *
 * @param string $text Half-parsed text, as produced by internalParse()
 * @param bool $isMain When true, the 'ParserAfterUnstrip',
 *   'ParserBeforeTidy' and 'ParserAfterTidy' hooks are run
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string Fully parsed HTML
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	// Since PHP 7.1 "&$this" no longer produces a real reference, so hook
	// handlers declaring a by-reference parameter would be rejected.
	// Pass a local alias by reference instead.
	$parser = $this;

	$text = $this->mStripState->unstripGeneral( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', array( &$parser, &$text ) );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$fixtags = array(
		# french spaces, last one Guillemet-left
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# french spaces, Guillemet-right
		'/(\\302\\253) /' => '\\1&#160;',
		'/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
	);
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	# Language conversion is skipped when disabled by option or magic
	# word, and for interface messages.
	if ( !( $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
	) {
		if ( !$this->mOptions->getInterfaceMessage() ) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert( $text );
		}
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', array( &$parser, &$text ) );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( MWTidy::isEnabled() && $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
		$this->mOutput->addModuleStyles( MWTidy::getModuleStyles() );
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
			'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (bug 3695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
			'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
			'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		);

		$text = preg_replace(
			array_keys( $tidyregs ),
			array_values( $tidyregs ),
			$text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', array( &$parser, &$text ) );
	}

	return $text;
}
1378 
/**
 * Replace free external URLs and the magic strings "RFC n", "PMID n" and
 * "ISBN n" with links.
 *
 * Existing anchors and HTML tags are matched first (groups 1 and 2) purely so
 * that the callback can pass them through untouched; this keeps URLs and
 * magic words inside tags or link text from being linked a second time.
 *
 * @param string $text
 * @return string|null The converted text; null if PCRE reports an error
 */
public function doMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$space = self::SPACE_NOT_NL; # non-newline space
	$spdash = "(?:-|$space)"; # a dash or a non-newline space
	$spaces = "$space++"; # possessive match of 1 or more spaces
	# The pattern is written in extended (x) mode, so the layout and the
	# "#" comments inside the string are ignored by PCRE. It is split into a
	# single-quoted and a double-quoted half so that only the second half
	# has PHP variables interpolated into it.
	# The callback is passed as a plain array( $this, ... ): objects are
	# handles, so no reference to $this is needed (and MediaWiki later removed
	# such references for PHP 7.1 compatibility).
	$text = preg_replace_callback(
		'!(?: # Start cases
			(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
			(<.*?>) | # m[2]: Skip stuff inside
			# HTML elements' . "
			(\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
			# m[4]: Post-protocol path
			\b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
			([0-9]+)\b |
			\bISBN $spaces ( # m[6]: ISBN, capture number
			(?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
			(?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
			[0-9Xx] # check digit
			)\b
		)!xu", array( $this, 'magicLinkCallback' ), $text );
	return $text;
}
1414 
/**
 * Callback for doMagicLinks(): turn one regex match into its replacement.
 *
 * @param array $m Match array; group numbering is documented in the
 *   pattern inside doMagicLinks()
 * @throws MWException If group 5 matched but the text starts with neither
 *   "RFC" nor "PMID"
 * @return string HTML
 */
public function magicLinkCallback( $m ) {
	# A group only counts when it took part in the match and is non-empty
	$matched = function ( $group ) use ( $m ) {
		return isset( $m[$group] ) && $m[$group] !== '';
	};

	# Group 1 is an existing anchor, group 2 any other HTML tag:
	# both are returned exactly as they were found
	if ( $matched( 1 ) || $matched( 2 ) ) {
		return $m[0];
	}

	# Free external link; group 4 is the part after the protocol
	if ( $matched( 3 ) ) {
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	# RFC or PMID: the keyword selects the URL message and the CSS class
	if ( $matched( 5 ) ) {
		$kinds = array(
			'RFC' => array( 'rfcurl', 'mw-magiclink-rfc' ),
			'PMID' => array( 'pubmedurl', 'mw-magiclink-pmid' ),
		);
		$keyword = null;
		foreach ( array_keys( $kinds ) as $candidate ) {
			if ( strncmp( $m[0], $candidate, strlen( $candidate ) ) === 0 ) {
				$keyword = $candidate;
				break;
			}
		}
		if ( $keyword === null ) {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		list( $urlmsg, $cssClass ) = $kinds[$keyword];
		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
	}

	# ISBN: link to Special:Booksources
	if ( $matched( 6 ) ) {
		# Show the number with plain spaces in place of any other
		# non-newline space...
		$isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
		# ...and look it up without delimiters, with an upper-case check digit
		$num = strtr( $isbn, array(
			'-' => '',
			' ' => '',
			'x' => 'X',
		) );
		$titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
		return '<a href="' .
			htmlspecialchars( $titleObj->getLocalURL() ) .
			'" class="internal mw-magiclink-isbn">ISBN ' . $isbn . '</a>';
	}

	# Nothing recognisable matched
	return $m[0];
}
1466 
/**
 * Make a free external link, given a user-supplied URL.
 *
 * @param string $url The matched text, starting with the protocol
 * @param int $numPostProto Number of characters after the protocol
 *   (the length of group 4 in doMagicLinks())
 * @return string HTML, followed by whatever was split off the end of $url
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# '<' and '>' (escaped earlier by removeHTMLtags()) must not be part of
	# a URL, per RFC 2396; a non-breaking space ends a URL too (bug T84937).
	# Everything from the first such entity onwards becomes trail.
	$entity = array();
	$hasEntity = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entity,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasEntity ) {
		$cut = $entity[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Trailing punctuation moves to the trail; a closing bracket counts as
	# punctuation only when the URL has no opening bracket
	$sep = ( strpos( $url, '(' ) === false ) ? ',;\.:!?' . ')' : ',;\.:!?';

	$reversed = strrev( $url );
	$trailLen = strspn( $reversed, $sep );
	# If the punctuation run starts with ';' it may be the end of an HTML
	# entity, which must stay whole. The check is run on the reversed string
	# from a fixed offset, which avoids a slow $-anchored match in hot code;
	# the un-reversed expression is /&([a-z]+|#x[\da-f]+|#\d+)$/i
	$ignored = array();
	if ( $trailLen > 0
		&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $ignored, 0, $trailLen )
	) {
		$trailLen--;
	}
	if ( $trailLen > 0 ) {
		$trail = substr( $url, -$trailLen ) . $trail;
		$url = substr( $url, 0, -$trailLen );
	}

	# If only the bare protocol is left, this is not a link:
	# return the text unchanged
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# An external image is emitted as is
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html !== false ) {
		return $html . $trail;
	}

	# Otherwise build an ordinary link...
	$label = $this->getConverterLanguage()->markNoConversion( $url, true );
	$attribs = $this->getExternalLinkAttribs( $url );
	$html = Linker::makeExternalLink( $url, $label, true, 'free', $attribs );
	# ...and register it in the output object, with unnecessary URL escape
	# codes replaced by their equivalent characters
	$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );

	return $html . $trail;
}
1543 
/**
 * Turn lines of the form "== Heading ==" into <h1>..<h6> elements.
 *
 * Levels are handled from 6 down to 1 so that a run of "=" signs is always
 * matched by the deepest level that fits before shallower ones are tried.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( array( 6, 5, 4, 3, 2, 1 ) as $level ) {
		$marker = str_repeat( '=', $level );
		$pattern = '/^' . $marker . '(.+)' . $marker . '\\s*$/m';
		$replacement = '<h' . $level . '>\\1</h' . $level . '>';
		$text = preg_replace( $pattern, $replacement, $text );
	}
	return $text;
}
1560 
/**
 * Apply doQuotes() to every line of the text.
 *
 * @param string $text
 * @return string The converted lines, joined with "\n" again
 */
public function doAllQuotes( $text ) {
	$converted = array();
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1578 
/**
 * Convert the apostrophe mark-up ('' italics, ''' bold, ''''' both) of a
 * single line into <i> and <b> elements.
 *
 * @param string $text One line of wikitext
 * @return string
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold the apostrophe runs, even indexes the text
	# between them
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		# No run of two or more apostrophes: nothing to do
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				# Last and second-to-last byte of the text before the mark-up
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state names the open tags in opening order ('', 'i', 'b', 'ib', 'bi').
	// 'both' means a ''''' has been seen and it is not yet known which tag
	// has to be the outer one, so the text is collected in $buffer until the
	// next mark-up decides.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test treats the
	// string "0" as empty and would silently drop it from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1760 
/**
 * Replace bracketed external links ([http://example.org label]) with HTML
 * links and register each target in the parser output.
 *
 * @param string $text
 * @throws MWException If the split on the bracketed-link pattern fails
 * @return string
 */
public function replaceExternalLinks( $text ) {
	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $pieces === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# The first piece is the text before the first link. After it the
	# pieces come in groups of four: URL, protocol, link text, following text.
	$html = array_shift( $pieces );
	$total = count( $pieces );

	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $pieces[$pos];
		# $pieces[$pos + 1] is the protocol, which is not needed here
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# '<' and '>' (escaped earlier by removeHTMLtags()) must not be
		# part of a URL, per RFC 2396: cut the URL at the first one and
		# move the rest to the front of the link text
		$entity = array();
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cut = $entity[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# Link text that is an image URL becomes an <img> tag. This was an
		# accident of the original parser, but people came to rely on it.
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';
		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$langObj = $this->getTargetLanguage();
			$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Link type for CSS: if URL and text are the same, the link is
			# essentially a free one
			$linktype = ( $label === $url ) ? 'free' : 'text';
			# Have link text, e.g. [http://domain.tld/some.link text]s:
			# check for a trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		$label = $this->getConverterLanguage()->markNoConversion( $label );

		# Use the encoded URL, so that users can paste URLs directly
		# into the text
		$url = Sanitizer::cleanUrl( $url );
		$attribs = $this->getExternalLinkAttribs( $url );
		$html .= Linker::makeExternalLink( $url, $label, false, $linktype, $attribs )
			. $dtrail . $trail;

		# Register the link in the output object, with unnecessary URL
		# escape codes replaced by the characters they stand for, so that
		# spammers cannot hide links from the filters
		$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
	}

	return $html;
}
1843 
/**
 * Get the rel attribute for a particular external link.
 *
 * @param string|bool $url URL of the link, checked against
 *   $wgNoFollowDomainExceptions
 * @param Title|null $title Title whose namespace is checked against
 *   $wgNoFollowNsExceptions
 * @return string|null 'nofollow', or null if no rel attribute applies
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	# Without this declaration the three settings are undefined local
	# variables and the method could never return 'nofollow'
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}

	# Only a real title can be in an exempt namespace. Looking up a "no
	# title" placeholder of false with a loose in_array() would match
	# namespace 0 and wrongly exempt the link.
	if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
		return null;
	}

	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
1863 
/**
 * Get an associative array of additional HTML attributes for an external
 * link to the given URL.
 *
 * @param string|bool $url Optional URL, passed on to getExternalLinkRel()
 * @return array With a 'rel' entry and, if the parser options define an
 *   external link target, a 'target' entry
 */
public function getExternalLinkAttribs( $url = false ) {
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	if ( !$this->mOptions->getExternalLinkTarget() ) {
		return array( 'rel' => $rel );
	}

	return array(
		'rel' => $rel,
		'target' => $this->mOptions->getExternalLinkTarget(),
	);
}
1883 
/**
 * Deprecated alias of normalizeLinkUrl().
 *
 * @deprecated since 1.24, use normalizeLinkUrl()
 * @param string $url
 * @return string
 */
public static function replaceUnusualEscapes( $url ) {
	wfDeprecated( __METHOD__, '1.24' );
	$normalized = self::normalizeLinkUrl( $url );
	return $normalized;
}
1895 
/**
 * Bring a URL into a normal form: unsafe raw characters are
 * percent-encoded, and percent-escapes that stand for harmless printable
 * ASCII characters are decoded again.
 *
 * Fragment, query and scheme/path are handled separately because each has
 * its own set of characters that must stay escaped.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# First, make sure unsafe characters are encoded
	$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $found ) {
			return rawurlencode( $found[0] );
		},
		$url
	);

	# Components that are cut off the end of the URL, outermost first,
	# each with the characters that must stay escaped inside it:
	# the fragment, then the query (which also keeps &=+; escaped)
	$tailParts = array(
		array( '#', '"#%<>[\]^`{|}' ),
		array( '?', '"#%<>[\]^`{|}&=+;' ),
	);

	$tail = '';
	$limit = strlen( $url );
	foreach ( $tailParts as $part ) {
		list( $delimiter, $unsafe ) = $part;
		$at = strpos( $url, $delimiter );
		if ( $at === false || $at >= $limit ) {
			# Absent, or only inside a component that is already cut off
			continue;
		}
		$tail = self::normalizeUrlComponent(
			substr( $url, $at, $limit - $at ), $unsafe ) . $tail;
		$limit = $at;
	}

	# What is left is scheme and path
	# (we assume no userinfo or encoded colons in the host)
	$head = self::normalizeUrlComponent( substr( $url, 0, $limit ), '"#%<>[\]^`{|}/?' );

	return $head . $tail;
}
1940 
/**
 * Normalize the percent-escapes in one component of a URL.
 *
 * @param string $component
 * @param string $unsafe Characters that must stay escaped in this component
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$mustStayEscaped = $code <= 32
				|| $code >= 127
				|| strpos( $unsafe, $decoded ) !== false;
			if ( $mustStayEscaped ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape[0] );
			}
			# A printable ASCII character that is safe here: unescape it
			return $decoded;
		},
		$component
	);
}
1955 
/**
 * Make an external image if the URL points to one and the parser options
 * allow it.
 *
 * @param string $url
 * @return string|bool HTML of the image, or false if none was made
 */
private function maybeMakeExternalImage( $url ) {
	# The allowed sources may be a single URL prefix or an array of them
	$sources = $this->mOptions->getAllowExternalImagesFrom();
	$haveSources = !empty( $sources );
	$fromAllowedSource = false;
	if ( $haveSources ) {
		if ( is_array( $sources ) ) {
			foreach ( $sources as $source ) {
				if ( strpos( $url, $source ) === 0 ) {
					$fromAllowedSource = true;
					break;
				}
			}
		} else {
			$fromAllowedSource = ( strpos( $url, $sources ) === 0 );
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || ( $haveSources && $fromAllowedSource ) )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	# The whitelist is consulted only if nothing was made so far, the
	# whitelist is enabled and the URL looks like an image at all
	if ( $html
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $html;
	}

	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $whitelist as $entry ) {
		# Ignore comments and blank entries
		if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
			continue;
		}
		# Each remaining entry is a regex fragment: escape the delimiter
		# and match case-insensitively
		if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
			# Image matches a whitelist entry
			return Linker::makeExternalImage( $url );
		}
	}

	return $html;
}
2013 
/**
 * Process [[ ]] wikilinks and merge the resulting link holders into the
 * parser's own holder array.
 *
 * @param string $s
 * @return string The text as rewritten by replaceInternalLinks2()
 */
public function replaceInternalLinks( $s ) {
	# replaceInternalLinks2() rewrites $s in place through its reference
	# parameter and returns the holders it created
	$holders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $holders );
	return $s;
}
2027 
/**
 * Process [[ ]] wikilinks (RIL).
 *
 * The text is rewritten in place. Links whose existence has to be checked
 * are replaced by placeholders that are collected in the returned holder
 * array; language links and categories are registered in the parser output.
 *
 * @param string &$s Text to process; rewritten in place
 * @throws MWException If the parser has no title
 * @return LinkHolderArray
 */
public function replaceInternalLinks2( &$s ) {
	# Both settings are read below; without this declaration they would be
	# undefined local variables
	global $wgContLang, $wgExtraInterlanguageLinkPrefixes;

	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $wgContLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = array();
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
		# Never read on this path; defined so that it is set on every path
		$first_prefix = false;
	}

	$useSubpages = $this->areSubpagesAllowed();

	// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		// @codingStandardsIgnoreEnd

		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link
			# that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- three ] in a row
			# confuse the match; the real problem is with the $e1 regex.
			# See bug 1300.
			#
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			#
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( array( '<', '>' ), array( '&lt;', '&gt;' ), rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = rawurldecode( $m[1] );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = $m[1];

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $origLink, $text );
		} else {
			$link = $origLink;
		}

		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		$unstrip = $this->mStripState->unstripNoWiki( $link );
		$nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
		} else {
			# Bug 4598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			# -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
				)
			) {
				# Bug 24502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				$s = rtrim( $s . $prefix );
				$s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
				continue;
			}

			if ( $ns == NS_FILE ) {
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
				} else {
					$s .= $prefix . $trail;
				}
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				$s = rtrim( $s . "\n" ); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				# Drop a prefix and trail that consist of newlines only
				$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = array();
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				array( $this, $nt, &$options, &$descQuery ) );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		#
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix );
		}
	}
	return $holders;
}
2342 
/**
 * Render a link to a page that is known to exist. The result is run
 * through armorLinks() so that URLs in it are not linked a second time.
 *
 * @param Title $nt
 * @param string $text Link text; defaults to the escaped prefixed title
 * @param array|string $query Query as an array or a CGI-style string
 * @param string $trail Text after the link; the part that
 *   Linker::splitTrail() assigns to the link is drawn into it
 * @param string $prefix Text to put in front of the link text, inside the link
 * @return string HTML
 */
public function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
	$split = Linker::splitTrail( $trail );
	$inside = $split[0];
	$trail = $split[1];

	$queryArray = is_string( $query ) ? wfCgiToArray( $query ) : $query;
	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$link = Linker::linkKnown( $nt, $prefix . $label . $inside, array(), $queryArray );

	return $this->armorLinks( $link ) . $trail;
}
2371 
/**
 * Insert a NOPARSE marker in front of every URL protocol in the text, so
 * that the protocol no longer follows a word boundary directly and the URL
 * is not picked up as a link by later passes.
 *
 * @param string $text
 * @return string
 */
public function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$armored = self::MARKER_PREFIX . 'NOPARSE$1';
	return preg_replace( $protocolPattern, $armored, $text );
}
2386 
/**
 * Whether subpages are enabled in the namespace of the current title.
 *
 * @return bool
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$namespace = $this->mTitle->getNamespace();
	return MWNamespace::hasSubpages( $namespace );
}
2395 
/**
 * Resolve a link target relative to the current title by handing it to
 * Linker::normalizeSubpageLink().
 *
 * @param string $target The link target as written
 * @param string &$text The link text; passed on by reference
 * @return string The resolved target
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$resolved = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
	return $resolved;
}
2407 
/**
 * Close the block that is currently open, if any, and reset the paragraph
 * state.
 *
 * @return string The closing tag followed by a newline, or ''
 */
public function closeParagraph() {
	$closing = ( $this->mLastSection != '' )
		? '</' . $this->mLastSection . ">\n"
		: '';

	$this->mInPre = false;
	$this->mLastSection = '';

	return $closing;
}
2423 
/**
 * Length of the longest common prefix of two strings, counted in bytes.
 *
 * @param string $st1
 * @param string $st2
 * @return int
 */
public function getCommon( $st1, $st2 ) {
	# Nothing past the end of the shorter string can be shared
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	$common = 0;
	while ( $common < $limit && $st1[$common] == $st2[$common] ) {
		$common++;
	}
	return $common;
}
2448 
/**
 * Open the list, and its first item, that belongs to a list prefix
 * character. Any open paragraph is closed first.
 *
 * @param string $char One of * # : ;
 * @return string HTML; an error comment for any other character
 */
public function openList( $char ) {
	$opening = array(
		'*' => "<ul><li>",
		'#' => "<ol><li>",
		':' => "<dl><dd>",
		';' => "<dl><dt>",
	);

	# The paragraph state is reset even if the character turns out to be
	# unknown; in that case the closing tag is not part of the result
	$closed = $this->closeParagraph();

	if ( !is_string( $char ) || !isset( $opening[$char] ) ) {
		return '<!-- ERR 1 -->';
	}

	if ( $char === ';' ) {
		# Remember that a <dt> is open, so that the matching closing tag
		# can be chosen later
		$this->mDTopen = true;
	}

	return $closed . $opening[$char];
}
2476 
/**
 * Close the current list item and open the next one.
 *
 * @param string $char One of * # : ;
 * @return string HTML; an error comment for any other character
 */
public function nextItem( $char ) {
	if ( $char === '*' || $char === '#' ) {
		return "</li>\n<li>";
	}

	$startsTerm = ( $char === ';' );
	if ( !$startsTerm && $char !== ':' ) {
		return '<!-- ERR 2 -->';
	}

	# In a definition list the closing tag depends on what is open now,
	# the opening tag on what starts: ';' a term, ':' a definition
	$close = $this->mDTopen ? "</dt>\n" : "</dd>\n";
	$this->mDTopen = $startsTerm;

	return $close . ( $startsTerm ? '<dt>' : '<dd>' );
}
2502 
/**
 * Produce the markup that closes one list level.
 *
 * Only '*', '#' and ':' are recognised; for ':' the closing element depends
 * on $this->mDTopen, which is cleared when a <dt> was open.
 *
 * @param string $char List prefix character of the level being closed
 * @return string Closing markup, or '<!-- ERR 3 -->' for any other character
 */
public function closeList( $char ) {
	if ( '*' === $char ) {
		return "</li></ul>";
	}
	if ( '#' === $char ) {
		return "</li></ol>";
	}
	if ( ':' !== $char ) {
		return '<!-- ERR 3 -->';
	}

	# Definition list: close the element that is actually open
	if ( !$this->mDTopen ) {
		return "</dd></dl>";
	}
	$this->mDTopen = false;

	return "</dt></dl>";
}
/**
 * Make lists from lines starting with ':', '*', '#', ';' and wrap the
 * remaining lines in paragraph / pre blocks.
 *
 * The text is processed line by line. State that survives between lines is
 * kept in $this->mInPre, $this->mLastSection and $this->mDTopen as well as in
 * the local variables initialised at the top of the method.
 *
 * @param string $text
 * @param bool $linestart Whether $text begins at the start of a line; when
 *   false the first line is copied to the output untouched
 * @return string The processed text
 */
public function doBlockLevels( $text, $linestart ) {

	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = StringUtils::explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Either false, or a pending paragraph opener ('<p>' / '</p><p>') that is
	# only emitted once the following line shows what it should become.
	$paragraphStack = false;
	$inBlockquote = false;

	foreach ( $textLines as $oLine ) {
		# Fix up $linestart
		if ( !$linestart ) {
			$output .= $oLine;
			$linestart = true;
			continue;
		}
		# * = ul
		# # = ol
		# ; = dt
		# : = dd

		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
		$preOpenMatch = preg_match( '/<pre/i', $oLine );
		# If not in a <pre> element, scan for and figure out what prefixes are there.
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$prefix = substr( $oLine, 0, $prefixLength );

			# eh?
			# ; and : are both from definition-lists, so they're equivalent
			# for the purposes of determining whether or not we need to open/close
			# elements.
			$prefix2 = str_replace( ';', ':', $prefix );
			$t = substr( $oLine, $prefixLength );
			$this->mInPre = (bool)$preOpenMatch;
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$prefix = $prefix2 = '';
			$t = $oLine;
		}

		# List generation
		if ( $prefixLength && $lastPrefix === $prefix2 ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $prefix, -1 ) );
			$paragraphStack = false;

			if ( substr( $prefix, -1 ) === ';' ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				$output .= $this->splitDefinitionTerm( $t );
			}
		} elseif ( $prefixLength || $lastPrefixLength ) {
			# We need to open or close prefixes, or both.

			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
			$paragraphStack = false;

			# Close all the prefixes which aren't shared.
			while ( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
				--$lastPrefixLength;
			}

			# Continue the current prefix if appropriate.
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
			}

			# Open prefixes where appropriate.
			if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
				$output .= "\n";
			}
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $prefix, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' === $char ) {
					# Same "; title : definition" split as for a repeated prefix
					$output .= $this->splitDefinitionTerm( $t );
				}
				++$commonPrefixLength;
			}
			if ( !$prefixLength && $lastPrefix ) {
				$output .= "\n";
			}
			$lastPrefix = $prefix2;
		}

		# If we have no prefixes, go to paragraph mode.
		if ( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			# XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match(
				'/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
					. '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
				$t
			);
			$closematch = preg_match(
				'/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
					. '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
					. self::MARKER_PREFIX
					. '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
				$t
			);

			if ( $openmatch || $closematch ) {
				$paragraphStack = false;
				# @todo bug 5718: paragraph closed
				$output .= $this->closeParagraph();
				if ( $preOpenMatch && !$preCloseMatch ) {
					$this->mInPre = true;
				}
				# Walk over every <blockquote>/</blockquote> on the line; the last
				# one seen decides whether we are inside a blockquote afterwards.
				$bqOffset = 0;
				while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset ) ) {
					$inBlockquote = !$bqMatch[1][0]; // is this a close tag?
					$bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
				}
				$inBlockElem = !$closematch;
			} elseif ( !$inBlockElem && !$this->mInPre ) {
				if ( ' ' == substr( $t, 0, 1 )
					&& ( $this->mLastSection === 'pre' || trim( $t ) != '' )
					&& !$inBlockquote
				) {
					# pre
					if ( $this->mLastSection !== 'pre' ) {
						$paragraphStack = false;
						$output .= $this->closeParagraph() . '<pre>';
						$this->mLastSection = 'pre';
					}
					$t = substr( $t, 1 );
				} else {
					# paragraph
					if ( trim( $t ) === '' ) {
						if ( $paragraphStack ) {
							$output .= $paragraphStack . '<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ( $this->mLastSection !== 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} elseif ( $this->mLastSection !== 'p' ) {
							$output .= $this->closeParagraph() . '<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# somewhere above we forget to get out of pre block (bug 785)
		if ( $preCloseMatch && $this->mInPre ) {
			$this->mInPre = false;
		}
		# While a paragraph opener is pending, the (blank) line itself is not output
		if ( $paragraphStack === false ) {
			$output .= $t;
			if ( $prefixLength === 0 ) {
				$output .= "\n";
			}
		}
	}
	# Close every list level that is still open after the last line
	while ( $prefixLength ) {
		$output .= $this->closeList( $prefix2[$prefixLength - 1] );
		--$prefixLength;
		if ( !$prefixLength ) {
			$output .= "\n";
		}
	}
	if ( $this->mLastSection != '' ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	return $output;
}

/**
 * Split the text of a ';' line into term and definition at the first colon
 * found by findColonNoLinks().
 *
 * @param string &$t Text after the list prefix; replaced by the definition
 *   part when a colon is found, otherwise left untouched
 * @return string The term followed by the <dt>-to-<dd> transition markup,
 *   or '' when no colon was found
 */
private function splitDefinitionTerm( &$t ) {
	$term = $definition = '';
	if ( $this->findColonNoLinks( $t, $term, $definition ) === false ) {
		return '';
	}
	$t = $definition;

	return $term . $this->nextItem( ':' );
}
2740 
/**
 * Split up a string on ':', ignoring any occurrences inside tags
 * to prevent illegal overlapping.
 *
 * @param string $str The string to split
 * @param string &$before Set to everything before the ':' (only on success)
 * @param string &$after Set to everything after the ':' (only on success)
 * @throws MWException If the state machine reaches an unknown state
 * @return int|bool Byte offset of the ':' that was used for the split,
 *   or false if no usable colon was found
 */
public function findColonNoLinks( $str, &$before, &$after ) {

	$pos = strpos( $str, ':' );
	if ( $pos === false ) {
		# Nothing to find!
		return false;
	}

	$lt = strpos( $str, '<' );
	if ( $lt === false || $lt > $pos ) {
		# Easy; no tag nesting to worry about
		$before = substr( $str, 0, $pos );
		$after = substr( $str, $pos + 1 );
		return $pos;
	}

	# Ugly state machine to walk through avoiding tags.
	$state = self::COLON_STATE_TEXT;
	# Number of opened-but-not-yet-closed tags
	$stack = 0;
	$len = strlen( $str );
	for ( $i = 0; $i < $len; $i++ ) {
		$c = $str[$i];

		switch ( $state ) {
			# (Using the number is a performance hack for common cases)
			case 0: # self::COLON_STATE_TEXT:
				switch ( $c ) {
					case "<":
						# Could be either a <start> tag or an </end> tag
						$state = self::COLON_STATE_TAGSTART;
						break;
					case ":":
						if ( $stack == 0 ) {
							# We found it!
							$before = substr( $str, 0, $i );
							$after = substr( $str, $i + 1 );
							return $i;
						}
						# Embedded in a tag; don't break it.
						break;
					default:
						# Skip ahead looking for something interesting
						$colon = strpos( $str, ':', $i );
						if ( $colon === false ) {
							# Nothing else interesting
							return false;
						}
						$lt = strpos( $str, '<', $i );
						if ( $stack === 0 ) {
							if ( $lt === false || $colon < $lt ) {
								# We found it!
								$before = substr( $str, 0, $colon );
								$after = substr( $str, $colon + 1 );
								# Report the colon's offset, not the scan position,
								# so all success paths return the same thing.
								return $colon;
							}
						}
						if ( $lt === false ) {
							# Nothing else interesting to find; abort!
							# We're nested, but there's no close tags left. Abort!
							break 2;
						}
						# Skip ahead to next tag start
						$i = $lt;
						$state = self::COLON_STATE_TAGSTART;
				}
				break;
			case 1: # self::COLON_STATE_TAG:
				# In a <tag>
				switch ( $c ) {
					case ">":
						$stack++;
						$state = self::COLON_STATE_TEXT;
						break;
					case "/":
						# Slash may be followed by >?
						$state = self::COLON_STATE_TAGSLASH;
						break;
					default:
						# ignore
				}
				break;
			case 2: # self::COLON_STATE_TAGSTART:
				switch ( $c ) {
					case "/":
						$state = self::COLON_STATE_CLOSETAG;
						break;
					case "!":
						$state = self::COLON_STATE_COMMENT;
						break;
					case ">":
						# Illegal early close? This shouldn't happen D:
						$state = self::COLON_STATE_TEXT;
						break;
					default:
						$state = self::COLON_STATE_TAG;
				}
				break;
			case 3: # self::COLON_STATE_CLOSETAG:
				# In a </tag>
				if ( $c === ">" ) {
					$stack--;
					if ( $stack < 0 ) {
						wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
						return false;
					}
					$state = self::COLON_STATE_TEXT;
				}
				break;
			case self::COLON_STATE_TAGSLASH:
				if ( $c === ">" ) {
					# Yes, a self-closed tag <blah/>
					$state = self::COLON_STATE_TEXT;
				} else {
					# Probably we're jumping the gun, and this is an attribute
					$state = self::COLON_STATE_TAG;
				}
				break;
			case 5: # self::COLON_STATE_COMMENT:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASH;
				}
				break;
			case self::COLON_STATE_COMMENTDASH:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASHDASH;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			case self::COLON_STATE_COMMENTDASHDASH:
				if ( $c === ">" ) {
					$state = self::COLON_STATE_TEXT;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			default:
				throw new MWException( "State machine error in " . __METHOD__ );
		}
	}
	if ( $stack > 0 ) {
		wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
		return false;
	}
	return false;
}
2897 
/**
 * Return the value of a magic variable (like PAGENAME).
 *
 * Most values are stored in $this->mVarCache under $index before being
 * returned; the cases that `return` directly from the switch (site
 * configuration values and values supplied by the
 * ParserGetVariableValueSwitch hook) bypass that cache.
 *
 * @param string $index Magic variable ID, e.g. 'pagename'
 * @param bool|PPFrame $frame Only forwarded to the ParserGetVariableValueSwitch hook
 * @throws MWException If no title has been set on the parser
 * @return string|int|null
 */
public function getVariableValue( $index, $frame = false ) {
	# Configuration globals read by the switch below
	global $wgContLang, $wgSitename, $wgServer, $wgServerName;
	global $wgArticlePath, $wgScriptPath, $wgStylePath, $wgLanguageCode;

	if ( is_null( $this->mTitle ) ) {
		// If no title set, bad things are going to happen
		// later. Title should always be set since this
		// should only be called in the middle of a parse
		// operation (but the unit-tests do funky stuff)
		throw new MWException( __METHOD__ . ' Should only be '
			. ' called while parsing (no title set)' );
	}

	# The hook may veto use of the cache by returning false
	if ( Hooks::run( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) {
		if ( isset( $this->mVarCache[$index] ) ) {
			return $this->mVarCache[$index];
		}
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', array( &$this, &$ts ) );

	$pageLang = $this->getFunctionLang();

	switch ( $index ) {
		case '!':
			$value = '|';
			break;
		case 'currentmonth':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
			break;
		case 'currentmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentday':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
			break;
		case 'currentday2':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
			break;
		case 'localmonth':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
			break;
		case 'localmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localday':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
			break;
		case 'localday2':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
			break;
		case 'pagename':
			$value = wfEscapeWikiText( $this->mTitle->getText() );
			break;
		case 'pagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
			break;
		case 'fullpagename':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
			break;
		case 'subpagename':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
			break;
		case 'subpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$value = wfEscapeWikiText( $this->mTitle->getRootText() );
			break;
		case 'rootpagenamee':
			$value = wfEscapeWikiText( wfUrlEncode( str_replace(
				' ',
				'_',
				$this->mTitle->getRootText()
			) ) );
			break;
		case 'basepagename':
			$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
			break;
		case 'basepagenamee':
			$value = wfEscapeWikiText( wfUrlEncode( str_replace(
				' ',
				'_',
				$this->mTitle->getBaseText()
			) ) );
			break;
		case 'talkpagename':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
			} else {
				$value = '';
			}
			break;
		case 'talkpagenamee':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
			} else {
				$value = '';
			}
			break;
		case 'subjectpagename':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
			break;
		case 'pageid': // requested in bug 23427
			$pageid = $this->getTitle()->getArticleID();
			if ( $pageid == 0 ) {
				# 0 means the page doesn't exist in the database,
				# which means the user is previewing a new page.
				# The vary-revision flag must be set, because the magic word
				# will have a different value once the page is saved.
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
			}
			$value = $pageid ? $pageid : null;
			break;
		case 'revisionid':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
			$value = $this->mRevisionId;
			break;
		case 'revisionday':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
			break;
		case 'revisionday2':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 6, 2 );
			break;
		case 'revisionmonth':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 4, 2 );
			break;
		case 'revisionmonth1':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
			break;
		case 'revisionyear':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 0, 4 );
			break;
		case 'revisiontimestamp':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
			$value = $this->getRevisionTimestamp();
			break;
		case 'revisionuser':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
			$value = $this->getRevisionUser();
			break;
		case 'revisionsize':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
			$value = $this->getRevisionSize();
			break;
		case 'namespace':
			$value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacee':
			$value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacenumber':
			$value = $this->mTitle->getNamespace();
			break;
		case 'talkspace':
			$value = $this->mTitle->canTalk()
				? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
				: '';
			break;
		case 'talkspacee':
			$value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
			break;
		case 'subjectspace':
			$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
			break;
		case 'currentdayname':
			$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
			break;
		case 'currentyear':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
			break;
		case 'currenttime':
			$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
			break;
		case 'currentweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
			break;
		case 'currentdow':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
			break;
		case 'localdayname':
			$value = $pageLang->getWeekdayName(
				(int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
			);
			break;
		case 'localyear':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
			break;
		case 'localtime':
			$value = $pageLang->time(
				MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
				false,
				false
			);
			break;
		case 'localhour':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
			break;
		case 'localweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
			break;
		case 'localdow':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
			break;
		case 'numberofarticles':
			$value = $pageLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $pageLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $pageLang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$value = $pageLang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$value = $pageLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$value = $pageLang->formatNum( SiteStats::edits() );
			break;
		case 'currenttimestamp':
			$value = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
			break;
		case 'currentversion':
			# NOTE(review): assignment restored; without it $value is undefined here
			$value = SpecialVersion::getVersion();
			break;
		case 'articlepath':
			return $wgArticlePath;
		case 'sitename':
			return $wgSitename;
		case 'server':
			return $wgServer;
		case 'servername':
			return $wgServerName;
		case 'scriptpath':
			return $wgScriptPath;
		case 'stylepath':
			return $wgStylePath;
		case 'directionmark':
			return $pageLang->getDirMark();
		case 'contentlanguage':
			return $wgLanguageCode;
		case 'cascadingsources':
			# NOTE(review): assignment restored; without it $value is undefined here
			$value = CoreParserFunctions::cascadingsources( $this );
			break;
		default:
			# Unknown ID: let extensions supply the value; it is returned uncached
			$ret = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				array( &$this, &$this->mVarCache, &$index, &$ret, &$frame )
			);

			return $ret;
	}

	if ( $index ) {
		$this->mVarCache[$index] = $value;
	}

	return $value;
}
3255 
/**
 * Build the MagicWordArray lookups stored in $this->mVariables and
 * $this->mSubstWords from the ID lists provided by MagicWord.
 */
public function initialiseVariables() {
	$idsForVariables = MagicWord::getVariableIDs();
	$idsForSubst = MagicWord::getSubstIDs();

	$variableWords = new MagicWordArray( $idsForVariables );
	$substWords = new MagicWordArray( $idsForSubst );

	$this->mVariables = $variableWords;
	$this->mSubstWords = $substWords;
}
3268 
/**
 * Run the text through the parser's preprocessor.
 *
 * @param string $text Text to preprocess
 * @param int $flags Flags forwarded unchanged to preprocessToObj()
 *   (see the PTD_* class constants)
 * @return mixed Whatever the preprocessor's preprocessToObj() returns
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3295 
/**
 * Split a string into its leading whitespace, trimmed content and
 * trailing whitespace.
 *
 * @param string $s
 * @return array Three strings: leading whitespace, trimmed text, trailing whitespace
 */
public static function splitWhitespace( $s ) {
	$withoutLead = ltrim( $s );
	$leadLength = strlen( $s ) - strlen( $withoutLead );
	$lead = substr( $s, 0, $leadLength );

	$core = rtrim( $withoutLead );
	$tailLength = strlen( $withoutLead ) - strlen( $core );
	# A negative offset of 0 would return the whole string, so guard it
	$tail = ( $tailLength > 0 ) ? substr( $withoutLead, -$tailLength ) : '';

	return array( $lead, $core, $tail );
}
3315 
/**
 * Preprocess the text and expand it in the given frame.
 *
 * Empty text, and text longer than the max include size from the parser
 * options, is returned unchanged.
 *
 * @param string $text The text to transform
 * @param bool|PPFrame|array $frame A PPFrame to expand in; false creates a
 *   new frame; any other value is handed to newCustomFrame()
 * @param bool $argsOnly When true, expansion is done with PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text?
	$length = strlen( $text );
	if ( $length < 1 ) {
		return $text;
	}
	# Prevent too big inclusions!
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			wfDebug( __METHOD__ . " called using plain parameters instead of "
				. "a PPFrame instance. Creating custom frame.\n" );
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$tree = $this->preprocessToDom( $text );
	$expandFlags = 0;
	if ( $argsOnly ) {
		$expandFlags = PPFrame::NO_TEMPLATES;
	}

	return $frame->expand( $tree, $expandFlags );
}
3357 
/**
 * Turn a list of "name=value" / positional argument strings into an
 * associative array.
 *
 * Arguments without '=' are numbered 1, 2, 3, ... in order of appearance.
 * Arguments with '=' are split at the first '='; both name and value are
 * trimmed. A later entry overwrites an earlier one that has the same key.
 *
 * @param array $args List of argument strings
 * @return array Map of argument name (or 1-based position) to value
 */
public static function createAssocArgs( $args ) {
	$assocArgs = array();
	$index = 1;
	foreach ( $args as $arg ) {
		$eqpos = strpos( $arg, '=' );
		if ( $eqpos === false ) {
			# Positional argument; whitespace is kept as-is
			$assocArgs[$index++] = $arg;
			continue;
		}

		# trim() always yields a string (an empty one when there is nothing
		# after the '='), so no extra false-checks are needed here.
		$name = trim( substr( $arg, 0, $eqpos ) );
		$assocArgs[$name] = trim( substr( $arg, $eqpos + 1 ) );
	}

	return $assocArgs;
}
3386 
/**
 * Record a limitation warning on the parser output and add the matching
 * tracking category.
 *
 * The warning text comes from the message "<limitationType>-warning" and the
 * category from "<limitationType>-category".
 *
 * @param string $limitationType Message key prefix of the limitation
 * @param string|int|null $current Current value, passed as first numeric parameter
 * @param string|int|null $max Maximum allowed value, passed as second numeric parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	$warningKey = "$limitationType-warning";
	$categoryKey = "$limitationType-category";

	# Passing $current and $max is harmless even if the message does not use them.
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( $warningKey )->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( $categoryKey );
}
3422 
3435  public function braceSubstitution( $piece, $frame ) {
3436 
3437  // Flags
3438 
3439  // $text has been filled
3440  $found = false;
3441  // wiki markup in $text should be escaped
3442  $nowiki = false;
3443  // $text is HTML, armour it against wikitext transformation
3444  $isHTML = false;
3445  // Force interwiki transclusion to be done in raw mode not rendered
3446  $forceRawInterwiki = false;
3447  // $text is a DOM node needing expansion in a child frame
3448  $isChildObj = false;
3449  // $text is a DOM node needing expansion in the current frame
3450  $isLocalObj = false;
3451 
3452  # Title object, where $text came from
3453  $title = false;
3454 
3455  # $part1 is the bit before the first |, and must contain only title characters.
3456  # Various prefixes will be stripped from it later.
3457  $titleWithSpaces = $frame->expand( $piece['title'] );
3458  $part1 = trim( $titleWithSpaces );
3459  $titleText = false;
3460 
3461  # Original title text preserved for various purposes
3462  $originalTitle = $part1;
3463 
3464  # $args is a list of argument nodes, starting from index 0, not including $part1
3465  # @todo FIXME: If piece['parts'] is null then the call to getLength()
3466  # below won't work b/c this $args isn't an object
3467  $args = ( null == $piece['parts'] ) ? array() : $piece['parts'];
3468 
3469  $profileSection = null; // profile templates
3470 
3471  # SUBST
3472  if ( !$found ) {
3473  $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3474 
3475  # Possibilities for substMatch: "subst", "safesubst" or FALSE
3476  # Decide whether to expand template or keep wikitext as-is.
3477  if ( $this->ot['wiki'] ) {
3478  if ( $substMatch === false ) {
3479  $literal = true; # literal when in PST with no prefix
3480  } else {
3481  $literal = false; # expand when in PST with subst: or safesubst:
3482  }
3483  } else {
3484  if ( $substMatch == 'subst' ) {
3485  $literal = true; # literal when not in PST with plain subst:
3486  } else {
3487  $literal = false; # expand when not in PST with safesubst: or no prefix
3488  }
3489  }
3490  if ( $literal ) {
3491  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3492  $isLocalObj = true;
3493  $found = true;
3494  }
3495  }
3496 
3497  # Variables
3498  if ( !$found && $args->getLength() == 0 ) {
3499  $id = $this->mVariables->matchStartToEnd( $part1 );
3500  if ( $id !== false ) {
3501  $text = $this->getVariableValue( $id, $frame );
3502  if ( MagicWord::getCacheTTL( $id ) > -1 ) {
3503  $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
3504  }
3505  $found = true;
3506  }
3507  }
3508 
3509  # MSG, MSGNW and RAW
3510  if ( !$found ) {
3511  # Check for MSGNW:
3512  $mwMsgnw = MagicWord::get( 'msgnw' );
3513  if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3514  $nowiki = true;
3515  } else {
3516  # Remove obsolete MSG:
3517  $mwMsg = MagicWord::get( 'msg' );
3518  $mwMsg->matchStartAndRemove( $part1 );
3519  }
3520 
3521  # Check for RAW:
3522  $mwRaw = MagicWord::get( 'raw' );
3523  if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3524  $forceRawInterwiki = true;
3525  }
3526  }
3527 
3528  # Parser functions
3529  if ( !$found ) {
3530  $colonPos = strpos( $part1, ':' );
3531  if ( $colonPos !== false ) {
3532  $func = substr( $part1, 0, $colonPos );
3533  $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) );
3534  for ( $i = 0; $i < $args->getLength(); $i++ ) {
3535  $funcArgs[] = $args->item( $i );
3536  }
3537  try {
3538  $result = $this->callParserFunction( $frame, $func, $funcArgs );
3539  } catch ( Exception $ex ) {
3540  throw $ex;
3541  }
3542 
3543  # The interface for parser functions allows for extracting
3544  # flags into the local scope. Extract any forwarded flags
3545  # here.
3546  extract( $result );
3547  }
3548  }
3549 
3550  # Finish mangling title and then check for loops.
3551  # Set $title to a Title object and $titleText to the PDBK
3552  if ( !$found ) {
3553  $ns = NS_TEMPLATE;
3554  # Split the title into page and subpage
3555  $subpage = '';
3556  $relative = $this->maybeDoSubpageLink( $part1, $subpage );
3557  if ( $part1 !== $relative ) {
3558  $part1 = $relative;
3559  $ns = $this->mTitle->getNamespace();
3560  }
3561  $title = Title::newFromText( $part1, $ns );
3562  if ( $title ) {
3563  $titleText = $title->getPrefixedText();
3564  # Check for language variants if the template is not found
3565  if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
3566  $this->getConverterLanguage()->findVariantLink( $part1, $title, true );
3567  }
3568  # Do recursion depth check
3569  $limit = $this->mOptions->getMaxTemplateDepth();
3570  if ( $frame->depth >= $limit ) {
3571  $found = true;
3572  $text = '<span class="error">'
3573  . wfMessage( 'parser-template-recursion-depth-warning' )
3574  ->numParams( $limit )->inContentLanguage()->text()
3575  . '</span>';
3576  }
3577  }
3578  }
3579 
3580  # Load from database
3581  if ( !$found && $title ) {
3582  $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3583  if ( !$title->isExternal() ) {
3584  if ( $title->isSpecialPage()
3585  && $this->mOptions->getAllowSpecialInclusion()
3586  && $this->ot['html']
3587  ) {
3588  // Pass the template arguments as URL parameters.
3589  // "uselang" will have no effect since the Language object
3590  // is forced to the one defined in ParserOptions.
3591  $pageArgs = array();
3592  $argsLength = $args->getLength();
3593  for ( $i = 0; $i < $argsLength; $i++ ) {
3594  $bits = $args->item( $i )->splitArg();
3595  if ( strval( $bits['index'] ) === '' ) {
3596  $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3597  $value = trim( $frame->expand( $bits['value'] ) );
3598  $pageArgs[$name] = $value;
3599  }
3600  }
3601 
3602  // Create a new context to execute the special page
3603  $context = new RequestContext;
3604  $context->setTitle( $title );
3605  $context->setRequest( new FauxRequest( $pageArgs ) );
3606  $context->setUser( $this->getUser() );
3607  $context->setLanguage( $this->mOptions->getUserLangObj() );
3608  $ret = SpecialPageFactory::capturePath( $title, $context );
3609  if ( $ret ) {
3610  $text = $context->getOutput()->getHTML();
3611  $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3612  $found = true;
3613  $isHTML = true;
3614  $this->disableCache();
3615  }
3616  } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
3617  $found = false; # access denied
3618  wfDebug( __METHOD__ . ": template inclusion denied for " .
3619  $title->getPrefixedDBkey() . "\n" );
3620  } else {
3621  list( $text, $title ) = $this->getTemplateDom( $title );
3622  if ( $text !== false ) {
3623  $found = true;
3624  $isChildObj = true;
3625  }
3626  }
3627 
3628  # If the title is valid but undisplayable, make a link to it
3629  if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3630  $text = "[[:$titleText]]";
3631  $found = true;
3632  }
3633  } elseif ( $title->isTrans() ) {
3634  # Interwiki transclusion
3635  if ( $this->ot['html'] && !$forceRawInterwiki ) {
3636  $text = $this->interwikiTransclude( $title, 'render' );
3637  $isHTML = true;
3638  } else {
3639  $text = $this->interwikiTransclude( $title, 'raw' );
3640  # Preprocess it like a template
3641  $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3642  $isChildObj = true;
3643  }
3644  $found = true;
3645  }
3646 
3647  # Do infinite loop check
3648  # This has to be done after redirect resolution to avoid infinite loops via redirects
3649  if ( !$frame->loopCheck( $title ) ) {
3650  $found = true;
3651  $text = '<span class="error">'
3652  . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3653  . '</span>';
3654  wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
3655  }
3656  }
3657 
3658  # If we haven't found text to substitute by now, we're done
3659  # Recover the source wikitext and return it
3660  if ( !$found ) {
3661  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3662  if ( $profileSection ) {
3663  $this->mProfiler->scopedProfileOut( $profileSection );
3664  }
3665  return array( 'object' => $text );
3666  }
3667 
3668  # Expand DOM-style return values in a child frame
3669  if ( $isChildObj ) {
3670  # Clean up argument array
3671  $newFrame = $frame->newChild( $args, $title );
3672 
3673  if ( $nowiki ) {
3674  $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3675  } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3676  # Expansion is eligible for the empty-frame cache
3677  $text = $newFrame->cachedExpand( $titleText, $text );
3678  } else {
3679  # Uncached expansion
3680  $text = $newFrame->expand( $text );
3681  }
3682  }
3683  if ( $isLocalObj && $nowiki ) {
3684  $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3685  $isLocalObj = false;
3686  }
3687 
3688  if ( $profileSection ) {
3689  $this->mProfiler->scopedProfileOut( $profileSection );
3690  }
3691 
3692  # Replace raw HTML by a placeholder
3693  if ( $isHTML ) {
3694  $text = $this->insertStripItem( $text );
3695  } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3696  # Escape nowiki-style return values
3697  $text = wfEscapeWikiText( $text );
3698  } elseif ( is_string( $text )
3699  && !$piece['lineStart']
3700  && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3701  ) {
3702  # Bug 529: if the template begins with a table or block-level
3703  # element, it should be treated as beginning a new line.
3704  # This behavior is somewhat controversial.
3705  $text = "\n" . $text;
3706  }
3707 
3708  if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3709  # Error, oversize inclusion
3710  if ( $titleText !== false ) {
3711  # Make a working, properly escaped link if possible (bug 23588)
3712  $text = "[[:$titleText]]";
3713  } else {
3714  # This will probably not be a working link, but at least it may
3715  # provide some hint of where the problem is
3716  preg_replace( '/^:/', '', $originalTitle );
3717  $text = "[[:$originalTitle]]";
3718  }
3719  $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3720  . 'post-expand include size too large -->' );
3721  $this->limitationWarn( 'post-expand-template-inclusion' );
3722  }
3723 
3724  if ( $isLocalObj ) {
3725  $ret = array( 'object' => $text );
3726  } else {
3727  $ret = array( 'text' => $text );
3728  }
3729 
3730  return $ret;
3731  }
3732 
/**
 * Call a parser function and return an array with text and flags.
 *
 * The returned array always has a 'found' key. When no function synonym
 * matches $function, array( 'found' => false ) is returned. Otherwise the
 * array holds 'found' (true unless the hook supplied its own value), 'text',
 * any further flags returned by the hook, and 'isChildObj' => true when the
 * text was converted to a preprocessor DOM (hook returned a false 'noparse').
 *
 * @param PPFrame $frame The current frame, used to expand node arguments
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments to the function; PPNode objects or strings
 * @throws MWException If the registered hook is not callable
 * @return array
 */
public function callParserFunction( $frame, $function, array $args = array() ) {
	# Needed for the case-insensitive synonym lookup below
	global $wgContLang;

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return array( 'found' => false );
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		throw new MWException( "Tag hook for $function is not callable\n" );
	}

	# The parser itself is always the first argument handed to the hook
	$allArgs = array( &$this );
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = array();
		foreach ( $args as $k => $v ) {
			# The first argument is passed through untouched even if it is a string
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				# Named (non-integer key) argument: rebuild the "name=value" form
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = array(
			'found' => true,
			'text' => $result,
		);
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# Array union: only adds 'found' when the hook did not set it
		$result += array(
			'found' => true,
		);
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3839 
/**
 * Get the preprocessor DOM of a template, using the per-parser caches.
 *
 * Results, including failed fetches (stored as false), are kept in
 * $this->mTplDomCache keyed by prefixed DB key. When the fetch ends at a
 * different title than the one requested, the mapping is remembered in
 * $this->mTplRedirCache so later calls go straight to the final title.
 *
 * @param Title $title Title of the template to load
 * @return array Two elements: the DOM (or false if the text could not be
 *   fetched) and the title the DOM belongs to
 */
public function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	# Follow a previously recorded redirect before consulting the DOM cache
	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return array( $this->mTplDomCache[$titleText], $title );
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		$this->mTplDomCache[$titleText] = false;
		return array( false, $title );
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			array( $title->getNamespace(), $title->getDBkey() );
	}

	return array( $dom, $title );
}
3879 
/**
 * Fetch the current revision of a title, memoised per parser instance.
 *
 * The lookup is delegated to the callback configured in ParserOptions and
 * its result is kept in a lazily created MapCacheLRU of 100 entries, keyed
 * by the prefixed DB key of the title.
 *
 * @param Title $title
 * @return mixed Whatever the current-revision callback returned for $title
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$key = $title->getPrefixedDBkey();

	# Create the cache on first use
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}

	if ( $this->currentRevisionCache->has( $key ) ) {
		return $this->currentRevisionCache->get( $key );
	}

	$this->currentRevisionCache->set(
		$key,
		// Defaults to Parser::statelessFetchRevision()
		call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
	);

	return $this->currentRevisionCache->get( $key );
}
3904 
/**
 * Wrapper around Revision::newFromTitle() usable as a revision callback.
 *
 * @param Title $title
 * @param Parser|bool $parser Accepted for callback compatibility; not used
 * @return mixed Result of Revision::newFromTitle( $title )
 */
public static function statelessFetchRevision( $title, $parser = false ) {
	$revision = Revision::newFromTitle( $title );

	return $revision;
}
3917 
/**
 * Fetch the unparsed text of a template and register a reference to it.
 *
 * Runs the template callback from ParserOptions, neutralises U+007F in the
 * returned text, and records every reported dependency on the parser output.
 *
 * @param Title $title
 * @return array Two elements: the text (non-strings are passed through
 *   unchanged) and the final title
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text.
		$text = strtr( $text, "\x7f", "?" );
	}

	if ( isset( $fetched['finalTitle'] ) ) {
		$finalTitle = $fetched['finalTitle'];
	} else {
		$finalTitle = $title;
	}

	if ( !isset( $fetched['deps'] ) ) {
		return array( $text, $finalTitle );
	}

	foreach ( $fetched['deps'] as $dependency ) {
		$this->mOutput->addTemplate(
			$dependency['title'],
			$dependency['page_id'],
			$dependency['rev_id']
		);
		if ( $dependency['title']->equals( $this->getTitle() ) ) {
			// If we transclude ourselves, the final result
			// will change based on the new version of the page
			$this->mOutput->setFlag( 'vary-revision' );
		}
	}

	return array( $text, $finalTitle );
}
3945 
/**
 * Fetch the unparsed text of a template and register a reference to it.
 *
 * Convenience wrapper that discards the final title.
 *
 * @param Title $title
 * @return mixed First element of the fetchTemplateAndTitle() result
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );

	return $text;
}
3955 
/**
 * Static function to get a template.
 *
 * Fetches the text for $title, following at most one redirect, and collects
 * the titles and revisions consulted along the way.
 *
 * @param Title $title
 * @param Parser|bool $parser Parser whose revision cache should be used, or
 *   false to load the revision directly
 * @return array With keys 'text' (string, or false when nothing could be
 *   fetched), 'finalTitle' (last title whose content was read) and 'deps'
 *   (list of arrays with 'title', 'page_id' and 'rev_id')
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	# Needed for the MediaWiki-namespace message fallback below
	global $wgContLang;

	$text = $skip = false;
	$finalTitle = $title;
	$deps = array();

	# Loop to fetch the article, with up to 1 redirect
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			array( $parser, $title, &$skip, &$id ) );

		if ( $skip ) {
			$text = false;
			$deps[] = array(
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			);
			break;
		}
		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			$rev = Revision::newFromTitle( $title );
		}
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = LinkCache::singleton();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = array(
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id );
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = array(
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id );
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but the title may still name an interface message
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		# A non-object redirect target ends the loop via the is_object() check
		$title = $content->getRedirectTarget();
	}
	return array(
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps );
}
4045 
/**
 * Fetch a file and register a reference to it.
 *
 * Convenience wrapper that discards the final title.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed First element of the fetchFileAndTitle() result
 */
public function fetchFile( $title, $options = array() ) {
	list( $file ) = $this->fetchFileAndTitle( $title, $options );

	return $file;
}
4057 
/**
 * Fetch a file and its title and register a reference to it.
 *
 * The requested title is always registered on the parser output; when the
 * file found reports a different title, that one is registered as well and
 * returned in place of the requested title.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (or a false value) and the title
 */
public function fetchFileAndTitle( $title, $options = array() ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( !$file || $title->equals( $file->getTitle() ) ) {
		return array( $file, $title );
	}

	# Update fetched file title
	$title = $file->getTitle();
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	return array( $file, $title );
}
4079 
/**
 * Helper for fetchFileAndTitle(): look a file up without registering it.
 *
 * @param Title $title
 * @param array $options Recognised keys: 'broken' (forces a false result)
 *   and 'sha1' (look up by key instead of by name); the whole array is also
 *   handed to the lookup function
 * @return mixed The file found, or false when 'broken' is set
 */
protected function fetchFileNoRegister( $title, $options = array() ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return wfFindFile( $title, $options );
}
4100 
/**
 * Transclude an interwiki link.
 *
 * @param Title $title Title of the remote page
 * @param string $action Value for the 'action' URL parameter
 * @return string The fetched text, or a content-language error message when
 *   scary transcluding is disabled or the URL is longer than 255 bytes
 */
public function interwikiTransclude( $title, $action ) {
	# Configuration switch that gates the whole feature
	global $wgEnableScaryTranscluding;

	if ( !$wgEnableScaryTranscluding ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( array( 'action' => $action ) );

	if ( strlen( $url ) > 255 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}
	return $this->fetchScaryTemplateMaybeFromCache( $url );
}
4123 
/**
 * Fetch the content behind a URL, going through the 'transcache' table.
 *
 * A cached row is used when its tc_time is not older than
 * $wgTranscludeCacheExpiry seconds; otherwise the URL is requested over
 * HTTP and a successful response is written back to the table.
 *
 * @param string $url
 * @return string The cached or fetched content, or a content-language error
 *   message when the request failed
 */
public function fetchScaryTemplateMaybeFromCache( $url ) {
	# Maximum age, in seconds, of a usable cache row
	global $wgTranscludeCacheExpiry;

	$dbr = wfGetDB( DB_SLAVE );
	$tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
	$obj = $dbr->selectRow( 'transcache', array( 'tc_time', 'tc_contents' ),
		array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ) );
	if ( $obj ) {
		return $obj->tc_contents;
	}

	$req = MWHttpRequest::factory( $url, array(), __METHOD__ );
	$status = $req->execute(); // Status object
	if ( $status->isOK() ) {
		$text = $req->getContent();
	} elseif ( $req->getStatus() != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
	} else {
		return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
	}

	# Store the fresh copy, replacing any existing row for this URL
	$dbw = wfGetDB( DB_MASTER );
	$dbw->replace( 'transcache', array( 'tc_url' ), array(
		'tc_url' => $url,
		'tc_time' => $dbw->timestamp( time() ),
		'tc_contents' => $text
	) );
	return $text;
}
4158 
/**
 * Triple brace replacement -- used for template arguments.
 *
 * Looks the argument up in $frame. If it is missing, falls back to the
 * supplied default (in HTML and preprocess output, or in wikitext output
 * inside a template) or, failing that, to the reconstructed source.
 *
 * @param array $piece Uses the 'title' and 'parts' entries
 * @param PPFrame $frame
 * @return array Either array( 'object' => ... ) or array( 'text' => ... )
 */
public function argSubstitution( $piece, $frame ) {
	$defaults = $piece['parts'];
	$rawName = $frame->expand( $piece['title'] );
	$value = $frame->getArgument( trim( $rawName ) );
	$node = false;
	$warning = false;

	$missing = ( $value === false );
	if ( $missing && $defaults->getLength() > 0 ) {
		$defaultAllowed = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultAllowed ) {
			# No match in frame, use the supplied default
			$node = $defaults->item( 0 )->getChildren();
		}
	}

	if ( !$this->incrementIncludeSize( 'arg', strlen( $value ) ) ) {
		$warning = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $missing && $node === false ) {
		# No match anywhere
		$node = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $rawName, $defaults );
	}

	if ( $warning !== false ) {
		$value .= $warning;
	}

	if ( $node !== false ) {
		return array( 'object' => $node );
	}

	return array( 'text' => $value );
}
4205 
/**
 * Return the text to be used for a given extension tag.
 *
 * In HTML output, or for function tag hooks, the registered hook is run and
 * its output used; otherwise the tag source is rebuilt. Unless the marker
 * type ends up as 'none', the result is stored in the strip state and a
 * strip marker is returned in its place.
 *
 * Local variable names matter here: a hook may return an array whose extra
 * entries are extract()ed into this scope (for example 'markerType').
 *
 * @param array $params Uses the entries 'name', 'attr', 'inner', 'close'
 *   and the optional 'attributes' array
 * @param PPFrame $frame
 * @throws MWException If a hook is not callable or the marker type is invalid
 * @return string The hook output or a strip marker
 */
public function extensionSubstitution( $params, $frame ) {
	$name = $frame->expand( $params['name'] );
	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : null;
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;
	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ( !$this->ot['html'] && !$isFunctionTag ) {
		# No hook is run: rebuild the tag source
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
			$output = "<$name$attrText>$content$close";
		}
	} else {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			$attributes = $attributes + $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $this->mTagHooks[$name] ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}
			$output = call_user_func_array(
				$this->mTagHooks[$name],
				array( $content, $attributes, $this, $frame )
			);
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}

			$output = call_user_func_array(
				$callback,
				array( &$this, $frame, $content, $attributes )
			);
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			# Extract flags to local scope (to override $markerType)
			$flags = $output;
			$output = $flags[0];
			unset( $flags[0] );
			extract( $flags );
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	}

	if ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
		return $marker;
	}

	if ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
		return $marker;
	}

	throw new MWException( __METHOD__ . ': invalid marker type' );
}
4297 
/**
 * Increment an include size counter.
 *
 * The counter is only changed when the new total stays within the maximum
 * include size from ParserOptions.
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Amount to add
 * @return bool False if adding $size would exceed the limit, true otherwise
 */
public function incrementIncludeSize( $type, $size ) {
	$total = $this->mIncludeSizes[$type] + $size;

	if ( $total > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $total;

	return true;
}
4313 
4320  $this->mExpensiveFunctionCount++;
4321  return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4322  }
4323 
/**
 * Strip double-underscore magic words from the text and act on them.
 *
 * Records the words found in $this->mDoubleUnderscores, updates the TOC
 * state, tracking categories and index policy accordingly, and stores each
 * word found as a page property on the parser output.
 *
 * @param string $text
 * @return string The text with the magic words removed; the first __TOC__
 *   is replaced by the placeholder <!--MWTOC-->
 */
public function doDoubleUnderscore( $text ) {

	# The position of __TOC__ needs to be recorded
	$mw = MagicWord::get( 'toc' );
	if ( $mw->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $mw->replace( '<!--MWTOC-->', $text, 1 );

		# Only keep the first one.
		$text = $mw->replace( '', $text );
	}

	# Now match and remove the rest of them
	$mwa = MagicWord::getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $mwa->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	# An explicit __TOC__ wins over __NOTOC__
	if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}
	# (bug 8068) Allow control over whether robots index a page.
	#
	# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
	# is not desirable, the last one on the page should win.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( $this->mDoubleUnderscores as $key => $val ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4382 
/**
 * Add a tracking category for the page being parsed.
 *
 * Delegates to the parser output, passing the current title along.
 *
 * @param string $msg Message key identifying the tracking category
 * @return mixed Whatever the parser output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$outcome = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );

	return $outcome;
}
4391 
4408  public function formatHeadings( $text, $origText, $isMain = true ) {
4409  global $wgMaxTocLevel, $wgExperimentalHtmlIds;
4410 
4411  # Inhibit editsection links if requested in the page
4412  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4413  $maybeShowEditLink = $showEditLink = false;
4414  } else {
4415  $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
4416  $showEditLink = $this->mOptions->getEditSection();
4417  }
4418  if ( $showEditLink ) {
4419  $this->mOutput->setEditSectionTokens( true );
4420  }
4421 
4422  # Get all headlines for numbering them and adding funky stuff like [edit]
4423  # links - this is for later, but we need the number of headlines right now
4424  $matches = array();
4425  $numMatches = preg_match_all(
4426  '/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
4427  $text,
4428  $matches
4429  );
4430 
4431  # if there are fewer than 4 headlines in the article, do not show TOC
4432  # unless it's been explicitly enabled.
4433  $enoughToc = $this->mShowToc &&
4434  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4435 
4436  # Allow user to stipulate that a page should have a "new section"
4437  # link added via __NEWSECTIONLINK__
4438  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4439  $this->mOutput->setNewSection( true );
4440  }
4441 
4442  # Allow user to remove the "new section"
4443  # link via __NONEWSECTIONLINK__
4444  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4445  $this->mOutput->hideNewSection( true );
4446  }
4447 
4448  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4449  # override above conditions and always show TOC above first header
4450  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4451  $this->mShowToc = true;
4452  $enoughToc = true;
4453  }
4454 
4455  # headline counter
4456  $headlineCount = 0;
4457  $numVisible = 0;
4458 
4459  # Ugh .. the TOC should have neat indentation levels which can be
4460  # passed to the skin functions. These are determined here
4461  $toc = '';
4462  $full = '';
4463  $head = array();
4464  $sublevelCount = array();
4465  $levelCount = array();
4466  $level = 0;
4467  $prevlevel = 0;
4468  $toclevel = 0;
4469  $prevtoclevel = 0;
4470  $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4471  $baseTitleText = $this->mTitle->getPrefixedDBkey();
4472  $oldType = $this->mOutputType;
4473  $this->setOutputType( self::OT_WIKI );
4474  $frame = $this->getPreprocessor()->newFrame();
4475  $root = $this->preprocessToDom( $origText );
4476  $node = $root->getFirstChild();
4477  $byteOffset = 0;
4478  $tocraw = array();
4479  $refers = array();
4480 
4481  $headlines = $numMatches !== false ? $matches[3] : array();
4482 
4483  foreach ( $headlines as $headline ) {
4484  $isTemplate = false;
4485  $titleText = false;
4486  $sectionIndex = false;
4487  $numbering = '';
4488  $markerMatches = array();
4489  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4490  $serial = $markerMatches[1];
4491  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4492  $isTemplate = ( $titleText != $baseTitleText );
4493  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4494  }
4495 
4496  if ( $toclevel ) {
4497  $prevlevel = $level;
4498  }
4499  $level = $matches[1][$headlineCount];
4500 
4501  if ( $level > $prevlevel ) {
4502  # Increase TOC level
4503  $toclevel++;
4504  $sublevelCount[$toclevel] = 0;
4505  if ( $toclevel < $wgMaxTocLevel ) {
4506  $prevtoclevel = $toclevel;
4507  $toc .= Linker::tocIndent();
4508  $numVisible++;
4509  }
4510  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4511  # Decrease TOC level, find level to jump to
4512 
4513  for ( $i = $toclevel; $i > 0; $i-- ) {
4514  if ( $levelCount[$i] == $level ) {
4515  # Found last matching level
4516  $toclevel = $i;
4517  break;
4518  } elseif ( $levelCount[$i] < $level ) {
4519  # Found first matching level below current level
4520  $toclevel = $i + 1;
4521  break;
4522  }
4523  }
4524  if ( $i == 0 ) {
4525  $toclevel = 1;
4526  }
4527  if ( $toclevel < $wgMaxTocLevel ) {
4528  if ( $prevtoclevel < $wgMaxTocLevel ) {
4529  # Unindent only if the previous toc level was shown :p
4530  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4531  $prevtoclevel = $toclevel;
4532  } else {
4533  $toc .= Linker::tocLineEnd();
4534  }
4535  }
4536  } else {
4537  # No change in level, end TOC line
4538  if ( $toclevel < $wgMaxTocLevel ) {
4539  $toc .= Linker::tocLineEnd();
4540  }
4541  }
4542 
4543  $levelCount[$toclevel] = $level;
4544 
4545  # count number of headlines for each level
4546  $sublevelCount[$toclevel]++;
4547  $dot = 0;
4548  for ( $i = 1; $i <= $toclevel; $i++ ) {
4549  if ( !empty( $sublevelCount[$i] ) ) {
4550  if ( $dot ) {
4551  $numbering .= '.';
4552  }
4553  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4554  $dot = 1;
4555  }
4556  }
4557 
4558  # The safe header is a version of the header text safe to use for links
4559 
4560  # Remove link placeholders by the link text.
4561  # <!--LINK number-->
4562  # turns into
4563  # link text with suffix
4564  # Do this before unstrip since link text can contain strip markers
4565  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4566 
4567  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4568  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4569 
4570  # Strip out HTML (first regex removes any tag not allowed)
4571  # Allowed tags are:
4572  # * <sup> and <sub> (bug 8393)
4573  # * <i> (bug 26375)
4574  # * <b> (r105284)
4575  # * <bdi> (bug 72884)
4576  # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
4577  #
4578  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4579  # to allow setting directionality in toc items.
4580  $tocline = preg_replace(
4581  array(
4582  '#<(?!/?(span|sup|sub|bdi|i|b)(?: [^>]*)?>).*?>#',
4583  '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b))(?: .*?)?>#'
4584  ),
4585  array( '', '<$1>' ),
4586  $safeHeadline
4587  );
4588 
4589  # Strip '<span></span>', which is the result from the above if
4590  # <span id="foo"></span> is used to produce an additional anchor
4591  # for a section.
4592  $tocline = str_replace( '<span></span>', '', $tocline );
4593 
4594  $tocline = trim( $tocline );
4595 
4596  # For the anchor, strip out HTML-y stuff period
4597  $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4598  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4599 
4600  # Save headline for section edit hint before it's escaped
4601  $headlineHint = $safeHeadline;
4602 
4603  if ( $wgExperimentalHtmlIds ) {
4604  # For reverse compatibility, provide an id that's
4605  # HTML4-compatible, like we used to.
4606  #
4607  # It may be worth noting, academically, that it's possible for
4608  # the legacy anchor to conflict with a non-legacy headline
4609  # anchor on the page. In this case likely the "correct" thing
4610  # would be to either drop the legacy anchors or make sure
4611  # they're numbered first. However, this would require people
4612  # to type in section names like "abc_.D7.93.D7.90.D7.A4"
4613  # manually, so let's not bother worrying about it.
4614  $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
4615  array( 'noninitial', 'legacy' ) );
4616  $safeHeadline = Sanitizer::escapeId( $safeHeadline );
4617 
4618  if ( $legacyHeadline == $safeHeadline ) {
4619  # No reason to have both (in fact, we can't)
4620  $legacyHeadline = false;
4621  }
4622  } else {
4623  $legacyHeadline = false;
4624  $safeHeadline = Sanitizer::escapeId( $safeHeadline,
4625  'noninitial' );
4626  }
4627 
4628  # HTML names must be case-insensitively unique (bug 10721).
4629  # This does not apply to Unicode characters per
4630  # http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
4631  # @todo FIXME: We may be changing them depending on the current locale.
4632  $arrayKey = strtolower( $safeHeadline );
4633  if ( $legacyHeadline === false ) {
4634  $legacyArrayKey = false;
4635  } else {
4636  $legacyArrayKey = strtolower( $legacyHeadline );
4637  }
4638 
4639  # Create the anchor for linking from the TOC to the section
4640  $anchor = $safeHeadline;
4641  $legacyAnchor = $legacyHeadline;
4642  if ( isset( $refers[$arrayKey] ) ) {
4643  for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4644  $anchor .= "_$i";
4645  $refers["${arrayKey}_$i"] = true;
4646  } else {
4647  $refers[$arrayKey] = true;
4648  }
4649  if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
4650  for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
4651  $legacyAnchor .= "_$i";
4652  $refers["${legacyArrayKey}_$i"] = true;
4653  } else {
4654  $refers[$legacyArrayKey] = true;
4655  }
4656 
4657  # Don't number the heading if it is the only one (looks silly)
4658  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4659  # the two are different if the line contains a link
4660  $headline = Html::element(
4661  'span',
4662  array( 'class' => 'mw-headline-number' ),
4663  $numbering
4664  ) . ' ' . $headline;
4665  }
4666 
4667  if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
4668  $toc .= Linker::tocLine( $anchor, $tocline,
4669  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4670  }
4671 
4672  # Add the section to the section tree
4673  # Find the DOM node for this header
4674  $noOffset = ( $isTemplate || $sectionIndex === false );
4675  while ( $node && !$noOffset ) {
4676  if ( $node->getName() === 'h' ) {
4677  $bits = $node->splitHeading();
4678  if ( $bits['i'] == $sectionIndex ) {
4679  break;
4680  }
4681  }
4682  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4683  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4684  $node = $node->getNextSibling();
4685  }
4686  $tocraw[] = array(
4687  'toclevel' => $toclevel,
4688  'level' => $level,
4689  'line' => $tocline,
4690  'number' => $numbering,
4691  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4692  'fromtitle' => $titleText,
4693  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4694  'anchor' => $anchor,
4695  );
4696 
4697  # give headline the correct <h#> tag
4698  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4699  // Output edit section links as markers with styles that can be customized by skins
4700  if ( $isTemplate ) {
4701  # Put a T flag in the section identifier, to indicate to extractSections()
4702  # that sections inside <includeonly> should be counted.
4703  $editsectionPage = $titleText;
4704  $editsectionSection = "T-$sectionIndex";
4705  $editsectionContent = null;
4706  } else {
4707  $editsectionPage = $this->mTitle->getPrefixedText();
4708  $editsectionSection = $sectionIndex;
4709  $editsectionContent = $headlineHint;
4710  }
4711  // We use a bit of pseudo-xml for editsection markers. The
4712  // language converter is run later on. Using a UNIQ style marker
4713  // leads to the converter screwing up the tokens when it
4714  // converts stuff. And trying to insert strip tags fails too. At
4715  // this point all real inputted tags have already been escaped,
4716  // so we don't have to worry about a user trying to input one of
4717  // these markers directly. We use a page and section attribute
4718  // to stop the language converter from converting these
4719  // important bits of data, but put the headline hint inside a
4720  // content block because the language converter is supposed to
4721  // be able to convert that piece of data.
4722  // Gets replaced with html in ParserOutput::getText
4723  $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4724  $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4725  if ( $editsectionContent !== null ) {
4726  $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4727  } else {
4728  $editlink .= '/>';
4729  }
4730  } else {
4731  $editlink = '';
4732  }
4733  $head[$headlineCount] = Linker::makeHeadline( $level,
4734  $matches['attrib'][$headlineCount], $anchor, $headline,
4735  $editlink, $legacyAnchor );
4736 
4737  $headlineCount++;
4738  }
4739 
4740  $this->setOutputType( $oldType );
4741 
4742  # Never ever show TOC if no headers
4743  if ( $numVisible < 1 ) {
4744  $enoughToc = false;
4745  }
4746 
4747  if ( $enoughToc ) {
4748  if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
4749  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4750  }
4751  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4752  $this->mOutput->setTOCHTML( $toc );
4753  $toc = self::TOC_START . $toc . self::TOC_END;
4754  $this->mOutput->addModules( 'mediawiki.toc' );
4755  }
4756 
4757  if ( $isMain ) {
4758  $this->mOutput->setSections( $tocraw );
4759  }
4760 
4761  # split up and insert constructed headlines
4762  $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4763  $i = 0;
4764 
4765  // build an array of document sections
4766  $sections = array();
4767  foreach ( $blocks as $block ) {
4768  // $head is zero-based, sections aren't.
4769  if ( empty( $head[$i - 1] ) ) {
4770  $sections[$i] = $block;
4771  } else {
4772  $sections[$i] = $head[$i - 1] . $block;
4773  }
4774 
4785  Hooks::run( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) );
4786 
4787  $i++;
4788  }
4789 
4790  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4791  // append the TOC at the beginning
4792  // Top anchor now in skin
4793  $sections[0] = $sections[0] . $toc . "\n";
4794  }
4795 
4796  $full .= join( '', $sections );
4797 
4798  if ( $this->mForceTocPosition ) {
4799  return str_replace( '<!--MWTOC-->', $toc, $full );
4800  } else {
4801  return $full;
4802  }
4803  }
4804 
/**
 * Transform wikitext before it is saved.
 *
 * Starts a parse in OT_WIKI mode, normalises line endings to "\n", runs
 * pstPass2() when the options request a pre-save transform, and finally
 * passes the text through mStripState->unstripBoth().
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title the parse is started with
 * @param User $user User set on the parser while the transform runs
 * @param ParserOptions $options Options the parse is started with
 * @param bool $clearState Whether to take the parser lock and clear state
 * @return string The transformed wikitext
 */
public function preSaveTransform( $text, Title $title, User $user,
    ParserOptions $options, $clearState = true
) {
    if ( $clearState ) {
        # NOTE(review): presumably a scoped lock that is released when this
        # local goes out of scope -- confirm against lock(). It must stay
        # assigned for the whole method.
        $scopedLock = $this->lock();
    }
    $this->startParse( $title, $options, self::OT_WIKI, $clearState );
    $this->setUser( $user );

    # "\r\n" is listed first so that it collapses to a single newline
    # instead of being rewritten as two
    $text = str_replace( array( "\r\n", "\r" ), "\n", $text );

    if ( $options->getPreSaveTransform() ) {
        $text = $this->pstPass2( $text, $user );
    }
    $text = $this->mStripState->unstripBoth( $text );

    # Reset
    $this->setUser( null );

    return $text;
}
4839 
/**
 * Pre-save transform helper: expands variables, replaces signature tildes
 * and applies the "pipe trick" link rewrites, then right-trims the text.
 *
 * NOTE(review): $wgContLang and $timestamp are used below, but no statement
 * visible in this method brings them into scope (the listing numbers skip
 * 4849 and 4857). Confirm against the original file before relying on this
 * listing.
 *
 * @param string $text Wikitext being saved
 * @param User $user User whose signature replaces the tilde sequences
 * @return string Transformed wikitext
 */
4848  private function pstPass2( $text, $user ) {
4850 
4851  # Note: This is the timestamp saved as hardcoded wikitext to
4852  # the database, we use $wgContLang here in order to give
4853  # everyone the same signature and use the default one rather
4854  # than the one selected in each user's preferences.
4855  # (see also bug 12815)
4856  $ts = $this->mOptions->getTimestamp();
4858  $ts = $timestamp->format( 'YmdHis' );
4859  $tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();
4860 
# $d is the date part of a signature: formatted time plus the timezone message
4861  $d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";
4862 
4863  # Variable replacement
4864  # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
4865  $text = $this->replaceVariables( $text );
4866 
4867  # This works almost by chance, as the replaceVariables are done before the getUserSig(),
4868  # which may corrupt this parser instance via its wfMessage()->text() call-
4869 
4870  # Signatures
4871  $sigText = $this->getUserSig( $user );
# strtr() with an array always tries the longest key first, so five tildes
# are never misread as four tildes followed by one
4872  $text = strtr( $text, array(
4873  '~~~~~' => $d,
4874  '~~~~' => "$sigText $d",
4875  '~~~' => $sigText
4876  ) );
4877 
4878  # Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
4879  $tc = '[' . Title::legalChars() . ']';
4880  $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!
4881 
4882  // [[ns:page (context)|]]
4883  $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
4884  // [[ns:page(context)|]] (double-width brackets, added in r40257)
# NOTE(review): the comment above speaks of double-width brackets, but the
# pattern as shown here contains plain ASCII parentheses -- verify the
# characters against the original file's encoding.
4885  $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?($tc+))\\|]]/";
4886  // [[ns:page (context), context|]] (using either single or double-width comma)
# NOTE(review): both comma alternatives below appear as ASCII in this listing;
# same encoding caveat as for $p4.
4887  $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |,)$tc+|)\\|]]/";
4888  // [[|page]] (reverse pipe trick: add context from page title)
4889  $p2 = "/\[\[\\|($tc+)]]/";
4890 
4891  # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
4892  $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
4893  $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
4894  $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );
4895 
# Reverse pipe trick: take the namespace prefix ($m[1]) and the trailing
# "(context)" or ", context" part ($m[2]) from the current page title
4896  $t = $this->mTitle->getText();
4897  $m = array();
4898  if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
4899  $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
4900  } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
4901  $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
4902  } else {
4903  # if there's no context, don't bother duplicating the title
4904  $text = preg_replace( $p2, '[[\\1]]', $text );
4905  }
4906 
4907  # Trim trailing whitespace
4908  $text = rtrim( $text );
4909 
4910  return $text;
4911  }
4912 
/**
 * Build the wikitext signature for a user.
 *
 * The nickname and the "fancy signature" flag are read from the user's
 * options unless they are passed in. A nickname longer than
 * $wgMaxSigChars, or a fancy signature that fails validateSig(), falls
 * back to the plain user name. A valid fancy signature is returned after
 * cleanSig(); otherwise the 'signature' / 'signature-anon' message is
 * rendered in the content language.
 *
 * @param User &$user User to sign for
 * @param string|bool $nickname Nickname to use, or false to read the
 *   'nickname' user option
 * @param bool|null $fancySig Whether the nickname is raw wikitext, or null
 *   to read the 'fancysig' user option
 * @return string Signature wikitext
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
    # Without this declaration the length check below would compare against
    # an undefined local and treat every non-empty nickname as overlong.
    global $wgMaxSigChars;

    $username = $user->getName();

    # If not given, retrieve from the user object.
    if ( $nickname === false ) {
        $nickname = $user->getOption( 'nickname' );
    }

    if ( is_null( $fancySig ) ) {
        $fancySig = $user->getBoolOption( 'fancysig' );
    }

    # Loose comparison on purpose: an empty string counts as "no nickname"
    $nickname = $nickname == null ? $username : $nickname;

    if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
        $nickname = $username;
        wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
    } elseif ( $fancySig !== false ) {
        # Sig. might contain markup; validate this
        if ( $this->validateSig( $nickname ) !== false ) {
            # Validated; clean up (if needed) and return it
            return $this->cleanSig( $nickname, true );
        } else {
            # Failed to validate; fall back to the default
            $nickname = $username;
            wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
        }
    }

    # Make sure nickname doesn't get a sig in a sig
    $nickname = self::cleanSigInSig( $nickname );

    # If we're still here, make it a link to the user page
    $userText = wfEscapeWikiText( $username );
    $nickText = wfEscapeWikiText( $nickname );
    $msgName = $user->isAnon() ? 'signature-anon' : 'signature';

    return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()
        ->title( $this->getTitle() )->text();
}
4969 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|bool The unchanged text when it is well formed, false otherwise
 */
public function validateSig( $text ) {
    if ( Xml::isWellFormedXmlFragment( $text ) ) {
        return $text;
    }

    return false;
}
4979 
/**
 * Clean up signature text.
 *
 * Rewrites every "{{" that is not already followed by the 'subst' magic
 * word so that it is, strips nested signature tildes, and expands the
 * result through the preprocessor. Returns the text untouched when the
 * options have signature cleaning disabled.
 *
 * @param string $text Signature wikitext
 * @param bool $parsing True when called from inside a running parse; when
 *   false a fresh OT_PREPROCESS parse is started on $wgTitle and strip
 *   markers are put back before returning
 * @return string Cleaned signature text
 */
public function cleanSig( $text, $parsing = false ) {
    if ( !$parsing ) {
        global $wgTitle;
        # NOTE(review): presumably a scoped lock -- keep it assigned until return
        $scopedLock = $this->lock();
        $this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
    }

    # Option to disable this feature
    if ( !$this->mOptions->getCleanSignatures() ) {
        return $text;
    }

    # @todo FIXME: Regex doesn't respect extension tags or nowiki
    #  => Move this logic to braceSubstitution()
    $subst = MagicWord::get( 'subst' );
    $needsSubst = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
    $withSubst = '{{' . $subst->getSynonym( 0 );

    $text = self::cleanSigInSig( preg_replace( $needsSubst, $withSubst, $text ) );

    $tree = $this->preprocessToDom( $text );
    $rootFrame = $this->getPreprocessor()->newFrame();
    $text = $rootFrame->expand( $tree );

    return $parsing ? $text : $this->mStripState->unstripBoth( $text );
}
5020 
/**
 * Remove signature tilde sequences (runs of three to five "~") from text,
 * so that a signature cannot itself contain a signature.
 *
 * @param string $text
 * @return string Text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
    return preg_replace( '/~{3,5}/', '', $text );
}
5031 
/**
 * Public entry point for setting up the parser for an externally driven
 * parse; forwards all arguments to startParse().
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to clear the parser state
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
    $outputType, $clearState = true
) {
    $this->startParse( $title, $options, $outputType, $clearState );
}
5046 
/**
 * Prepare the parser for a new parse: set the title, store the options,
 * set the output type and optionally clear the parser state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to call clearState()
 */
private function startParse( Title $title = null, ParserOptions $options,
    $outputType, $clearState = true
) {
    $this->setTitle( $title );
    $this->mOptions = $options;
    $this->setOutputType( $outputType );

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
5063 
/**
 * Run message text through preprocess().
 *
 * A static flag guards against re-entry: while one call is in progress,
 * any nested call returns its input unchanged.
 *
 * @param string $text Message text
 * @param ParserOptions $options Options passed to preprocess()
 * @param Title|null $title Title to use; falls back to $wgTitle when empty
 * @return string Preprocessed text, or the input text on a nested call
 */
public function transformMsg( $text, $options, $title = null ) {
    static $executing = false;

    # Guard against infinite recursion
    if ( !$executing ) {
        $executing = true;

        if ( !$title ) {
            global $wgTitle;
            $title = $wgTitle;
        }

        $text = $this->preprocess( $text, $title, $options );

        $executing = false;
    }

    return $text;
}
5091 
/**
 * Register a callback for an extension tag.
 *
 * The tag name is lower-cased, stored in mTagHooks and added to the strip
 * list if it is not there yet.
 *
 * @param string $tag Tag name; must not contain "<", ">", CR or LF
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains a forbidden character
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setHook( $tag, $callback ) {
    $tag = strtolower( $tag );
    if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
        throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
    }

    $previous = null;
    if ( isset( $this->mTagHooks[$tag] ) ) {
        $previous = $this->mTagHooks[$tag];
    }
    $this->mTagHooks[$tag] = $callback;

    $alreadyStripped = in_array( $tag, $this->mStripList );
    if ( !$alreadyStripped ) {
        $this->mStripList[] = $tag;
    }

    return $previous;
}
5129 
/**
 * Register a callback for a transparent tag.
 *
 * The tag name is lower-cased and stored in mTransparentTagHooks; unlike
 * setHook() the strip list is not touched.
 *
 * @param string $tag Tag name; must not contain "<", ">", CR or LF
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains a forbidden character
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setTransparentTagHook( $tag, $callback ) {
    $tag = strtolower( $tag );
    if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
        throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" );
    }

    $previous = null;
    if ( isset( $this->mTransparentTagHooks[$tag] ) ) {
        $previous = $this->mTransparentTagHooks[$tag];
    }
    $this->mTransparentTagHooks[$tag] = $callback;

    return $previous;
}
5157 
/**
 * Drop all registered tag hooks and function tag hooks and reset the
 * strip list to its default.
 */
public function clearTagHooks() {
    $this->mStripList = $this->mDefaultStripList;
    $this->mFunctionTagHooks = array();
    $this->mTagHooks = array();
}
5166 
/**
 * Register a parser function callback under a magic word ID.
 *
 * Stores the callback and flags in mFunctionHooks, then records every
 * synonym of the magic word in mFunctionSynonyms. Case-insensitive
 * synonyms are lower-cased with the content language, a leading "#" is
 * added unless SFH_NO_HASH is set, and a trailing colon is removed.
 *
 * @param string $id Magic word ID
 * @param callable $callback Callback stored for the function
 * @param int $flags Bit field of self::SFH_* flags
 * @throws MWException If $id does not resolve to a magic word
 * @return callable|null The callback previously registered for $id, if any
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
    # Needed for the lc() call below; without this declaration the method
    # would call a method on an undefined local for every case-insensitive
    # magic word.
    global $wgContLang;

    $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
    $this->mFunctionHooks[$id] = array( $callback, $flags );

    # Add to function cache
    $mw = MagicWord::get( $id );
    if ( !$mw ) {
        throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
    }

    $synonyms = $mw->getSynonyms();
    # Used as an array index: 0 = case-insensitive, 1 = case-sensitive
    $sensitive = intval( $mw->isCaseSensitive() );

    foreach ( $synonyms as $syn ) {
        # Case
        if ( !$sensitive ) {
            $syn = $wgContLang->lc( $syn );
        }
        # Add leading hash
        if ( !( $flags & self::SFH_NO_HASH ) ) {
            $syn = '#' . $syn;
        }
        # Remove trailing colon
        if ( substr( $syn, -1, 1 ) === ':' ) {
            $syn = substr( $syn, 0, -1 );
        }
        $this->mFunctionSynonyms[$sensitive][$syn] = $id;
    }

    return $oldVal;
}
5242 
/**
 * List the IDs under which function hooks are registered.
 *
 * @return array Keys of mFunctionHooks
 */
public function getFunctionHooks() {
    $hookIds = array_keys( $this->mFunctionHooks );

    return $hookIds;
}
5251 
/**
 * Register a function tag hook.
 *
 * The tag name is lower-cased, the callback and flags are stored together
 * in mFunctionTagHooks, and the tag is added to the strip list if needed.
 *
 * @param string $tag Tag name; must not contain "<", ">", CR or LF
 * @param callable $callback Callback stored for the tag
 * @param int $flags Flags stored alongside the callback
 * @throws MWException If the tag name contains a forbidden character
 * @return array|null The previously stored array( callback, flags ), if any
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
    $tag = strtolower( $tag );
    if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
        throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
    }

    $previous = null;
    if ( isset( $this->mFunctionTagHooks[$tag] ) ) {
        $previous = $this->mFunctionTagHooks[$tag];
    }
    $this->mFunctionTagHooks[$tag] = array( $callback, $flags );

    $alreadyStripped = in_array( $tag, $this->mStripList );
    if ( !$alreadyStripped ) {
        $this->mStripList[] = $tag;
    }

    return $previous;
}
5277 
/**
 * Replace link placeholders in the text, in place, by delegating to
 * mLinkHolders->replace().
 *
 * @param string &$text Text handed to the link holder collection
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
    $holders = $this->mLinkHolders;
    $holders->replace( $text );
}
5288 
/**
 * Return the result of mLinkHolders->replaceText() for the given text.
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
    $holders = $this->mLinkHolders;

    return $holders->replaceText( $text );
}
5299 
/**
 * Render the body of a gallery tag to HTML.
 *
 * Builds an ImageGalleryBase instance from the tag attributes, then reads
 * $text line by line. Each non-empty line has the form "Title|options",
 * where the options are split on "|" and matched against the alt/link
 * magic words plus any parameters of the file's handler. Whatever does not
 * match becomes the caption (the last such piece wins).
 *
 * @param string $text Content between the gallery tags
 * @param array $params Tag attributes
 * @return string HTML produced by the gallery object, after hooks have run
 */
5313  public function renderImageGallery( $text, $params ) {
5314 
5315  $mode = false;
5316  if ( isset( $params['mode'] ) ) {
5317  $mode = $params['mode'];
5318  }
5319 
5320  try {
5321  $ig = ImageGalleryBase::factory( $mode );
5322  } catch ( Exception $e ) {
5323  // If invalid type set, fallback to default.
5324  $ig = ImageGalleryBase::factory( false );
5325  }
5326 
5327  $ig->setContextTitle( $this->mTitle );
5328  $ig->setShowBytes( false );
5329  $ig->setShowFilename( false );
5330  $ig->setParser( $this );
5331  $ig->setHideBadImages();
5332  $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );
5333 
// The mere presence of the attribute enables file names; its value is ignored.
// The else branch repeats the setShowFilename( false ) call already made above.
5334  if ( isset( $params['showfilename'] ) ) {
5335  $ig->setShowFilename( true );
5336  } else {
5337  $ig->setShowFilename( false );
5338  }
5339  if ( isset( $params['caption'] ) ) {
5340  $caption = $params['caption'];
5341  $caption = htmlspecialchars( $caption );
5342  $caption = $this->replaceInternalLinks( $caption );
5343  $ig->setCaptionHtml( $caption );
5344  }
5345  if ( isset( $params['perrow'] ) ) {
5346  $ig->setPerRow( $params['perrow'] );
5347  }
5348  if ( isset( $params['widths'] ) ) {
5349  $ig->setWidths( $params['widths'] );
5350  }
5351  if ( isset( $params['heights'] ) ) {
5352  $ig->setHeights( $params['heights'] );
5353  }
5354  $ig->setAdditionalOptions( $params );
5355 
5356  Hooks::run( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) );
5357 
5358  $lines = StringUtils::explode( "\n", $text );
5359  foreach ( $lines as $line ) {
5360  # match lines like these:
5361  # Image:someimage.jpg|This is some image
5362  $matches = array();
5363  preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
5364  # Skip empty lines
5365  if ( count( $matches ) == 0 ) {
5366  continue;
5367  }
5368 
// Only the title part is URL-decoded, and only when the line contains a "%"
5369  if ( strpos( $matches[0], '%' ) !== false ) {
5370  $matches[1] = rawurldecode( $matches[1] );
5371  }
5372  $title = Title::newFromText( $matches[1], NS_FILE );
5373  if ( is_null( $title ) ) {
5374  # Bogus title. Ignore these so we don't bomb out later.
5375  continue;
5376  }
5377 
5378  # We need to get what handler the file uses, to figure out parameters.
5379  # Note, a hook can override the file name, and choose an entirely different
5380  # file (which potentially could be of a different type and have different handler).
5381  $options = array();
5382  $descQuery = false;
5383  Hooks::run( 'BeforeParserFetchFileAndTitle',
5384  array( $this, $title, &$options, &$descQuery ) );
5385  # Don't register it now, as ImageGallery does that later.
5386  $file = $this->fetchFileNoRegister( $title, $options );
5387  $handler = $file ? $file->getHandler() : false;
5388 
5389  $paramMap = array(
5390  'img_alt' => 'gallery-internal-alt',
5391  'img_link' => 'gallery-internal-link',
5392  );
5393  if ( $handler ) {
// Array union: the two gallery-internal entries win over same-named handler entries
5394  $paramMap = $paramMap + $handler->getParamMap();
5395  // We don't want people to specify per-image widths.
5396  // Additionally the width parameter would need special casing anyhow.
5397  unset( $paramMap['img_width'] );
5398  }
5399 
5400  $mwArray = new MagicWordArray( array_keys( $paramMap ) );
5401 
5402  $label = '';
5403  $alt = '';
5404  $link = '';
5405  $handlerOptions = array();
5406  if ( isset( $matches[3] ) ) {
5407  // look for an |alt= definition while trying not to break existing
5408  // captions with multiple pipes (|) in it, until a more sensible grammar
5409  // is defined for images in galleries
5410 
5411  // FIXME: Doing recursiveTagParse at this stage, and the trim before
5412  // splitting on '|' is a bit odd, and different from makeImage.
5413  $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
5414  $parameterMatches = StringUtils::explode( '|', $matches[3] );
5415 
5416  foreach ( $parameterMatches as $parameterMatch ) {
5417  list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
5418  if ( $magicName ) {
5419  $paramName = $paramMap[$magicName];
5420 
5421  switch ( $paramName ) {
5422  case 'gallery-internal-alt':
5423  $alt = $this->stripAltText( $match, false );
5424  break;
5425  case 'gallery-internal-link':
5426  $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
5427  $chars = self::EXT_LINK_URL_CLASS;
5428  $addr = self::EXT_LINK_ADDR;
5429  $prots = $this->mUrlProtocols;
5430  //check to see if link matches an absolute url, if not then it must be a wiki link.
5431  if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
5432  $link = $linkValue;
5433  } else {
5434  $localLinkTitle = Title::newFromText( $linkValue );
5435  if ( $localLinkTitle !== null ) {
5436  $link = $localLinkTitle->getLinkURL();
5437  }
5438  }
5439  break;
5440  default:
5441  // Must be a handler specific parameter.
// Reaching this branch implies $handler is set: names other than the two
// gallery-internal ones only enter $paramMap inside the if ( $handler ) above.
5442  if ( $handler->validateParam( $paramName, $match ) ) {
5443  $handlerOptions[$paramName] = $match;
5444  } else {
5445  // Guess not, consider it as caption.
5446  wfDebug( "$parameterMatch failed parameter validation\n" );
5447  $label = '|' . $parameterMatch;
5448  }
5449  }
5450 
5451  } else {
5452  // Last pipe wins.
5453  $label = '|' . $parameterMatch;
5454  }
5455  }
5456  // Remove the pipe.
5457  $label = substr( $label, 1 );
5458  }
5459 
5460  $ig->add( $title, $label, $alt, $link, $handlerOptions );
5461  }
5462  $html = $ig->toHTML();
// NOTE(review): the hook name below mentions "FetchFileAndTitle" although it
// runs on the finished gallery HTML -- kept as is, since renaming would break
// existing hook subscribers.
5463  Hooks::run( 'AfterParserFetchFileAndTitle', array( $this, $ig, &$html ) );
5464  return $html;
5465  }
5466 
/**
 * Get the image parameter map and matching MagicWordArray for a handler.
 *
 * The result is cached per handler class ('' when there is no handler) in
 * mImageParams / mImageParamsMagicArray. The map is keyed by magic word ID
 * and each value is array( type, name ), where type is 'horizAlign',
 * 'vertAlign', 'frame' or 'handler'.
 *
 * @param object|bool $handler Media handler, or a falsy value for none
 * @return array array( parameter map, MagicWordArray )
 */
public function getImageParams( $handler ) {
    $cacheKey = $handler ? get_class( $handler ) : '';

    if ( !isset( $this->mImageParams[$cacheKey] ) ) {
        # Initialise static lists
        static $internalParamNames = array(
            'horizAlign' => array( 'left', 'right', 'center', 'none' ),
            'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
                'bottom', 'text-bottom' ),
            'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless',
                'upright', 'border', 'link', 'alt', 'class' ),
        );
        static $internalParamMap;

        # Built once and then shared by every call
        if ( !$internalParamMap ) {
            $internalParamMap = array();
            foreach ( $internalParamNames as $group => $optionNames ) {
                foreach ( $optionNames as $optionName ) {
                    # Magic word IDs use underscores where option names use dashes
                    $magicId = str_replace( '-', '_', "img_$optionName" );
                    $internalParamMap[$magicId] = array( $group, $optionName );
                }
            }
        }

        # Add handler params
        $combined = $internalParamMap;
        if ( $handler ) {
            foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
                $combined[$magicId] = array( 'handler', $handlerParam );
            }
        }

        $this->mImageParams[$cacheKey] = $combined;
        $this->mImageParamsMagicArray[$cacheKey] = new MagicWordArray( array_keys( $combined ) );
    }

    return array(
        $this->mImageParams[$cacheKey],
        $this->mImageParamsMagicArray[$cacheKey]
    );
}
5510 
/**
 * Parse image options text and hand the result to Linker::makeImageLink().
 *
 * The options string is split on "|". Each part is matched against the
 * parameter map from getImageParams(); parts that match and validate are
 * stored in $params under their type, and any part that does not validate
 * becomes the caption (the last such part wins).
 *
 * @param Title $title Title of the image; may be replaced via hooks
 * @param string $options Pipe-separated options text
 * @param object|bool $holders Link holder collection passed on to
 *   stripAltText(), or false to use the parser's own
 * @return string Return value of Linker::makeImageLink()
 */
5519  public function makeImage( $title, $options, $holders = false ) {
5520  # Check if the options text is of the form "options|alt text"
5521  # Options are:
5522  # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
5523  # * left no resizing, just left align. label is used for alt= only
5524  # * right same, but right aligned
5525  # * none same, but not aligned
5526  # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
5527  # * center center the image
5528  # * frame Keep original image size, no magnify-button.
5529  # * framed Same as "frame"
5530  # * frameless like 'thumb' but without a frame. Keeps user preferences for width
5531  # * upright reduce width for upright images, rounded to full __0 px
5532  # * border draw a 1px border around the image
5533  # * alt Text for HTML alt attribute (defaults to empty)
5534  # * class Set a class for img node
5535  # * link Set the target of the image link. Can be external, interwiki, or local
5536  # vertical-align values (no % or length right now):
5537  # * baseline
5538  # * sub
5539  # * super
5540  # * top
5541  # * text-top
5542  # * middle
5543  # * bottom
5544  # * text-bottom
5545 
5546  $parts = StringUtils::explode( "|", $options );
5547 
5548  # Give extensions a chance to select the file revision for us
# From here on $options is reused as the hook-supplied options array; the
# original options text lives on only in $parts
5549  $options = array();
5550  $descQuery = false;
5551  Hooks::run( 'BeforeParserFetchFileAndTitle',
5552  array( $this, $title, &$options, &$descQuery ) );
5553  # Fetch and register the file (file title may be different via hooks)
5554  list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );
5555 
5556  # Get parameter map
5557  $handler = $file ? $file->getHandler() : false;
5558 
5559  list( $paramMap, $mwArray ) = $this->getImageParams( $handler );
5560 
5561  if ( !$file ) {
5562  $this->addTrackingCategory( 'broken-file-category' );
5563  }
5564 
5565  # Process the input parameters
5566  $caption = '';
5567  $params = array( 'frame' => array(), 'handler' => array(),
5568  'horizAlign' => array(), 'vertAlign' => array() );
5569  $seenformat = false;
5570  foreach ( $parts as $part ) {
5571  $part = trim( $part );
5572  list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
5573  $validated = false;
5574  if ( isset( $paramMap[$magicName] ) ) {
5575  list( $type, $paramName ) = $paramMap[$magicName];
5576 
5577  # Special case; width and height come in one variable together
5578  if ( $type === 'handler' && $paramName === 'width' ) {
5579  $parsedWidthParam = $this->parseWidthParam( $value );
5580  if ( isset( $parsedWidthParam['width'] ) ) {
5581  $width = $parsedWidthParam['width'];
5582  if ( $handler->validateParam( 'width', $width ) ) {
5583  $params[$type]['width'] = $width;
5584  $validated = true;
5585  }
5586  }
5587  if ( isset( $parsedWidthParam['height'] ) ) {
5588  $height = $parsedWidthParam['height'];
5589  if ( $handler->validateParam( 'height', $height ) ) {
5590  $params[$type]['height'] = $height;
5591  $validated = true;
5592  }
5593  }
5594  # else no validation -- bug 13436
5595  } else {
5596  if ( $type === 'handler' ) {
5597  # Validate handler parameter
5598  $validated = $handler->validateParam( $paramName, $value );
5599  } else {
5600  # Validate internal parameters
5601  switch ( $paramName ) {
5602  case 'manualthumb':
5603  case 'alt':
5604  case 'class':
5605  # @todo FIXME: Possibly check validity here for
5606  # manualthumb? downstream behavior seems odd with
5607  # missing manual thumbs.
5608  $validated = true;
5609  $value = $this->stripAltText( $value, $holders );
5610  break;
5611  case 'link':
5612  $chars = self::EXT_LINK_URL_CLASS;
5613  $addr = self::EXT_LINK_ADDR;
5614  $prots = $this->mUrlProtocols;
# The generic store after the switch uses $paramName, so the branches below
# rename it to 'no-link', 'link-url' or 'link-title'
5615  if ( $value === '' ) {
5616  $paramName = 'no-link';
5617  $value = true;
5618  $validated = true;
5619  } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
# Starts with a known protocol: accepted only if the whole value is a valid
# URL; otherwise the part stays unvalidated and ends up as the caption
5620  if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
5621  $paramName = 'link-url';
5622  $this->mOutput->addExternalLink( $value );
5623  if ( $this->mOptions->getExternalLinkTarget() ) {
5624  $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
5625  }
5626  $validated = true;
5627  }
5628  } else {
5629  $linkTitle = Title::newFromText( $value );
5630  if ( $linkTitle ) {
5631  $paramName = 'link-title';
5632  $value = $linkTitle;
5633  $this->mOutput->addLink( $linkTitle );
5634  $validated = true;
5635  }
5636  }
5637  break;
5638  case 'frameless':
5639  case 'framed':
5640  case 'thumbnail':
5641  // use first appearing option, discard others.
5642  $validated = ! $seenformat;
5643  $seenformat = true;
5644  break;
5645  default:
5646  # Most other things appear to be empty or numeric...
5647  $validated = ( $value === false || is_numeric( trim( $value ) ) );
5648  }
5649  }
5650 
5651  if ( $validated ) {
5652  $params[$type][$paramName] = $value;
5653  }
5654  }
5655  }
# Anything that was not recognised and validated is treated as the caption;
# a later unvalidated part overwrites an earlier one
5656  if ( !$validated ) {
5657  $caption = $part;
5658  }
5659  }
5660 
5661  # Process alignment parameters
# key() returns the key at the array's internal pointer, i.e. the first
# alignment name that was stored
5662  if ( $params['horizAlign'] ) {
5663  $params['frame']['align'] = key( $params['horizAlign'] );
5664  }
5665  if ( $params['vertAlign'] ) {
5666  $params['frame']['valign'] = key( $params['vertAlign'] );
5667  }
5668 
5669  $params['frame']['caption'] = $caption;
5670 
5671  # Will the image be presented in a frame, with the caption below?
5672  $imageIsFramed = isset( $params['frame']['frame'] )
5673  || isset( $params['frame']['framed'] )
5674  || isset( $params['frame']['thumbnail'] )
5675  || isset( $params['frame']['manualthumb'] );
5676 
5677  # In the old days, [[Image:Foo|text...]] would set alt text. Later it
5678  # came to also set the caption, ordinary text after the image -- which
5679  # makes no sense, because that just repeats the text multiple times in
5680  # screen readers. It *also* came to set the title attribute.
5681  #
5682  # Now that we have an alt attribute, we should not set the alt text to
5683  # equal the caption: that's worse than useless, it just repeats the
5684  # text. This is the framed/thumbnail case. If there's no caption, we
5685  # use the unnamed parameter for alt text as well, just for the time be-
5686  # ing, if the unnamed param is set and the alt param is not.
5687  #
5688  # For the future, we need to figure out if we want to tweak this more,
5689  # e.g., introducing a title= parameter for the title; ignoring the un-
5690  # named parameter entirely for images without a caption; adding an ex-
5691  # plicit caption= parameter and preserving the old magic unnamed para-
5692  # meter for BC; ...
5693  if ( $imageIsFramed ) { # Framed image
5694  if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
5695  # No caption or alt text, add the filename as the alt text so
5696  # that screen readers at least get some description of the image
5697  $params['frame']['alt'] = $title->getText();
5698  }
5699  # Do not set $params['frame']['title'] because tooltips don't make sense
5700  # for framed images
5701  } else { # Inline image
5702  if ( !isset( $params['frame']['alt'] ) ) {
5703  # No alt text, use the "caption" for the alt text
5704  if ( $caption !== '' ) {
5705  $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5706  } else {
5707  # No caption, fall back to using the filename for the
5708  # alt text
5709  $params['frame']['alt'] = $title->getText();
5710  }
5711  }
5712  # Use the "caption" for the tooltip text
5713  $params['frame']['title'] = $this->stripAltText( $caption, $holders );
5714  }
5715 
5716  Hooks::run( 'ParserMakeImageParams', array( $title, $file, &$params, $this ) );
5717 
5718  # Linker does the rest
5719  $time = isset( $options['time'] ) ? $options['time'] : false;
5720  $ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
5721  $time, $descQuery, $this->mOptions->getThumbSize() );
5722 
5723  # Give the handler a chance to modify the parser object
5724  if ( $handler ) {
5725  $handler->parserTransformHook( $this, $file );
5726  }
5727 
5728  return $ret;
5729  }
5730 
/**
 * Turn caption wikitext into plain text usable in an attribute.
 *
 * Link placeholders are replaced by text, strip markers are expanded and
 * all tags are then removed.
 *
 * @param string $caption Caption text
 * @param object|bool $holders Link holder collection to resolve
 *   placeholders with, or a falsy value to use the parser's own
 * @return string Tag-free text
 */
protected function stripAltText( $caption, $holders ) {
    # Strip bad stuff out of the title (tooltip). We can't just use
    # replaceLinkHoldersText() here, because if this function is called
    # from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
    $plain = $holders
        ? $holders->replaceText( $caption )
        : $this->replaceLinkHoldersText( $caption );

    # Placeholders would otherwise be expanded to HTML later on, inside an
    # attribute; expand them now so that the tags can be removed
    $plain = $this->mStripState->unstripBoth( $plain );

    return Sanitizer::stripAllTags( $plain );
}
5754 
/**
 * Mark the current parser output as uncacheable by setting its cache
 * expiry to 0.
 *
 * @throws MWException If there is no current output object
 */
public function disableCache() {
    wfDebug( "Parser output marked as uncacheable.\n" );

    if ( $this->mOutput ) {
        $this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
        return;
    }

    throw new MWException( __METHOD__ .
        " can only be called when actually parsing something" );
}
5767 
/**
 * Expand variables in a piece of text and then restore stripped content.
 *
 * The text is passed by reference and is overwritten with the result,
 * which is also returned.
 *
 * @param string &$text Text to process; updated in place
 * @param PPFrame|bool $frame Frame handed through to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $expanded );

	return $text;
}
5781 
/**
 * List the names of every registered tag hook.
 *
 * @return array Keys of the transparent tag hooks, followed by those of the
 *   regular tag hooks, followed by those of the function tag hooks
 */
public function getTags() {
	$transparent = array_keys( $this->mTransparentTagHooks );
	$regular = array_keys( $this->mTagHooks );
	$functional = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparent, $regular, $functional );
}
5794 
/**
 * Replace transparent tags in the text with the output of their hooks.
 *
 * The tags named by the keys of mTransparentTagHooks are pulled out of the
 * text via extractTagsAndParams(); each extracted tag is then substituted
 * with the return value of its hook, which is called with
 * ( $content, $params, $this ). An extracted element without a matching
 * hook is put back as its original tag text.
 *
 * @param string $text
 * @return string Text with the substitutions applied
 */
public function replaceTransparentTags( $text ) {
	$found = array();
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $found );

	$substitutions = array();
	foreach ( $found as $marker => $info ) {
		list( $element, $content, $params, $tag ) = $info;
		# Hooks are looked up by lower-cased element name
		$hookKey = strtolower( $element );
		$substitutions[$marker] = isset( $this->mTransparentTagHooks[$hookKey] )
			? call_user_func_array(
				$this->mTransparentTagHooks[$hookKey],
				array( $content, $params, $this )
			)
			: $tag;
	}

	return strtr( $text, $substitutions );
}
5826 
/**
 * Extract one section of some wikitext, or replace it with new text.
 *
 * Shared worker behind getSection() and replaceSection().
 *
 * @param string $text Page wikitext
 * @param string $sectionId Hyphen-separated identifier. The last component
 *   is the section index; a preceding "T" component turns on the
 *   PTD_FOR_INCLUSION preprocessor flag (e.g. "T-2").
 * @param string $mode 'get' to return the section, 'replace' to return
 *   the whole text with the section swapped for $newText
 * @param string $newText Replacement text in 'replace' mode; in 'get' mode
 *   the value returned when the section does not exist
 * @return string In 'get' mode the section text (or $newText if the section
 *   was not found); in 'replace' mode the updated text (or $text unchanged
 *   if the section was not found). Results that went through the section
 *   walk have trailing whitespace trimmed.
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Hold the lock object in a local so it lives until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$frame = $this->getPreprocessor()->newFrame();
	$result = '';

	$getting = ( $mode === 'get' );
	$replacing = ( $mode === 'replace' );
	# Value handed back whenever the requested section does not exist
	$fallback = $getting ? $newText : $text;

	# Process section extraction flags
	$prefixes = explode( '-', $sectionId );
	$sectionIndex = array_pop( $prefixes );
	$flags = in_array( 'T', $prefixes, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			return $getting ? '' : $newText;
		}
		return $fallback;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			# In replace mode everything before the target is copied through
			if ( $replacing ) {
				$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $fallback;
	}

	# Find the end of the section, including nested sections.
	# $node is known to be set here, so a pre-tested loop is sufficient.
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			$curLevel = $heading['level'];
			# A different heading at the same or a shallower level ends the section
			if ( $heading['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		if ( $getting ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Write out the remainder (in replace mode only)
	if ( $replacing ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$result .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $result ) ) {
		# Re-insert stripped tags
		$result = rtrim( $this->mStripState->unstripBoth( $result ) );
	}

	return $result;
}
5966 
/**
 * Return the text of one section of some wikitext.
 *
 * Thin wrapper that runs extractSections() in 'get' mode.
 *
 * @param string $text Text to look in
 * @param string $sectionId Section identifier as accepted by extractSections()
 * @param string $defaultText Value to return if the section is not found
 * @return string Text of the requested section, or $defaultText
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$section = $this->extractSections( $text, $sectionId, 'get', $defaultText );

	return $section;
}
5984 
/**
 * Return some wikitext with one of its sections replaced.
 *
 * Thin wrapper that runs extractSections() in 'replace' mode.
 *
 * @param string $oldText Former text of the page
 * @param string $sectionId Section identifier as accepted by extractSections()
 * @param string $newText Text that replaces the section
 * @return string Updated text
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$updated = $this->extractSections( $oldText, $sectionId, 'replace', $newText );

	return $updated;
}
6000 
/**
 * Get the ID of the revision we are parsing.
 *
 * @return int|null Current value of mRevisionId
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;

	return $revisionId;
}
6009 
/**
 * Get the revision object for $this->mRevisionId.
 *
 * The object is cached in mRevisionObject after the first lookup.
 *
 * @return Revision|null The cached or freshly loaded revision; null when
 *   no revision ID is set. May also be whatever falsy value the lookup
 *   produced.
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject !== null ) {
		return $this->mRevisionObject;
	}
	if ( $this->mRevisionId === null ) {
		return null;
	}

	$fetchCurrent = $this->mOptions->getCurrentRevisionCallback();
	$revision = call_user_func( $fetchCurrent, $this->getTitle(), $this );

	# If the parse is for a new revision, then the callback should have
	# already been set to force the object and should match mRevisionId.
	# If not, try to fetch by mRevisionId for sanity.
	$mismatch = $revision && $revision->getId() != $this->mRevisionId;
	if ( $mismatch ) {
		$revision = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $revision;

	return $revision;
}
6039 
/**
 * Get the timestamp associated with the current revision, adjusted to the
 * site-default timezone offset.
 *
 * The value is computed once and cached in mRevisionTimestamp. When no
 * revision object is available, the current time is used.
 *
 * @return string Adjusted timestamp
 */
public function getRevisionTimestamp() {
	# The content language object lives in a global; without this
	# declaration $wgContLang below would be an undefined local.
	global $wgContLang;

	if ( is_null( $this->mRevisionTimestamp ) ) {
		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		#
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6063 
/**
 * Get the name of the user that edited the last revision.
 *
 * The value is cached in mRevisionUser once it has been determined.
 *
 * @return string|null User name, or null if it could not be determined
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionObject();

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $revision ) {
		$this->mRevisionUser = $revision->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6083 
/**
 * Get the size of the revision being parsed.
 *
 * The value is cached in mRevisionSize once it has been determined.
 *
 * @return int|null Revision size, or null if it could not be determined
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revision = $this->getRevisionObject();

	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the own substitution
	# will change the size.
	if ( $revision ) {
		$this->mRevisionSize = $revision->getSize();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionSize = $this->mInputSize;
	}

	return $this->mRevisionSize;
}
6104 
/**
 * Set the default sort key and record it on the output object as the
 * 'defaultsort' property.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	# Remember the key locally first, then mirror it onto the output
	$this->mDefaultSort = $sort;

	$this->mOutput->setProperty( 'defaultsort', $sort );
}
6114 
/**
 * Get the default sort key, with false mapped to an empty string.
 *
 * @return string|mixed mDefaultSort unless it is exactly false, in which
 *   case '' is returned
 */
public function getDefaultSort() {
	return ( $this->mDefaultSort !== false ) ? $this->mDefaultSort : '';
}
6132 
/**
 * Get the raw default sort key exactly as stored.
 *
 * Unlike getDefaultSort(), a stored false is returned as false rather
 * than being mapped to an empty string.
 *
 * @return string|bool Current value of mDefaultSort
 */
public function getCustomDefaultSort() {
	$stored = $this->mDefaultSort;

	return $stored;
}
6142 
/**
 * Guess the anchor fragment for a section heading given as wikitext.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped section name
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$text = $this->stripSectionName( $text );
	# Normalise whitespace in the name before it is escaped
	# (see Sanitizer::normalizeSectionNameWhitespace())
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );

	return '#' . Sanitizer::escapeId( $text, 'noninitial' );
}
6158 
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the section name escaped with the
 *   'legacy' option
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$text = $this->stripSectionName( $text );
	# Normalise whitespace in the name before it is escaped
	# (see Sanitizer::normalizeSectionNameWhitespace())
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );

	return '#' . Sanitizer::escapeId( $text, array( 'noninitial', 'legacy' ) );
}
6173 
/**
 * Strips a text string of wikitext for use in a section anchor.
 *
 * Internal link markup is reduced to its label (or target when there is
 * no label), external links with link text are reduced to that text,
 * quote markup is run through doQuotes(), and anything delimited by
 * '<' and '>' is removed.
 *
 * @param string $text Text to strip
 * @return string Stripped text
 */
public function stripSectionName( $text ) {
	# Strip internal link markup
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6206 
/**
 * strip/replaceVariables/unstrip for preprocessor regression testing
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @return string Text after variable replacement, unstripping and
 *   Sanitizer::removeHTMLtags()
 */
public function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) {
	# Keep the lock object alive for the duration of the call
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6226 
/**
 * Run preSaveTransform() on the text, using the user taken from the
 * given parser options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of preSaveTransform()
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
6236 
/**
 * Run testSrvus() with the output type fixed to OT_PREPROCESS.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of testSrvus()
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;

	return $this->testSrvus( $text, $title, $options, $outputType );
}
6246 
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return value
 * of the callback.
 *
 * A marker is anything from MARKER_PREFIX up to and including the next
 * MARKER_SUFFIX; markers are copied to the output untouched. If a prefix
 * has no matching suffix, the rest of the string is copied verbatim.
 *
 * @param string $s Text to process
 * @param callable $callback Called with one string argument per region
 * @return string Processed text
 */
public function markerSkipCallback( $s, $callback ) {
	$prefix = self::MARKER_PREFIX;
	$suffix = self::MARKER_SUFFIX;
	$suffixLength = strlen( $suffix );
	$total = strlen( $s );
	$result = '';

	for ( $pos = 0; $pos < $total; ) {
		$start = strpos( $s, $prefix, $pos );
		if ( $start === false ) {
			# No more markers: the whole tail goes through the callback
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Text between the previous position and this marker
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, $suffix, $start );
		if ( $end === false ) {
			# Unterminated marker: copy the remainder as is
			$result .= substr( $s, $start );
			break;
		}

		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
6286 
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the strip state object.
 *
 * @param string $text Input string
 * @return string Result of StripState::killMarkers()
 */
public function killMarkers( $text ) {
	$cleaned = $this->mStripState->killMarkers( $text );

	return $cleaned;
}
6296 
/**
 * Save the parser state required to convert the given half-parsed text to
 * HTML.
 *
 * @param string $text Half-parsed text
 * @return array Array with the keys 'text', 'version' (set to
 *   HALF_PARSED_VERSION), 'stripState' and 'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
	$stripState = $this->mStripState->getSubState( $text );
	$linkHolders = $this->mLinkHolders->getSubArray( $text );

	return array(
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $stripState,
		'linkHolders' => $linkHolders
	);
}
6322 
/**
 * Load the parser state given in the $data array, which is assumed to
 * have been generated by serializeHalfParsedText(), and return the text
 * with the strip state merged and the link holders merged into this
 * parser.
 *
 * @param array $data Serialized half-parsed text
 * @throws MWException If $data has no 'version' key or the version does
 *   not equal HALF_PARSED_VERSION
 * @return string Text usable with the current parser state
 */
public function unserializeHalfParsedText( $data ) {
	$compatible = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$compatible ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# First, extract the strip state.
	$merged = $this->mStripState->merge( $data['stripState'], array( $data['text'] ) );

	# Now renumber links
	$merged = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $merged );

	# Should be good to go.
	return $merged[0];
}
6353 
/**
 * Returns true if the given array, presumed to be generated by
 * serializeHalfParsedText(), is compatible with the current version of the
 * parser.
 *
 * @param array $data
 * @return bool True when a 'version' key is set and loosely equals
 *   HALF_PARSED_VERSION
 */
public function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
6366 
/**
 * Parse the value of a width parameter such as "100px" or "100x200px".
 *
 * @param string $value Width parameter text
 * @return array Empty for '' or for unrecognised input; otherwise it has a
 *   'width' key and, for the WIDTHxHEIGHT form, a 'height' key as well
 *   (both integers)
 */
public function parseWidthParam( $value ) {
	if ( $value === '' ) {
		return array();
	}

	# (bug 13500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$parts = array();
	if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $parts ) ) {
		return array(
			'width' => intval( $parts[1] ),
			'height' => intval( $parts[2] ),
		);
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return array( 'width' => intval( $value ) );
	}

	return array();
}
6394 
/**
 * Mark the parser as being in a parse and return an object that clears
 * the mark again.
 *
 * mInParse is set to true; the callback wrapped by the returned
 * ScopedCallback sets it back to false. Callers should keep the returned
 * object in a local variable.
 *
 * @throws MWException If mInParse is already set
 * @return ScopedCallback
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively?" );
	}
	$this->mInParse = true;

	# Captured explicitly rather than relying on $this inside the closure
	$parser = $this;

	return new ScopedCallback( function() use ( $parser ) {
		$parser->mInParse = false;
	} );
}
6418 
/**
 * Strip a single outer <p></p> wrapper from a piece of HTML.
 *
 * The wrapper is removed only when the whole string is one paragraph,
 * i.e. the text inside the outer tags contains no further '</p>'.
 * Otherwise the input is returned unchanged.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$match = array();
	$wrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $match );

	# An inner closing tag means there is more than one paragraph
	if ( $wrapped && strpos( $match[1], '</p>' ) === false ) {
		return $match[1];
	}

	return $html;
}
6439 
/**
 * Return this parser if it is not currently in a parse, otherwise a newly
 * constructed one.
 *
 * The new instance is built from the class named in $wgParserConf['class'],
 * with $wgParserConf passed to its constructor.
 *
 * @return Parser
 */
public function getFreshParser() {
	global $wgParserConf;

	if ( !$this->mInParse ) {
		return $this;
	}

	$parserClass = $wgParserConf['class'];

	return new $parserClass( $wgParserConf );
}
6458 
/**
 * Set up OOUI and flag the parser output as requiring it.
 */
public function enableOOUI() {
	# Initialise OOUI before the output is flagged as using it
	# (see OutputPage::setupOOUI())
	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
6469 }
getRevisionObject()
Get the revision object for $this->mRevisionId.
Definition: Parser.php:6016
setTitle($t)
Set the context title.
Definition: Parser.php:735
$mAutonumber
Definition: Parser.php:179
markerSkipCallback($s, $callback)
Call a callback function on all regions of the given text that are not inside strip markers...
Definition: Parser.php:6263
#define the
table suitable for use with IDatabase::select()
$mPPNodeCount
Definition: Parser.php:193
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2036
static getVariableIDs()
Get an array of parser variable IDs.
Definition: MagicWord.php:275
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:117
const MARKER_PREFIX
Definition: Parser.php:136
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
Definition: deferred.txt:11
external whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2538
isValidHalfParsedText($data)
Returns true if the given array, presumed to be generated by serializeHalfParsedText(), is compatible with the current version of the parser.
Definition: Parser.php:6363
null means default in associative array form
Definition: hooks.txt:1769
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1769
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1721
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
getSection($text, $sectionId, $defaultText= '')
This function returns the text of a section, specified by a number ($section).
Definition: Parser.php:5981
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1250
$mTplRedirCache
Definition: Parser.php:195
killMarkers($text)
Remove any strip markers found in the given text.
Definition: Parser.php:6293
static tocList($toc, $lang=false)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1732
static makeExternalLink($url, $text, $escape=true, $linktype= '', $attribs=array(), $title=null)
Make an external link.
Definition: Linker.php:1051
fetchTemplateAndTitle($title)
Fetch the unparsed text of a template and register a reference to it.
Definition: Parser.php:3923
getRevisionUser()
Get the name of the user that edited the last revision.
Definition: Parser.php:6069
setFunctionTagHook($tag, $callback, $flags)
Create a tag function, e.g.
Definition: Parser.php:5262
the array() calling protocol came about after MediaWiki 1.4rc1.
stripSectionName($text)
Strips a text string of wikitext for use in a section anchor.
Definition: Parser.php:6188
null for the local wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1389
const OT_PREPROCESS
Definition: Defines.php:228
$mLastSection
Definition: Parser.php:186
$mDoubleUnderscores
Definition: Parser.php:195
magic word the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2303
Group all the pieces relevant to the context of a request into one instance.
validateSig($text)
Check that the user's signature contains no bad XML.
Definition: Parser.php:4976
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:245
callParserFunction($frame, $function, array $args=array())
Call a parser function and return an array with text and flags.
Definition: Parser.php:3752
getArticleID($flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition: Title.php:3165
$wgSitename
Name of the site.
renderImageGallery($text, $params)
Renders an image gallery from a text with one line per image.
Definition: Parser.php:5313
recursivePreprocess($text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:664
getText()
Get the text form (spaces not underscores) of the main part.
Definition: Title.php:934
replaceExternalLinks($text)
Replace external links (REL)
Definition: Parser.php:1774
static isNonincludable($index)
Is it not possible to use pages from this namespace as templates?
nextLinkID()
Definition: Parser.php:825
const SPACE_NOT_NL
Definition: Parser.php:100
static replaceUnusualEscapes($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1891
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:311
getImageParams($handler)
Definition: Parser.php:5471
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
doHeadings($text)
Parse headers and return html.
Definition: Parser.php:1553
const OT_PLAIN
Definition: Parser.php:121
getTags()
Accessor.
Definition: Parser.php:5787
findColonNoLinks($str, &$before, &$after)
Split up a string on ':', ignoring any occurrences inside tags to prevent illegal overlapping...
Definition: Parser.php:2751
static removeHTMLtags($text, $processCallback=null, $args=array(), $extratags=array(), $removetags=array())
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:454
static isWellFormedXmlFragment($text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:735
const OT_WIKI
Definition: Parser.php:118
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:1903
User $mUser
Definition: Parser.php:202
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:3261
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1769
=Architecture==Two class hierarchies are used to provide the functionality associated with the different content models:*Content interface(and AbstractContent base class) define functionality that acts on the concrete content of a page, and *ContentHandler base class provides functionality specific to a content model, but not acting on concrete content.The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content.These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text.All manipulation and analysis of page content must be done via the appropriate methods of the Content object.For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers.The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id).Also Title, WikiPage and Revision now have getContentHandler() methods for convenience.ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page.ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type.However, it is recommended to instead use WikiPage::getContent() resp.Revision::getContent() to get a page's content as a Content object.These two methods should be the ONLY way in which page content is accessed.Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides().This is similar to what WikiPage::getActionOverrides() was already doing.==Serialization==With the ContentHandler facility, page content no longer has to be text based.Objects implementing the Content interface are used to represent and handle the content internally.For storage and data exchange, each content model supports at least one serialization format via 
ContentHandler::serializeContent($content).The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats().Content serialization formats are identified using MIME type like strings.The following formats are built in:*text/x-wiki-wikitext *text/javascript-for js pages *text/css-for css pages *text/plain-for future use, e.g.with plain text messages.*text/html-for future use, e.g.with plain html messages.*application/vnd.php.serialized-for future use with the api and for extensions *application/json-for future use with the api, and for use by extensions *application/xml-for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant.Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model and format of the content should always be handled explicitly.Without that information, interpretation of the provided content is not reliable.The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export.Also note that the API will provide encapsulated, serialized content-so if the API was called with format=json, and contentformat is also json(or rather, application/json), the page content is represented as a string containing an escaped json structure.Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form.==Compatibility==The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content.However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page's content model, and will now generate warnings when used.Most importantly, the following functions have been deprecated:*Revisions::getText() and Revisions::getRawText() is 
deprecated in favor Revisions::getContent()*WikiPage::getText() is deprecated in favor WikiPage::getContent() Also, the old Article::getContent()(which returns text) is superceded by Article::getContentObject().However, both methods should be avoided since they do not provide clean access to the page's actual content.For instance, they may return a system message for non-existing pages.Use WikiPage::getContent() instead.Code that relies on a textual representation of the page content should eventually be rewritten.However, ContentHandler::getContentText() provides a stop-gap that can be used to get text for a page.Its behavior is controlled by $wgContentHandlerTextFallback it
static isEnabled()
Definition: MWTidy.php:92
Set options of the Parser.
static tidy($text)
Interface with html tidy.
Definition: MWTidy.php:45
getFunctionHooks()
Get all registered function hook identifiers.
Definition: Parser.php:5248
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:975
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
database rows
Definition: globals.txt:10
wfHostname()
Fetch server name for use in error reporting etc.
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:840
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:188
argSubstitution($piece, $frame)
Triple brace replacement – used for template arguments.
Definition: Parser.php:4168
testSrvus($text, Title $title, ParserOptions $options, $outputType=self::OT_HTML)
strip/replaceVariables/unstrip for preprocessor regression testing
Definition: Parser.php:6217
uniqPrefix()
Accessor for mUniqPrefix.
Definition: Parser.php:725
const TOC_START
Definition: Parser.php:139
Title($x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:764
SectionProfiler $mProfiler
Definition: Parser.php:254
$wgEnableScaryTranscluding
Enable interwiki transcluding.
$sort
wfDebug($text, $dest= 'all', array $context=array())
Sends a line to the debug log if enabled or, optionally, to a comment in output.
null for the local wiki Added in
Definition: hooks.txt:1389
There are three types of nodes:
has been added to your &Future changes to this page and its associated Talk page will be listed there
$mHeadings
Definition: Parser.php:195
$value
clearTagHooks()
Remove all tag hooks.
Definition: Parser.php:5161
const COLON_STATE_TAGSLASH
Definition: Parser.php:107
static makeSelfLinkObj($nt, $html= '', $query= '', $trail= '', $prefix= '')
Make appropriate markup for a link to the current article.
Definition: Linker.php:408
const NS_SPECIAL
Definition: Defines.php:58
clearState()
Clear Parser state.
Definition: Parser.php:338
const EXT_LINK_ADDR
Definition: Parser.php:92
$mFirstCall
Definition: Parser.php:154
getPreloadText($text, Title $title, ParserOptions $options, $params=array())
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:683
interwikiTransclude($title, $action)
Transclude an interwiki link.
Definition: Parser.php:4109
pstPass2($text, $user)
Pre-save transform helper function.
Definition: Parser.php:4848
guessLegacySectionNameFromWikiText($text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead.
Definition: Parser.php:6167
Options($x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:818
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2528
serializeHalfParsedText($text)
Save the parser state required to convert the given half-parsed text to HTML.
Definition: Parser.php:6313
replaceLinkHolders(&$text, $options=0)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer. Placeholders created in Link...
Definition: Parser.php:5285
static activeUsers()
Definition: SiteStats.php:164
fetchFile($title, $options=array())
Fetch a file and its title and register a reference to it.
Definition: Parser.php:4053
$mLinkID
Definition: Parser.php:192
doQuotes($text)
Helper function for doAllQuotes()
Definition: Parser.php:1586
preprocessToDom($text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:3291
getPrefixedText()
Get the prefixed title with spaces.
Definition: Title.php:1470
limitationWarn($limitationType, $current= '', $max= '')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:3413
static cleanUrl($url)
Definition: Sanitizer.php:1823
wfUrlencode($s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:269
$mGeneratedPPNodeCount
Definition: Parser.php:193
Represents a title within MediaWiki.
Definition: Title.php:33
static getRandomString()
Get a random string.
Definition: Parser.php:704
$mRevisionId
Definition: Parser.php:219
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1790
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
doBlockLevels($text, $linestart)
#@-
Definition: Parser.php:2537
$wgArticlePath
Definition: img_auth.php:45
OutputType($x=null)
Accessor/mutator for the output type.
Definition: Parser.php:790
const NS_TEMPLATE
Definition: Defines.php:79
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:116
const COLON_STATE_COMMENTDASHDASH
Definition: Parser.php:110
getVariableValue($index, $frame=false)
Return value of a magic variable (like PAGENAME)
Definition: Parser.php:2909
recursiveTagParse($text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:599
const NO_ARGS
magic word & $parser
Definition: hooks.txt:2303
MagicWordArray $mVariables
Definition: Parser.php:161
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:715
const SFH_NO_HASH
Definition: Parser.php:82
const COLON_STATE_COMMENTDASH
Definition: Parser.php:109
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
wfRandomString($length=32)
Get a random string containing a number of pseudo-random hex characters.
$mForceTocPosition
Definition: Parser.php:197
preprocess($text, Title $title=null, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:640
static getCacheTTL($id)
Allow external reads of TTL array.
Definition: MagicWord.php:298
getRevisionId()
Get the ID of the revision we are parsing.
Definition: Parser.php:6006
const OT_PREPROCESS
Definition: Parser.php:119
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1583
maybeDoSubpageLink($target, &$text)
Handle link to subpage if necessary.
Definition: Parser.php:2404
$mFunctionSynonyms
Definition: Parser.php:146
If you want to remove the page from your watchlist later
replaceLinkHoldersText($text)
Replace "<!--LINK-->" link placeholders with plain text of links (not HTML-formatted).
Definition: Parser.php:5296
setLinkID($id)
Definition: Parser.php:832
$mOutputType
Definition: Parser.php:216
Apache License January http
$mDefaultStripList
Definition: Parser.php:149
static createAssocArgs($args)
Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
Definition: Parser.php:3365
$mExtLinkBracketedRegex
Definition: Parser.php:168
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page.Return false to stop further processing of the tag $reader:XMLReader object &$pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports.&$fullInterwikiPrefix:Interwiki prefix, may contain colons.&$pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable.Can be used to lazy-load the import sources list.&$importSources:The value of $wgImportSources.Modify as necessary.See the comment in DefaultSettings.php for the detail of how to structure this array. 
'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.&$title:Title object for the current page &$request:WebRequest &$ignoreRedirect:boolean to skip redirect check &$target:Title/string of redirect target &$article:Article object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) &$article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() &$ip:IP being check &$result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 
'LanguageGetMagic':DEPRECATED!Use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language &$magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED!Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language &$specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 'LinkBegin':Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1767
if($line===false) $args
Definition: cdb.php:64
the value to return A Title object or null for latest to be modified or replaced by the hook handler after cache objects are set for highlighting & $link
Definition: hooks.txt:2561
static getLocalInstance($ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
$wgMaxSigChars
Maximum number of Unicode characters in signature.
const COLON_STATE_TAG
Definition: Parser.php:104
static getDoubleUnderscoreArray()
Get a MagicWordArray of double-underscore entities.
Definition: MagicWord.php:311
static splitTrail($trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1805
getTemplateDom($title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition: Parser.php:3848
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).
Definition: User.php:42
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1463
openList($char)
These next three functions open, continue, and close the list element appropriate to the prefix chara...
Definition: Parser.php:2458
cleanSig($text, $parsing=false)
Clean up signature text.
Definition: Parser.php:4990
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
static factory($mode=false, IContextSource $context=null)
Get a new image gallery.
$wgLanguageCode
Site language code.
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:132
Class for asserting that a callback happens when a dummy object leaves scope.
$wgExtraInterlanguageLinkPrefixes
List of additional interwiki prefixes that should be treated as interlanguage links (i...
startExternalParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition: Parser.php:5041
wfCgiToArray($query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
static capturePath(Title $title, IContextSource $context)
Just like executePath() but will override global variables and execute the page in "inclusion" mode...
const NO_TEMPLATES
addTrackingCategory($msg)
Definition: Parser.php:4388
replaceInternalLinks($s)
Process [[ ]] wikilinks.
Definition: Parser.php:2023
$mVarCache
Definition: Parser.php:150
$wgStylePath
The URL path of the skins directory.
disableCache()
Set a flag in the output object indicating that the content is dynamic and shouldn't be cached...
Definition: Parser.php:5759
$mRevisionObject
Definition: Parser.php:218
static normalizeSectionNameWhitespace($section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the IDs that are used for section links.
Definition: Sanitizer.php:1344
internalParse($text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1222
Title $mTitle
Definition: Parser.php:215
fetchFileNoRegister($title, $options=array())
Helper function for fetchFileAndTitle.
Definition: Parser.php:4090
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:285
wfEscapeWikiText($text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
bool $mInParse
Recursive call protection.
Definition: Parser.php:251
Some quick notes on the file repository architecture Functionality is
Definition: README:3
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition: Parser.php:6045
isExternal()
Is this Title interwiki?
Definition: Title.php:853
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:889
static stripOuterParagraph($html)
Strip the outer <p></p> tag from the HTML source of a single paragraph.
Definition: Parser.php:6429
static register($parser)
$mRevIdForTs
Definition: Parser.php:223
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
static normalizeSubpageLink($contextTitle, $target, &$text)
Definition: Linker.php:1534
parseWidthParam($value)
Parse a width param of an imagelink like 300px or 200x300px.
Definition: Parser.php:6375
$mStripList
Definition: Parser.php:148
$mFunctionTagHooks
Definition: Parser.php:147
fetchScaryTemplateMaybeFromCache($url)
Definition: Parser.php:4128
const OT_PLAIN
Definition: Defines.php:230
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
fetchCurrentRevisionOfTitle($title)
Fetch the current revision of a given title.
Definition: Parser.php:3891
$mRevisionTimestamp
Definition: Parser.php:220
$mImageParams
Definition: Parser.php:151
getDBkey()
Get the main part with underscores.
Definition: Title.php:952
stripAltText($caption, $holders)
Definition: Parser.php:5736
doAllQuotes($text)
Replace single quotes with HTML markup.
Definition: Parser.php:1569
static normalizeUrlComponent($component, $unsafe)
Definition: Parser.php:1941
if($limit) $timestamp
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:73
wfGetDB($db, $groups=array(), $wiki=false)
Get a Database object.
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:975
$mInPre
Definition: Parser.php:186
setHook($tag, $callback)
Create an HTML-style tag, e.g.
Definition: Parser.php:5116
const OT_WIKI
Definition: Defines.php:227
Preprocessor $mPreprocessor
Definition: Parser.php:172
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:893
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of normal(non-web) applications--they might conflict with distributors'policies
static getInstance($ts=false)
Get a timestamp instance in GMT.
const NS_MEDIA
Definition: Defines.php:57
closeList($char)
Definition: Parser.php:2510
$res
Definition: database.txt:21
static linkKnown($target, $html=null, $customAttribs=array(), $query=array(), $options=array( 'known', 'noclasses'))
Identical to link(), except $options defaults to 'known'.
Definition: Linker.php:263
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:59
replaceVariables($text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3336
const RECOVER_ORIG
wfMatchesDomainList($url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
MediaWiki exception.
Definition: MWException.php:26
StripState $mStripState
Definition: Parser.php:184
$mDefaultSort
Definition: Parser.php:194
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:881
static run($event, array $args=array(), $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
incrementIncludeSize($type, $size)
Increment an include size counter.
Definition: Parser.php:4305
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:992
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
const EXT_IMAGE_REGEX
Definition: Parser.php:95
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:5053