MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
67 class Parser {
/** Parser version string. */
const VERSION = '1.6.4';

# Flags for Parser::setFunctionHook
const SFH_NO_HASH = 1;
const SFH_OBJECT_ARGS = 2;

# Constants needed for external link processing
# Everything except bracket, space, or control characters.
# \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
# as well as U+3000 IDEOGRAPHIC SPACE (bug 19052).
const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
# Simplified expression to match an IPv4 or IPv6 address, or
# at least one character of a host name (embeds EXT_LINK_URL_CLASS)
const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
# RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR).
# The pattern uses the /x flag, so the line break inside it is ignored.
const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
	\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';

# Regular expression for a non-newline space
const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';

# State constants for the definition list colon extraction
const COLON_STATE_TEXT = 0;
const COLON_STATE_TAG = 1;

# Flags for preprocessToDom
const PTD_FOR_INCLUSION = 1;

# Allowed values for $this->mOutputType
# Parameter to startExternalParse().
const OT_HTML = 1; # like parse()
const OT_WIKI = 2; # like preSaveTransform()
# Used by preprocess() and setOutputType(); shares its value with OT_MSG.
const OT_PREPROCESS = 3; # like preprocess()
const OT_MSG = 3;
const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.

# Strip markers are built as MARKER_PREFIX . <identifier> . MARKER_SUFFIX.
# Both ends contain U+007F DELETE, which parse() removes from input text.
const MARKER_SUFFIX = "-QINU\x7f";
const MARKER_PREFIX = "\x7fUNIQ-";

# Markers used for wrapping the table of contents
const TOC_START = '<mw:toc>';
const TOC_END = '</mw:toc>';

# Persistent:
public $mTagHooks = array();
# Keys are passed to Sanitizer::removeHTMLtags() by internalParse()
public $mTransparentTagHooks = array();
public $mFunctionHooks = array();
public $mFunctionSynonyms = array( 0 => array(), 1 => array() );
public $mStripList = array();
public $mVarCache = array();
public $mImageParams = array();
public $mMarkerIndex = 0;
public $mFirstCall = true;

# Initialised by initialiseVariables()

public $mVariables;
public $mSubstWords;
# Initialised in constructor
public $mConf;
public $mUrlProtocols;
public $mExtLinkBracketedRegex;
public $mPreprocessorClass;

# Initialized in getPreprocessor()
public $mPreprocessor;

# Cleared with clearState():
public $mOutput;
public $mAutonumber;
public $mDTopen;

public $mStripState;
public $mIncludeCount;
public $mArgStack;
public $mLastSection;
public $mInPre;
public $mLinkHolders;

public $mLinkID;
public $mIncludeSizes;
public $mPPNodeCount;
public $mGeneratedPPNodeCount;
public $mHighestExpansionDepth;
public $mDefaultSort;
public $mTplRedirCache;
public $mTplDomCache;
public $mHeadings;
public $mDoubleUnderscores;
public $mExpensiveFunctionCount; # number of expensive parser function calls
public $mShowToc;
public $mForceTocPosition;
public $mUser; # User object; only used when doing pre-save transform
# Temporary
# These are variables reset at least once per parse regardless of $clearState
public $mOptions;

public $mTitle; # Title context, used for self-link rendering and similar things
public $mOutputType; # Output type, one of the OT_xxx constants
public $ot; # Shortcut alias, see setOutputType()
public $mRevisionObject; # The revision object of the specified revision ID
public $mRevisionId; # ID to display in {{REVISIONID}} tags
public $mRevisionTimestamp; # The timestamp of the specified revision ID
public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
public $mInputSize = false; # For {{PAGESIZE}} on current page.

public $mLangLinkLanguages;

public $currentRevisionCache;

# Cleared by __clone(); presumably set while a parse is in progress - confirm against lock()
public $mInParse = false;

# SectionProfiler instance, recreated by clearState()
protected $mProfiler;
253 
/**
 * Store the configuration, build the regex for bracketed external links
 * and pick the preprocessor implementation.
 *
 * @param array $conf Configuration array; an optional 'preprocessorClass'
 *   entry names the preprocessor class to use
 */
public function __construct( $conf = array() ) {
	$this->mConf = $conf;

	$protocols = wfUrlProtocols();
	$this->mUrlProtocols = $protocols;
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $protocols . ')'
		. self::EXT_LINK_ADDR
		. self::EXT_LINK_URL_CLASS
		. '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';

	# Preprocessor_Hash is the fallback; the checks below run in priority order.
	$chosenClass = 'Preprocessor_Hash';
	if ( isset( $conf['preprocessorClass'] ) ) {
		# An explicit configuration always wins
		$chosenClass = $conf['preprocessorClass'];
	} elseif ( defined( 'HPHP_VERSION' ) ) {
		# Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop,
		# so the fallback is kept
	} elseif ( extension_loaded( 'domxml' ) ) {
		# PECL extension that conflicts with the core DOM extension (bug 13770)
		wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
	} elseif ( extension_loaded( 'dom' ) ) {
		$chosenClass = 'Preprocessor_DOM';
	}
	$this->mPreprocessorClass = $chosenClass;

	wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
279 
/**
 * Unset every property of this object, starting with the link holder
 * array, so the objects they refer to are no longer held by the parser.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}

	$propertyNames = array_keys( get_object_vars( $this ) );
	foreach ( $propertyNames as $propertyName ) {
		unset( $this->$propertyName );
	}
}
291 
/**
 * Reset per-instance state on the clone, detach fields that may have been
 * turned into references, and run the 'ParserCloned' hook.
 */
public function __clone() {
	$this->mInParse = false;

	// Bug 56226: taking a reference to an object field turns the field
	// itself into a reference (for as long as the other reference lives),
	// and cloning copies such a field as a reference, so the clone would
	// share it with the original. Hooks dating back to PHP4 still pass
	// these fields by reference, so break the sharing explicitly.
	$fieldsToDetach = array( 'mStripState', 'mVarCache' );
	foreach ( $fieldsToDetach as $fieldName ) {
		// Copy the value out of the (possibly shared) reference, then
		// rebind the field to that private copy.
		$privateCopy = $this->$fieldName;
		$this->$fieldName =& $privateCopy;
		unset( $privateCopy );
	}

	Hooks::run( 'ParserCloned', array( $this ) );
}
314 
/**
 * Do the one-off initialisation of this parser: register the core tag
 * hooks, initialise the variables and run the 'ParserFirstCallInit' hook.
 * Calls after the first one return immediately.
 */
public function firstCallInit() {
	if ( !$this->mFirstCall ) {
		return;
	}
	$this->mFirstCall = false;

	CoreTagHooks::register( $this );
	$this->initialiseVariables();

	// Since PHP 7.1, &$this is no longer passed by reference, which breaks
	// hook handlers declared with a by-reference parser parameter. Pass a
	// reference to a local variable holding the same object instead.
	$parser = $this;
	Hooks::run( 'ParserFirstCallInit', array( &$parser ) );
}
330 
/**
 * Reset all per-parse state of the parser and run the 'ParserClearState'
 * hook. Performs the first-call initialisation if it has not happened yet.
 *
 * Requires $this->mOptions to be set already: the new output object is
 * registered as a watcher on it.
 */
public function clearState() {
	if ( $this->mFirstCall ) {
		$this->firstCallInit();
	}
	$this->mOutput = new ParserOutput;
	$this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );
	$this->mAutonumber = 0;
	$this->mLastSection = '';
	$this->mDTopen = false;
	$this->mIncludeCount = array();
	$this->mArgStack = false;
	$this->mInPre = false;
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;
	$this->mRevisionObject = $this->mRevisionTimestamp =
		$this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
	$this->mVarCache = array();
	$this->mUser = null;
	$this->mLangLinkLanguages = array();
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState;

	# Clear these on every parse, bug 4549
	$this->mTplRedirCache = $this->mTplDomCache = array();

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = array(
		'post-expand' => 0,
		'arg' => 0,
	);
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = array();
	$this->mDoubleUnderscores = array();
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: drop a preprocessor that is bound to a different parser
	# object so that getPreprocessor() creates a fresh one for this parser.
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Since PHP 7.1, &$this is no longer passed by reference, which breaks
	// hook handlers declared with a by-reference parser parameter.
	$parser = $this;
	Hooks::run( 'ParserClearState', array( &$parser ) );
}
385 
/**
 * Convert wikitext to HTML and store the result in the parser output.
 *
 * Runs the strip/parse hooks, internalParse() and
 * internalParseHalfParsed(), sets the converted title text where
 * applicable, and, when the limit report is enabled, appends the limit
 * report and the transclusion profiling report as HTML comments.
 *
 * @param string $text Wikitext to parse
 * @param Title $title Title passed to startParse() and used for title conversion
 * @param ParserOptions $options Options passed to startParse()
 * @param bool $linestart Passed through to internalParseHalfParsed()
 * @param bool $clearState Passed to startParse(); when true the input is
 *   also cleaned of U+007F and lock() is called
 * @param int|null $revid When not null, used as the revision ID for the
 *   duration of this call (the previous revision fields are restored afterwards)
 * @return ParserOutput The object held in $this->mOutput
 */
public function parse( $text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	// Read in the limit report below; without this declaration the
	// variable would be an undefined local.
	global $wgShowHostnames;

	// Since PHP 7.1, &$this is no longer passed by reference, which breaks
	// hook handlers declared with a by-reference parser parameter. Hooks
	// get a reference to this local variable instead.
	$parser = $this;

	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// NOTE(review): the return value is only kept alive in this local;
		// presumably the lock is released when it goes out of scope - confirm
		// against lock().
		$magicScopeVariable = $this->lock();
	}

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the revision fields so they can be restored before returning
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	Hooks::run( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', array( &$parser, &$text, &$this->mStripState ) );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Set the converted title text unless title conversion is disabled by
	// the options or a double-underscore switch, or a display title is
	// already set on the output.
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();

		$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuTime !== null ) {
			$this->mOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
		$this->mOutput->setLimitReportData( 'limitreport-walltime',
			sprintf( "%.3f", $wallTime )
		);

		$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
			array( $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
			array( $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
			array( $this->mIncludeSizes['post-expand'], $max )
		);
		$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
			array( $this->mIncludeSizes['arg'], $max )
		);
		$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
			array( $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
			array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() )
		);
		Hooks::run( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) );

		$limitReport = "NewPP limit report\n";
		if ( $wgShowHostnames ) {
			$limitReport .= 'Parsed by ' . wfHostname() . "\n";
		}
		$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
		$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
		$limitReport .= 'Dynamic content: ' . ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) . "\n";
		foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
			// A hook may format the entry itself; the default formatting
			// below is only applied when the hook run returns true.
			if ( Hooks::run( 'ParserLimitReportFormat',
				array( $key, &$value, &$limitReport, false, false )
			) ) {
				$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
				$valueMsg = wfMessage( array( "$key-value-text", "$key-value" ) )
					->inLanguage( 'en' )->useDatabase( false );
				if ( !$valueMsg->exists() ) {
					$valueMsg = new RawMessage( '$1' );
				}
				if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
					$valueMsg->params( $value );
					$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
				}
			}
		}
		// Since we're not really outputting HTML, decode the entities and
		// then re-encode the things that need hiding inside HTML comments.
		$limitReport = htmlspecialchars_decode( $limitReport );
		Hooks::run( 'ParserLimitReport', array( $this, &$limitReport ) );

		// Sanitize for comment. Note '‐' in the replacement is U+2010,
		// which looks much like the problematic '-'.
		$limitReport = str_replace( array( '-', '&' ), array( '‐', '&amp;' ), $limitReport );
		$text .= "\n<!-- \n$limitReport-->\n";

		// Add on template profiling data
		$dataByFunc = $this->mProfiler->getFunctionStats();
		uasort( $dataByFunc, function ( $a, $b ) {
			// Descending order by 'real'. The comparator has to return an
			// integer; a boolean result can never signal "greater than".
			if ( $a['real'] == $b['real'] ) {
				return 0;
			}
			return ( $a['real'] < $b['real'] ) ? 1 : -1;
		} );
		$profileReport = "Transclusion expansion time report (%,ms,calls,template)\n";
		foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
			$profileReport .= sprintf( "%6.2f%% %8.3f %6d - %s\n",
				$item['%real'], $item['real'], $item['calls'],
				htmlspecialchars( $item['name'] ) );
		}
		$text .= "\n<!-- \n$profileReport-->\n";

		// Log pages that generate more than a tenth of the allowed node count
		if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
			wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
				$this->mTitle->getPrefixedDBkey() );
		}
	}
	$this->mOutput->setText( $text );

	// Restore the revision fields saved above and clear per-call state
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
/**
 * Run the strip hooks and internalParse() on a piece of text without
 * resetting the parser state. The result has not been through
 * internalParseHalfParsed().
 *
 * @param string $text Text to parse
 * @param PPFrame|bool $frame Frame handed to internalParse(), or false for none
 * @return string The text as returned by internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Since PHP 7.1, &$this is no longer passed by reference, which breaks
	// hook handlers declared with a by-reference parser parameter.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
	Hooks::run( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
	$text = $this->internalParse( $text, false, $frame );
	return $text;
}
600 
/**
 * Like recursiveTagParse(), but the result is additionally passed through
 * internalParseHalfParsed() with $isMain set to false.
 *
 * @param string $text Text to parse
 * @param PPFrame|bool $frame Frame handed to recursiveTagParse(), or false for none
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
623 
/**
 * Expand templates and variables in the text and return the result with
 * strip markers replaced. Resets the parser state (startParse() is called
 * with $clearState = true).
 *
 * NOTE(review): $title has a default while the following $options has
 * none, so callers must always pass both; the signature is kept as is for
 * compatibility.
 *
 * @param string $text Text to preprocess
 * @param Title|null $title Title passed to startParse()
 * @param ParserOptions $options Options passed to startParse()
 * @param int|null $revid Revision ID to use, if not null
 * @param PPFrame|bool $frame Frame handed to replaceVariables(), or false for none
 * @return string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	// Since PHP 7.1, &$this is no longer passed by reference, which breaks
	// hook handlers declared with a by-reference parser parameter.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
	Hooks::run( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
649 
/**
 * Replace variables in the text and then replace strip markers, without
 * resetting the parser state.
 *
 * @param string $text Text to preprocess
 * @param PPFrame|bool $frame Frame handed to replaceVariables(), or false for none
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
664 
/**
 * Process text for use as preload text: substitute $params into the text
 * as message parameters, then expand the preprocessor DOM without
 * expanding arguments or templates.
 *
 * @param string $text Text to process
 * @param Title $title Title passed to startParse()
 * @param ParserOptions $options Options passed to startParse()
 * @param array $params Values substituted through RawMessage::params()
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = array() ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	# Expansion flags; without this assignment expand() would receive an
	# undefined variable.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
692 
/**
 * Get a random string from wfRandomString( 16 ).
 *
 * @deprecated since 1.26
 * @return string
 */
public static function getRandomString() {
	wfDeprecated( __METHOD__, '1.26' );
	$random = wfRandomString( 16 );
	return $random;
}
703 
/**
 * Set the user that getUser() returns in preference to the user of the
 * parser options.
 *
 * @param User|null $user User object, or null to fall back to the options' user
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
713 
/**
 * Accessor for the strip marker prefix.
 *
 * @deprecated since 1.26; use the Parser::MARKER_PREFIX constant
 * @return string
 */
public function uniqPrefix() {
	wfDeprecated( __METHOD__, '1.26' );
	$prefix = self::MARKER_PREFIX;
	return $prefix;
}
724 
/**
 * Set the context title. A falsy value is replaced by the title
 * 'NO TITLE'; a title with a fragment is stored as a clone without it.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	if ( !$t ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	if ( !$t->hasFragment() ) {
		$this->mTitle = $t;
		return;
	}

	# Strip the fragment to avoid various odd effects; work on a clone so
	# the caller's object is left untouched
	$this->mTitle = clone $t;
	$this->mTitle->setFragment( '' );
}
743 
/**
 * Accessor for the context title.
 *
 * @return Title The value of $this->mTitle
 */
public function getTitle() {
	$title = $this->mTitle;
	return $title;
}
752 
/**
 * Combined accessor/mutator for the context title, implemented with
 * wfSetVar() on $this->mTitle.
 *
 * @param Title|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );
	return $result;
}
/**
 * Set the output type and rebuild the $this->ot shortcut array, which
 * holds one boolean per output type name.
 *
 * @param int $ot One of the OT_xxx constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias: key => output type constant it stands for
	$typeByKey = array(
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	);
	$shortcuts = array();
	foreach ( $typeByKey as $key => $type ) {
		$shortcuts[$key] = ( $ot == $type );
	}
	$this->ot = $shortcuts;
}
778 
/**
 * Combined accessor/mutator for the output type, implemented with
 * wfSetVar() on $this->mOutputType. Note that, unlike setOutputType(),
 * this does not touch $this->ot.
 *
 * @param int|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
788 
/**
 * Accessor for the current output object.
 *
 * @return ParserOutput The value of $this->mOutput
 */
public function getOutput() {
	$output = $this->mOutput;
	return $output;
}
/**
 * Accessor for the current parser options.
 *
 * @return ParserOptions The value of $this->mOptions
 */
public function getOptions() {
	// The body was empty, so callers always received null.
	return $this->mOptions;
}
806 
/**
 * Combined accessor/mutator for the parser options, implemented with
 * wfSetVar() on $this->mOptions.
 *
 * @param ParserOptions|null $x Value handed to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
816 
/**
 * Return the current link ID and advance the counter by one.
 *
 * @return int The link ID before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
823 
/**
 * Set the link ID counter that nextLinkID() hands out next.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
830 
/**
 * Get the language used by parser functions; this is the same object
 * that getTargetLanguage() returns.
 *
 * @return Language
 */
public function getFunctionLang() {
	$language = $this->getTargetLanguage();
	return $language;
}
838 
/**
 * Get the target language of the parse. In order of preference: the
 * target language set in the options, the user language when parsing an
 * interface message, and otherwise the page language of the title.
 *
 * @throws MWException When the title would be needed but is null
 * @return Language
 */
public function getTargetLanguage() {
	$options = $this->mOptions;

	$target = $options->getTargetLanguage();
	if ( $target !== null ) {
		return $target;
	}

	if ( $options->getInterfaceMessage() ) {
		return $options->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
861 
/**
 * Get the language object used for language conversion; this is the same
 * object that getTargetLanguage() returns.
 *
 * @return Language
 */
public function getConverterLanguage() {
	$language = $this->getTargetLanguage();
	return $language;
}
869 
/**
 * Get the user for this parse: the one set on the parser if there is
 * one, otherwise the user of the parser options.
 *
 * @return User
 */
public function getUser() {
	if ( $this->mUser === null ) {
		return $this->mOptions->getUser();
	}
	return $this->mUser;
}
882 
/**
 * Get the preprocessor object, creating it from the class chosen in the
 * constructor on first use.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->mPreprocessorClass;
	$this->mPreprocessor = new $preprocessorClass( $this );
	return $this->mPreprocessor;
}
895 
/**
 * Replace each occurrence of the given elements (and each HTML comment)
 * in the text with a unique marker and report what was removed.
 *
 * Every entry of $matches is keyed by the marker and holds
 * array( element name, content, decoded attributes, original text ).
 * For a self-closing tag the content is null; when the end tag is
 * missing the element runs to the end of the text.
 *
 * @param array $elements Tag names to extract; they are joined with '|'
 *   into a regex without quoting
 * @param string $text Text to scan
 * @param array &$matches Receives the extracted items; overwritten
 * @param string|null $uniq_prefix Unused; passing a non-null value
 *   triggers a deprecation warning (deprecated since 1.26)
 * @return string The text with the extracted items replaced by markers
 */
public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) {
	if ( $uniq_prefix !== null ) {
		wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' );
	}
	# Shared across calls so that every marker is unique
	static $markerCount = 1;

	$matches = array();
	$output = '';

	$taglist = implode( '|', $elements );
	$startRegex = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

	while ( $text != '' ) {
		$pieces = preg_split( $startRegex, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$output .= $pieces[0];

		$pieceCount = count( $pieces );
		if ( $pieceCount < 5 ) {
			# Nothing (more) to extract
			break;
		}

		if ( $pieceCount > 5 ) {
			# The fourth capture group took part in the match: a comment
			$element = $pieces[4];
			$attributes = '';
			$close = '';
			$inside = $pieces[5];
		} else {
			# One of the requested tags
			$element = $pieces[1];
			$attributes = $pieces[2];
			$close = $pieces[3];
			$inside = $pieces[4];
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $markerCount ) . self::MARKER_SUFFIX;
		$markerCount++;
		$output .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$tail = null;
			$text = $inside;
		} else {
			$endRegex = ( $element === '!--' ) ? '/(-->)/' : "/(<\\/$element\\s*>)/i";
			$remainder = preg_split( $endRegex, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $remainder[0];

			# Without an end tag the element runs out to the end of the text
			$hasEndTag = count( $remainder ) >= 3;
			$tail = $hasEndTag ? $remainder[1] : '';
			$text = $hasEndTag ? $remainder[2] : '';
		}

		$matches[$marker] = array(
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail"
		);
	}

	return $output;
}
981 
/**
 * Accessor for the strip list.
 *
 * @return array The value of $this->mStripList
 */
public function getStripList() {
	$stripList = $this->mStripList;
	return $stripList;
}
990 
/**
 * Register a piece of text with the strip state under a new marker and
 * return that marker. The marker index is incremented on every call.
 *
 * @param string $text Text stored through StripState::addGeneral()
 * @return string The marker that stands for the text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1006 
/**
 * Convert wikitext table markup ({| ... |}) into HTML table markup.
 *
 * The text is processed line by line. Per-table state is kept on parallel
 * stacks (one entry per currently open table) so that nested tables work.
 * Tables still open at the end of the text are closed automatically.
 *
 * @param string $text
 * @return string The text with table markup replaced by HTML
 */
public function doTableStuff( $text ) {

	$lines = StringUtils::explode( "\n", $text );
	$out = '';
	$td_history = array(); # Is currently a td tag open?
	$last_tag_history = array(); # Save history of last tag activated (td, th or caption)
	$tr_history = array(); # Is currently a tr tag open?
	$tr_attributes = array(); # history of tr attributes
	$has_opened_tr = array(); # Did this table open a <tr> element?
	$indent_level = 0; # indent level of the table

	foreach ( $lines as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) { # empty line, go to next line
			$out .= $outLine . "\n";
			continue;
		}

		$first_character = $line[0];
		$first_two = substr( $line, 0, 2 );
		$matches = array();

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
			# First check if we are starting a new table
			$indent_level = strlen( $matches[1] );

			$attributes = $this->mStripState->unstripBoth( $matches[2] );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

			$outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
			array_push( $tr_history, false );
			array_push( $tr_attributes, '' );
			array_push( $has_opened_tr, false );
		} elseif ( count( $td_history ) == 0 ) {
			# Not inside a table: don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( $first_two === '|}' ) {
			# We are ending a table
			$line = '</table>' . substr( $line, 2 );
			$last_tag = array_pop( $last_tag_history );

			# A table must contain at least one row
			if ( !array_pop( $has_opened_tr ) ) {
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $tr_history ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}
			array_pop( $tr_attributes );
			$outLine = $line . str_repeat( '</dd></dl>', $indent_level );
		} elseif ( $first_two === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes
			$attributes = $this->mStripState->unstripBoth( $line );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
			array_pop( $tr_attributes );
			array_push( $tr_attributes, $attributes );

			$line = '';
			$last_tag = array_pop( $last_tag_history );
			array_pop( $has_opened_tr );
			array_push( $has_opened_tr, true );

			if ( array_pop( $tr_history ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}

			# The <tr> itself is only emitted once the first cell arrives
			$outLine = $line;
			array_push( $tr_history, false );
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
		} elseif ( $first_character === '|'
			|| $first_character === '!'
			|| $first_two === '|+'
		) {
			# This might be cell elements, td, th or captions
			if ( $first_two === '|+' ) {
				$first_character = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			if ( $first_character === '!' ) {
				$line = str_replace( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps, but should avoid splitting up eg
			# attribute values containing literal "||".
			$cells = StringUtils::explodeMarkup( '||', $line );

			$outLine = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$previous = '';
				if ( $first_character !== '+' ) {
					# Cells (unlike captions) live in a row: open one if needed
					$tr_after = array_pop( $tr_attributes );
					if ( !array_pop( $tr_history ) ) {
						$previous = "<tr{$tr_after}>\n";
					}
					array_push( $tr_history, true );
					array_push( $tr_attributes, '' );
					array_pop( $has_opened_tr );
					array_push( $has_opened_tr, true );
				}

				$last_tag = array_pop( $last_tag_history );

				# Close the previous cell or caption, if one is open
				if ( array_pop( $td_history ) ) {
					$previous = "</{$last_tag}>\n{$previous}";
				}

				if ( $first_character === '|' ) {
					$last_tag = 'td';
				} elseif ( $first_character === '!' ) {
					$last_tag = 'th';
				} elseif ( $first_character === '+' ) {
					$last_tag = 'caption';
				} else {
					$last_tag = '';
				}

				array_push( $last_tag_history, $last_tag );

				# A cell could contain both parameters and data
				$cell_data = explode( '|', $cell, 2 );

				# Bug 553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				if ( strpos( $cell_data[0], '[[' ) !== false ) {
					$cell = "{$previous}<{$last_tag}>{$cell}";
				} elseif ( count( $cell_data ) == 1 ) {
					$cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
				} else {
					$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
					$attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
					$cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
				}

				$outLine .= $cell;
				array_push( $td_history, true );
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open td/th/caption, tr && table
	while ( count( $td_history ) > 0 ) {
		# Close whichever element is actually open; this used to emit
		# </td> even when the open element was a <th> or a <caption>.
		$last_tag = array_pop( $last_tag_history );
		if ( array_pop( $td_history ) ) {
			$out .= "</{$last_tag}>\n";
		}
		if ( array_pop( $tr_history ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $has_opened_tr ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1204 
/**
 * Helper for parse() that transforms wikitext into half-parsed HTML:
 * variables are expanded, HTML is sanitized, and tables, horizontal
 * rules, double underscores, headings, links, quotes and magic links are
 * processed. Block levels and link holders are handled later by
 * internalParseHalfParsed().
 *
 * @param string $text Wikitext to transform
 * @param bool $isMain Passed through to formatHeadings()
 * @param PPFrame|bool $frame Frame to expand the text in, or false to use
 *   replaceVariables()
 * @return string The half-parsed text; returned unchanged apart from hook
 *   modifications if the 'ParserBeforeInternalParse' hook aborts
 */
public function internalParse( $text, $isMain = true, $frame = false ) {

	$origText = $text;

	// Since PHP 7.1, &$this is no longer passed by reference, which breaks
	// hook handlers declared with a by-reference parser parameter. Hooks
	// get a reference to this local variable instead.
	$parser = $this;

	# Hook to suspend the parser in this state
	if ( !Hooks::run( 'ParserBeforeInternalParse', array( &$parser, &$text, &$this->mStripState ) ) ) {
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		if ( !$frame->depth ) {
			$flag = 0;
		} else {
			$flag = Parser::PTD_FOR_INCLUSION;
		}
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	Hooks::run( 'InternalParseBeforeSanitize', array( &$parser, &$text, &$this->mStripState ) );
	$text = Sanitizer::removeHTMLtags(
		$text,
		# An object in a callback array does not need to be a reference
		array( $this, 'attributeStripCallback' ),
		false,
		array_keys( $this->mTransparentTagHooks )
	);
	Hooks::run( 'InternalParseBeforeLinks', array( &$parser, &$text, &$this->mStripState ) );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );
	$text = $this->formatHeadings( $text, $origText, $isMain );

	return $text;
}
1275 
/**
 * Helper for parse() that finishes half-parsed text: replaces strip
 * markers, fixes up French spacing, builds block levels, replaces link
 * holders, runs language conversion, and tidies the resulting HTML.
 *
 * @param string $text Half-parsed text, e.g. the result of internalParse()
 * @param bool $isMain Whether the ParserAfterUnstrip, ParserBeforeTidy and
 *   ParserAfterTidy hooks are run
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	// Read in the tidy step below; without this declaration both
	// variables would be undefined locals.
	global $wgUseTidy, $wgAlwaysUseTidy;

	// Since PHP 7.1, &$this is no longer passed by reference, which breaks
	// hook handlers declared with a by-reference parser parameter. Hooks
	// get a reference to this local variable instead.
	$parser = $this;

	$text = $this->mStripState->unstripGeneral( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', array( &$parser, &$text ) );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$fixtags = array(
		# french spaces, last one Guillemet-left
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# french spaces, Guillemet-right
		'/(\\302\\253) /' => '\\1&#160;',
		'/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
	);
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	# Convert the content to the target language variant unless conversion
	# is disabled by the options or by a double-underscore switch.
	if ( !( $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
	) {
		if ( !$this->mOptions->getInterfaceMessage() ) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert( $text );
		}
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', array( &$parser, &$text ) );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
		$text = MWTidy::tidy( $text );
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
			'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (bug 3695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
			'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
			'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		);

		$text = preg_replace(
			array_keys( $tidyregs ),
			array_values( $tidyregs ),
			$text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', array( &$parser, &$text ) );
	}

	return $text;
}
1374 
/**
 * Run one combined regular expression over $text and pass every match to
 * magicLinkCallback(), which produces the replacement.
 *
 * The alternatives of the expression, in order:
 *  - m[1]: a whole <a ...>...</a> element
 *  - m[2]: any other <...> tag
 *  - m[3]: a free external link starting with one of the protocols returned
 *    by wfUrlProtocolsWithoutProtRel(); m[4] is the part after the protocol
 *  - m[5]: the number following "RFC" or "PMID"
 *  - m[6]: the 10- or 13-digit number following "ISBN", with optional
 *    dash/space delimiters between digits
 *
 * @param string $text
 * @return string The result of preg_replace_callback()
 */
1386  public function doMagicLinks( $text ) {
1387  $prots = wfUrlProtocolsWithoutProtRel();
1388  $urlChar = self::EXT_LINK_URL_CLASS;
1389  $addr = self::EXT_LINK_ADDR;
1390  $space = self::SPACE_NOT_NL; # non-newline space
1391  $spdash = "(?:-|$space)"; # a dash or a non-newline space
1392  $spaces = "$space++"; # possessive match of 1 or more spaces
 # The pattern is compiled with the x (extended) and u (UTF-8) modifiers.
 # Its first segment is single-quoted and the remainder double-quoted, so
 # only the second part interpolates the variables prepared above.
1393  $text = preg_replace_callback(
1394  '!(?: # Start cases
1395  (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
1396  (<.*?>) | # m[2]: Skip stuff inside
1397  # HTML elements' . "
1398  (\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
1399  # m[4]: Post-protocol path
1400  \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
1401  ([0-9]+)\b |
1402  \bISBN $spaces ( # m[6]: ISBN, capture number
1403  (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
1404  (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
1405  [0-9Xx] # check digit
1406  )\b
1407  )!xu", array( &$this, 'magicLinkCallback' ), $text );
1408  return $text;
1409  }
1410 
/**
 * Callback for the preg_replace_callback() in doMagicLinks(): turn a single
 * match into its replacement text.
 *
 * @param array $m Match array; m[1]..m[6] as labelled in the doMagicLinks()
 *   pattern
 * @throws MWException If m[5] matched but the text starts with neither
 *   "RFC" nor "PMID"
 * @return string HTML for the link, or the untouched match
 */
public function magicLinkCallback( $m ) {
	# m[1] is a complete anchor element, m[2] any other HTML tag:
	# both are passed through unchanged.
	if ( ( isset( $m[1] ) && $m[1] !== '' ) || ( isset( $m[2] ) && $m[2] !== '' ) ) {
		return $m[0];
	}

	if ( isset( $m[3] ) && $m[3] !== '' ) {
		# Free external link; m[4] is the part after the protocol
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	if ( isset( $m[5] ) && $m[5] !== '' ) {
		# RFC or PMID
		$id = $m[5];
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
	}

	if ( isset( $m[6] ) && $m[6] !== '' ) {
		# ISBN
		$space = self::SPACE_NOT_NL; # non-newline space
		# The u modifier is required here: without it \p{Zs} is tested
		# against single bytes, so the 0xA0 byte of a UTF-8 encoded
		# non-breaking space (0xC2 0xA0) would be replaced on its own,
		# leaving a stray 0xC2 in the output. $m[6] comes from a /u match
		# in doMagicLinks(), so it is valid UTF-8.
		$isbn = preg_replace( "/$space/u", ' ', $m[6] );
		$num = strtr( $isbn, array(
			'-' => '',
			' ' => '',
			'x' => 'X',
		) );
		$titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
		return '<a href="' .
			htmlspecialchars( $titleObj->getLocalURL() ) .
			"\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>";
	}

	return $m[0];
}
1462 
/**
 * Build the HTML for a free (unbracketed) external link.
 *
 * Anything from the first "&lt;" / "&gt;" onwards, and trailing
 * punctuation, is split off the URL and appended after the link as plain
 * text. If nothing but the protocol would remain, the input is returned
 * unlinked.
 *
 * @param string $url The matched text, protocol included
 * @param int $numPostProto Length of the part that followed the protocol
 * @return string HTML
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by removeHTMLtags())
	# should not be included in URLs, per RFC 2396.
	$found = array();
	if ( preg_match( '/&(lt|gt);/', $url, $found, PREG_OFFSET_CAPTURE ) ) {
		$cut = $found[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Characters that get moved from the end of the URL into $trail.
	# (The mask is used by strspn(), so the backslash in it is itself one
	# of the characters.) A right bracket only counts when the URL has no
	# left bracket.
	$sep = ',;\.:!?';
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$reversed = strrev( $url );
	$numSepChars = strspn( $reversed, $sep );
	# Don't break a trailing HTML entity by moving its ';' into $trail.
	# The entity test runs on the reversed string from the offset just
	# past the separator run; the un-reversed equivalent is
	# /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $numSepChars
		&& substr_compare( $url, ";", -$numSepChars, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $found, 0, $numSepChars )
	) {
		$numSepChars--;
	}
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	# If everything after the protocol ended up in the trail, there is no
	# real URL left to link.
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# An external image is emitted as-is and is not registered as a link
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html !== false ) {
		return $html . $trail;
	}

	$html = Linker::makeExternalLink(
		$url,
		$this->getConverterLanguage()->markNoConversion( $url, true ),
		true,
		'free',
		$this->getExternalLinkAttribs( $url )
	);
	# Register the link in the output object, with unnecessary URL escape
	# codes replaced by their equivalent characters
	$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );

	return $html . $trail;
}
1534 
/**
 * Convert lines of the form "== text ==" into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that the longest run of '='
 * wins.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( array( 6, 5, 4, 3, 2, 1 ) as $level ) {
		$marks = str_repeat( '=', $level );
		$pattern = "/^{$marks}(.+){$marks}\\s*$/m";
		$replacement = "<h{$level}>\\1</h{$level}>";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	return $text;
}
1551 
/**
 * Apply doQuotes() to every line of $text separately and join the results
 * with newlines again.
 *
 * @param string $text
 * @return string
 */
public function doAllQuotes( $text ) {
	$converted = array();
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1569 
/**
 * Convert runs of apostrophes in a single line into <i> and <b> tags.
 *
 * Two apostrophes toggle italics, three toggle bold and five toggle both.
 * Runs of four, or of more than five, are reduced to three or five with
 * the surplus apostrophes kept as text.
 *
 * @param string $text One line of text
 * @return string The line with the apostrophe mark-up replaced by HTML
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold the apostrophe runs, even indexes the text
	# between them.
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				# Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state names the currently open tags in opening order ('', 'i', 'b',
	// 'bi', 'ib'). 'both' means a ''''' was seen and the order of the two
	// tags is not decided yet; text is collected in $buffer until it is.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against the empty string explicitly: a plain truthiness
	// test would also discard a buffer consisting of the text "0".
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1751 
/**
 * Replace bracketed external links, as matched by
 * $this->mExtLinkBracketedRegex, with HTML links and register each of
 * them in the parser output.
 *
 * @param string $text
 * @throws MWException If the regular expression cannot be applied
 * @return string
 */
public function replaceExternalLinks( $text ) {
	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $pieces === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# The first piece is the text before the first link. After it the
	# pieces come in groups of four: URL, protocol, link text, and the
	# text that follows the link.
	$out = array_shift( $pieces );
	$total = count( $pieces );

	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $pieces[$pos];
		# $pieces[$pos + 1] is the protocol, which is not needed here
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$found = array();
		if ( preg_match( '/&(lt|gt);/', $url, $found, PREG_OFFSET_CAPTURE ) ) {
			$cut = $found[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$dtrail = '';
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Link type for CSS - if URL==text, link is essentially free
			$linktype = ( $label === $url ) ? 'free' : 'text';
			# Check for trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		$label = $this->getConverterLanguage()->markNoConversion( $label );

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$attribs = $this->getExternalLinkAttribs( $url );
		$out .= Linker::makeExternalLink( $url, $label, false, $linktype, $attribs )
			. $dtrail . $trail;

		# Register link in the output object.
		# Replace unnecessary URL escape codes with the referenced character
		# This prevents spammers from hiding links from the filters
		$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
	}

	return $out;
}
1834 
/**
 * Get the rel attribute value for an external link.
 *
 * Returns 'nofollow' when $wgNoFollowLinks is on, the namespace of $title
 * is not listed in $wgNoFollowNsExceptions and $url does not match
 * $wgNoFollowDomainExceptions.
 *
 * @param string|bool $url URL of the link, passed to wfMatchesDomainList()
 * @param Title|null $title Page the link appears on; when omitted the
 *   namespace is taken to be false
 * @return string|null 'nofollow', or null when no rel value applies
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	# These settings are globals and must be imported explicitly; without
	# this declaration they would be undefined locals and the method would
	# always return null.
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	$ns = $title ? $title->getNamespace() : false;
	if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
1854 
/**
 * Collect the HTML attributes for an external link: 'rel' as computed by
 * getExternalLinkRel() for the current title, plus 'target' when the
 * parser options supply an external link target.
 *
 * @param string|bool $url
 * @return array Attribute name => value
 */
public function getExternalLinkAttribs( $url = false ) {
	$attribs = array(
		'rel' => self::getExternalLinkRel( $url, $this->mTitle ),
	);

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
	}

	return $attribs;
}
1874 
/**
 * Deprecated alias of normalizeLinkUrl().
 *
 * @deprecated since 1.24, use normalizeLinkUrl()
 * @param string $url
 * @return string
 */
public static function replaceUnusualEscapes( $url ) {
	wfDeprecated( __METHOD__, '1.24' );
	$normalized = self::normalizeLinkUrl( $url );
	return $normalized;
}
1886 
/**
 * Bring the percent-escapes of a URL into a normal form.
 *
 * Unsafe characters are percent-encoded first. The URL is then cut into
 * fragment, query and scheme/path parts, and in each part an escape is
 * decoded unless the character it stands for is in that part's unsafe
 * list or outside the printable ASCII range; escapes that stay get
 * upper-case hex digits.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# First, make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $hit ) {
			return rawurlencode( $hit[0] );
		},
		$url
	);

	# Components are peeled off from the end of the URL: first the
	# fragment, then the query ('query' minus &=+;). Each entry maps the
	# delimiter that starts the component to the characters that must
	# stay escaped inside it.
	$trailingParts = array(
		'#' => '"#%<>[\]^`{|}',
		'?' => '"#%<>[\]^`{|}&=+;',
	);

	$normalized = '';
	$end = strlen( $url );
	foreach ( $trailingParts as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start !== false && $start < $end ) {
			$part = substr( $url, $start, $end - $start );
			$normalized = self::normalizeUrlComponent( $part, $unsafe ) . $normalized;
			$end = $start;
		}
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$head = substr( $url, 0, $end );

	return self::normalizeUrlComponent( $head, '"#%<>[\]^`{|}/?' ) . $normalized;
}
1931 
/**
 * Decode the percent-escapes in one URL component where that is safe.
 *
 * An escape is replaced by its character only when that character has a
 * byte value between 33 and 126 and does not occur in $unsafe. All other
 * escapes are kept, with their hex digits in upper case.
 *
 * @param string $component
 * @param string $unsafe Characters that must remain escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $hit ) use ( $unsafe ) {
			$decoded = urldecode( $hit[0] );
			$code = ord( $decoded );
			$keepEscaped = $code <= 32 || $code >= 127
				|| strpos( $unsafe, $decoded ) !== false;

			return $keepEscaped ? strtoupper( $hit[0] ) : $decoded;
		},
		$component
	);
}
1946 
/**
 * Return the HTML for an external image if $url is an image URL that may
 * be embedded, either through the parser options or through a matching
 * entry of the 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|bool The value of Linker::makeExternalImage(), or false
 *   when the URL is not embedded as an image
 */
private function maybeMakeExternalImage( $url ) {
	# The option is either a single URL prefix or an array of prefixes
	$sources = $this->mOptions->getAllowExternalImagesFrom();
	$prefixAllowed = false;
	if ( !empty( $sources ) ) {
		$prefixes = is_array( $sources ) ? $sources : array( $sources );
		foreach ( $prefixes as $candidate ) {
			if ( strpos( $url, $candidate ) === 0 ) {
				$prefixAllowed = true;
				break;
			}
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixAllowed )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	# The whitelist is only consulted when nothing was produced above
	if ( $html
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $html;
	}

	$entries = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $entries as $entry ) {
		# Ignore blank entries and comments
		if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
			continue;
		}
		# Each entry is a regex fragment; escape the delimiter and match
		# case-insensitively
		$pattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
		if ( preg_match( $pattern, $url ) ) {
			# Image matches a whitelist entry
			return Linker::makeExternalImage( $url );
		}
	}

	return $html;
}
/**
 * Process [[ ]] wikilinks in $s and merge the link holders created for
 * them into $this->mLinkHolders.
 *
 * @param string $s
 * @return string The processed text
 */
public function replaceInternalLinks( $s ) {
	# replaceInternalLinks2() rewrites $s in place and returns the holders
	$holders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $holders );

	return $s;
}
2018 
/**
 * Process [[ ]] wikilinks in $s.
 *
 * The text is split on "[[" and each piece is examined in turn. Language
 * links, files, categories, self-links and media links are handled
 * inline; all remaining links are replaced by known-link HTML or by a
 * link holder.
 *
 * @param string &$s Text to process; rewritten in place
 * @throws MWException If $this->mTitle is null
 * @return LinkHolderArray Holders created while processing $s
 */
public function replaceInternalLinks2( &$s ) {
	# Both settings used below are globals and have to be imported;
	# without these declarations they would be undefined locals.
	global $wgExtraInterlanguageLinkPrefixes, $wgContLang;

	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $wgContLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = array();
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();

	// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		// @codingStandardsIgnoreEnd

		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
			# the real problem is with the $e1 regex
			# See bug 1300.
			#
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			#
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( array( '<', '>' ), array( '&lt;', '&gt;' ), rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = rawurldecode( $m[1] );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = $m[1];

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $origLink, $text );
		} else {
			$link = $origLink;
		}

		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		$unstrip = $this->mStripState->unstripNoWiki( $link );
		$nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
		} else {
			# Bug 4598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
				)
			) {
				# Bug 24502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				$s = rtrim( $s . $prefix );
				$s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
				continue;
			}

			if ( $ns == NS_FILE ) {
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
				} else {
					$s .= $prefix . $trail;
				}
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				$s = rtrim( $s . "\n" ); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				# Drop prefix and trail when they consist of newlines only
				$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = array();
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				array( $this, $nt, &$options, &$descQuery ) );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		#
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix );
		}
	}
	return $holders;
}
2333 
/**
 * Render a link to a page that is known to exist, with any absolute URLs
 * in the generated HTML armoured by armorLinks().
 *
 * @param Title $nt
 * @param string $text Link text; the prefixed title is used when empty
 * @param array|string $query Query parameters; a string is converted with
 *   wfCgiToArray()
 * @param string $trail Text following the link
 * @param string $prefix Text placed inside the link before $text
 * @return string HTML
 */
public function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
	# The part of the trail that belongs inside the link is split off
	list( $inside, $trail ) = Linker::splitTrail( $trail );

	$query = is_string( $query ) ? wfCgiToArray( $query ) : $query;
	$text = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$html = Linker::linkKnown( $nt, "$prefix$text$inside", array(), $query );

	return $this->armorLinks( $html ) . $trail;
}
2362 
/**
 * Insert MARKER_PREFIX . 'NOPARSE' in front of every URL protocol in
 * $text that starts at a word boundary.
 *
 * @param string $text
 * @return string
 */
public function armorLinks( $text ) {
	$pattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$replacement = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $pattern, $replacement, $text );
}
2377 
/**
 * Tell whether the namespace of the current title has subpages.
 *
 * @return bool The result of MWNamespace::hasSubpages()
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$namespace = $this->mTitle->getNamespace();

	return MWNamespace::hasSubpages( $namespace );
}
2386 
/**
 * Resolve a possible subpage link target relative to the current title.
 * Thin wrapper around Linker::normalizeSubpageLink().
 *
 * @param string $target Link target as written
 * @param string &$text Link text; passed on by reference
 * @return string The value returned by Linker::normalizeSubpageLink()
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$resolved = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );

	return $resolved;
}
2398 
/**
 * Return the closing tag for the section recorded in $this->mLastSection,
 * if any, and reset the section and <pre> state.
 *
 * @return string Closing tag followed by a newline, or ''
 */
public function closeParagraph() {
	$closing = ( $this->mLastSection != '' )
		? '</' . $this->mLastSection . ">\n"
		: '';

	$this->mInPre = false;
	$this->mLastSection = '';

	return $closing;
}
2414 
/**
 * Return the length of the longest common prefix of two strings.
 *
 * @param string $st1
 * @param string $st2
 * @return int
 */
public function getCommon( $st1, $st2 ) {
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	$common = 0;
	while ( $common < $limit && $st1[$common] == $st2[$common] ) {
		++$common;
	}

	return $common;
}
2439 
/**
 * Close any open paragraph and return the tags that open a list of the
 * type given by $char, including its first item.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string HTML; '<!-- ERR 1 -->' alone for any other character
 */
public function openList( $char ) {
	$openers = array(
		'*' => "<ul><li>",
		'#' => "<ol><li>",
		':' => "<dl><dd>",
		';' => "<dl><dt>",
	);

	# The paragraph is closed even when $char turns out to be invalid
	$result = $this->closeParagraph();

	if ( !is_string( $char ) || !isset( $openers[$char] ) ) {
		return '<!-- ERR 1 -->';
	}

	if ( $char === ';' ) {
		# Remember that a <dt> is open
		$this->mDTopen = true;
	}

	return $result . $openers[$char];
}
2467 
/**
 * Return the tags that end the current list item and start the next one.
 *
 * For definition lists the closing tag depends on whether a <dt> is
 * currently open ($this->mDTopen), and the flag is updated to reflect
 * the item being opened.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string HTML; '<!-- ERR 2 -->' for any other character
 */
public function nextItem( $char ) {
	if ( '*' === $char || '#' === $char ) {
		return "</li>\n<li>";
	}
	if ( ':' !== $char && ';' !== $char ) {
		return '<!-- ERR 2 -->';
	}

	$close = $this->mDTopen ? "</dt>\n" : "</dd>\n";
	# ';' opens a term, ':' a definition
	$this->mDTopen = ( ';' === $char );

	return $close . ( $this->mDTopen ? '<dt>' : '<dd>' );
}
2493 
/**
 * Close the innermost list of the given type.
 *
 * Only '*', '#' and ':' are recognised; for ':' the open-<dt> flag
 * ($this->mDTopen) decides which item tag is closed and is cleared
 * when it was set.
 *
 * @param string $char One of '*', '#' or ':'
 * @return string HTML, or an '<!-- ERR 3 -->' comment for any other character
 */
public function closeList( $char ) {
    if ( $char === '*' ) {
        return "</li></ul>";
    }
    if ( $char === '#' ) {
        return "</li></ol>";
    }
    if ( $char !== ':' ) {
        return '<!-- ERR 3 -->';
    }

    if ( !$this->mDTopen ) {
        return "</dd></dl>";
    }

    $this->mDTopen = false;
    return "</dt></dl>";
}
/**
 * Turn the line structure of $text into block-level HTML.
 *
 * The text is processed one line at a time: lines starting with any run of
 * '*', '#', ':' and ';' are turned into (possibly nested) lists, lines
 * starting with a space become <pre> blocks, and remaining text is wrapped
 * in <p> elements unless the line contains one of the block-level tags
 * matched below.
 *
 * @param string $text Text to process
 * @param bool $linestart When false, the first line of $text is copied to
 *  the output unprocessed and normal handling starts with the second line
 * @return string The processed text
 */
2528  public function doBlockLevels( $text, $linestart ) {
2529 
2530  # Parsing through the text line by line. The main thing
2531  # happening here is handling of block-level elements p, pre,
2532  # and making lists from lines starting with * # : etc.
2533  #
2534  $textLines = StringUtils::explode( "\n", $text );
2535 
# $lastPrefix: list prefix of the previous line, with ';' normalised to ':'
# $paragraphStack: pending paragraph markup ('<p>' or '</p><p>') that is only
#   written out once a later line is seen; false when nothing is pending
2536  $lastPrefix = $output = '';
2537  $this->mDTopen = $inBlockElem = false;
2538  $prefixLength = 0;
2539  $paragraphStack = false;
2540  $inBlockquote = false;
2541 
2542  foreach ( $textLines as $oLine ) {
2543  # Fix up $linestart
# (the first line is emitted verbatim, without a trailing newline)
2544  if ( !$linestart ) {
2545  $output .= $oLine;
2546  $linestart = true;
2547  continue;
2548  }
2549  # * = ul
2550  # # = ol
2551  # ; = dt
2552  # : = dd
2553 
2554  $lastPrefixLength = strlen( $lastPrefix );
2555  $preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
2556  $preOpenMatch = preg_match( '/<pre/i', $oLine );
2557  # If not in a <pre> element, scan for and figure out what prefixes are there.
2558  if ( !$this->mInPre ) {
2559  # Multiple prefixes may abut each other for nested lists.
2560  $prefixLength = strspn( $oLine, '*#:;' );
2561  $prefix = substr( $oLine, 0, $prefixLength );
2562 
2563  # eh?
2564  # ; and : are both from definition-lists, so they're equivalent
2565  # for the purposes of determining whether or not we need to open/close
2566  # elements.
2567  $prefix2 = str_replace( ';', ':', $prefix );
2568  $t = substr( $oLine, $prefixLength );
2569  $this->mInPre = (bool)$preOpenMatch;
2570  } else {
2571  # Don't interpret any other prefixes in preformatted text
2572  $prefixLength = 0;
2573  $prefix = $prefix2 = '';
2574  $t = $oLine;
2575  }
2576 
2577  # List generation
2578  if ( $prefixLength && $lastPrefix === $prefix2 ) {
2579  # Same as the last item, so no need to deal with nesting or opening stuff
2580  $output .= $this->nextItem( substr( $prefix, -1 ) );
2581  $paragraphStack = false;
2582 
2583  if ( substr( $prefix, -1 ) === ';' ) {
2584  # The one nasty exception: definition lists work like this:
2585  # ; title : definition text
2586  # So we check for : in the remainder text to split up the
2587  # title and definition, without b0rking links.
2588  $term = $t2 = '';
2589  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2590  $t = $t2;
2591  $output .= $term . $this->nextItem( ':' );
2592  }
2593  }
2594  } elseif ( $prefixLength || $lastPrefixLength ) {
2595  # We need to open or close prefixes, or both.
2596 
2597  # Either open or close a level...
2598  $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
2599  $paragraphStack = false;
2600 
2601  # Close all the prefixes which aren't shared.
2602  while ( $commonPrefixLength < $lastPrefixLength ) {
2603  $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
2604  --$lastPrefixLength;
2605  }
2606 
2607  # Continue the current prefix if appropriate.
2608  if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
2609  $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
2610  }
2611 
2612  # Open prefixes where appropriate.
2613  if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
2614  $output .= "\n";
2615  }
2616  while ( $prefixLength > $commonPrefixLength ) {
2617  $char = substr( $prefix, $commonPrefixLength, 1 );
2618  $output .= $this->openList( $char );
2619 
2620  if ( ';' === $char ) {
2621  # @todo FIXME: This is dupe of code above
2622  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2623  $t = $t2;
2624  $output .= $term . $this->nextItem( ':' );
2625  }
2626  }
2627  ++$commonPrefixLength;
2628  }
2629  if ( !$prefixLength && $lastPrefix ) {
2630  $output .= "\n";
2631  }
2632  $lastPrefix = $prefix2;
2633  }
2634 
2635  # If we have no prefixes, go to paragraph mode.
2636  if ( 0 == $prefixLength ) {
2637  # No prefix (not in list)--go to paragraph mode
2638  # XXX: use a stack for nestable elements like span, table and div
2639  $openmatch = preg_match(
2640  '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
2641  . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
2642  $t
2643  );
2644  $closematch = preg_match(
2645  '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
2646  . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
2647  . self::MARKER_PREFIX
2648  . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
2649  $t
2650  );
2651 
2652  if ( $openmatch || $closematch ) {
2653  $paragraphStack = false;
2654  # @todo bug 5718: paragraph closed
2655  $output .= $this->closeParagraph();
2656  if ( $preOpenMatch && !$preCloseMatch ) {
2657  $this->mInPre = true;
2658  }
# Track blockquote state: the last <blockquote> or </blockquote> tag
# on the line decides whether we are inside a blockquote afterwards.
2659  $bqOffset = 0;
2660  while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset ) ) {
2661  $inBlockquote = !$bqMatch[1][0]; // is this a close tag?
2662  $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
2663  }
2664  $inBlockElem = !$closematch;
2665  } elseif ( !$inBlockElem && !$this->mInPre ) {
2666  if ( ' ' == substr( $t, 0, 1 )
2667  && ( $this->mLastSection === 'pre' || trim( $t ) != '' )
2668  && !$inBlockquote
2669  ) {
2670  # pre
2671  if ( $this->mLastSection !== 'pre' ) {
2672  $paragraphStack = false;
2673  $output .= $this->closeParagraph() . '<pre>';
2674  $this->mLastSection = 'pre';
2675  }
2676  $t = substr( $t, 1 );
2677  } else {
2678  # paragraph
2679  if ( trim( $t ) === '' ) {
2680  if ( $paragraphStack ) {
2681  $output .= $paragraphStack . '<br />';
2682  $paragraphStack = false;
2683  $this->mLastSection = 'p';
2684  } else {
2685  if ( $this->mLastSection !== 'p' ) {
2686  $output .= $this->closeParagraph();
2687  $this->mLastSection = '';
2688  $paragraphStack = '<p>';
2689  } else {
2690  $paragraphStack = '</p><p>';
2691  }
2692  }
2693  } else {
2694  if ( $paragraphStack ) {
2695  $output .= $paragraphStack;
2696  $paragraphStack = false;
2697  $this->mLastSection = 'p';
2698  } elseif ( $this->mLastSection !== 'p' ) {
2699  $output .= $this->closeParagraph() . '<p>';
2700  $this->mLastSection = 'p';
2701  }
2702  }
2703  }
2704  }
2705  }
2706  # somewhere above we forget to get out of pre block (bug 785)
2707  if ( $preCloseMatch && $this->mInPre ) {
2708  $this->mInPre = false;
2709  }
# The line itself is only written when no paragraph markup is pending
2710  if ( $paragraphStack === false ) {
2711  $output .= $t;
2712  if ( $prefixLength === 0 ) {
2713  $output .= "\n";
2714  }
2715  }
2716  }
# Close any lists still open after the last line
2717  while ( $prefixLength ) {
2718  $output .= $this->closeList( $prefix2[$prefixLength - 1] );
2719  --$prefixLength;
2720  if ( !$prefixLength ) {
2721  $output .= "\n";
2722  }
2723  }
# Close a section (e.g. p or pre) left open at the end of the text
2724  if ( $this->mLastSection != '' ) {
2725  $output .= '</' . $this->mLastSection . '>';
2726  $this->mLastSection = '';
2727  }
2728 
2729  return $output;
2730  }
2731 
/**
 * Split a definition-list line at the first colon that is not nested
 * inside an HTML tag, element or comment.
 *
 * On success $before receives the text preceding the colon and $after the
 * text following it. Every success path returns the byte offset of that
 * colon (the skip-ahead branch used to return the scan position instead).
 *
 * @param string $str Line text to examine
 * @param string &$before Set to the text before the colon
 * @param string &$after Set to the text after the colon
 * @throws MWException If the state machine reaches an unknown state
 * @return int|bool Offset of the colon, or false if no usable colon exists
 */
public function findColonNoLinks( $str, &$before, &$after ) {
    $pos = strpos( $str, ':' );
    if ( $pos === false ) {
        # Nothing to find!
        return false;
    }

    $lt = strpos( $str, '<' );
    if ( $lt === false || $lt > $pos ) {
        # Easy; no tag nesting to worry about
        $before = substr( $str, 0, $pos );
        $after = substr( $str, $pos + 1 );
        return $pos;
    }

    # Ugly state machine to walk through avoiding tags.
    # $stack counts the elements that are currently open.
    $state = self::COLON_STATE_TEXT;
    $stack = 0;
    $len = strlen( $str );
    for ( $i = 0; $i < $len; $i++ ) {
        $c = $str[$i];

        switch ( $state ) {
            # (Using the number is a performance hack for common cases)
            case 0: # self::COLON_STATE_TEXT:
                switch ( $c ) {
                    case "<":
                        # Could be either a <start> tag or an </end> tag
                        $state = self::COLON_STATE_TAGSTART;
                        break;
                    case ":":
                        if ( $stack == 0 ) {
                            # We found it!
                            $before = substr( $str, 0, $i );
                            $after = substr( $str, $i + 1 );
                            return $i;
                        }
                        # Embedded in a tag; don't break it.
                        break;
                    default:
                        # Skip ahead looking for something interesting
                        $colon = strpos( $str, ':', $i );
                        if ( $colon === false ) {
                            # Nothing else interesting
                            return false;
                        }
                        $lt = strpos( $str, '<', $i );
                        if ( $stack === 0 ) {
                            if ( $lt === false || $colon < $lt ) {
                                # We found it! Report the colon's own offset,
                                # consistent with the other success paths.
                                $before = substr( $str, 0, $colon );
                                $after = substr( $str, $colon + 1 );
                                return $colon;
                            }
                        }
                        if ( $lt === false ) {
                            # Nothing else interesting to find; abort!
                            # We're nested, but there's no close tags left. Abort!
                            break 2;
                        }
                        # Skip ahead to next tag start
                        $i = $lt;
                        $state = self::COLON_STATE_TAGSTART;
                }
                break;
            case 1: # self::COLON_STATE_TAG:
                # In a <tag>
                switch ( $c ) {
                    case ">":
                        $stack++;
                        $state = self::COLON_STATE_TEXT;
                        break;
                    case "/":
                        # Slash may be followed by >?
                        $state = self::COLON_STATE_TAGSLASH;
                        break;
                    default:
                        # ignore
                }
                break;
            case 2: # self::COLON_STATE_TAGSTART:
                switch ( $c ) {
                    case "/":
                        $state = self::COLON_STATE_CLOSETAG;
                        break;
                    case "!":
                        $state = self::COLON_STATE_COMMENT;
                        break;
                    case ">":
                        # Illegal early close? This shouldn't happen D:
                        $state = self::COLON_STATE_TEXT;
                        break;
                    default:
                        $state = self::COLON_STATE_TAG;
                }
                break;
            case 3: # self::COLON_STATE_CLOSETAG:
                # In a </tag>
                if ( $c === ">" ) {
                    $stack--;
                    if ( $stack < 0 ) {
                        wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
                        return false;
                    }
                    $state = self::COLON_STATE_TEXT;
                }
                break;
            case self::COLON_STATE_TAGSLASH:
                if ( $c === ">" ) {
                    # Yes, a self-closed tag <blah/>
                    $state = self::COLON_STATE_TEXT;
                } else {
                    # Probably we're jumping the gun, and this is an attribute
                    $state = self::COLON_STATE_TAG;
                }
                break;
            case 5: # self::COLON_STATE_COMMENT:
                if ( $c === "-" ) {
                    $state = self::COLON_STATE_COMMENTDASH;
                }
                break;
            case self::COLON_STATE_COMMENTDASH:
                if ( $c === "-" ) {
                    $state = self::COLON_STATE_COMMENTDASHDASH;
                } else {
                    $state = self::COLON_STATE_COMMENT;
                }
                break;
            case self::COLON_STATE_COMMENTDASHDASH:
                if ( $c === ">" ) {
                    $state = self::COLON_STATE_TEXT;
                } else {
                    $state = self::COLON_STATE_COMMENT;
                }
                break;
            default:
                throw new MWException( "State machine error in " . __METHOD__ );
        }
    }

    # Ran out of input without finding a colon outside of markup
    if ( $stack > 0 ) {
        wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
    }
    return false;
}
2888 
/**
 * Return the value of a magic-word variable such as {{PAGENAME}}.
 *
 * Values computed by the switch below are stored in $this->mVarCache and
 * served from there on later calls, unless the
 * 'ParserGetVariableValueVarCache' hook returns false. The site
 * configuration variables (articlepath, sitename, server, ...), the
 * direction mark and hook-provided values are returned directly without
 * being cached.
 *
 * @param string $index Magic word ID of the variable
 * @param bool|PPFrame $frame Passed through to the
 *  'ParserGetVariableValueSwitch' hook for unknown variables
 * @throws MWException If no title is set on the parser
 * @return string|int|null
 */
public function getVariableValue( $index, $frame = false ) {
    # These globals are read by several cases below and must be in scope
    global $wgContLang, $wgSitename, $wgServer, $wgServerName;
    global $wgArticlePath, $wgScriptPath, $wgStylePath, $wgLanguageCode;

    if ( is_null( $this->mTitle ) ) {
        // If no title set, bad things are going to happen
        // later. Title should always be set since this
        // should only be called in the middle of a parse
        // operation (but the unit-tests do funky stuff)
        throw new MWException( __METHOD__ . ' Should only be '
            . ' called while parsing (no title set)' );
    }

    # Serve repeated lookups from the cache unless a hook vetoes it
    if ( Hooks::run( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) {
        if ( isset( $this->mVarCache[$index] ) ) {
            return $this->mVarCache[$index];
        }
    }

    $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
    Hooks::run( 'ParserGetVariableValueTs', array( &$this, &$ts ) );

    $pageLang = $this->getFunctionLang();

    switch ( $index ) {
        case '!':
            $value = '|';
            break;
        case 'currentmonth':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
            break;
        case 'currentmonth1':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
            break;
        case 'currentmonthname':
            $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
            break;
        case 'currentmonthnamegen':
            $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
            break;
        case 'currentmonthabbrev':
            $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
            break;
        case 'currentday':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
            break;
        case 'currentday2':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
            break;
        case 'localmonth':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
            break;
        case 'localmonth1':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
            break;
        case 'localmonthname':
            $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
            break;
        case 'localmonthnamegen':
            $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
            break;
        case 'localmonthabbrev':
            $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
            break;
        case 'localday':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
            break;
        case 'localday2':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
            break;
        case 'pagename':
            $value = wfEscapeWikiText( $this->mTitle->getText() );
            break;
        case 'pagenamee':
            $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
            break;
        case 'fullpagename':
            $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
            break;
        case 'fullpagenamee':
            $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
            break;
        case 'subpagename':
            $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
            break;
        case 'subpagenamee':
            $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
            break;
        case 'rootpagename':
            $value = wfEscapeWikiText( $this->mTitle->getRootText() );
            break;
        case 'rootpagenamee':
            $value = wfEscapeWikiText( wfUrlEncode( str_replace(
                ' ',
                '_',
                $this->mTitle->getRootText()
            ) ) );
            break;
        case 'basepagename':
            $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
            break;
        case 'basepagenamee':
            $value = wfEscapeWikiText( wfUrlEncode( str_replace(
                ' ',
                '_',
                $this->mTitle->getBaseText()
            ) ) );
            break;
        case 'talkpagename':
            if ( $this->mTitle->canTalk() ) {
                $talkPage = $this->mTitle->getTalkPage();
                $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
            } else {
                $value = '';
            }
            break;
        case 'talkpagenamee':
            if ( $this->mTitle->canTalk() ) {
                $talkPage = $this->mTitle->getTalkPage();
                $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
            } else {
                $value = '';
            }
            break;
        case 'subjectpagename':
            $subjPage = $this->mTitle->getSubjectPage();
            $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
            break;
        case 'subjectpagenamee':
            $subjPage = $this->mTitle->getSubjectPage();
            $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
            break;
        case 'pageid': // requested in bug 23427
            $pageid = $this->getTitle()->getArticleID();
            if ( $pageid == 0 ) {
                # 0 means the page doesn't exist in the database,
                # which means the user is previewing a new page.
                # The vary-revision flag must be set, because the magic word
                # will have a different value once the page is saved.
                $this->mOutput->setFlag( 'vary-revision' );
                wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
            }
            $value = $pageid ? $pageid : null;
            break;
        case 'revisionid':
            # Let the edit saving system know we should parse the page
            # *after* a revision ID has been assigned.
            $this->mOutput->setFlag( 'vary-revision' );
            wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
            $value = $this->mRevisionId;
            break;
        case 'revisionday':
            # Let the edit saving system know we should parse the page
            # *after* a revision ID has been assigned. This is for null edits.
            $this->mOutput->setFlag( 'vary-revision' );
            wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
            $value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
            break;
        case 'revisionday2':
            # Let the edit saving system know we should parse the page
            # *after* a revision ID has been assigned. This is for null edits.
            $this->mOutput->setFlag( 'vary-revision' );
            wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
            $value = substr( $this->getRevisionTimestamp(), 6, 2 );
            break;
        case 'revisionmonth':
            # Let the edit saving system know we should parse the page
            # *after* a revision ID has been assigned. This is for null edits.
            $this->mOutput->setFlag( 'vary-revision' );
            wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
            $value = substr( $this->getRevisionTimestamp(), 4, 2 );
            break;
        case 'revisionmonth1':
            # Let the edit saving system know we should parse the page
            # *after* a revision ID has been assigned. This is for null edits.
            $this->mOutput->setFlag( 'vary-revision' );
            wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
            $value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
            break;
        case 'revisionyear':
            # Let the edit saving system know we should parse the page
            # *after* a revision ID has been assigned. This is for null edits.
            $this->mOutput->setFlag( 'vary-revision' );
            wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
            $value = substr( $this->getRevisionTimestamp(), 0, 4 );
            break;
        case 'revisiontimestamp':
            # Let the edit saving system know we should parse the page
            # *after* a revision ID has been assigned. This is for null edits.
            $this->mOutput->setFlag( 'vary-revision' );
            wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
            $value = $this->getRevisionTimestamp();
            break;
        case 'revisionuser':
            # Let the edit saving system know we should parse the page
            # *after* a revision ID has been assigned. This is for null edits.
            $this->mOutput->setFlag( 'vary-revision' );
            wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
            $value = $this->getRevisionUser();
            break;
        case 'revisionsize':
            # Let the edit saving system know we should parse the page
            # *after* a revision ID has been assigned. This is for null edits.
            $this->mOutput->setFlag( 'vary-revision' );
            wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
            $value = $this->getRevisionSize();
            break;
        case 'namespace':
            $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
            break;
        case 'namespacee':
            $value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
            break;
        case 'namespacenumber':
            $value = $this->mTitle->getNamespace();
            break;
        case 'talkspace':
            $value = $this->mTitle->canTalk()
                ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
                : '';
            break;
        case 'talkspacee':
            $value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
            break;
        case 'subjectspace':
            $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
            break;
        case 'subjectspacee':
            $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
            break;
        case 'currentdayname':
            $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
            break;
        case 'currentyear':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
            break;
        case 'currenttime':
            $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
            break;
        case 'currenthour':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
            break;
        case 'currentweek':
            # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
            # int to remove the padding
            $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
            break;
        case 'currentdow':
            $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
            break;
        case 'localdayname':
            $value = $pageLang->getWeekdayName(
                (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
            );
            break;
        case 'localyear':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
            break;
        case 'localtime':
            $value = $pageLang->time(
                MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
                false,
                false
            );
            break;
        case 'localhour':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
            break;
        case 'localweek':
            # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
            # int to remove the padding
            $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
            break;
        case 'localdow':
            $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
            break;
        case 'numberofarticles':
            $value = $pageLang->formatNum( SiteStats::articles() );
            break;
        case 'numberoffiles':
            $value = $pageLang->formatNum( SiteStats::images() );
            break;
        case 'numberofusers':
            $value = $pageLang->formatNum( SiteStats::users() );
            break;
        case 'numberofactiveusers':
            $value = $pageLang->formatNum( SiteStats::activeUsers() );
            break;
        case 'numberofpages':
            $value = $pageLang->formatNum( SiteStats::pages() );
            break;
        case 'numberofadmins':
            $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
            break;
        case 'numberofedits':
            $value = $pageLang->formatNum( SiteStats::edits() );
            break;
        case 'currenttimestamp':
            $value = wfTimestamp( TS_MW, $ts );
            break;
        case 'localtimestamp':
            $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
            break;
        case 'currentversion':
            # NOTE(review): restored from upstream MediaWiki; without an
            # assignment $value was undefined here. Confirm against the repo.
            $value = SpecialVersion::getVersion();
            break;
        case 'articlepath':
            return $wgArticlePath;
        case 'sitename':
            return $wgSitename;
        case 'server':
            return $wgServer;
        case 'servername':
            return $wgServerName;
        case 'scriptpath':
            return $wgScriptPath;
        case 'stylepath':
            return $wgStylePath;
        case 'directionmark':
            return $pageLang->getDirMark();
        case 'contentlanguage':
            return $wgLanguageCode;
        case 'cascadingsources':
            # NOTE(review): restored from upstream MediaWiki; without an
            # assignment $value was undefined here. Confirm against the repo.
            $value = CoreParserFunctions::cascadingsources( $this );
            break;
        default:
            # Unknown variable: let extensions supply the value (not cached here)
            $ret = null;
            Hooks::run(
                'ParserGetVariableValueSwitch',
                array( &$this, &$this->mVarCache, &$index, &$ret, &$frame )
            );

            return $ret;
    }

    if ( $index ) {
        $this->mVarCache[$index] = $value;
    }

    return $value;
}
3246 
/**
 * Build the magic-word matchers used for variables and for subst prefixes.
 *
 * Fills $this->mVariables from MagicWord::getVariableIDs() and
 * $this->mSubstWords from MagicWord::getSubstIDs().
 */
public function initialiseVariables() {
    $idsForVariables = MagicWord::getVariableIDs();
    $idsForSubst = MagicWord::getSubstIDs();

    $this->mVariables = new MagicWordArray( $idsForVariables );
    $this->mSubstWords = new MagicWordArray( $idsForSubst );
}
/**
 * Run the text through the preprocessor and return the resulting object.
 *
 * @param string $text Text to preprocess
 * @param int $flags Flags handed to the preprocessor (e.g. self::PTD_FOR_INCLUSION)
 * @return mixed Whatever the preprocessor's preprocessToObj() returns
 */
public function preprocessToDom( $text, $flags = 0 ) {
    return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3286 
/**
 * Split a string into its leading whitespace, trimmed content and
 * trailing whitespace.
 *
 * All three elements are always strings; zero-length parts are returned
 * as '' rather than relying on substr(), which can yield false for
 * empty results on PHP versions before 8.
 *
 * @param string $s
 * @return array array( leading whitespace, trimmed text, trailing whitespace )
 */
public static function splitWhitespace( $s ) {
    $totalLength = strlen( $s );
    $leadingLength = $totalLength - strlen( ltrim( $s ) );
    $trimmed = trim( $s );
    # Whatever is neither leading whitespace nor content is trailing whitespace
    $trailingLength = $totalLength - $leadingLength - strlen( $trimmed );

    $leading = $leadingLength > 0 ? substr( $s, 0, $leadingLength ) : '';
    $trailing = $trailingLength > 0 ? substr( $s, -$trailingLength ) : '';

    return array( $leading, $trimmed, $trailing );
}
3306 
/**
 * Preprocess the text and expand it within a frame.
 *
 * Empty text, and text longer than the configured maximum include size,
 * is returned unchanged.
 *
 * @param string $text Text to expand
 * @param bool|PPFrame|array $frame false to use a fresh frame, a PPFrame
 *  to expand in, or any other value, which is handed to
 *  newCustomFrame() to build one
 * @param bool $argsOnly When true, expansion runs with PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
    # Is there any text? Also, Prevent too big inclusions!
    $textLength = strlen( $text );
    if ( $textLength < 1 || $textLength > $this->mOptions->getMaxIncludeSize() ) {
        return $text;
    }

    if ( $frame === false ) {
        $frame = $this->getPreprocessor()->newFrame();
    } elseif ( !( $frame instanceof PPFrame ) ) {
        wfDebug( __METHOD__ . " called using plain parameters instead of "
            . "a PPFrame instance. Creating custom frame.\n" );
        $frame = $this->getPreprocessor()->newCustomFrame( $frame );
    }

    $expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

    return $frame->expand( $this->preprocessToDom( $text ), $expandFlags );
}
3347 
/**
 * Turn a list of template arguments into an associative array.
 *
 * Arguments without '=' are numbered sequentially starting at 1.
 * Arguments of the form "name=value" are stored under the trimmed name
 * with the trimmed value; a later argument with the same key overwrites
 * an earlier one.
 *
 * @param array $args List of argument strings
 * @return array
 */
public static function createAssocArgs( $args ) {
    $assocArgs = array();
    $index = 1;
    foreach ( $args as $arg ) {
        # Split on the first '=' only; the value may itself contain '='
        $parts = explode( '=', $arg, 2 );
        if ( count( $parts ) === 1 ) {
            $assocArgs[$index++] = $arg;
        } else {
            # trim() always returns a string, so no false-checks are needed
            $assocArgs[trim( $parts[0] )] = trim( $parts[1] );
        }
    }

    return $assocArgs;
}
3376 
/**
 * Record that a parser limit was hit: add the "<type>-warning" message to
 * the output and put the page into the "<type>-category" tracking category.
 *
 * @param string $limitationType Prefix of the warning message and category keys
 * @param string|int $current Current value, passed as a numeric message parameter
 * @param string|int $max Maximum allowed value, passed as a numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
    # does no harm if $current and $max are present but are unnecessary for the message
    $message = wfMessage( $limitationType . '-warning' )->numParams( $current, $max );
    $message = $message->inLanguage( $this->mOptions->getUserLangObj() );

    $this->mOutput->addWarning( $message->text() );
    $this->addTrackingCategory( $limitationType . '-category' );
}
3408 
3421  public function braceSubstitution( $piece, $frame ) {
3422 
3423  // Flags
3424 
3425  // $text has been filled
3426  $found = false;
3427  // wiki markup in $text should be escaped
3428  $nowiki = false;
3429  // $text is HTML, armour it against wikitext transformation
3430  $isHTML = false;
3431  // Force interwiki transclusion to be done in raw mode not rendered
3432  $forceRawInterwiki = false;
3433  // $text is a DOM node needing expansion in a child frame
3434  $isChildObj = false;
3435  // $text is a DOM node needing expansion in the current frame
3436  $isLocalObj = false;
3437 
3438  # Title object, where $text came from
3439  $title = false;
3440 
3441  # $part1 is the bit before the first |, and must contain only title characters.
3442  # Various prefixes will be stripped from it later.
3443  $titleWithSpaces = $frame->expand( $piece['title'] );
3444  $part1 = trim( $titleWithSpaces );
3445  $titleText = false;
3446 
3447  # Original title text preserved for various purposes
3448  $originalTitle = $part1;
3449 
3450  # $args is a list of argument nodes, starting from index 0, not including $part1
3451  # @todo FIXME: If piece['parts'] is null then the call to getLength()
3452  # below won't work b/c this $args isn't an object
3453  $args = ( null == $piece['parts'] ) ? array() : $piece['parts'];
3454 
3455  $profileSection = null; // profile templates
3456 
3457  # SUBST
3458  if ( !$found ) {
3459  $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3460 
3461  # Possibilities for substMatch: "subst", "safesubst" or FALSE
3462  # Decide whether to expand template or keep wikitext as-is.
3463  if ( $this->ot['wiki'] ) {
3464  if ( $substMatch === false ) {
3465  $literal = true; # literal when in PST with no prefix
3466  } else {
3467  $literal = false; # expand when in PST with subst: or safesubst:
3468  }
3469  } else {
3470  if ( $substMatch == 'subst' ) {
3471  $literal = true; # literal when not in PST with plain subst:
3472  } else {
3473  $literal = false; # expand when not in PST with safesubst: or no prefix
3474  }
3475  }
3476  if ( $literal ) {
3477  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3478  $isLocalObj = true;
3479  $found = true;
3480  }
3481  }
3482 
3483  # Variables
3484  if ( !$found && $args->getLength() == 0 ) {
3485  $id = $this->mVariables->matchStartToEnd( $part1 );
3486  if ( $id !== false ) {
3487  $text = $this->getVariableValue( $id, $frame );
3488  if ( MagicWord::getCacheTTL( $id ) > -1 ) {
3489  $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
3490  }
3491  $found = true;
3492  }
3493  }
3494 
3495  # MSG, MSGNW and RAW
3496  if ( !$found ) {
3497  # Check for MSGNW:
3498  $mwMsgnw = MagicWord::get( 'msgnw' );
3499  if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3500  $nowiki = true;
3501  } else {
3502  # Remove obsolete MSG:
3503  $mwMsg = MagicWord::get( 'msg' );
3504  $mwMsg->matchStartAndRemove( $part1 );
3505  }
3506 
3507  # Check for RAW:
3508  $mwRaw = MagicWord::get( 'raw' );
3509  if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3510  $forceRawInterwiki = true;
3511  }
3512  }
3513 
3514  # Parser functions
3515  if ( !$found ) {
3516  $colonPos = strpos( $part1, ':' );
3517  if ( $colonPos !== false ) {
3518  $func = substr( $part1, 0, $colonPos );
3519  $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) );
3520  for ( $i = 0; $i < $args->getLength(); $i++ ) {
3521  $funcArgs[] = $args->item( $i );
3522  }
3523  try {
3524  $result = $this->callParserFunction( $frame, $func, $funcArgs );
3525  } catch ( Exception $ex ) {
3526  throw $ex;
3527  }
3528 
3529  # The interface for parser functions allows for extracting
3530  # flags into the local scope. Extract any forwarded flags
3531  # here.
3532  extract( $result );
3533  }
3534  }
3535 
3536  # Finish mangling title and then check for loops.
3537  # Set $title to a Title object and $titleText to the PDBK
3538  if ( !$found ) {
3539  $ns = NS_TEMPLATE;
3540  # Split the title into page and subpage
3541  $subpage = '';
3542  $relative = $this->maybeDoSubpageLink( $part1, $subpage );
3543  if ( $part1 !== $relative ) {
3544  $part1 = $relative;
3545  $ns = $this->mTitle->getNamespace();
3546  }
3547  $title = Title::newFromText( $part1, $ns );
3548  if ( $title ) {
3549  $titleText = $title->getPrefixedText();
3550  # Check for language variants if the template is not found
3551  if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
3552  $this->getConverterLanguage()->findVariantLink( $part1, $title, true );
3553  }
3554  # Do recursion depth check
3555  $limit = $this->mOptions->getMaxTemplateDepth();
3556  if ( $frame->depth >= $limit ) {
3557  $found = true;
3558  $text = '<span class="error">'
3559  . wfMessage( 'parser-template-recursion-depth-warning' )
3560  ->numParams( $limit )->inContentLanguage()->text()
3561  . '</span>';
3562  }
3563  }
3564  }
3565 
3566  # Load from database
3567  if ( !$found && $title ) {
3568  $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3569  if ( !$title->isExternal() ) {
3570  if ( $title->isSpecialPage()
3571  && $this->mOptions->getAllowSpecialInclusion()
3572  && $this->ot['html']
3573  ) {
3574  // Pass the template arguments as URL parameters.
3575  // "uselang" will have no effect since the Language object
3576  // is forced to the one defined in ParserOptions.
3577  $pageArgs = array();
3578  $argsLength = $args->getLength();
3579  for ( $i = 0; $i < $argsLength; $i++ ) {
3580  $bits = $args->item( $i )->splitArg();
3581  if ( strval( $bits['index'] ) === '' ) {
3582  $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3583  $value = trim( $frame->expand( $bits['value'] ) );
3584  $pageArgs[$name] = $value;
3585  }
3586  }
3587 
3588  // Create a new context to execute the special page
3589  $context = new RequestContext;
3590  $context->setTitle( $title );
3591  $context->setRequest( new FauxRequest( $pageArgs ) );
3592  $context->setUser( $this->getUser() );
3593  $context->setLanguage( $this->mOptions->getUserLangObj() );
3594  $ret = SpecialPageFactory::capturePath( $title, $context );
3595  if ( $ret ) {
3596  $text = $context->getOutput()->getHTML();
3597  $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3598  $found = true;
3599  $isHTML = true;
3600  $this->disableCache();
3601  }
3602  } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
3603  $found = false; # access denied
3604  wfDebug( __METHOD__ . ": template inclusion denied for " .
3605  $title->getPrefixedDBkey() . "\n" );
3606  } else {
3607  list( $text, $title ) = $this->getTemplateDom( $title );
3608  if ( $text !== false ) {
3609  $found = true;
3610  $isChildObj = true;
3611  }
3612  }
3613 
3614  # If the title is valid but undisplayable, make a link to it
3615  if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3616  $text = "[[:$titleText]]";
3617  $found = true;
3618  }
3619  } elseif ( $title->isTrans() ) {
3620  # Interwiki transclusion
3621  if ( $this->ot['html'] && !$forceRawInterwiki ) {
3622  $text = $this->interwikiTransclude( $title, 'render' );
3623  $isHTML = true;
3624  } else {
3625  $text = $this->interwikiTransclude( $title, 'raw' );
3626  # Preprocess it like a template
3627  $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3628  $isChildObj = true;
3629  }
3630  $found = true;
3631  }
3632 
3633  # Do infinite loop check
3634  # This has to be done after redirect resolution to avoid infinite loops via redirects
3635  if ( !$frame->loopCheck( $title ) ) {
3636  $found = true;
3637  $text = '<span class="error">'
3638  . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3639  . '</span>';
3640  wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
3641  }
3642  }
3643 
3644  # If we haven't found text to substitute by now, we're done
3645  # Recover the source wikitext and return it
3646  if ( !$found ) {
3647  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3648  if ( $profileSection ) {
3649  $this->mProfiler->scopedProfileOut( $profileSection );
3650  }
3651  return array( 'object' => $text );
3652  }
3653 
3654  # Expand DOM-style return values in a child frame
3655  if ( $isChildObj ) {
3656  # Clean up argument array
3657  $newFrame = $frame->newChild( $args, $title );
3658 
3659  if ( $nowiki ) {
3660  $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3661  } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3662  # Expansion is eligible for the empty-frame cache
3663  $text = $newFrame->cachedExpand( $titleText, $text );
3664  } else {
3665  # Uncached expansion
3666  $text = $newFrame->expand( $text );
3667  }
3668  }
3669  if ( $isLocalObj && $nowiki ) {
3670  $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3671  $isLocalObj = false;
3672  }
3673 
3674  if ( $profileSection ) {
3675  $this->mProfiler->scopedProfileOut( $profileSection );
3676  }
3677 
3678  # Replace raw HTML by a placeholder
3679  if ( $isHTML ) {
3680  $text = $this->insertStripItem( $text );
3681  } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3682  # Escape nowiki-style return values
3683  $text = wfEscapeWikiText( $text );
3684  } elseif ( is_string( $text )
3685  && !$piece['lineStart']
3686  && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3687  ) {
3688  # Bug 529: if the template begins with a table or block-level
3689  # element, it should be treated as beginning a new line.
3690  # This behavior is somewhat controversial.
3691  $text = "\n" . $text;
3692  }
3693 
3694  if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3695  # Error, oversize inclusion
3696  if ( $titleText !== false ) {
3697  # Make a working, properly escaped link if possible (bug 23588)
3698  $text = "[[:$titleText]]";
3699  } else {
3700  # This will probably not be a working link, but at least it may
3701  # provide some hint of where the problem is
3702  preg_replace( '/^:/', '', $originalTitle );
3703  $text = "[[:$originalTitle]]";
3704  }
3705  $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3706  . 'post-expand include size too large -->' );
3707  $this->limitationWarn( 'post-expand-template-inclusion' );
3708  }
3709 
3710  if ( $isLocalObj ) {
3711  $ret = array( 'object' => $text );
3712  } else {
3713  $ret = array( 'text' => $text );
3714  }
3716  return $ret;
3717  }
3718 
/**
 * Look up a parser function by name, invoke its registered hook and
 * normalise whatever the hook returns.
 *
 * The returned array always contains a boolean 'found'. When the name
 * matches no registered synonym the result is array( 'found' => false )
 * and nothing is called. Otherwise the hook's return value is wrapped or
 * completed so that the text is available under 'text', with any extra
 * flags the hook supplied left in place. If the hook sets 'noparse' to a
 * false value, 'text' is passed through preprocessToDom() and
 * 'isChildObj' is set to true.
 *
 * @param object $frame Frame used to expand PPNode arguments
 * @param string $function Function name as written by the caller
 * @param array $args Arguments for the function
 * @throws MWException If the registered callback is not callable
 * @return array
 */
public function callParserFunction( $frame, $function, array $args = array() ) {
	# Needed for the case-insensitive lookup below
	global $wgContLang;

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return array( 'found' => false );
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		throw new MWException( "Tag hook for $function is not callable\n" );
	}

	# The parser itself is always the first hook argument
	$allArgs = array( &$this );
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = array();
		foreach ( $args as $k => $v ) {
			# Element 0 and existing nodes are passed through untouched
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				# Named argument: rebuild the "name=value" form
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = array(
			'found' => true,
			'text' => $result,
		);
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# Array union: a 'found' value supplied by the hook wins
		$result += array(
			'found' => true,
		);
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3826 
/**
 * Get the preprocessed DOM of a template, using the per-parser caches.
 *
 * Two caches are consulted: mTplRedirCache maps a prefixed DB key to the
 * (namespace, DB key) of the title it finally resolved to, and
 * mTplDomCache maps a prefixed DB key to its DOM, or to false when the
 * template text could not be fetched.
 *
 * @param Title $title Title of the template to load
 * @return array Two elements: the DOM (or false) and the final title
 */
public function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	# Follow a previously recorded redirect before looking up the DOM
	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return array( $this->mTplDomCache[$titleText], $title );
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		# Remember the failure so the fetch is not repeated
		$this->mTplDomCache[$titleText] = false;
		return array( false, $title );
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	# The fetch ended on a different title: record where the request led
	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			array( $title->getNamespace(), $title->getDBkey() );
	}

	return array( $dom, $title );
}
3866 
/**
 * Fetch the current revision of a title through a small per-parser cache.
 *
 * The cache is created on first use with room for 100 entries and is keyed
 * by prefixed DB key. On a miss the callback configured in the parser
 * options is invoked with the title and this parser, and its result is
 * stored before being returned.
 *
 * @param Title $title
 * @return mixed Whatever the current-revision callback returned for the title
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$key = $title->getPrefixedDBkey();

	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	$cache = $this->currentRevisionCache;

	if ( !$cache->has( $key ) ) {
		// Defaults to Parser::statelessFetchRevision()
		$fetcher = $this->mOptions->getCurrentRevisionCallback();
		$cache->set( $key, call_user_func( $fetcher, $title, $this ) );
	}

	return $cache->get( $key );
}
3891 
/**
 * Static callback that loads the revision for a title straight from
 * Revision::newFromTitle(), without touching any parser state.
 *
 * @param Title $title
 * @param Parser|bool $parser Accepted for callback compatibility; not used here
 * @return mixed Result of Revision::newFromTitle()
 */
public static function statelessFetchRevision( $title, $parser = false ) {
	$revision = Revision::newFromTitle( $title );

	return $revision;
}
3904 
/**
 * Fetch the unparsed text of a template via the configured template
 * callback, and register every reported dependency with the output.
 *
 * @param Title $title
 * @return array Two elements: the text (or a non-string value passed
 *  through from the callback) and the final title
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$callback = $this->mOptions->getTemplateCallback();
	$fetched = call_user_func( $callback, $title, $this );

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// U+007F DELETE is what distinguishes strip markers from regular
		// text, so it must not survive in fetched template text.
		$text = strtr( $text, "\x7f", "?" );
	}

	if ( isset( $fetched['deps'] ) ) {
		foreach ( $fetched['deps'] as $dependency ) {
			$depTitle = $dependency['title'];
			$this->mOutput->addTemplate( $depTitle, $dependency['page_id'], $dependency['rev_id'] );
			if ( $depTitle->equals( $this->getTitle() ) ) {
				// If we transclude ourselves, the final result
				// will change based on the new version of the page
				$this->mOutput->setFlag( 'vary-revision' );
			}
		}
	}

	if ( isset( $fetched['finalTitle'] ) ) {
		$finalTitle = $fetched['finalTitle'];
	} else {
		$finalTitle = $title;
	}

	return array( $text, $finalTitle );
}
3932 
/**
 * Fetch only the text of a template, discarding the final title that
 * fetchTemplateAndTitle() also returns.
 *
 * @param Title $title
 * @return mixed First element of the fetchTemplateAndTitle() result
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );

	return $text;
}
3942 
/**
 * Static callback that fetches template text, following at most one
 * redirect, and collects the titles/revisions that were consulted.
 *
 * @param Title $title
 * @param Parser|bool $parser Parser to fetch the current revision through,
 *  or false to load it directly via Revision::newFromTitle()
 * @return array With keys:
 *  - 'text': the wikitext, or false if nothing usable was found
 *  - 'finalTitle': the last title whose content was examined
 *  - 'deps': list of arrays with 'title', 'page_id' and 'rev_id'
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	# Needed for the MediaWiki-namespace message fallback below
	global $wgContLang;

	$text = $skip = false;
	$finalTitle = $title;
	$deps = array();

	# Loop to fetch the article, with up to 1 redirect
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			array( $parser, $title, &$skip, &$id ) );

		if ( $skip ) {
			$text = false;
			$deps[] = array(
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			);
			break;
		}
		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			$rev = Revision::newFromTitle( $title );
		}
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = LinkCache::singleton();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = array(
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id );
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = array(
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id );
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but the title may still name an interface message
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		# A non-object target ends the loop through its is_object() condition
		$title = $content->getRedirectTarget();
	}
	return array(
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps );
}
4032 
/**
 * Fetch a file for a title, discarding the possibly updated title that
 * fetchFileAndTitle() also returns.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed First element of the fetchFileAndTitle() result
 */
public function fetchFile( $title, $options = array() ) {
	list( $file ) = $this->fetchFileAndTitle( $title, $options );

	return $file;
}
4044 
/**
 * Fetch a file and the title it actually belongs to, registering the
 * image with the parser output along the way.
 *
 * The requested title is always registered. If a file was found and its
 * own title differs from the requested one, that title is registered as
 * well and is the one returned.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (or a false value) and the title
 */
public function fetchFileAndTitle( $title, $options = array() ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( $file ) {
		$actualTitle = $file->getTitle();
		if ( !$title->equals( $actualTitle ) ) {
			# Update fetched file title
			$title = $actualTitle;
			$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
		}
	}

	return array( $file, $title );
}
4066 
/**
 * Look up a file without registering it with the parser output.
 *
 * @param Title $title
 * @param array $options Recognised keys, checked in this order:
 *  - 'broken': if set, no lookup is made and false is returned
 *  - 'sha1': if set, the file is looked up by key via RepoGroup
 *  Otherwise the file is looked up by title via wfFindFile(). The whole
 *  array is forwarded to whichever lookup is used.
 * @return mixed The lookup result, or false for a forced-broken file
 */
protected function fetchFileNoRegister( $title, $options = array() ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return wfFindFile( $title, $options );
}
4087 
/**
 * Transclude a page from another wiki by fetching it over HTTP.
 *
 * Returns a content-language message instead of page content when
 * interwiki transclusion is disabled or when the generated URL is longer
 * than 255 characters.
 *
 * @param Title $title Interwiki title to fetch
 * @param string $action Value for the 'action' URL parameter
 *  (the visible callers pass 'render' or 'raw')
 * @return string
 */
public function interwikiTransclude( $title, $action ) {
	# Site switch for this feature
	global $wgEnableScaryTranscluding;

	if ( !$wgEnableScaryTranscluding ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( array( 'action' => $action ) );

	if ( strlen( $url ) > 255 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}
	return $this->fetchScaryTemplateMaybeFromCache( $url );
}
4110 
/**
 * Fetch the content of a URL, going through the 'transcache' table.
 *
 * A cached row is used if its tc_time is no older than
 * $wgTranscludeCacheExpiry seconds. Otherwise the URL is requested; on
 * success the content is written to the cache and returned, on failure a
 * content-language error message is returned and nothing is cached.
 *
 * @param string $url
 * @return string Fetched content or an error message
 */
public function fetchScaryTemplateMaybeFromCache( $url ) {
	# Maximum age, in seconds, of a usable cache row
	global $wgTranscludeCacheExpiry;

	$dbr = wfGetDB( DB_SLAVE );
	$tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
	$obj = $dbr->selectRow( 'transcache', array( 'tc_time', 'tc_contents' ),
		array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ) );
	if ( $obj ) {
		return $obj->tc_contents;
	}

	$req = MWHttpRequest::factory( $url, array(), __METHOD__ );
	$status = $req->execute(); // Status object
	if ( $status->isOK() ) {
		$text = $req->getContent();
	} elseif ( $req->getStatus() != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
	} else {
		return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
	}

	# Store the fresh copy, replacing any existing row for this URL
	$dbw = wfGetDB( DB_MASTER );
	$dbw->replace( 'transcache', array( 'tc_url' ), array(
		'tc_url' => $url,
		'tc_time' => $dbw->timestamp( time() ),
		'tc_contents' => $text
	) );
	return $text;
}
4145 
/**
 * Resolve a triple-brace argument against the given frame.
 *
 * If the frame has a value for the argument it is returned as text. If it
 * has none, the first supplied default is returned as an object when the
 * output type allows it; failing that, the original triple-brace source is
 * rebuilt and returned as an object. The resolved length is charged to the
 * 'arg' include size; when that limit is exceeded a warning comment is
 * appended to the text and a limitation warning is recorded.
 *
 * @param array $piece Uses the 'title' and 'parts' entries
 * @param object $frame Frame that supplies the argument values
 * @return array Either array( 'object' => ... ) or array( 'text' => ... )
 */
public function argSubstitution( $piece, $frame ) {
	$defaults = $piece['parts'];
	$rawName = $frame->expand( $piece['title'] );
	$value = $frame->getArgument( trim( $rawName ) );
	$node = false;

	if ( $value === false && $defaults->getLength() > 0 ) {
		$defaultAllowed = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultAllowed ) {
			# No match in frame, use the supplied default
			$node = $defaults->item( 0 )->getChildren();
		}
	}

	$warning = false;
	if ( !$this->incrementIncludeSize( 'arg', strlen( $value ) ) ) {
		$warning = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $value === false && $node === false ) {
		# No match anywhere
		$node = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $rawName, $defaults );
	}

	if ( $node !== false ) {
		return array( 'object' => $node );
	}

	if ( $warning !== false ) {
		$value .= $warning;
	}

	return array( 'text' => $value );
}
4192 
/**
 * Produce the replacement for an extension tag.
 *
 * In HTML output, or for a registered function tag hook, the matching hook
 * is called and its output is used. Otherwise the tag is rebuilt as text.
 * Unless the marker type is 'none', the result is stored in the strip
 * state and a strip marker is returned in its place.
 *
 * A hook may return an array whose element 0 is the output and whose
 * 'markerType' entry ('none', 'nowiki' or 'general') overrides the default
 * marker type. Other entries are ignored; they are deliberately not
 * extracted into local scope, so a hook cannot clobber this method's
 * variables.
 *
 * @param array $params Uses 'name', and optionally 'attr', 'inner',
 *  'close' and 'attributes'
 * @param object $frame Frame used to expand the tag's parts
 * @throws MWException If a hook is not callable or the marker type is invalid
 * @return string Hook output or a strip marker
 */
public function extensionSubstitution( $params, $frame ) {
	$name = $frame->expand( $params['name'] );
	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	if ( $isFunctionTag ) {
		$markerType = 'none';
	} else {
		$markerType = 'general';
	}
	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			# Array union: attributes written in the tag win over supplied ones
			$attributes = $attributes + $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $this->mTagHooks[$name] ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}
			$output = call_user_func_array( $this->mTagHooks[$name],
				array( $content, $attributes, $this, $frame ) );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}

			$output = call_user_func_array( $callback, array( &$this, $frame, $content, $attributes ) );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			# Pick out the only flag this method acts on
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		if ( is_null( $attrText ) ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			# A missing or null 'close' means there is no closing tag text
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4284 
/**
 * Add to one of the include size counters, unless that would push it past
 * the maximum include size from the parser options.
 *
 * @param string $type Counter to charge (the visible callers use
 *  'post-expand' and 'arg')
 * @param int $size Amount to add
 * @return bool True if the counter was increased, false if the limit would
 *  be exceeded (the counter is then left unchanged)
 */
public function incrementIncludeSize( $type, $size ) {
	$proposed = $this->mIncludeSizes[$type] + $size;
	if ( $proposed > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $proposed;
	return true;
}
4300 
/**
 * Count one more expensive parser function call.
 *
 * The counter is increased even when the limit has already been passed.
 *
 * @return bool True while the count is still within the limit from the
 *  parser options, false once it has been exceeded
 */
public function incrementExpensiveFunctionCount() {
	$count = ++$this->mExpensiveFunctionCount;
	$limit = $this->mOptions->getExpensiveParserFunctionLimit();

	return $count <= $limit;
}
4310 
/**
 * Strip double-underscore magic words such as __TOC__ from the text and
 * record what they request.
 *
 * The first __TOC__ is replaced by a placeholder comment and any further
 * ones are removed. All other double-underscore words are removed and
 * remembered in mDoubleUnderscores, and each one found is saved as an
 * output property with an empty value.
 *
 * @param string $text
 * @return string Text with the magic words removed
 */
public function doDoubleUnderscore( $text ) {

	# The position of __TOC__ needs to be recorded
	$mw = MagicWord::get( 'toc' );
	if ( $mw->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $mw->replace( '<!--MWTOC-->', $text, 1 );

		# Only keep the first one.
		$text = $mw->replace( '', $text );
	}

	# Now match and remove the rest of them
	$mwa = MagicWord::getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $mwa->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}
	# (bug 8068) Allow control over whether robots index a page.
	#
	# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
	# is not desirable, the last one on the page should win.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4369 
/**
 * Add a tracking category for the page being parsed by delegating to the
 * parser output, passing the parser's current title.
 *
 * @param string $msg Identifier of the tracking category, forwarded unchanged
 * @return mixed Whatever the output object's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$outcome = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );

	return $outcome;
}
4378 
4395  public function formatHeadings( $text, $origText, $isMain = true ) {
4396  global $wgMaxTocLevel, $wgExperimentalHtmlIds;
4397 
4398  # Inhibit editsection links if requested in the page
4399  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4400  $maybeShowEditLink = $showEditLink = false;
4401  } else {
4402  $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
4403  $showEditLink = $this->mOptions->getEditSection();
4404  }
4405  if ( $showEditLink ) {
4406  $this->mOutput->setEditSectionTokens( true );
4407  }
4408 
4409  # Get all headlines for numbering them and adding funky stuff like [edit]
4410  # links - this is for later, but we need the number of headlines right now
4411  $matches = array();
4412  $numMatches = preg_match_all(
4413  '/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
4414  $text,
4415  $matches
4416  );
4417 
4418  # if there are fewer than 4 headlines in the article, do not show TOC
4419  # unless it's been explicitly enabled.
4420  $enoughToc = $this->mShowToc &&
4421  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4422 
4423  # Allow user to stipulate that a page should have a "new section"
4424  # link added via __NEWSECTIONLINK__
4425  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4426  $this->mOutput->setNewSection( true );
4427  }
4428 
4429  # Allow user to remove the "new section"
4430  # link via __NONEWSECTIONLINK__
4431  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4432  $this->mOutput->hideNewSection( true );
4433  }
4434 
4435  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4436  # override above conditions and always show TOC above first header
4437  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4438  $this->mShowToc = true;
4439  $enoughToc = true;
4440  }
4441 
4442  # headline counter
4443  $headlineCount = 0;
4444  $numVisible = 0;
4445 
4446  # Ugh .. the TOC should have neat indentation levels which can be
4447  # passed to the skin functions. These are determined here
4448  $toc = '';
4449  $full = '';
4450  $head = array();
4451  $sublevelCount = array();
4452  $levelCount = array();
4453  $level = 0;
4454  $prevlevel = 0;
4455  $toclevel = 0;
4456  $prevtoclevel = 0;
4457  $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4458  $baseTitleText = $this->mTitle->getPrefixedDBkey();
4459  $oldType = $this->mOutputType;
4460  $this->setOutputType( self::OT_WIKI );
4461  $frame = $this->getPreprocessor()->newFrame();
4462  $root = $this->preprocessToDom( $origText );
4463  $node = $root->getFirstChild();
4464  $byteOffset = 0;
4465  $tocraw = array();
4466  $refers = array();
4467 
4468  $headlines = $numMatches !== false ? $matches[3] : array();
4469 
4470  foreach ( $headlines as $headline ) {
4471  $isTemplate = false;
4472  $titleText = false;
4473  $sectionIndex = false;
4474  $numbering = '';
4475  $markerMatches = array();
4476  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4477  $serial = $markerMatches[1];
4478  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4479  $isTemplate = ( $titleText != $baseTitleText );
4480  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4481  }
4482 
4483  if ( $toclevel ) {
4484  $prevlevel = $level;
4485  }
4486  $level = $matches[1][$headlineCount];
4487 
4488  if ( $level > $prevlevel ) {
4489  # Increase TOC level
4490  $toclevel++;
4491  $sublevelCount[$toclevel] = 0;
4492  if ( $toclevel < $wgMaxTocLevel ) {
4493  $prevtoclevel = $toclevel;
4494  $toc .= Linker::tocIndent();
4495  $numVisible++;
4496  }
4497  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4498  # Decrease TOC level, find level to jump to
4499 
4500  for ( $i = $toclevel; $i > 0; $i-- ) {
4501  if ( $levelCount[$i] == $level ) {
4502  # Found last matching level
4503  $toclevel = $i;
4504  break;
4505  } elseif ( $levelCount[$i] < $level ) {
4506  # Found first matching level below current level
4507  $toclevel = $i + 1;
4508  break;
4509  }
4510  }
4511  if ( $i == 0 ) {
4512  $toclevel = 1;
4513  }
4514  if ( $toclevel < $wgMaxTocLevel ) {
4515  if ( $prevtoclevel < $wgMaxTocLevel ) {
4516  # Unindent only if the previous toc level was shown :p
4517  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4518  $prevtoclevel = $toclevel;
4519  } else {
4520  $toc .= Linker::tocLineEnd();
4521  }
4522  }
4523  } else {
4524  # No change in level, end TOC line
4525  if ( $toclevel < $wgMaxTocLevel ) {
4526  $toc .= Linker::tocLineEnd();
4527  }
4528  }
4529 
4530  $levelCount[$toclevel] = $level;
4531 
4532  # count number of headlines for each level
4533  $sublevelCount[$toclevel]++;
4534  $dot = 0;
4535  for ( $i = 1; $i <= $toclevel; $i++ ) {
4536  if ( !empty( $sublevelCount[$i] ) ) {
4537  if ( $dot ) {
4538  $numbering .= '.';
4539  }
4540  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4541  $dot = 1;
4542  }
4543  }
4544 
4545  # The safe header is a version of the header text safe to use for links
4546 
4547  # Remove link placeholders by the link text.
4548  # <!--LINK number-->
4549  # turns into
4550  # link text with suffix
4551  # Do this before unstrip since link text can contain strip markers
4552  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4553 
4554  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4555  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4556 
4557  # Strip out HTML (first regex removes any tag not allowed)
4558  # Allowed tags are:
4559  # * <sup> and <sub> (bug 8393)
4560  # * <i> (bug 26375)
4561  # * <b> (r105284)
4562  # * <bdi> (bug 72884)
4563  # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
4564  #
4565  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4566  # to allow setting directionality in toc items.
4567  $tocline = preg_replace(
4568  array(
4569  '#<(?!/?(span|sup|sub|bdi|i|b)(?: [^>]*)?>).*?>#',
4570  '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b))(?: .*?)?>#'
4571  ),
4572  array( '', '<$1>' ),
4573  $safeHeadline
4574  );
4575 
4576  # Strip '<span></span>', which is the result from the above if
4577  # <span id="foo"></span> is used to produce an additional anchor
4578  # for a section.
4579  $tocline = str_replace( '<span></span>', '', $tocline );
4580 
4581  $tocline = trim( $tocline );
4582 
4583  # For the anchor, strip out HTML-y stuff period
4584  $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4585  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4586 
4587  # Save headline for section edit hint before it's escaped
4588  $headlineHint = $safeHeadline;
4589 
4590  if ( $wgExperimentalHtmlIds ) {
4591  # For reverse compatibility, provide an id that's
4592  # HTML4-compatible, like we used to.
4593  #
4594  # It may be worth noting, academically, that it's possible for
4595  # the legacy anchor to conflict with a non-legacy headline
4596  # anchor on the page. In this case likely the "correct" thing
4597  # would be to either drop the legacy anchors or make sure
4598  # they're numbered first. However, this would require people
4599  # to type in section names like "abc_.D7.93.D7.90.D7.A4"
4600  # manually, so let's not bother worrying about it.
4601  $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
4602  array( 'noninitial', 'legacy' ) );
4603  $safeHeadline = Sanitizer::escapeId( $safeHeadline );
4604 
4605  if ( $legacyHeadline == $safeHeadline ) {
4606  # No reason to have both (in fact, we can't)
4607  $legacyHeadline = false;
4608  }
4609  } else {
4610  $legacyHeadline = false;
4611  $safeHeadline = Sanitizer::escapeId( $safeHeadline,
4612  'noninitial' );
4613  }
4614 
4615  # HTML names must be case-insensitively unique (bug 10721).
4616  # This does not apply to Unicode characters per
4617  # http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
4618  # @todo FIXME: We may be changing them depending on the current locale.
4619  $arrayKey = strtolower( $safeHeadline );
4620  if ( $legacyHeadline === false ) {
4621  $legacyArrayKey = false;
4622  } else {
4623  $legacyArrayKey = strtolower( $legacyHeadline );
4624  }
4625 
4626  # Create the anchor for linking from the TOC to the section
4627  $anchor = $safeHeadline;
4628  $legacyAnchor = $legacyHeadline;
4629  if ( isset( $refers[$arrayKey] ) ) {
4630  for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4631  $anchor .= "_$i";
4632  $refers["${arrayKey}_$i"] = true;
4633  } else {
4634  $refers[$arrayKey] = true;
4635  }
4636  if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
4637  for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
4638  $legacyAnchor .= "_$i";
4639  $refers["${legacyArrayKey}_$i"] = true;
4640  } else {
4641  $refers[$legacyArrayKey] = true;
4642  }
4643 
4644  # Don't number the heading if it is the only one (looks silly)
4645  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4646  # the two are different if the line contains a link
4647  $headline = Html::element(
4648  'span',
4649  array( 'class' => 'mw-headline-number' ),
4650  $numbering
4651  ) . ' ' . $headline;
4652  }
4653 
4654  if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
4655  $toc .= Linker::tocLine( $anchor, $tocline,
4656  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4657  }
4658 
4659  # Add the section to the section tree
4660  # Find the DOM node for this header
4661  $noOffset = ( $isTemplate || $sectionIndex === false );
4662  while ( $node && !$noOffset ) {
4663  if ( $node->getName() === 'h' ) {
4664  $bits = $node->splitHeading();
4665  if ( $bits['i'] == $sectionIndex ) {
4666  break;
4667  }
4668  }
4669  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4670  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4671  $node = $node->getNextSibling();
4672  }
4673  $tocraw[] = array(
4674  'toclevel' => $toclevel,
4675  'level' => $level,
4676  'line' => $tocline,
4677  'number' => $numbering,
4678  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4679  'fromtitle' => $titleText,
4680  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4681  'anchor' => $anchor,
4682  );
4683 
4684  # give headline the correct <h#> tag
4685  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4686  // Output edit section links as markers with styles that can be customized by skins
4687  if ( $isTemplate ) {
4688  # Put a T flag in the section identifier, to indicate to extractSections()
4689  # that sections inside <includeonly> should be counted.
4690  $editsectionPage = $titleText;
4691  $editsectionSection = "T-$sectionIndex";
4692  $editsectionContent = null;
4693  } else {
4694  $editsectionPage = $this->mTitle->getPrefixedText();
4695  $editsectionSection = $sectionIndex;
4696  $editsectionContent = $headlineHint;
4697  }
4698  // We use a bit of pesudo-xml for editsection markers. The
4699  // language converter is run later on. Using a UNIQ style marker
4700  // leads to the converter screwing up the tokens when it
4701  // converts stuff. And trying to insert strip tags fails too. At
4702  // this point all real inputted tags have already been escaped,
4703  // so we don't have to worry about a user trying to input one of
4704  // these markers directly. We use a page and section attribute
4705  // to stop the language converter from converting these
4706  // important bits of data, but put the headline hint inside a
4707  // content block because the language converter is supposed to
4708  // be able to convert that piece of data.
4709  // Gets replaced with html in ParserOutput::getText
4710  $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4711  $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4712  if ( $editsectionContent !== null ) {
4713  $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4714  } else {
4715  $editlink .= '/>';
4716  }
4717  } else {
4718  $editlink = '';
4719  }
4720  $head[$headlineCount] = Linker::makeHeadline( $level,
4721  $matches['attrib'][$headlineCount], $anchor, $headline,
4722  $editlink, $legacyAnchor );
4723 
4724  $headlineCount++;
4725  }
4726 
4727  $this->setOutputType( $oldType );
4728 
4729  # Never ever show TOC if no headers
4730  if ( $numVisible < 1 ) {
4731  $enoughToc = false;
4732  }
4733 
4734  if ( $enoughToc ) {
4735  if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
4736  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4737  }
4738  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4739  $this->mOutput->setTOCHTML( $toc );
4740  $toc = self::TOC_START . $toc . self::TOC_END;
4741  $this->mOutput->addModules( 'mediawiki.toc' );
4742  }
4743 
4744  if ( $isMain ) {
4745  $this->mOutput->setSections( $tocraw );
4746  }
4747 
4748  # split up and insert constructed headlines
4749  $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4750  $i = 0;
4751 
4752  // build an array of document sections
4753  $sections = array();
4754  foreach ( $blocks as $block ) {
4755  // $head is zero-based, sections aren't.
4756  if ( empty( $head[$i - 1] ) ) {
4757  $sections[$i] = $block;
4758  } else {
4759  $sections[$i] = $head[$i - 1] . $block;
4760  }
4761 
4772  Hooks::run( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) );
4773 
4774  $i++;
4775  }
4776 
4777  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4778  // append the TOC at the beginning
4779  // Top anchor now in skin
4780  $sections[0] = $sections[0] . $toc . "\n";
4781  }
4782 
4783  $full .= join( '', $sections );
4784 
4785  if ( $this->mForceTocPosition ) {
4786  return str_replace( '<!--MWTOC-->', $toc, $full );
4787  } else {
4788  return $full;
4789  }
4790  }
4791 
/**
 * Transform wikitext before it is saved: normalise line endings, optionally
 * run the pre-save pass (pstPass2) and unstrip any strip markers.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title passed to startParse()
 * @param User $user User set on the parser for the duration of the call
 * @param ParserOptions $options Options; getPreSaveTransform() decides whether
 *   pstPass2() is run
 * @param bool $clearState Whether to take the parser lock and clear parser state
 * @return string The transformed text
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	# The lock handle only has to stay referenced until this method returns;
	# it is never read.
	$scopedLock = $clearState ? $this->lock() : null;

	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	# Normalise line endings: CRLF pairs first, then any remaining lone CR
	$text = str_replace( array( "\r\n", "\r" ), "\n", $text );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$transformed = $this->mStripState->unstripBoth( $text );

	# Reset the user again now that we are done
	$this->setUser( null );

	return $transformed;
}
4826 
/**
 * Pre-save transform helper: substitutes variables, expands signature
 * tildes and applies the "pipe trick" to links, then trims trailing
 * whitespace.
 *
 * @param string $text Wikitext to transform
 * @param User $user User whose signature is substituted for ~~~ / ~~~~
 * @return string The transformed text
 */
private function pstPass2( $text, $user ) {
	global $wgContLang;

	# Note: This is the timestamp saved as hardcoded wikitext to
	# the database, we use $wgContLang here in order to give
	# everyone the same signature and use the default one rather
	# than the one selected in each user's preferences.
	# (see also bug 12815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->format( 'T' ); # might vary on DST changeover!

	# Allow translation of timezones through wiki. format() can return
	# whatever crap the system uses, localised or not, so we cannot
	# ship premade translations.
	$key = 'timezone-' . strtolower( trim( $tzMsg ) );
	$msg = wfMessage( $key )->inContentLanguage();
	if ( $msg->exists() ) {
		$tzMsg = $msg->text();
	}

	$d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	# strtr() with an array always tries the longest key first, so five
	# tildes are never consumed as "~~~~" followed by a stray "~".
	$sigText = $this->getUserSig( $user );
	$text = strtr( $text, array(
		'~~~~~' => $d,
		'~~~~' => "$sigText $d",
		'~~~' => $sigText
	) );

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# Full-width (double-width) punctuation, spelled as UTF-8 byte escapes so
	# the patterns survive any re-encoding or normalisation of this file.
	$fwOpen = "\xEF\xBC\x88"; # U+FF08 FULLWIDTH LEFT PARENTHESIS
	$fwClose = "\xEF\xBC\x89"; # U+FF09 FULLWIDTH RIGHT PARENTHESIS
	$fwComma = "\xEF\xBC\x8C"; # U+FF0C FULLWIDTH COMMA

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page(context)|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?{$fwOpen}$tc+{$fwClose})\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |{$fwComma})$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	# Reverse pipe trick: borrow the namespace and the "(context)" or
	# ", context" suffix from the title of the page being saved.
	$t = $this->mTitle->getText();
	$m = array();
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	# Trim trailing whitespace
	$text = rtrim( $text );

	return $text;
}
4908 
/**
 * Build the wikitext signature for a user.
 *
 * If the user has a "fancy" (raw) signature that validates, the cleaned
 * raw signature is returned. Otherwise the 'signature' / 'signature-anon'
 * message is rendered with the user name and nickname.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use, or false to read the user's
 *   'nickname' option
 * @param bool|null $fancySig Whether the nickname is raw signature wikitext,
 *   or null to read the user's 'fancysig' option
 * @return string Signature wikitext
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	global $wgMaxSigChars;

	$username = $user->getName();

	# If not given, retrieve from the user object.
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}

	if ( is_null( $fancySig ) ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# Loose comparison on purpose: an empty-string nickname also falls back
	# to the user name.
	$nickname = $nickname == null ? $username : $nickname;

	if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		if ( $this->validateSig( $nickname ) !== false ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		} else {
			# Failed to validate; fall back to the default
			$nickname = $username;
			wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
		}
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	$msgName = $user->isAnon() ? 'signature-anon' : 'signature';

	return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()
		->title( $this->getTitle() )->text();
}
4965 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text Signature text to check
 * @return string|bool The unchanged text when well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( !Xml::isWellFormedXmlFragment( $text ) ) {
		return false;
	}

	return $text;
}
4975 
/**
 * Clean up signature text: force every template call to be a subst, remove
 * nested signature tildes and run the result through the preprocessor.
 *
 * @param string $text Signature wikitext
 * @param bool $parsing True when called from inside an ongoing parse; when
 *   false a fresh OT_PREPROCESS parse against $wgTitle is started here and
 *   the result is unstripped before being returned
 * @return string Cleaned signature, or the input unchanged when the
 *   clean-signatures option is off
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		global $wgTitle;
		# Lock handle is only kept referenced until the method returns
		$scopedLock = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = MagicWord::get( 'subst' );
	# Matches every "{{" that is not already followed by the subst magic word
	$pattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$replacement = '{{' . $subst->getSynonym( 0 );

	$cleaned = self::cleanSigInSig( preg_replace( $pattern, $replacement, $text ) );

	$dom = $this->preprocessToDom( $cleaned );
	$expanded = $this->getPreprocessor()->newFrame()->expand( $dom );

	return $parsing ? $expanded : $this->mStripState->unstripBoth( $expanded );
}
5016 
/**
 * Remove signature markers (runs of three to five tildes) from text so a
 * signature cannot itself contain a signature.
 *
 * @param string $text
 * @return string Text with each ~~~, ~~~~ and ~~~~~ run removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
5027 
/**
 * Public entry point for setting up the parser for a parse driven from
 * outside this class; forwards all arguments to startParse().
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @param bool $clearState
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	# Pure delegation: all set-up work lives in startParse()
	$this->startParse( $title, $options, $outputType, $clearState );
}
5042 
/**
 * Set the title, options and output type for a parse, optionally clearing
 * the parser state afterwards.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants
 * @param bool $clearState Whether to call clearState() once set up
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
5059 
/**
 * Preprocess message text, guarding against infinite recursion.
 *
 * While a call is in progress, any nested call returns its input
 * unchanged.
 *
 * @param string $text Text to preprocess
 * @param ParserOptions $options Options passed to preprocess()
 * @param Title|null $title Title context; falls back to $wgTitle when falsy
 * @return string The preprocessed text (or the unchanged input for a
 *   nested call)
 * @throws Exception Anything thrown by preprocess() is rethrown unchanged
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	if ( !$title ) {
		global $wgTitle;
		$title = $wgTitle;
	}

	try {
		$text = $this->preprocess( $text, $title, $options );
	} catch ( Exception $e ) {
		# Release the guard before propagating, otherwise every later call
		# in this process would return its input untouched.
		$executing = false;
		throw $e;
	}

	$executing = false;
	return $text;
}
5087 
/**
 * Register a callback for an extension tag and add the tag to the strip
 * list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains <, >, CR or LF
 * @return callable|null The callback previously registered for the tag,
 *   or null if there was none
 */
public function setHook( $tag, $callback ) {
	$tag = strtolower( $tag );

	$bad = array();
	if ( preg_match( '/[<>\r\n]/', $tag, $bad ) ) {
		throw new MWException( "Invalid character {$bad[0]} in setHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mTagHooks[$tag] ) ) {
		$previous = $this->mTagHooks[$tag];
	}
	$this->mTagHooks[$tag] = $callback;

	# Register the tag for stripping only once
	if ( !in_array( $tag, $this->mStripList ) ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
5125 
/**
 * Register a callback for a transparent tag. Unlike setHook(), the tag is
 * not added to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains <, >, CR or LF
 * @return callable|null The callback previously registered for the tag,
 *   or null if there was none
 */
public function setTransparentTagHook( $tag, $callback ) {
	$tag = strtolower( $tag );

	$bad = array();
	if ( preg_match( '/[<>\r\n]/', $tag, $bad ) ) {
		throw new MWException( "Invalid character {$bad[0]} in setTransparentHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mTransparentTagHooks[$tag] ) ) {
		$previous = $this->mTransparentTagHooks[$tag];
	}
	$this->mTransparentTagHooks[$tag] = $callback;

	return $previous;
}
5153 
/**
 * Drop all registered tag hooks and function tag hooks, and reset the
 * strip list to the default strip list.
 */
public function clearTagHooks() {
	# The three resets are independent of each other
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = array();
	$this->mTagHooks = array();
}
5162 
/**
 * Register a parser function callback and index all synonyms of its magic
 * word so they can be looked up by name.
 *
 * @param string $id Magic word identifier
 * @param callable $callback Callback stored for the function
 * @param int $flags Bitfield of SFH_* flags; SFH_NO_HASH suppresses the
 *   leading '#' on the registered synonyms
 * @throws MWException If no magic word is returned for $id
 * @return callable|null The callback previously registered for $id, or
 *   null if there was none
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
	global $wgContLang;

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = array( $callback, $flags );

	# Add to function cache
	$mw = MagicWord::get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $mw->getSynonyms();
	# 0 or 1; selects which of the two mFunctionSynonyms tables is filled
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $wgContLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
5238 
/**
 * Get the identifiers of all registered parser function hooks.
 *
 * @return array Keys of the function hook table
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5247 
/**
 * Register a function tag hook (callback plus flags) for a tag and add the
 * tag to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @param int $flags Flags stored alongside the callback
 * @throws MWException If the tag name contains <, >, CR or LF
 * @return array|null The array( callback, flags ) pair previously
 *   registered for the tag, or null if there was none
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
	$tag = strtolower( $tag );

	$bad = array();
	if ( preg_match( '/[<>\r\n]/', $tag, $bad ) ) {
		throw new MWException( "Invalid character {$bad[0]} in setFunctionTagHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mFunctionTagHooks[$tag] ) ) {
		$previous = $this->mFunctionTagHooks[$tag];
	}
	$this->mFunctionTagHooks[$tag] = array( $callback, $flags );

	# Register the tag for stripping only once
	if ( !in_array( $tag, $this->mStripList ) ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
5273 
/**
 * Replace link placeholders in the given text, in place, by delegating to
 * the link holder collection.
 *
 * @param string &$text Text to update; handed to mLinkHolders->replace()
 * @param int $options Accepted but not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5285 
/**
 * Run the text through the link holder collection's replaceText() and
 * return the result.
 *
 * @param string $text
 * @return string Whatever mLinkHolders->replaceText() returns
 */
public function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );

	return $replaced;
}
5296 
/**
 * Render an image gallery from gallery text and tag attributes.
 *
 * Each line of $text has the form "File title|caption and options". Lines
 * that do not match that shape, or whose title cannot be parsed, are
 * skipped. The keys read from $params here are 'mode', 'showfilename',
 * 'caption', 'perrow', 'widths' and 'heights'; the whole array is also
 * passed to Sanitizer::validateTagAttributes() and setAdditionalOptions().
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Attributes of the gallery tag
 * @return string Result of the gallery object's toHTML(), after hooks had
 *   a chance to modify it
 */
5310  public function renderImageGallery( $text, $params ) {
5311 
5312  $mode = false;
5313  if ( isset( $params['mode'] ) ) {
5314  $mode = $params['mode'];
5315  }
5316 
5317  try {
5318  $ig = ImageGalleryBase::factory( $mode );
5319  } catch ( Exception $e ) {
5320  // If invalid type set, fallback to default.
5321  $ig = ImageGalleryBase::factory( false );
5322  }
5323 
5324  $ig->setContextTitle( $this->mTitle );
5325  $ig->setShowBytes( false );
5326  $ig->setShowFilename( false );
5327  $ig->setParser( $this );
5328  $ig->setHideBadImages();
5329  $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );
5330  $this->getOutput()->addModuleStyles( 'mediawiki.page.gallery.styles' );
5331 
# Only the presence of the 'showfilename' attribute matters, not its value
5332  if ( isset( $params['showfilename'] ) ) {
5333  $ig->setShowFilename( true );
5334  } else {
5335  $ig->setShowFilename( false );
5336  }
5337  if ( isset( $params['caption'] ) ) {
5338  $caption = $params['caption'];
# Escape first, then let internal links in the caption be processed
5339  $caption = htmlspecialchars( $caption );
5340  $caption = $this->replaceInternalLinks( $caption );
5341  $ig->setCaptionHtml( $caption );
5342  }
5343  if ( isset( $params['perrow'] ) ) {
5344  $ig->setPerRow( $params['perrow'] );
5345  }
5346  if ( isset( $params['widths'] ) ) {
5347  $ig->setWidths( $params['widths'] );
5348  }
5349  if ( isset( $params['heights'] ) ) {
5350  $ig->setHeights( $params['heights'] );
5351  }
5352  $ig->setAdditionalOptions( $params );
5353 
5354  Hooks::run( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) );
5355 
5356  $lines = StringUtils::explode( "\n", $text );
5357  foreach ( $lines as $line ) {
5358  # match lines like these:
5359  # Image:someimage.jpg|This is some image
# $matches[1] is the part before the first pipe, $matches[3] the rest
5360  $matches = array();
5361  preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
5362  # Skip empty lines
5363  if ( count( $matches ) == 0 ) {
5364  continue;
5365  }
5366 
# Percent-decode the title part if the line contains a '%' anywhere
5367  if ( strpos( $matches[0], '%' ) !== false ) {
5368  $matches[1] = rawurldecode( $matches[1] );
5369  }
5370  $title = Title::newFromText( $matches[1], NS_FILE );
5371  if ( is_null( $title ) ) {
5372  # Bogus title. Ignore these so we don't bomb out later.
5373  continue;
5374  }
5375 
5376  # We need to get what handler the file uses, to figure out parameters.
5377  # Note, a hook can override the file name, and choose an entirely different
5378  # file (which potentially could be of a different type and have different handler).
5379  $options = array();
5380  $descQuery = false;
5381  Hooks::run( 'BeforeParserFetchFileAndTitle',
5382  array( $this, $title, &$options, &$descQuery ) );
5383  # Don't register it now, as ImageGallery does that later.
5384  $file = $this->fetchFileNoRegister( $title, $options );
5385  $handler = $file ? $file->getHandler() : false;
5386 
5387  $paramMap = array(
5388  'img_alt' => 'gallery-internal-alt',
5389  'img_link' => 'gallery-internal-link',
5390  );
5391  if ( $handler ) {
# Array union: the two gallery-internal entries above win over handler entries
5392  $paramMap = $paramMap + $handler->getParamMap();
5393  // We don't want people to specify per-image widths.
5394  // Additionally the width parameter would need special casing anyhow.
5395  unset( $paramMap['img_width'] );
5396  }
5397 
5398  $mwArray = new MagicWordArray( array_keys( $paramMap ) );
5399 
5400  $label = '';
5401  $alt = '';
5402  $link = '';
5403  $handlerOptions = array();
5404  if ( isset( $matches[3] ) ) {
5405  // look for an |alt= definition while trying not to break existing
5406  // captions with multiple pipes (|) in it, until a more sensible grammar
5407  // is defined for images in galleries
5408 
5409  // FIXME: Doing recursiveTagParse at this stage, and the trim before
5410  // splitting on '|' is a bit odd, and different from makeImage.
5411  $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
5412  $parameterMatches = StringUtils::explode( '|', $matches[3] );
5413 
5414  foreach ( $parameterMatches as $parameterMatch ) {
5415  list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
5416  if ( $magicName ) {
5417  $paramName = $paramMap[$magicName];
5418 
5419  switch ( $paramName ) {
5420  case 'gallery-internal-alt':
5421  $alt = $this->stripAltText( $match, false );
5422  break;
5423  case 'gallery-internal-link':
5424  $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
5425  $chars = self::EXT_LINK_URL_CLASS;
5426  $addr = self::EXT_LINK_ADDR;
5427  $prots = $this->mUrlProtocols;
5428  //check to see if link matches an absolute url, if not then it must be a wiki link.
5429  if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
5430  $link = $linkValue;
5431  } else {
5432  $localLinkTitle = Title::newFromText( $linkValue );
5433  if ( $localLinkTitle !== null ) {
5434  $link = $localLinkTitle->getLinkURL();
5435  }
5436  }
5437  break;
5438  default:
5439  // Must be a handler specific parameter.
# Other names only enter $paramMap via the handler, so $handler is set here
5440  if ( $handler->validateParam( $paramName, $match ) ) {
5441  $handlerOptions[$paramName] = $match;
5442  } else {
5443  // Guess not. Append it to the caption.
5444  wfDebug( "$parameterMatch failed parameter validation\n" );
5445  $label .= '|' . $parameterMatch;
5446  }
5447  }
5448 
5449  } else {
5450  // concatenate all other pipes
5451  $label .= '|' . $parameterMatch;
5452  }
5453  }
5454  // remove the first pipe
5455  $label = substr( $label, 1 );
5456  }
5457 
5458  $ig->add( $title, $label, $alt, $link, $handlerOptions );
5459  }
5460  $html = $ig->toHTML();
# Hooks receive $html by reference and may replace the rendered gallery
5461  Hooks::run( 'AfterParserFetchFileAndTitle', array( $this, $ig, &$html ) );
5462  return $html;
5463  }
5464 
/**
 * Get the image parameter map and matching MagicWordArray for a media
 * handler, building and caching them per handler class on first use.
 *
 * The map is keyed by magic word name ("img_..."); each value is
 * array( type, paramName ) where type is 'horizAlign', 'vertAlign',
 * 'frame' or 'handler'.
 *
 * @param object|bool $handler Media handler, or a falsy value for none
 * @return array array( parameter map, MagicWordArray )
 */
public function getImageParams( $handler ) {
	$cacheKey = $handler ? get_class( $handler ) : '';

	# Already built for this handler class: serve straight from the cache
	if ( isset( $this->mImageParams[$cacheKey] ) ) {
		return array( $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] );
	}

	# Built-in (handler independent) parameter names, grouped by type
	static $builtinNames = array(
		'horizAlign' => array( 'left', 'right', 'center', 'none' ),
		'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ),
		'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ),
	);
	# Magic word name => array( type, name ); built once and then reused
	static $builtinMap;
	if ( !$builtinMap ) {
		$builtinMap = array();
		foreach ( $builtinNames as $group => $words ) {
			foreach ( $words as $word ) {
				# Magic word ids use underscores where the names use dashes
				$builtinMap[str_replace( '-', '_', "img_$word" )] = array( $group, $word );
			}
		}
	}

	# Layer the handler's own parameters on top of the built-in ones
	$map = $builtinMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magic => $paramName ) {
			$map[$magic] = array( 'handler', $paramName );
		}
	}

	$this->mImageParams[$cacheKey] = $map;
	$this->mImageParamsMagicArray[$cacheKey] = new MagicWordArray( array_keys( $map ) );

	return array( $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] );
}
5508 
/**
 * Parse image options text and use it to make an image link.
 *
 * The option string is split on '|'. Every part that matches a known image
 * parameter and validates is stored in the parameter set; a part that does
 * not validate becomes the caption (the last such part wins).
 *
 * @param Title $title Title of the file; may be replaced by fetchFileAndTitle()
 * @param string $options Pipe-separated image options
 * @param object|bool $holders Passed through to stripAltText(), which calls
 *   replaceText() on it when truthy (presumably a LinkHolderArray - confirm)
 * @return string Result of Linker::makeImageLink()
 */
5517  public function makeImage( $title, $options, $holders = false ) {
5518  # Check if the options text is of the form "options|alt text"
5519  # Options are:
5520  # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
5521  # * left no resizing, just left align. label is used for alt= only
5522  # * right same, but right aligned
5523  # * none same, but not aligned
5524  # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
5525  # * center center the image
5526  # * frame Keep original image size, no magnify-button.
5527  # * framed Same as "frame"
5528  # * frameless like 'thumb' but without a frame. Keeps user preferences for width
5529  # * upright reduce width for upright images, rounded to full __0 px
5530  # * border draw a 1px border around the image
5531  # * alt Text for HTML alt attribute (defaults to empty)
5532  # * class Set a class for img node
5533  # * link Set the target of the image link. Can be external, interwiki, or local
5534  # vertical-align values (no % or length right now):
5535  # * baseline
5536  # * sub
5537  # * super
5538  # * top
5539  # * text-top
5540  # * middle
5541  # * bottom
5542  # * text-bottom
5543 
5544  $parts = StringUtils::explode( "|", $options );
5545 
5546  # Give extensions a chance to select the file revision for us
# Note: $options is reused from here on as the hook's options array
5547  $options = array();
5548  $descQuery = false;
5549  Hooks::run( 'BeforeParserFetchFileAndTitle',
5550  array( $this, $title, &$options, &$descQuery ) );
5551  # Fetch and register the file (file title may be different via hooks)
5552  list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );
5553 
5554  # Get parameter map
5555  $handler = $file ? $file->getHandler() : false;
5556 
5557  list( $paramMap, $mwArray ) = $this->getImageParams( $handler );
5558 
5559  if ( !$file ) {
5560  $this->addTrackingCategory( 'broken-file-category' );
5561  }
5562 
5563  # Process the input parameters
5564  $caption = '';
5565  $params = array( 'frame' => array(), 'handler' => array(),
5566  'horizAlign' => array(), 'vertAlign' => array() );
5567  $seenformat = false;
5568  foreach ( $parts as $part ) {
5569  $part = trim( $part );
5570  list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
5571  $validated = false;
5572  if ( isset( $paramMap[$magicName] ) ) {
5573  list( $type, $paramName ) = $paramMap[$magicName];
5574 
5575  # Special case; width and height come in one variable together
5576  if ( $type === 'handler' && $paramName === 'width' ) {
5577  $parsedWidthParam = $this->parseWidthParam( $value );
5578  if ( isset( $parsedWidthParam['width'] ) ) {
5579  $width = $parsedWidthParam['width'];
5580  if ( $handler->validateParam( 'width', $width ) ) {
5581  $params[$type]['width'] = $width;
5582  $validated = true;
5583  }
5584  }
5585  if ( isset( $parsedWidthParam['height'] ) ) {
5586  $height = $parsedWidthParam['height'];
5587  if ( $handler->validateParam( 'height', $height ) ) {
5588  $params[$type]['height'] = $height;
5589  $validated = true;
5590  }
5591  }
5592  # else no validation -- bug 13436
5593  } else {
5594  if ( $type === 'handler' ) {
5595  # Validate handler parameter
5596  $validated = $handler->validateParam( $paramName, $value );
5597  } else {
5598  # Validate internal parameters
5599  switch ( $paramName ) {
5600  case 'manualthumb':
5601  case 'alt':
5602  case 'class':
5603  # @todo FIXME: Possibly check validity here for
5604  # manualthumb? downstream behavior seems odd with
5605  # missing manual thumbs.
5606  $validated = true;
5607  $value = $this->stripAltText( $value, $holders );
5608  break;
5609  case 'link':
5610  $chars = self::EXT_LINK_URL_CLASS;
5611  $addr = self::EXT_LINK_ADDR;
5612  $prots = $this->mUrlProtocols;
# An empty link= value is stored as 'no-link' => true instead
5613  if ( $value === '' ) {
5614  $paramName = 'no-link';
5615  $value = true;
5616  $validated = true;
5617  } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
# Starts with a known protocol: accept only if the whole value is a URL
5618  if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
5619  $paramName = 'link-url';
5620  $this->mOutput->addExternalLink( $value );
5621  if ( $this->mOptions->getExternalLinkTarget() ) {
5622  $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
5623  }
5624  $validated = true;
5625  }
5626  } else {
5627  $linkTitle = Title::newFromText( $value );
5628  if ( $linkTitle ) {
5629  $paramName = 'link-title';
5630  $value = $linkTitle;
5631  $this->mOutput->addLink( $linkTitle );
5632  $validated = true;
5633  }
5634  }
5635  break;
5636  case 'frameless':
5637  case 'framed':
5638  case 'thumbnail':
5639  // use first appearing option, discard others.
5640  $validated = ! $seenformat;
5641  $seenformat = true;
5642  break;
5643  default:
5644  # Most other things appear to be empty or numeric...
5645  $validated = ( $value === false || is_numeric( trim( $value ) ) );
5646  }
5647  }
5648 
5649  if ( $validated ) {
5650  $params[$type][$paramName] = $value;
5651  }
5652  }
5653  }
# Anything that is not a validated parameter is the caption; the last one wins
5654  if ( !$validated ) {
5655  $caption = $part;
5656  }
5657  }
5658 
5659  # Process alignment parameters
# key() yields the name of the first alignment stored in each group
5660  if ( $params['horizAlign'] ) {
5661  $params['frame']['align'] = key( $params['horizAlign'] );
5662  }
5663  if ( $params['vertAlign'] ) {
5664  $params['frame']['valign'] = key( $params['vertAlign'] );
5665  }
5666 
5667  $params['frame']['caption'] = $caption;
5668 
5669  # Will the image be presented in a frame, with the caption below?
5670  $imageIsFramed = isset( $params['frame']['frame'] )
5671  || isset( $params['frame']['framed'] )
5672  || isset( $params['frame']['thumbnail'] )
5673  || isset( $params['frame']['manualthumb'] );
5674 
5675  # In the old days, [[Image:Foo|text...]] would set alt text. Later it
5676  # came to also set the caption, ordinary text after the image -- which
5677  # makes no sense, because that just repeats the text multiple times in
5678  # screen readers. It *also* came to set the title attribute.
5679  #
5680  # Now that we have an alt attribute, we should not set the alt text to
5681  # equal the caption: that's worse than useless, it just repeats the
5682  # text. This is the framed/thumbnail case. If there's no caption, we
5683  # use the unnamed parameter for alt text as well, just for the time be-
5684  # ing, if the unnamed param is set and the alt param is not.
5685  #
5686  # For the future, we need to figure out if we want to tweak this more,
5687  # e.g., introducing a title= parameter for the title; ignoring the un-
5688  # named parameter entirely for images without a caption; adding an ex-
5689  # plicit caption= parameter and preserving the old magic unnamed para-
5690  # meter for BC; ...
5691  if ( $imageIsFramed ) { # Framed image
5692  if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
5693  # No caption or alt text, add the filename as the alt text so
5694  # that screen readers at least get some description of the image
5695  $params['frame']['alt'] = $title->getText();
5696  }
5697  # Do not set $params['frame']['title'] because tooltips don't make sense
5698  # for framed images
5699  } else { # Inline image
5700  if ( !isset( $params['frame']['alt'] ) ) {
5701  # No alt text, use the "caption" for the alt text
5702  if ( $caption !== '' ) {
5703  $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5704  } else {
5705  # No caption, fall back to using the filename for the
5706  # alt text
5707  $params['frame']['alt'] = $title->getText();
5708  }
5709  }
5710  # Use the "caption" for the tooltip text
5711  $params['frame']['title'] = $this->stripAltText( $caption, $holders );
5712  }
5713 
5714  Hooks::run( 'ParserMakeImageParams', array( $title, $file, &$params, $this ) );
5715 
5716  # Linker does the rest
# 'time' can only have been put into $options by the hook run above
5717  $time = isset( $options['time'] ) ? $options['time'] : false;
5718  $ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
5719  $time, $descQuery, $this->mOptions->getThumbSize() );
5720 
5721  # Give the handler a chance to modify the parser object
5722  if ( $handler ) {
5723  $handler->parserTransformHook( $this, $file );
5724  }
5725 
5726  return $ret;
5727  }
5728 
/**
 * Turn caption wikitext into plain text suitable for an attribute value:
 * link placeholders are replaced with their text, strip markers are
 * expanded and all tags are removed.
 *
 * @param string $caption
 * @param object|bool $holders Object whose replaceText() should be used, or
 *   a falsy value to use this parser's own link holders
 * @return string
 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip). We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	return Sanitizer::stripAllTags( $this->mStripState->unstripBoth( $plain ) );
}
/**
 * Mark the current parser output as uncacheable.
 *
 * @throws MWException If there is no parser output object, i.e. the
 *   method is called outside of a parse
 */
public function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	$output = $this->mOutput;
	if ( !$output ) {
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}

	# Set both the old style value (for compatibility) and the new style
	# expiry (for consistency)
	$output->setCacheTime( -1 );
	$output->updateCacheExpiry( 0 );
}
5766 
/**
 * Expand variables in an attribute value, then restore stripped content.
 *
 * The argument is taken by reference, so the caller's variable is updated
 * in addition to the value being returned.
 *
 * @param string &$text Text to process; overwritten with the result
 * @param mixed $frame Passed through to replaceVariables(); defaults to false
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	# Step 1: variable expansion
	$text = $this->replaceVariables( $text, $frame );

	# Step 2: put back everything held in the strip state
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5780 
/**
 * List the names of all registered tag hooks.
 *
 * @return array Keys of mTransparentTagHooks, mTagHooks and
 *   mFunctionTagHooks, concatenated in that order
 */
public function getTags() {
	$transparent = array_keys( $this->mTransparentTagHooks );
	$regular = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparent, $regular, $functionTags );
}
5793 
/**
 * Replace transparent tags in $text with the output of their hooks.
 *
 * Tags are first pulled out by extractTagsAndParams(), which leaves a marker
 * for each one; every marker is then substituted with the hook output, or
 * with the original tag text when no hook is registered for that name.
 *
 * @param string $text
 * @return string Text with the markers substituted
 */
public function replaceTransparentTags( $text ) {
	$found = array();
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $found );

	$substitutions = array();
	foreach ( $found as $marker => $parts ) {
		list( $element, $content, $params, $tag ) = $parts;
		$hookKey = strtolower( $element );

		if ( !isset( $this->mTransparentTagHooks[$hookKey] ) ) {
			# No hook for this name: put the original tag text back
			$substitutions[$marker] = $tag;
			continue;
		}

		$substitutions[$marker] = call_user_func_array(
			$this->mTransparentTagHooks[$hookKey],
			array( $content, $params, $this )
		);
	}

	return strtr( $text, $substitutions );
}
5825 
/**
 * Shared worker for getSection() and replaceSection().
 *
 * The text is preprocessed into a node tree; top-level <h> nodes mark section
 * boundaries. In 'get' mode the target section (including nested
 * subsections) is returned; in 'replace' mode the whole text is returned
 * with that section replaced by $newText.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier; hyphen-separated, the last
 *   part is the section index and a leading 'T' part requests
 *   PTD_FOR_INCLUSION preprocessing
 * @param string $mode 'get' or 'replace'
 * @param string $newText In 'replace' mode the replacement text; in 'get'
 *   mode the value returned when the section does not exist
 * @return string
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Keep the object returned by lock() referenced for the whole call
	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	$isGet = ( $mode === 'get' );
	$isReplace = ( $mode === 'replace' );

	# Split the identifier: the last part is the index, earlier parts are flags
	$idParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $idParts );
	$flags = in_array( 'T', $idParts, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Empty input: only sections 0 and T-0 exist in an empty document
	if ( strval( $text ) === '' ) {
		if ( $sectionIndex == 0 ) {
			return $isGet ? '' : $newText;
		}
		return $isGet ? $newText : $text;
	}

	# Preprocess the text. <h> nodes indicate section breaks and can only
	# occur at the top level, so walking the root's children finds them all.
	$root = $this->preprocessToDom( $text, $flags );
	$node = $root->getFirstChild();

	# Locate the start of the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			# Everything before the target is kept verbatim when replacing
			if ( $isReplace ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	# Target section not found
	if ( !$node ) {
		return $isGet ? $newText : $text;
	}

	# Walk to the end of the section, including nested sections: stop at the
	# first other heading whose level is not deeper than the target's.
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			if ( $heading['i'] != $sectionIndex && $heading['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $isGet ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Write out the remainder (in replace mode only)
	if ( $isReplace ) {
		# Trailing whitespace in $newText is conventionally stripped by the
		# editor, so two newlines are appended to restore the paragraph gap,
		# but only when there is replacement text at all.
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# $outText is always a string here; re-insert stripped tags and trim the end
	return rtrim( $this->mStripState->unstripBoth( $outText ) );
}
5965 
/**
 * Return the wikitext of one section; thin wrapper around
 * extractSections() in 'get' mode.
 *
 * @param string $text Text to look in
 * @param string|int $sectionId Section identifier, as understood by extractSections()
 * @param string $defaultText Returned when the section is not found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$section = $this->extractSections( $text, $sectionId, 'get', $defaultText );
	return $section;
}
5983 
/**
 * Return $oldText with one section replaced; thin wrapper around
 * extractSections() in 'replace' mode.
 *
 * @param string $oldText Existing text
 * @param string|int $sectionId Section identifier, as understood by extractSections()
 * @param string $newText Replacement text for the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$merged = $this->extractSections( $oldText, $sectionId, 'replace', $newText );
	return $merged;
}
5999 
/**
 * Accessor for the stored revision ID.
 *
 * @return mixed Current value of $this->mRevisionId
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;
	return $revisionId;
}
6008 
/**
 * Get the revision object for $this->mRevisionId, loading it on first use.
 *
 * The result is cached in $this->mRevisionObject.
 *
 * @return mixed The cached or freshly obtained revision object; null when
 *   no revision ID is set
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject !== null ) {
		return $this->mRevisionObject;
	}
	if ( $this->mRevisionId === null ) {
		return null;
	}

	$fetchCurrent = $this->mOptions->getCurrentRevisionCallback();
	$rev = call_user_func( $fetchCurrent, $this->getTitle(), $this );

	# If the parse is for a new revision, the callback should already have
	# been set up to return an object matching mRevisionId. When it returns
	# a different revision, fetch by mRevisionId instead.
	if ( $rev && $rev->getId() != $this->mRevisionId ) {
		$rev = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $rev;

	return $this->mRevisionObject;
}
6038 
/**
 * Get the timestamp associated with the current revision, adjusted by
 * $wgContLang->userAdjust() with an empty timezone parameter.
 *
 * The value is computed on first use and cached in
 * $this->mRevisionTimestamp. When no revision object is available,
 * wfTimestampNow() is used as the base timestamp.
 *
 * @return mixed Value returned by $wgContLang->userAdjust() — presumably a
 *   timestamp string; confirm against the Language class
 */
public function getRevisionTimestamp() {
	# Without this declaration $wgContLang would be an undefined local
	# variable and the userAdjust() call below would fail.
	global $wgContLang;

	if ( is_null( $this->mRevisionTimestamp ) ) {
		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		#
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6062 
/**
 * Get the name of the user associated with the current revision.
 *
 * The value is cached in $this->mRevisionUser once it has been determined.
 *
 * @return mixed $this->mRevisionUser; stays null when there is no revision
 *   object and neither $this->ot['wiki'] nor the preview option is set
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revObject = $this->getRevisionObject();
	if ( $revObject ) {
		$this->mRevisionUser = $revObject->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this template is subst:ed the revision ID will be blank,
		# so just use the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6082 
/**
 * Get the size associated with the current revision.
 *
 * The value is cached in $this->mRevisionSize once it has been determined.
 *
 * @return mixed $this->mRevisionSize; stays null when there is no revision
 *   object and neither $this->ot['wiki'] nor the preview option is set
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revObject = $this->getRevisionObject();
	if ( $revObject ) {
		$this->mRevisionSize = $revObject->getSize();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this variable is subst:ed the revision ID will be blank, so use
		# the parser input size, because the substitution itself will change
		# the size.
		$this->mRevisionSize = $this->mInputSize;
	}

	return $this->mRevisionSize;
}
6103 
/**
 * Set the default sort key, both on the parser and as the 'defaultsort'
 * property of the current output object.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	# Remember it locally ...
	$this->mDefaultSort = $sort;
	# ... and record it on the output as well
	$this->mOutput->setProperty( 'defaultsort', $sort );
}
6113 
/**
 * Get the default sort key, mapping the "unset" state (false) to an
 * empty string.
 *
 * @return mixed $this->mDefaultSort, or '' when it is exactly false
 */
public function getDefaultSort() {
	return ( $this->mDefaultSort === false ) ? '' : $this->mDefaultSort;
}
6131 
/**
 * Accessor for the raw default sort key, with no fallback applied.
 *
 * @return mixed Current value of $this->mDefaultSort
 */
public function getCustomDefaultSort() {
	$customSort = $this->mDefaultSort;
	return $customSort;
}
6141 
/**
 * Build a '#'-prefixed anchor from section heading wikitext.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped section name
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links break the anchor, so strip the markup first
	$plainName = $this->stripSectionName( $text );
	$anchor = Sanitizer::escapeId( $plainName, 'noninitial' );

	return '#' . $anchor;
}
6157 
/**
 * Build a '#'-prefixed anchor from section heading wikitext, passing the
 * additional 'legacy' option to Sanitizer::escapeId().
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped section name
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links break the anchor, so strip the markup first
	$plainName = $this->stripSectionName( $text );
	$anchor = Sanitizer::escapeId( $plainName, array( 'noninitial', 'legacy' ) );

	return '#' . $anchor;
}
6172 
/**
 * Strip link markup, quote markup and HTML tags from a section name.
 *
 * @param string $text Section name wikitext
 * @return string Value returned by the final StringUtils::delimiterReplace() call
 */
public function stripSectionName( $text ) {
	# Internal links: [[target|label]] keeps the label, [[target]] keeps the target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# External links: keep the link text.
	# @todo FIXME: Not tolerant to blank link text,
	# i.e. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6205 
/**
 * Test helper: start a parse, expand variables, restore stripped content
 * and remove HTML tags.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants; defaults to OT_HTML
 * @return string Value returned by Sanitizer::removeHTMLtags()
 */
public function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) {
	# Keep the object returned by lock() referenced for the whole call
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6225 
/**
 * Test helper: run preSaveTransform() with the user taken from $options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string Value returned by preSaveTransform()
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $title, $user, $options );
}
6235 
/**
 * Test helper: run testSrvus() with the OT_PREPROCESS output type.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string Value returned by testSrvus()
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$result = $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
	return $result;
}
6245 
/**
 * Apply a callback to every stretch of text outside strip markers.
 *
 * Markers (MARKER_PREFIX ... MARKER_SUFFIX) are copied to the output
 * unchanged; the text between them is passed through $callback. A marker
 * prefix with no matching suffix causes the rest of the string to be copied
 * unchanged.
 *
 * @param string $s Input text
 * @param callable $callback Called with one string argument; its return
 *   value is appended to the output
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$out = '';
	$total = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	for ( $pos = 0; $pos < $total; ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			# No more markers: the remainder is ordinary text
			$out .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Ordinary text before the marker
		$out .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			# Unterminated marker: copy the rest verbatim
			$out .= substr( $s, $start );
			break;
		}

		# Copy the complete marker verbatim and continue after it
		$end += $suffixLength;
		$out .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $out;
}
6285 
/**
 * Delegate to the strip state's killMarkers().
 *
 * @param string $text
 * @return string Value returned by $this->mStripState->killMarkers()
 */
public function killMarkers( $text ) {
	$cleaned = $this->mStripState->killMarkers( $text );
	return $cleaned;
}
6295 
/**
 * Package half-parsed text together with the strip-state and link-holder
 * data obtained for it, in the form expected by unserializeHalfParsedText().
 *
 * @param string $text
 * @return array With keys 'text', 'version', 'stripState' and 'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
	return array(
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	);
}
6321 
/**
 * Load data produced by serializeHalfParsedText() into this parser and
 * return the text.
 *
 * @param array $data Array with 'text', 'version', 'stripState' and
 *   'linkHolders' keys
 * @throws MWException If 'version' is missing or differs from HALF_PARSED_VERSION
 * @return string
 */
public function unserializeHalfParsedText( $data ) {
	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	$pieces = array( $data['text'] );

	# First, merge in the strip state
	$pieces = $this->mStripState->merge( $data['stripState'], $pieces );

	# Then merge in the link holders
	$pieces = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $pieces );

	return $pieces[0];
}
6352 
/**
 * Check whether $data carries the current HALF_PARSED_VERSION.
 *
 * @param array $data Candidate serialized half-parsed text
 * @return bool
 */
public function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
6365 
/**
 * Parse a size parameter of the form "WxH", "W" or either with a trailing
 * "px" (permitted for backward compatibility, bug 13500).
 *
 * @param string $value
 * @return array Empty when $value is '' or matches neither form; otherwise
 *   'width' (int) and, for the "WxH" form, 'height' (int)
 */
public function parseWidthParam( $value ) {
	if ( $value === '' ) {
		return array();
	}

	# Width and height
	$m = array();
	if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
		return array(
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] )
		);
	}

	# Width only
	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return array( 'width' => intval( $value ) );
	}

	return array();
}
6393 
/**
 * Mark the parser as busy and return an object whose callback clears the
 * flag again.
 *
 * @throws MWException If the parser is already marked as parsing
 * @return ScopedCallback Wraps a closure that resets mInParse to false;
 *   callers keep it in a local variable for the duration of the parse
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException(
			"Parser state cleared while parsing. Did you call Parser::parse recursively?"
		);
	}
	$this->mInParse = true;

	# The closure captures the parser through a local variable rather than $this
	$parser = $this;

	return new ScopedCallback( function () use ( $parser ) {
		$parser->mInParse = false;
	} );
}
6417 
/**
 * Remove a single <p>...</p> wrapper from around $html.
 *
 * The wrapper is removed only when the whole string is one paragraph, i.e.
 * the captured content contains no further '</p>'.
 *
 * @param string $html
 * @return string The unwrapped content, or $html unchanged
 */
public static function stripOuterParagraph( $html ) {
	$m = array();
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );

	if ( $isWrapped && strpos( $m[1], '</p>' ) === false ) {
		return $m[1];
	}

	return $html;
}
6438 
/**
 * Return a parser that is not currently in the middle of a parse.
 *
 * @return Parser $this when it is idle; otherwise a new instance of the
 *   class named in $wgParserConf['class'], constructed with $wgParserConf
 */
public function getFreshParser() {
	global $wgParserConf;

	if ( !$this->mInParse ) {
		return $this;
	}

	$parserClass = $wgParserConf['class'];
	return new $parserClass( $wgParserConf );
}
6457 
/**
 * Flag the current output object as using OOUI.
 */
public function enableOOUI() {
	$output = $this->mOutput;
	$output->setEnableOOUI( true );
}
6468 }
setTitle($t)
Set the context title.
Definition: Parser.php:707
$mAutonumber
Definition: Parser.php:170
$mPPNodeCount
Definition: Parser.php:180
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:2004
static getVariableIDs()
Get an array of parser variable IDs.
Definition: MagicWord.php:262
const MARKER_PREFIX
Definition: Parser.php:134
external whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2500
null means default in associative array form
Definition: hooks.txt:1740
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1740
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1685
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1208
$mTplRedirCache
Definition: Parser.php:182
static tocList($toc, $lang=false)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1696
static makeExternalLink($url, $text, $escape=true, $linktype= '', $attribs=array(), $title=null)
Make an external link.
Definition: Linker.php:1056
getBoolOption($oname)
Get the user's current setting for a given option, as a boolean value.
Definition: User.php:2633
return true to allow those checks to and false if checking is done remove or add to the links of a group of changes in EnhancedChangesList Hook subscribers can return false to omit this line from recentchanges use this to change the tables headers temp or archived zone change it to an object instance and return false override the list derivative used the name of the old file when set the default code will be skipped true if there is text before this autocomment true if there is text after this autocomment add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1269
const OT_PREPROCESS
Definition: Defines.php:227
$mLastSection
Definition: Parser.php:175
$mDoubleUnderscores
Definition: Parser.php:182
magic word the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2266
Group all the pieces relevant to the context of a request into one instance.
or
false for read/write
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:222
getArticleID($flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition: Title.php:3140
$wgSitename
Name of the site.
recursivePreprocess($text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:636
getText()
Get the text form (spaces not underscores) of the main part.
Definition: Title.php:896
replaceExternalLinks($text)
Replace external links (REL)
Definition: Parser.php:1742
static isNonincludable($index)
Is it not possible to use pages from this namespace as a template?
nextLinkID()
Definition: Parser.php:797
const SPACE_NOT_NL
Definition: Parser.php:98
static replaceUnusualEscapes($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1859
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:324
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
doHeadings($text)
Parse headers and return html.
Definition: Parser.php:1521
const OT_PLAIN
Definition: Parser.php:119
static removeHTMLtags($text, $processCallback=null, $args=array(), $extratags=array(), $removetags=array())
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:454
static isWellFormedXmlFragment($text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:735
const OT_WIKI
Definition: Parser.php:116
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:1870
User $mUser
Definition: Parser.php:187
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:3229
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1740
=Architecture==Two class hierarchies are used to provide the functionality associated with the different content models:*Content interface(and AbstractContent base class) define functionality that acts on the concrete content of a page, and *ContentHandler base class provides functionality specific to a content model, but not acting on concrete content.The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content.These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text.All manipulation and analysis of page content must be done via the appropriate methods of the Content object.For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers.The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id).Also Title, WikiPage and Revision now have getContentHandler() methods for convenience.ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page.ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type.However, it is recommended to instead use WikiPage::getContent() resp.Revision::getContent() to get a page's content as a Content object.These two methods should be the ONLY way in which page content is accessed.Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides().This is similar to what WikiPage::getActionOverrides() was already doing.==Serialization==With the ContentHandler facility, page content no longer has to be text based.Objects implementing the Content interface are used to represent and handle the content internally.For storage and data exchange, each content model supports at least one serialization format via 
ContentHandler::serializeContent($content).The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats().Content serialization formats are identified using MIME type like strings.The following formats are built in:*text/x-wiki-wikitext *text/javascript-for js pages *text/css-for css pages *text/plain-for future use, e.g.with plain text messages.*text/html-for future use, e.g.with plain html messages.*application/vnd.php.serialized-for future use with the api and for extensions *application/json-for future use with the api, and for use by extensions *application/xml-for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant.Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model and format of the content should always be handled explicitly.Without that information, interpretation of the provided content is not reliable.The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export.Also note that the API will provide encapsulated, serialized content-so if the API was called with format=json, and contentformat is also json(or rather, application/json), the page content is represented as a string containing an escaped json structure.Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form.==Compatibility==The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content.However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page's content model, and will now generate warnings when used.Most importantly, the following functions have been deprecated:*Revisions::getText() and Revisions::getRawText() is 
deprecated in favor Revisions::getContent()*WikiPage::getText() is deprecated in favor WikiPage::getContent() Also, the old Article::getContent()(which returns text) is superceded by Article::getContentObject().However, both methods should be avoided since they do not provide clean access to the page's actual content.For instance, they may return a system message for non-existing pages.Use WikiPage::getContent() instead.Code that relies on a textual representation of the page content should eventually be rewritten.However, ContentHandler::getContentText() provides a stop-gap that can be used to get text for a page.Its behavior is controlled by $wgContentHandlerTextFallback it
Set options of the Parser.
static tidy($text)
Interface with html tidy, used if $wgUseTidy = true.
Definition: MWTidy.php:122
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:968
wfHostname()
Fetch server name for use in error reporting etc.
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:812
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:188
uniqPrefix()
Accessor for mUniqPrefix.
Definition: Parser.php:697
const TOC_START
Definition: Parser.php:137
Title($x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:736
SectionProfiler $mProfiler
Definition: Parser.php:229
$wgEnableScaryTranscluding
Enable interwiki transcluding.
$sort
wfDebug($text, $dest= 'all', array $context=array())
Sends a line to the debug log if enabled or, optionally, to a comment in output.
There are three types of nodes:
has been added to your &Future changes to this page and its associated Talk page will be listed there
$mHeadings
Definition: Parser.php:182
$value
const COLON_STATE_TAGSLASH
Definition: Parser.php:105
static makeSelfLinkObj($nt, $html= '', $query= '', $trail= '', $prefix= '')
Make appropriate markup for a link to the current article.
Definition: Linker.php:401
const NS_SPECIAL
Definition: Defines.php:58
clearState()
Clear Parser state.
Definition: Parser.php:313
const EXT_LINK_ADDR
Definition: Parser.php:92
$mFirstCall
Definition: Parser.php:152
getPreloadText($text, Title $title, ParserOptions $options, $params=array())
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:655
Options($x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:790
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2490
replaceLinkHolders(&$text, $options=0)
Definition: Parser.php:5259
static activeUsers()
Definition: SiteStats.php:164
$mLinkID
Definition: Parser.php:179
doQuotes($text)
Helper function for doAllQuotes()
Definition: Parser.php:1554
preprocessToDom($text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:3259
getPrefixedText()
Get the prefixed title with spaces.
Definition: Title.php:1433
limitationWarn($limitationType, $current= '', $max= '')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:3378
static cleanUrl($url)
Definition: Sanitizer.php:1776
wfUrlencode($s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:235
$mGeneratedPPNodeCount
Definition: Parser.php:180
Represents a title within MediaWiki.
Definition: Title.php:33
static getRandomString()
Get a random string.
Definition: Parser.php:676
$mRevisionId
Definition: Parser.php:200
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1743
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
doBlockLevels($text, $linestart)
#@-
Definition: Parser.php:2505
$wgArticlePath
Definition: img_auth.php:45
OutputType($x=null)
Accessor/mutator for the output type.
Definition: Parser.php:762
const NS_TEMPLATE
Definition: Defines.php:79
const COLON_STATE_COMMENTDASHDASH
Definition: Parser.php:108
recursiveTagParse($text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:571
const NO_ARGS
Allows to change the fields on the form that will be generated just before adding its HTML to parser output $parser
Definition: hooks.txt:324
MagicWordArray $mVariables
Definition: Parser.php:157
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:715
const SFH_NO_HASH
Definition: Parser.php:82
const COLON_STATE_COMMENTDASH
Definition: Parser.php:107
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
wfRandomString($length=32)
Get a random string containing a number of pseudo-random hex characters.
$mForceTocPosition
Definition: Parser.php:184
preprocess($text, Title $title=null, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:612
getName()
Get the user name, or the IP of an anonymous user.
Definition: User.php:2007
static getCacheTTL($id)
Allow external reads of TTL array.
Definition: MagicWord.php:285
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
Definition: globals.txt:10
const OT_PREPROCESS
Definition: Parser.php:117
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1565
maybeDoSubpageLink($target, &$text)
Handle link to subpage if necessary.
Definition: Parser.php:2372
$mFunctionSynonyms
Definition: Parser.php:144
If you want to remove the page from your watchlist later
setLinkID($id)
Definition: Parser.php:804
$mOutputType
Definition: Parser.php:197
Apache License January http
$mDefaultStripList
Definition: Parser.php:147
$mExtLinkBracketedRegex
Definition: Parser.php:162
if($line===false) $args
Definition: cdb.php:64
the value to return A Title object or null for latest to be modified or replaced by the hook handler after cache objects are set for highlighting & $link
Definition: hooks.txt:2523
static getLocalInstance($ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
$wgMaxSigChars
Maximum number of Unicode characters in signature.
const COLON_STATE_TAG
Definition: Parser.php:102
static getDoubleUnderscoreArray()
Get a MagicWordArray of double-underscore entities.
Definition: MagicWord.php:298
static splitTrail($trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1769
The User object encapsulates all of the user-specific settings (user_id, name, rights, password, email address, options, last login time).
Definition: User.php:39
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1421
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
pull multiple revisions may often pull multiple times from the same blob.
Definition: deferred.txt:11
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
static factory($mode=false, IContextSource $context=null)
Get a new image gallery.
$wgLanguageCode
Site language code.
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:132
Class for asserting that a callback happens when a dummy object leaves scope.
$wgExtraInterlanguageLinkPrefixes
List of additional interwiki prefixes that should be treated as interlanguage links (i...
wfCgiToArray($query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
static capturePath(Title $title, IContextSource $context)
Just like executePath() but will override global variables and execute the page in "inclusion" mode...
const NO_TEMPLATES
replaceInternalLinks($s)
Process [[ ]] wikilinks.
Definition: Parser.php:1991
$mVarCache
Definition: Parser.php:148
$wgStylePath
The URL path of the skins directory.
$mRevisionObject
Definition: Parser.php:199
static normalizeSectionNameWhitespace($section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the IDs that are used for section links.
Definition: Sanitizer.php:1302
internalParse($text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1194
Title $mTitle
Definition: Parser.php:196
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:260
wfEscapeWikiText($text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
bool $mInParse
Recursive call protection.
Definition: Parser.php:227
Some quick notes on the file repository architecture Functionality is
Definition: README:3
isExternal()
Is this Title interwiki?
Definition: Title.php:815
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:882
static register($parser)
$mRevIdForTs
Definition: Parser.php:204
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
static normalizeSubpageLink($contextTitle, $target, &$text)
Definition: Linker.php:1502
$mStripList
Definition: Parser.php:146
$mFunctionTagHooks
Definition: Parser.php:145
const OT_PLAIN
Definition: Defines.php:229
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
$mRevisionTimestamp
Definition: Parser.php:201
$mImageParams
Definition: Parser.php:149
getDBkey()
Get the main part with underscores.
Definition: Title.php:914
stripAltText($caption, $holders)
Definition: Parser.php:5711
doAllQuotes($text)
Replace single quotes with HTML markup.
Definition: Parser.php:1537
static normalizeUrlComponent($component, $unsafe)
Definition: Parser.php:1909
isAnon()
Get whether the user is anonymous.
Definition: User.php:3166
if($limit) $timestamp
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:117
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:73
wfGetDB($db, $groups=array(), $wiki=false)
Get a Database object.
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:968
$mInPre
Definition: Parser.php:175
const OT_WIKI
Definition: Defines.php:226
Preprocessor $mPreprocessor
Definition: Parser.php:165
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:865
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of normal(non-web) applications--they might conflict with distributors'policies
static getInstance($ts=false)
Get a timestamp instance in GMT.
const NS_MEDIA
Definition: Defines.php:57
$res
Definition: database.txt:21
static linkKnown($target, $html=null, $customAttribs=array(), $query=array(), $options=array( 'known', 'noclasses'))
Identical to link(), except $options defaults to 'known'.
Definition: Linker.php:262
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:53
replaceVariables($text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3304
const RECOVER_ORIG
wfMatchesDomainList($url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
MediaWiki exception.
Definition: MWException.php:26
StripState $mStripState
Definition: Parser.php:173
$mDefaultSort
Definition: Parser.php:181
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:853
static run($event, array $args=array(), $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:137
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:964
const EXT_IMAGE_REGEX
Definition: Parser.php:94
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:5026
$params
const NS_CATEGORY
Definition: Defines.php:83
static makeHeadline($level, $attribs, $anchor, $html, $link, $legacyAnchor=false)
Create a headline for content.
Definition: Linker.php:1750
static extractTagsAndParams($elements, $text, &$matches, $uniq_prefix=null)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:894
shown</td >< td > a href
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
doTableStuff($text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:991
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
$mImageParamsMagicArray
Definition: Parser.php:150
LinkHolderArray $mLinkHolders
Definition: Parser.php:177
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
static register($parser)
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to and or sell copies of the and to permit persons to whom the Software is furnished to do so
Definition: LICENSE.txt:10
$wgTranscludeCacheExpiry
Expiry time for transcluded templates cached in transcache database table.
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
const DB_SLAVE
Definition: Defines.php:46
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:825
while(($__line=Maintenance::readconsole())!==false) print n
Definition: eval.php:64
Allows to change the fields on the form that will be generated just before adding its HTML to parser output an object of one of the gallery classes(inheriting from ImageGalleryBase) $html conditions will AND in the final query as a Content object as a Content object $title
Definition: hooks.txt:327
static hasSubpages($index)
Does the namespace allow subpages?
formatHeadings($text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4372
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:843
static tocUnindent($level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1654
if(!$wgRequest->checkUrlExtension()) if(!$wgEnableAPI) $wgTitle
Definition: api.php:62
static tocLine($anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
The $level parameter defines whether we are on an indentation level.
Definition: Linker.php:1668
getNamespace()
Get the namespace index, i.e.
Definition: Title.php:937
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
getExternalLinkAttribs($url=false)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:1842
__construct($conf=array())
Definition: Parser.php:234
$mInputSize
Definition: Parser.php:205
equals(Title $title)
Compare with another title.
Definition: Title.php:4192
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
const HALF_PARSED_VERSION
Update this version number when the output of serialiseHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:79
const NS_FILE
Definition: Defines.php:75
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:295
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:34
const PTD_FOR_INCLUSION
Definition: Parser.php:111
static escapeId($id, $options=array())
Given a value, escape it so that it can be used in an id attribute and return it. ...
Definition: Sanitizer.php:1124
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1740
armorLinks($text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2350
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1539
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title after the basic globals have been set up
Definition: hooks.txt:1939
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
setOutputType($ot)
Set the output type.
Definition: Parser.php:745
$mTagHooks
Definition: Parser.php:141
Class for handling an array of magic words.
Definition: MagicWord.php:699
const NS_MEDIAWIKI
Definition: Defines.php:77
static & get($id)
Factory: creates an object representing an ID.
Definition: MagicWord.php:248
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition: hooks.txt:240
#define the
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:968
maybeMakeExternalImage($url)
Make an image if it's allowed, either through the global option, through the exception, or through the on-wiki whitelist.
Definition: Parser.php:1932
getOption($oname, $defaultOverride=null, $ignoreHidden=false)
Get the user's current setting for a given option.
Definition: User.php:2574
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2359
const MARKER_SUFFIX
Definition: Parser.php:133
wfDebugLog($logGroup, $text, $dest= 'all', array $context=array())
Send a line to a supplementary debug log file, if configured, or main debug log if not...
const OT_HTML
Definition: Defines.php:225
Prior to maintenance scripts were a hodgepodge of code that had no cohesion or formal method of action Beginning in
Definition: maintenance.txt:1
static makeImageLink(Parser $parser, Title $title, $file, $frameParams=array(), $handlerParams=array(), $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:537
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object & $output
Definition: hooks.txt:968
static getSubstIDs()
Get an array of parser substitution modifier IDs.
Definition: MagicWord.php:275
static images()
Definition: SiteStats.php:172
isSpecialPage()
Returns true if this is a special page.
Definition: Title.php:1050
$mTransparentTagHooks
Definition: Parser.php:142
$mExpensiveFunctionCount
Definition: Parser.php:183
$mUrlProtocols
Definition: Parser.php:162
const TS_MW
MediaWiki concatenated string timestamp (YYYYMMDDHHMMSS)
static getVersion($flags= '')
Return a string of the MediaWiki version with SVN revision if available.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2283
static newFromTitle($title, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given title.
Definition: Revision.php:104
$mConf
Definition: Parser.php:162
static newFromId($id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:86
if($IP===false)
Definition: WebStart.php:75
wfUrlProtocols($includeProtocolRelative=true)
Returns a regular expression of url protocols.
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:272
static getExternalLinkRel($url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:1821
static & singleton()
Get an instance of this class.
Definition: LinkCache.php:61
string $mUniqPrefix
Deprecated accessor for the strip marker prefix.
Definition: Parser.php:210
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:216
const OT_MSG
Definition: Parser.php:118
replaceTransparentTags($text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5781
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
Definition: postgres.txt:22
getLinkURL($query= '', $query2=false, $proto=PROTO_RELATIVE)
Get a URL that's the simplest URL that will be valid to link, locally, to the current Title...
Definition: Title.php:1783
doDoubleUnderscore($te