MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
67 class Parser {
73  const VERSION = '1.6.4';
74 
80 
81  # Flags for Parser::setFunctionHook
82  const SFH_NO_HASH = 1;
83  const SFH_OBJECT_ARGS = 2;
84 
85  # Constants needed for external link processing
86  # Everything except bracket, space, or control characters
87  # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
88  # as well as U+3000 (IDEOGRAPHIC SPACE), see bug 19052
89  const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
90  const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
91  \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
92 
93  # Regular expression for a non-newline space
94  const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
95 
96  # State constants for the definition list colon extraction
97  const COLON_STATE_TEXT = 0;
98  const COLON_STATE_TAG = 1;
105 
106  # Flags for preprocessToDom
107  const PTD_FOR_INCLUSION = 1;
108 
# Allowed values for $this->mOutputType
# Parameter to startExternalParse().
const OT_HTML = 1; # like parse()
const OT_WIKI = 2; # like preSaveTransform()
# Used by preprocess() and setOutputType(); without this declaration those
# methods fail with an undefined class constant error.
# NOTE(review): the value 3 is shared with OT_MSG -- confirm this is intended.
const OT_PREPROCESS = 3; # like preprocess()
const OT_MSG = 3;
const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
116 
117  # Marker Suffix needs to be accessible statically.
118  const MARKER_SUFFIX = "-QINU\x7f";
119 
120  # Markers used for wrapping the table of contents
121  const TOC_START = '<mw:toc>';
122  const TOC_END = '</mw:toc>';
123 
# Persistent:
public $mTagHooks = array();
# Read through array_keys() in internalParse(), so it has to default to an
# array; leaving it undeclared makes that call operate on null.
public $mTransparentTagHooks = array();
public $mFunctionHooks = array();
public $mFunctionSynonyms = array( 0 => array(), 1 => array() );
public $mStripList = array();
public $mVarCache = array();
public $mImageParams = array();
# Counter appended to the markers created by insertStripItem()
public $mMarkerIndex = 0;
# True until firstCallInit() has run once
public $mFirstCall = true;
137 
138  # Initialised by initialiseVariables()
139 
143  public $mVariables;
148  public $mSubstWords;
149  # Initialised in constructor
151 
152  # Initialized in getPreprocessor()
155 
156  # Cleared with clearState():
160  public $mOutput;
162 
166  public $mStripState;
172  public $mLinkHolders;
173 
174  public $mLinkID;
176  public $mDefaultSort;
178  public $mExpensiveFunctionCount; # number of expensive parser function calls
184  public $mUser; # User object; only used when doing pre-save transform
186  # Temporary
187  # These are variables reset at least once per parse regardless of $clearState
192  public $mOptions;
193 
197  public $mTitle; # Title context, used for self-link rendering and similar things
198  public $mOutputType; # Output type, one of the OT_xxx constants
199  public $ot; # Shortcut alias, see setOutputType()
200  public $mRevisionObject; # The revision object of the specified revision ID
201  public $mRevisionId; # ID to display in {{REVISIONID}} tags
202  public $mRevisionTimestamp; # The timestamp of the specified revision ID
203  public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
204  public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
205  public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
206  public $mInputSize = false; # For {{PAGESIZE}} on current page.
207 
211  public $mUniqPrefix;
212 
218  public $mLangLinkLanguages;
219 
226  public $currentRevisionCache;
227 
232  public $mInParse = false;
233 
235  protected $mProfiler;
236 
/**
 * Store the configuration, build the bracketed external link regex and
 * pick the preprocessor implementation.
 *
 * @param array $conf Configuration; 'preprocessorClass' overrides the
 *   automatic preprocessor selection when set.
 */
public function __construct( $conf = array() ) {
	$this->mConf = $conf;

	$protocols = wfUrlProtocols();
	$this->mUrlProtocols = $protocols;
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $protocols . ')'
		. self::EXT_LINK_URL_CLASS
		. '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';

	# Preprocessor_Hash is the fallback for every case that does not
	# explicitly select something else.
	$preprocessorClass = 'Preprocessor_Hash';
	if ( isset( $conf['preprocessorClass'] ) ) {
		$preprocessorClass = $conf['preprocessorClass'];
	} elseif ( !defined( 'HPHP_VERSION' ) ) {
		# (Under HipHop Preprocessor_Hash is much faster than Preprocessor_DOM,
		# so the default is kept there.)
		if ( extension_loaded( 'domxml' ) ) {
			# PECL extension that conflicts with the core DOM extension (bug 13770)
			wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
		} elseif ( extension_loaded( 'dom' ) ) {
			$preprocessorClass = 'Preprocessor_DOM';
		}
	}
	$this->mPreprocessorClass = $preprocessorClass;

	wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
261 
/**
 * Drop every property held by this object, starting with the link holder
 * array when one is set.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}

	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
273 
/**
 * Reset the in-parse flag on the clone, detach fields that may be bound
 * by reference, and notify 'ParserCloned' hook handlers.
 */
public function __clone() {
	$this->mInParse = false;

	// Bug 56226: creating a reference to an object field turns the field
	// itself into a reference, and a clone copies such a field as a
	// reference. Old hooks pass these fields by reference, so each one is
	// rebound here to a private copy of its value.
	foreach ( array( 'mStripState', 'mVarCache' ) as $field ) {
		$copy = $this->$field;
		$this->$field =& $copy;
		unset( $copy );
	}

	Hooks::run( 'ParserCloned', array( $this ) );
}
296 
/**
 * Do the one-time initialisation: register the core parser functions and
 * tag hooks, initialise the variables, and run 'ParserFirstCallInit'.
 * Later calls return immediately.
 */
public function firstCallInit() {
	if ( !$this->mFirstCall ) {
		return;
	}
	$this->mFirstCall = false;

	CoreParserFunctions::register( $this );
	CoreTagHooks::register( $this );
	$this->initialiseVariables();

	// Taking a reference to $this is not supported on PHP 7.1+; hand the
	// hook a reference to a local copy of the object handle instead.
	$parser = $this;
	Hooks::run( 'ParserFirstCallInit', array( &$parser ) );
}
312 
/**
 * Reset all per-parse state: a fresh ParserOutput, link holders, strip
 * state with a new random marker prefix, counters and caches. Runs
 * firstCallInit() first if it has not run yet, and finishes by running
 * the 'ParserClearState' hook.
 */
public function clearState() {
	if ( $this->mFirstCall ) {
		$this->firstCallInit();
	}
	$this->mOutput = new ParserOutput;
	$this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );
	$this->mAutonumber = 0;
	$this->mLastSection = '';
	$this->mDTopen = false;
	$this->mIncludeCount = array();
	$this->mArgStack = false;
	$this->mInPre = false;
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;
	$this->mRevisionObject = $this->mRevisionTimestamp =
		$this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
	$this->mVarCache = array();
	$this->mUser = null;
	$this->mLangLinkLanguages = array();
	$this->currentRevisionCache = null;

	# New random prefix for the strip markers of this parse
	$this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
	$this->mStripState = new StripState( $this->mUniqPrefix );

	# Clear these on every parse, bug 4549
	$this->mTplRedirCache = $this->mTplDomCache = array();

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = array(
		'post-expand' => 0,
		'arg' => 0,
	);
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = array();
	$this->mDoubleUnderscores = array();
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor bound to another parser is discarded
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Taking a reference to $this is not supported on PHP 7.1+; hand the
	// hook a reference to a local copy of the object handle instead.
	$parser = $this;
	Hooks::run( 'ParserClearState', array( &$parser ) );
}
378 
/**
 * Convert wikitext to HTML and return the populated ParserOutput.
 *
 * @param string $text Text to parse
 * @param Title $title Title passed to startParse() and used for title conversion
 * @param ParserOptions $options
 * @param bool $linestart Passed through to internalParseHalfParsed()
 * @param bool $clearState Passed to startParse(); when true the parser is
 *   also locked and the strip marker prefix is removed from the input
 * @param int|null $revid When not null, used as the revision ID for the
 *   duration of this parse
 * @return ParserOutput $this->mOutput
 */
public function parse( $text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	// Needed for the "Parsed by <host>" line of the limit report; without
	// this declaration the variable is an undefined local.
	global $wgShowHostnames;

	// Taking a reference to $this is not supported on PHP 7.1+; hooks get
	// a reference to a local copy of the object handle instead.
	$parser = $this;

	if ( $clearState ) {
		// NOTE(review): presumably released when this variable goes out of
		// scope -- confirm against lock().
		$magicScopeVariable = $this->lock();
	}

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	# Remove the strip marker tag prefix from the input, if present.
	if ( $clearState ) {
		$text = str_replace( $this->mUniqPrefix, '', $text );
	}

	# Remember the revision state so it can be restored before returning
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	Hooks::run( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', array( &$parser, &$text, &$this->mStripState ) );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	# Set the title text from the language converter, unless title
	# conversion is disabled or a display title has already been set.
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();

		$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuTime !== null ) {
			$this->mOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
		$this->mOutput->setLimitReportData( 'limitreport-walltime',
			sprintf( "%.3f", $wallTime )
		);

		$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
			array( $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
			array( $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
			array( $this->mIncludeSizes['post-expand'], $max )
		);
		$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
			array( $this->mIncludeSizes['arg'], $max )
		);
		$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
			array( $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
			array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() )
		);
		Hooks::run( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) );

		$limitReport = "NewPP limit report\n";
		if ( $wgShowHostnames ) {
			$limitReport .= 'Parsed by ' . wfHostname() . "\n";
		}
		foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
			if ( Hooks::run( 'ParserLimitReportFormat',
				array( $key, &$value, &$limitReport, false, false )
			) ) {
				$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
				$valueMsg = wfMessage( array( "$key-value-text", "$key-value" ) )
					->inLanguage( 'en' )->useDatabase( false );
				if ( !$valueMsg->exists() ) {
					$valueMsg = new RawMessage( '$1' );
				}
				if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
					$valueMsg->params( $value );
					$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
				}
			}
		}
		// Since we're not really outputting HTML, decode the entities and
		// then re-encode the things that need hiding inside HTML comments.
		$limitReport = htmlspecialchars_decode( $limitReport );
		Hooks::run( 'ParserLimitReport', array( $this, &$limitReport ) );

		// Sanitize for comment. Note '‐' in the replacement is U+2010,
		// which looks much like the problematic '-'.
		$limitReport = str_replace( array( '-', '&' ), array( '‐', '&amp;' ), $limitReport );
		$text .= "\n<!-- \n$limitReport-->\n";

		// Add on template profiling data
		$dataByFunc = $this->mProfiler->getFunctionStats();
		uasort( $dataByFunc, function ( $a, $b ) {
			// Sort callbacks must return an integer, not a boolean.
			if ( $a['real'] == $b['real'] ) {
				return 0;
			}
			return ( $a['real'] < $b['real'] ) ? 1 : -1; // descending order
		} );
		$profileReport = "Transclusion expansion time report (%,ms,calls,template)\n";
		foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
			$profileReport .= sprintf( "%6.2f%% %8.3f %6d - %s\n",
				$item['%real'], $item['real'], $item['calls'],
				htmlspecialchars( $item['name'] ) );
		}
		$text .= "\n<!-- \n$profileReport-->\n";

		if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
			wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
				$this->mTitle->getPrefixedDBkey() );
		}
	}
	$this->mOutput->setText( $text );

	# Restore the revision state saved above
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
563 
/**
 * Run the strip hooks and internalParse() on a text fragment, without
 * resetting parser state. The fragment is parsed as a non-main text
 * ($isMain = false).
 *
 * @param string $text
 * @param PPFrame|bool $frame Frame passed to internalParse(), or false
 * @return string Result of internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Taking a reference to $this is not supported on PHP 7.1+; hooks get
	// a reference to a local copy of the object handle instead.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
	Hooks::run( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
	$text = $this->internalParse( $text, false, $frame );
	return $text;
}
592 
/**
 * Like recursiveTagParse(), but additionally passes the result through
 * internalParseHalfParsed() as a non-main text.
 *
 * @param string $text
 * @param PPFrame|bool $frame Frame passed to recursiveTagParse(), or false
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
615 
/**
 * Expand variables in the text and return it with strip markers
 * replaced, using the OT_PREPROCESS output type.
 *
 * @param string $text
 * @param Title|null $title Passed to startParse()
 * @param ParserOptions $options
 * @param int|null $revid When not null, stored as the revision ID
 * @param PPFrame|bool $frame Passed to replaceVariables()
 * @return string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	// NOTE(review): presumably released when this variable goes out of
	// scope -- confirm against lock().
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	// Taking a reference to $this is not supported on PHP 7.1+; hooks get
	// a reference to a local copy of the object handle instead.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', array( &$parser, &$text, &$this->mStripState ) );
	Hooks::run( 'ParserAfterStrip', array( &$parser, &$text, &$this->mStripState ) );
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
641 
/**
 * Expand variables in the text and replace strip markers, without
 * touching the parser state.
 *
 * @param string $text
 * @param PPFrame|bool $frame Passed to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
656 
/**
 * Substitute $params into the text as message parameters, then expand it
 * "for inclusion" with the OT_PLAIN output type.
 *
 * @param string $text
 * @param Title $title Passed to startParse()
 * @param ParserOptions $options
 * @param array $params Values passed to RawMessage::params()
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = array() ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	// NOTE(review): presumably released when this variable goes out of
	// scope -- confirm against lock().
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	# $flags was read below without ever being assigned.
	# NOTE(review): values taken from upstream MediaWiki (no argument or
	# template expansion) -- confirm against the PPFrame constants.
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
684 
/**
 * Return the result of wfRandomString() for a length of 16.
 *
 * @return string
 */
public static function getRandomString() {
	$length = 16;
	return wfRandomString( $length );
}
693 
/**
 * Set the user returned by getUser().
 *
 * @param User|null $user When null, getUser() falls back to the user of
 *   the parser options
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
/**
 * Return the strip marker prefix, or an empty string when none has been
 * set yet.
 *
 * @return string
 */
public function uniqPrefix() {
	if ( isset( $this->mUniqPrefix ) ) {
		return $this->mUniqPrefix;
	}

	# @todo FIXME: This is probably *horribly wrong*
	# LanguageConverter seems to want $wgParser's uniqPrefix, however
	# if this is called for a parser cache hit, the parser may not
	# have ever been initialized in the first place.
	# Not really sure what the heck is supposed to be going on here.
	# (An MWException "Accessing uninitialized mUniqPrefix" was considered
	# here instead of the empty string.)
	return '';
}
721 
/**
 * Set the title context. A falsy argument is replaced by the title
 * 'NO TITLE'; a title with a fragment is stored as a clone with the
 * fragment cleared, so the caller's object is left untouched.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	$title = $t ? $t : Title::newFromText( 'NO TITLE' );

	if ( !$title->hasFragment() ) {
		$this->mTitle = $title;
		return;
	}

	# Strip the fragment to avoid various odd effects
	$this->mTitle = clone $title;
	$this->mTitle->setFragment( '' );
}
740 
/**
 * Return the current title context.
 *
 * @return Title|null Value of $this->mTitle
 */
public function getTitle() {
	$title = $this->mTitle;
	return $title;
}
749 
/**
 * Combined accessor for the title context, delegating to wfSetVar().
 *
 * @param Title|null $x Passed to wfSetVar() together with $this->mTitle
 * @return mixed Whatever wfSetVar() returns
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );
	return $result;
}
759 
/**
 * Set the output type and rebuild the $this->ot shortcut array, which
 * holds one boolean per output type.
 *
 * @param int $ot One of the OT_* constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$shortcut = array();
	$shortcut['html'] = ( $ot == self::OT_HTML );
	$shortcut['wiki'] = ( $ot == self::OT_WIKI );
	$shortcut['pre'] = ( $ot == self::OT_PREPROCESS );
	$shortcut['plain'] = ( $ot == self::OT_PLAIN );
	$this->ot = $shortcut;
}
775 
/**
 * Combined accessor for the output type, delegating to wfSetVar().
 *
 * @param int|null $x Passed to wfSetVar() together with $this->mOutputType
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
785 
/**
 * Return the current output object.
 *
 * @return ParserOutput|null Value of $this->mOutput
 */
public function getOutput() {
	$output = $this->mOutput;
	return $output;
}
794 
/**
 * Return the current parser options.
 *
 * @return ParserOptions|null Value of $this->mOptions
 */
public function getOptions() {
	$options = $this->mOptions;
	return $options;
}
803 
/**
 * Combined accessor for the parser options, delegating to wfSetVar().
 *
 * @param ParserOptions|null $x Passed to wfSetVar() together with $this->mOptions
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
813 
/**
 * Return the current link ID and advance the counter by one.
 *
 * @return int The ID before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
820 
/**
 * Set the value that the next nextLinkID() call returns.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
827 
/**
 * Alias of getTargetLanguage().
 *
 * @return Language
 * @throws MWException Propagated from getTargetLanguage()
 */
public function getFunctionLang() {
	$language = $this->getTargetLanguage();
	return $language;
}
835 
/**
 * Determine the target language. In order of preference: the target
 * language of the parser options, the user language when the options
 * describe an interface message, and the page language of the title.
 *
 * @return Language
 * @throws MWException When the title is needed but is null
 */
public function getTargetLanguage() {
	$options = $this->mOptions;

	$target = $options->getTargetLanguage();
	if ( $target !== null ) {
		return $target;
	}

	if ( $options->getInterfaceMessage() ) {
		return $options->getUserLangObj();
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
858 
/**
 * Return the language object used for conversion; currently the same as
 * getTargetLanguage().
 *
 * @return Language
 * @throws MWException Propagated from getTargetLanguage()
 */
public function getConverterLanguage() {
	$language = $this->getTargetLanguage();
	return $language;
}
866 
/**
 * Return the user set through setUser(), falling back to the user of
 * the parser options when none is set.
 *
 * @return User
 */
public function getUser() {
	return is_null( $this->mUser )
		? $this->mOptions->getUser()
		: $this->mUser;
}
879 
/**
 * Return the preprocessor, creating it from $this->mPreprocessorClass
 * on first use.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->mPreprocessorClass;
	$this->mPreprocessor = new $preprocessorClass( $this );
	return $this->mPreprocessor;
}
892 
/**
 * Replace the given tags and HTML comments in the text by unique
 * markers and collect what was removed.
 *
 * Each entry of $matches is keyed by its marker and holds
 * array( element name, content, decoded attributes, full tag text ).
 * Content is null for self-closing tags; for comments the element name
 * is '!--'. A tag without an end tag runs to the end of the text.
 *
 * @param array $elements Tag names to extract; joined into a regex
 *   alternation without quoting
 * @param string $text
 * @param array &$matches Out parameter, reset to an empty array first
 * @param string $uniq_prefix Prefix for the generated markers
 * @return string The text with markers in place of the extracted parts
 */
public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
	# Marker counter shared by all calls, so markers stay unique
	static $n = 1;
	$stripped = '';
	$matches = array();

	$taglist = implode( '|', $elements );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			# No further tag or comment
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			$element = $p[1];
			$attributes = $p[2];
			$close = $p[3];
			$inside = $p[4];
		}

		$marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				$end = "/(<\\/$element\\s*>)/i";
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				$tail = $q[1];
				$text = $q[2];
			}
		}

		# The decoded attributes were missing from this tuple.
		# NOTE(review): restored from upstream MediaWiki, where consumers
		# unpack four elements -- confirm against the callers.
		$matches[$marker] = array( $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" );
	}
	return $stripped;
}
974 
/**
 * Return the strip list.
 *
 * @return array Value of $this->mStripList
 */
public function getStripList() {
	$list = $this->mStripList;
	return $list;
}
/**
 * Register the text as a general strip item under a freshly generated
 * marker and return that marker.
 *
 * @param string $text
 * @return string The marker that stands for $text
 */
public function insertStripItem( $text ) {
	$marker = $this->mUniqPrefix . '-item-' . $this->mMarkerIndex . '-' . self::MARKER_SUFFIX;
	$this->mMarkerIndex++;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
999 
/**
 * Convert wiki table markup ({| ... |}, |-, |, !, |+) into HTML table
 * markup, line by line. Lines outside of any table are passed through
 * unchanged; tables still open at the end of the text are closed.
 *
 * The *_history arrays are stacks with one entry per currently open
 * (nested) table.
 *
 * @param string $text
 * @return string
 */
public function doTableStuff( $text ) {

	$lines = StringUtils::explode( "\n", $text );
	$out = '';
	$td_history = array(); # Is currently a td tag open?
	$last_tag_history = array(); # Save history of last tag activated (td, th or caption)
	$tr_history = array(); # Is currently a tr tag open?
	$tr_attributes = array(); # history of tr attributes
	$has_opened_tr = array(); # Did this table open a <tr> element?
	$indent_level = 0; # indent level of the table

	foreach ( $lines as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) { # empty line, go to next line
			$out .= $outLine . "\n";
			continue;
		}

		$first_character = $line[0];
		$matches = array();

		if ( preg_match( '/^(:*)\{\|(.*)$/', $line, $matches ) ) {
			# First check if we are starting a new table
			$indent_level = strlen( $matches[1] );

			$attributes = $this->mStripState->unstripBoth( $matches[2] );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

			$outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
			array_push( $tr_history, false );
			array_push( $tr_attributes, '' );
			array_push( $has_opened_tr, false );
		} elseif ( count( $td_history ) == 0 ) {
			# Not inside a table: don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( substr( $line, 0, 2 ) === '|}' ) {
			# We are ending a table
			$line = '</table>' . substr( $line, 2 );
			$last_tag = array_pop( $last_tag_history );

			if ( !array_pop( $has_opened_tr ) ) {
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $tr_history ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}
			array_pop( $tr_attributes );
			$outLine = $line . str_repeat( '</dd></dl>', $indent_level );
		} elseif ( substr( $line, 0, 2 ) === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes
			$attributes = $this->mStripState->unstripBoth( $line );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
			array_pop( $tr_attributes );
			array_push( $tr_attributes, $attributes );

			$line = '';
			$last_tag = array_pop( $last_tag_history );
			array_pop( $has_opened_tr );
			array_push( $has_opened_tr, true );

			if ( array_pop( $tr_history ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}

			$outLine = $line;
			array_push( $tr_history, false );
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
		} elseif ( $first_character === '|' || $first_character === '!' ) {
			# This might be cell elements, td, th or captions
			# (a caption line "|+" also starts with "|")
			if ( substr( $line, 0, 2 ) === '|+' ) {
				$first_character = '+';
				$line = substr( $line, 1 );
			}

			$line = substr( $line, 1 );

			if ( $first_character === '!' ) {
				$line = str_replace( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps, but should avoid splitting up eg
			# attribute values containing literal "||".
			$cells = StringUtils::explodeMarkup( '||', $line );

			$outLine = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$previous = '';
				if ( $first_character !== '+' ) {
					# Cells (but not captions) need an open row
					$tr_after = array_pop( $tr_attributes );
					if ( !array_pop( $tr_history ) ) {
						$previous = "<tr{$tr_after}>\n";
					}
					array_push( $tr_history, true );
					array_push( $tr_attributes, '' );
					array_pop( $has_opened_tr );
					array_push( $has_opened_tr, true );
				}

				$last_tag = array_pop( $last_tag_history );

				if ( array_pop( $td_history ) ) {
					$previous = "</{$last_tag}>\n{$previous}";
				}

				if ( $first_character === '|' ) {
					$last_tag = 'td';
				} elseif ( $first_character === '!' ) {
					$last_tag = 'th';
				} elseif ( $first_character === '+' ) {
					$last_tag = 'caption';
				} else {
					$last_tag = '';
				}

				array_push( $last_tag_history, $last_tag );

				# A cell could contain both parameters and data
				$cell_data = explode( '|', $cell, 2 );

				# Bug 553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				if ( strpos( $cell_data[0], '[[' ) !== false ) {
					$cell = "{$previous}<{$last_tag}>{$cell}";
				} elseif ( count( $cell_data ) == 1 ) {
					$cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
				} else {
					$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
					$attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
					$cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
				}

				$outLine .= $cell;
				array_push( $td_history, true );
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open td, tr && table
	while ( count( $td_history ) > 0 ) {
		# Close the cell with the tag that actually opened it (td, th or
		# caption) instead of always emitting </td>.
		$last_tag = array_pop( $last_tag_history );
		if ( array_pop( $td_history ) ) {
			$out .= "</{$last_tag}>\n";
		}
		if ( array_pop( $tr_history ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $has_opened_tr ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1196 
/**
 * Main wikitext transformation pass: expand variables/templates,
 * sanitise HTML, then convert tables, horizontal rules, double
 * underscores, headings, links, quotes and magic links.
 *
 * @param string $text
 * @param bool $isMain Passed through to formatHeadings()
 * @param PPFrame|bool $frame When given, used to expand the text; when
 *   false, replaceVariables() is used instead
 * @return string The transformed text, or the text as left by the
 *   'ParserBeforeInternalParse' hook when that hook returns false
 */
public function internalParse( $text, $isMain = true, $frame = false ) {

	$origText = $text;

	// Taking a reference to $this is not supported on PHP 7.1+; hooks get
	// a reference to a local copy of the object handle instead.
	$parser = $this;

	# Hook to suspend the parser in this state
	if ( !Hooks::run( 'ParserBeforeInternalParse', array( &$parser, &$text, &$this->mStripState ) ) ) {
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		if ( !$frame->depth ) {
			$flag = 0;
		} else {
			$flag = self::PTD_FOR_INCLUSION;
		}
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	Hooks::run( 'InternalParseBeforeSanitize', array( &$parser, &$text, &$this->mStripState ) );
	$text = Sanitizer::removeHTMLtags(
		$text,
		# A callable does not need the object by reference
		array( $this, 'attributeStripCallback' ),
		false,
		array_keys( $this->mTransparentTagHooks )
	);
	Hooks::run( 'InternalParseBeforeLinks', array( &$parser, &$text, &$this->mStripState ) );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace( $this->mUniqPrefix . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );
	$text = $this->formatHeadings( $text, $origText, $isMain );

	return $text;
}
1267 
/**
 * Finish a text that already went through internalParse(): unstrip,
 * fix up French spacing, build block-level markup, replace link
 * holders, run the language converter, and tidy the result.
 *
 * @param string $text
 * @param bool $isMain When true, the 'ParserBeforeTidy' and
 *   'ParserAfterTidy' hooks are run
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	// Both are read below; without this declaration they are undefined
	// locals and the tidy branch can never be taken.
	global $wgUseTidy, $wgAlwaysUseTidy;

	// Taking a reference to $this is not supported on PHP 7.1+; hooks get
	// a reference to a local copy of the object handle instead.
	$parser = $this;

	$text = $this->mStripState->unstripGeneral( $text );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$fixtags = array(
		# french spaces, last one Guillemet-left
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# french spaces, Guillemet-right
		'/(\\302\\253) /' => '\\1&#160;',
		'/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
	);
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	# Run the language converter unless content conversion is disabled
	# or the text is an interface message.
	if ( !( $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
	) {
		if ( !$this->mOptions->getInterfaceMessage() ) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert( $text );
		}
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', array( &$parser, &$text ) );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
		$text = MWTidy::tidy( $text );
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = array(
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
			'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (bug 3695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
			'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
			'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		);

		$text = preg_replace(
			array_keys( $tidyregs ),
			array_values( $tidyregs ),
			$text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', array( &$parser, &$text ) );
	}

	return $text;
}
1362 
/**
 * Replace "magic" plain-text constructs with links.
 *
 * A single regex pass recognises, in this order: whole anchor elements
 * and other HTML tags (matched only so they can be skipped), free
 * external URLs, "RFC n" / "PMID n" references and "ISBN ..." numbers.
 * Every match is handed to magicLinkCallback(), which decides what to
 * emit for it.
 *
 * @param string $text
 * @return string Whatever preg_replace_callback() returns for $text
 */
public function doMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$space = self::SPACE_NOT_NL; # non-newline space
	$spdash = "(?:-|$space)"; # a dash or a non-newline space
	$spaces = "$space++"; # possessive match of 1 or more spaces

	# The pattern uses the "x" modifier, so the layout whitespace and the
	# "#" comments inside it are ignored by PCRE.
	$regex = '!(?:                            # Start cases
			(<a[ \t\r\n>].*?</a>) |    # m[1]: Skip link text
			(<.*?>) |                  # m[2]: Skip stuff inside HTML elements' . "
			(\b(?i:$prots)$urlChar+) | # m[3]: Free external links
			\b(?:RFC|PMID) $spaces     # m[4]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN $spaces (           # m[5]: ISBN, capture number
				(?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
				(?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
				[0-9Xx]                  #  check digit
			)\b
		)!xu";

	# Objects are passed by handle, so the callback array needs no
	# reference to $this (the "&" was a PHP 4 leftover).
	return preg_replace_callback( $regex, array( $this, 'magicLinkCallback' ), $text );
}
1395 
/**
 * Callback for doMagicLinks(): turn one regex match into output text.
 *
 * Capture groups are checked in order; the first non-empty one wins:
 *  - 1, 2: anchor / HTML element, returned untouched
 *  - 3: free external URL, passed to makeFreeExternalLink()
 *  - 4: number following "RFC" or "PMID"
 *  - 5: ISBN number
 *
 * @param array $m Match array from preg_replace_callback()
 * @throws MWException If group 4 matched but the text starts with
 *   neither "RFC" nor "PMID"
 * @return string
 */
public function magicLinkCallback( $m ) {
	$whole = $m[0];

	# Anchors and HTML elements are passed through unchanged
	foreach ( array( 1, 2 ) as $skipGroup ) {
		if ( isset( $m[$skipGroup] ) && $m[$skipGroup] !== '' ) {
			return $whole;
		}
	}

	# Free external link
	if ( isset( $m[3] ) && $m[3] !== '' ) {
		return $this->makeFreeExternalLink( $whole );
	}

	# RFC or PMID
	if ( isset( $m[4] ) && $m[4] !== '' ) {
		$id = $m[4];
		if ( strncmp( $whole, 'RFC', 3 ) === 0 ) {
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
		} elseif ( strncmp( $whole, 'PMID', 4 ) === 0 ) {
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
	}

	# ISBN
	if ( isset( $m[5] ) && $m[5] !== '' ) {
		$space = self::SPACE_NOT_NL; # non-newline space
		# Displayed form: every kind of non-newline space becomes a plain space
		$isbn = preg_replace( "/$space/", ' ', $m[5] );
		# Lookup form: no separators, upper-case check digit
		$num = str_replace( array( '-', ' ', 'x' ), array( '', '', 'X' ), $isbn );
		$titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
		return '<a href="' .
			htmlspecialchars( $titleObj->getLocalURL() ) .
			"\" class=\"internal mw-magiclink-isbn\">ISBN $isbn</a>";
	}

	# Nothing captured: leave the text as it was
	return $whole;
}
1447 
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Anything from the first escaped angle bracket onwards, and trailing
 * punctuation, is split off the URL and appended after the generated
 * HTML. If the URL qualifies as an external image an image is produced
 * instead of a link.
 *
 * @param string $url
 * @return string HTML followed by the trailing text that was split off
 */
public function makeFreeExternalLink( $url ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	$m2 = array();
	if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
		$cut = $m2[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Characters that count as trailing punctuation. A right bracket is
	# only fair game when the URL contains no left bracket.
	$sep = ',;\.:!?';
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$urlRev = strrev( $url );
	$numSepChars = strspn( $urlRev, $sep );

	# Don't break a trailing HTML entity by moving the ; into $trail.
	# This is in hot code: substr_compare avoids creating a new string
	# for the comparison, and the entity check runs on the reversed
	# string from a fixed offset instead of using a slow $ anchor.
	# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $numSepChars
		&& substr_compare( $url, ";", -$numSepChars, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars )
	) {
		$numSepChars--;
	}

	# Move trailing punctuation to $trail
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html !== false ) {
		return $html . $trail;
	}

	# Not an image, make a link
	$html = Linker::makeExternalLink( $url,
		$this->getConverterLanguage()->markNoConversion( $url, true ),
		true, 'free',
		$this->getExternalLinkAttribs( $url ) );

	# Register it in the output object, with unnecessary URL escape
	# codes replaced by their equivalent characters
	$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );

	return $html . $trail;
}
1512 
/**
 * Convert wikitext heading lines ("== Title ==") to HTML headings.
 *
 * Levels are tried from 6 down to 1, so the longest run of "=" that
 * fits on both sides of a line decides the heading level.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	$patterns = array();
	$replacements = array();
	foreach ( range( 6, 1 ) as $level ) {
		$marker = str_repeat( '=', $level );
		$patterns[] = "/^$marker(.+)$marker\\s*$/m";
		$replacements[] = "<h$level>\\1</h$level>";
	}
	# preg_replace() applies the patterns one after another, in order
	return preg_replace( $patterns, $replacements, $text );
}
1529 
/**
 * Run doQuotes() over every line of the text.
 *
 * @param string $text
 * @return string The converted lines, joined with newlines again
 */
public function doAllQuotes( $text ) {
	$converted = array();
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1547 
/**
 * Convert runs of apostrophes in a single line to <i>/<b> markup.
 *
 * Two apostrophes toggle italics, three toggle bold and five toggle
 * both. Runs of four, or of more than five, keep their surplus leading
 * apostrophes as literal text.
 *
 * @param string $text One line of text
 * @return string
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold the apostrophe runs, even indexes the text
	# between them.
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		# No mark-up on this line
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				# Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					if ( $firstsingleletterword == -1 ) {
						$firstsingleletterword = $i;
						// if $firstsingleletterword is set, we don't
						// look at the other options, so we can bail early.
						break;
					}
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state names the open tags in opening order ('', 'i', 'b', 'ib',
	// 'bi'); 'both' means a ''''' was seen and the order of <b> and <i>
	// is still undecided, so the text is held back in $buffer.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}

	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a buffer of "0" is falsy in PHP and
	// would otherwise be dropped from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1731 
/**
 * Replace bracketed external links ([url text]) with HTML links.
 *
 * The text is split with $this->mExtLinkBracketedRegex; every match
 * contributes four consecutive pieces: URL, protocol, link text and the
 * text that follows the link.
 *
 * @param string $text
 * @throws MWException If the split fails
 * @return string
 */
public function replaceExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# Everything before the first link
	$s = array_shift( $bits );

	$total = count( $bits );
	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $bits[$pos];
		# $bits[$pos + 1] is the protocol, which is not needed here
		$label = $bits[$pos + 2];
		$trail = $bits[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$m2 = array();
		if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
			$cut = $m2[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';

		# Set linktype for CSS - if URL==text, link is essentially free
		$linktype = ( $label === $url ) ? 'free' : 'text';

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$langObj = $this->getTargetLanguage();
			$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		$label = $this->getConverterLanguage()->markNoConversion( $label );

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$linkHtml = Linker::makeExternalLink( $url, $label, false, $linktype,
			$this->getExternalLinkAttribs( $url ) );
		$s .= $linkHtml . $dtrail . $trail;

		# Register link in the output object.
		# Replace unnecessary URL escape codes with the referenced character
		# This prevents spammers from hiding links from the filters
		$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
	}

	return $s;
}
1814 
/**
 * Get the rel attribute value for an external link.
 *
 * 'nofollow' is returned when $wgNoFollowLinks is enabled, the
 * namespace of $title is not listed in $wgNoFollowNsExceptions and
 * $url does not match $wgNoFollowDomainExceptions.
 *
 * @param string|bool $url URL of the link (default: false)
 * @param Title|null $title Title whose namespace is checked; without
 *   one the namespace is taken to be false
 * @return string|null 'nofollow', or null when no rel is needed
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	# The configuration lives in global scope; without this declaration
	# the three variables would be undefined locals and the condition
	# below could never be true.
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	$ns = $title ? $title->getNamespace() : false;
	if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
		&& !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
1834 
/**
 * Build the HTML attributes for an external link.
 *
 * 'rel' is always present (it may be null); 'target' is only added when
 * the parser options supply an external link target.
 *
 * @param string|bool $url URL of the link (default: false)
 * @return array Attribute name => value
 */
public function getExternalLinkAttribs( $url = false ) {
	$attribs = array(
		'rel' => self::getExternalLinkRel( $url, $this->mTitle ),
	);

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
	}

	return $attribs;
}
1854 
/**
 * Deprecated alias of normalizeLinkUrl().
 *
 * @deprecated since 1.24, use normalizeLinkUrl()
 * @param string $url
 * @return string
 */
public static function replaceUnusualEscapes( $url ) {
	wfDeprecated( __METHOD__, '1.24' );
	$normalized = self::normalizeLinkUrl( $url );
	return $normalized;
}
1866 
/**
 * Bring a URL into a normal form with respect to percent-escapes.
 *
 * Unsafe characters are percent-encoded first. The URL is then split
 * into base (scheme and path), query and fragment, and each part goes
 * through normalizeUrlComponent() with its own list of characters that
 * must stay escaped.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# First, make sure unsafe characters are encoded
	$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $m ) {
			return rawurlencode( $m[0] );
		},
		$url
	);

	$base = $url;
	$query = '';
	$fragment = '';

	# Fragment part - 'fragment': everything from the first '#'
	$hashPos = strpos( $base, '#' );
	if ( $hashPos !== false ) {
		$fragment = self::normalizeUrlComponent(
			substr( $base, $hashPos ), '"#%<>[\]^`{|}' );
		$base = substr( $base, 0, $hashPos );
	}

	# Query part - 'query' minus &=+;
	# Only a '?' in front of the fragment starts a query
	$queryPos = strpos( $base, '?' );
	if ( $queryPos !== false ) {
		$query = self::normalizeUrlComponent(
			substr( $base, $queryPos ), '"#%<>[\]^`{|}&=+;' );
		$base = substr( $base, 0, $queryPos );
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$base = self::normalizeUrlComponent( $base, '"#%<>[\]^`{|}/?' );

	return $base . $query . $fragment;
}
/**
 * Normalize the percent-escapes in one component of a URL.
 *
 * An escape is decoded when it stands for a character with a code
 * between 33 and 126 that is not listed in $unsafe; every other escape
 * is kept, with its hex digits in upper case.
 *
 * @param string $component
 * @param string $unsafe Characters that must remain escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $hit ) use ( $unsafe ) {
			$escape = $hit[0];
			$decoded = urldecode( $escape );
			$code = ord( $decoded );
			$outsideRange = ( $code <= 32 || $code >= 127 );
			if ( $outsideRange || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape );
			}
			# Unescape it
			return $decoded;
		},
		$component
	);
}
1926 
/**
 * Make an external image if the URL qualifies for one.
 *
 * A URL matching EXT_IMAGE_REGEX becomes an image when external images
 * are allowed in general, when the URL starts with one of the allowed
 * prefixes, or (if the image whitelist is enabled) when it matches a
 * line of the 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|bool HTML for the image, or false if none was made
 */
private function maybeMakeExternalImage( $url ) {
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();

	# The setting is either a single string or an array of strings;
	# casting lets both be handled by one loop.
	$prefixMatches = false;
	if ( !empty( $allowedFrom ) ) {
		foreach ( (array)$allowedFrom as $allowedPrefix ) {
			if ( strpos( $url, $allowedPrefix ) === 0 ) {
				$prefixMatches = true;
				break;
			}
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatches )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( $html
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $html;
	}

	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);

	foreach ( $whitelist as $entry ) {
		# Ignore blank entries and comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		# Sanitize the regex fragment and make it case-insensitive
		$entryRegex = '/' . str_replace( '/', '\\/', $entry ) . '/i';
		if ( preg_match( $entryRegex, $url ) ) {
			# Image matches a whitelist entry
			$html = Linker::makeExternalImage( $url );
			break;
		}
	}

	return $html;
}
1984 
/**
 * Process [[ ]] wikilinks and merge the resulting link holders into
 * $this->mLinkHolders.
 *
 * @param string $s
 * @return string Text as rewritten by replaceInternalLinks2()
 */
public function replaceInternalLinks( $s ) {
	$newHolders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $newHolders );
	return $s;
}
1998 
/**
 * Process [[ ]] wikilinks.
 *
 * The text is split on "[[" and each piece is examined in turn. Pieces
 * that do not form a valid link are written back unchanged. Language
 * links, files and categories are handled directly; most other links
 * are emitted as placeholders registered in the returned holder array.
 *
 * @param string &$s Text to process; rewritten in place
 * @throws MWException If $this->mTitle is null
 * @return LinkHolderArray Holders created while processing $s
 */
public function replaceInternalLinks2( &$s ) {
	# Both settings live in global scope and must be imported before use
	global $wgContLang, $wgExtraInterlanguageLinkPrefixes;

	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $wgContLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = array();
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();

	// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		// @codingStandardsIgnoreEnd

		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
			# the real problem is with the $e1 regex
			# See bug 1300.
			#
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			#
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( array( '<', '>' ), array( '&lt;', '&gt;' ), rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = rawurldecode( $m[1] );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = $m[1];

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $origLink, $text );
		} else {
			$link = $origLink;
		}

		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		$nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) );
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
		} else {
			# Bug 4598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
				)
			) {
				# Bug 24502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				$s = rtrim( $s . $prefix );
				$s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
				continue;
			}

			if ( $ns == NS_FILE ) {
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
				} else {
					$s .= $prefix . $trail;
				}
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				$s = rtrim( $s . "\n" ); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				# Strip the whitespace that category links would otherwise leave behind
				$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = array();
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				array( $this, $nt, &$options, &$descQuery ) );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		#
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix );
		}
	}
	return $holders;
}
2312 
/**
 * Render a link to a page that is known to exist.
 *
 * The link HTML is passed through armorLinks() before the remaining
 * trail is appended.
 *
 * @param Title $nt
 * @param string $text Link text; the escaped prefixed title is used
 *   when this is empty
 * @param array|string $query Query parameters; a string is converted
 *   with wfCgiToArray()
 * @param string $trail Text following the link; the part split off by
 *   Linker::splitTrail() goes inside the link
 * @param string $prefix Text placed inside the link before $text
 * @return string
 */
public function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
	list( $inside, $trail ) = Linker::splitTrail( $trail );

	$queryArray = is_string( $query ) ? wfCgiToArray( $query ) : $query;
	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$link = Linker::linkKnown( $nt, $prefix . $label . $inside, array(), $queryArray );

	return $this->armorLinks( $link ) . $trail;
}
2341 
/**
 * Insert a NOPARSE marker in front of every URL protocol in the text.
 *
 * The marker is $this->mUniqPrefix followed by "NOPARSE"; protocols are
 * those matched by $this->mUrlProtocols at a word boundary.
 *
 * @param string $text
 * @return string
 */
public function armorLinks( $text ) {
	$pattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$replacement = $this->mUniqPrefix . 'NOPARSE$1';
	return preg_replace( $pattern, $replacement, $text );
}
2356 
/**
 * Check whether the namespace of the current title has subpages.
 *
 * @return bool Result of MWNamespace::hasSubpages() for that namespace
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$namespace = $this->mTitle->getNamespace();
	return MWNamespace::hasSubpages( $namespace );
}
2365 
/**
 * Normalize a possible subpage link against the current title.
 *
 * Thin wrapper around Linker::normalizeSubpageLink().
 *
 * @param string $target Link target
 * @param string &$text Link text, passed on by reference
 * @return string Normalized link target
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$contextTitle = $this->mTitle;
	return Linker::normalizeSubpageLink( $contextTitle, $target, $text );
}
2377 
/**
 * Close the block element recorded in $this->mLastSection, if any, and
 * reset the paragraph state (mLastSection and mInPre).
 *
 * @return string Closing tag followed by a newline, or '' when no
 *   section was open
 */
public function closeParagraph() {
	$closing = ( $this->mLastSection != '' )
		? '</' . $this->mLastSection . ">\n"
		: '';

	$this->mLastSection = '';
	$this->mInPre = false;

	return $closing;
}
2393 
/**
 * Get the length of the longest common prefix of two strings.
 *
 * @param string $st1
 * @param string $st2
 * @return int Number of leading characters the strings share
 */
public function getCommon( $st1, $st2 ) {
	# Only the first min(len1, len2) characters can possibly match
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	$matched = 0;
	while ( $matched < $limit && $st1[$matched] == $st2[$matched] ) {
		++$matched;
	}
	return $matched;
}
2418 
/**
 * Open a list for the given prefix character.
 *
 * Any open paragraph is closed first. Opening a ';' list also sets
 * $this->mDTopen.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string Paragraph-closing HTML followed by the opening tags,
 *   or just an error comment for an unknown character
 */
public function openList( $char ) {
	static $openTags = array(
		'*' => "<ul><li>",
		'#' => "<ol><li>",
		':' => "<dl><dd>",
		';' => "<dl><dt>",
	);

	# Always close the paragraph, even if the character is unknown
	$result = $this->closeParagraph();

	if ( !is_string( $char ) || !isset( $openTags[$char] ) ) {
		return '<!-- ERR 1 -->';
	}
	if ( $char === ';' ) {
		$this->mDTopen = true;
	}
	return $result . $openTags[$char];
}
2446 
/**
 * Close the current list item and open the next one.
 *
 * For definition lists, $this->mDTopen decides whether a <dt> or a <dd>
 * is closed, and is updated to reflect the item that is opened.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string HTML, or an error comment for an unknown character
 */
public function nextItem( $char ) {
	if ( $char === '*' || $char === '#' ) {
		return "</li>\n<li>";
	}
	if ( $char !== ':' && $char !== ';' ) {
		return '<!-- ERR 2 -->';
	}

	# Close whichever definition-list element is currently open
	$close = $this->mDTopen ? "</dt>\n" : "</dd>\n";

	# ';' opens a term, ':' opens a definition
	$this->mDTopen = ( $char === ';' );
	return $close . ( $this->mDTopen ? '<dt>' : '<dd>' );
}
2472 
/**
 * Close the list belonging to the given prefix character.
 *
 * For ':' the closing element depends on $this->mDTopen, which is
 * cleared when a <dt> is closed. ';' is not accepted here.
 *
 * @param string $char One of '*', '#' or ':'
 * @return string HTML, or an error comment for any other character
 */
public function closeList( $char ) {
	if ( $char === '*' ) {
		return "</li></ul>";
	}
	if ( $char === '#' ) {
		return "</li></ol>";
	}
	if ( $char !== ':' ) {
		return '<!-- ERR 3 -->';
	}

	if ( !$this->mDTopen ) {
		return "</dd></dl>";
	}
	$this->mDTopen = false;
	return "</dt></dl>";
}
/**
 * Build lists from lines starting with '*', '#', ':' or ';' and wrap the
 * remaining lines in paragraph or pre sections.
 *
 * The text is processed one line at a time. List nesting is tracked by
 * comparing each line's prefix with the previous line's prefix;
 * paragraph and pre state is tracked in $this->mLastSection and
 * $this->mInPre.
 *
 * @param string $text Text to process, split on "\n"
 * @param bool $linestart Whether $text begins at the start of a line.
 *   When false, the first line is copied to the output untouched.
 * @return string The processed text
 */
2507  public function doBlockLevels( $text, $linestart ) {
2508 
2509  # Parsing through the text line by line. The main thing
2510  # happening here is handling of block-level elements p, pre,
2511  # and making lists from lines starting with * # : etc.
2512  #
2513  $textLines = StringUtils::explode( "\n", $text );
2514 
2515  $lastPrefix = $output = '';
2516  $this->mDTopen = $inBlockElem = false;
2517  $prefixLength = 0;
      # Paragraph-opening markup ('<p>' or '</p><p>') that has been deferred
      # until the next line is seen; false when nothing is pending.
2518  $paragraphStack = false;
2519  $inBlockquote = false;
2520 
2521  foreach ( $textLines as $oLine ) {
2522  # Fix up $linestart
2523  if ( !$linestart ) {
2524  $output .= $oLine;
2525  $linestart = true;
2526  continue;
2527  }
2528  # * = ul
2529  # # = ol
2530  # ; = dt
2531  # : = dd
2532 
2533  $lastPrefixLength = strlen( $lastPrefix );
2534  $preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
2535  $preOpenMatch = preg_match( '/<pre/i', $oLine );
2536  # If not in a <pre> element, scan for and figure out what prefixes are there.
2537  if ( !$this->mInPre ) {
2538  # Multiple prefixes may abut each other for nested lists.
2539  $prefixLength = strspn( $oLine, '*#:;' );
2540  $prefix = substr( $oLine, 0, $prefixLength );
2541 
2542  # eh?
2543  # ; and : are both from definition-lists, so they're equivalent
2544  # for the purposes of determining whether or not we need to open/close
2545  # elements.
2546  $prefix2 = str_replace( ';', ':', $prefix );
      # $t is the line with its list prefix removed.
2547  $t = substr( $oLine, $prefixLength );
2548  $this->mInPre = (bool)$preOpenMatch;
2549  } else {
2550  # Don't interpret any other prefixes in preformatted text
2551  $prefixLength = 0;
2552  $prefix = $prefix2 = '';
2553  $t = $oLine;
2554  }
2555 
2556  # List generation
2557  if ( $prefixLength && $lastPrefix === $prefix2 ) {
2558  # Same as the last item, so no need to deal with nesting or opening stuff
2559  $output .= $this->nextItem( substr( $prefix, -1 ) );
2560  $paragraphStack = false;
2561 
2562  if ( substr( $prefix, -1 ) === ';' ) {
2563  # The one nasty exception: definition lists work like this:
2564  # ; title : definition text
2565  # So we check for : in the remainder text to split up the
2566  # title and definition, without b0rking links.
2567  $term = $t2 = '';
2568  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2569  $t = $t2;
2570  $output .= $term . $this->nextItem( ':' );
2571  }
2572  }
2573  } elseif ( $prefixLength || $lastPrefixLength ) {
2574  # We need to open or close prefixes, or both.
2575 
2576  # Either open or close a level...
2577  $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
2578  $paragraphStack = false;
2579 
2580  # Close all the prefixes which aren't shared.
2581  while ( $commonPrefixLength < $lastPrefixLength ) {
2582  $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
2583  --$lastPrefixLength;
2584  }
2585 
2586  # Continue the current prefix if appropriate.
2587  if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
2588  $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
2589  }
2590 
2591  # Open prefixes where appropriate.
2592  if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
2593  $output .= "\n";
2594  }
2595  while ( $prefixLength > $commonPrefixLength ) {
2596  $char = substr( $prefix, $commonPrefixLength, 1 );
2597  $output .= $this->openList( $char );
2598 
2599  if ( ';' === $char ) {
2600  # @todo FIXME: This is dupe of code above
      # $term and $t2 are created here by being passed by reference.
2601  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2602  $t = $t2;
2603  $output .= $term . $this->nextItem( ':' );
2604  }
2605  }
2606  ++$commonPrefixLength;
2607  }
2608  if ( !$prefixLength && $lastPrefix ) {
2609  $output .= "\n";
2610  }
      # Remember the normalised prefix (';' replaced by ':') for the next line.
2611  $lastPrefix = $prefix2;
2612  }
2613 
2614  # If we have no prefixes, go to paragraph mode.
2615  if ( 0 == $prefixLength ) {
2616  # No prefix (not in list)--go to paragraph mode
2617  # XXX: use a stack for nestable elements like span, table and div
2618  $openmatch = preg_match(
2619  '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
2620  . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
2621  $t
2622  );
2623  $closematch = preg_match(
2624  '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
2625  . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
2626  . $this->mUniqPrefix
2627  . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
2628  $t
2629  );
2630 
2631  if ( $openmatch || $closematch ) {
2632  $paragraphStack = false;
2633  # @todo bug 5718: paragraph closed
2634  $output .= $this->closeParagraph();
2635  if ( $preOpenMatch && !$preCloseMatch ) {
2636  $this->mInPre = true;
2637  }
      # Walk every <blockquote> / </blockquote> tag on the line; the last
      # one seen decides whether we are inside a blockquote afterwards.
2638  $bqOffset = 0;
2639  while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset ) ) {
2640  $inBlockquote = !$bqMatch[1][0]; // is this a close tag?
2641  $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
2642  }
2643  $inBlockElem = !$closematch;
2644  } elseif ( !$inBlockElem && !$this->mInPre ) {
2645  if ( ' ' == substr( $t, 0, 1 )
2646  && ( $this->mLastSection === 'pre' || trim( $t ) != '' )
2647  && !$inBlockquote
2648  ) {
2649  # pre
2650  if ( $this->mLastSection !== 'pre' ) {
2651  $paragraphStack = false;
2652  $output .= $this->closeParagraph() . '<pre>';
2653  $this->mLastSection = 'pre';
2654  }
      # Drop the leading space that marked the line as preformatted.
2655  $t = substr( $t, 1 );
2656  } else {
2657  # paragraph
2658  if ( trim( $t ) === '' ) {
2659  if ( $paragraphStack ) {
2660  $output .= $paragraphStack . '<br />';
2661  $paragraphStack = false;
2662  $this->mLastSection = 'p';
2663  } else {
2664  if ( $this->mLastSection !== 'p' ) {
2665  $output .= $this->closeParagraph();
2666  $this->mLastSection = '';
2667  $paragraphStack = '<p>';
2668  } else {
2669  $paragraphStack = '</p><p>';
2670  }
2671  }
2672  } else {
2673  if ( $paragraphStack ) {
2674  $output .= $paragraphStack;
2675  $paragraphStack = false;
2676  $this->mLastSection = 'p';
2677  } elseif ( $this->mLastSection !== 'p' ) {
2678  $output .= $this->closeParagraph() . '<p>';
2679  $this->mLastSection = 'p';
2680  }
2681  }
2682  }
2683  }
2684  }
2685  # somewhere above we forget to get out of pre block (bug 785)
2686  if ( $preCloseMatch && $this->mInPre ) {
2687  $this->mInPre = false;
2688  }
      # The line is only emitted when no paragraph opening is pending; a blank
      # line that queued '<p>' or '</p><p>' above produces no output itself.
2689  if ( $paragraphStack === false ) {
2690  $output .= $t;
2691  if ( $prefixLength === 0 ) {
2692  $output .= "\n";
2693  }
2694  }
2695  }
      # Close every list level still open after the last line.
2696  while ( $prefixLength ) {
2697  $output .= $this->closeList( $prefix2[$prefixLength - 1] );
2698  --$prefixLength;
2699  if ( !$prefixLength ) {
2700  $output .= "\n";
2701  }
2702  }
      # Close the paragraph or pre section that is still open, if any.
2703  if ( $this->mLastSection != '' ) {
2704  $output .= '</' . $this->mLastSection . '>';
2705  $this->mLastSection = '';
2706  }
2707 
2708  return $output;
2709  }
2710 
/**
 * Split a string at the first ':' that is not inside an HTML-style tag
 * or between an opening tag and its closing tag, so that splitting a
 * definition list line cannot break markup.
 *
 * @param string $str The string to split
 * @param string &$before Set to everything before the ':' on success
 * @param string &$after Set to everything after the ':' on success
 * @throws MWException If the state machine reaches an unknown state
 * @return int|bool Byte offset of the ':' that was used, or false if no
 *   usable ':' was found ($before and $after are then left untouched)
 */
public function findColonNoLinks( $str, &$before, &$after ) {

	$pos = strpos( $str, ':' );
	if ( $pos === false ) {
		# Nothing to find!
		return false;
	}

	$lt = strpos( $str, '<' );
	if ( $lt === false || $lt > $pos ) {
		# Easy; no tag nesting to worry about
		$before = substr( $str, 0, $pos );
		$after = substr( $str, $pos + 1 );
		return $pos;
	}

	# Ugly state machine to walk through avoiding tags.
	# $stack counts the currently open (unclosed) tags.
	$state = self::COLON_STATE_TEXT;
	$stack = 0;
	$len = strlen( $str );
	for ( $i = 0; $i < $len; $i++ ) {
		$c = $str[$i];

		switch ( $state ) {
			# (Using the number is a performance hack for common cases)
			case 0: # self::COLON_STATE_TEXT:
				switch ( $c ) {
					case "<":
						# Could be either a <start> tag or an </end> tag
						$state = self::COLON_STATE_TAGSTART;
						break;
					case ":":
						if ( $stack == 0 ) {
							# We found it!
							$before = substr( $str, 0, $i );
							$after = substr( $str, $i + 1 );
							return $i;
						}
						# Embedded in a tag; don't break it.
						break;
					default:
						# Skip ahead looking for something interesting
						$colon = strpos( $str, ':', $i );
						if ( $colon === false ) {
							# Nothing else interesting
							return false;
						}
						$lt = strpos( $str, '<', $i );
						if ( $stack === 0 ) {
							if ( $lt === false || $colon < $lt ) {
								# We found it! Report the colon's own offset,
								# not the position the scan had reached, so
								# that every success path returns the same
								# thing.
								$before = substr( $str, 0, $colon );
								$after = substr( $str, $colon + 1 );
								return $colon;
							}
						}
						if ( $lt === false ) {
							# Nothing else interesting to find; abort!
							# We're nested, but there's no close tags left. Abort!
							break 2;
						}
						# Skip ahead to next tag start; the loop increment
						# then moves past the '<' itself.
						$i = $lt;
						$state = self::COLON_STATE_TAGSTART;
				}
				break;
			case 1: # self::COLON_STATE_TAG:
				# In a <tag>
				switch ( $c ) {
					case ">":
						$stack++;
						$state = self::COLON_STATE_TEXT;
						break;
					case "/":
						# Slash may be followed by >?
						$state = self::COLON_STATE_TAGSLASH;
						break;
					default:
						# ignore
				}
				break;
			case 2: # self::COLON_STATE_TAGSTART:
				switch ( $c ) {
					case "/":
						$state = self::COLON_STATE_CLOSETAG;
						break;
					case "!":
						$state = self::COLON_STATE_COMMENT;
						break;
					case ">":
						# Illegal early close? This shouldn't happen D:
						$state = self::COLON_STATE_TEXT;
						break;
					default:
						$state = self::COLON_STATE_TAG;
				}
				break;
			case 3: # self::COLON_STATE_CLOSETAG:
				# In a </tag>
				if ( $c === ">" ) {
					$stack--;
					if ( $stack < 0 ) {
						wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
						return false;
					}
					$state = self::COLON_STATE_TEXT;
				}
				break;
			case self::COLON_STATE_TAGSLASH:
				if ( $c === ">" ) {
					# Yes, a self-closed tag <blah/>
					$state = self::COLON_STATE_TEXT;
				} else {
					# Probably we're jumping the gun, and this is an attribute
					$state = self::COLON_STATE_TAG;
				}
				break;
			case 5: # self::COLON_STATE_COMMENT:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASH;
				}
				break;
			case self::COLON_STATE_COMMENTDASH:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASHDASH;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			case self::COLON_STATE_COMMENTDASHDASH:
				if ( $c === ">" ) {
					$state = self::COLON_STATE_TEXT;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			default:
				throw new MWException( "State machine error in " . __METHOD__ );
		}
	}
	if ( $stack > 0 ) {
		wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
		return false;
	}
	return false;
}
2867 
/**
 * Return the value of a magic variable (such as PAGENAME).
 *
 * Values computed by the main switch are stored in $this->mVarCache
 * and returned from there on later calls. The configuration-backed
 * variables (articlepath, sitename, server, ...) and anything resolved
 * by the ParserGetVariableValueSwitch hook are returned directly
 * without being cached here.
 *
 * @param string $index Magic variable identifier (e.g. 'pagename')
 * @param bool|PPFrame $frame Passed through to the
 *   ParserGetVariableValueSwitch hook
 * @throws MWException If no title has been set on the parser
 * @return string|int|null
 */
public function getVariableValue( $index, $frame = false ) {
	# These globals are read by the switch below; without the
	# declarations the corresponding cases see undefined local variables.
	global $wgContLang, $wgSitename, $wgServer, $wgServerName;
	global $wgArticlePath, $wgScriptPath, $wgStylePath, $wgLanguageCode;

	if ( is_null( $this->mTitle ) ) {
		// If no title set, bad things are going to happen
		// later. Title should always be set since this
		// should only be called in the middle of a parse
		// operation (but the unit-tests do funky stuff)
		throw new MWException( __METHOD__ . ' Should only be '
			. ' called while parsing (no title set)' );
	}

	# The hook may veto use of the cache by returning false.
	if ( Hooks::run( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) {
		if ( isset( $this->mVarCache[$index] ) ) {
			return $this->mVarCache[$index];
		}
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', array( &$this, &$ts ) );

	$pageLang = $this->getFunctionLang();

	switch ( $index ) {
		case '!':
			$value = '|';
			break;
		case 'currentmonth':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
			break;
		case 'currentmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentday':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
			break;
		case 'currentday2':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
			break;
		case 'localmonth':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
			break;
		case 'localmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localday':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
			break;
		case 'localday2':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
			break;
		case 'pagename':
			$value = wfEscapeWikiText( $this->mTitle->getText() );
			break;
		case 'pagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
			break;
		case 'fullpagename':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
			break;
		case 'subpagename':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
			break;
		case 'subpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$value = wfEscapeWikiText( $this->mTitle->getRootText() );
			break;
		case 'rootpagenamee':
			$value = wfEscapeWikiText( wfUrlEncode( str_replace(
				' ',
				'_',
				$this->mTitle->getRootText()
			) ) );
			break;
		case 'basepagename':
			$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
			break;
		case 'basepagenamee':
			$value = wfEscapeWikiText( wfUrlEncode( str_replace(
				' ',
				'_',
				$this->mTitle->getBaseText()
			) ) );
			break;
		case 'talkpagename':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
			} else {
				$value = '';
			}
			break;
		case 'talkpagenamee':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
			} else {
				$value = '';
			}
			break;
		case 'subjectpagename':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
			break;
		case 'pageid': // requested in bug 23427
			$pageid = $this->getTitle()->getArticleID();
			if ( $pageid == 0 ) {
				# 0 means the page doesn't exist in the database,
				# which means the user is previewing a new page.
				# The vary-revision flag must be set, because the magic word
				# will have a different value once the page is saved.
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
			}
			$value = $pageid ? $pageid : null;
			break;
		case 'revisionid':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
			$value = $this->mRevisionId;
			break;
		case 'revisionday':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
			break;
		case 'revisionday2':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 6, 2 );
			break;
		case 'revisionmonth':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 4, 2 );
			break;
		case 'revisionmonth1':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
			break;
		case 'revisionyear':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 0, 4 );
			break;
		case 'revisiontimestamp':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
			$value = $this->getRevisionTimestamp();
			break;
		case 'revisionuser':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
			$value = $this->getRevisionUser();
			break;
		case 'revisionsize':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
			$value = $this->getRevisionSize();
			break;
		case 'namespace':
			$value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacee':
			$value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacenumber':
			$value = $this->mTitle->getNamespace();
			break;
		case 'talkspace':
			$value = $this->mTitle->canTalk()
				? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
				: '';
			break;
		case 'talkspacee':
			$value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
			break;
		case 'subjectspace':
			$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
			break;
		case 'currentdayname':
			$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
			break;
		case 'currentyear':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
			break;
		case 'currenttime':
			$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
			break;
		case 'currentweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
			break;
		case 'currentdow':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
			break;
		case 'localdayname':
			$value = $pageLang->getWeekdayName(
				(int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
			);
			break;
		case 'localyear':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
			break;
		case 'localtime':
			$value = $pageLang->time(
				MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
				false,
				false
			);
			break;
		case 'localhour':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
			break;
		case 'localweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
			break;
		case 'localdow':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
			break;
		case 'numberofarticles':
			$value = $pageLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $pageLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $pageLang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$value = $pageLang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$value = $pageLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$value = $pageLang->formatNum( SiteStats::edits() );
			break;
		case 'currenttimestamp':
			$value = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
			break;
		case 'currentversion':
			# NOTE(review): this case had no assignment in the reviewed
			# text, leaving $value undefined. SpecialVersion::getVersion()
			# is presumed to be the intended source -- confirm.
			$value = SpecialVersion::getVersion();
			break;
		case 'articlepath':
			return $wgArticlePath;
		case 'sitename':
			return $wgSitename;
		case 'server':
			return $wgServer;
		case 'servername':
			return $wgServerName;
		case 'scriptpath':
			return $wgScriptPath;
		case 'stylepath':
			return $wgStylePath;
		case 'directionmark':
			return $pageLang->getDirMark();
		case 'contentlanguage':
			return $wgLanguageCode;
		case 'cascadingsources':
			# NOTE(review): this case had no assignment in the reviewed
			# text, leaving $value undefined.
			# CoreParserFunctions::cascadingsources() is presumed to be the
			# intended source -- confirm.
			$value = CoreParserFunctions::cascadingsources( $this, '' );
			break;
		default:
			# Unknown here: let extensions supply the value. The result is
			# returned as-is and is not cached by this method.
			$ret = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				array( &$this, &$this->mVarCache, &$index, &$ret, &$frame )
			);

			return $ret;
	}

	if ( $index ) {
		$this->mVarCache[$index] = $value;
	}

	return $value;
}
3225 
/**
 * Set up the magic word arrays used to recognise variables
 * ($this->mVariables) and subst-style prefixes ($this->mSubstWords).
 */
public function initialiseVariables() {
	# Both ID lists are fetched before either MagicWordArray is built.
	$idLists = array(
		'mVariables' => MagicWord::getVariableIDs(),
		'mSubstWords' => MagicWord::getSubstIDs(),
	);

	foreach ( $idLists as $property => $ids ) {
		$this->$property = new MagicWordArray( $ids );
	}
}
3238 
/**
 * Run the given text through the parser's preprocessor and return the
 * resulting object.
 *
 * @param string $text The text to preprocess
 * @param int $flags Bitwise combination of flags handed to the
 *   preprocessor (e.g. Parser::PTD_FOR_INCLUSION)
 * @return mixed Whatever the preprocessor's preprocessToObj() returns
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3265 
/**
 * Split a string into its leading whitespace, its trimmed content and
 * its trailing whitespace.
 *
 * @param string $s
 * @return array Three elements: leading whitespace, trimmed text,
 *   trailing whitespace
 */
public static function splitWhitespace( $s ) {
	$withoutLeading = ltrim( $s );
	# Whatever ltrim() removed is the leading whitespace.
	$leading = substr( $s, 0, strlen( $s ) - strlen( $withoutLeading ) );

	$core = rtrim( $withoutLeading );
	$trailingLength = strlen( $withoutLeading ) - strlen( $core );
	# substr() with a negative zero offset would return the whole string,
	# so the "nothing trimmed" case is handled explicitly.
	$trailing = $trailingLength > 0 ? substr( $withoutLeading, -$trailingLength ) : '';

	return array( $leading, $core, $trailing );
}
3285 
/**
 * Expand the preprocessor constructs in a piece of text by
 * preprocessing it and expanding the result in the given frame.
 *
 * Empty text, and text longer than the configured maximum include
 * size, is returned unchanged.
 *
 * @param string $text The text to transform
 * @param bool|PPFrame|array $frame Frame to expand in. False creates a
 *   new frame; any other non-PPFrame value is handed to
 *   newCustomFrame().
 * @param bool $argsOnly When true, expansion is done with
 *   PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text? Also, Prevent too big inclusions!
	$length = strlen( $text );
	if ( $length < 1 || $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( $frame === false ) {
		$frame = $this->getPreprocessor()->newFrame();
	} elseif ( !( $frame instanceof PPFrame ) ) {
		wfDebug( __METHOD__ . " called using plain parameters instead of "
			. "a PPFrame instance. Creating custom frame.\n" );
		$frame = $this->getPreprocessor()->newCustomFrame( $frame );
	}

	$dom = $this->preprocessToDom( $text );
	$expandFlags = 0;
	if ( $argsOnly ) {
		$expandFlags = PPFrame::NO_TEMPLATES;
	}

	return $frame->expand( $dom, $expandFlags );
}
3326 
/**
 * Turn a list of template argument strings into an associative array.
 *
 * Arguments containing '=' are split at the first '=' into a trimmed
 * name and a trimmed value. Arguments without '=' are stored unchanged
 * under consecutive integer keys starting at 1.
 *
 * @param array $args List of argument strings
 * @return array
 */
public static function createAssocArgs( $args ) {
	$assocArgs = array();
	$index = 1;
	foreach ( $args as $arg ) {
		$eqpos = strpos( $arg, '=' );
		if ( $eqpos === false ) {
			# Positional argument
			$assocArgs[$index++] = $arg;
			continue;
		}

		# Named argument. trim() always returns a string, so no further
		# checks against false are needed on either part.
		$name = trim( substr( $arg, 0, $eqpos ) );
		$assocArgs[$name] = trim( substr( $arg, $eqpos + 1 ) );
	}

	return $assocArgs;
}
3355 
/**
 * Record that a parser limitation was hit: add the corresponding
 * warning to the parser output and add the page to the corresponding
 * tracking category.
 *
 * @param string $limitationType Base name of the limitation; the
 *   message "<type>-warning" and the category key "<type>-category"
 *   are derived from it
 * @param string|int $current Current value, passed as the first numeric
 *   message parameter
 * @param string|int $max Maximum allowed value, passed as the second
 *   numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# does no harm if $current and $max are present but are unnecessary for the message
	$message = wfMessage( $limitationType . '-warning' )->numParams( $current, $max );
	$message = $message->inLanguage( $this->mOptions->getUserLangObj() );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( $limitationType . '-category' );
}
3387 
3400  public function braceSubstitution( $piece, $frame ) {
3401 
3402  // Flags
3403 
3404  // $text has been filled
3405  $found = false;
3406  // wiki markup in $text should be escaped
3407  $nowiki = false;
3408  // $text is HTML, armour it against wikitext transformation
3409  $isHTML = false;
3410  // Force interwiki transclusion to be done in raw mode not rendered
3411  $forceRawInterwiki = false;
3412  // $text is a DOM node needing expansion in a child frame
3413  $isChildObj = false;
3414  // $text is a DOM node needing expansion in the current frame
3415  $isLocalObj = false;
3416 
3417  # Title object, where $text came from
3418  $title = false;
3419 
3420  # $part1 is the bit before the first |, and must contain only title characters.
3421  # Various prefixes will be stripped from it later.
3422  $titleWithSpaces = $frame->expand( $piece['title'] );
3423  $part1 = trim( $titleWithSpaces );
3424  $titleText = false;
3425 
3426  # Original title text preserved for various purposes
3427  $originalTitle = $part1;
3428 
3429  # $args is a list of argument nodes, starting from index 0, not including $part1
3430  # @todo FIXME: If piece['parts'] is null then the call to getLength()
3431  # below won't work b/c this $args isn't an object
3432  $args = ( null == $piece['parts'] ) ? array() : $piece['parts'];
3433 
3434  $profileSection = null; // profile templates
3435 
3436  # SUBST
3437  if ( !$found ) {
3438 
3439  $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3440 
3441  # Possibilities for substMatch: "subst", "safesubst" or FALSE
3442  # Decide whether to expand template or keep wikitext as-is.
3443  if ( $this->ot['wiki'] ) {
3444  if ( $substMatch === false ) {
3445  $literal = true; # literal when in PST with no prefix
3446  } else {
3447  $literal = false; # expand when in PST with subst: or safesubst:
3448  }
3449  } else {
3450  if ( $substMatch == 'subst' ) {
3451  $literal = true; # literal when not in PST with plain subst:
3452  } else {
3453  $literal = false; # expand when not in PST with safesubst: or no prefix
3454  }
3455  }
3456  if ( $literal ) {
3457  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3458  $isLocalObj = true;
3459  $found = true;
3460  }
3461  }
3462 
3463  # Variables
3464  if ( !$found && $args->getLength() == 0 ) {
3465  $id = $this->mVariables->matchStartToEnd( $part1 );
3466  if ( $id !== false ) {
3467  $text = $this->getVariableValue( $id, $frame );
3468  if ( MagicWord::getCacheTTL( $id ) > -1 ) {
3469  $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
3470  }
3471  $found = true;
3472  }
3473  }
3474 
3475  # MSG, MSGNW and RAW
3476  if ( !$found ) {
3477  # Check for MSGNW:
3478  $mwMsgnw = MagicWord::get( 'msgnw' );
3479  if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3480  $nowiki = true;
3481  } else {
3482  # Remove obsolete MSG:
3483  $mwMsg = MagicWord::get( 'msg' );
3484  $mwMsg->matchStartAndRemove( $part1 );
3485  }
3486 
3487  # Check for RAW:
3488  $mwRaw = MagicWord::get( 'raw' );
3489  if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3490  $forceRawInterwiki = true;
3491  }
3492  }
3493 
3494  # Parser functions
3495  if ( !$found ) {
3496 
3497  $colonPos = strpos( $part1, ':' );
3498  if ( $colonPos !== false ) {
3499  $func = substr( $part1, 0, $colonPos );
3500  $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) );
3501  for ( $i = 0; $i < $args->getLength(); $i++ ) {
3502  $funcArgs[] = $args->item( $i );
3503  }
3504  try {
3505  $result = $this->callParserFunction( $frame, $func, $funcArgs );
3506  } catch ( Exception $ex ) {
3507  throw $ex;
3508  }
3509 
3510  # The interface for parser functions allows for extracting
3511  # flags into the local scope. Extract any forwarded flags
3512  # here.
3513  extract( $result );
3514  }
3515  }
3516 
3517  # Finish mangling title and then check for loops.
3518  # Set $title to a Title object and $titleText to the PDBK
3519  if ( !$found ) {
3520  $ns = NS_TEMPLATE;
3521  # Split the title into page and subpage
3522  $subpage = '';
3523  $relative = $this->maybeDoSubpageLink( $part1, $subpage );
3524  if ( $part1 !== $relative ) {
3525  $part1 = $relative;
3526  $ns = $this->mTitle->getNamespace();
3527  }
3528  $title = Title::newFromText( $part1, $ns );
3529  if ( $title ) {
3530  $titleText = $title->getPrefixedText();
3531  # Check for language variants if the template is not found
3532  if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
3533  $this->getConverterLanguage()->findVariantLink( $part1, $title, true );
3534  }
3535  # Do recursion depth check
3536  $limit = $this->mOptions->getMaxTemplateDepth();
3537  if ( $frame->depth >= $limit ) {
3538  $found = true;
3539  $text = '<span class="error">'
3540  . wfMessage( 'parser-template-recursion-depth-warning' )
3541  ->numParams( $limit )->inContentLanguage()->text()
3542  . '</span>';
3543  }
3544  }
3545  }
3546 
3547  # Load from database
3548  if ( !$found && $title ) {
3549  $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3550  if ( !$title->isExternal() ) {
3551  if ( $title->isSpecialPage()
3552  && $this->mOptions->getAllowSpecialInclusion()
3553  && $this->ot['html']
3554  ) {
3555  // Pass the template arguments as URL parameters.
3556  // "uselang" will have no effect since the Language object
3557  // is forced to the one defined in ParserOptions.
3558  $pageArgs = array();
3559  $argsLength = $args->getLength();
3560  for ( $i = 0; $i < $argsLength; $i++ ) {
3561  $bits = $args->item( $i )->splitArg();
3562  if ( strval( $bits['index'] ) === '' ) {
3563  $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3564  $value = trim( $frame->expand( $bits['value'] ) );
3565  $pageArgs[$name] = $value;
3566  }
3567  }
3568 
3569  // Create a new context to execute the special page
3570  $context = new RequestContext;
3571  $context->setTitle( $title );
3572  $context->setRequest( new FauxRequest( $pageArgs ) );
3573  $context->setUser( $this->getUser() );
3574  $context->setLanguage( $this->mOptions->getUserLangObj() );
3575  $ret = SpecialPageFactory::capturePath( $title, $context );
3576  if ( $ret ) {
3577  $text = $context->getOutput()->getHTML();
3578  $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3579  $found = true;
3580  $isHTML = true;
3581  $this->disableCache();
3582  }
3583  } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
3584  $found = false; # access denied
3585  wfDebug( __METHOD__ . ": template inclusion denied for " .
3586  $title->getPrefixedDBkey() . "\n" );
3587  } else {
3588  list( $text, $title ) = $this->getTemplateDom( $title );
3589  if ( $text !== false ) {
3590  $found = true;
3591  $isChildObj = true;
3592  }
3593  }
3594 
3595  # If the title is valid but undisplayable, make a link to it
3596  if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3597  $text = "[[:$titleText]]";
3598  $found = true;
3599  }
3600  } elseif ( $title->isTrans() ) {
3601  # Interwiki transclusion
3602  if ( $this->ot['html'] && !$forceRawInterwiki ) {
3603  $text = $this->interwikiTransclude( $title, 'render' );
3604  $isHTML = true;
3605  } else {
3606  $text = $this->interwikiTransclude( $title, 'raw' );
3607  # Preprocess it like a template
3608  $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3609  $isChildObj = true;
3610  }
3611  $found = true;
3612  }
3613 
3614  # Do infinite loop check
3615  # This has to be done after redirect resolution to avoid infinite loops via redirects
3616  if ( !$frame->loopCheck( $title ) ) {
3617  $found = true;
3618  $text = '<span class="error">'
3619  . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3620  . '</span>';
3621  wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
3622  }
3623  }
3624 
3625  # If we haven't found text to substitute by now, we're done
3626  # Recover the source wikitext and return it
3627  if ( !$found ) {
3628  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3629  if ( $profileSection ) {
3630  $this->mProfiler->scopedProfileOut( $profileSection );
3631  }
3632  return array( 'object' => $text );
3633  }
3634 
3635  # Expand DOM-style return values in a child frame
3636  if ( $isChildObj ) {
3637  # Clean up argument array
3638  $newFrame = $frame->newChild( $args, $title );
3639 
3640  if ( $nowiki ) {
3641  $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3642  } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3643  # Expansion is eligible for the empty-frame cache
3644  $text = $newFrame->cachedExpand( $titleText, $text );
3645  } else {
3646  # Uncached expansion
3647  $text = $newFrame->expand( $text );
3648  }
3649  }
3650  if ( $isLocalObj && $nowiki ) {
3651  $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3652  $isLocalObj = false;
3653  }
3654 
3655  if ( $profileSection ) {
3656  $this->mProfiler->scopedProfileOut( $profileSection );
3657  }
3658 
3659  # Replace raw HTML by a placeholder
3660  if ( $isHTML ) {
3661  $text = $this->insertStripItem( $text );
3662  } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3663  # Escape nowiki-style return values
3664  $text = wfEscapeWikiText( $text );
3665  } elseif ( is_string( $text )
3666  && !$piece['lineStart']
3667  && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3668  ) {
3669  # Bug 529: if the template begins with a table or block-level
3670  # element, it should be treated as beginning a new line.
3671  # This behavior is somewhat controversial.
3672  $text = "\n" . $text;
3673  }
3674 
3675  if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3676  # Error, oversize inclusion
3677  if ( $titleText !== false ) {
3678  # Make a working, properly escaped link if possible (bug 23588)
3679  $text = "[[:$titleText]]";
3680  } else {
3681  # This will probably not be a working link, but at least it may
3682  # provide some hint of where the problem is
3683  preg_replace( '/^:/', '', $originalTitle );
3684  $text = "[[:$originalTitle]]";
3685  }
3686  $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3687  . 'post-expand include size too large -->' );
3688  $this->limitationWarn( 'post-expand-template-inclusion' );
3689  }
3690 
3691  if ( $isLocalObj ) {
3692  $ret = array( 'object' => $text );
3693  } else {
3694  $ret = array( 'text' => $text );
3695  }
3696 
3697  return $ret;
3698  }
3699 
/**
 * Call a registered parser function and normalise what it returns.
 *
 * The function name is resolved through $this->mFunctionSynonyms, first as
 * a case-sensitive synonym and then, lower-cased with the content language,
 * as a case-insensitive one.
 *
 * @param object $frame Frame used to expand PPNode arguments and handed to
 *   SFH_OBJECT_ARGS hooks (presumably a PPFrame)
 * @param string $function Name of the parser function as written
 * @param array $args Arguments for the function; values may be PPNode
 *   objects or plain strings
 * @throws MWException If the registered callback is not callable
 * @return array Always has the key 'found'. When the function is unknown
 *   this is array( 'found' => false ). Otherwise it contains the hook's
 *   return value with 'text' filled in, plus 'isChildObj' => true when the
 *   hook asked for its output to be preprocessed ('noparse' => false).
 */
public function callParserFunction( $frame, $function, array $args = array() ) {
	global $wgContLang;

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return array( 'found' => false );
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		throw new MWException( "Tag hook for $function is not callable\n" );
	}

	# Hooks may declare their first parameter by reference. Hand them a
	# reference to a local handle instead of to $this itself, since taking
	# a reference to $this is restricted from PHP 7.1 onwards.
	$parser = $this;
	$allArgs = array( &$parser );
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = array();
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = array(
			'found' => true,
			'text' => $result,
		);
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += array(
			'found' => true,
		);
	}

	# By default the returned text is not run through the preprocessor
	$noparse = isset( $result['noparse'] ) ? $result['noparse'] : true;
	$preprocessFlags = isset( $result['preprocessFlags'] ) ? $result['preprocessFlags'] : 0;

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3807 
/**
 * Get the preprocessed DOM of a template together with the title it was
 * finally loaded from.
 *
 * Results are memoised in $this->mTplDomCache, keyed by prefixed DB key.
 * When the fetched title differs from the requested one, the mapping is
 * remembered in $this->mTplRedirCache so later calls skip the fetch.
 *
 * @param Title $title Title of the template to load
 * @return array Two elements: the DOM (or false when no text could be
 *   fetched) and the title the text came from
 */
public function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return array( $this->mTplDomCache[$titleText], $title );
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		# Note: isset() above does not treat a cached false as a hit
		$this->mTplDomCache[$titleText] = false;
		return array( false, $title );
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	if ( !$title->equals( $cacheTitle ) ) {
		# Remember where the requested title ended up
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			array( $title->getNamespace(), $title->getDBkey() );
	}

	return array( $dom, $title );
}
3847 
/**
 * Fetch the current revision of a title through the callback configured
 * in the parser options, memoising the result per prefixed DB key.
 *
 * The cache is a MapCacheLRU holding up to 100 entries and is created on
 * first use.
 *
 * @param Title $title
 * @return mixed Whatever the current-revision callback returned
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$key = $title->getPrefixedDBkey();

	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}

	if ( $this->currentRevisionCache->has( $key ) ) {
		return $this->currentRevisionCache->get( $key );
	}

	// Defaults to Parser::statelessFetchRevision()
	$fetcher = $this->mOptions->getCurrentRevisionCallback();
	$this->currentRevisionCache->set( $key, call_user_func( $fetcher, $title, $this ) );

	return $this->currentRevisionCache->get( $key );
}
3872 
/**
 * Look up a revision for a title without touching any parser state.
 *
 * @param Title $title
 * @param Parser|bool $parser Accepted for callback compatibility; not used
 * @return mixed Result of Revision::newFromTitle()
 */
public static function statelessFetchRevision( $title, $parser = false ) {
	$revision = Revision::newFromTitle( $title );

	return $revision;
}
3885 
/**
 * Fetch the unparsed text of a template via the configured template
 * callback and register every dependency it reports on the parser output.
 *
 * @param Title $title
 * @return array Two elements: the template text and the final title
 *   (the callback's 'finalTitle' when provided, otherwise $title)
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );
	$text = $fetched['text'];

	if ( isset( $fetched['finalTitle'] ) ) {
		$finalTitle = $fetched['finalTitle'];
	} else {
		$finalTitle = $title;
	}

	$deps = isset( $fetched['deps'] ) ? $fetched['deps'] : array();
	foreach ( $deps as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
		if ( $dep['title']->equals( $this->getTitle() ) ) {
			// If we transclude ourselves, the final result
			// will change based on the new version of the page
			$this->mOutput->setFlag( 'vary-revision' );
		}
	}

	return array( $text, $finalTitle );
}
3909 
/**
 * Fetch only the text of a template; convenience wrapper that discards
 * the final title returned by fetchTemplateAndTitle().
 *
 * @param Title $title
 * @return mixed First element of fetchTemplateAndTitle()'s result
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );

	return $text;
}
3919 
/**
 * Fetch the text of a template, following at most one redirect, and
 * collect the pages that the result depends on.
 *
 * The 'BeforeParserFetchTemplateAndtitle' hook may set $skip to abort the
 * fetch or set $id to select a specific revision.
 *
 * @param Title $title
 * @param Parser|bool $parser Parser whose fetchCurrentRevisionOfTitle() is
 *   used for current revisions; when false, Revision::newFromTitle() is
 *   called directly
 * @return array With keys:
 *   - 'text': the wikitext, or false when none could be obtained
 *   - 'finalTitle': the last title whose content was examined
 *   - 'deps': list of arrays with 'title', 'page_id' and 'rev_id'
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	# Needed for the NS_MEDIAWIKI message fallback below
	global $wgContLang;

	$text = $skip = false;
	$finalTitle = $title;
	$deps = array();

	# Loop to fetch the article, with up to 1 redirect
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			array( $parser, $title, &$skip, &$id ) );

		if ( $skip ) {
			$text = false;
			$deps[] = array(
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			);
			break;
		}
		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			$rev = Revision::newFromTitle( $title );
		}
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = LinkCache::singleton();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = array(
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id );
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = array(
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id );
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but the page may still exist as an interface message
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}
	return array(
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps );
}
4009 
/**
 * Fetch a file and register it as a dependency; convenience wrapper that
 * discards the title returned by fetchFileAndTitle().
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed First element of fetchFileAndTitle()'s result
 */
public function fetchFile( $title, $options = array() ) {
	list( $file ) = $this->fetchFileAndTitle( $title, $options );

	return $file;
}
4021 
/**
 * Fetch a file and record it on the parser output as an image dependency.
 *
 * The requested title is always registered. If the file that was found
 * reports a different title, that title is registered as well and
 * returned in place of the requested one.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (or a falsy value) and the title
 */
public function fetchFileAndTitle( $title, $options = array() ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( !$file || $title->equals( $file->getTitle() ) ) {
		return array( $file, $title );
	}

	# Update fetched file title
	$title = $file->getTitle();
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	return array( $file, $title );
}
4043 
/**
 * Look up a file without registering it as a dependency.
 *
 * @param Title $title
 * @param array $options Recognised keys, checked in this order:
 *   - 'broken': if set, no lookup is made and false is returned
 *   - 'sha1': if set, the file is looked up by key in the repo group
 *   Otherwise the file is looked up by title via wfFindFile().
 * @return mixed The lookup result, or false for a forced broken thumbnail
 */
protected function fetchFileNoRegister( $title, $options = array() ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return wfFindFile( $title, $options );
}
4064 
/**
 * Transclude a page from another wiki by fetching it over HTTP.
 *
 * @param Title $title Interwiki title to transclude
 * @param string $action Value of the 'action' URL parameter; callers in
 *   this class pass 'render' or 'raw'
 * @return string The fetched text, or a content-language error message
 *   when scary transclusion is disabled or the URL exceeds 255 bytes
 */
public function interwikiTransclude( $title, $action ) {
	global $wgEnableScaryTranscluding;

	if ( !$wgEnableScaryTranscluding ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( array( 'action' => $action ) );

	if ( strlen( $url ) > 255 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}
	return $this->fetchScaryTemplateMaybeFromCache( $url );
}
4087 
/**
 * Return the content behind a URL, preferring a sufficiently fresh copy
 * from the 'transcache' table and falling back to an HTTP request whose
 * result is then written back to that table.
 *
 * @param string $url
 * @return string The cached or fetched content, or a content-language
 *   error message when the request failed
 */
public function fetchScaryTemplateMaybeFromCache( $url ) {
	global $wgTranscludeCacheExpiry;

	$dbr = wfGetDB( DB_SLAVE );
	# Only rows at least as new as this timestamp count as cache hits
	$tsCond = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
	$obj = $dbr->selectRow( 'transcache', array( 'tc_time', 'tc_contents' ),
		array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $tsCond ) ) );
	if ( $obj ) {
		return $obj->tc_contents;
	}

	$req = MWHttpRequest::factory( $url, array(), __METHOD__ );
	$status = $req->execute(); // Status object
	if ( $status->isOK() ) {
		$text = $req->getContent();
	} elseif ( $req->getStatus() != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
	} else {
		return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
	}

	# Store the fresh copy, replacing any existing row for this URL
	$dbw = wfGetDB( DB_MASTER );
	$dbw->replace( 'transcache', array( 'tc_url' ), array(
		'tc_url' => $url,
		'tc_time' => $dbw->timestamp( time() ),
		'tc_contents' => $text
	) );
	return $text;
}
4122 
/**
 * Expand a triple-brace argument reference.
 *
 * The argument name is the expanded, trimmed title of $piece. If the
 * frame has no such argument, the first part of $piece is used as the
 * default (in html and pre output, or in wiki output inside a template);
 * failing that, the original triple-brace source is reconstructed.
 *
 * @param array $piece Has at least the keys 'title' and 'parts'
 * @param object $frame Frame providing expand(), getArgument(),
 *   isTemplate() and virtualBracketedImplode() (presumably a PPFrame)
 * @return array Either array( 'object' => ... ) or array( 'text' => ... )
 */
public function argSubstitution( $piece, $frame ) {
	$defaults = $piece['parts'];
	$rawName = $frame->expand( $piece['title'] );
	$value = $frame->getArgument( trim( $rawName ) );
	$node = false;

	if ( $value === false && $defaults->getLength() > 0 ) {
		if ( $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		) {
			# No match in frame, use the supplied default
			$node = $defaults->item( 0 )->getChildren();
		}
	}

	$warning = false;
	if ( !$this->incrementIncludeSize( 'arg', strlen( $value ) ) ) {
		$warning = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $value === false && $node === false ) {
		# No match anywhere
		$node = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $rawName, $defaults );
	}

	if ( $warning !== false ) {
		$value .= $warning;
	}

	if ( $node !== false ) {
		return array( 'object' => $node );
	}

	return array( 'text' => $value );
}
4169 
/**
 * Produce the output for an extension tag, normally wrapped in a strip
 * marker.
 *
 * In html output, or for function tag hooks in html/pre output, the
 * registered hook is called. Otherwise the tag is rebuilt as source text.
 *
 * @param array $params Uses the keys:
 *   - 'name': tag name node
 *   - 'attr': attribute text node (optional)
 *   - 'inner': inner content node (optional)
 *   - 'attributes': extra attributes as name => value (optional)
 *   - 'close': closing tag node (optional)
 * @param object $frame Frame used to expand the nodes (presumably a PPFrame)
 * @throws MWException If the hook is not callable or an unknown marker
 *   type is requested
 * @return string The strip marker, or the raw output when the marker type
 *   is 'none'
 */
public function extensionSubstitution( $params, $frame ) {
	$name = $frame->expand( $params['name'] );
	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
	$marker = "{$this->mUniqPrefix}-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	if ( $isFunctionTag ) {
		$markerType = 'none';
	} else {
		$markerType = 'general';
	}
	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			# Attributes parsed from the tag text win over the extra ones
			$attributes = $attributes + $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $this->mTagHooks[$name] ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}
			$output = call_user_func_array( $this->mTagHooks[$name],
				array( $content, $attributes, $this, $frame ) );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}

			# Pass a reference to a local handle rather than to $this
			# itself, since taking a reference to $this is restricted
			# from PHP 7.1 onwards.
			$parser = $this;
			$output = call_user_func_array( $callback, array( &$parser, $frame, $content, $attributes ) );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			# The hook returned flags alongside its output. Read only the
			# marker type; extract()ing the whole array would let a hook
			# overwrite unrelated locals such as $marker or $output.
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		if ( is_null( $attrText ) ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			# isset() also covers a missing 'close' key without a notice
			$close = !isset( $params['close'] ) ? '' : $frame->expand( $params['close'] );
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4261 
/**
 * Add to one of the include-size counters unless doing so would push it
 * past the configured maximum include size.
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Amount to add
 * @return bool False if the limit would be exceeded (the counter is left
 *   unchanged), true if the counter was incremented
 */
public function incrementIncludeSize( $type, $size ) {
	$proposed = $this->mIncludeSizes[$type] + $size;

	if ( $proposed > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $proposed;

	return true;
}
4277 
/**
 * Count one more expensive parser function call.
 *
 * @return bool True while the running count is still within the limit
 *   from the parser options, false once it has been exceeded
 */
public function incrementExpensiveFunctionCount() {
	$this->mExpensiveFunctionCount++;
	$limit = $this->mOptions->getExpensiveParserFunctionLimit();

	return !( $this->mExpensiveFunctionCount > $limit );
}
4287 
/**
 * Strip double-underscore magic words from the text and apply their
 * effects to the parser state and parser output.
 *
 * The first __TOC__ is replaced by a '<!--MWTOC-->' placeholder and any
 * further ones are removed. Every word that was matched is also stored
 * as a property on the parser output.
 *
 * @param string $text
 * @return string The text with the magic words removed
 */
public function doDoubleUnderscore( $text ) {

	# The position of __TOC__ needs to be recorded
	$mw = MagicWord::get( 'toc' );
	if ( $mw->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $mw->replace( '<!--MWTOC-->', $text, 1 );

		# Only keep the first one.
		$text = $mw->replace( '', $text );
	}

	# Now match and remove the rest of them
	$mwa = MagicWord::getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $mwa->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}
	# (bug 8068) Allow control over whether robots index a page.
	#
	# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
	# is not desirable, the last one on the page should win.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4346 
/**
 * Add a tracking category for the page being parsed; delegates to the
 * parser output, passing the current title.
 *
 * @param string $msg Identifier of the tracking category, passed through
 *   unchanged
 * @return mixed Whatever ParserOutput::addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$added = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );

	return $added;
}
4355 
4372  public function formatHeadings( $text, $origText, $isMain = true ) {
4374 
4375  # Inhibit editsection links if requested in the page
4376  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4377  $maybeShowEditLink = $showEditLink = false;
4378  } else {
4379  $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
4380  $showEditLink = $this->mOptions->getEditSection();
4381  }
4382  if ( $showEditLink ) {
4383  $this->mOutput->setEditSectionTokens( true );
4384  }
4385 
4386  # Get all headlines for numbering them and adding funky stuff like [edit]
4387  # links - this is for later, but we need the number of headlines right now
4388  $matches = array();
4389  $numMatches = preg_match_all(
4390  '/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
4391  $text,
4392  $matches
4393  );
4394 
4395  # if there are fewer than 4 headlines in the article, do not show TOC
4396  # unless it's been explicitly enabled.
4397  $enoughToc = $this->mShowToc &&
4398  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4399 
4400  # Allow user to stipulate that a page should have a "new section"
4401  # link added via __NEWSECTIONLINK__
4402  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4403  $this->mOutput->setNewSection( true );
4404  }
4405 
4406  # Allow user to remove the "new section"
4407  # link via __NONEWSECTIONLINK__
4408  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4409  $this->mOutput->hideNewSection( true );
4410  }
4411 
4412  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4413  # override above conditions and always show TOC above first header
4414  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4415  $this->mShowToc = true;
4416  $enoughToc = true;
4417  }
4418 
4419  # headline counter
4420  $headlineCount = 0;
4421  $numVisible = 0;
4422 
4423  # Ugh .. the TOC should have neat indentation levels which can be
4424  # passed to the skin functions. These are determined here
4425  $toc = '';
4426  $full = '';
4427  $head = array();
4428  $sublevelCount = array();
4429  $levelCount = array();
4430  $level = 0;
4431  $prevlevel = 0;
4432  $toclevel = 0;
4433  $prevtoclevel = 0;
4434  $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-" . self::MARKER_SUFFIX;
4435  $baseTitleText = $this->mTitle->getPrefixedDBkey();
4436  $oldType = $this->mOutputType;
4437  $this->setOutputType( self::OT_WIKI );
4438  $frame = $this->getPreprocessor()->newFrame();
4439  $root = $this->preprocessToDom( $origText );
4440  $node = $root->getFirstChild();
4441  $byteOffset = 0;
4442  $tocraw = array();
4443  $refers = array();
4444 
4445  foreach ( $matches[3] as $headline ) {
4446  $isTemplate = false;
4447  $titleText = false;
4448  $sectionIndex = false;
4449  $numbering = '';
4450  $markerMatches = array();
4451  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4452  $serial = $markerMatches[1];
4453  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4454  $isTemplate = ( $titleText != $baseTitleText );
4455  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4456  }
4457 
4458  if ( $toclevel ) {
4459  $prevlevel = $level;
4460  }
4461  $level = $matches[1][$headlineCount];
4462 
4463  if ( $level > $prevlevel ) {
4464  # Increase TOC level
4465  $toclevel++;
4466  $sublevelCount[$toclevel] = 0;
4467  if ( $toclevel < $wgMaxTocLevel ) {
4468  $prevtoclevel = $toclevel;
4469  $toc .= Linker::tocIndent();
4470  $numVisible++;
4471  }
4472  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4473  # Decrease TOC level, find level to jump to
4474 
4475  for ( $i = $toclevel; $i > 0; $i-- ) {
4476  if ( $levelCount[$i] == $level ) {
4477  # Found last matching level
4478  $toclevel = $i;
4479  break;
4480  } elseif ( $levelCount[$i] < $level ) {
4481  # Found first matching level below current level
4482  $toclevel = $i + 1;
4483  break;
4484  }
4485  }
4486  if ( $i == 0 ) {
4487  $toclevel = 1;
4488  }
4489  if ( $toclevel < $wgMaxTocLevel ) {
4490  if ( $prevtoclevel < $wgMaxTocLevel ) {
4491  # Unindent only if the previous toc level was shown :p
4492  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4493  $prevtoclevel = $toclevel;
4494  } else {
4495  $toc .= Linker::tocLineEnd();
4496  }
4497  }
4498  } else {
4499  # No change in level, end TOC line
4500  if ( $toclevel < $wgMaxTocLevel ) {
4501  $toc .= Linker::tocLineEnd();
4502  }
4503  }
4504 
4505  $levelCount[$toclevel] = $level;
4506 
4507  # count number of headlines for each level
4508  $sublevelCount[$toclevel]++;
4509  $dot = 0;
4510  for ( $i = 1; $i <= $toclevel; $i++ ) {
4511  if ( !empty( $sublevelCount[$i] ) ) {
4512  if ( $dot ) {
4513  $numbering .= '.';
4514  }
4515  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4516  $dot = 1;
4517  }
4518  }
4519 
4520  # The safe header is a version of the header text safe to use for links
4521 
4522  # Remove link placeholders by the link text.
4523  # <!--LINK number-->
4524  # turns into
4525  # link text with suffix
4526  # Do this before unstrip since link text can contain strip markers
4527  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4528 
4529  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4530  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4531 
4532  # Strip out HTML (first regex removes any tag not allowed)
4533  # Allowed tags are:
4534  # * <sup> and <sub> (bug 8393)
4535  # * <i> (bug 26375)
4536  # * <b> (r105284)
4537  # * <bdi> (bug 72884)
4538  # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
4539  #
4540  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4541  # to allow setting directionality in toc items.
4542  $tocline = preg_replace(
4543  array(
4544  '#<(?!/?(span|sup|sub|bdi|i|b)(?: [^>]*)?>).*?>#',
4545  '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b))(?: .*?)?>#'
4546  ),
4547  array( '', '<$1>' ),
4548  $safeHeadline
4549  );
4550  $tocline = trim( $tocline );
4551 
4552  # For the anchor, strip out HTML-y stuff period
4553  $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4554  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4555 
4556  # Save headline for section edit hint before it's escaped
4557  $headlineHint = $safeHeadline;
4558 
4559  if ( $wgExperimentalHtmlIds ) {
4560  # For reverse compatibility, provide an id that's
4561  # HTML4-compatible, like we used to.
4562  #
4563  # It may be worth noting, academically, that it's possible for
4564  # the legacy anchor to conflict with a non-legacy headline
4565  # anchor on the page. In this case likely the "correct" thing
4566  # would be to either drop the legacy anchors or make sure
4567  # they're numbered first. However, this would require people
4568  # to type in section names like "abc_.D7.93.D7.90.D7.A4"
4569  # manually, so let's not bother worrying about it.
4570  $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
4571  array( 'noninitial', 'legacy' ) );
4572  $safeHeadline = Sanitizer::escapeId( $safeHeadline );
4573 
4574  if ( $legacyHeadline == $safeHeadline ) {
4575  # No reason to have both (in fact, we can't)
4576  $legacyHeadline = false;
4577  }
4578  } else {
4579  $legacyHeadline = false;
4580  $safeHeadline = Sanitizer::escapeId( $safeHeadline,
4581  'noninitial' );
4582  }
4583 
4584  # HTML names must be case-insensitively unique (bug 10721).
4585  # This does not apply to Unicode characters per
4586  # http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
4587  # @todo FIXME: We may be changing them depending on the current locale.
4588  $arrayKey = strtolower( $safeHeadline );
4589  if ( $legacyHeadline === false ) {
4590  $legacyArrayKey = false;
4591  } else {
4592  $legacyArrayKey = strtolower( $legacyHeadline );
4593  }
4594 
4595  # Create the anchor for linking from the TOC to the section
4596  $anchor = $safeHeadline;
4597  $legacyAnchor = $legacyHeadline;
4598  if ( isset( $refers[$arrayKey] ) ) {
4599  for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4600  $anchor .= "_$i";
4601  $refers["${arrayKey}_$i"] = true;
4602  } else {
4603  $refers[$arrayKey] = true;
4604  }
4605  if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
4606  for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
4607  $legacyAnchor .= "_$i";
4608  $refers["${legacyArrayKey}_$i"] = true;
4609  } else {
4610  $refers[$legacyArrayKey] = true;
4611  }
4612 
4613  # Don't number the heading if it is the only one (looks silly)
4614  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4615  # the two are different if the line contains a link
4616  $headline = Html::element(
4617  'span',
4618  array( 'class' => 'mw-headline-number' ),
4619  $numbering
4620  ) . ' ' . $headline;
4621  }
4622 
4623  if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
4624  $toc .= Linker::tocLine( $anchor, $tocline,
4625  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4626  }
4627 
4628  # Add the section to the section tree
4629  # Find the DOM node for this header
4630  $noOffset = ( $isTemplate || $sectionIndex === false );
4631  while ( $node && !$noOffset ) {
4632  if ( $node->getName() === 'h' ) {
4633  $bits = $node->splitHeading();
4634  if ( $bits['i'] == $sectionIndex ) {
4635  break;
4636  }
4637  }
4638  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4639  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4640  $node = $node->getNextSibling();
4641  }
4642  $tocraw[] = array(
4643  'toclevel' => $toclevel,
4644  'level' => $level,
4645  'line' => $tocline,
4646  'number' => $numbering,
4647  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4648  'fromtitle' => $titleText,
4649  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4650  'anchor' => $anchor,
4651  );
4652 
4653  # give headline the correct <h#> tag
4654  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4655  // Output edit section links as markers with styles that can be customized by skins
4656  if ( $isTemplate ) {
4657  # Put a T flag in the section identifier, to indicate to extractSections()
4658  # that sections inside <includeonly> should be counted.
4659  $editsectionPage = $titleText;
4660  $editsectionSection = "T-$sectionIndex";
4661  $editsectionContent = null;
4662  } else {
4663  $editsectionPage = $this->mTitle->getPrefixedText();
4664  $editsectionSection = $sectionIndex;
4665  $editsectionContent = $headlineHint;
4666  }
4667  // We use a bit of pseudo-xml for editsection markers. The
4668  // language converter is run later on. Using a UNIQ style marker
4669  // leads to the converter screwing up the tokens when it
4670  // converts stuff. And trying to insert strip tags fails too. At
4671  // this point all real inputted tags have already been escaped,
4672  // so we don't have to worry about a user trying to input one of
4673  // these markers directly. We use a page and section attribute
4674  // to stop the language converter from converting these
4675  // important bits of data, but put the headline hint inside a
4676  // content block because the language converter is supposed to
4677  // be able to convert that piece of data.
4678  // Gets replaced with html in ParserOutput::getText
4679  $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4680  $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4681  if ( $editsectionContent !== null ) {
4682  $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4683  } else {
4684  $editlink .= '/>';
4685  }
4686  } else {
4687  $editlink = '';
4688  }
4689  $head[$headlineCount] = Linker::makeHeadline( $level,
4690  $matches['attrib'][$headlineCount], $anchor, $headline,
4691  $editlink, $legacyAnchor );
4692 
4693  $headlineCount++;
4694  }
4695 
4696  $this->setOutputType( $oldType );
4697 
4698  # Never ever show TOC if no headers
4699  if ( $numVisible < 1 ) {
4700  $enoughToc = false;
4701  }
4702 
4703  if ( $enoughToc ) {
4704  if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
4705  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4706  }
4707  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4708  $this->mOutput->setTOCHTML( $toc );
4709  $toc = self::TOC_START . $toc . self::TOC_END;
4710  $this->mOutput->addModules( 'mediawiki.toc' );
4711  }
4712 
4713  if ( $isMain ) {
4714  $this->mOutput->setSections( $tocraw );
4715  }
4716 
4717  # split up and insert constructed headlines
4718  $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4719  $i = 0;
4720 
4721  // build an array of document sections
4722  $sections = array();
4723  foreach ( $blocks as $block ) {
4724  // $head is zero-based, sections aren't.
4725  if ( empty( $head[$i - 1] ) ) {
4726  $sections[$i] = $block;
4727  } else {
4728  $sections[$i] = $head[$i - 1] . $block;
4729  }
4730 
4741  Hooks::run( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) );
4742 
4743  $i++;
4744  }
4745 
4746  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4747  // append the TOC at the beginning
4748  // Top anchor now in skin
4749  $sections[0] = $sections[0] . $toc . "\n";
4750  }
4751 
4752  $full .= join( '', $sections );
4753 
4754  if ( $this->mForceTocPosition ) {
4755  return str_replace( '<!--MWTOC-->', $toc, $full );
4756  } else {
4757  return $full;
4758  }
4759  }
4760 
/**
 * Transform wikitext before it is saved: normalise line endings and, when
 * the options request it, run the pre-save pass (pstPass2).
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title the text belongs to
 * @param User $user User performing the save; used for signatures
 * @param ParserOptions $options Parsing options
 * @param bool $clearState Whether to take the parser lock and clear state first
 * @return string The transformed text
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	if ( $clearState ) {
		# Kept only for its lifetime; presumably the lock is released when
		# this variable goes out of scope -- confirm against lock().
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	# Normalise line endings. "\r\n" is listed first so that a CRLF pair
	# becomes a single "\n" rather than two.
	$text = str_replace( array( "\r\n", "\r" ), "\n", $text );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$transformed = $this->mStripState->unstripBoth( $text );

	# Reset the user set above
	$this->setUser( null );

	return $transformed;
}
4795 
/**
 * Pre-save transform helper: expands {{subst:}} constructs, replaces
 * signature tildes and applies the "pipe trick" to internal links.
 *
 * @param string $text Wikitext to transform
 * @param User $user User whose signature replaces ~~~ / ~~~~
 * @return string Transformed text with trailing whitespace removed
 */
private function pstPass2( $text, $user ) {
	global $wgContLang;

	# Note: This is the timestamp saved as hardcoded wikitext to
	# the database, we use $wgContLang here in order to give
	# everyone the same signature and use the default one rather
	# than the one selected in each user's preferences.
	# (see also bug 12815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->format( 'T' ); # might vary on DST changeover!

	# Allow translation of timezones through wiki. format() can return
	# whatever the system uses, localised or not, so we cannot
	# ship premade translations.
	$key = 'timezone-' . strtolower( trim( $tzMsg ) );
	$msg = wfMessage( $key )->inContentLanguage();
	if ( $msg->exists() ) {
		$tzMsg = $msg->text();
	}

	$d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call.

	# Signatures. strtr() tries the longest key first, so five tildes are
	# never consumed as "four tildes plus one".
	$sigText = $this->getUserSig( $user );
	$text = strtr( $text, array(
		'~~~~~' => $d,
		'~~~~' => "$sigText $d",
		'~~~' => $sigText
	) );

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257)
	// The brackets here must be the full-width characters: with ASCII
	// parentheses this pattern would match every [[page|]] link.
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，)$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->mTitle->getText();
	$m = array();
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	# Trim trailing whitespace
	$text = rtrim( $text );

	return $text;
}
4877 
/**
 * Build the wikitext that replaces a user's signature tildes.
 *
 * A "fancy" signature that validates as well-formed XML is cleaned and
 * returned as-is; otherwise the result is the 'signature' (or
 * 'signature-anon') message built from the escaped user name and nickname.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use, or false to read the
 *   user's 'nickname' option
 * @param bool|null $fancySig Whether the nickname is raw markup, or null
 *   to read the user's 'fancysig' option
 * @return string
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	global $wgMaxSigChars;

	$username = $user->getName();

	# If not given, retrieve from the user object.
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}

	if ( is_null( $fancySig ) ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# Loose comparison on purpose: an empty nickname also falls back to
	# the user name.
	$nickname = $nickname == null ? $username : $nickname;

	if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		if ( $this->validateSig( $nickname ) !== false ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		} else {
			# Failed to validate; fall back to the default
			$nickname = $username;
			wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
		}
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	$msgName = $user->isAnon() ? 'signature-anon' : 'signature';

	return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()
		->title( $this->getTitle() )->text();
}
4934 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|bool The unchanged text when it is well formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4944 
/**
 * Clean up signature text: force {{subst:}} on template calls, strip
 * nested signature tildes and expand the result through the preprocessor.
 *
 * @param string $text Signature text
 * @param bool $parsing True when called from inside a running parse; when
 *   false this method locks the parser and starts its own parse on $wgTitle
 * @return string The cleaned signature, or $text untouched when the
 *   clean-signatures option is off
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		global $wgTitle;
		$magicScopeVariable = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = MagicWord::get( 'subst' );
	# Matches any "{{" that is not already followed by the subst magic word
	$pattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$replacement = '{{' . $subst->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $pattern, $replacement, $text ) );

	$dom = $this->preprocessToDom( $text );
	$frame = $this->getPreprocessor()->newFrame();
	$text = $frame->expand( $dom );

	if ( $standalone ) {
		$text = $this->mStripState->unstripBoth( $text );
	}

	return $text;
}
4985 
/**
 * Remove every run of three to five tildes from the text, so that a
 * signature cannot itself contain signature markup.
 *
 * @param string $text
 * @return string Text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4996 
/**
 * Public entry point for setting up a parse from outside the class; it
 * forwards all arguments to the private startParse().
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to clear the parser state
 */
public function startExternalParse(
	Title $title = null,
	ParserOptions $options,
	$outputType,
	$clearState = true
) {
	$this->startParse( $title, $options, $outputType, $clearState );
}
5011 
/**
 * Prepare this parser for a parse: store the title, the options and the
 * output type, then optionally reset the state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to call clearState() afterwards
 */
private function startParse(
	Title $title = null,
	ParserOptions $options,
	$outputType,
	$clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
5028 
/**
 * Preprocess message text, guarding against re-entrant calls.
 *
 * While a call is in progress, any nested call returns its input
 * unchanged instead of recursing.
 *
 * @param string $text Message text
 * @param ParserOptions $options
 * @param Title|null $title Context title; falls back to $wgTitle when empty
 * @return string Preprocessed text, or $text unchanged for a nested call
 * @throws Exception Whatever preprocess() throws is rethrown after the
 *   recursion guard has been released
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	if ( !$title ) {
		global $wgTitle;
		$title = $wgTitle;
	}

	try {
		$text = $this->preprocess( $text, $title, $options );
	} catch ( Exception $e ) {
		# Release the guard before propagating; otherwise every later
		# call in this request would return its input untransformed.
		$executing = false;
		throw $e;
	}

	$executing = false;
	return $text;
}
5056 
/**
 * Register a callback for an extension tag and add the tag to the strip
 * list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;

	# Strict comparison: loosely compared, numeric-looking names such as
	# "1e1" and "10" are equal, which would leave the new tag unlisted.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
5094 
/**
 * Register a callback for a transparent tag. Unlike setHook(), the tag is
 * not added to the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The callback previously registered for the tag, if any
 */
public function setTransparentTagHook( $tag, $callback ) {
	$tag = strtolower( $tag );

	$bad = array();
	if ( preg_match( '/[<>\r\n]/', $tag, $bad ) ) {
		throw new MWException( "Invalid character {$bad[0]} in setTransparentHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mTransparentTagHooks[$tag] ) ) {
		$previous = $this->mTransparentTagHooks[$tag];
	}
	$this->mTransparentTagHooks[$tag] = $callback;

	return $previous;
}
5122 
/**
 * Drop all tag hooks and function tag hooks, and restore the strip list
 * to its default. Transparent tag hooks are not touched here.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = array();
	$this->mTagHooks = array();
}
5131 
/**
 * Register a parser function callback under a magic word ID and index
 * every synonym of that magic word for lookup.
 *
 * @param string $id Magic word ID
 * @param callable $callback Callback stored for the function
 * @param int $flags Bit field of self::SFH_* flags. Unless SFH_NO_HASH is
 *   set, each synonym is registered with a leading '#'.
 * @throws MWException If no magic word can be obtained for $id
 * @return callable|null The callback previously registered for $id, if any
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
	global $wgContLang;

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = array( $callback, $flags );

	# Add to function cache
	$mw = MagicWord::get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $mw->getSynonyms();
	# 0 or 1; selects the case-insensitive or case-sensitive synonym table
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $wgContLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
5207 
/**
 * List the IDs under which parser function hooks are registered.
 *
 * @return array Keys of $this->mFunctionHooks
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5216 
/**
 * Register a function tag hook (callback plus flags) and add the tag to
 * the strip list.
 *
 * @param string $tag Tag name; it is lower-cased before use
 * @param callable $callback Callback stored for the tag
 * @param int $flags Flags stored alongside the callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return array|null The array( callback, flags ) pair previously
 *   registered for the tag, if any
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}
	$old = isset( $this->mFunctionTagHooks[$tag] ) ?
		$this->mFunctionTagHooks[$tag] : null;
	$this->mFunctionTagHooks[$tag] = array( $callback, $flags );

	# Strict comparison: loosely compared, numeric-looking names such as
	# "1e1" and "10" are equal, which would leave the new tag unlisted.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
5242 
/**
 * Replace link placeholders in $text in place, using the parser's link
 * holder array.
 *
 * @param string &$text Text to process; modified by reference
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5254 
/**
 * Run the text through the link holder array's replaceText() and return
 * the result.
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;

	return $holders->replaceText( $text );
}
5265 
/**
 * Render an image gallery from the body and attributes of a gallery tag.
 *
 * Each non-empty line of $text has the form "File name|param|param|caption".
 * Recognised parameters are alt, link and any parameter from the file
 * handler's parameter map (except width); everything else is joined back
 * together with '|' and used as the caption.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Tag attributes. 'mode', 'showfilename', 'caption',
 *   'perrow', 'widths' and 'heights' are read here; the whole array is also
 *   passed to the gallery as additional options.
 * @return string HTML produced by the gallery object, after the
 *   'AfterParserFetchFileAndTitle' hook has had a chance to change it
 */
public function renderImageGallery( $text, $params ) {
	$mode = isset( $params['mode'] ) ? $params['mode'] : false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

	// The mere presence of the attribute turns file names on.
	$ig->setShowFilename( isset( $params['showfilename'] ) );

	if ( isset( $params['caption'] ) ) {
		$caption = $params['caption'];
		$caption = htmlspecialchars( $caption );
		$caption = $this->replaceInternalLinks( $caption );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Hook handlers receive the parser by reference. Taking a reference
	// to $this itself is not supported on PHP 7.1+, so hand out a local
	// alias of the same object instead.
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', array( &$parser, &$ig ) );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = array();
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = array();
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			array( $this, $title, &$options, &$descQuery ) );
		# Don't register it now, as ImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = array(
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		);
		if ( $handler ) {
			$paramMap = $paramMap + $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = new MagicWordArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = array();
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			$parameterMatches = StringUtils::explode( '|', $matches[3] );

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
							$chars = self::EXT_LINK_URL_CLASS;
							$prots = $this->mUrlProtocols;
							// check to see if link matches an absolute url, if not then it must be a wiki link.
							if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) {
								$link = $linkValue;
							} else {
								$localLinkTitle = Title::newFromText( $linkValue );
								if ( $localLinkTitle !== null ) {
									$link = $localLinkTitle->getLinkURL();
								}
							}
							break;
						default:
							// Must be a handler specific parameter. Only reachable
							// when a handler exists, because the other map entries
							// come from the handler's own parameter map.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not. Append it to the caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label .= '|' . $parameterMatch;
							}
					}

				} else {
					// concatenate all other pipes
					$label .= '|' . $parameterMatch;
				}
			}
			// remove the first pipe
			$label = substr( $label, 1 );
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', array( $this, $ig, &$html ) );
	return $html;
}
5431 
/**
 * Get the image parameter map and matching MagicWordArray for a media
 * handler, building and caching them per handler class on first use.
 *
 * @param object|bool $handler Media handler, or a falsy value when the
 *   file has none (cached under the empty class name)
 * @return array Two elements: the map from magic word ID to
 *   array( type, name ), and a MagicWordArray over the map's keys
 */
public function getImageParams( $handler ) {
	# Parameters understood without any handler, grouped by type
	static $internalParamNames = array(
		'horizAlign' => array( 'left', 'right', 'center', 'none' ),
		'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ),
		'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ),
	);
	# Built once per request from the list above
	static $internalParamMap;

	$cacheKey = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$cacheKey] ) ) {
		return array( $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] );
	}

	if ( !$internalParamMap ) {
		$internalParamMap = array();
		foreach ( $internalParamNames as $group => $members ) {
			foreach ( $members as $member ) {
				# Magic word IDs use underscores where the name has hyphens
				$wordId = str_replace( '-', '_', "img_$member" );
				$internalParamMap[$wordId] = array( $group, $member );
			}
		}
	}

	# Add handler params on top of the internal ones
	$combined = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $wordId => $handlerParam ) {
			$combined[$wordId] = array( 'handler', $handlerParam );
		}
	}

	$this->mImageParams[$cacheKey] = $combined;
	$this->mImageParamsMagicArray[$cacheKey] = new MagicWordArray( array_keys( $combined ) );

	return array( $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] );
}
5475 
/**
 * Parse the option text of an image link and hand the result to Linker.
 *
 * The option text is split on "|". Each part is matched against the
 * parameter map from getImageParams(); parts that are not recognised or
 * fail validation are treated as the caption.
 *
 * @param Title $title Title of the file to show
 * @param string $options Pipe-separated option text
 * @param LinkHolderArray|bool $holders Passed on to stripAltText(), which
 *   uses it instead of the parser's own link holders when it is truthy
 * @return string Whatever Linker::makeImageLink() returns (presumably
 *   HTML -- confirm against Linker)
 */
5484  public function makeImage( $title, $options, $holders = false ) {
5485  # Check if the options text is of the form "options|alt text"
5486  # Options are:
5487  # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
5488  # * left no resizing, just left align. label is used for alt= only
5489  # * right same, but right aligned
5490  # * none same, but not aligned
5491  # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
5492  # * center center the image
5493  # * frame Keep original image size, no magnify-button.
5494  # * framed Same as "frame"
5495  # * frameless like 'thumb' but without a frame. Keeps user preferences for width
5496  # * upright reduce width for upright images, rounded to full __0 px
5497  # * border draw a 1px border around the image
5498  # * alt Text for HTML alt attribute (defaults to empty)
5499  # * class Set a class for img node
5500  # * link Set the target of the image link. Can be external, interwiki, or local
5501  # vertical-align values (no % or length right now):
5502  # * baseline
5503  # * sub
5504  # * super
5505  # * top
5506  # * text-top
5507  # * middle
5508  # * bottom
5509  # * text-bottom
5510 
5511  $parts = StringUtils::explode( "|", $options );
5512 
5513  # Give extensions a chance to select the file revision for us
# From here on $options is reused as the option array for the file lookup;
# the option text itself lives on in $parts.
5514  $options = array();
5515  $descQuery = false;
5516  Hooks::run( 'BeforeParserFetchFileAndTitle',
5517  array( $this, $title, &$options, &$descQuery ) );
5518  # Fetch and register the file (file title may be different via hooks)
5519  list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );
5520 
5521  # Get parameter map
5522  $handler = $file ? $file->getHandler() : false;
5523 
5524  list( $paramMap, $mwArray ) = $this->getImageParams( $handler );
5525 
5526  if ( !$file ) {
5527  $this->addTrackingCategory( 'broken-file-category' );
5528  }
5529 
5530  # Process the input parameters
5531  $caption = '';
5532  $params = array( 'frame' => array(), 'handler' => array(),
5533  'horizAlign' => array(), 'vertAlign' => array() );
# Set once the first of frameless/framed/thumbnail has been seen
5534  $seenformat = false;
5535  foreach ( $parts as $part ) {
5536  $part = trim( $part );
5537  list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
5538  $validated = false;
5539  if ( isset( $paramMap[$magicName] ) ) {
5540  list( $type, $paramName ) = $paramMap[$magicName];
5541 
5542  # Special case; width and height come in one variable together
5543  if ( $type === 'handler' && $paramName === 'width' ) {
5544  $parsedWidthParam = $this->parseWidthParam( $value );
5545  if ( isset( $parsedWidthParam['width'] ) ) {
5546  $width = $parsedWidthParam['width'];
5547  if ( $handler->validateParam( 'width', $width ) ) {
5548  $params[$type]['width'] = $width;
5549  $validated = true;
5550  }
5551  }
5552  if ( isset( $parsedWidthParam['height'] ) ) {
5553  $height = $parsedWidthParam['height'];
5554  if ( $handler->validateParam( 'height', $height ) ) {
5555  $params[$type]['height'] = $height;
5556  $validated = true;
5557  }
5558  }
5559  # else no validation -- bug 13436
5560  } else {
5561  if ( $type === 'handler' ) {
5562  # Validate handler parameter
5563  $validated = $handler->validateParam( $paramName, $value );
5564  } else {
5565  # Validate internal parameters
5566  switch ( $paramName ) {
5567  case 'manualthumb':
5568  case 'alt':
5569  case 'class':
5570  # @todo FIXME: Possibly check validity here for
5571  # manualthumb? downstream behavior seems odd with
5572  # missing manual thumbs.
5573  $validated = true;
5574  $value = $this->stripAltText( $value, $holders );
5575  break;
5576  case 'link':
# Three outcomes: empty value -> 'no-link'; value starting with a known
# protocol -> 'link-url' (only if the whole value is a valid URL);
# anything else -> 'link-title' if it parses as a title.
5577  $chars = self::EXT_LINK_URL_CLASS;
5578  $prots = $this->mUrlProtocols;
5579  if ( $value === '' ) {
5580  $paramName = 'no-link';
5581  $value = true;
5582  $validated = true;
5583  } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
5584  if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
5585  $paramName = 'link-url';
5586  $this->mOutput->addExternalLink( $value );
5587  if ( $this->mOptions->getExternalLinkTarget() ) {
5588  $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
5589  }
5590  $validated = true;
5591  }
5592  } else {
5593  $linkTitle = Title::newFromText( $value );
5594  if ( $linkTitle ) {
5595  $paramName = 'link-title';
5596  $value = $linkTitle;
5597  $this->mOutput->addLink( $linkTitle );
5598  $validated = true;
5599  }
5600  }
5601  break;
5602  case 'frameless':
5603  case 'framed':
5604  case 'thumbnail':
5605  // use first appearing option, discard others.
5606  $validated = ! $seenformat;
5607  $seenformat = true;
5608  break;
5609  default:
5610  # Most other things appear to be empty or numeric...
5611  $validated = ( $value === false || is_numeric( trim( $value ) ) );
5612  }
5613  }
5614 
5615  if ( $validated ) {
5616  $params[$type][$paramName] = $value;
5617  }
5618  }
5619  }
# Anything that was not accepted as a parameter becomes the caption; a
# later unaccepted part overwrites an earlier one.
5620  if ( !$validated ) {
5621  $caption = $part;
5622  }
5623  }
5624 
5625  # Process alignment parameters
# The alignment keyword was stored as an array key above, so key() reads it
# back (the key at the array's current position).
5626  if ( $params['horizAlign'] ) {
5627  $params['frame']['align'] = key( $params['horizAlign'] );
5628  }
5629  if ( $params['vertAlign'] ) {
5630  $params['frame']['valign'] = key( $params['vertAlign'] );
5631  }
5632 
5633  $params['frame']['caption'] = $caption;
5634 
5635  # Will the image be presented in a frame, with the caption below?
5636  $imageIsFramed = isset( $params['frame']['frame'] )
5637  || isset( $params['frame']['framed'] )
5638  || isset( $params['frame']['thumbnail'] )
5639  || isset( $params['frame']['manualthumb'] );
5640 
5641  # In the old days, [[Image:Foo|text...]] would set alt text. Later it
5642  # came to also set the caption, ordinary text after the image -- which
5643  # makes no sense, because that just repeats the text multiple times in
5644  # screen readers. It *also* came to set the title attribute.
5645  #
5646  # Now that we have an alt attribute, we should not set the alt text to
5647  # equal the caption: that's worse than useless, it just repeats the
5648  # text. This is the framed/thumbnail case. If there's no caption, we
5649  # use the unnamed parameter for alt text as well, just for the time be-
5650  # ing, if the unnamed param is set and the alt param is not.
5651  #
5652  # For the future, we need to figure out if we want to tweak this more,
5653  # e.g., introducing a title= parameter for the title; ignoring the un-
5654  # named parameter entirely for images without a caption; adding an ex-
5655  # plicit caption= parameter and preserving the old magic unnamed para-
5656  # meter for BC; ...
5657  if ( $imageIsFramed ) { # Framed image
5658  if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
5659  # No caption or alt text, add the filename as the alt text so
5660  # that screen readers at least get some description of the image
5661  $params['frame']['alt'] = $title->getText();
5662  }
5663  # Do not set $params['frame']['title'] because tooltips don't make sense
5664  # for framed images
5665  } else { # Inline image
5666  if ( !isset( $params['frame']['alt'] ) ) {
5667  # No alt text, use the "caption" for the alt text
5668  if ( $caption !== '' ) {
5669  $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5670  } else {
5671  # No caption, fall back to using the filename for the
5672  # alt text
5673  $params['frame']['alt'] = $title->getText();
5674  }
5675  }
5676  # Use the "caption" for the tooltip text
5677  $params['frame']['title'] = $this->stripAltText( $caption, $holders );
5678  }
5679 
5680  Hooks::run( 'ParserMakeImageParams', array( $title, $file, &$params, $this ) );
5681 
5682  # Linker does the rest
# $options may have been filled in by the BeforeParserFetchFileAndTitle hook
5683  $time = isset( $options['time'] ) ? $options['time'] : false;
5684  $ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
5685  $time, $descQuery, $this->mOptions->getThumbSize() );
5686 
5687  # Give the handler a chance to modify the parser object
5688  if ( $handler ) {
5689  $handler->parserTransformHook( $this, $file );
5690  }
5691 
5692  return $ret;
5693  }
5694 
/**
 * Turn caption wikitext into plain text usable in an attribute such as
 * alt or title: link placeholders are resolved, strip markers expanded
 * and all tags removed.
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Link holder array to resolve
 *   placeholders with; when falsy the parser's own holders are used
 * @return string
 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip). We can't always use
	# replaceLinkHoldersText() here: when called from
	# replaceInternalLinks2(), mLinkHolders won't be up-to-date, so the
	# caller supplies the holders to use.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# Placeholders must not survive into attributes, where they would
	# later be expanded to HTML: expand them now, then drop the tags.
	$plain = $this->mStripState->unstripBoth( $plain );

	return Sanitizer::stripAllTags( $plain );
}
5718 
/**
 * Mark the current parser output as uncacheable.
 *
 * @throws MWException If there is no current output object, i.e. the
 *   method is called outside of a parse
 */
public function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	$output = $this->mOutput;
	if ( !$output ) {
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}

	// old style, for compatibility
	$output->setCacheTime( -1 );
	// new style, for consistency
	$output->updateCacheExpiry( 0 );
}
5732 
/**
 * Expand variables and strip markers in a piece of text. The argument is
 * taken by reference, so the caller's variable is updated as well.
 *
 * @param string &$text Text to process; overwritten with the result
 * @param PPFrame|bool $frame Frame passed on to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $expanded );

	return $text;
}
/**
 * List every tag name that has a hook registered: transparent tag hooks
 * first, then tag hooks, then function tag hooks.
 *
 * @return array
 */
public function getTags() {
	$transparent = array_keys( $this->mTransparentTagHooks );
	$regular = array_keys( $this->mTagHooks );
	$functional = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparent, $regular, $functional );
}
5759 
/**
 * Replace transparent tags in the text with the output of their hooks.
 *
 * The tags are first extracted and swapped for markers; each marker is
 * then replaced by its hook's return value, or by the original tag text
 * when no hook is registered under the lower-cased element name.
 *
 * @param string $text
 * @return string
 */
public function replaceTransparentTags( $text ) {
	$extracted = array();
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $extracted, $this->mUniqPrefix );

	$substitutions = array();
	foreach ( $extracted as $marker => $info ) {
		list( $element, $content, $params, $tag ) = $info;
		$hookKey = strtolower( $element );

		if ( !isset( $this->mTransparentTagHooks[$hookKey] ) ) {
			# No hook for this element: put the original tag text back
			$substitutions[$marker] = $tag;
			continue;
		}

		$substitutions[$marker] = call_user_func_array(
			$this->mTransparentTagHooks[$hookKey],
			array( $content, $params, $this )
		);
	}

	return strtr( $text, $substitutions );
}
5791 
/**
 * Fetch a section out of some wikitext, or replace it with new text.
 *
 * Shared back end of getSection() and replaceSection().
 *
 * @param string $text Page wikitext
 * @param string $sectionId Section index, optionally preceded by
 *   dash-separated flags. The only flag handled here is "T", which turns
 *   on PTD_FOR_INCLUSION for preprocessing (e.g. "T-2").
 * @param string $mode Either 'get' or 'replace'
 * @param string $newText In 'replace' mode the text that replaces the
 *   section; in 'get' mode the value returned when the section is not found
 * @return string In 'get' mode the section text (or $newText if it does
 *   not exist); in 'replace' mode the whole text with the section replaced
 *   (or $text unchanged if the section does not exist)
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Keep the returned object in a local so it lives until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	$isGet = ( $mode === 'get' );
	$isReplace = ( $mode === 'replace' );

	# Process section extraction flags: everything before the last dash
	$flags = 0;
	$idParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $idParts );
	if ( in_array( 'T', $idParts, true ) ) {
		$flags |= self::PTD_FOR_INCLUSION;
	}

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			return $isGet ? '' : $newText;
		}
		return $isGet ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks.
	# They can only occur at the top level, so iterating over the root's
	# children is enough to find them.
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			# Everything ahead of the target is copied through when replacing
			if ( $isReplace ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $isGet ? $newText : $text;
	}

	# Find the end of the section, including nested sections
	do {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			$curLevel = $bits['level'];
			# A different heading at the same or a shallower level ends the section
			if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		if ( $isGet ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	# Write out the remainder (in replace mode only)
	if ( $isReplace ) {
		# Output the replacement text.
		# Two newlines are appended because trailing whitespace in $newText is
		# conventionally stripped by the editor, so both are needed to restore
		# the paragraph gap. Nothing is appended when there is no new text.
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $outText ) ) {
		# Re-insert stripped tags
		$unstripped = $this->mStripState->unstripBoth( $outText );
		$outText = rtrim( $unstripped );
	}

	return $outText;
}
5931 
/**
 * Return the wikitext of one section of a page.
 *
 * Thin wrapper that runs extractSections() in 'get' mode.
 *
 * @param string $text Page wikitext
 * @param string $sectionId Section identifier as accepted by extractSections()
 * @param string $defaultText Value returned when the section does not exist
 * @return string Section text, or $defaultText
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';

	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5949 
/**
 * Return the page wikitext with one section swapped for new text.
 *
 * Thin wrapper that runs extractSections() in 'replace' mode.
 *
 * @param string $oldText Current page wikitext
 * @param string $sectionId Section identifier as accepted by extractSections()
 * @param string $newText Text that replaces the section
 * @return string Modified wikitext
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';

	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
5965 
/**
 * Get the revision ID stored on this parser.
 *
 * @return int|null Value of mRevisionId (null is treated as "no revision"
 *   by getRevisionObject())
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;

	return $revisionId;
}
5974 
/**
 * Get the revision object that belongs to mRevisionId, loading and
 * caching it in mRevisionObject on first use.
 *
 * @return Revision|null Null when no revision ID is set; otherwise whatever
 *   the lookup produced
 */
public function getRevisionObject() {
	# Already resolved earlier
	if ( $this->mRevisionObject !== null ) {
		return $this->mRevisionObject;
	}
	# Nothing to look up
	if ( $this->mRevisionId === null ) {
		return null;
	}

	$currentRevisionCallback = $this->mOptions->getCurrentRevisionCallback();
	$rev = call_user_func( $currentRevisionCallback, $this->getTitle(), $this );

	# If the parse is for a new revision, then the callback should have
	# already been set to force the object and should match mRevisionId.
	# If not, try to fetch by mRevisionId for sanity.
	$idMismatch = $rev && $rev->getId() != $this->mRevisionId;
	if ( $idMismatch ) {
		$rev = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $rev;

	return $rev;
}
6004 
/**
 * Get the timestamp associated with the current revision, adjusted to
 * the site-default timezone offset. The value is computed once and
 * cached in mRevisionTimestamp.
 *
 * @return string Adjusted timestamp as returned by Language::userAdjust()
 */
public function getRevisionTimestamp() {
	if ( is_null( $this->mRevisionTimestamp ) ) {
		# The content language lives in a global; without this declaration
		# $wgContLang would be an undefined local and the call below would
		# be made on null.
		global $wgContLang;

		# Fall back to the current time when there is no revision object
		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		#
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6028 
/**
 * Get the name of the user that edited the current revision. The value
 * is cached in mRevisionUser once it has been determined.
 *
 * @return string|null User name, or null if it could not be determined
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionObject();
	if ( $revision ) {
		$this->mRevisionUser = $revision->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this template is subst: the revision id will be blank,
		# so just use the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6048 
/**
 * Get the size of the current revision. The value is cached in
 * mRevisionSize once it has been determined.
 *
 * @return int|null Revision size, or null if it could not be determined
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revision = $this->getRevisionObject();
	if ( $revision ) {
		$this->mRevisionSize = $revision->getSize();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this variable is subst: the revision id will be blank,
		# so just use the parser input size, because the substitution
		# itself will change the size.
		$this->mRevisionSize = $this->mInputSize;
	}

	return $this->mRevisionSize;
}
6069 
/**
 * Set the default sort key, both on the parser and as the 'defaultsort'
 * property of the current output object.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$output = $this->mOutput;
	$output->setProperty( 'defaultsort', $sort );
}
6079 
/**
 * Get the default sort key, with an empty string standing in for
 * "none set".
 *
 * @return string The stored sort key, or '' when mDefaultSort is false
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6097 
/**
 * Get the raw default sort key exactly as stored. Unlike
 * getDefaultSort(), a stored value of false is returned as false
 * rather than being turned into an empty string.
 *
 * @return string|bool Value of mDefaultSort
 */
public function getCustomDefaultSort() {
	$stored = $this->mDefaultSort;

	return $stored;
}
6107 
/**
 * Try to guess the section anchor name based on a wikitext fragment,
 * presumably extracted from a heading, for example "Header" from
 * "== Header ==".
 *
 * @param string $text Heading wikitext
 * @return string Anchor, including the leading '#'
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$text = $this->stripSectionName( $text );
	# Normalise whitespace before escaping, so that headings containing
	# repeated spaces or underscores give the same anchor as a heading
	# whose whitespace has already been collapsed.
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );

	return '#' . Sanitizer::escapeId( $text, 'noninitial' );
}
6123 
/**
 * Same as guessSectionNameFromWikiText(), but the ID is escaped with the
 * additional 'legacy' option of Sanitizer::escapeId().
 *
 * @param string $text Heading wikitext
 * @return string Anchor, including the leading '#'
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$text = $this->stripSectionName( $text );
	# Normalise whitespace before escaping, so that headings containing
	# repeated spaces or underscores give the same anchor as a heading
	# whose whitespace has already been collapsed.
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );

	return '#' . Sanitizer::escapeId( $text, array( 'noninitial', 'legacy' ) );
}
6138 
/**
 * Reduce a section heading to bare text: link markup is replaced by the
 * link's text, wikitext quotes are processed through doQuotes(), and
 * anything between '<' and '>' is removed.
 *
 * @param string $text Heading wikitext
 * @return string Stripped text
 */
public function stripSectionName( $text ) {
	# Strip internal link markup: piped links keep their label,
	# unpiped links keep their target. The patterns run in this order.
	$internalLinkPatterns = array(
		'/\[\[:?([^[|]+)\|([^[]+)\]\]/',
		'/\[\[:?([^[]+)\|?\]\]/',
	);
	$internalLinkReplacements = array( '$2', '$1' );
	$text = preg_replace( $internalLinkPatterns, $internalLinkReplacements, $text );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLinkPattern = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLinkPattern, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6171 
/**
 * Test helper: run only variable replacement, unstripping and HTML tag
 * sanitising on the text, after setting up a parse for the given title
 * and options.
 *
 * @param string $text Text to process
 * @param Title $title Title to parse against
 * @param ParserOptions $options Parser options
 * @param int $outputType One of the self::OT_* constants
 * @return string Processed text
 */
public function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) {
	# Keep the returned object in a local so it lives until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6191 
/**
 * Test helper: run preSaveTransform() on the text, using the user taken
 * from the supplied parser options.
 *
 * @param string $text Text to transform
 * @param Title $title Title to transform against
 * @param ParserOptions $options Parser options; also the source of the user
 * @return string Transformed text
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
6201 
/**
 * Test helper: run testSrvus() with the output type fixed to
 * self::OT_PREPROCESS.
 *
 * @param string $text Text to process
 * @param Title $title Title to parse against
 * @param ParserOptions $options Parser options
 * @return string Processed text
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;

	return $this->testSrvus( $text, $title, $options, $outputType );
}
6211 
/**
 * Apply a callback to every stretch of the string that lies outside of
 * strip markers, copying the markers themselves through untouched.
 *
 * A marker runs from an occurrence of mUniqPrefix up to and including
 * the next self::MARKER_SUFFIX. A prefix with no suffix after it is
 * copied verbatim to the end of the string.
 *
 * @param string $s Input string
 * @param callable $callback Receives a chunk of non-marker text and
 *   returns its replacement
 * @return string Reassembled string
 */
public function markerSkipCallback( $s, $callback ) {
	$out = '';
	$totalLength = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	for ( $pos = 0; $pos < $totalLength; ) {
		$prefixPos = strpos( $s, $this->mUniqPrefix, $pos );
		if ( $prefixPos === false ) {
			# No more markers: the rest is ordinary text
			$out .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Ordinary text in front of the marker
		$out .= call_user_func( $callback, substr( $s, $pos, $prefixPos - $pos ) );

		$suffixPos = strpos( $s, self::MARKER_SUFFIX, $prefixPos );
		if ( $suffixPos === false ) {
			# Unterminated marker: copy the remainder as it is
			$out .= substr( $s, $prefixPos );
			break;
		}

		# Copy the complete marker and carry on right behind it
		$markerEnd = $suffixPos + $suffixLength;
		$out .= substr( $s, $prefixPos, $markerEnd - $prefixPos );
		$pos = $markerEnd;
	}

	return $out;
}
6251 
/**
 * Pass the text through killMarkers() of the current strip state and
 * return the result.
 *
 * @param string $text Text possibly containing strip markers
 * @return string Result of StripState::killMarkers()
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6261 
/**
 * Bundle half-parsed text together with the parts of the strip state and
 * link holders that were computed for it, so it can be restored later
 * with unserializeHalfParsedText().
 *
 * @param string $text Half-parsed text
 * @return array Array with the keys 'text', 'version', 'stripState'
 *   and 'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
	return array(
		'text' => $text,
		# Checked again when the data is unserialized
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	);
}
6287 
/**
 * Restore text produced by serializeHalfParsedText() into this parser:
 * the stored strip state and link holders are merged into the current
 * ones and the text is returned.
 *
 * @param array $data Array as produced by serializeHalfParsedText()
 * @return string The text after both merges
 * @throws MWException If $data has no version or a version other than
 *   self::HALF_PARSED_VERSION
 */
public function unserializeHalfParsedText( $data ) {
	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# Both merge steps take a list of texts and return the list
	$textList = array( $data['text'] );

	# First, extract the strip state.
	$textList = $this->mStripState->merge( $data['stripState'], $textList );

	# Now renumber links
	$textList = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $textList );

	# Should be good to go.
	return $textList[0];
}
6318 
/**
 * Check whether the given data carries the version that
 * unserializeHalfParsedText() accepts.
 *
 * @param array $data Data to check
 * @return bool True if a 'version' entry exists and equals
 *   self::HALF_PARSED_VERSION
 */
public function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
6331 
/**
 * Parse a width parameter into an array of dimensions.
 *
 * Two forms are recognised: "<width>x<height>" and "<width>" alone, each
 * optionally followed by "px". Missing digits become 0.
 *
 * @param string $value Parameter value, e.g. "100x200px" or "300px"
 * @return array Empty if the value is empty or not recognised; otherwise
 *   an array with 'width' and, for the two-dimensional form, 'height'
 */
public function parseWidthParam( $value ) {
	if ( $value === '' ) {
		return array();
	}

	# (bug 13500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$m = array();
	if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
		return array(
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		);
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		# intval() only reads the leading digits, so a "px" suffix is ignored
		return array( 'width' => intval( $value ) );
	}

	return array();
}
6359 
/**
 * Mark the parser as being in the middle of a parse and return an object
 * that clears the mark again.
 *
 * The reset happens in the callback given to ScopedCallback, so callers
 * must keep the returned object in a local variable for as long as the
 * parse lasts.
 *
 * @return ScopedCallback Object holding the callback that resets mInParse
 * @throws MWException If the parser is already marked as parsing
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException(
			"Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively?"
		);
	}
	$this->mInParse = true;

	# The closure receives the parser through an explicit variable
	# instead of relying on $this
	$parser = $this;

	return new ScopedCallback( function() use ( $parser ) {
		$parser->mInParse = false;
	} );
}
6383 
/**
 * Remove a single <p>...</p> wrapper that encloses the whole HTML
 * fragment. The input is returned unchanged if it is not wrapped that
 * way or if the wrapped content itself contains a closing </p>.
 *
 * @param string $html HTML fragment
 * @return string The fragment without its outer paragraph, or the input
 */
public static function stripOuterParagraph( $html ) {
	$m = array();
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );
	if ( !$isWrapped ) {
		return $html;
	}

	# More than one paragraph inside: leave everything alone
	if ( strpos( $m[1], '</p>' ) !== false ) {
		return $html;
	}

	return $m[1];
}
6404 
/**
 * Return a parser that is safe to start a parse with: this object when
 * it is not in the middle of a parse, otherwise a new instance of the
 * class named in $wgParserConf['class'].
 *
 * @return Parser This parser or a newly created one
 */
public function getFreshParser() {
	# The parser configuration lives in a global; without this declaration
	# $wgParserConf would be an undefined local and the instantiation below
	# would fail.
	global $wgParserConf;

	if ( $this->mInParse ) {
		return new $wgParserConf['class']( $wgParserConf );
	}

	return $this;
}
6423 }
setTitle($t)
Set the context title.
Definition: Parser.php:703
$mAutonumber
Definition: Parser.php:154
$mPPNodeCount
Definition: Parser.php:164
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:1983
static getVariableIDs()
Get an array of parser variable IDs.
Definition: MagicWord.php:262
null means default in associative array form
Definition: hooks.txt:1712
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1712
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1686
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1186
$mTplRedirCache
Definition: Parser.php:166
static tocList($toc, $lang=false)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1697
static makeExternalLink($url, $text, $escape=true, $linktype= '', $attribs=array(), $title=null)
Make an external link.
Definition: Linker.php:1056
although this is appropriate in some e g manual creation of blank tables prior to an import Most of the PHP scripts need to be run from the command line Prior to doing so
Definition: README:1
getBoolOption($oname)
Get the user's current setting for a given option, as a boolean value.
Definition: User.php:2635
return true to allow those checks to and false if checking is done remove or add to the links of a group of changes in EnhancedChangesList use this to change the tables headers temp or archived zone change it to an object instance and return false override the list derivative used the name of the old file when set the default code will be skipped true if there is text before this autocomment true if there is text after this autocomment add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1273
const OT_PREPROCESS
Definition: Defines.php:227
$mLastSection
Definition: Parser.php:159
$mDoubleUnderscores
Definition: Parser.php:166
Group all the pieces relevant to the context of a request into one instance.
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions instead of letting the login form give the generic error message that the account does not exist For when the account has been renamed or deleted or an array to pass a message key and parameters but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:952
or
false for read/write
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:204
getArticleID($flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition: Title.php:3115
$wgSitename
Name of the site.
recursivePreprocess($text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:627
getText()
Get the text form (spaces not underscores) of the main part.
Definition: Title.php:866
replaceExternalLinks($text)
Replace external links (REL)
Definition: Parser.php:1721
static isNonincludable($index)
Is it not possible to use pages from this namespace as a template?
nextLinkID()
Definition: Parser.php:793
const SPACE_NOT_NL
Definition: Parser.php:94
static replaceUnusualEscapes($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1838
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:325
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
doHeadings($text)
Parse headers and return html.
Definition: Parser.php:1498
const OT_PLAIN
Definition: Parser.php:115
static removeHTMLtags($text, $processCallback=null, $args=array(), $extratags=array(), $removetags=array())
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:372
static isWellFormedXmlFragment($text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:742
const OT_WIKI
Definition: Parser.php:112
User $mUser
Definition: Parser.php:171
initialiseVariables()
initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:3207
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1712
=Architecture==Two class hierarchies are used to provide the functionality associated with the different content models:*Content interface(and AbstractContent base class) define functionality that acts on the concrete content of a page, and *ContentHandler base class provides functionality specific to a content model, but not acting on concrete content.The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content.These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text.All manipulation and analysis of page content must be done via the appropriate methods of the Content object.For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers.The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id).Also Title, WikiPage and Revision now have getContentHandler() methods for convenience.ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page.ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type.However, it is recommended to instead use WikiPage::getContent() resp.Revision::getContent() to get a page's content as a Content object.These two methods should be the ONLY way in which page content is accessed.Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides().This is similar to what WikiPage::getActionOverrides() was already doing.==Serialization==With the ContentHandler facility, page content no longer has to be text based.Objects implementing the Content interface are used to represent and handle the content internally.For storage and data exchange, each content model supports at least one serialization format via 
ContentHandler::serializeContent($content).The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats().Content serialization formats are identified using MIME type like strings.The following formats are built in:*text/x-wiki-wikitext *text/javascript-for js pages *text/css-for css pages *text/plain-for future use, e.g.with plain text messages.*text/html-for future use, e.g.with plain html messages.*application/vnd.php.serialized-for future use with the api and for extensions *application/json-for future use with the api, and for use by extensions *application/xml-for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant.Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model and format of the content should always be handled explicitly.Without that information, interpretation of the provided content is not reliable.The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export.Also note that the API will provide encapsulated, serialized content-so if the API was called with format=json, and contentformat is also json(or rather, application/json), the page content is represented as a string containing an escaped json structure.Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form.==Compatibility==The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content.However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page's content model, and will now generate warnings when used.Most importantly, the following functions have been deprecated:*Revisions::getText() and Revisions::getRawText() is 
deprecated in favor Revisions::getContent()*WikiPage::getText() is deprecated in favor WikiPage::getContent() Also, the old Article::getContent()(which returns text) is superceded by Article::getContentObject().However, both methods should be avoided since they do not provide clean access to the page's actual content.For instance, they may return a system message for non-existing pages.Use WikiPage::getContent() instead.Code that relies on a textual representation of the page content should eventually be rewritten.However, ContentHandler::getContentText() provides a stop-gap that can be used to get text for a page.Its behavior is controlled by $wgContentHandlerTextFallback it
Set options of the Parser.
static tidy($text)
Interface with html tidy, used if $wgUseTidy = true.
Definition: MWTidy.php:127
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:952
wfHostname()
Fetch server name for use in error reporting etc.
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:808
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:188
uniqPrefix()
Accessor for mUniqPrefix.
Definition: Parser.php:685
const TOC_START
Definition: Parser.php:121
Title($x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:732
SectionProfiler $mProfiler
Definition: Parser.php:211
$wgEnableScaryTranscluding
Enable interwiki transcluding.
$sort
wfDebug($text, $dest= 'all', array $context=array())
Sends a line to the debug log if enabled or, optionally, to a comment in output.
There are three types of nodes:
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1536
has been added to your &Future changes to this page and its associated Talk page will be listed there
$mHeadings
Definition: Parser.php:166
$value
const COLON_STATE_TAGSLASH
Definition: Parser.php:101
static makeSelfLinkObj($nt, $html= '', $query= '', $trail= '', $prefix= '')
Make appropriate markup for a link to the current article.
Definition: Linker.php:401
const NS_SPECIAL
Definition: Defines.php:58
clearState()
Clear Parser state.
Definition: Parser.php:294
$mFirstCall
Definition: Parser.php:136
getPreloadText($text, Title $title, ParserOptions $options, $params=array())
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:646
Options($x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:786
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2385
replaceLinkHolders(&$text, $options=0)
Definition: Parser.php:5227
static activeUsers()
Definition: SiteStats.php:164
$mLinkID
Definition: Parser.php:163
doQuotes($text)
Helper function for doAllQuotes()
Definition: Parser.php:1531
preprocessToDom($text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:3237
getPrefixedText()
Get the prefixed title with spaces.
Definition: Title.php:1408
limitationWarn($limitationType, $current= '', $max= '')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:3356
static cleanUrl($url)
Definition: Sanitizer.php:1752
wfUrlencode($s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:233
$mGeneratedPPNodeCount
Definition: Parser.php:164
Represents a title within MediaWiki.
Definition: Title.php:33
static getRandomString()
Get a random string.
Definition: Parser.php:666
$mRevisionId
Definition: Parser.php:184
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1719
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
doBlockLevels($text, $linestart)
#@-
Definition: Parser.php:2483
$wgArticlePath
Definition: img_auth.php:45
OutputType($x=null)
Accessor/mutator for the output type.
Definition: Parser.php:758
const NS_TEMPLATE
Definition: Defines.php:79
const COLON_STATE_COMMENTDASHDASH
Definition: Parser.php:104
recursiveTagParse($text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:562
const NO_ARGS
Allows to change the fields on the form that will be generated just before adding its HTML to parser output $parser
Definition: hooks.txt:325
MagicWordArray $mVariables
Definition: Parser.php:141
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:694
const SFH_NO_HASH
Definition: Parser.php:82
const COLON_STATE_COMMENTDASH
Definition: Parser.php:103
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
wfRandomString($length=32)
Get a random string containing a number of pseudo-random hex characters.
$mForceTocPosition
Definition: Parser.php:168
preprocess($text, Title $title=null, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:603
getName()
Get the user name, or the IP of an anonymous user.
Definition: User.php:2006
static getCacheTTL($id)
Allow external reads of TTL array.
Definition: MagicWord.php:285
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
Definition: globals.txt:10
const OT_PREPROCESS
Definition: Parser.php:113
maybeDoSubpageLink($target, &$text)
Handle link to subpage if necessary.
Definition: Parser.php:2350
$mFunctionSynonyms
Definition: Parser.php:128
If you want to remove the page from your watchlist later
setLinkID($id)
Definition: Parser.php:800
$mOutputType
Definition: Parser.php:181
Apache License January http
$mDefaultStripList
Definition: Parser.php:131
$mExtLinkBracketedRegex
Definition: Parser.php:146
if($line===false) $args
Definition: cdb.php:64
set to $title object and return false for a match for latest to be modified or replaced by the hook handler after cache objects are set use the ContentGetParserOutput hook instead for highlighting & $link
Definition: hooks.txt:2415
static getLocalInstance($ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
$wgMaxSigChars
Maximum number of Unicode characters in signature.
const COLON_STATE_TAG
Definition: Parser.php:98
static getDoubleUnderscoreArray()
Get a MagicWordArray of double-underscore entities.
Definition: MagicWord.php:298
static splitTrail($trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1770
The User object encapsulates all of the user-specific settings (user_id, name, rights, password, email address, options, last login time).
Definition: User.php:39
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1397
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
pull multiple revisions may often pull multiple times from the same blob.
Definition: deferred.txt:11
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
$wgNoFollowNsExceptions
Namespaces in which $wgNoFollowLinks doesn't apply.
static factory($mode=false, IContextSource $context=null)
Get a new image gallery.
$wgLanguageCode
Site language code.
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:132
Class for asserting that a callback happens when a dummy object leaves scope.
$wgExtraInterlanguageLinkPrefixes
List of additional interwiki prefixes that should be treated as interlanguage links (i...
wfCgiToArray($query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
static capturePath(Title $title, IContextSource $context)
Just like executePath() but will override global variables and execute the page in "inclusion" mode...
const NO_TEMPLATES
replaceInternalLinks($s)
Process [[ ]] wikilinks.
Definition: Parser.php:1970
$mVarCache
Definition: Parser.php:132
$wgStylePath
The URL path of the skins directory.
$mRevisionObject
Definition: Parser.php:183
static normalizeSectionNameWhitespace($section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the IDs that are used for section links.
Definition: Sanitizer.php:1282
internalParse($text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1185
Title $mTitle
Definition: Parser.php:180
fetchFileNoRegister($title, $options=array())
Helper function for fetchFileAndTitle.
Definition: Parser.php:4030
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:241
wfEscapeWikiText($text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
bool $mInParse
Recursive call protection.
Definition: Parser.php:209
Some quick notes on the file repository architecture Functionality is
Definition: README:3
isExternal()
Is this Title interwiki?
Definition: Title.php:785
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:870
static register($parser)
magic word use ParserLimitReportPrepare and ParserLimitReportFormat instead Called at the end of the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2187
$mRevIdForTs
Definition: Parser.php:188
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
static normalizeSubpageLink($contextTitle, $target, &$text)
Definition: Linker.php:1503
$mStripList
Definition: Parser.php:130
$mFunctionTagHooks
Definition: Parser.php:129
const OT_PLAIN
Definition: Defines.php:229
$wgNoFollowLinks
If true, external URL links in wiki text will be given the rel="nofollow" attribute as a hint to sear...
$mRevisionTimestamp
Definition: Parser.php:185
$mImageParams
Definition: Parser.php:133
getDBkey()
Get the main part with underscores.
Definition: Title.php:884
doAllQuotes($text)
Replace single quotes with HTML markup.
Definition: Parser.php:1514
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.$reader:XMLReader object $logInfo:Array of information Return false to stop further processing of the tag 'ImportHandlePageXMLTag':When parsing a XML tag in a page.$reader:XMLReader object $pageInfo:Array of information Return false to stop further processing of the tag 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.$reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information Return false to stop further processing of the tag 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.$reader:XMLReader object Return false to stop further processing of the tag 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.$reader:XMLReader object $revisionInfo:Array of information Return false to stop further processing of the tag 'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.$title:Title object for the current page $request:WebRequest $ignoreRedirect:boolean to skip redirect check $target:Title/string of redirect target $article:Article object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) $article:article(object) being checked 'IsTrustedProxy':Override the result of wfIsTrustedProxy() $ip:IP being check $result:Change this value to override the result of wfIsTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 
'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetMagic':DEPRECATED, use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language $magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetSpecialPageAliases':DEPRECATED, use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language $specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 
'LinkBegin':Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1710
static normalizeUrlComponent($component, $unsafe)
Definition: Parser.php:1888
isAnon()
Get whether the user is anonymous.
Definition: User.php:3168
if($limit) $timestamp
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:73
wfGetDB($db, $groups=array(), $wiki=false)
Get a Database object.
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions instead of letting the login form give the generic error message that the account does not exist For when the account has been renamed or deleted or an array to pass a message key and parameters but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:952
$mInPre
Definition: Parser.php:159
const OT_WIKI
Definition: Defines.php:226
Preprocessor $mPreprocessor
Definition: Parser.php:149
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:861
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of normal(non-web) applications--they might conflict with distributors'policies
static getInstance($ts=false)
Get a timestamp instance in GMT.
$limit
const NS_MEDIA
Definition: Defines.php:57
$res
Definition: database.txt:21
static linkKnown($target, $html=null, $customAttribs=array(), $query=array(), $options=array( 'known', 'noclasses'))
Identical to link(), except $options defaults to 'known'.
Definition: Linker.php:262
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:53
replaceVariables($text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3282
const RECOVER_ORIG
wfMatchesDomainList($url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
MediaWiki exception.
Definition: MWException.php:26
StripState $mStripState
Definition: Parser.php:157
$mDefaultSort
Definition: Parser.php:165
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:849
static run($event, array $args=array(), $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:137
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:956
const EXT_IMAGE_REGEX
Definition: Parser.php:90
startParse(Title $title=null, ParserOptions $options, $outputType, $clearState=true)
Definition: Parser.php:4994
$params
const NS_CATEGORY
Definition: Defines.php:83
static makeHeadline($level, $attribs, $anchor, $html, $link, $legacyAnchor=false)
Create a headline for content.
Definition: Linker.php:1751
shown</td >< td > a href
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
doTableStuff($text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:983
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
$mImageParamsMagicArray
Definition: Parser.php:134
LinkHolderArray $mLinkHolders
Definition: Parser.php:161
$wgNoFollowDomainExceptions
If this is set to an array of domains, external links to these domain names (or any subdomains) will ...
static register($parser)
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
$wgTranscludeCacheExpiry
Expiry time for transcluded templates cached in transcache database table.
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
const DB_SLAVE
Definition: Defines.php:46
if(!function_exists( 'version_compare')||version_compare(PHP_VERSION, '5.3.3')< 0)
Definition: api.php:39
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:821
static extractTagsAndParams($elements, $text, &$matches, $uniq_prefix= '')
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:889
while(($__line=Maintenance::readconsole())!==false) print n
Definition: eval.php:64
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:870
static hasSubpages($index)
Does the namespace allow subpages?
formatHeadings($text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4348
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:839
static tocUnindent($level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1655
if(!$wgRequest->checkUrlExtension()) if(!$wgEnableAPI) $wgTitle
Definition: api.php:64
static tocLine($anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1669
getNamespace()
Get the namespace index, i.e.
Definition: Title.php:907
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
getExternalLinkAttribs($url=false)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:1821
__construct($conf=array())
Definition: Parser.php:216
$mInputSize
Definition: Parser.php:189
equals(Title $title)
Compare with another title.
Definition: Title.php:4161
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
const HALF_PARSED_VERSION
Update this version number when the output of serialiseHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:79
const NS_FILE
Definition: Defines.php:75
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:276
$wgParserConf
Parser configuration.
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:33
$wgMaxTocLevel
Maximum indent level of toc.
const PTD_FOR_INCLUSION
Definition: Parser.php:107
static escapeId($id, $options=array())
Given a value, escape it so that it can be used in an id attribute and return it. ...
Definition: Sanitizer.php:1102
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1712
armorLinks($text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2328
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1511
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title after the basic globals have been set up
Definition: hooks.txt:1888
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
setOutputType($ot)
Set the output type.
Definition: Parser.php:741
$mTagHooks
Definition: Parser.php:125
Class for handling an array of magic words.
Definition: MagicWord.php:699
const NS_MEDIAWIKI
Definition: Defines.php:77
static & get($id)
Factory: creates an object representing an ID.
Definition: MagicWord.php:248
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition: hooks.txt:240
the value to return A Title object or null whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2397
#define the
maybeMakeExternalImage($url)
Make an image if it's allowed, either through the global option, through the exception, or through the on-wiki whitelist.
Definition: Parser.php:1911
getOption($oname, $defaultOverride=null, $ignoreHidden=false)
Get the user's current setting for a given option.
Definition: User.php:2576
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2337
const MARKER_SUFFIX
Definition: Parser.php:118
wfDebugLog($logGroup, $text, $dest= 'all', array $context=array())
Send a line to a supplementary debug log file, if configured, or main debug log if not...
const OT_HTML
Definition: Defines.php:225
Prior to maintenance scripts were a hodgepodge of code that had no cohesion or formal method of action Beginning in
Definition: maintenance.txt:1
static makeImageLink(Parser $parser, Title $title, $file, $frameParams=array(), $handlerParams=array(), $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:537
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions instead of letting the login form give the generic error message that the account does not exist For when the account has been renamed or deleted or an array to pass a message key and parameters but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object & $output
Definition: hooks.txt:952
static getSubstIDs()
Get an array of parser substitution modifier IDs.
Definition: MagicWord.php:275
static images()
Definition: SiteStats.php:172
isSpecialPage()
Returns true if this is a special page.
Definition: Title.php:1025
$mTransparentTagHooks
Definition: Parser.php:126
$mExpensiveFunctionCount
Definition: Parser.php:167
$mUrlProtocols
Definition: Parser.php:146
const TS_MW
MediaWiki concatenated string timestamp (YYYYMMDDHHMMSS)
static getVersion($flags= '')
Return a string of the MediaWiki version with SVN revision if available.
static newFromTitle($title, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given title.
Definition: Revision.php:104
$mConf
Definition: Parser.php:146
static newFromId($id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:86
wfUrlProtocols($includeProtocolRelative=true)
Returns a regular expression of url protocols.
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:253
static getExternalLinkRel($url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:1800
static & singleton()
Get an instance of this class.
Definition: LinkCache.php:49
string $mUniqPrefix
Definition: Parser.php:192
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
array $mLangLinkLanguages
Array with the language name of each language link (i.e.
Definition: Parser.php:198
const OT_MSG
Definition: Parser.php:114
replaceTransparentTags($text)
Replace transparent tags in $text with the values given by the callbacks.
Definition: Parser.php:5746
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
Definition: postgres.txt:22
doDoubleUnderscore($text)
Strip double-underscore items like NOGALLERY and NOTOC. Fills $this->mDoubleUnderscores and returns the modified text.
Definition: Parser.php:4272
$mFunctionHooks
Definition: