MediaWiki  master
Parser.php
Go to the documentation of this file.
1 <?php
67 class Parser {
# Parser version string
const VERSION = '1.6.4';

# Flags for Parser::setFunctionHook
const SFH_NO_HASH = 1;
const SFH_OBJECT_ARGS = 2;

# Constants needed for external link processing
# Everything except bracket, space, or control characters
# \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
# as well as U+3000 IDEOGRAPHIC SPACE (bug 19052)
const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
# Extended-syntax (x) regex, so the line break inside the pattern is ignored
const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
	\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';

# Regular expression for a non-newline space
const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';

# State constants for the definition list colon extraction
const COLON_STATE_TEXT = 0;
const COLON_STATE_TAG = 1;

# Flags for preprocessToDom
const PTD_FOR_INCLUSION = 1;

# Allowed values for $this->mOutputType
# Parameter to startExternalParse().
const OT_HTML = 1; # like parse()
const OT_WIKI = 2; # like preSaveTransform()
# Used by preprocess() and setOutputType(); it was referenced but never
# defined. Same value as OT_MSG.
const OT_PREPROCESS = 3; # like preprocess()
const OT_MSG = 3;
const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.

# Marker suffix needs to be accessible statically.
const MARKER_SUFFIX = "-QINU\x7f";

# Markers used for wrapping the table of contents
const TOC_START = '<mw:toc>';
const TOC_END = '</mw:toc>';
123 
# Persistent:
public $mTagHooks = array();
# Read by internalParse() via array_keys(); must always be an array
public $mTransparentTagHooks = array();
public $mFunctionHooks = array();
public $mFunctionSynonyms = array( 0 => array(), 1 => array() );
public $mStripList = array();
public $mVarCache = array();
public $mImageParams = array();
public $mMarkerIndex = 0;
public $mFirstCall = true;

# Initialised by initialiseVariables()
public $mVariables;
public $mSubstWords;

# Initialised in constructor
public $mConf;
public $mUrlProtocols;
public $mExtLinkBracketedRegex;
public $mPreprocessorClass;

# Created lazily by getPreprocessor()
public $mPreprocessor;

# Cleared with clearState():
public $mOutput;
public $mAutonumber, $mDTopen;
public $mStripState;
public $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
public $mLinkHolders;
public $mLinkID;
public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
public $mDefaultSort;
public $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
public $mExpensiveFunctionCount; # number of expensive parser function calls
public $mShowToc, $mForceTocPosition;

public $mUser; # User object; only used when doing pre-save transform

# Temporary
# These are variables reset at least once per parse regardless of $clearState
public $mOptions;
public $mTitle; # Title context, used for self-link rendering and similar things
public $mOutputType; # Output type, one of the OT_xxx constants
public $ot; # Shortcut alias, see setOutputType()
public $mRevisionObject; # The revision object of the specified revision ID
public $mRevisionId; # ID to display in {{REVISIONID}} tags
public $mRevisionTimestamp; # The timestamp of the specified revision ID
public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
public $mInputSize = false; # For {{PAGESIZE}} on current page.

# Prefix of the strip markers; regenerated by clearState()
public $mUniqPrefix;
public $mLangLinkLanguages;

# Reset to null by clearState() and by parse()
public $currentRevisionCache;

# Reset to false on clone
public $mInParse = false;

# SectionProfiler instance, created by clearState()
protected $mProfiler;
232 
/**
 * Store the configuration, build the regex for bracketed external links
 * and pick the preprocessor implementation.
 *
 * @param array $conf Configuration; when the 'preprocessorClass' key is
 *   set its value is used as the preprocessor class name
 */
public function __construct( $conf = array() ) {
	$this->mConf = $conf;
	$this->mUrlProtocols = wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')'
		. self::EXT_LINK_URL_CLASS
		. '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';

	# Preprocessor_Hash is the fallback for every case not handled below
	$preprocessorClass = 'Preprocessor_Hash';
	if ( isset( $conf['preprocessorClass'] ) ) {
		# An explicit choice always wins
		$preprocessorClass = $conf['preprocessorClass'];
	} elseif ( !defined( 'HPHP_VERSION' ) ) {
		# (Under HipHop Preprocessor_Hash is much faster than Preprocessor_DOM,
		# so the fallback is kept there.)
		if ( extension_loaded( 'domxml' ) ) {
			# PECL extension that conflicts with the core DOM extension (bug 13770)
			wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
		} elseif ( extension_loaded( 'dom' ) ) {
			$preprocessorClass = 'Preprocessor_DOM';
		}
	}
	$this->mPreprocessorClass = $preprocessorClass;

	wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
257 
/**
 * Drop every property of the object, starting with the link holder array.
 */
public function __destruct() {
	# The link holder array is released first
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}
	# Then every remaining property is removed one by one
	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
269 
/**
 * Make the clone independent of the original: clear the in-parse flag,
 * detach fields that may be held by reference, then fire 'ParserCloned'.
 */
public function __clone() {
	$this->mInParse = false;

	// Bug 56226: taking a reference to an object field turns the field
	// itself into a reference (for as long as the other reference lives),
	// and cloning copies reference fields as references. Both are defined
	// PHP5 behaviours; they bite us because old PHP4-era hooks pass fields
	// by reference.
	$referenceProneFields = array( 'mStripState', 'mVarCache' );
	foreach ( $referenceProneFields as $field ) {
		// Take a plain (non-reference) copy of the value and rebind the
		// field to that fresh copy.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', array( $this ) );
}
292 
/**
 * One-time initialisation: register the core tag hooks, initialise the
 * variables and fire 'ParserFirstCallInit'. Calls after the first one
 * do nothing.
 */
public function firstCallInit() {
	if ( $this->mFirstCall ) {
		# Flip the flag before doing the work so it only ever happens once
		$this->mFirstCall = false;

		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		Hooks::run( 'ParserFirstCallInit', array( &$this ) );
	}
}
308 
/**
 * Reset all per-parse state: a fresh ParserOutput, strip state, link
 * holders, counters and caches. Fires 'ParserClearState' at the end.
 */
public function clearState() {
	if ( $this->mFirstCall ) {
		$this->firstCallInit();
	}

	# Fresh output object, which watches the options that get used
	$this->mOutput = new ParserOutput;
	$this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );

	$this->mAutonumber = 0;
	$this->mLastSection = '';
	$this->mDTopen = false;
	$this->mIncludeCount = array();
	$this->mArgStack = false;
	$this->mInPre = false;
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	# Forget everything about the revision being parsed
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = array();
	$this->mUser = null;
	$this->mLangLinkLanguages = array();
	$this->currentRevisionCache = null;

	# New random marker prefix and a strip state built on it
	$this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
	$this->mStripState = new StripState( $this->mUniqPrefix );

	# Clear these on every parse, bug 4549
	$this->mTplDomCache = array();
	$this->mTplRedirCache = array();

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = array(
		'post-expand' => 0,
		'arg' => 0,
	);
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = array();
	$this->mDoubleUnderscores = array();
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor bound to another parser must not be reused
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	Hooks::run( 'ParserClearState', array( &$this ) );
}
374 
/**
 * Convert wikitext to HTML and return the ParserOutput holding the result.
 *
 * When limit reporting is enabled in the options, two HTML comments are
 * appended to the text: the "NewPP limit report" and the transclusion
 * expansion time report (top 10 entries by 'real' time, descending).
 *
 * @param string $text Wikitext to parse
 * @param Title $title Title passed on to startParse()
 * @param ParserOptions $options
 * @param bool $linestart Passed on to internalParseHalfParsed()
 * @param bool $clearState Passed on to startParse(); when true the parser
 *   is also locked for the call and the current strip marker prefix is
 *   removed from the input
 * @param int|null $revid Revision ID to expose during this parse; the
 *   previous revision fields are restored before returning
 * @return ParserOutput
 */
public function parse( $text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	global $wgShowHostnames;

	if ( $clearState ) {
		# Must stay in a local variable: presumably the lock is released
		# when this value goes out of scope - TODO confirm against lock()
		$magicScopeVariable = $this->lock();
	}

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	# Remove the strip marker tag prefix from the input, if present.
	if ( $clearState ) {
		$text = str_replace( $this->mUniqPrefix, '', $text );
	}

	# Remember the revision fields so they can be restored afterwards
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	Hooks::run( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', array( &$this, &$text, &$this->mStripState ) );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	# Set the converted title text, unless title conversion is disabled or
	# a display title has already been set
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();

		$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuTime !== null ) {
			$this->mOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
		$this->mOutput->setLimitReportData( 'limitreport-walltime',
			sprintf( "%.3f", $wallTime )
		);

		$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
			array( $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
			array( $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
			array( $this->mIncludeSizes['post-expand'], $max )
		);
		$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
			array( $this->mIncludeSizes['arg'], $max )
		);
		$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
			array( $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() )
		);
		$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
			array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() )
		);
		Hooks::run( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) );

		$limitReport = "NewPP limit report\n";
		if ( $wgShowHostnames ) {
			$limitReport .= 'Parsed by ' . wfHostname() . "\n";
		}
		foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
			# A hook returning false has formatted (or suppressed) the row itself
			if ( Hooks::run( 'ParserLimitReportFormat',
				array( $key, &$value, &$limitReport, false, false )
			) ) {
				$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
				$valueMsg = wfMessage( array( "$key-value-text", "$key-value" ) )
					->inLanguage( 'en' )->useDatabase( false );
				if ( !$valueMsg->exists() ) {
					$valueMsg = new RawMessage( '$1' );
				}
				if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
					$valueMsg->params( $value );
					$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
				}
			}
		}
		// Since we're not really outputting HTML, decode the entities and
		// then re-encode the things that need hiding inside HTML comments.
		$limitReport = htmlspecialchars_decode( $limitReport );
		Hooks::run( 'ParserLimitReport', array( $this, &$limitReport ) );

		// Sanitize for comment. Note '‐' in the replacement is U+2010,
		// which looks much like the problematic '-'.
		$limitReport = str_replace( array( '-', '&' ), array( '‐', '&amp;' ), $limitReport );
		$text .= "\n<!-- \n$limitReport-->\n";

		// Add on template profiling data
		$dataByFunc = $this->mProfiler->getFunctionStats();
		uasort( $dataByFunc, function ( $a, $b ) {
			// Descending order by 'real'. The comparator has to return an
			// integer (<0, 0, >0); a boolean cannot express "a sorts
			// before b" and leaves the order unreliable.
			if ( $a['real'] == $b['real'] ) {
				return 0;
			}
			return ( $a['real'] < $b['real'] ) ? 1 : -1;
		} );
		$profileReport = "Transclusion expansion time report (%,ms,calls,template)\n";
		foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
			$profileReport .= sprintf( "%6.2f%% %8.3f %6d - %s\n",
				$item['%real'], $item['real'], $item['calls'],
				htmlspecialchars( $item['name'] ) );
		}
		$text .= "\n<!-- \n$profileReport-->\n";

		# Log pages that generate more than a tenth of the allowed node count
		if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
			wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
				$this->mTitle->getPrefixedDBkey() );
		}
	}
	$this->mOutput->setText( $text );

	# Restore the revision state saved above
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
/**
 * Run the strip hooks and internalParse() (with $isMain = false) on a
 * piece of text. The result is only half-parsed: nothing from
 * internalParseHalfParsed() has been applied to it.
 *
 * @param string $text
 * @param PPFrame|bool $frame Passed on to internalParse(); false for none
 * @return string
 */
public function recursiveTagParse( $text, $frame = false ) {
	$hookArgs = array( &$this, &$text, &$this->mStripState );
	Hooks::run( 'ParserBeforeStrip', $hookArgs );
	Hooks::run( 'ParserAfterStrip', $hookArgs );
	return $this->internalParse( $text, false, $frame );
}
589 
/**
 * Like recursiveTagParse(), but also runs the result through
 * internalParseHalfParsed() with $isMain = false.
 *
 * @param string $text
 * @param PPFrame|bool $frame Passed on to recursiveTagParse()
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
612 
/**
 * Expand variables in the given text in OT_PREPROCESS mode and unstrip
 * the result. The parser is locked and its state reset for the call.
 *
 * @param string $text
 * @param Title|null $title Passed on to startParse()
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to expose while expanding, if any
 * @param PPFrame|bool $frame Passed on to replaceVariables()
 * @return string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );

	# startParse() runs first, so the revision ID is applied after the reset
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	Hooks::run( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
	Hooks::run( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );

	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
638 
/**
 * Expand variables in the text and unstrip the result, without locking
 * the parser or resetting its state.
 *
 * @param string $text
 * @param PPFrame|bool $frame Passed on to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
653 
/**
 * Process text in OT_PLAIN mode: substitute the $1-style parameters,
 * build the inclusion-mode DOM and expand it with template and argument
 * expansion switched off, then unstrip the result.
 *
 * @param string $text Text to process
 * @param Title $title Passed on to startParse()
 * @param ParserOptions $options
 * @param array $params Values substituted into $text through RawMessage
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = array() ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	# Expansion flags: leave template arguments and templates untouched.
	# ($flags used to be read here without ever being defined.)
	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
681 
/**
 * Get a random string, as produced by wfRandomString( 16 ).
 *
 * @return string
 */
public static function getRandomString() {
	$random = wfRandomString( 16 );
	return $random;
}
690 
/**
 * Set the user returned by getUser(); null falls back to the user of
 * the parser options.
 *
 * @param User|null $user
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
700 
/**
 * Get the strip marker prefix, or an empty string when it has not been
 * set yet.
 *
 * @return string
 */
public function uniqPrefix() {
	if ( isset( $this->mUniqPrefix ) ) {
		return $this->mUniqPrefix;
	}

	# @todo FIXME: This is probably *horribly wrong*
	# LanguageConverter seems to want $wgParser's uniqPrefix, however
	# if this is called for a parser cache hit, the parser may not
	# have ever been initialized in the first place.
	# Not really sure what the heck is supposed to be going on here.
	# (An MWException "Accessing uninitialized mUniqPrefix" was considered
	# here instead of returning an empty prefix.)
	return '';
}
718 
/**
 * Set the context title. A falsy value is replaced by a title built from
 * the text 'NO TITLE'; a title carrying a fragment is cloned and the
 * clone's fragment cleared, leaving the caller's object untouched.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	if ( !$t ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	if ( !$t->hasFragment() ) {
		$this->mTitle = $t;
		return;
	}

	# Strip the fragment to avoid various odd effects
	$this->mTitle = clone $t;
	$this->mTitle->setFragment( '' );
}
737 
/**
 * Get the current context title.
 *
 * @return Title|null Value of $this->mTitle
 */
public function getTitle() {
	return $this->mTitle;
}
746 
/**
 * Combined accessor/mutator for the title; delegates to
 * wfSetVar( $this->mTitle, $x ).
 *
 * @param Title|null $x
 * @return Title|null Whatever wfSetVar() returns
 */
public function Title( $x = null ) {
	return wfSetVar( $this->mTitle, $x );
}
/**
 * Set the output type and rebuild the $this->ot shortcut array, which
 * holds one boolean per output type ('html', 'wiki', 'pre', 'plain').
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$flags = array();
	$flags['html'] = ( $ot == self::OT_HTML );
	$flags['wiki'] = ( $ot == self::OT_WIKI );
	$flags['pre'] = ( $ot == self::OT_PREPROCESS );
	$flags['plain'] = ( $ot == self::OT_PLAIN );
	$this->ot = $flags;
}
772 
/**
 * Combined accessor/mutator for the output type; delegates to
 * wfSetVar( $this->mOutputType, $x ). The $this->ot shortcut array is
 * not touched here.
 *
 * @param int|null $x
 * @return int|null Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	return wfSetVar( $this->mOutputType, $x );
}
782 
/**
 * Get the ParserOutput object currently being filled.
 *
 * @return ParserOutput|null Value of $this->mOutput
 */
public function getOutput() {
	return $this->mOutput;
}
/**
 * Get the ParserOptions object in use.
 *
 * @return ParserOptions|null Value of $this->mOptions
 */
public function getOptions() {
	# The body used to be empty, which made this getter always return null
	return $this->mOptions;
}
800 
/**
 * Combined accessor/mutator for the parser options; delegates to
 * wfSetVar( $this->mOptions, $x ).
 *
 * @param ParserOptions|null $x
 * @return ParserOptions|null Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	return wfSetVar( $this->mOptions, $x );
}
810 
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The ID before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
817 
/**
 * Set the value the next call to nextLinkID() will return.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
824 
/**
 * Get the language for parser functions; currently the same object as
 * getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	$language = $this->getTargetLanguage();
	return $language;
}
832 
/**
 * Get the target language of the content being parsed. In order of
 * precedence: the target language set in the options, the user language
 * for interface messages, and otherwise the page language of the title.
 *
 * @throws MWException When the title is needed but $this->mTitle is null
 * @return Language
 */
public function getTargetLanguage() {
	$explicit = $this->mOptions->getTargetLanguage();
	if ( $explicit !== null ) {
		return $explicit;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
855 
/**
 * Get the language object used for language conversion; currently the
 * same object as getTargetLanguage().
 *
 * @return Language
 */
public function getConverterLanguage() {
	$language = $this->getTargetLanguage();
	return $language;
}
863 
/**
 * Get the user set through setUser(), falling back to the user of the
 * parser options when none is set.
 *
 * @return User
 */
public function getUser() {
	return is_null( $this->mUser )
		? $this->mOptions->getUser()
		: $this->mUser;
}
876 
/**
 * Get the preprocessor object, creating it on first use from the class
 * name chosen in the constructor.
 *
 * @return Preprocessor Instance of $this->mPreprocessorClass
 */
public function getPreprocessor() {
	if ( !isset( $this->mPreprocessor ) ) {
		$class = $this->mPreprocessorClass;
		$this->mPreprocessor = new $class( $this );
	}
	# Callers chain on the result (getPreprocessor()->newFrame()), so the
	# instance has to be returned; the return statement was missing.
	return $this->mPreprocessor;
}
889 
/**
 * Replace every occurrence of the given tags (and every HTML comment) in
 * the text with a unique marker, and collect what was removed.
 *
 * Each entry of $matches is keyed by its marker and holds:
 *   array( element name, content (null for <tag />), decoded attributes,
 *          the complete original text of the tag )
 *
 * @param array $elements Tag names to extract; they are inserted into a
 *   regex as-is
 * @param string $text Source text
 * @param array &$matches Receives the extracted tags; reset on entry
 * @param string $uniq_prefix Prefix of the generated markers
 * @return string The text with the markers in place of the tags
 */
public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
	# Marker counter, shared by all calls
	static $counter = 1;

	$matches = array();
	$result = '';

	# Matches the opening of one of the tags, or the start of a comment
	$openRegex = '/<(' . implode( '|', $elements ) . ")(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

	while ( $text != '' ) {
		$pieces = preg_split( $openRegex, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$result .= $pieces[0];

		$pieceCount = count( $pieces );
		if ( $pieceCount < 5 ) {
			# Nothing (more) to extract
			break;
		}

		if ( $pieceCount > 5 ) {
			# comment: the fourth capture group matched
			list( $element, $attributes, $close, $rest ) = array( $pieces[4], '', '', $pieces[5] );
		} else {
			# tag
			list( $element, $attributes, $close, $rest ) =
				array( $pieces[1], $pieces[2], $pieces[3], $pieces[4] );
		}

		$marker = "$uniq_prefix-$element-" . sprintf( '%08X', $counter++ ) . self::MARKER_SUFFIX;
		$result .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$tail = null;
			$text = $rest;
		} else {
			$closeRegex = ( $element === '!--' )
				? '/(-->)/'
				: "/(<\\/$element\\s*>)/i";
			$split = preg_split( $closeRegex, $rest, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $split[0];
			if ( count( $split ) >= 3 ) {
				$tail = $split[1];
				$text = $split[2];
			} else {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			}
		}

		$matches[$marker] = array(
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail"
		);
	}

	return $result;
}
971 
/**
 * Get the strip list.
 *
 * @return array Value of $this->mStripList
 */
public function getStripList() {
	return $this->mStripList;
}
980 
/**
 * Register a piece of text in the general strip state and return the
 * marker that stands for it.
 *
 * @param string $text
 * @return string The new marker
 */
public function insertStripItem( $text ) {
	$marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
	# Every marker gets its own index
	$this->mMarkerIndex++;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
996 
/**
 * Convert wiki table markup into HTML table markup, one line at a time.
 *
 * Recognised line starts: "{|" (optionally preceded by colons) opens a
 * table, "|}" closes it, "|-" starts a row, "|+" a caption, "|" a data
 * cell and "!" a header cell. Tables may nest; the state of every open
 * table is kept on parallel stacks with one entry per table. Tables
 * still open at the end of the text are closed automatically.
 *
 * @param string $text
 * @return string The text with table markup replaced by HTML
 */
1004  public function doTableStuff( $text ) {
1005 
1006  $lines = StringUtils::explode( "\n", $text );
1007  $out = '';
 # The five arrays below are stacks with one entry per currently open table
1008  $td_history = array(); # Is currently a td tag open?
1009  $last_tag_history = array(); # Save history of last tag activated (td, th or caption)
1010  $tr_history = array(); # Is currently a tr tag open?
1011  $tr_attributes = array(); # history of tr attributes
1012  $has_opened_tr = array(); # Did this table open a <tr> element?
1013  $indent_level = 0; # indent level of the table
1014 
1015  foreach ( $lines as $outLine ) {
1016  $line = trim( $outLine );
1017 
1018  if ( $line === '' ) { # empty line, go to next line
1019  $out .= $outLine . "\n";
1020  continue;
1021  }
1022 
1023  $first_character = $line[0];
1024  $matches = array();
1025 
1026  if ( preg_match( '/^(:*)\{\|(.*)$/', $line, $matches ) ) {
1027  # First check if we are starting a new table
 # The number of leading colons gives the indent level
1028  $indent_level = strlen( $matches[1] );
1029 
1030  $attributes = $this->mStripState->unstripBoth( $matches[2] );
1031  $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1032 
1033  $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
1034  array_push( $td_history, false );
1035  array_push( $last_tag_history, '' );
1036  array_push( $tr_history, false );
1037  array_push( $tr_attributes, '' );
1038  array_push( $has_opened_tr, false );
1039  } elseif ( count( $td_history ) == 0 ) {
1040  # Don't do any of the following
 # (no table is open, so the line is passed through unchanged)
1041  $out .= $outLine . "\n";
1042  continue;
1043  } elseif ( substr( $line, 0, 2 ) === '|}' ) {
1044  # We are ending a table
1045  $line = '</table>' . substr( $line, 2 );
1046  $last_tag = array_pop( $last_tag_history );
1047 
 # A table that never opened a row gets an empty row with one empty cell
1048  if ( !array_pop( $has_opened_tr ) ) {
1049  $line = "<tr><td></td></tr>{$line}";
1050  }
1051 
1052  if ( array_pop( $tr_history ) ) {
1053  $line = "</tr>{$line}";
1054  }
1055 
1056  if ( array_pop( $td_history ) ) {
1057  $line = "</{$last_tag}>{$line}";
1058  }
1059  array_pop( $tr_attributes );
1060  $outLine = $line . str_repeat( '</dd></dl>', $indent_level );
1061  } elseif ( substr( $line, 0, 2 ) === '|-' ) {
1062  # Now we have a table row
1063  $line = preg_replace( '#^\|-+#', '', $line );
1064 
1065  # Whats after the tag is now only attributes
1066  $attributes = $this->mStripState->unstripBoth( $line );
1067  $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
 # The attributes are stored; the <tr> itself is emitted with the first cell of the row
1068  array_pop( $tr_attributes );
1069  array_push( $tr_attributes, $attributes );
1070 
1071  $line = '';
1072  $last_tag = array_pop( $last_tag_history );
1073  array_pop( $has_opened_tr );
1074  array_push( $has_opened_tr, true );
1075 
1076  if ( array_pop( $tr_history ) ) {
1077  $line = '</tr>';
1078  }
1079 
1080  if ( array_pop( $td_history ) ) {
1081  $line = "</{$last_tag}>{$line}";
1082  }
1083 
1084  $outLine = $line;
1085  array_push( $tr_history, false );
1086  array_push( $td_history, false );
1087  array_push( $last_tag_history, '' );
1088  } elseif ( $first_character === '|'
1089  || $first_character === '!'
1090  || substr( $line, 0, 2 ) === '|+'
1091  ) {
1092  # This might be cell elements, td, th or captions
1093  if ( substr( $line, 0, 2 ) === '|+' ) {
1094  $first_character = '+';
1095  $line = substr( $line, 1 );
1096  }
1097 
1098  $line = substr( $line, 1 );
1099 
 # In header lines "!!" separates cells just like "||"
1100  if ( $first_character === '!' ) {
1101  $line = str_replace( '!!', '||', $line );
1102  }
1103 
1104  # Split up multiple cells on the same line.
1105  # FIXME : This can result in improper nesting of tags processed
1106  # by earlier parser steps, but should avoid splitting up eg
1107  # attribute values containing literal "||".
1108  $cells = StringUtils::explodeMarkup( '||', $line );
1109 
1110  $outLine = '';
1111 
1112  # Loop through each table cell
1113  foreach ( $cells as $cell ) {
1114  $previous = '';
 # Captions do not open a row; any other cell opens one if none is open
1115  if ( $first_character !== '+' ) {
1116  $tr_after = array_pop( $tr_attributes );
1117  if ( !array_pop( $tr_history ) ) {
1118  $previous = "<tr{$tr_after}>\n";
1119  }
1120  array_push( $tr_history, true );
1121  array_push( $tr_attributes, '' );
1122  array_pop( $has_opened_tr );
1123  array_push( $has_opened_tr, true );
1124  }
1125 
1126  $last_tag = array_pop( $last_tag_history );
1127 
 # Close the previous cell of this table, if one is still open
1128  if ( array_pop( $td_history ) ) {
1129  $previous = "</{$last_tag}>\n{$previous}";
1130  }
1131 
1132  if ( $first_character === '|' ) {
1133  $last_tag = 'td';
1134  } elseif ( $first_character === '!' ) {
1135  $last_tag = 'th';
1136  } elseif ( $first_character === '+' ) {
1137  $last_tag = 'caption';
1138  } else {
1139  $last_tag = '';
1140  }
1141 
1142  array_push( $last_tag_history, $last_tag );
1143 
1144  # A cell could contain both parameters and data
1145  $cell_data = explode( '|', $cell, 2 );
1146 
1147  # Bug 553: Note that a '|' inside an invalid link should not
1148  # be mistaken as delimiting cell parameters
1149  if ( strpos( $cell_data[0], '[[' ) !== false ) {
1150  $cell = "{$previous}<{$last_tag}>{$cell}";
1151  } elseif ( count( $cell_data ) == 1 ) {
1152  $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
1153  } else {
1154  $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1155  $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1156  $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
1157  }
1158 
1159  $outLine .= $cell;
1160  array_push( $td_history, true );
1161  }
1162  }
1163  $out .= $outLine . "\n";
1164  }
1165 
1166  # Closing open td, tr && table
 # NOTE(review): an open cell is always closed with </td> here, even when
 # the last cell opened was a th or caption; indented tables also get no
 # closing </dd></dl> on this path
1167  while ( count( $td_history ) > 0 ) {
1168  if ( array_pop( $td_history ) ) {
1169  $out .= "</td>\n";
1170  }
1171  if ( array_pop( $tr_history ) ) {
1172  $out .= "</tr>\n";
1173  }
1174  if ( !array_pop( $has_opened_tr ) ) {
1175  $out .= "<tr><td></td></tr>\n";
1176  }
1177 
1178  $out .= "</table>\n";
1179  }
1180 
1181  # Remove trailing line-ending (b/c)
1182  if ( substr( $out, -1 ) === "\n" ) {
1183  $out = substr( $out, 0, -1 );
1184  }
1185 
1186  # special case: don't return empty table
1187  if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1188  $out = '';
1189  }
1190 
1191  return $out;
1192  }
1193 
/**
 * First half of the wikitext-to-HTML conversion: variable/template
 * expansion, HTML sanitising, tables, horizontal rules, double
 * underscores, headings, internal links, quotes, external links, magic
 * links and heading formatting, in that order.
 *
 * @param string $text
 * @param bool $isMain Passed on to formatHeadings()
 * @param PPFrame|bool $frame Frame used to expand the text; when false
 *   replaceVariables() is used instead
 * @return string The half-parsed text, or the text as left by the
 *   'ParserBeforeInternalParse' hook when that hook returns false
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	# formatHeadings() needs the unmodified input
	$origText = $text;

	# Hook to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) );
	if ( !$proceed ) {
		return $text;
	}

	if ( !$frame ) {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# if $frame is provided, then use $frame for replacing any variables.
		# Use frame depth to infer how include/noinclude tags should be handled:
		# depth=0 means this is the top-level document; otherwise it's an
		# included document
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	}

	Hooks::run( 'InternalParseBeforeSanitize', array( &$this, &$text, &$this->mStripState ) );
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$text = Sanitizer::removeHTMLtags(
		$text,
		array( &$this, 'attributeStripCallback' ),
		false,
		$transparentTags
	);
	Hooks::run( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$noParseMarker = $this->mUniqPrefix . 'NOPARSE';
	$text = str_replace( $noParseMarker, '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $origText, $isMain );
}
1264 
/**
 * Second half of the conversion, applied to the output of
 * internalParse(): unstripping, French-space fixes, block levels, link
 * holder replacement, language conversion, transparent tags, character
 * reference normalisation and tidying.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain When true the 'ParserBeforeTidy' and
 *   'ParserAfterTidy' hooks are run
 * @param bool $linestart Passed on to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	global $wgUseTidy, $wgAlwaysUseTidy;

	$text = $this->mStripState->unstripGeneral( $text );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$spaceFixes = array(
		# french spaces, last one Guillemet-left
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# french spaces, Guillemet-right
		'/(\\302\\253) /' => '\\1&#160;',
		# Beware of CSS magic word !important, bug #11874.
		'/&#160;(!\s*important)/' => ' \\1',
	);
	$text = preg_replace( array_keys( $spaceFixes ), array_values( $spaceFixes ), $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	# Language conversion is skipped when it is disabled in the options,
	# when __NOCONTENTCONVERT__ was seen, or for interface messages.
	# The position of the convert() call should not be changed. it
	# assumes that the links are all replaced and the only thing left
	# is the <nowiki> mark.
	$skipConversion = $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| $this->mOptions->getInterfaceMessage();
	if ( !$skipConversion ) {
		$text = $this->getConverterLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', array( &$this, &$text ) );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	$useTidy = ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy;
	if ( $useTidy ) {
		$text = MWTidy::tidy( $text );
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$nestingFixes = array(
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (bug 3695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		);

		$text = preg_replace(
			array_keys( $nestingFixes ),
			array_values( $nestingFixes ),
			$text
		);
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', array( &$this, &$text ) );
	}

	return $text;
}
1359 
/**
 * Replace "magic" strings in the text with links.
 *
 * A single regex pass finds, in order of precedence: existing <a> elements
 * and other HTML tags (left untouched), free external URLs, "RFC n" /
 * "PMID n" references, and ISBNs. Every match is handed to
 * magicLinkCallback(), which decides what to emit.
 *
 * @param string $text
 * @return string Text with the magic strings replaced
 */
public function doMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$space = self::SPACE_NOT_NL; # non-newline space
	$spdash = "(?:-|$space)"; # a dash or a non-newline space
	$spaces = "$space++"; # possessive match of 1 or more spaces
	# The callback is passed as array( $this, ... ) rather than array( &$this, ... ):
	# a reference to $this is never needed for a callable, and PHP >= 7.1
	# refuses to take one.
	$text = preg_replace_callback(
		'!(?:                            # Start cases
			(<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
			(<.*?>) |                    # m[2]: Skip stuff inside HTML elements' . "
			(\b(?i:$prots)$urlChar+) |   # m[3]: Free external links
			\b(?:RFC|PMID) $spaces       # m[4]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN $spaces (             # m[5]: ISBN, capture number
				(?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
				(?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
				[0-9Xx]                  #  check digit
			)\b
		)!xu", array( $this, 'magicLinkCallback' ), $text );
	return $text;
}
1392 
/**
 * Callback for the regex in doMagicLinks(): turn one match into its output.
 *
 * @param array $m Match array; [1] anchor, [2] HTML element, [3] free URL,
 *   [4] RFC/PMID number, [5] ISBN
 * @throws MWException If group 4 matched but the text starts with neither
 *   "RFC" nor "PMID"
 * @return string HTML or unchanged text for this match
 */
public function magicLinkCallback( $m ) {
	# Existing anchors and any other HTML element are passed through as-is
	if ( ( isset( $m[1] ) && $m[1] !== '' ) || ( isset( $m[2] ) && $m[2] !== '' ) ) {
		return $m[0];
	}

	# Free external link
	if ( isset( $m[3] ) && $m[3] !== '' ) {
		return $this->makeFreeExternalLink( $m[0] );
	}

	# RFC or PMID
	if ( isset( $m[4] ) && $m[4] !== '' ) {
		$id = $m[4];
		if ( strncmp( $m[0], 'RFC', 3 ) === 0 ) {
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
		} elseif ( strncmp( $m[0], 'PMID', 4 ) === 0 ) {
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
	}

	# ISBN
	if ( isset( $m[5] ) && $m[5] !== '' ) {
		# Collapse every kind of non-newline space to a plain space for display
		$isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[5] );
		# The lookup key has no delimiters and an upper-case check digit
		$num = strtr( str_replace( array( '-', ' ' ), '', $isbn ), 'x', 'X' );
		$titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
		return '<a href="' .
			htmlspecialchars( $titleObj->getLocalURL() ) .
			'" class="internal mw-magiclink-isbn">ISBN ' . $isbn . '</a>';
	}

	# Nothing recognisable was captured
	return $m[0];
}
1444 
/**
 * Build the HTML for a free (unbracketed) external link.
 *
 * Anything from the first escaped angle bracket onwards, and any trailing
 * punctuation, is split off the URL and appended after the link as plain
 * text. URLs that qualify as external images are rendered as images instead.
 *
 * @param string $url The matched URL text
 * @return string HTML for the link or image, followed by the split-off trail
 */
public function makeFreeExternalLink( $url ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	$entityMatch = array();
	if ( preg_match( '/&(lt|gt);/', $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
		$cut = $entityMatch[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Characters that get moved to $trail when they end the URL.
	# A right bracket only counts if the URL has no left bracket.
	$sep = ',;\.:!?';
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	# Count the separator characters at the end by scanning the reversed URL
	$urlRev = strrev( $url );
	$numSepChars = strspn( $urlRev, $sep );

	# Don't break a trailing HTML entity by moving its ';' into $trail.
	# This is hot code: substr_compare avoids creating a new string, and the
	# entity regex runs on the reversed string at a fixed offset instead of
	# using a (slow) $ anchor.
	# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0 ) {
		$revEntity = array();
		if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $revEntity, 0, $numSepChars ) ) {
			$numSepChars--;
		}
	}

	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$image = $this->maybeMakeExternalImage( $url );
	if ( $image !== false ) {
		return $image . $trail;
	}

	# Not an image, make a link
	$link = Linker::makeExternalLink( $url,
		$this->getConverterLanguage()->markNoConversion( $url, true ),
		true, 'free',
		$this->getExternalLinkAttribs( $url ) );

	# Register it in the output object, with unnecessary URL escape codes
	# replaced by their equivalent characters
	$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );

	return $link . $trail;
}
1509 
/**
 * Convert "== Heading ==" style wikitext lines into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that the longest run of equals
 * signs is consumed first.
 *
 * @param string $text
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( range( 6, 1 ) as $level ) {
		$marker = str_repeat( '=', $level );
		$text = preg_replace(
			"/^{$marker}(.+){$marker}\\s*$/m",
			"<h{$level}>\\1</h{$level}>",
			$text
		);
	}
	return $text;
}
1526 
/**
 * Apply doQuotes() to every line of the text.
 *
 * @param string $text
 * @return string The lines, each with its quote mark-up converted, rejoined
 *   with newlines
 */
public function doAllQuotes( $text ) {
	$converted = array();
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1544 
/**
 * Convert the apostrophe mark-up ('' italics, ''' bold, ''''' both) in a
 * single line of text into <i> and <b> elements.
 *
 * The line is split on runs of two or more apostrophes. Runs of 4, or of
 * more than 5, have their surplus apostrophes moved into the preceding text.
 * If both the bold and the italic counts are odd, one bold run is demoted to
 * "apostrophe + italics". A small state machine then emits the tags, and any
 * elements still open at the end of the line are closed.
 *
 * @param string $text A single line (no newlines expected)
 * @return string The line with quote mark-up replaced by HTML tags
 */
public function doQuotes( $text ) {
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		# No apostrophe runs at all
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	// Odd indexes of $arr hold the apostrophe runs, even indexes the text.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					if ( $firstsingleletterword == -1 ) {
						$firstsingleletterword = $i;
						// if $firstsingleletterword is set, we don't
						// look at the other options, so we can bail early.
						break;
					}
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state records the open elements in opening order: '', 'i', 'b', 'ib',
	// 'bi', or 'both' when ''''' was seen and the nesting order is not yet
	// known; in that case the text is held back in $buffer.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would treat
	// the buffered text "0" as empty and silently drop it from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1728 
/**
 * Replace bracketed external links ([http://example.org label]) with HTML
 * links, and register each target with the parser output.
 *
 * @param string $text
 * @throws MWException If the bracketed-link regex cannot be applied
 * @return string
 */
public function replaceExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $bits === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# Everything before the first link is copied through unchanged
	$s = array_shift( $bits );

	# What remains comes in groups of four: URL, protocol, link text, trail
	$numBits = count( $bits );
	for ( $i = 0; $i < $numBits; $i += 4 ) {
		$url = $bits[$i];
		# $bits[$i + 1] is the protocol, which is not needed here
		$linkText = $bits[$i + 2];
		$trail = $bits[$i + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$entityMatch = array();
		if ( preg_match( '/&(lt|gt);/', $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
			$cut = $entityMatch[0][1];
			$linkText = substr( $url, $cut ) . ' ' . $linkText;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $linkText );
		if ( $img !== false ) {
			$linkText = $img;
		}

		$dtrail = '';

		# Set linktype for CSS - if URL==text, link is essentially free
		$linktype = ( $linkText === $url ) ? 'free' : 'text';

		if ( $linkText == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber it
			$langObj = $this->getTargetLanguage();
			$linkText = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		$linkText = $this->getConverterLanguage()->markNoConversion( $linkText );

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$s .= Linker::makeExternalLink( $url, $linkText, false, $linktype,
			$this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail;

		# Register link in the output object.
		# Replace unnecessary URL escape codes with the referenced character
		# This prevents spammers from hiding links from the filters
		$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
	}

	return $s;
}
1811 
/**
 * Get the rel attribute value for an external link.
 *
 * Returns 'nofollow' when $wgNoFollowLinks is enabled, unless the title's
 * namespace is listed in $wgNoFollowNsExceptions or the URL matches
 * $wgNoFollowDomainExceptions.
 *
 * @param string|bool $url URL of the link, passed to wfMatchesDomainList()
 * @param Title|null $title Page the link appears on, if known
 * @return string|null 'nofollow', or null when no rel value applies
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}

	# The namespace exemption only applies when there is a title. Feeding a
	# "no namespace" placeholder of false to a loose in_array() would match
	# namespace 0, wrongly exempting title-less links whenever the main
	# namespace is listed.
	if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
		return null;
	}

	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
1831 
/**
 * Build the HTML attributes ('rel' and, if configured, 'target') for an
 * external link.
 *
 * @param string|bool $url URL of the link, passed to getExternalLinkRel()
 * @return array Attribute name => value
 */
public function getExternalLinkAttribs( $url = false ) {
	$attribs = array(
		'rel' => self::getExternalLinkRel( $url, $this->mTitle ),
	);

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
	}

	return $attribs;
}
1851 
/**
 * Old name of normalizeLinkUrl(); logs a deprecation notice and delegates.
 *
 * @deprecated since 1.24, use normalizeLinkUrl
 * @param string $url
 * @return string
 */
public static function replaceUnusualEscapes( $url ) {
	wfDeprecated( __METHOD__, '1.24' );

	$normalized = self::normalizeLinkUrl( $url );
	return $normalized;
}
1863 
/**
 * Bring a URL into a normal form with respect to percent-encoding.
 *
 * Unsafe characters are percent-encoded first. The URL is then cut into
 * scheme+path, query and fragment, and in each part escapes that are not
 * needed there are decoded while the remaining ones get upper-case hex
 * digits (see normalizeUrlComponent()).
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# First, make sure unsafe characters are encoded
	$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $m ) {
			return rawurlencode( $m[0] );
		},
		$url
	);

	$fragment = '';
	$query = '';

	# Fragment part - 'fragment': everything from the first '#'
	$hashPos = strpos( $url, '#' );
	if ( $hashPos !== false ) {
		$fragment = self::normalizeUrlComponent(
			substr( $url, $hashPos ), '"#%<>[\]^`{|}' );
		$url = substr( $url, 0, $hashPos );
	}

	# Query part - 'query' minus &=+;
	# Only a '?' in front of the fragment starts a query, which is exactly
	# what is left in $url at this point.
	$queryPos = strpos( $url, '?' );
	if ( $queryPos !== false ) {
		$query = self::normalizeUrlComponent(
			substr( $url, $queryPos ), '"#%<>[\]^`{|}&=+;' );
		$url = substr( $url, 0, $queryPos );
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$path = self::normalizeUrlComponent( $url, '"#%<>[\]^`{|}/?' );

	return $path . $query . $fragment;
}
1908 
/**
 * Normalise the percent-escapes within one component of a URL.
 *
 * An escape is decoded when it stands for a printable ASCII character
 * (code 33..126) that is not listed in $unsafe; every other escape is
 * kept, with its hex digits upper-cased.
 *
 * @param string $component Part of a URL
 * @param string $unsafe Characters that must stay escaped in this component
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	$normalizeEscape = function ( $matches ) use ( $unsafe ) {
		$escape = $matches[0];
		$char = rawurldecode( $escape );
		$code = ord( $char );

		$mustStayEscaped = $code <= 32 || $code >= 127
			|| strpos( $unsafe, $char ) !== false;

		# Leave it escaped (upper-case a-f) or unescape it
		return $mustStayEscaped ? strtoupper( $escape ) : $char;
	};

	return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $normalizeEscape, $component );
}
1923 
/**
 * Make an external image from a URL if the parser options allow it.
 *
 * The URL must match EXT_IMAGE_REGEX, and one of the following must hold:
 * external images are allowed in general, the URL starts with one of the
 * allowed "images from" prefixes, or (if the whitelist is enabled) the URL
 * matches a line of the 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|bool Image HTML, or false if no image should be made
 */
private function maybeMakeExternalImage( $url ) {
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
	$imagesexception = !empty( $imagesfrom );

	# $imagesfrom could be either a single string or an array of strings;
	# either way the URL has to start with (one of) the prefix(es)
	$imagematch = false;
	if ( $imagesexception ) {
		if ( is_array( $imagesfrom ) ) {
			foreach ( $imagesfrom as $match ) {
				if ( strpos( $url, $match ) === 0 ) {
					$imagematch = true;
					break;
				}
			}
		} else {
			$imagematch = ( strpos( $url, $imagesfrom ) === 0 );
		}
	}

	$text = false;
	$allowed = $this->mOptions->getAllowExternalImages()
		|| ( $imagesexception && $imagematch );
	if ( $allowed && preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
		# Image found
		$text = Linker::makeExternalImage( $url );
	}

	# Done unless there's no image yet, the whitelist is enabled and the
	# URL at least looks like an image
	if ( $text
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $text;
	}

	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);

	foreach ( $whitelist as $entry ) {
		# Ignore blank entries/comments
		if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
			continue;
		}
		# Sanitize the regex fragment and make it case-insensitive
		$entryRegex = '/' . str_replace( '/', '\\/', $entry ) . '/i';
		if ( preg_match( $entryRegex, $url ) ) {
			# Image matches a whitelist entry
			$text = Linker::makeExternalImage( $url );
			break;
		}
	}

	return $text;
}
/**
 * Process [[ ]] wikilinks in the text and merge the resulting link holders
 * into the parser's own holder array.
 *
 * @param string $s
 * @return string Processed text, which may still contain link holders
 */
public function replaceInternalLinks( $s ) {
	# replaceInternalLinks2() rewrites $s in place and hands back the holders
	$holders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $holders );

	return $s;
}
1995 
/**
 * Process [[ ]] wikilinks in $s (modified in place).
 *
 * The text is split on "[[" and each piece is examined in turn. Depending on
 * the target, a piece becomes a language link, an image, a category
 * registration, a self-link, a media link, an immediately rendered known
 * link, or a link holder whose existence check is deferred. Pieces that are
 * not valid links are copied back as raw text.
 *
 * @param string &$s Text to process; receives the processed text
 * @throws MWException If $this->mTitle is null
 * @return LinkHolderArray Holders created while processing
 */
public function replaceInternalLinks2( &$s ) {
	# $wgContLang must be declared here: it supplies the link prefix charset below
	global $wgExtraInterlanguageLinkPrefixes, $wgContLang;

	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $wgContLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = array();
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();

	// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		// @codingStandardsIgnoreEnd

		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
			# the real problem is with the $e1 regex
			# See bug 1300.
			#
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			#
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( array( '<', '>' ), array( '&lt;', '&gt;' ), rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = rawurldecode( $m[1] );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = $m[1];

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $origLink, $text );
		} else {
			$link = $origLink;
		}

		# A leading ':' forces an ordinary link, even to a file or category
		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		$nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) );
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
		} else {
			# Bug 4598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
				)
			) {
				# Bug 24502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				$s = rtrim( $s . $prefix );
				$s .= trim( $trail, "\n" ) == '' ? '' : $prefix . $trail;
				continue;
			}

			if ( $ns == NS_FILE ) {
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
				} else {
					$s .= $prefix . $trail;
				}
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				$s = rtrim( $s . "\n" ); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				# Drop the prefix and trail if they consist of newlines only
				$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = array();
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				array( $this, $nt, &$options, &$descQuery ) );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		#
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix );
		}
	}
	return $holders;
}
2309 
/**
 * Render a link to a page that is known to exist, armoured so that later
 * passes don't treat URLs inside it as external links.
 *
 * @param Title $nt Link target
 * @param string $text Link text; the prefixed title is used when empty
 * @param array|string $query Query parameters; a string is converted with
 *   wfCgiToArray()
 * @param string $trail Text following the link
 * @param string $prefix Text to prepend to the link text
 * @return string Armoured link HTML plus the part of the trail left outside it
 */
public function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
	# $inside is the part of the trail that is drawn into the link text
	list( $inside, $trail ) = Linker::splitTrail( $trail );

	if ( is_string( $query ) ) {
		$query = wfCgiToArray( $query );
	}

	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$link = Linker::linkKnown( $nt, $prefix . $label . $inside, array(), $query );

	return $this->armorLinks( $link ) . $trail;
}
2338 
/**
 * Insert a NOPARSE marker (the unique prefix followed by "NOPARSE") in front
 * of every URL protocol in the text.
 *
 * @param string $text
 * @return string
 */
public function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$marked = $this->mUniqPrefix . 'NOPARSE$1';

	return preg_replace( $protocolPattern, $marked, $text );
}
2353 
/**
 * Whether the namespace of the page being parsed has subpages enabled.
 *
 * @return bool
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$namespace = $this->mTitle->getNamespace();

	return MWNamespace::hasSubpages( $namespace );
}
2362 
/**
 * Normalise a possible subpage link against the current title; a thin
 * wrapper around Linker::normalizeSubpageLink().
 *
 * @param string $target Link target as written
 * @param string &$text Link text, passed on by reference
 * @return string Result of Linker::normalizeSubpageLink()
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$normalized = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );

	return $normalized;
}
2374 
/**
 * Close the currently open section element (if any) and reset the block
 * state: no open section, not inside <pre>.
 *
 * @return string The closing tag followed by a newline, or '' if nothing was open
 */
public function closeParagraph() {
	$closingTag = ( $this->mLastSection != '' )
		? '</' . $this->mLastSection . ">\n"
		: '';

	$this->mInPre = false;
	$this->mLastSection = '';

	return $closingTag;
}
2390 
/**
 * Get the length of the longest common prefix of two strings.
 *
 * @param string $st1
 * @param string $st2
 * @return int Number of leading bytes the two strings share
 */
public function getCommon( $st1, $st2 ) {
	# The common prefix can't be longer than the shorter string
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	$common = 0;
	while ( $common < $limit && $st1[$common] == $st2[$common] ) {
		++$common;
	}

	return $common;
}
2415 
/**
 * Open a list for the given prefix character, closing any open paragraph
 * first.
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string HTML: the paragraph close (if any) plus the list and item
 *   opening tags, or an error comment for an unknown character
 */
public function openList( $char ) {
	static $openers = array(
		'*' => '<ul><li>',
		'#' => '<ol><li>',
		':' => '<dl><dd>',
		';' => '<dl><dt>',
	);

	# The paragraph is closed even if $char turns out to be invalid
	$result = $this->closeParagraph();

	if ( !is_string( $char ) || !isset( $openers[$char] ) ) {
		return '<!-- ERR 1 -->';
	}

	if ( $char === ';' ) {
		# Remember that a <dt> is open
		$this->mDTopen = true;
	}

	return $result . $openers[$char];
}
2443 
/**
 * Close the current list item and open the next one at the same level.
 *
 * For definition lists this also tracks in mDTopen whether the item just
 * opened is a term (<dt>) or a definition (<dd>).
 *
 * @param string $char One of '*', '#', ':' or ';'
 * @return string HTML, or an error comment for an unknown character
 */
public function nextItem( $char ) {
	if ( $char === '*' || $char === '#' ) {
		return "</li>\n<li>";
	}

	if ( $char !== ':' && $char !== ';' ) {
		return '<!-- ERR 2 -->';
	}

	# Close whichever definition-list element is currently open
	$close = $this->mDTopen ? "</dt>\n" : "</dd>\n";

	$isTerm = ( $char === ';' );
	$this->mDTopen = $isTerm;

	return $close . ( $isTerm ? '<dt>' : '<dd>' );
}
2469 
/**
 * Close the innermost item and list for the given prefix character.
 *
 * @param string $char One of '*', '#' or ':'
 * @return string HTML, or an error comment for any other character
 */
public function closeList( $char ) {
	if ( $char === '*' ) {
		return "</li></ul>";
	}
	if ( $char === '#' ) {
		return "</li></ol>";
	}
	if ( $char !== ':' ) {
		return '<!-- ERR 3 -->';
	}

	# Definition list: the open element is either a definition or a term
	if ( !$this->mDTopen ) {
		return "</dd></dl>";
	}
	$this->mDTopen = false;
	return "</dt></dl>";
}
/**
 * Build block-level markup from wikitext, one input line at a time:
 * lists from lines starting with '*', '#', ':' or ';', and paragraph
 * or preformatted sections around the remaining text.
 *
 * State carried between lines lives in local variables ($lastPrefix,
 * $inBlockElem, $paragraphStack, $inBlockquote) and in the members
 * $this->mInPre, $this->mLastSection and $this->mDTopen.
 *
 * @param string $text Text to process
 * @param bool $linestart Whether $text begins at the start of a line;
 *   when false, the first line is copied to the output untouched
 * @return string The text with list and paragraph markup added
 */
2504  public function doBlockLevels( $text, $linestart ) {
2505 
2506  # Parsing through the text line by line. The main thing
2507  # happening here is handling of block-level elements p, pre,
2508  # and making lists from lines starting with * # : etc.
2509  #
2510  $textLines = StringUtils::explode( "\n", $text );
2511 
2512  $lastPrefix = $output = '';
2513  $this->mDTopen = $inBlockElem = false;
2514  $prefixLength = 0;
2515  $paragraphStack = false;
2516  $inBlockquote = false;
2517 
2518  foreach ( $textLines as $oLine ) {
2519  # Fix up $linestart
2520  if ( !$linestart ) {
2521  $output .= $oLine;
2522  $linestart = true;
2523  continue;
2524  }
2525  # * = ul
2526  # # = ol
2527  # ; = dt
2528  # : = dd
2529 
2530  $lastPrefixLength = strlen( $lastPrefix );
2531  $preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
2532  $preOpenMatch = preg_match( '/<pre/i', $oLine );
2533  # If not in a <pre> element, scan for and figure out what prefixes are there.
2534  if ( !$this->mInPre ) {
2535  # Multiple prefixes may abut each other for nested lists.
2536  $prefixLength = strspn( $oLine, '*#:;' );
2537  $prefix = substr( $oLine, 0, $prefixLength );
2538 
2539  # eh?
2540  # ; and : are both from definition-lists, so they're equivalent
2541  # for the purposes of determining whether or not we need to open/close
2542  # elements.
2543  $prefix2 = str_replace( ';', ':', $prefix );
2544  $t = substr( $oLine, $prefixLength );
2545  $this->mInPre = (bool)$preOpenMatch;
2546  } else {
2547  # Don't interpret any other prefixes in preformatted text
2548  $prefixLength = 0;
2549  $prefix = $prefix2 = '';
2550  $t = $oLine;
2551  }
2552 
2553  # List generation
2554  if ( $prefixLength && $lastPrefix === $prefix2 ) {
2555  # Same as the last item, so no need to deal with nesting or opening stuff
2556  $output .= $this->nextItem( substr( $prefix, -1 ) );
2557  $paragraphStack = false;
2558 
2559  if ( substr( $prefix, -1 ) === ';' ) {
2560  # The one nasty exception: definition lists work like this:
2561  # ; title : definition text
2562  # So we check for : in the remainder text to split up the
2563  # title and definition, without b0rking links.
2564  $term = $t2 = '';
2565  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2566  $t = $t2;
2567  $output .= $term . $this->nextItem( ':' );
2568  }
2569  }
2570  } elseif ( $prefixLength || $lastPrefixLength ) {
2571  # We need to open or close prefixes, or both.
2572 
2573  # Either open or close a level...
2574  $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
2575  $paragraphStack = false;
2576 
2577  # Close all the prefixes which aren't shared.
2578  while ( $commonPrefixLength < $lastPrefixLength ) {
2579  $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
2580  --$lastPrefixLength;
2581  }
2582 
2583  # Continue the current prefix if appropriate.
2584  if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
2585  $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
2586  }
2587 
2588  # Open prefixes where appropriate.
2589  if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
2590  $output .= "\n";
2591  }
2592  while ( $prefixLength > $commonPrefixLength ) {
2593  $char = substr( $prefix, $commonPrefixLength, 1 );
2594  $output .= $this->openList( $char );
2595 
2596  if ( ';' === $char ) {
2597  # @todo FIXME: This is dupe of code above
2598  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2599  $t = $t2;
2600  $output .= $term . $this->nextItem( ':' );
2601  }
2602  }
2603  ++$commonPrefixLength;
2604  }
2605  if ( !$prefixLength && $lastPrefix ) {
2606  $output .= "\n";
2607  }
2608  $lastPrefix = $prefix2;
2609  }
2610 
2611  # If we have no prefixes, go to paragraph mode.
2612  if ( 0 == $prefixLength ) {
2613  # No prefix (not in list)--go to paragraph mode
2614  # XXX: use a stack for nestable elements like span, table and div
2615  $openmatch = preg_match(
2616  '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
2617  . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
2618  $t
2619  );
2620  $closematch = preg_match(
2621  '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
2622  . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
2623  . $this->mUniqPrefix
2624  . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
2625  $t
2626  );
2627 
2628  if ( $openmatch || $closematch ) {
2629  $paragraphStack = false;
2630  # @todo bug 5718: paragraph closed
2631  $output .= $this->closeParagraph();
2632  if ( $preOpenMatch && !$preCloseMatch ) {
2633  $this->mInPre = true;
2634  }
2635  $bqOffset = 0;
2636  while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset ) ) {
2637  $inBlockquote = !$bqMatch[1][0]; // is this a close tag?
2638  $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
2639  }
2640  $inBlockElem = !$closematch;
2641  } elseif ( !$inBlockElem && !$this->mInPre ) {
2642  if ( ' ' == substr( $t, 0, 1 )
2643  && ( $this->mLastSection === 'pre' || trim( $t ) != '' )
2644  && !$inBlockquote
2645  ) {
2646  # pre
2647  if ( $this->mLastSection !== 'pre' ) {
2648  $paragraphStack = false;
2649  $output .= $this->closeParagraph() . '<pre>';
2650  $this->mLastSection = 'pre';
2651  }
2652  $t = substr( $t, 1 );
2653  } else {
2654  # paragraph
2655  if ( trim( $t ) === '' ) {
2656  if ( $paragraphStack ) {
2657  $output .= $paragraphStack . '<br />';
2658  $paragraphStack = false;
2659  $this->mLastSection = 'p';
2660  } else {
2661  if ( $this->mLastSection !== 'p' ) {
2662  $output .= $this->closeParagraph();
2663  $this->mLastSection = '';
2664  $paragraphStack = '<p>';
2665  } else {
2666  $paragraphStack = '</p><p>';
2667  }
2668  }
2669  } else {
2670  if ( $paragraphStack ) {
2671  $output .= $paragraphStack;
2672  $paragraphStack = false;
2673  $this->mLastSection = 'p';
2674  } elseif ( $this->mLastSection !== 'p' ) {
2675  $output .= $this->closeParagraph() . '<p>';
2676  $this->mLastSection = 'p';
2677  }
2678  }
2679  }
2680  }
2681  }
2682  # somewhere above we forget to get out of pre block (bug 785)
2683  if ( $preCloseMatch && $this->mInPre ) {
2684  $this->mInPre = false;
2685  }
# Emit the line unless a blank line has just parked a pending paragraph
# tag in $paragraphStack; in that case nothing is written for this line.
2686  if ( $paragraphStack === false ) {
2687  $output .= $t;
2688  if ( $prefixLength === 0 ) {
2689  $output .= "\n";
2690  }
2691  }
2692  }
# After the last line: close every list level that is still open.
2693  while ( $prefixLength ) {
2694  $output .= $this->closeList( $prefix2[$prefixLength - 1] );
2695  --$prefixLength;
2696  if ( !$prefixLength ) {
2697  $output .= "\n";
2698  }
2699  }
# Close a paragraph-level section that the last line left open.
2700  if ( $this->mLastSection != '' ) {
2701  $output .= '</' . $this->mLastSection . '>';
2702  $this->mLastSection = '';
2703  }
2704 
2705  return $output;
2706  }
2707 
/**
 * Split a string at the first ':' that is not inside an HTML tag, a
 * nested element or an HTML comment.
 *
 * Used for definition lists of the form "; term : definition", where
 * the term may itself contain markup.
 *
 * @param string $str Text to scan
 * @param string &$before Set to the text preceding the colon, if one is found
 * @param string &$after Set to the text following the colon, if one is found
 * @throws MWException If the state machine reaches an unknown state
 * @return int|bool Byte offset of the colon in $str, or false when no
 *   usable colon exists (including malformed tag nesting)
 */
public function findColonNoLinks( $str, &$before, &$after ) {
	$pos = strpos( $str, ':' );
	if ( $pos === false ) {
		# Nothing to find!
		return false;
	}

	$lt = strpos( $str, '<' );
	if ( $lt === false || $lt > $pos ) {
		# Easy; no tag nesting to worry about
		$before = substr( $str, 0, $pos );
		$after = substr( $str, $pos + 1 );
		return $pos;
	}

	# Ugly state machine to walk through avoiding tags.
	# $stack counts the elements that are currently open.
	$state = self::COLON_STATE_TEXT;
	$stack = 0;
	$len = strlen( $str );
	for ( $i = 0; $i < $len; $i++ ) {
		$c = $str[$i];

		switch ( $state ) {
			# (Using the number is a performance hack for common cases)
			case 0: # self::COLON_STATE_TEXT:
				switch ( $c ) {
					case "<":
						# Could be either a <start> tag or an </end> tag
						$state = self::COLON_STATE_TAGSTART;
						break;
					case ":":
						if ( $stack == 0 ) {
							# We found it!
							$before = substr( $str, 0, $i );
							$after = substr( $str, $i + 1 );
							return $i;
						}
						# Embedded in a tag; don't break it.
						break;
					default:
						# Skip ahead looking for something interesting
						$colon = strpos( $str, ':', $i );
						if ( $colon === false ) {
							# Nothing else interesting
							return false;
						}
						$lt = strpos( $str, '<', $i );
						if ( $stack === 0 ) {
							if ( $lt === false || $colon < $lt ) {
								# We found it! Report the colon's own offset,
								# not the position the scan started from.
								$before = substr( $str, 0, $colon );
								$after = substr( $str, $colon + 1 );
								return $colon;
							}
						}
						if ( $lt === false ) {
							# Nothing else interesting to find; abort!
							# We're nested, but there's no close tags left. Abort!
							break 2;
						}
						# Skip ahead to next tag start
						$i = $lt;
						$state = self::COLON_STATE_TAGSTART;
				}
				break;
			case 1: # self::COLON_STATE_TAG:
				# In a <tag>
				switch ( $c ) {
					case ">":
						$stack++;
						$state = self::COLON_STATE_TEXT;
						break;
					case "/":
						# Slash may be followed by >?
						$state = self::COLON_STATE_TAGSLASH;
						break;
					default:
						# ignore
				}
				break;
			case 2: # self::COLON_STATE_TAGSTART:
				switch ( $c ) {
					case "/":
						$state = self::COLON_STATE_CLOSETAG;
						break;
					case "!":
						$state = self::COLON_STATE_COMMENT;
						break;
					case ">":
						# Illegal early close? This shouldn't happen D:
						$state = self::COLON_STATE_TEXT;
						break;
					default:
						$state = self::COLON_STATE_TAG;
				}
				break;
			case 3: # self::COLON_STATE_CLOSETAG:
				# In a </tag>
				if ( $c === ">" ) {
					$stack--;
					if ( $stack < 0 ) {
						wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
						return false;
					}
					$state = self::COLON_STATE_TEXT;
				}
				break;
			case self::COLON_STATE_TAGSLASH:
				if ( $c === ">" ) {
					# Yes, a self-closed tag <blah/>
					$state = self::COLON_STATE_TEXT;
				} else {
					# Probably we're jumping the gun, and this is an attribute
					$state = self::COLON_STATE_TAG;
				}
				break;
			case 5: # self::COLON_STATE_COMMENT:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASH;
				}
				break;
			case self::COLON_STATE_COMMENTDASH:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASHDASH;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			case self::COLON_STATE_COMMENTDASHDASH:
				if ( $c === ">" ) {
					$state = self::COLON_STATE_TEXT;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			default:
				throw new MWException( "State machine error in " . __METHOD__ );
		}
	}
	if ( $stack > 0 ) {
		wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
		return false;
	}
	return false;
}
2864 
/**
 * Return the value of a variable-style magic word (e.g. {{PAGENAME}},
 * {{CURRENTMONTH}}, {{REVISIONID}}).
 *
 * Values computed by a case that ends in `break` are stored in
 * $this->mVarCache under $index and are served from that cache on later
 * calls, unless the ParserGetVariableValueVarCache hook returns false.
 * Cases that `return` directly, and the default case, are not cached here.
 *
 * The revision-related cases and 'pageid' on a non-existent page set the
 * 'vary-revision' flag on the parser output.
 *
 * @param string $index Magic word ID; compared against the case labels below
 * @param bool|PPFrame $frame Handed to the ParserGetVariableValueSwitch hook
 *   (presumably a PPFrame when given; confirm against callers)
 * @throws MWException When no title has been set on the parser
 * @return mixed The value for the magic word; null for 'pageid' on a page
 *   with article ID 0, or whatever the hook leaves in $ret for unknown IDs
 */
2876  public function getVariableValue( $index, $frame = false ) {
2877  global $wgContLang, $wgSitename, $wgServer, $wgServerName;
2878  global $wgArticlePath, $wgScriptPath, $wgStylePath;
2879 
2880  if ( is_null( $this->mTitle ) ) {
2881  // If no title set, bad things are going to happen
2882  // later. Title should always be set since this
2883  // should only be called in the middle of a parse
2884  // operation (but the unit-tests do funky stuff)
2885  throw new MWException( __METHOD__ . ' Should only be '
2886  . ' called while parsing (no title set)' );
2887  }
2888 
2893  if ( Hooks::run( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) {
2894  if ( isset( $this->mVarCache[$index] ) ) {
2895  return $this->mVarCache[$index];
2896  }
2897  }
2898 
2899  $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2900  Hooks::run( 'ParserGetVariableValueTs', array( &$this, &$ts ) );
2901 
2902  $pageLang = $this->getFunctionLang();
2903 
2904  switch ( $index ) {
2905  case '!':
2906  $value = '|';
2907  break;
2908  case 'currentmonth':
2909  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
2910  break;
2911  case 'currentmonth1':
2912  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2913  break;
2914  case 'currentmonthname':
2915  $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2916  break;
2917  case 'currentmonthnamegen':
2918  $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2919  break;
2920  case 'currentmonthabbrev':
2921  $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2922  break;
2923  case 'currentday':
2924  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
2925  break;
2926  case 'currentday2':
2927  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
2928  break;
2929  case 'localmonth':
2930  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
2931  break;
2932  case 'localmonth1':
2933  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2934  break;
2935  case 'localmonthname':
2936  $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2937  break;
2938  case 'localmonthnamegen':
2939  $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2940  break;
2941  case 'localmonthabbrev':
2942  $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2943  break;
2944  case 'localday':
2945  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
2946  break;
2947  case 'localday2':
2948  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
2949  break;
2950  case 'pagename':
2951  $value = wfEscapeWikiText( $this->mTitle->getText() );
2952  break;
2953  case 'pagenamee':
2954  $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2955  break;
2956  case 'fullpagename':
2957  $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2958  break;
2959  case 'fullpagenamee':
2960  $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2961  break;
2962  case 'subpagename':
2963  $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2964  break;
2965  case 'subpagenamee':
2966  $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2967  break;
2968  case 'rootpagename':
2969  $value = wfEscapeWikiText( $this->mTitle->getRootText() );
2970  break;
2971  case 'rootpagenamee':
2972  $value = wfEscapeWikiText( wfUrlEncode( str_replace(
2973  ' ',
2974  '_',
2975  $this->mTitle->getRootText()
2976  ) ) );
2977  break;
2978  case 'basepagename':
2979  $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2980  break;
2981  case 'basepagenamee':
2982  $value = wfEscapeWikiText( wfUrlEncode( str_replace(
2983  ' ',
2984  '_',
2985  $this->mTitle->getBaseText()
2986  ) ) );
2987  break;
2988  case 'talkpagename':
2989  if ( $this->mTitle->canTalk() ) {
2990  $talkPage = $this->mTitle->getTalkPage();
2991  $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2992  } else {
2993  $value = '';
2994  }
2995  break;
2996  case 'talkpagenamee':
2997  if ( $this->mTitle->canTalk() ) {
2998  $talkPage = $this->mTitle->getTalkPage();
2999  $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
3000  } else {
3001  $value = '';
3002  }
3003  break;
3004  case 'subjectpagename':
3005  $subjPage = $this->mTitle->getSubjectPage();
3006  $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
3007  break;
3008  case 'subjectpagenamee':
3009  $subjPage = $this->mTitle->getSubjectPage();
3010  $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
3011  break;
3012  case 'pageid': // requested in bug 23427
3013  $pageid = $this->getTitle()->getArticleID();
3014  if ( $pageid == 0 ) {
3015  # 0 means the page doesn't exist in the database,
3016  # which means the user is previewing a new page.
3017  # The vary-revision flag must be set, because the magic word
3018  # will have a different value once the page is saved.
3019  $this->mOutput->setFlag( 'vary-revision' );
3020  wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
3021  }
3022  $value = $pageid ? $pageid : null;
3023  break;
3024  case 'revisionid':
3025  # Let the edit saving system know we should parse the page
3026  # *after* a revision ID has been assigned.
3027  $this->mOutput->setFlag( 'vary-revision' );
3028  wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
3029  $value = $this->mRevisionId;
3030  break;
3031  case 'revisionday':
3032  # Let the edit saving system know we should parse the page
3033  # *after* a revision ID has been assigned. This is for null edits.
3034  $this->mOutput->setFlag( 'vary-revision' );
3035  wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
3036  $value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
3037  break;
3038  case 'revisionday2':
3039  # Let the edit saving system know we should parse the page
3040  # *after* a revision ID has been assigned. This is for null edits.
3041  $this->mOutput->setFlag( 'vary-revision' );
3042  wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
3043  $value = substr( $this->getRevisionTimestamp(), 6, 2 );
3044  break;
3045  case 'revisionmonth':
3046  # Let the edit saving system know we should parse the page
3047  # *after* a revision ID has been assigned. This is for null edits.
3048  $this->mOutput->setFlag( 'vary-revision' );
3049  wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
3050  $value = substr( $this->getRevisionTimestamp(), 4, 2 );
3051  break;
3052  case 'revisionmonth1':
3053  # Let the edit saving system know we should parse the page
3054  # *after* a revision ID has been assigned. This is for null edits.
3055  $this->mOutput->setFlag( 'vary-revision' );
3056  wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
3057  $value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
3058  break;
3059  case 'revisionyear':
3060  # Let the edit saving system know we should parse the page
3061  # *after* a revision ID has been assigned. This is for null edits.
3062  $this->mOutput->setFlag( 'vary-revision' );
3063  wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
3064  $value = substr( $this->getRevisionTimestamp(), 0, 4 );
3065  break;
3066  case 'revisiontimestamp':
3067  # Let the edit saving system know we should parse the page
3068  # *after* a revision ID has been assigned. This is for null edits.
3069  $this->mOutput->setFlag( 'vary-revision' );
3070  wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
3071  $value = $this->getRevisionTimestamp();
3072  break;
3073  case 'revisionuser':
3074  # Let the edit saving system know we should parse the page
3075  # *after* a revision ID has been assigned. This is for null edits.
3076  $this->mOutput->setFlag( 'vary-revision' );
3077  wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
3078  $value = $this->getRevisionUser();
3079  break;
3080  case 'revisionsize':
3081  # Let the edit saving system know we should parse the page
3082  # *after* a revision ID has been assigned. This is for null edits.
3083  $this->mOutput->setFlag( 'vary-revision' );
3084  wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
3085  $value = $this->getRevisionSize();
3086  break;
3087  case 'namespace':
3088  $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
3089  break;
3090  case 'namespacee':
3091  $value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
3092  break;
3093  case 'namespacenumber':
3094  $value = $this->mTitle->getNamespace();
3095  break;
3096  case 'talkspace':
3097  $value = $this->mTitle->canTalk()
3098  ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
3099  : '';
3100  break;
3101  case 'talkspacee':
3102  $value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
3103  break;
3104  case 'subjectspace':
3105  $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
3106  break;
3107  case 'subjectspacee':
3108  $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
3109  break;
3110  case 'currentdayname':
3111  $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
3112  break;
3113  case 'currentyear':
3114  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
3115  break;
3116  case 'currenttime':
3117  $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
3118  break;
3119  case 'currenthour':
3120  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
3121  break;
3122  case 'currentweek':
3123  # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
3124  # int to remove the padding
3125  $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
3126  break;
3127  case 'currentdow':
3128  $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
3129  break;
3130  case 'localdayname':
3131  $value = $pageLang->getWeekdayName(
3132  (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
3133  );
3134  break;
3135  case 'localyear':
3136  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
3137  break;
3138  case 'localtime':
3139  $value = $pageLang->time(
3140  MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
3141  false,
3142  false
3143  );
3144  break;
3145  case 'localhour':
3146  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
3147  break;
3148  case 'localweek':
3149  # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
3150  # int to remove the padding
3151  $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
3152  break;
3153  case 'localdow':
3154  $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
3155  break;
3156  case 'numberofarticles':
3157  $value = $pageLang->formatNum( SiteStats::articles() );
3158  break;
3159  case 'numberoffiles':
3160  $value = $pageLang->formatNum( SiteStats::images() );
3161  break;
3162  case 'numberofusers':
3163  $value = $pageLang->formatNum( SiteStats::users() );
3164  break;
3165  case 'numberofactiveusers':
3166  $value = $pageLang->formatNum( SiteStats::activeUsers() );
3167  break;
3168  case 'numberofpages':
3169  $value = $pageLang->formatNum( SiteStats::pages() );
3170  break;
3171  case 'numberofadmins':
3172  $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
3173  break;
3174  case 'numberofedits':
3175  $value = $pageLang->formatNum( SiteStats::edits() );
3176  break;
3177  case 'currenttimestamp':
3178  $value = wfTimestamp( TS_MW, $ts );
3179  break;
3180  case 'localtimestamp':
3181  $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
3182  break;
3183  case 'currentversion':
# NOTE(review): no assignment to $value is visible for this case in this
# listing (one numbered line is missing); confirm against the full source,
# otherwise $value is undefined when it is cached and returned below.
3185  break;
# The cases from here to 'contentlanguage' return directly, so their
# values are never written to $this->mVarCache.
3186  case 'articlepath':
3187  return $wgArticlePath;
3188  case 'sitename':
3189  return $wgSitename;
3190  case 'server':
3191  return $wgServer;
3192  case 'servername':
3193  return $wgServerName;
3194  case 'scriptpath':
3195  return $wgScriptPath;
3196  case 'stylepath':
3197  return $wgStylePath;
3198  case 'directionmark':
3199  return $pageLang->getDirMark();
3200  case 'contentlanguage':
3201  global $wgLanguageCode;
3202  return $wgLanguageCode;
3203  case 'cascadingsources':
# NOTE(review): as with 'currentversion', the statement that sets $value
# is not visible in this listing; confirm against the full source.
3205  break;
3206  default:
# Unknown ID: let extensions supply the value through the hook. The hook
# receives the cache by reference; nothing is cached here.
3207  $ret = null;
3208  Hooks::run(
3209  'ParserGetVariableValueSwitch',
3210  array( &$this, &$this->mVarCache, &$index, &$ret, &$frame )
3211  );
3212 
3213  return $ret;
3214  }
3215 
# Remember the computed value for later calls; a falsy $index is not cached.
3216  if ( $index ) {
3217  $this->mVarCache[$index] = $value;
3218  }
3219 
3220  return $value;
3221  }
3222 
/**
 * Set up the magic word arrays used for variable and subst: matching.
 *
 * Populates $this->mVariables and $this->mSubstWords from the ID lists
 * reported by MagicWord.
 */
public function initialiseVariables() {
	$idsOfVariables = MagicWord::getVariableIDs();
	$idsOfSubstWords = MagicWord::getSubstIDs();

	$this->mVariables = new MagicWordArray( $idsOfVariables );
	$this->mSubstWords = new MagicWordArray( $idsOfSubstWords );
}
/**
 * Run the preprocessor over wikitext and return the resulting tree.
 *
 * Thin wrapper around the preprocessor's preprocessToObj().
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Bit field of preprocessor flags, e.g. self::PTD_FOR_INCLUSION
 * @return mixed Whatever preprocessToObj() returns for the given text
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3262 
/**
 * Split a string into its leading whitespace, its trimmed content and
 * its trailing whitespace.
 *
 * "Whitespace" is whatever ltrim()/rtrim() strip by default.
 *
 * @param string $s
 * @return array Three elements: leading whitespace, trimmed text,
 *   trailing whitespace ('' when there is none)
 */
public static function splitWhitespace( $s ) {
	$withoutLeading = ltrim( $s );
	$leading = substr( $s, 0, strlen( $s ) - strlen( $withoutLeading ) );

	$core = rtrim( $withoutLeading );
	$trailingLength = strlen( $withoutLeading ) - strlen( $core );
	# substr() with a negative zero offset would return the whole string,
	# so the no-trailing-whitespace case is handled explicitly.
	$trailing = $trailingLength > 0 ? substr( $withoutLeading, -$trailingLength ) : '';

	return array( $leading, $core, $trailing );
}
3282 
/**
 * Expand templates, variables and template arguments in a piece of wikitext.
 *
 * Empty text, and text longer than the configured maximum include size,
 * is returned unchanged.
 *
 * @param string $text Wikitext to expand
 * @param bool|PPFrame|array $frame Frame to expand in. False creates a new
 *   top-level frame; any other non-PPFrame value is handed to
 *   newCustomFrame() to build one
 * @param bool $argsOnly When true, expansion uses PPFrame::NO_TEMPLATES
 * @return string The expanded text
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text? Also, prevent too big inclusions!
	$length = strlen( $text );
	if ( $length < 1 || $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			wfDebug( __METHOD__ . " called using plain parameters instead of "
				. "a PPFrame instance. Creating custom frame.\n" );
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $this->preprocessToDom( $text ), $expandFlags );
}
3323 
/**
 * Turn a list of raw template arguments into an associative array.
 *
 * Arguments without '=' are positional and receive consecutive integer
 * keys starting at 1. Arguments of the form "name=value" are split at
 * the first '='; both name and value are trimmed, and a later argument
 * with the same name overwrites an earlier one. An empty name is kept
 * and stored under the '' key.
 *
 * @param array $args List of argument strings
 * @return array Map of parameter name or position to value
 */
public static function createAssocArgs( $args ) {
	$assocArgs = array();
	$index = 1;
	foreach ( $args as $arg ) {
		$parts = explode( '=', $arg, 2 );
		if ( count( $parts ) === 1 ) {
			# Positional argument: stored untrimmed
			$assocArgs[$index++] = $arg;
		} else {
			# trim() always yields a string, so no further checks are
			# needed on either side.
			$assocArgs[trim( $parts[0] )] = trim( $parts[1] );
		}
	}

	return $assocArgs;
}
3352 
/**
 * Record that a parser limitation was hit: add a warning to the parser
 * output and put the page in the matching tracking category.
 *
 * The warning text comes from the message "<type>-warning", rendered in
 * the user language from the parser options; the category is
 * "<type>-category".
 *
 * @param string $limitationType Base name of the limitation's messages
 * @param string|int $current Current value, passed as a numeric message parameter
 * @param string|int $max Maximum allowed value, passed as a numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max does no harm when the message does not use them.
	$message = wfMessage( $limitationType . '-warning' )->numParams( $current, $max );
	$localised = $message->inLanguage( $this->mOptions->getUserLangObj() );

	$this->mOutput->addWarning( $localised->text() );
	$this->addTrackingCategory( $limitationType . '-category' );
}
3384 
3397  public function braceSubstitution( $piece, $frame ) {
3398 
3399  // Flags
3400 
3401  // $text has been filled
3402  $found = false;
3403  // wiki markup in $text should be escaped
3404  $nowiki = false;
3405  // $text is HTML, armour it against wikitext transformation
3406  $isHTML = false;
3407  // Force interwiki transclusion to be done in raw mode not rendered
3408  $forceRawInterwiki = false;
3409  // $text is a DOM node needing expansion in a child frame
3410  $isChildObj = false;
3411  // $text is a DOM node needing expansion in the current frame
3412  $isLocalObj = false;
3413 
3414  # Title object, where $text came from
3415  $title = false;
3416 
3417  # $part1 is the bit before the first |, and must contain only title characters.
3418  # Various prefixes will be stripped from it later.
3419  $titleWithSpaces = $frame->expand( $piece['title'] );
3420  $part1 = trim( $titleWithSpaces );
3421  $titleText = false;
3422 
3423  # Original title text preserved for various purposes
3424  $originalTitle = $part1;
3425 
3426  # $args is a list of argument nodes, starting from index 0, not including $part1
3427  # @todo FIXME: If piece['parts'] is null then the call to getLength()
3428  # below won't work b/c this $args isn't an object
3429  $args = ( null == $piece['parts'] ) ? array() : $piece['parts'];
3430 
3431  $profileSection = null; // profile templates
3432 
3433  # SUBST
3434  if ( !$found ) {
3435 
3436  $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3437 
3438  # Possibilities for substMatch: "subst", "safesubst" or FALSE
3439  # Decide whether to expand template or keep wikitext as-is.
3440  if ( $this->ot['wiki'] ) {
3441  if ( $substMatch === false ) {
3442  $literal = true; # literal when in PST with no prefix
3443  } else {
3444  $literal = false; # expand when in PST with subst: or safesubst:
3445  }
3446  } else {
3447  if ( $substMatch == 'subst' ) {
3448  $literal = true; # literal when not in PST with plain subst:
3449  } else {
3450  $literal = false; # expand when not in PST with safesubst: or no prefix
3451  }
3452  }
3453  if ( $literal ) {
3454  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3455  $isLocalObj = true;
3456  $found = true;
3457  }
3458  }
3459 
3460  # Variables
3461  if ( !$found && $args->getLength() == 0 ) {
3462  $id = $this->mVariables->matchStartToEnd( $part1 );
3463  if ( $id !== false ) {
3464  $text = $this->getVariableValue( $id, $frame );
3465  if ( MagicWord::getCacheTTL( $id ) > -1 ) {
3466  $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
3467  }
3468  $found = true;
3469  }
3470  }
3471 
3472  # MSG, MSGNW and RAW
3473  if ( !$found ) {
3474  # Check for MSGNW:
3475  $mwMsgnw = MagicWord::get( 'msgnw' );
3476  if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3477  $nowiki = true;
3478  } else {
3479  # Remove obsolete MSG:
3480  $mwMsg = MagicWord::get( 'msg' );
3481  $mwMsg->matchStartAndRemove( $part1 );
3482  }
3483 
3484  # Check for RAW:
3485  $mwRaw = MagicWord::get( 'raw' );
3486  if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3487  $forceRawInterwiki = true;
3488  }
3489  }
3490 
3491  # Parser functions
3492  if ( !$found ) {
3493 
3494  $colonPos = strpos( $part1, ':' );
3495  if ( $colonPos !== false ) {
3496  $func = substr( $part1, 0, $colonPos );
3497  $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) );
3498  for ( $i = 0; $i < $args->getLength(); $i++ ) {
3499  $funcArgs[] = $args->item( $i );
3500  }
3501  try {
3502  $result = $this->callParserFunction( $frame, $func, $funcArgs );
3503  } catch ( Exception $ex ) {
3504  throw $ex;
3505  }
3506 
3507  # The interface for parser functions allows for extracting
3508  # flags into the local scope. Extract any forwarded flags
3509  # here.
3510  extract( $result );
3511  }
3512  }
3513 
3514  # Finish mangling title and then check for loops.
3515  # Set $title to a Title object and $titleText to the PDBK
3516  if ( !$found ) {
3517  $ns = NS_TEMPLATE;
3518  # Split the title into page and subpage
3519  $subpage = '';
3520  $relative = $this->maybeDoSubpageLink( $part1, $subpage );
3521  if ( $part1 !== $relative ) {
3522  $part1 = $relative;
3523  $ns = $this->mTitle->getNamespace();
3524  }
3525  $title = Title::newFromText( $part1, $ns );
3526  if ( $title ) {
3527  $titleText = $title->getPrefixedText();
3528  # Check for language variants if the template is not found
3529  if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
3530  $this->getConverterLanguage()->findVariantLink( $part1, $title, true );
3531  }
3532  # Do recursion depth check
3533  $limit = $this->mOptions->getMaxTemplateDepth();
3534  if ( $frame->depth >= $limit ) {
3535  $found = true;
3536  $text = '<span class="error">'
3537  . wfMessage( 'parser-template-recursion-depth-warning' )
3538  ->numParams( $limit )->inContentLanguage()->text()
3539  . '</span>';
3540  }
3541  }
3542  }
3543 
3544  # Load from database
3545  if ( !$found && $title ) {
3546  $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3547  if ( !$title->isExternal() ) {
3548  if ( $title->isSpecialPage()
3549  && $this->mOptions->getAllowSpecialInclusion()
3550  && $this->ot['html']
3551  ) {
3552  // Pass the template arguments as URL parameters.
3553  // "uselang" will have no effect since the Language object
3554  // is forced to the one defined in ParserOptions.
3555  $pageArgs = array();
3556  $argsLength = $args->getLength();
3557  for ( $i = 0; $i < $argsLength; $i++ ) {
3558  $bits = $args->item( $i )->splitArg();
3559  if ( strval( $bits['index'] ) === '' ) {
3560  $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3561  $value = trim( $frame->expand( $bits['value'] ) );
3562  $pageArgs[$name] = $value;
3563  }
3564  }
3565 
3566  // Create a new context to execute the special page
3567  $context = new RequestContext;
3568  $context->setTitle( $title );
3569  $context->setRequest( new FauxRequest( $pageArgs ) );
3570  $context->setUser( $this->getUser() );
3571  $context->setLanguage( $this->mOptions->getUserLangObj() );
3572  $ret = SpecialPageFactory::capturePath( $title, $context );
3573  if ( $ret ) {
3574  $text = $context->getOutput()->getHTML();
3575  $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3576  $found = true;
3577  $isHTML = true;
3578  $this->disableCache();
3579  }
3580  } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
3581  $found = false; # access denied
3582  wfDebug( __METHOD__ . ": template inclusion denied for " .
3583  $title->getPrefixedDBkey() . "\n" );
3584  } else {
3585  list( $text, $title ) = $this->getTemplateDom( $title );
3586  if ( $text !== false ) {
3587  $found = true;
3588  $isChildObj = true;
3589  }
3590  }
3591 
3592  # If the title is valid but undisplayable, make a link to it
3593  if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3594  $text = "[[:$titleText]]";
3595  $found = true;
3596  }
3597  } elseif ( $title->isTrans() ) {
3598  # Interwiki transclusion
3599  if ( $this->ot['html'] && !$forceRawInterwiki ) {
3600  $text = $this->interwikiTransclude( $title, 'render' );
3601  $isHTML = true;
3602  } else {
3603  $text = $this->interwikiTransclude( $title, 'raw' );
3604  # Preprocess it like a template
3605  $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3606  $isChildObj = true;
3607  }
3608  $found = true;
3609  }
3610 
3611  # Do infinite loop check
3612  # This has to be done after redirect resolution to avoid infinite loops via redirects
3613  if ( !$frame->loopCheck( $title ) ) {
3614  $found = true;
3615  $text = '<span class="error">'
3616  . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3617  . '</span>';
3618  wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
3619  }
3620  }
3621 
3622  # If we haven't found text to substitute by now, we're done
3623  # Recover the source wikitext and return it
3624  if ( !$found ) {
3625  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3626  if ( $profileSection ) {
3627  $this->mProfiler->scopedProfileOut( $profileSection );
3628  }
3629  return array( 'object' => $text );
3630  }
3631 
3632  # Expand DOM-style return values in a child frame
3633  if ( $isChildObj ) {
3634  # Clean up argument array
3635  $newFrame = $frame->newChild( $args, $title );
3636 
3637  if ( $nowiki ) {
3638  $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3639  } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3640  # Expansion is eligible for the empty-frame cache
3641  $text = $newFrame->cachedExpand( $titleText, $text );
3642  } else {
3643  # Uncached expansion
3644  $text = $newFrame->expand( $text );
3645  }
3646  }
3647  if ( $isLocalObj && $nowiki ) {
3648  $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3649  $isLocalObj = false;
3650  }
3651 
3652  if ( $profileSection ) {
3653  $this->mProfiler->scopedProfileOut( $profileSection );
3654  }
3655 
3656  # Replace raw HTML by a placeholder
3657  if ( $isHTML ) {
3658  $text = $this->insertStripItem( $text );
3659  } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3660  # Escape nowiki-style return values
3661  $text = wfEscapeWikiText( $text );
3662  } elseif ( is_string( $text )
3663  && !$piece['lineStart']
3664  && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3665  ) {
3666  # Bug 529: if the template begins with a table or block-level
3667  # element, it should be treated as beginning a new line.
3668  # This behavior is somewhat controversial.
3669  $text = "\n" . $text;
3670  }
3671 
3672  if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3673  # Error, oversize inclusion
3674  if ( $titleText !== false ) {
3675  # Make a working, properly escaped link if possible (bug 23588)
3676  $text = "[[:$titleText]]";
3677  } else {
3678  # This will probably not be a working link, but at least it may
3679  # provide some hint of where the problem is
3680  preg_replace( '/^:/', '', $originalTitle );
3681  $text = "[[:$originalTitle]]";
3682  }
3683  $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3684  . 'post-expand include size too large -->' );
3685  $this->limitationWarn( 'post-expand-template-inclusion' );
3686  }
3687 
3688  if ( $isLocalObj ) {
3689  $ret = array( 'object' => $text );
3690  } else {
3691  $ret = array( 'text' => $text );
3692  }
3694  return $ret;
3695  }
3696 
/**
 * Call a parser function hook and return its result as a normalised array.
 *
 * The function name is resolved first through the case-sensitive synonym
 * table and then, lower-cased with the content language, through the
 * case-insensitive one. The registered callback is invoked with the parser
 * as first argument, followed either by the frame and an array of PPNode
 * arguments (SFH_OBJECT_ARGS) or by the trimmed, expanded argument strings.
 *
 * @param PPFrame $frame Frame used to expand PPNode arguments
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments; values may be PPNode objects or plain
 *   strings (string keys are passed on as "key=value")
 * @throws MWException If the registered callback is not callable
 * @return array Always contains 'found'. When found, 'text' holds the hook
 *   output (a DOM if the hook returned 'noparse' => false, in which case
 *   'isChildObj' is set to true); any other flags returned by the hook are
 *   passed through unchanged.
 */
public function callParserFunction( $frame, $function, array $args = array() ) {
	global $wgContLang;

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return array( 'found' => false );
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		throw new MWException( "Function hook for $function is not callable\n" );
	}

	# Pass the parser through a local alias: taking a reference to $this
	# directly is rejected by newer PHP versions.
	$parser = $this;
	$allArgs = array( &$parser );
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = array();
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = array(
			'found' => true,
			'text' => $result,
		);
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# Array union: only adds 'found' if the hook did not set it itself
		$result += array(
			'found' => true,
		);
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	# Hooks that explicitly ask for parsing get their text turned into a DOM
	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3804 
/**
 * Get the preprocessed DOM of a template, using the per-parser caches.
 *
 * Redirects already seen are resolved through mTplRedirCache, and parsed
 * templates are served from mTplDomCache. On a cache miss the text is
 * fetched via fetchTemplateAndTitle() and preprocessed for inclusion.
 *
 * @param Title $title Template title to load
 * @return array Two-element array ( DOM or false, Title ); the Title is the
 *   one the text was finally loaded from (may differ from $title after a
 *   redirect). The DOM is false when no text could be fetched.
 */
public function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return array( $this->mTplDomCache[$titleText], $title );
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		# Note: isset() above is false for a stored false, so negative
		# results are recorded here but not served from the cache.
		$this->mTplDomCache[$titleText] = false;
		return array( false, $title );
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	# Remember the redirect so the next lookup goes straight to the target
	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			array( $title->getNamespace(), $title->getDBkey() );
	}

	return array( $dom, $title );
}
3844 
/**
 * Fetch the current revision of a title, memoised per parser instance.
 *
 * Results are kept in a lazily created MapCacheLRU of 100 entries, keyed by
 * the prefixed DB key. On a miss the callback configured in ParserOptions
 * is invoked with the title and this parser.
 *
 * @param Title $title
 * @return mixed Whatever the current-revision callback returned for $title
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$key = $title->getPrefixedDBkey();

	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	$cache = $this->currentRevisionCache;

	if ( !$cache->has( $key ) ) {
		// Defaults to Parser::statelessFetchRevision()
		$fetcher = $this->mOptions->getCurrentRevisionCallback();
		$cache->set( $key, call_user_func( $fetcher, $title, $this ) );
	}

	return $cache->get( $key );
}
3869 
/**
 * Stateless lookup of the revision for a title.
 *
 * @param Title $title
 * @param Parser|bool $parser Accepted for callback compatibility; unused here
 * @return mixed Result of Revision::newFromTitle() for $title
 */
public static function statelessFetchRevision( $title, $parser = false ) {
	$revision = Revision::newFromTitle( $title );
	return $revision;
}
3882 
/**
 * Fetch the unparsed text of a template and register its dependencies.
 *
 * Invokes the template callback from ParserOptions, records every entry of
 * the returned 'deps' list on the parser output, and flags the output as
 * 'vary-revision' when one of the dependencies is the page being parsed.
 *
 * @param Title $title
 * @return array Two-element array ( text, final Title ); the final title
 *   falls back to $title when the callback does not supply 'finalTitle'
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );
	$text = $fetched['text'];

	if ( isset( $fetched['finalTitle'] ) ) {
		$resolvedTitle = $fetched['finalTitle'];
	} else {
		$resolvedTitle = $title;
	}

	if ( isset( $fetched['deps'] ) ) {
		foreach ( $fetched['deps'] as $dependency ) {
			$depTitle = $dependency['title'];
			$this->mOutput->addTemplate( $depTitle, $dependency['page_id'], $dependency['rev_id'] );
			// A page transcluding itself renders differently once the
			// new version is saved, so the output must vary by revision.
			if ( $depTitle->equals( $this->getTitle() ) ) {
				$this->mOutput->setFlag( 'vary-revision' );
			}
		}
	}

	return array( $text, $resolvedTitle );
}
3906 
/**
 * Fetch only the unparsed text of a template, registering its dependencies.
 *
 * @param Title $title
 * @return mixed First element of fetchTemplateAndTitle()'s result (the text)
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );
	return $text;
}
3916 
/**
 * Static function to get a template, following at most one redirect.
 *
 * For each title visited, the 'BeforeParserFetchTemplateAndtitle' hook may
 * skip the fetch or select a specific revision id. Every title visited (and
 * the title of the fetched revision, when different) is recorded in 'deps'.
 * Missing pages in the MediaWiki namespace fall back to the corresponding
 * interface message in the content language.
 *
 * @param Title $title
 * @param Parser|bool $parser Parser whose revision cache should be used,
 *   or false to load the revision directly
 * @return array Associative array with keys:
 *   - 'text': template text, or false if none could be loaded
 *   - 'finalTitle': last title the loop attempted to load
 *   - 'deps': list of arrays with 'title', 'page_id' and 'rev_id'
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	global $wgContLang;

	$text = $skip = false;
	$finalTitle = $title;
	$deps = array();

	# Loop to fetch the article, with up to 1 redirect
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			array( $parser, $title, &$skip, &$id ) );

		if ( $skip ) {
			$text = false;
			$deps[] = array(
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			);
			break;
		}
		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			$rev = Revision::newFromTitle( $title );
		}
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = LinkCache::singleton();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = array(
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id );
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = array(
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id );
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No page, but the title may still name an interface message
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		# A non-object target ends the loop via the is_object() condition
		$title = $content->getRedirectTarget();
	}
	return array(
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps );
}
4006 
/**
 * Fetch a file and register it as a dependency of the parser output.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed First element of fetchFileAndTitle()'s result (the file,
 *   or false when none was found)
 */
public function fetchFile( $title, $options = array() ) {
	list( $file ) = $this->fetchFileAndTitle( $title, $options );
	return $file;
}
4018 
/**
 * Fetch a file together with its final title, registering the dependency.
 *
 * The requested title is always recorded on the parser output with the
 * file's timestamp and SHA-1 (both false when no file was found). If the
 * file reports a different title, that title is recorded as well and
 * returned instead.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two-element array ( file or false, Title )
 */
public function fetchFileAndTitle( $title, $options = array() ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( $file ) {
		$fileTitle = $file->getTitle();
		if ( !$title->equals( $fileTitle ) ) {
			# Update fetched file title
			$title = $fileTitle;
			$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
		}
	}

	return array( $file, $title );
}
4040 
/**
 * Look up a file without registering it as a dependency.
 *
 * @param Title $title
 * @param array $options Lookup options; 'broken' forces a miss, 'sha1'
 *   switches to a lookup by key, otherwise the file is found by title
 * @return mixed The file found, or false when 'broken' is set
 */
protected function fetchFileNoRegister( $title, $options = array() ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return wfFindFile( $title, $options );
}
4061 
/**
 * Transclude a page from another wiki.
 *
 * Returns a content-language error message instead of the page when
 * $wgEnableScaryTranscluding is off or the target URL exceeds 255 bytes.
 *
 * @param Title $title Interwiki title to fetch
 * @param string $action Value for the 'action' URL parameter
 * @return string Fetched text or an error message
 */
public function interwikiTransclude( $title, $action ) {
	global $wgEnableScaryTranscluding;

	if ( !$wgEnableScaryTranscluding ) {
		$disabled = wfMessage( 'scarytranscludedisabled' );
		return $disabled->inContentLanguage()->text();
	}

	$query = array( 'action' => $action );
	$url = $title->getFullURL( $query );

	if ( strlen( $url ) <= 255 ) {
		return $this->fetchScaryTemplateMaybeFromCache( $url );
	}

	$tooLong = wfMessage( 'scarytranscludetoolong' );
	return $tooLong->inContentLanguage()->text();
}
4084 
/**
 * Fetch the content behind a URL, going through the 'transcache' table.
 *
 * A cached row no older than $wgTranscludeCacheExpiry seconds is returned
 * directly. Otherwise the URL is requested over HTTP; on success the
 * content is written back to the table, on failure a content-language
 * error message is returned (and nothing is cached).
 *
 * @param string $url
 * @return string Cached or freshly fetched content, or an error message
 */
public function fetchScaryTemplateMaybeFromCache( $url ) {
	global $wgTranscludeCacheExpiry;

	$dbr = wfGetDB( DB_SLAVE );
	$cutoff = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
	$row = $dbr->selectRow(
		'transcache',
		array( 'tc_time', 'tc_contents' ),
		array( 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $cutoff ) )
	);
	if ( $row ) {
		return $row->tc_contents;
	}

	$req = MWHttpRequest::factory( $url, array(), __METHOD__ );
	$status = $req->execute(); // Status object
	if ( !$status->isOK() ) {
		$httpStatus = $req->getStatus();
		if ( $httpStatus != 200 ) {
			// Though we failed to fetch the content, this status is useless.
			return wfMessage( 'scarytranscludefailed-httpstatus' )
				->params( $url, $httpStatus /* HTTP status */ )->inContentLanguage()->text();
		}
		return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
	}
	$text = $req->getContent();

	# Store the fresh copy, replacing any older row for the same URL
	$dbw = wfGetDB( DB_MASTER );
	$cacheRow = array(
		'tc_url' => $url,
		'tc_time' => $dbw->timestamp( time() ),
		'tc_contents' => $text
	);
	$dbw->replace( 'transcache', array( 'tc_url' ), $cacheRow );

	return $text;
}
4119 
/**
 * Triple brace replacement -- used for template arguments.
 *
 * Looks the argument up in the frame. If it is not set and a default part
 * is present (and the output type allows it), the default's child nodes are
 * returned; if nothing matches at all, the original {{{...}}} construct is
 * rebuilt. The argument size is charged against the 'arg' include limit.
 *
 * @param array $piece Needs 'title' and 'parts' entries
 * @param PPFrame $frame
 * @return array Either array( 'object' => ... ) or array( 'text' => ... )
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$argName = trim( $nameWithSpaces );

	$object = false;
	$text = $frame->getArgument( $argName );

	if ( $text === false && $parts->getLength() > 0 ) {
		if ( $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	$error = false;
	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$error = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $error !== false ) {
		$text .= $error;
	}

	if ( $object === false ) {
		return array( 'text' => $text );
	}

	return array( 'object' => $object );
}
4166 
/**
 * Return the text to be used for a given extension tag.
 *
 * In HTML output (or for function tag hooks in HTML/pre output) the
 * registered hook is called and its output used; otherwise the tag is
 * reconstructed as text. Unless the marker type is 'none', the result is
 * stored in the strip state and a unique marker is returned in its place.
 *
 * @param array $params Uses the entries:
 *   - 'name': tag name node (required)
 *   - 'attr': attribute text node (optional)
 *   - 'inner': inner content node (optional)
 *   - 'attributes': extra attributes as name => value (optional)
 *   - 'close': closing tag node; read when 'inner' is present and the tag
 *     is being reconstructed
 * @param PPFrame $frame
 * @throws MWException If a registered hook is not callable, or a hook
 *   requested an unknown marker type
 * @return string Hook output (marker type 'none') or a strip marker
 */
public function extensionSubstitution( $params, $frame ) {
	$name = $frame->expand( $params['name'] );
	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
	$marker = "{$this->mUniqPrefix}-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	if ( $isFunctionTag ) {
		$markerType = 'none';
	} else {
		$markerType = 'general';
	}
	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			# Array union: attributes parsed from the tag text take precedence
			$attributes = $attributes + $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $this->mTagHooks[$name] ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}
			$output = call_user_func_array( $this->mTagHooks[$name],
				array( $content, $attributes, $this, $frame ) );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}

			# Pass the parser through a local alias: taking a reference to
			# $this directly is rejected by newer PHP versions.
			$parser = $this;
			$output = call_user_func_array( $callback, array( &$parser, $frame, $content, $attributes ) );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			# Extract flags to local scope (to override $markerType)
			# NOTE(review): extract() lets a hook overwrite any local
			# variable here, not just $markerType; kept because it is
			# part of the existing hook interface.
			$flags = $output;
			$output = $flags[0];
			unset( $flags[0] );
			extract( $flags );
		}
	} else {
		# Not rendering the tag: rebuild its source text instead
		if ( is_null( $attrText ) ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4258 
/**
 * Add $size to the running include-size counter of the given type.
 *
 * The counter is left untouched when the addition would exceed the
 * maximum include size configured in ParserOptions.
 *
 * @param string $type Key into mIncludeSizes (e.g. 'arg', 'post-expand')
 * @param int $size Amount to add
 * @return bool False if the limit would be exceeded, true otherwise
 */
public function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4274 
/**
 * Count one more expensive parser function call.
 *
 * The counter is incremented unconditionally, even past the limit.
 *
 * @return bool True while the count is within the limit from ParserOptions
 */
public function incrementExpensiveFunctionCount() {
	$count = ++$this->mExpensiveFunctionCount;
	$limit = $this->mOptions->getExpensiveParserFunctionLimit();
	return $count <= $limit;
}
4284 
/**
 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__ and
 * record them in mDoubleUnderscores.
 *
 * The first __TOC__ is replaced by a placeholder comment and forces the
 * TOC position; further occurrences are removed. The remaining recognised
 * items update the parser output (gallery flag, index policy, tracking
 * categories) and are each stored as a page property.
 *
 * @param string $text Wikitext to scan
 * @return string The text with the double-underscore items handled
 */
public function doDoubleUnderscore( $text ) {

	# The position of __TOC__ needs to be recorded
	$mw = MagicWord::get( 'toc' );
	if ( $mw->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $mw->replace( '<!--MWTOC-->', $text, 1 );

		# Only keep the first one.
		$text = $mw->replace( '', $text );
	}

	# Now match and remove the rest of them
	$mwa = MagicWord::getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $mwa->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	# An explicit __TOC__ wins over __NOTOC__
	if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}
	# (bug 8068) Allow control over whether robots index a page.
	#
	# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
	# is not desirable, the last one on the page should win.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4343 
/**
 * Add a tracking category to the parser output for the current title.
 *
 * @param string $msg Tracking category identifier, forwarded unchanged
 * @return mixed Whatever the parser output's addTrackingCategory() returns
 */
public function addTrackingCategory( $msg ) {
	$output = $this->mOutput;
	return $output->addTrackingCategory( $msg, $this->mTitle );
}
4352 
4369  public function formatHeadings( $text, $origText, $isMain = true ) {
4370  global $wgMaxTocLevel, $wgExperimentalHtmlIds;
4371 
4372  # Inhibit editsection links if requested in the page
4373  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4374  $maybeShowEditLink = $showEditLink = false;
4375  } else {
4376  $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
4377  $showEditLink = $this->mOptions->getEditSection();
4378  }
4379  if ( $showEditLink ) {
4380  $this->mOutput->setEditSectionTokens( true );
4381  }
4382 
4383  # Get all headlines for numbering them and adding funky stuff like [edit]
4384  # links - this is for later, but we need the number of headlines right now
4385  $matches = array();
4386  $numMatches = preg_match_all(
4387  '/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
4388  $text,
4389  $matches
4390  );
4391 
4392  # if there are fewer than 4 headlines in the article, do not show TOC
4393  # unless it's been explicitly enabled.
4394  $enoughToc = $this->mShowToc &&
4395  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4396 
4397  # Allow user to stipulate that a page should have a "new section"
4398  # link added via __NEWSECTIONLINK__
4399  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4400  $this->mOutput->setNewSection( true );
4401  }
4402 
4403  # Allow user to remove the "new section"
4404  # link via __NONEWSECTIONLINK__
4405  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4406  $this->mOutput->hideNewSection( true );
4407  }
4408 
4409  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4410  # override above conditions and always show TOC above first header
4411  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4412  $this->mShowToc = true;
4413  $enoughToc = true;
4414  }
4415 
4416  # headline counter
4417  $headlineCount = 0;
4418  $numVisible = 0;
4419 
4420  # Ugh .. the TOC should have neat indentation levels which can be
4421  # passed to the skin functions. These are determined here
4422  $toc = '';
4423  $full = '';
4424  $head = array();
4425  $sublevelCount = array();
4426  $levelCount = array();
4427  $level = 0;
4428  $prevlevel = 0;
4429  $toclevel = 0;
4430  $prevtoclevel = 0;
4431  $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-" . self::MARKER_SUFFIX;
4432  $baseTitleText = $this->mTitle->getPrefixedDBkey();
4433  $oldType = $this->mOutputType;
4434  $this->setOutputType( self::OT_WIKI );
4435  $frame = $this->getPreprocessor()->newFrame();
4436  $root = $this->preprocessToDom( $origText );
4437  $node = $root->getFirstChild();
4438  $byteOffset = 0;
4439  $tocraw = array();
4440  $refers = array();
4441 
4442  foreach ( $matches[3] as $headline ) {
4443  $isTemplate = false;
4444  $titleText = false;
4445  $sectionIndex = false;
4446  $numbering = '';
4447  $markerMatches = array();
4448  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4449  $serial = $markerMatches[1];
4450  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4451  $isTemplate = ( $titleText != $baseTitleText );
4452  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4453  }
4454 
4455  if ( $toclevel ) {
4456  $prevlevel = $level;
4457  }
4458  $level = $matches[1][$headlineCount];
4459 
4460  if ( $level > $prevlevel ) {
4461  # Increase TOC level
4462  $toclevel++;
4463  $sublevelCount[$toclevel] = 0;
4464  if ( $toclevel < $wgMaxTocLevel ) {
4465  $prevtoclevel = $toclevel;
4466  $toc .= Linker::tocIndent();
4467  $numVisible++;
4468  }
4469  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4470  # Decrease TOC level, find level to jump to
4471 
4472  for ( $i = $toclevel; $i > 0; $i-- ) {
4473  if ( $levelCount[$i] == $level ) {
4474  # Found last matching level
4475  $toclevel = $i;
4476  break;
4477  } elseif ( $levelCount[$i] < $level ) {
4478  # Found first matching level below current level
4479  $toclevel = $i + 1;
4480  break;
4481  }
4482  }
4483  if ( $i == 0 ) {
4484  $toclevel = 1;
4485  }
4486  if ( $toclevel < $wgMaxTocLevel ) {
4487  if ( $prevtoclevel < $wgMaxTocLevel ) {
4488  # Unindent only if the previous toc level was shown :p
4489  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4490  $prevtoclevel = $toclevel;
4491  } else {
4492  $toc .= Linker::tocLineEnd();
4493  }
4494  }
4495  } else {
4496  # No change in level, end TOC line
4497  if ( $toclevel < $wgMaxTocLevel ) {
4498  $toc .= Linker::tocLineEnd();
4499  }
4500  }
4501 
4502  $levelCount[$toclevel] = $level;
4503 
4504  # count number of headlines for each level
4505  $sublevelCount[$toclevel]++;
4506  $dot = 0;
4507  for ( $i = 1; $i <= $toclevel; $i++ ) {
4508  if ( !empty( $sublevelCount[$i] ) ) {
4509  if ( $dot ) {
4510  $numbering .= '.';
4511  }
4512  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4513  $dot = 1;
4514  }
4515  }
4516 
4517  # The safe header is a version of the header text safe to use for links
4518 
4519  # Remove link placeholders by the link text.
4520  # <!--LINK number-->
4521  # turns into
4522  # link text with suffix
4523  # Do this before unstrip since link text can contain strip markers
4524  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4525 
4526  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4527  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4528 
4529  # Strip out HTML (first regex removes any tag not allowed)
4530  # Allowed tags are:
4531  # * <sup> and <sub> (bug 8393)
4532  # * <i> (bug 26375)
4533  # * <b> (r105284)
4534  # * <bdi> (bug 72884)
4535  # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
4536  #
4537  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4538  # to allow setting directionality in toc items.
4539  $tocline = preg_replace(
4540  array(
4541  '#<(?!/?(span|sup|sub|bdi|i|b)(?: [^>]*)?>).*?>#',
4542  '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b))(?: .*?)?>#'
4543  ),
4544  array( '', '<$1>' ),
4545  $safeHeadline
4546  );
4547  $tocline = trim( $tocline );
4548 
4549  # For the anchor, strip out HTML-y stuff period
4550  $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4551  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4552 
4553  # Save headline for section edit hint before it's escaped
4554  $headlineHint = $safeHeadline;
4555 
4556  if ( $wgExperimentalHtmlIds ) {
4557  # For reverse compatibility, provide an id that's
4558  # HTML4-compatible, like we used to.
4559  #
4560  # It may be worth noting, academically, that it's possible for
4561  # the legacy anchor to conflict with a non-legacy headline
4562  # anchor on the page. In this case likely the "correct" thing
4563  # would be to either drop the legacy anchors or make sure
4564  # they're numbered first. However, this would require people
4565  # to type in section names like "abc_.D7.93.D7.90.D7.A4"
4566  # manually, so let's not bother worrying about it.
4567  $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
4568  array( 'noninitial', 'legacy' ) );
4569  $safeHeadline = Sanitizer::escapeId( $safeHeadline );
4570 
4571  if ( $legacyHeadline == $safeHeadline ) {
4572  # No reason to have both (in fact, we can't)
4573  $legacyHeadline = false;
4574  }
4575  } else {
4576  $legacyHeadline = false;
4577  $safeHeadline = Sanitizer::escapeId( $safeHeadline,
4578  'noninitial' );
4579  }
4580 
4581  # HTML names must be case-insensitively unique (bug 10721).
4582  # This does not apply to Unicode characters per
4583  # http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
4584  # @todo FIXME: We may be changing them depending on the current locale.
4585  $arrayKey = strtolower( $safeHeadline );
4586  if ( $legacyHeadline === false ) {
4587  $legacyArrayKey = false;
4588  } else {
4589  $legacyArrayKey = strtolower( $legacyHeadline );
4590  }
4591 
4592  # Create the anchor for linking from the TOC to the section
4593  $anchor = $safeHeadline;
4594  $legacyAnchor = $legacyHeadline;
4595  if ( isset( $refers[$arrayKey] ) ) {
4596  for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4597  $anchor .= "_$i";
4598  $refers["${arrayKey}_$i"] = true;
4599  } else {
4600  $refers[$arrayKey] = true;
4601  }
4602  if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
4603  for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
4604  $legacyAnchor .= "_$i";
4605  $refers["${legacyArrayKey}_$i"] = true;
4606  } else {
4607  $refers[$legacyArrayKey] = true;
4608  }
4609 
4610  # Don't number the heading if it is the only one (looks silly)
4611  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4612  # the two are different if the line contains a link
4613  $headline = Html::element(
4614  'span',
4615  array( 'class' => 'mw-headline-number' ),
4616  $numbering
4617  ) . ' ' . $headline;
4618  }
4619 
4620  if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
4621  $toc .= Linker::tocLine( $anchor, $tocline,
4622  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4623  }
4624 
4625  # Add the section to the section tree
4626  # Find the DOM node for this header
4627  $noOffset = ( $isTemplate || $sectionIndex === false );
4628  while ( $node && !$noOffset ) {
4629  if ( $node->getName() === 'h' ) {
4630  $bits = $node->splitHeading();
4631  if ( $bits['i'] == $sectionIndex ) {
4632  break;
4633  }
4634  }
4635  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4636  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4637  $node = $node->getNextSibling();
4638  }
4639  $tocraw[] = array(
4640  'toclevel' => $toclevel,
4641  'level' => $level,
4642  'line' => $tocline,
4643  'number' => $numbering,
4644  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4645  'fromtitle' => $titleText,
4646  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4647  'anchor' => $anchor,
4648  );
4649 
4650  # give headline the correct <h#> tag
4651  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4652  // Output edit section links as markers with styles that can be customized by skins
4653  if ( $isTemplate ) {
4654  # Put a T flag in the section identifier, to indicate to extractSections()
4655  # that sections inside <includeonly> should be counted.
4656  $editsectionPage = $titleText;
4657  $editsectionSection = "T-$sectionIndex";
4658  $editsectionContent = null;
4659  } else {
4660  $editsectionPage = $this->mTitle->getPrefixedText();
4661  $editsectionSection = $sectionIndex;
4662  $editsectionContent = $headlineHint;
4663  }
4664  // We use a bit of pseudo-xml for editsection markers. The
4665  // language converter is run later on. Using a UNIQ style marker
4666  // leads to the converter screwing up the tokens when it
4667  // converts stuff. And trying to insert strip tags fails too. At
4668  // this point all real inputted tags have already been escaped,
4669  // so we don't have to worry about a user trying to input one of
4670  // these markers directly. We use a page and section attribute
4671  // to stop the language converter from converting these
4672  // important bits of data, but put the headline hint inside a
4673  // content block because the language converter is supposed to
4674  // be able to convert that piece of data.
4675  // Gets replaced with html in ParserOutput::getText
4676  $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4677  $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4678  if ( $editsectionContent !== null ) {
4679  $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4680  } else {
4681  $editlink .= '/>';
4682  }
4683  } else {
4684  $editlink = '';
4685  }
4686  $head[$headlineCount] = Linker::makeHeadline( $level,
4687  $matches['attrib'][$headlineCount], $anchor, $headline,
4688  $editlink, $legacyAnchor );
4689 
4690  $headlineCount++;
4691  }
4692 
4693  $this->setOutputType( $oldType );
4694 
4695  # Never ever show TOC if no headers
4696  if ( $numVisible < 1 ) {
4697  $enoughToc = false;
4698  }
4699 
4700  if ( $enoughToc ) {
4701  if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
4702  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4703  }
4704  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4705  $this->mOutput->setTOCHTML( $toc );
4706  $toc = self::TOC_START . $toc . self::TOC_END;
4707  $this->mOutput->addModules( 'mediawiki.toc' );
4708  }
4709 
4710  if ( $isMain ) {
4711  $this->mOutput->setSections( $tocraw );
4712  }
4713 
4714  # split up and insert constructed headlines
4715  $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4716  $i = 0;
4717 
4718  // build an array of document sections
4719  $sections = array();
4720  foreach ( $blocks as $block ) {
4721  // $head is zero-based, sections aren't.
4722  if ( empty( $head[$i - 1] ) ) {
4723  $sections[$i] = $block;
4724  } else {
4725  $sections[$i] = $head[$i - 1] . $block;
4726  }
4727 
4738  Hooks::run( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) );
4739 
4740  $i++;
4741  }
4742 
4743  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4744  // append the TOC at the beginning
4745  // Top anchor now in skin
4746  $sections[0] = $sections[0] . $toc . "\n";
4747  }
4748 
4749  $full .= join( '', $sections );
4750 
4751  if ( $this->mForceTocPosition ) {
4752  return str_replace( '<!--MWTOC-->', $toc, $full );
4753  } else {
4754  return $full;
4755  }
4756  }
4757 
/**
 * Transform wikitext before it is saved: normalise line endings,
 * optionally run the pre-save pass (pstPass2) and unstrip the result.
 *
 * @param string $text Wikitext to transform
 * @param Title $title Title handed to startParse()
 * @param User $user User set on the parser while the transform runs
 * @param ParserOptions $options Parser options; getPreSaveTransform()
 *   decides whether pstPass2() is applied
 * @param bool $clearState If true, lock the parser and clear its state first
 * @return string The transformed wikitext
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	if ( $clearState ) {
		# NOTE(review): presumably a scoped lock that is released when this
		# local goes out of scope, hence it is kept even though it is unused.
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	# Normalise line endings: CRLF first, then any remaining bare CR
	$text = str_replace( array( "\r\n", "\r" ), "\n", $text );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$transformed = $this->mStripState->unstripBoth( $text );

	# Reset the user again now that we are done
	$this->setUser( null );

	return $transformed;
}
4792 
/**
 * Pre-save transform helper: substitutes {{subst:...}} variables, expands
 * signatures (~~~, ~~~~, ~~~~~) and applies the "pipe trick" to links.
 *
 * @param string $text Wikitext to transform
 * @param User $user User whose signature replaces the tildes
 * @return string Transformed wikitext with trailing whitespace removed
 */
private function pstPass2( $text, $user ) {
	# Needed for timeanddate() below; without this declaration the
	# variable would be an undefined local.
	global $wgContLang;

	# Note: This is the timestamp saved as hardcoded wikitext to
	# the database, we use $wgContLang here in order to give
	# everyone the same signature and use the default one rather
	# than the one selected in each user's preferences.
	# (see also bug 12815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->format( 'T' ); # might vary on DST changeover!

	# Allow translation of timezones through wiki. format() can return
	# whatever the system uses, localised or not, so we cannot
	# ship premade translations.
	$key = 'timezone-' . strtolower( trim( $tzMsg ) );
	$msg = wfMessage( $key )->inContentLanguage();
	if ( $msg->exists() ) {
		$tzMsg = $msg->text();
	}

	$d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call.

	# Signatures. strtr() with an array tries the longest key first, so
	# five tildes are never mistaken for four plus one.
	$sigText = $this->getUserSig( $user );
	$text = strtr( $text, array(
		'~~~~~' => $d,
		'~~~~' => "$sigText $d",
		'~~~' => $sigText
	) );

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# Double-width punctuation, spelled as UTF-8 byte sequences so the
	# patterns survive ASCII-only tooling. The patterns run in byte mode
	# (no /u), so these simply match the raw bytes.
	$fwOpen = "\xEF\xBC\x88"; # U+FF08 FULLWIDTH LEFT PARENTHESIS
	$fwClose = "\xEF\xBC\x89"; # U+FF09 FULLWIDTH RIGHT PARENTHESIS
	$fwComma = "\xEF\xBC\x8C"; # U+FF0C FULLWIDTH COMMA

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page(context)|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?{$fwOpen}$tc+{$fwClose})\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |{$fwComma})$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	# Reverse pipe trick: borrow the namespace and context of the current title
	$t = $this->mTitle->getText();
	$m = array();
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	# Trim trailing whitespace
	$text = rtrim( $text );

	return $text;
}
4874 
/**
 * Build the wikitext signature for a user.
 *
 * A "fancy" signature that validates is cleaned and returned as-is;
 * otherwise the result is the 'signature' / 'signature-anon' message
 * rendered with the escaped user name and nickname.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use, or false to read the
 *   user's 'nickname' option
 * @param bool|null $fancySig Whether the nickname is raw markup, or null
 *   to read the user's 'fancysig' option
 * @return string Signature wikitext
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	global $wgMaxSigChars;

	$username = $user->getName();

	# Fill in whatever the caller left out from the user's preferences
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# An unset nickname means "use the plain user name"
	if ( $nickname == null ) {
		$nickname = $username;
	}

	if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
	} elseif ( $fancySig !== false ) {
		# The signature may contain markup, so it has to validate first
		if ( $this->validateSig( $nickname ) !== false ) {
			# Valid: clean it up (if needed) and hand it back directly
			return $this->cleanSig( $nickname, true );
		}

		# Invalid markup: fall back to the default signature
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
	}

	# Make sure the nickname does not itself contain a signature
	$nickname = self::cleanSigInSig( $nickname );

	# Still here, so produce a link to the user page via the message
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	if ( $user->isAnon() ) {
		$msgName = 'signature-anon';
	} else {
		$msgName = 'signature';
	}

	$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();

	return $message->title( $this->getTitle() )->text();
}
4931 
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|bool The unchanged text when well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4941 
/**
 * Clean up signature text: force substitution of every template call,
 * strip nested signature tildes and run the result through the preprocessor.
 *
 * @param string $text Signature text
 * @param bool $parsing True when called from within an ongoing parse; when
 *   false a fresh parse is started here and the result is unstripped
 * @return string Cleaned signature, or the input unchanged when the
 *   clean-signatures option is off
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		global $wgTitle;
		# NOTE(review): presumably a scoped lock tied to this local's lifetime
		$magicScopeVariable = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = MagicWord::get( 'subst' );
	# Matches any "{{" that is not already followed by the subst magic word
	$needsSubst = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$withSubst = '{{' . $subst->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $needsSubst, $withSubst, $text ) );

	$dom = $this->preprocessToDom( $text );
	$frame = $this->getPreprocessor()->newFrame();
	$text = $frame->expand( $dom );

	if ( $standalone ) {
		$text = $this->mStripState->unstripBoth( $text );
	}

	return $text;
}
4982 
/**
 * Remove signature markers (runs of three to five tildes) from text.
 *
 * @param string $text
 * @return string Text with each run of 3-5 tildes removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4993 
/**
 * Public entry point for setting up a parse; simply forwards every
 * argument to the private startParse().
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	# Pure delegation; nothing is returned
	$this->startParse( $title, $options, $outputType, $clearState );
}
5008 
/**
 * Prepare the parser for a run: store title, options and output type,
 * and optionally reset the parser state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to call clearState() afterwards
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
5025 
/**
 * Preprocess message text, guarding against re-entrant calls.
 *
 * While one call is in progress, nested calls return their input
 * untouched. The guard is a function-level static, so it is shared by
 * every Parser instance.
 *
 * @param string $text Message text to preprocess
 * @param ParserOptions $options Options passed to preprocess()
 * @param Title|null $title Title to use; falls back to $wgTitle when falsy
 * @return string Preprocessed text, or $text unchanged on a re-entrant call
 * @throws Exception Whatever preprocess() throws is re-thrown unchanged
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	if ( !$title ) {
		global $wgTitle;
		$title = $wgTitle;
	}

	try {
		$text = $this->preprocess( $text, $title, $options );
	} catch ( Exception $e ) {
		# Release the guard before propagating; otherwise every later call
		# in this process would be treated as recursion and skip the
		# transformation. (catch/rethrow rather than finally, to stay
		# compatible with PHP versions that lack finally.)
		$executing = false;
		throw $e;
	}

	$executing = false;
	return $text;
}
5053 
/**
 * Register a callback for an extension tag and add the tag to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Hook to store for the tag
 * @throws MWException If the tag name contains <, >, CR or LF
 * @return callable|null The previously registered callback, if any
 */
public function setHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}

	# Remember what was there before so the caller can restore or chain it
	$previous = null;
	if ( isset( $this->mTagHooks[$tag] ) ) {
		$previous = $this->mTagHooks[$tag];
	}
	$this->mTagHooks[$tag] = $callback;

	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
5091 
/**
 * Register a callback for a transparent tag (see replaceTransparentTags()).
 * Unlike setHook(), the tag is not added to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Hook to store for the tag
 * @throws MWException If the tag name contains <, >, CR or LF
 * @return callable|null The previously registered callback, if any
 */
public function setTransparentTagHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# The message names this method, so the error can be traced to the right call
		throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
5119 
/**
 * Drop all tag hooks and function tag hooks, and reset the strip list
 * to its default. Transparent tag hooks are left untouched.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = array();
	$this->mTagHooks = array();
}
5128 
/**
 * Register a parser function hook and index every synonym of its magic word.
 *
 * @param string $id Magic word identifier
 * @param callable $callback Hook to store
 * @param int $flags Bit field of self::SFH_* flags; SFH_NO_HASH suppresses
 *   the leading '#' on the registered synonyms
 * @throws MWException If MagicWord::get() yields a falsy value for $id
 * @return callable|null The previously registered callback, if any
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
	# Needed for lc() below; without this declaration the variable would
	# be an undefined local.
	global $wgContLang;

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = array( $callback, $flags );

	# Add to function cache
	$mw = MagicWord::get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $mw->getSynonyms();
	# 0 = case-insensitive bucket, 1 = case-sensitive bucket
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $wgContLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
5204 
/**
 * List the identifiers of all registered parser function hooks.
 *
 * @return array Keys of $this->mFunctionHooks
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5213 
/**
 * Register a function tag hook and add the tag to the strip list.
 *
 * @param string $tag Tag name; lower-cased before use
 * @param callable $callback Hook to store
 * @param int $flags Stored alongside the callback
 * @throws MWException If the tag name contains <, >, CR or LF
 * @return array|null The previous array( callback, flags ) entry, if any
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}

	$previous = null;
	if ( isset( $this->mFunctionTagHooks[$tag] ) ) {
		$previous = $this->mFunctionTagHooks[$tag];
	}
	$this->mFunctionTagHooks[$tag] = array( $callback, $flags );

	# Make sure the tag gets stripped, without listing it twice
	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
5239 
/**
 * Replace link placeholders in $text by delegating to the link holder array.
 *
 * @param string &$text Text to process; passed by reference to replace()
 * @param int $options Accepted but not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5251 
/**
 * Run the link holder array's replaceText() over the given text.
 *
 * @param string $text
 * @return string Whatever mLinkHolders->replaceText() returns
 */
public function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );

	return $replaced;
}
5262 
/**
 * Render the contents of a gallery tag as HTML.
 *
 * Each non-empty line of $text is read as "Title|caption and options".
 * Recognised options are alt text, link target and any parameter the
 * file's media handler accepts; everything else becomes the caption.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Tag attributes; 'mode', 'showfilename', 'caption',
 *   'perrow', 'widths' and 'heights' are read here and the full array is
 *   handed to the gallery object as additional options
 * @return string Gallery HTML
 */
public function renderImageGallery( $text, $params ) {

	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

	// Filenames are shown only when the attribute is present (any value)
	$ig->setShowFilename( isset( $params['showfilename'] ) );

	if ( isset( $params['caption'] ) ) {
		$caption = $params['caption'];
		$caption = htmlspecialchars( $caption );
		$caption = $this->replaceInternalLinks( $caption );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// $this cannot be used as a reference on PHP 7.1+, so pass the hook a
	// reference to a local variable holding the same object instead.
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', array( &$parser, &$ig ) );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = array();
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = array();
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			array( $this, $title, &$options, &$descQuery ) );
		# Don't register it now, as ImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = array(
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		);
		if ( $handler ) {
			$paramMap = $paramMap + $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = new MagicWordArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = array();
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			$parameterMatches = StringUtils::explode( '|', $matches[3] );

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
							$chars = self::EXT_LINK_URL_CLASS;
							$prots = $this->mUrlProtocols;
							// check to see if link matches an absolute url, if not then it must be a wiki link.
							if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) {
								$link = $linkValue;
							} else {
								$localLinkTitle = Title::newFromText( $linkValue );
								if ( $localLinkTitle !== null ) {
									$link = $localLinkTitle->getLinkURL();
								}
							}
							break;
						default:
							// Must be a handler specific parameter. Such names only enter
							// $paramMap when a handler exists, so $handler is set here.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not. Append it to the caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label .= '|' . $parameterMatch;
							}
					}

				} else {
					// concatenate all other pipes
					$label .= '|' . $parameterMatch;
				}
			}
			// remove the first pipe
			$label = substr( $label, 1 );
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', array( $this, $ig, &$html ) );
	return $html;
}
5428 
/**
 * Get the image parameter map and matching MagicWordArray for a media
 * handler, building and caching them per handler class on first use.
 *
 * @param MediaHandler|bool $handler Handler object, or a falsy value for none
 * @return array Two elements: the map of magic word ID to
 *   array( type, name ), and the MagicWordArray built from the map's keys
 */
public function getImageParams( $handler ) {
	# Built-in parameter names, grouped by type
	static $internalParamNames = array(
		'horizAlign' => array( 'left', 'right', 'center', 'none' ),
		'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ),
		'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ),
	);
	static $internalParamMap;

	# The cache key is the handler's class name, or '' without a handler
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$handlerClass] ) ) {
		return array(
			$this->mImageParams[$handlerClass],
			$this->mImageParamsMagicArray[$handlerClass]
		);
	}

	# Build the shared internal map once: "img_<name>" with dashes as underscores
	if ( !$internalParamMap ) {
		$internalParamMap = array();
		foreach ( $internalParamNames as $type => $names ) {
			foreach ( $names as $name ) {
				$magicName = str_replace( '-', '_', "img_$name" );
				$internalParamMap[$magicName] = array( $type, $name );
			}
		}
	}

	# Layer the handler's own parameters on top of the internal ones
	$paramMap = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magic => $paramName ) {
			$paramMap[$magic] = array( 'handler', $paramName );
		}
	}

	$this->mImageParams[$handlerClass] = $paramMap;
	$this->mImageParamsMagicArray[$handlerClass] = new MagicWordArray( array_keys( $paramMap ) );

	return array(
		$this->mImageParams[$handlerClass],
		$this->mImageParamsMagicArray[$handlerClass]
	);
}
5472 
/**
 * Parse image link options and produce the image HTML via Linker.
 *
 * The options text has the form "option|option|...|caption". Options:
 *  - thumbnail    thumbnail with enlarge-icon and caption, alignment depends on lang
 *  - left         no resizing, just left align; label is used for alt= only
 *  - right        same, but right aligned
 *  - none         same, but not aligned
 *  - ___px        scale to ___ pixels width, no aligning
 *  - center       center the image
 *  - frame/framed keep original image size, no magnify-button
 *  - frameless    like 'thumb' but without a frame
 *  - upright      reduce width for upright images
 *  - border       draw a 1px border around the image
 *  - alt          text for the HTML alt attribute
 *  - class        class for the img node
 *  - link         target of the image link (external, interwiki or local)
 * Vertical-align values (no % or length): baseline, sub, super, top,
 * text-top, middle, bottom, text-bottom.
 *
 * @param Title $title Title of the file
 * @param string $options Pipe-separated option text
 * @param LinkHolderArray|bool $holders Passed through to stripAltText()
 * @return string HTML returned by Linker::makeImageLink()
 */
public function makeImage( $title, $options, $holders = false ) {
	$parts = StringUtils::explode( "|", $options );

	# Let extensions pick the file revision; from here on $options holds
	# the fetch options rather than the option text.
	$options = array();
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		array( $this, $title, &$options, &$descQuery ) );
	# Fetch and register the file (hooks may have changed the file title)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Parameter map for this file's handler
	$handler = $file ? $file->getHandler() : false;
	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Walk the option parts and sort them into buckets
	$caption = '';
	$params = array(
		'frame' => array(),
		'handler' => array(),
		'horizAlign' => array(),
		'vertAlign' => array(),
	);
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );

		# Anything that is not a known parameter is a caption candidate;
		# the last such part wins.
		if ( !isset( $paramMap[$magicName] ) ) {
			$caption = $part;
			continue;
		}

		list( $type, $paramName ) = $paramMap[$magicName];
		$validated = false;

		if ( $type === 'handler' && $paramName === 'width' ) {
			# Special case: width and height arrive together in one value.
			# A dimension that is absent is simply not validated -- bug 13436
			$dimensions = $this->parseWidthParam( $value );
			foreach ( array( 'width', 'height' ) as $dimension ) {
				if ( isset( $dimensions[$dimension] )
					&& $handler->validateParam( $dimension, $dimensions[$dimension] )
				) {
					$params[$type][$dimension] = $dimensions[$dimension];
					$validated = true;
				}
			}
		} else {
			if ( $type === 'handler' ) {
				# Handler parameters are validated by the handler itself
				$validated = $handler->validateParam( $paramName, $value );
			} else {
				# Internal parameters
				switch ( $paramName ) {
					case 'manualthumb':
					case 'alt':
					case 'class':
						# @todo FIXME: Possibly check validity here for
						# manualthumb? downstream behavior seems odd with
						# missing manual thumbs.
						$value = $this->stripAltText( $value, $holders );
						$validated = true;
						break;
					case 'link':
						$chars = self::EXT_LINK_URL_CLASS;
						$prots = $this->mUrlProtocols;
						if ( $value === '' ) {
							# Empty link= means "do not link the image"
							$paramName = 'no-link';
							$value = true;
							$validated = true;
						} elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
							# Starts like a URL; accept only if all of it is one
							if ( preg_match( "/^((?i)$prots)$chars+$/u", $value ) ) {
								$paramName = 'link-url';
								$this->mOutput->addExternalLink( $value );
								if ( $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
								$validated = true;
							}
						} else {
							# Otherwise treat it as a wiki page title
							$linkTitle = Title::newFromText( $value );
							if ( $linkTitle ) {
								$paramName = 'link-title';
								$value = $linkTitle;
								$this->mOutput->addLink( $linkTitle );
								$validated = true;
							}
						}
						break;
					case 'frameless':
					case 'framed':
					case 'thumbnail':
						// only the first format option counts, later ones are dropped
						$validated = !$seenformat;
						$seenformat = true;
						break;
					default:
						# Most other things appear to be empty or numeric...
						$validated = ( $value === false || is_numeric( trim( $value ) ) );
				}
			}

			if ( $validated ) {
				$params[$type][$paramName] = $value;
			}
		}

		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Alignment: the first key recorded in each bucket is used
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = false;
	foreach ( array( 'frame', 'framed', 'thumbnail', 'manualthumb' ) as $frameKey ) {
		if ( isset( $params['frame'][$frameKey] ) ) {
			$imageIsFramed = true;
			break;
		}
	}

	# Historically [[Image:Foo|text...]] set the alt text, later also the
	# caption and the title attribute, which repeats the same text several
	# times for screen readers. With a dedicated alt parameter, the alt
	# text is no longer copied from the caption for framed images. For
	# inline images the unnamed parameter still doubles as alt text when
	# no alt parameter is given.
	#
	# Open questions for the future: a title= parameter, ignoring the
	# unnamed parameter for images without caption, an explicit caption=
	# parameter with the unnamed one kept for BC, ...
	if ( $imageIsFramed ) {
		# Framed image: with neither caption nor alt text, fall back to the
		# file name so screen readers get at least some description.
		# $params['frame']['title'] is deliberately not set, because
		# tooltips make no sense for framed images.
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			$params['frame']['alt'] = $title->getText();
		}
	} else {
		# Inline image: alt text comes from the caption, or the file name
		# when there is no caption either
		if ( !isset( $params['frame']['alt'] ) ) {
			$params['frame']['alt'] = ( $caption !== '' )
				? $this->stripAltText( $caption, $holders )
				: $title->getText();
		}
		# The caption doubles as the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}

	Hooks::run( 'ParserMakeImageParams', array( $title, $file, &$params, $this ) );

	# Linker does the rest
	$time = false;
	if ( isset( $options['time'] ) ) {
		$time = $options['time'];
	}
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5691 
/**
 * Turn caption wikitext into plain text usable in an attribute: expand
 * link placeholders, unstrip strip markers, then remove all tags.
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Holder array to expand with, or a
 *   falsy value to use the parser's own link holders
 * @return string Tag-free text
 */
protected function stripAltText( $caption, $holders ) {
	# replaceLinkHoldersText() cannot be used unconditionally: when called
	# from replaceInternalLinks2(), mLinkHolders is not up to date yet, so
	# the caller supplies the holder array to use.
	$expanded = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# Expand strip markers now so that no placeholder survives into the
	# attribute and gets turned into HTML later, then drop the tags.
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::stripAllTags( $unstripped );
}
5715 
/**
 * Mark the current parser output as uncacheable.
 *
 * @throws MWException If there is no current output object
 */
public function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	$output = $this->mOutput;
	if ( !$output ) {
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}

	// old style, for compatibility
	$output->setCacheTime( -1 );
	// new style, for consistency
	$output->updateCacheExpiry( 0 );
}
5729 
/**
 * Replace variables in attribute text and unstrip it. The caller's
 * variable is updated in place and the result is also returned.
 *
 * @param string &$text
 * @param PPFrame|bool $frame Passed through to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	$text = $this->mStripState->unstripBoth(
		$this->replaceVariables( $text, $frame )
	);

	return $text;
}
5743 
/**
 * List every registered tag name: transparent tags first, then regular
 * tag hooks, then function tag hooks.
 *
 * @return array Tag names
 */
public function getTags() {
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$regularTags = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparentTags, $regularTags, $functionTags );
}
5756 
/**
 * Replace transparent tags in $text with the output of their hooks.
 *
 * Tags are extracted into markers first; each marker is then replaced by
 * its hook's return value, or by the original tag text when no hook is
 * registered for the lower-cased element name.
 *
 * @param string $text
 * @return string Text with the markers substituted
 */
public function replaceTransparentTags( $text ) {
	$matches = array();
	$text = self::extractTagsAndParams(
		array_keys( $this->mTransparentTagHooks ),
		$text,
		$matches,
		$this->mUniqPrefix
	);

	$replacements = array();
	foreach ( $matches as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;
		$tagName = strtolower( $element );

		if ( !isset( $this->mTransparentTagHooks[$tagName] ) ) {
			# No hook: put the original tag text back
			$replacements[$marker] = $tag;
			continue;
		}

		$replacements[$marker] = call_user_func_array(
			$this->mTransparentTagHooks[$tagName],
			array( $content, $params, $this )
		);
	}

	return strtr( $text, $replacements );
}
5788 
/**
 * Shared worker behind getSection() and replaceSection(): walk the
 * top-level nodes of the preprocessed text and either collect one section
 * or rebuild the text with that section swapped out.
 *
 * The section ID is split on '-': the last piece is the section index, and
 * a 'T' piece before it turns on the PTD_FOR_INCLUSION preprocessor flag.
 *
 * @param string $text Wikitext to work on
 * @param string $sectionId Section identifier, e.g. "2" or "T-2"
 * @param string $mode 'get' or 'replace'
 * @param string $newText In 'replace' mode the replacement text; in 'get'
 *   mode the value returned when the section does not exist
 * @return string In 'get' mode the section text; otherwise the text with
 *   the section replaced, or the unmodified input if it was not found
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Keep the scoped lock alive until this method returns
	$parseLock = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$result = '';
	$frame = $this->getPreprocessor()->newFrame();

	$isGet = ( $mode === 'get' );
	$isReplace = ( $mode === 'replace' );
	# What to hand back when the requested section cannot be located
	$notFound = $isGet ? $newText : $text;

	# Process section extraction flags
	$pieces = explode( '-', $sectionId );
	$sectionIndex = array_pop( $pieces );
	$flags = in_array( 'T', $pieces, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			return $isGet ? '' : $newText;
		}
		return $notFound;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks. They can only occur at the top
	# level, so iterating the root's children finds all of them.
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			# Everything before the target is copied through when replacing
			if ( $isReplace ) {
				$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $notFound;
	}

	# Find the end of the section, including nested sections: stop at the
	# first other heading whose level is not deeper than the target's.
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			$isOtherHeading = ( $heading['i'] != $sectionIndex );
			if ( $isOtherHeading && $heading['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $isGet ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Write out the remainder (in replace mode only)
	if ( $isReplace ) {
		# Trailing whitespace in $newText is conventionally stripped by the
		# editor, so two newlines are needed to restore the paragraph gap.
		# Nothing is added when there is no replacement text at all.
		if ( $newText != "" ) {
			$result .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( !is_string( $result ) ) {
		return $result;
	}

	# Re-insert stripped tags
	return rtrim( $this->mStripState->unstripBoth( $result ) );
}
5928 
/**
 * Fetch the text of a single section.
 *
 * Thin wrapper that runs extractSections() in 'get' mode.
 *
 * @param string $text Wikitext to look in
 * @param string $sectionId Section identifier, as taken by extractSections()
 * @param string $defaultText Value returned if the section does not exist
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';

	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5946 
/**
 * Replace the text of a single section.
 *
 * Thin wrapper that runs extractSections() in 'replace' mode.
 *
 * @param string $oldText Wikitext holding the section
 * @param string $sectionId Section identifier, as taken by extractSections()
 * @param string $newText Text to put in place of the section
 * @return string The rebuilt text, or $oldText if the section was not found
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';

	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
5962 
/**
 * Accessor for the revision ID stored on this parser.
 *
 * @return mixed Current value of mRevisionId
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;

	return $revisionId;
}
5971 
/**
 * Get the revision object for the stored revision ID, loading it on first
 * use and caching it in mRevisionObject.
 *
 * @return Revision|null Null when no revision ID is set; otherwise the
 *   cached object or whatever Revision::newFromId() returns
 */
public function getRevisionObject() {
	# Only hit Revision::newFromId() when nothing is cached and an ID exists
	if ( $this->mRevisionObject === null && $this->mRevisionId !== null ) {
		$this->mRevisionObject = Revision::newFromId( $this->mRevisionId );
	}

	return $this->mRevisionObject;
}
5989 
/**
 * Get the timestamp associated with the current revision, adjusted with
 * the content language's userAdjust(), and cache it on the parser.
 *
 * If no revision object is available, the current time is used.
 *
 * @return string Value produced by $wgContLang->userAdjust()
 */
public function getRevisionTimestamp() {
	if ( is_null( $this->mRevisionTimestamp ) ) {
		# Without this declaration $wgContLang would be an undefined local
		# and the userAdjust() call below would be made on null.
		global $wgContLang;

		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		#
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6014 
/**
 * Get the name of the user who made the current revision, cached on the
 * parser after the first lookup.
 *
 * @return string|null User name; stays null when there is no revision
 *   object and this is neither a 'wiki' output type nor a preview
 */
public function getRevisionUser() {
	if ( !is_null( $this->mRevisionUser ) ) {
		return $this->mRevisionUser;
	}

	$revObject = $this->getRevisionObject();

	if ( $revObject ) {
		$this->mRevisionUser = $revObject->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this template is subst:ed the revision ID will be blank,
		# so just use the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6034 
/**
 * Get the size of the current revision, cached on the parser after the
 * first lookup.
 *
 * @return int|null Size; stays null when there is no revision object and
 *   this is neither a 'wiki' output type nor a preview
 */
public function getRevisionSize() {
	if ( !is_null( $this->mRevisionSize ) ) {
		return $this->mRevisionSize;
	}

	$revObject = $this->getRevisionObject();

	if ( $revObject ) {
		$this->mRevisionSize = $revObject->getSize();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this variable is subst:ed the revision ID will be blank, so
		# just use the parser input size, because the substitution itself
		# will change the size.
		$this->mRevisionSize = $this->mInputSize;
	}

	return $this->mRevisionSize;
}
6055 
/**
 * Set the default sort key, both on the parser and as the 'defaultsort'
 * property of the current parser output.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$parserOutput = $this->mOutput;
	$parserOutput->setProperty( 'defaultsort', $sort );
}
6065 
/**
 * Accessor for the default sort key that never returns false.
 *
 * @return string The stored default sort key, or '' while it is still false
 */
public function getDefaultSort() {
	return $this->mDefaultSort === false ? '' : $this->mDefaultSort;
}
6083 
/**
 * Raw accessor for the default sort key. Unlike getDefaultSort(), the
 * stored value is returned as is, including false.
 *
 * @return string|bool Current value of mDefaultSort
 */
public function getCustomDefaultSort() {
	$customSort = $this->mDefaultSort;

	return $customSort;
}
6093 
/**
 * Guess the anchor of a section from the wikitext of its heading.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the ID escaped with the 'noninitial' option
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip them out first
	$plain = $this->stripSectionName( $text );
	$anchorId = Sanitizer::escapeId( $plain, 'noninitial' );

	return '#' . $anchorId;
}
6109 
/**
 * Same as guessSectionNameFromWikiText(), but the ID is escaped with the
 * 'legacy' option in addition to 'noninitial'.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped ID
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip them out first
	$plain = $this->stripSectionName( $text );
	$escapeOptions = array( 'noninitial', 'legacy' );

	return '#' . Sanitizer::escapeId( $plain, $escapeOptions );
}
6124 
/**
 * Strip link markup, quote markup and HTML tags from a section name.
 *
 * @param string $text Section name wikitext
 * @return string
 */
public function stripSectionName( $text ) {
	# Internal links: a piped link keeps its label, a plain one its target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# External links keep their link text.
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6157 
/**
 * Testing helper: start a parse, then run the text through variable
 * replacement, unstripping and HTML tag removal.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @return string
 */
public function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) {
	# Keep the scoped lock alive until this method returns
	$parseLock = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6177 
/**
 * Testing helper: run preSaveTransform() with the user taken from the
 * given parser options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of preSaveTransform()
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
6187 
/**
 * Testing helper: testSrvus() with the output type fixed to OT_PREPROCESS.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string Result of testSrvus()
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;

	return $this->testSrvus( $text, $title, $options, $outputType );
}
6197 
/**
 * Apply a callback to every stretch of text lying between strip markers,
 * copying the markers themselves through untouched.
 *
 * A marker runs from an occurrence of mUniqPrefix up to and including the
 * next MARKER_SUFFIX. If a prefix has no suffix after it, the rest of the
 * string is copied through without calling the callback.
 *
 * @param string $s Text to process
 * @param callable $callback Called with one text segment, returns its
 *   replacement
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$result = '';
	$pos = 0;

	while ( $pos < strlen( $s ) ) {
		$start = strpos( $s, $this->mUniqPrefix, $pos );
		if ( $start === false ) {
			# No more markers: everything left is ordinary text
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Ordinary text in front of the marker
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$suffixAt = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $suffixAt === false ) {
			# Unterminated marker: pass the remainder through as is
			$result .= substr( $s, $start );
			break;
		}

		# Copy the whole marker and continue right behind it
		$pos = $suffixAt + strlen( self::MARKER_SUFFIX );
		$result .= substr( $s, $start, $pos - $start );
	}

	return $result;
}
6237 
/**
 * Pass the text through the strip state's killMarkers().
 *
 * @param string $text
 * @return string Whatever the strip state's killMarkers() returns
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6247 
/**
 * Bundle half-parsed text with the strip state and link holder data
 * selected for that text.
 *
 * The result is what unserializeHalfParsedText() expects as input.
 *
 * @param string $text Half-parsed text
 * @return array With keys 'text', 'version', 'stripState' and 'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
	return array(
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	);
}
6273 
/**
 * Load the data of serializeHalfParsedText() back into this parser,
 * merging the stored strip state and link holders into the current ones.
 *
 * @param array $data Array as returned by serializeHalfParsedText()
 * @throws MWException If the 'version' entry is missing or does not match
 *   HALF_PARSED_VERSION
 * @return string The text after both merges
 */
public function unserializeHalfParsedText( $data ) {
	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# First, extract the strip state.
	$merged = $this->mStripState->merge( $data['stripState'], array( $data['text'] ) );

	# Now renumber links
	$merged = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $merged );

	# Should be good to go.
	return $merged[0];
}
6304 
/**
 * Check whether the data carries the 'version' entry that
 * unserializeHalfParsedText() requires.
 *
 * @param array $data Array as returned by serializeHalfParsedText()
 * @return bool True if 'version' is set and equals HALF_PARSED_VERSION
 */
public function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
6317 
/**
 * Parse a size specification such as "100px", "100x200px" or "x200".
 *
 * @param string $value
 * @return array Empty for '' or unparsable input; otherwise 'width', plus
 *   'height' when the value has the WIDTHxHEIGHT form. Missing digits
 *   yield 0.
 */
public function parseWidthParam( $value ) {
	$dimensions = array();
	if ( $value === '' ) {
		return $dimensions;
	}

	# (bug 13500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$groups = array();
	if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $groups ) ) {
		$dimensions['width'] = intval( $groups[1] );
		$dimensions['height'] = intval( $groups[2] );
		return $dimensions;
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		$dimensions['width'] = intval( $value );
	}

	return $dimensions;
}
6345 
/**
 * Mark the parser as busy and return an object that clears the mark again.
 *
 * The caller has to keep the returned object in a local variable: the
 * callback wrapped in it resets mInParse to false.
 *
 * @throws MWException If the parser is already marked as parsing
 * @return ScopedCallback Wraps the callback that resets mInParse
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively?" );
	}
	$this->mInParse = true;

	# Closures cannot capture $this on older PHP versions, so go via a copy
	# of the object handle
	$parser = $this;

	return new ScopedCallback( function() use ( $parser ) {
		$parser->mInParse = false;
	} );
}
6369 
/**
 * Remove a single <p> wrapper from around an HTML fragment.
 *
 * The input is returned unchanged unless it consists of exactly one
 * paragraph, i.e. it starts with <p>, ends with </p> (optionally followed
 * by a newline) and has no further </p> inside.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$groups = array();
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $groups );

	# A closing tag inside the capture means several paragraphs were matched
	# as one; leave such input alone.
	if ( !$isWrapped || strpos( $groups[1], '</p>' ) !== false ) {
		return $html;
	}

	return $groups[1];
}
6390 
/**
 * Return a parser that is not in the middle of a parse.
 *
 * @return Parser This object if it is idle; otherwise a new instance of
 *   the class named in $wgParserConf['class'], built from $wgParserConf
 */
public function getFreshParser() {
	global $wgParserConf;

	if ( !$this->mInParse ) {
		return $this;
	}

	$parserClass = $wgParserConf['class'];

	return new $parserClass( $wgParserConf );
}
6409 }
setTitle($t)
Set the context title.
Definition: Parser.php:701
$mAutonumber
Definition: Parser.php:151
$mPPNodeCount
Definition: Parser.php:161
replaceInternalLinks2(&$s)
Process [[ ]] wikilinks (RIL)
Definition: Parser.php:1981
static getVariableIDs()
Get an array of parser variable IDs.
Definition: MagicWord.php:262
$mPreprocessor
Definition: Parser.php:146
null means default in associative array form
Definition: hooks.txt:1694
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1694
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1686
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1188
$mTplRedirCache
Definition: Parser.php:163
static tocList($toc, $lang=false)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1697
static makeExternalLink($url, $text, $escape=true, $linktype= '', $attribs=array(), $title=null)
Make an external link.
Definition: Linker.php:1056
although this is appropriate in some e g manual creation of blank tables prior to an import Most of the PHP scripts need to be run from the command line Prior to doing so
Definition: README:1
getBoolOption($oname)
Get the user's current setting for a given option, as a boolean value.
Definition: User.php:2574
return true to allow those checks to and false if checking is done remove or add to the links of a group of changes in EnhancedChangesList use this to change the tables headers temp or archived zone change it to an object instance and return false override the list derivative used the name of the old file when set the default code will be skipped true if there is text before this autocomment true if there is text after this autocomment add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1255
const OT_PREPROCESS
Definition: Defines.php:233
$mLastSection
Definition: Parser.php:156
$size
Definition: RandomTest.php:76
$mDoubleUnderscores
Definition: Parser.php:163
Group all the pieces relevant to the context of a request into one instance.
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions instead of letting the login form give the generic error message that the account does not exist For when the account has been renamed or deleted or an array to pass a message key and parameters but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition: hooks.txt:949
or
false for read/write
MapCacheLRU null $currentRevisionCache
Definition: Parser.php:201
getArticleID($flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition: Title.php:3116
recursivePreprocess($text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition: Parser.php:625
getText()
Get the text form (spaces not underscores) of the main part.
Definition: Title.php:863
replaceExternalLinks($text)
Replace external links (REL)
Definition: Parser.php:1719
static isNonincludable($index)
Is it not possible to use pages from this namespace as a template?
nextLinkID()
Definition: Parser.php:791
const SPACE_NOT_NL
Definition: Parser.php:94
static replaceUnusualEscapes($url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition: Parser.php:1836
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:322
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
doHeadings($text)
Parse headers and return html.
Definition: Parser.php:1496
const OT_PLAIN
Definition: Parser.php:115
static removeHTMLtags($text, $processCallback=null, $args=array(), $extratags=array(), $removetags=array())
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:372
static isWellFormedXmlFragment($text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:740
const OT_WIKI
Definition: Parser.php:112
User $mUser
Definition: Parser.php:168
initialiseVariables()
Initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers ...
Definition: Parser.php:3205
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1694
=Architecture==Two class hierarchies are used to provide the functionality associated with the different content models:*Content interface(and AbstractContent base class) define functionality that acts on the concrete content of a page, and *ContentHandler base class provides functionality specific to a content model, but not acting on concrete content.The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content.These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text.All manipulation and analysis of page content must be done via the appropriate methods of the Content object.For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers.The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id).Also Title, WikiPage and Revision now have getContentHandler() methods for convenience.ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page.ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type.However, it is recommended to instead use WikiPage::getContent() resp.Revision::getContent() to get a page's content as a Content object.These two methods should be the ONLY way in which page content is accessed.Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides().This is similar to what WikiPage::getActionOverrides() was already doing.==Serialization==With the ContentHandler facility, page content no longer has to be text based.Objects implementing the Content interface are used to represent and handle the content internally.For storage and data exchange, each content model supports at least one serialization format via 
ContentHandler::serializeContent($content).The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats().Content serialization formats are identified using MIME type like strings.The following formats are built in:*text/x-wiki-wikitext *text/javascript-for js pages *text/css-for css pages *text/plain-for future use, e.g.with plain text messages.*text/html-for future use, e.g.with plain html messages.*application/vnd.php.serialized-for future use with the api and for extensions *application/json-for future use with the api, and for use by extensions *application/xml-for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant.Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model and format of the content should always be handled explicitly.Without that information, interpretation of the provided content is not reliable.The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export.Also note that the API will provide encapsulated, serialized content-so if the API was called with format=json, and contentformat is also json(or rather, application/json), the page content is represented as a string containing an escaped json structure.Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form.==Compatibility==The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content.However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page's content model, and will now generate warnings when used.Most importantly, the following functions have been deprecated:*Revisions::getText() and Revisions::getRawText() is 
deprecated in favor Revisions::getContent()*WikiPage::getText() is deprecated in favor WikiPage::getContent() Also, the old Article::getContent()(which returns text) is superceded by Article::getContentObject().However, both methods should be avoided since they do not provide clean access to the page's actual content.For instance, they may return a system message for non-existing pages.Use WikiPage::getContent() instead.Code that relies on a textual representation of the page content should eventually be rewritten.However, ContentHandler::getContentText() provides a stop-gap that can be used to get text for a page.Its behavior is controlled by $wgContentHandlerTextFallback it
Set options of the Parser.
static tidy($text)
Interface with html tidy, used if $wgUseTidy = true.
Definition: MWTidy.php:127
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition: hooks.txt:949
wfHostname()
Fetch server name for use in error reporting etc.
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition: Parser.php:806
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:188
uniqPrefix()
Accessor for mUniqPrefix.
Definition: Parser.php:683
const TOC_START
Definition: Parser.php:121
Title($x=null)
Accessor/mutator for the Title object.
Definition: Parser.php:730
SectionProfiler $mProfiler
Definition: Parser.php:208
$sort
wfDebug($text, $dest= 'all', array $context=array())
Sends a line to the debug log if enabled or, optionally, to a comment in output.
There are three types of nodes:
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1518
has been added to your &Future changes to this page and its associated Talk page will be listed there
$mHeadings
Definition: Parser.php:163
$value
const COLON_STATE_TAGSLASH
Definition: Parser.php:101
static makeSelfLinkObj($nt, $html= '', $query= '', $trail= '', $prefix= '')
Make appropriate markup for a link to the current article.
Definition: Linker.php:401
const NS_SPECIAL
Definition: Defines.php:63
clearState()
Clear Parser state.
Definition: Parser.php:291
$mFirstCall
Definition: Parser.php:136
getPreloadText($text, Title $title, ParserOptions $options, $params=array())
Process the wikitext for the "?preload=" feature.
Definition: Parser.php:644
Options($x=null)
Accessor/mutator for the ParserOptions object.
Definition: Parser.php:784
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2349
static activeUsers()
Definition: SiteStats.php:162
$mLinkID
Definition: Parser.php:160
doQuotes($text)
Helper function for doAllQuotes()
Definition: Parser.php:1529
preprocessToDom($text, $flags=0)
Preprocess some wikitext and return the document tree.
Definition: Parser.php:3235
getPrefixedText()
Get the prefixed title with spaces.
Definition: Title.php:1405
limitationWarn($limitationType, $current= '', $max= '')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition: Parser.php:3354
static cleanUrl($url)
Definition: Sanitizer.php:1754
wfUrlencode($s)
We want some things to be included as literal characters in our title URLs for prettiness, which urlencode encodes by default.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:235
$mGeneratedPPNodeCount
Definition: Parser.php:161
Represents a title within MediaWiki.
Definition: Title.php:35
static getRandomString()
Get a random string.
Definition: Parser.php:664
$mRevisionId
Definition: Parser.php:181
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1721
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
doBlockLevels($text, $linestart)
#@-
Definition: Parser.php:2481
$wgArticlePath
Definition: img_auth.php:45
OutputType($x=null)
Accessor/mutator for the output type.
Definition: Parser.php:756
SQUARED ID
const NS_TEMPLATE
Definition: Defines.php:84
if(!$in) print Initializing normalization quick check tables n
const COLON_STATE_COMMENTDASHDASH
Definition: Parser.php:104
recursiveTagParse($text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition: Parser.php:560
const NO_ARGS
Allows to change the fields on the form that will be generated just before adding its HTML to parser output $parser
Definition: hooks.txt:322
MagicWordArray $mVariables
Definition: Parser.php:141
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:696
const SFH_NO_HASH
Definition: Parser.php:82
const COLON_STATE_COMMENTDASH
Definition: Parser.php:103
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
wfRandomString($length=32)
Get a random string containing a number of pseudo-random hex characters.
$mForceTocPosition
Definition: Parser.php:165
preprocess($text, Title $title=null, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition: Parser.php:601
getName()
Get the user name, or the IP of an anonymous user.
Definition: User.php:1949
static getCacheTTL($id)
Allow external reads of TTL array.
Definition: MagicWord.php:285
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
Definition: globals.txt:10
const OT_PREPROCESS
Definition: Parser.php:113
maybeDoSubpageLink($target, &$text)
Handle link to subpage if necessary.
Definition: Parser.php:2348
$mFunctionSynonyms
Definition: Parser.php:128
If you want to remove the page from your watchlist later
setLinkID($id)
Definition: Parser.php:798
$mOutputType
Definition: Parser.php:178
Apache License January http
$mDefaultStripList
Definition: Parser.php:131
$mExtLinkBracketedRegex
Definition: Parser.php:146
if($line===false) $args
Definition: cdb.php:64
set to $title object and return false for a match for latest to be modified or replaced by the hook handler after cache objects are set use the ContentGetParserOutput hook instead for highlighting & $link
Definition: hooks.txt:2379
static getLocalInstance($ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
const COLON_STATE_TAG
Definition: Parser.php:98
static getDoubleUnderscoreArray()
Get a MagicWordArray of double-underscore entities.
Definition: MagicWord.php:298
static splitTrail($trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1781
The User object encapsulates all of the user-specific settings (user_id, name, rights, password, email address, options, last login time).
Definition: User.php:39
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1399
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
pull multiple revisions may often pull multiple times from the same blob.
Definition: deferred.txt:11
wfTimestamp($outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static factory($mode=false, IContextSource $context=null)
Get a new image gallery.
Custom PHP profiler for parser/DB type section names that xhprof/xdebug can't handle.
static edits()
Definition: SiteStats.php:130
Class for asserting that a callback happens when a dummy object leaves scope.
wfCgiToArray($query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
static capturePath(Title $title, IContextSource $context)
Just like executePath() but will override global variables and execute the page in "inclusion" mode...
const NO_TEMPLATES
replaceInternalLinks($s)
Process [[ ]] wikilinks.
Definition: Parser.php:1968
$mVarCache
Definition: Parser.php:132
$mRevisionObject
Definition: Parser.php:180
static normalizeSectionNameWhitespace($section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the ids that are used for section links.
Definition: Sanitizer.php:1284
internalParse($text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition: Parser.php:1183
Title $mTitle
Definition: Parser.php:177
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition: Parser.php:238
wfEscapeWikiText($text)
Escapes the given text so that it may be output using addWikiText() without any linking, formatting, etc.
bool $mInParse
Recursive call protection.
Definition: Parser.php:206
Some quick notes on the file repository architecture Functionality is
Definition: README:3
isExternal()
Is this Title interwiki?
Definition: Title.php:782
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:867
static register($parser)
magic word use ParserLimitReportPrepare and ParserLimitReportFormat instead Called at the end of the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2151
$mRevIdForTs
Definition: Parser.php:185
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
static normalizeSubpageLink($contextTitle, $target, &$text)
Definition: Linker.php:1503
$mStripList
Definition: Parser.php:130
$mFunctionTagHooks
Definition: Parser.php:129
const OT_PLAIN
Definition: Defines.php:235
$mRevisionTimestamp
Definition: Parser.php:182
$mImageParams
Definition: Parser.php:133
getDBkey()
Get the main part with underscores.
Definition: Title.php:881
doAllQuotes($text)
Replace single quotes with HTML markup.
Definition: Parser.php:1512
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.$reader:XMLReader object $logInfo:Array of information Return false to stop further processing of the tag 'ImportHandlePageXMLTag':When parsing a XML tag in a page.$reader:XMLReader object $pageInfo:Array of information Return false to stop further processing of the tag 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.$reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information Return false to stop further processing of the tag 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.$reader:XMLReader object Return false to stop further processing of the tag 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.$reader:XMLReader object $revisionInfo:Array of information Return false to stop further processing of the tag 'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.$title:Title object for the current page $request:WebRequest $ignoreRedirect:boolean to skip redirect check $target:Title/string of redirect target $article:Article object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) $article:article(object) being checked 'IsTrustedProxy':Override the result of wfIsTrustedProxy() $ip:IP being check $result:Change this value to override the result of wfIsTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 
'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetMagic':DEPRECATED, use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language $magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetSpecialPageAliases':DEPRECATED, use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language $specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 
'LinkBegin':Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1692
static normalizeUrlComponent($component, $unsafe)
Definition: Parser.php:1886
isAnon()
Get whether the user is anonymous.
Definition: User.php:3098
if($limit) $timestamp
const VERSION
Update this version number when the ParserOutput format changes in an incompatible way...
Definition: Parser.php:73
wfGetDB($db, $groups=array(), $wiki=false)
Get a Database object.
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions instead of letting the login form give the generic error message that the account does not exist For when the account has been renamed or deleted or an array to pass a message key and parameters but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:949
$mInPre
Definition: Parser.php:156
const OT_WIKI
Definition: Defines.php:232
getPreprocessor()
Get a preprocessor object.
Definition: Parser.php:859
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of normal(non-web) applications--they might conflict with distributors'policies
static getInstance($ts=false)
Get a timestamp instance in GMT.
$limit
const NS_MEDIA
Definition: Defines.php:62
$res
Definition: database.txt:21
static linkKnown($target, $html=null, $customAttribs=array(), $query=array(), $options=array( 'known', 'noclasses'))
Identical to link(), except $options defaults to 'known'.
Definition: Linker.php:262
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:53
static run($event, array $args=array(), $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:137
replaceVariables($text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition: Parser.php:3280
const RECOVER_ORIG
wfMatchesDomainList($url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
MediaWiki exception.
Definition: MWException.php:26
StripState $mStripState
Definition: Parser.php:154
$mDefaultSort
Definition: Parser.php:162
getUser()
Get a User object either from $this->mUser, if set, or from the ParserOptions object otherwise...
Definition: Parser.php:847
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
getStripList()
Get a list of strippable XML-like elements.
Definition: Parser.php:954
const EXT_IMAGE_REGEX
Definition: Parser.php:90
$params
const NS_CATEGORY
Definition: Defines.php:88
static makeHeadline($level, $attribs, $anchor, $html, $link, $legacyAnchor=false)
Create a headline for content.
Definition: Linker.php:1751
shown</td >< td > a href
and(b) You must cause any modified files to carry prominent notices stating that You changed the files
doTableStuff($text)
Parse the wiki syntax used to render tables.
Definition: Parser.php:981
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
$mImageParamsMagicArray
Definition: Parser.php:134
LinkHolderArray $mLinkHolders
Definition: Parser.php:158
static register($parser)
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
const DB_SLAVE
Definition: Defines.php:51
if(!function_exists( 'version_compare')||version_compare(PHP_VERSION, '5.3.3')< 0)
Definition: api.php:37
getTargetLanguage()
Get the target language for the content being parsed.
Definition: Parser.php:819
static extractTagsAndParams($elements, $text, &$matches, $uniq_prefix= '')
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition: Parser.php:887
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:867
static hasSubpages($index)
Does the namespace allow subpages?
formatHeadings($text, $origText, $isMain=true)
This function accomplishes several tasks: 1) Auto-number headings if that option is enabled 2) Add an...
Definition: Parser.php:4346
getConverterLanguage()
Get the language object for language conversion.
Definition: Parser.php:837
static tocUnindent($level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1655
if(!$wgRequest->checkUrlExtension()) if(!$wgEnableAPI) $wgTitle
Definition: api.php:62
static tocLine($anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
The level parameter defines whether we are on an indentation level.
Definition: Linker.php:1669
getNamespace()
Get the namespace index, i.e. one of the NS_xxxx constants.
Definition: Title.php:904
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
getExternalLinkAttribs($url=false)
Get an associative array of additional HTML attributes appropriate for a particular external link...
Definition: Parser.php:1819
__construct($conf=array())
Definition: Parser.php:213
$mInputSize
Definition: Parser.php:186
equals(Title $title)
Compare with another title.
Definition: Title.php:4172
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
const HALF_PARSED_VERSION
Update this version number when the output of serialiseHalfParsedText() changes in an incompatible wa...
Definition: Parser.php:79
const NS_FILE
Definition: Defines.php:80
firstCallInit()
Do various kinds of initialisation on the first call of the parser.
Definition: Parser.php:273
Handles a simple LRU key/value map with a maximum number of entries.
Definition: MapCacheLRU.php:33
const PTD_FOR_INCLUSION
Definition: Parser.php:107
static escapeId($id, $options=array())
Given a value, escape it so that it can be used in an id attribute and return it. ...
Definition: Sanitizer.php:1104
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1694
armorLinks($text)
Insert a NOPARSE hacky thing into any inline links in a chunk that's going to go through further pars...
Definition: Parser.php:2326
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1493
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title after the basic globals have been set up
Definition: hooks.txt:1870
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
setOutputType($ot)
Set the output type.
Definition: Parser.php:739
$mTagHooks
Definition: Parser.php:125
Class for handling an array of magic words.
Definition: MagicWord.php:699
const NS_MEDIAWIKI
Definition: Defines.php:82
static & get($id)
Factory: creates an object representing an ID.
Definition: MagicWord.php:248
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition: hooks.txt:237
the value to return A Title object or null whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2361
if($wgServerName!==false) $wgServerName
Definition: Setup.php:496
#define the
$n
Definition: RandomTest.php:77
maybeMakeExternalImage($url)
Make an image if it's allowed, either through the global option, through the exception, or through the on-wiki whitelist.
Definition: Parser.php:1909
getOption($oname, $defaultOverride=null, $ignoreHidden=false)
Get the user's current setting for a given option.
Definition: User.php:2515
areSubpagesAllowed()
Return true if subpage links should be expanded on this page.
Definition: Parser.php:2335
const MARKER_SUFFIX
Definition: Parser.php:118
wfDebugLog($logGroup, $text, $dest= 'all', array $context=array())
Send a line to a supplementary debug log file, if configured, or main debug log if not...
const OT_HTML
Definition: Defines.php:231
Prior to maintenance scripts were a hodgepodge of code that had no cohesion or formal method of action Beginning in
Definition: maintenance.txt:1
static makeImageLink(Parser $parser, Title $title, $file, $frameParams=array(), $handlerParams=array(), $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:537
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions instead of letting the login form give the generic error message that the account does not exist For when the account has been renamed or deleted or an array to pass a message key and parameters but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object & $output
Definition: hooks.txt:949
static getSubstIDs()
Get an array of parser substitution modifier IDs.
Definition: MagicWord.php:275
static images()
Definition: SiteStats.php:170
isSpecialPage()
Returns true if this is a special page.
Definition: Title.php:1022
$mTransparentTagHooks
Definition: Parser.php:126
$mExpensiveFunctionCount
Definition: Parser.php:164
$mUrlProtocols
Definition: Parser.php:146
const TS_MW
MediaWiki concatenated string timestamp (YYYYMMDDHHMMSS)
static getVersion($flags= '')
Return a string of the MediaWiki version with SVN revision if available.
static newFromTitle($title, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given title.
Definition: Revision.php:104
$mConf
Definition: Parser.php:146
static newFromId($id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:86
wfUrlProtocols($includeProtocolRelative=true)
Returns a regular expression of url protocols.
__clone()
Allow extensions to clean up when the parser is cloned.
Definition: Parser.php:250
static getExternalLinkRel($url=false, $title=null)
Get the rel attribute for a particular external link.
Definition: Parser.php:1798
static & singleton()
Get an instance of this class.
Definition: LinkCache.php:49
string $mUniqPrefix
Definition: Parser.php:189
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
array $mLangLinkLanguages
Array with the language name of each language link (i.e. the interwiki prefix) in the key, value arbitrary.
Definition: Parser.php:195
const OT_MSG
Definition: Parser.php:114
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
Definition: postgres.txt:22
doDoubleUnderscore($text)
Strip double-underscore items like __NOGALLERY__ and __NOTOC__. Fills $this->mDoubleUnderscores and returns the modified text.
Definition: Parser.php:4270
$mFunctionHooks
Definition: Parser.php:127
$lines
Definition: router.php:66
follow the installation instructions in the PHPUnit Manual at
Definition: README:1
if(!defined( 'MEDIAWIKI')) $fname
This file is not a valid entry point, perform no further processing unless MEDIAWIKI is defined...
Definition: Setup.php:35
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition: deferred.txt:11
MagicWordArray $mSubstWords
Definition: Parser.php:144
const COLON_STATE_TEXT
Definition: Parser.php:97
const TOC_END
Definition: Parser.php:122
static normalizeCharReferences($text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1303
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
if(PHP_SAPI!= 'cli') $file
static element($element, $attribs=array(), $contents= '')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:243
$mArgStack
Definition: Parser.php:156
isTrans()
Determine whether the object refers to a page within this project and is transcludable.
Definition: Title.php:812
Variant of the Message class.
Definition: Message.php:1115
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database etc For and for historical it also represents a few features of articles that don t involve their such as access rights See also title txt Article Encapsulates access to the page table of the database The object represents a an and maintains state such as etc Revision Encapsulates individual page revision data and access to the revision text blobs storage system Higher level code should never touch text storage directly
Definition: design.txt:34
WebRequest clone which takes values from a provided array.
static articles()
Definition: SiteStats.php:138
const COLON_STATE_TAGSTART
Definition: Parser.php:99
$mRevisionUser
Definition: Parser.php:183
fetchFileAndTitle($title, $options=array())
Fetch a file and its title and register a reference to it.
Definition: Parser.php:4003
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2176
static pages()
Definition: SiteStats.php:146
$line
Definition: cdb.php:59
const COLON_STATE_COMMENT
Definition: Parser.php:102
const SFH_OBJECT_ARGS
Definition: Parser.php:83