MediaWiki  1.23.8
Parser.php
Go to the documentation of this file.
1 <?php
67 class Parser {
# Version string of this parser class; it is not referenced in this part of the file.
const VERSION = '1.6.4';

# Version number for "half-parsed" data; its consumers are outside this part of the file.
const HALF_PARSED_VERSION = 2;

# Flags for Parser::setFunctionHook
# Also available as global constants from Defines.php
const SFH_NO_HASH = 1;
const SFH_OBJECT_ARGS = 2;

# Constants needed for external link processing
# Everything except bracket, space, or control characters
# \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
# as well as U+3000 IDEOGRAPHIC SPACE (bug 19052)
const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
# The pattern is compiled with the /x modifier, so the line break and the
# indentation inside the literal are ignored by PCRE.
const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
    \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';

# State constants for the definition list colon extraction
const COLON_STATE_TEXT = 0;
const COLON_STATE_TAG = 1;
const COLON_STATE_TAGSTART = 2;
const COLON_STATE_CLOSETAG = 3;
const COLON_STATE_TAGSLASH = 4;
const COLON_STATE_COMMENT = 5;
const COLON_STATE_COMMENTDASH = 6;
const COLON_STATE_COMMENTDASHDASH = 7;

# Flags for preprocessToDom
const PTD_FOR_INCLUSION = 1;

# Allowed values for $this->mOutputType
# Parameter to startExternalParse().
const OT_HTML = 1; # like parse()
const OT_WIKI = 2; # like preSaveTransform()
const OT_PREPROCESS = 3; # like preprocess()
const OT_MSG = 3; # same value as OT_PREPROCESS
const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.

# Marker suffix needs to be accessible statically.
const MARKER_SUFFIX = "-QINU\x7f";

# Markers used for wrapping the table of contents
const TOC_START = '<mw:toc>';
const TOC_END = '</mw:toc>';

# Persistent:
var $mTagHooks = array();
var $mTransparentTagHooks = array();
var $mFunctionHooks = array();
var $mFunctionSynonyms = array( 0 => array(), 1 => array() );
var $mFunctionTagHooks = array();
var $mStripList = array();
var $mDefaultStripList = array();
var $mVarCache = array();
var $mImageParams = array();
var $mImageParamsMagicArray = array();
var $mMarkerIndex = 0; # counter used by insertStripItem() to build unique markers
var $mFirstCall = true; # false once firstCallInit() has run

# Initialised by initialiseVariables()
var $mVariables;
var $mSubstWords;

# Initialised in constructor
var $mConf, $mExtLinkBracketedRegex, $mUrlProtocols;
# Name of the preprocessor class, chosen in the constructor. Declared here so
# that the constructor does not create it as a dynamic property.
var $mPreprocessorClass;
# Preprocessor instance, created lazily by getPreprocessor()
var $mPreprocessor;

# Cleared with clearState():
var $mOutput; # ParserOutput
var $mAutonumber, $mDTopen;
var $mStripState; # StripState
var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
var $mLinkHolders; # LinkHolderArray
var $mLinkID;
var $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
var $mDefaultSort;
var $mTplExpandCache; # empty-frame expansion cache
var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
var $mExpensiveFunctionCount; # number of expensive parser function calls
var $mShowToc, $mForceTocPosition;
var $mUser; # User object; only used when doing pre-save transform

# Temporary
# These are variables reset at least once per parse regardless of $clearState
var $mOptions; # parser options object; see getOptions()
var $mTitle; # Title context, used for self-link rendering and similar things
var $mOutputType; # Output type, one of the OT_xxx constants
var $ot; # Shortcut alias, see setOutputType()
var $mRevisionObject; # The revision object of the specified revision ID
var $mRevisionId; # ID to display in {{REVISIONID}} tags
var $mRevisionTimestamp; # The timestamp of the specified revision ID
var $mRevisionUser; # User to display in {{REVISIONUSER}} tag
var $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
var $mRevIdForTs; # The revision ID which was used to fetch the timestamp
var $mInputSize = false; # For {{PAGESIZE}} on current page.

# Prefix of the strip markers; regenerated by clearState()
var $mUniqPrefix;

# Reset to an empty array by clearState()
var $mLangLinkLanguages;
213 
/**
 * Store the configuration, build the bracketed external link regex and
 * select the preprocessor class.
 *
 * The preprocessor class is taken from $conf['preprocessorClass'] when given.
 * Otherwise Preprocessor_DOM is used only when the 'dom' extension is loaded
 * and neither HipHop nor the obsolete 'domxml' extension is present; in all
 * remaining cases Preprocessor_Hash is used.
 *
 * @param array $conf Parser configuration
 */
public function __construct( $conf = array() ) {
    $this->mConf = $conf;
    $this->mUrlProtocols = wfUrlProtocols();

    $bracketedLinkRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
        self::EXT_LINK_URL_CLASS . '+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
    $this->mExtLinkBracketedRegex = $bracketedLinkRegex;

    if ( isset( $conf['preprocessorClass'] ) ) {
        # Explicit choice by the caller
        $chosenClass = $conf['preprocessorClass'];
    } elseif ( defined( 'HPHP_VERSION' ) ) {
        # Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop
        $chosenClass = 'Preprocessor_Hash';
    } elseif ( extension_loaded( 'domxml' ) ) {
        # PECL extension that conflicts with the core DOM extension (bug 13770)
        wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
        $chosenClass = 'Preprocessor_Hash';
    } elseif ( extension_loaded( 'dom' ) ) {
        $chosenClass = 'Preprocessor_DOM';
    } else {
        $chosenClass = 'Preprocessor_Hash';
    }
    $this->mPreprocessorClass = $chosenClass;

    wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
240 
/**
 * Drop every property of this object on destruction, starting with the
 * link holder array.
 */
function __destruct() {
    # Release the link holders first
    if ( isset( $this->mLinkHolders ) ) {
        unset( $this->mLinkHolders );
    }

    # Then remove every remaining property
    foreach ( $this as $property => $unused ) {
        unset( $this->$property );
    }
}
252 
/**
 * Let extensions react to the parser being cloned by running the
 * 'ParserCloned' hook with the clone as its only argument.
 */
function __clone() {
    $hookArgs = array( $this );
    wfRunHooks( 'ParserCloned', $hookArgs );
}
259 
/**
 * One-time initialisation, performed on the first call only.
 *
 * Registers the core parser functions and core tag hooks, initialises the
 * variable tables and then runs the 'ParserFirstCallInit' hook. Subsequent
 * calls return immediately because $mFirstCall has been cleared.
 */
function firstCallInit() {
    if ( !$this->mFirstCall ) {
        return;
    }
    $this->mFirstCall = false;

    wfProfileIn( __METHOD__ );

    # The core parser functions have to be registered alongside the core tag
    # hooks; without this call they would never be registered.
    CoreParserFunctions::register( $this );
    CoreTagHooks::register( $this );
    $this->initialiseVariables();

    wfRunHooks( 'ParserFirstCallInit', array( &$this ) );
    wfProfileOut( __METHOD__ );
}
278 
/**
 * Reset all per-parse state.
 *
 * Runs firstCallInit() if it has not run yet, creates a fresh ParserOutput,
 * LinkHolderArray and StripState, generates a new strip marker prefix,
 * zeroes the counters and caches, and finally runs the 'ParserClearState'
 * hook. $this->mOptions must already be set, since the new output object is
 * registered with it as an option watcher.
 */
function clearState() {
    wfProfileIn( __METHOD__ );

    if ( $this->mFirstCall ) {
        $this->firstCallInit();
    }

    $this->mOutput = new ParserOutput;
    $this->mOptions->registerWatcher( array( $this->mOutput, 'recordOption' ) );

    $this->mAutonumber = 0;
    $this->mLastSection = '';
    $this->mDTopen = false;
    $this->mIncludeCount = array();
    $this->mArgStack = false;
    $this->mInPre = false;
    $this->mLinkHolders = new LinkHolderArray( $this );
    $this->mLinkID = 0;

    # Forget everything known about the revision being parsed
    $this->mRevisionSize = null;
    $this->mRevisionUser = null;
    $this->mRevisionId = null;
    $this->mRevisionTimestamp = null;
    $this->mRevisionObject = null;

    $this->mVarCache = array();
    $this->mUser = null;
    $this->mLangLinkLanguages = array();

    # New random marker prefix, and a strip state bound to it
    $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
    $this->mStripState = new StripState( $this->mUniqPrefix );

    # Clear these on every parse, bug 4549
    $this->mTplDomCache = array();
    $this->mTplRedirCache = array();
    $this->mTplExpandCache = array();

    $this->mShowToc = true;
    $this->mForceTocPosition = false;
    $this->mIncludeSizes = array(
        'post-expand' => 0,
        'arg' => 0,
    );
    $this->mPPNodeCount = 0;
    $this->mGeneratedPPNodeCount = 0;
    $this->mHighestExpansionDepth = 0;
    $this->mDefaultSort = false;
    $this->mHeadings = array();
    $this->mDoubleUnderscores = array();
    $this->mExpensiveFunctionCount = 0;

    # Fix cloning: a preprocessor that still points at another parser
    # instance must not be reused
    if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
        $this->mPreprocessor = null;
    }

    wfRunHooks( 'ParserClearState', array( &$this ) );
    wfProfileOut( __METHOD__ );
}
343 
/**
 * Convert wikitext to HTML and return the populated output object.
 *
 * The steps run in a fixed order: startParse(), the ParserBeforeStrip /
 * ParserAfterStrip hooks, internalParse(), ParserAfterParse, unstripping,
 * "french spaces" fix-ups, block levels, link holder replacement, language
 * conversion, nowiki unstripping, ParserBeforeTidy, transparent tags,
 * character reference normalisation, tidy (or a regex-based fallback),
 * ParserAfterTidy and, when enabled, the limit report appended as an HTML
 * comment.
 *
 * @param string $text Wikitext to parse
 * @param Title $title Title passed to startParse() and used for title conversion
 * @param ParserOptions $options
 * @param bool $linestart Passed through to doBlockLevels()
 * @param bool $clearState Passed to startParse(); when true the strip marker
 *  prefix is also removed from the input
 * @param int|null $revid When not null, used as the revision ID for the
 *  duration of this call; the previous revision state is restored afterwards
 * @return mixed $this->mOutput (a ParserOutput when clearState() created it)
 */
public function parse( $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) {
    global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames;

    $profileSection = __METHOD__ . '-' . wfGetCaller();
    wfProfileIn( __METHOD__ );
    wfProfileIn( $profileSection );

    $this->startParse( $title, $options, self::OT_HTML, $clearState );

    $this->mInputSize = strlen( $text );
    if ( $this->mOptions->getEnableLimitReport() ) {
        $this->mOutput->resetParseStartTime();
    }

    # Remove the strip marker tag prefix from the input, if present.
    if ( $clearState ) {
        $text = str_replace( $this->mUniqPrefix, '', $text );
    }

    # Remember the current revision state so it can be restored at the end
    $prevRevisionId = $this->mRevisionId;
    $prevRevisionObject = $this->mRevisionObject;
    $prevRevisionTimestamp = $this->mRevisionTimestamp;
    $prevRevisionUser = $this->mRevisionUser;
    $prevRevisionSize = $this->mRevisionSize;
    if ( $revid !== null ) {
        $this->mRevisionId = $revid;
        $this->mRevisionObject = null;
        $this->mRevisionTimestamp = null;
        $this->mRevisionUser = null;
        $this->mRevisionSize = null;
    }

    wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
    # No more strip!
    wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
    $text = $this->internalParse( $text );
    wfRunHooks( 'ParserAfterParse', array( &$this, &$text, &$this->mStripState ) );

    $text = $this->mStripState->unstripGeneral( $text );

    # Clean up special characters, only run once, next-to-last before doBlockLevels
    $spaceFixes = array(
        # french spaces, last one Guillemet-left
        # only if there is something before the space
        '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
        # french spaces, Guillemet-right
        '/(\\302\\253) /' => '\\1&#160;',
        '/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
    );
    $text = preg_replace( array_keys( $spaceFixes ), array_values( $spaceFixes ), $text );

    $text = $this->doBlockLevels( $text, $linestart );

    $this->replaceLinkHolders( $text );

    # Content conversion is skipped when disabled through the options, by
    # __NOCONTENTCONVERT__, or for interface messages.
    if ( !$options->getDisableContentConversion()
        && !isset( $this->mDoubleUnderscores['nocontentconvert'] )
        && !$this->mOptions->getInterfaceMessage()
    ) {
        # The position of the convert() call should not be changed. it
        # assumes that the links are all replaced and the only thing left
        # is the <nowiki> mark.
        $text = $this->getConverterLanguage()->convert( $text );
    }

    # Title conversion is skipped when disabled through the options, by
    # either double-underscore switch, or when a display title is already set.
    if ( !$options->getDisableTitleConversion()
        && !isset( $this->mDoubleUnderscores['nocontentconvert'] )
        && !isset( $this->mDoubleUnderscores['notitleconvert'] )
        && $this->mOutput->getDisplayTitle() === false
    ) {
        $ruleTitle = $this->getConverterLanguage()->getConvRuleTitle();
        if ( $ruleTitle ) {
            $this->mOutput->setTitleText( $ruleTitle );
        } else {
            $convertedTitle = $this->getConverterLanguage()->convertTitle( $title );
            $this->mOutput->setTitleText( $convertedTitle );
        }
    }

    $text = $this->mStripState->unstripNoWiki( $text );

    wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );

    $text = $this->replaceTransparentTags( $text );
    $text = $this->mStripState->unstripGeneral( $text );

    $text = Sanitizer::normalizeCharReferences( $text );

    if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
        $text = MWTidy::tidy( $text );
    } else {
        # attempt to sanitize at least some nesting problems
        # (bug #2702 and quite a few others)
        $nestingFixes = array(
            # ''Something [http://www.cool.com cool''] -->
            # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
            '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
            '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
            # fix up an anchor inside another anchor, only
            # at least for a single single nested link (bug 3695)
            '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
            '\\1\\2</a>\\3</a>\\1\\4</a>',
            # fix div inside inline elements- doBlockLevels won't wrap a line which
            # contains a div, so fix it up here; replace
            # div with escaped text
            '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
            '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
            # remove empty italic or bold tag pairs, some
            # introduced by rules above
            '/<([bi])><\/\\1>/' => '',
        );

        $text = preg_replace( array_keys( $nestingFixes ), array_values( $nestingFixes ), $text );
    }

    $expensiveLimit = $this->mOptions->getExpensiveParserFunctionLimit();
    if ( $this->mExpensiveFunctionCount > $expensiveLimit ) {
        $this->limitationWarn( 'expensive-parserfunction',
            $this->mExpensiveFunctionCount,
            $this->mOptions->getExpensiveParserFunctionLimit()
        );
    }

    wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );

    # Information on include size limits, for the benefit of users who try to skirt them
    if ( $this->mOptions->getEnableLimitReport() ) {
        $maxIncludeSize = $this->mOptions->getMaxIncludeSize();
        $output = $this->mOutput;

        $cpuSeconds = $output->getTimeSinceStart( 'cpu' );
        if ( $cpuSeconds !== null ) {
            $output->setLimitReportData( 'limitreport-cputime', sprintf( "%.3f", $cpuSeconds ) );
        }

        $wallSeconds = $output->getTimeSinceStart( 'wall' );
        $output->setLimitReportData( 'limitreport-walltime', sprintf( "%.3f", $wallSeconds ) );

        $output->setLimitReportData( 'limitreport-ppvisitednodes',
            array( $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() )
        );
        $output->setLimitReportData( 'limitreport-ppgeneratednodes',
            array( $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() )
        );
        $output->setLimitReportData( 'limitreport-postexpandincludesize',
            array( $this->mIncludeSizes['post-expand'], $maxIncludeSize )
        );
        $output->setLimitReportData( 'limitreport-templateargumentsize',
            array( $this->mIncludeSizes['arg'], $maxIncludeSize )
        );
        $output->setLimitReportData( 'limitreport-expansiondepth',
            array( $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() )
        );
        $output->setLimitReportData( 'limitreport-expensivefunctioncount',
            array( $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() )
        );
        wfRunHooks( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) );

        $limitReport = "NewPP limit report\n";
        if ( $wgShowHostnames ) {
            $limitReport .= 'Parsed by ' . wfHostname() . "\n";
        }
        foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
            # A hook handler returning false has formatted the entry itself
            $useDefaultFormat = wfRunHooks( 'ParserLimitReportFormat',
                array( $key, &$value, &$limitReport, false, false )
            );
            if ( !$useDefaultFormat ) {
                continue;
            }

            $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
            $valueMsg = wfMessage( array( "$key-value-text", "$key-value" ) )
                ->inLanguage( 'en' )->useDatabase( false );
            if ( !$valueMsg->exists() ) {
                $valueMsg = new RawMessage( '$1' );
            }
            if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
                $valueMsg->params( $value );
                $limitReport .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
            }
        }
        // Since we're not really outputting HTML, decode the entities and
        // then re-encode the things that need hiding inside HTML comments.
        $limitReport = htmlspecialchars_decode( $limitReport );
        wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );

        // Sanitize for comment. Note '‐' in the replacement is U+2010,
        // which looks much like the problematic '-'.
        $limitReport = str_replace( array( '-', '&' ), array( '‐', '&amp;' ), $limitReport );
        $text .= "\n<!-- \n$limitReport-->\n";

        if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
            wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
                $this->mTitle->getPrefixedDBkey() );
        }
    }
    $this->mOutput->setText( $text );

    # Restore the revision state saved at the start
    $this->mRevisionId = $prevRevisionId;
    $this->mRevisionObject = $prevRevisionObject;
    $this->mRevisionTimestamp = $prevRevisionTimestamp;
    $this->mRevisionUser = $prevRevisionUser;
    $this->mRevisionSize = $prevRevisionSize;
    $this->mInputSize = false;

    wfProfileOut( $profileSection );
    wfProfileOut( __METHOD__ );

    return $this->mOutput;
}
586 
/**
 * Parse a piece of wikitext in the current parser state, without
 * re-initialising the parser.
 *
 * Runs the ParserBeforeStrip and ParserAfterStrip hooks and then
 * internalParse() with $isMain set to false.
 *
 * @param string $text Wikitext to parse
 * @param PPFrame|bool $frame Frame handed to internalParse(), or false
 * @return string Result of internalParse()
 */
function recursiveTagParse( $text, $frame = false ) {
    wfProfileIn( __METHOD__ );

    wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
    wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
    $parsed = $this->internalParse( $text, false, $frame );

    wfProfileOut( __METHOD__ );
    return $parsed;
}
606 
/**
 * Expand variables in the given text and return it with all strip markers
 * replaced again.
 *
 * Starts a new parse in OT_PREPROCESS mode with the state cleared, runs the
 * ParserBeforeStrip and ParserAfterStrip hooks, then replaceVariables() and
 * StripState::unstripBoth().
 *
 * @param string $text
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid When not null, stored as the revision ID
 * @return string
 */
function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null ) {
    wfProfileIn( __METHOD__ );

    $this->startParse( $title, $options, self::OT_PREPROCESS, true );
    if ( $revid !== null ) {
        $this->mRevisionId = $revid;
    }

    wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
    wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );

    $expanded = $this->replaceVariables( $text );
    $expanded = $this->mStripState->unstripBoth( $expanded );

    wfProfileOut( __METHOD__ );
    return $expanded;
}
625 
/**
 * Expand variables in the given text within the current parser state and
 * replace all strip markers again. No hooks are run and the parser is not
 * re-initialised.
 *
 * @param string $text
 * @param PPFrame|bool $frame Frame handed to replaceVariables(), or false
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
    wfProfileIn( __METHOD__ );

    $expanded = $this->replaceVariables( $text, $frame );
    $unstripped = $this->mStripState->unstripBoth( $expanded );

    wfProfileOut( __METHOD__ );
    return $unstripped;
}
642 
/**
 * Process wikitext that is going to be preloaded into an edit form.
 *
 * The text is first run through a RawMessage to substitute $params, then
 * preprocessed for inclusion and expanded in a new frame with the
 * PPFrame::NO_ARGS and PPFrame::NO_TEMPLATES flags, and finally all strip
 * markers are replaced again.
 *
 * @param string $text Text to transform
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Parameters substituted into the text via RawMessage
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = array() ) {
    $msg = new RawMessage( $text );
    $text = $msg->params( $params )->plain();

    # Parser (re)initialisation
    $this->startParse( $title, $options, self::OT_PLAIN, true );

    # $flags has to be defined before it is handed to expand(); leaving it
    # undefined passes null, i.e. no flags at all.
    $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
    $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
    $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
    $text = $this->mStripState->unstripBoth( $text );
    return $text;
}
669 
/**
 * Get a random string, as produced by wfRandomString( 16 ).
 *
 * @return string
 */
public static function getRandomString() {
    $random = wfRandomString( 16 );
    return $random;
}
678 
/**
 * Set the user that getUser() should return instead of the user from the
 * parser options.
 *
 * @param User|null $user Null makes getUser() fall back to the options
 */
function setUser( $user ) {
    $this->mUser = $user;
}
688 
/**
 * Accessor for the strip marker prefix.
 *
 * @return string The prefix, or an empty string when it has not been set yet
 */
public function uniqPrefix() {
    if ( isset( $this->mUniqPrefix ) ) {
        return $this->mUniqPrefix;
    }

    # @todo FIXME: This is probably *horribly wrong*
    # LanguageConverter seems to want $wgParser's uniqPrefix, however
    # if this is called for a parser cache hit, the parser may not
    # have ever been initialized in the first place.
    # Not really sure what the heck is supposed to be going on here.
    return '';
    # throw new MWException( "Accessing uninitialized mUniqPrefix" );
}
706 
/**
 * Set the context title.
 *
 * A falsy value or a FakeTitle is replaced by the title 'NO TITLE'. A title
 * carrying a fragment is cloned and the fragment removed from the clone, so
 * the caller's object is left untouched.
 *
 * @param Title|null $t
 */
function setTitle( $t ) {
    if ( !$t || $t instanceof FakeTitle ) {
        $t = Title::newFromText( 'NO TITLE' );
    }

    if ( !$t->hasFragment() ) {
        $this->mTitle = $t;
        return;
    }

    # Strip the fragment to avoid various odd effects
    $this->mTitle = clone $t;
    $this->mTitle->setFragment( '' );
}
725 
/**
 * Accessor for the context title.
 *
 * @return Title|null Current value of $this->mTitle
 */
function getTitle() {
    $title = $this->mTitle;
    return $title;
}
734 
/**
 * Combined accessor/mutator for the context title, implemented with
 * wfSetVar().
 *
 * @param Title|null $x Value handed to wfSetVar() as the new title
 * @return mixed Whatever wfSetVar() returns
 */
function Title( $x = null ) {
    $result = wfSetVar( $this->mTitle, $x );
    return $result;
}
744 
/**
 * Set the output type and rebuild the $ot shortcut array.
 *
 * $this->ot holds one boolean per mode ('html', 'wiki', 'pre', 'plain'),
 * each telling whether $ot equals the corresponding OT_xxx constant.
 *
 * @param int $ot One of the self::OT_xxx constants
 */
function setOutputType( $ot ) {
    $this->mOutputType = $ot;

    # Shortcut alias
    $flags = array();
    $flags['html'] = ( $ot == self::OT_HTML );
    $flags['wiki'] = ( $ot == self::OT_WIKI );
    $flags['pre'] = ( $ot == self::OT_PREPROCESS );
    $flags['plain'] = ( $ot == self::OT_PLAIN );
    $this->ot = $flags;
}
760 
/**
 * Combined accessor/mutator for the output type, implemented with
 * wfSetVar(). Unlike setOutputType(), this does not touch $this->ot.
 *
 * @param int|null $x Value handed to wfSetVar() as the new output type
 * @return mixed Whatever wfSetVar() returns
 */
function OutputType( $x = null ) {
    $result = wfSetVar( $this->mOutputType, $x );
    return $result;
}
770 
/**
 * Accessor for the output object of the current parse.
 *
 * @return ParserOutput|null Current value of $this->mOutput
 */
function getOutput() {
    $output = $this->mOutput;
    return $output;
}
779 
/**
 * Accessor for the parser options.
 *
 * @return ParserOptions|null Current value of $this->mOptions
 */
function getOptions() {
    $options = $this->mOptions;
    return $options;
}
788 
/**
 * Combined accessor/mutator for the parser options, implemented with
 * wfSetVar().
 *
 * @param ParserOptions|null $x Value handed to wfSetVar() as the new options
 * @return mixed Whatever wfSetVar() returns
 */
function Options( $x = null ) {
    $result = wfSetVar( $this->mOptions, $x );
    return $result;
}
798 
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The link ID as it was before the increment
 */
function nextLinkID() {
    $current = $this->mLinkID;
    $this->mLinkID++;
    return $current;
}
805 
/**
 * Overwrite the link ID counter.
 *
 * @param int $id Value the next call to nextLinkID() will return
 */
function setLinkID( $id ) {
    $this->mLinkID = $id;
}
812 
/**
 * Get the language for parser functions; this is simply the target
 * language, see getTargetLanguage().
 *
 * @return mixed Result of getTargetLanguage()
 */
function getFunctionLang() {
    $language = $this->getTargetLanguage();
    return $language;
}
820 
/**
 * Determine the language the output is meant for.
 *
 * In order of precedence: the target language from the parser options, the
 * user language object when parsing an interface message, and otherwise
 * the page language of the context title.
 *
 * @return mixed Language as returned by the options or the title
 * @throws MWException When no title is set and neither of the first two cases applies
 */
public function getTargetLanguage() {
    $explicitTarget = $this->mOptions->getTargetLanguage();
    if ( $explicitTarget !== null ) {
        return $explicitTarget;
    }

    if ( $this->mOptions->getInterfaceMessage() ) {
        return $this->mOptions->getUserLangObj();
    }

    if ( is_null( $this->mTitle ) ) {
        throw new MWException( __METHOD__ . ': $this->mTitle is null' );
    }

    return $this->mTitle->getPageLanguage();
}
843 
/**
 * Get the language object used for language conversion; this is simply the
 * target language, see getTargetLanguage().
 *
 * @return mixed Result of getTargetLanguage()
 */
function getConverterLanguage() {
    $language = $this->getTargetLanguage();
    return $language;
}
850 
/**
 * Get the user this parser works for.
 *
 * @return mixed The user set through setUser() when there is one, otherwise
 *  the user of the parser options
 */
function getUser() {
    if ( is_null( $this->mUser ) ) {
        return $this->mOptions->getUser();
    }
    return $this->mUser;
}
863 
/**
 * Get the preprocessor, creating it on first use from the class name that
 * the constructor selected.
 *
 * @return mixed Instance of $this->mPreprocessorClass
 */
function getPreprocessor() {
    if ( isset( $this->mPreprocessor ) ) {
        return $this->mPreprocessor;
    }

    $preprocessorClass = $this->mPreprocessorClass;
    $this->mPreprocessor = new $preprocessorClass( $this );
    return $this->mPreprocessor;
}
876 
/**
 * Replace the given XML-style tags, and HTML comments, in the text by
 * unique markers and report what was removed.
 *
 * Every entry of $matches is keyed by its marker and holds
 *   array( element name, content, decoded attributes, full original text ).
 * For a self-closing tag the content is null. A tag or comment without an
 * end runs to the end of the text. The marker counter is static, so it
 * keeps increasing across calls.
 *
 * @param array $elements Tag names to look for; joined with '|' into the
 *  regex without quoting
 * @param string $text Source text
 * @param array &$matches Out-parameter, overwritten with the extracted items
 * @param string $uniq_prefix Prefix put in front of every marker
 * @return string The text with all matches replaced by markers
 */
public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) {
    static $n = 1;

    $matches = array();
    $output = '';

    $taglist = implode( '|', $elements );
    $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

    while ( $text != '' ) {
        $pieces = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
        $pieceCount = count( $pieces );
        $output .= $pieces[0];

        # Fewer than five pieces: no further tag or comment was found
        if ( $pieceCount < 5 ) {
            break;
        }

        if ( $pieceCount > 5 ) {
            # comment: only the fourth group took part in the match
            $element = $pieces[4];
            $attributes = '';
            $close = '';
            $inside = $pieces[5];
        } else {
            # tag
            $element = $pieces[1];
            $attributes = $pieces[2];
            $close = $pieces[3];
            $inside = $pieces[4];
        }

        $marker = "$uniq_prefix-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
        $output .= $marker;

        if ( $close === '/>' ) {
            # Empty element tag, <tag />
            $content = null;
            $tail = null;
            $text = $inside;
        } else {
            $end = ( $element === '!--' ) ? '/(-->)/' : "/(<\\/$element\\s*>)/i";
            $rest = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
            $content = $rest[0];
            if ( count( $rest ) >= 3 ) {
                $tail = $rest[1];
                $text = $rest[2];
            } else {
                # No end tag -- let it run out to the end of the text.
                $tail = '';
                $text = '';
            }
        }

        $matches[$marker] = array(
            $element,
            $content,
            Sanitizer::decodeTagAttributes( $attributes ),
            "<$element$attributes$close$content$tail"
        );
    }

    return $output;
}
958 
/**
 * Accessor for the strip list.
 *
 * @return array Current value of $this->mStripList
 */
function getStripList() {
    $list = $this->mStripList;
    return $list;
}
967 
/**
 * Store a piece of text in the general strip state and return the marker
 * that stands in for it.
 *
 * The marker is built from the strip marker prefix, the running marker
 * index and MARKER_SUFFIX; the index is incremented on every call.
 *
 * @param string $text Text to hide behind the marker
 * @return string The marker
 */
function insertStripItem( $text ) {
    $marker = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
    $this->mMarkerIndex++;

    $this->mStripState->addGeneral( $marker, $text );

    return $marker;
}
983 
/**
 * Convert wikitext table syntax ({| ... |}) into HTML tables.
 *
 * The text is processed line by line. Five parallel stacks hold the state
 * of every table that is currently open, so nested tables work. Lines
 * outside any table and empty lines are passed through unchanged. Tables
 * still open at the end of the text are closed automatically.
 *
 * @param string $text
 * @return string
 */
function doTableStuff( $text ) {
    wfProfileIn( __METHOD__ );

    $result = '';
    # One entry per open table in each of these stacks:
    $cellOpen = array(); # is a cell (td, th or caption) currently open?
    $cellTag = array(); # name of the last cell tag opened
    $rowOpen = array(); # is a tr currently open?
    $rowAttribs = array(); # attributes for the next tr to be opened
    $rowSeen = array(); # did this table open a <tr> element?
    $indent = 0; # indent level of the table

    # Cell tag for each line-leading character
    $tagByLead = array( '|' => 'td', '!' => 'th', '+' => 'caption' );

    foreach ( StringUtils::explode( "\n", $text ) as $rendered ) {
        $line = trim( $rendered );

        if ( $line === '' ) { # empty line, go to next line
            $result .= $rendered . "\n";
            continue;
        }

        $lead = $line[0];
        $leadPair = substr( $line, 0, 2 );
        $matches = array();

        if ( preg_match( '/^(:*)\{\|(.*)$/', $line, $matches ) ) {
            # First check if we are starting a new table
            $indent = strlen( $matches[1] );

            $attributes = $this->mStripState->unstripBoth( $matches[2] );
            $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

            $rendered = str_repeat( '<dl><dd>', $indent ) . "<table{$attributes}>";
            $cellOpen[] = false;
            $cellTag[] = '';
            $rowOpen[] = false;
            $rowAttribs[] = '';
            $rowSeen[] = false;
        } elseif ( count( $cellOpen ) == 0 ) {
            # Not inside a table: don't do any of the following
            $result .= $rendered . "\n";
            continue;
        } elseif ( $leadPair === '|}' ) {
            # We are ending a table
            $closing = '</table>' . substr( $line, 2 );
            $openTag = array_pop( $cellTag );

            # A table without any row gets an empty one
            if ( !array_pop( $rowSeen ) ) {
                $closing = "<tr><td></td></tr>{$closing}";
            }

            if ( array_pop( $rowOpen ) ) {
                $closing = "</tr>{$closing}";
            }

            if ( array_pop( $cellOpen ) ) {
                $closing = "</{$openTag}>{$closing}";
            }
            array_pop( $rowAttribs );
            $rendered = $closing . str_repeat( '</dd></dl>', $indent );
        } elseif ( $leadPair === '|-' ) {
            # Now we have a table row
            $line = preg_replace( '#^\|-+#', '', $line );

            # Whats after the tag is now only attributes
            $attributes = $this->mStripState->unstripBoth( $line );
            $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
            array_pop( $rowAttribs );
            $rowAttribs[] = $attributes;

            $closing = '';
            $openTag = array_pop( $cellTag );
            array_pop( $rowSeen );
            $rowSeen[] = true;

            if ( array_pop( $rowOpen ) ) {
                $closing = '</tr>';
            }

            if ( array_pop( $cellOpen ) ) {
                $closing = "</{$openTag}>{$closing}";
            }

            # The new <tr> itself is only emitted together with its first cell
            $rendered = $closing;
            $rowOpen[] = false;
            $cellOpen[] = false;
            $cellTag[] = '';
        } elseif ( $lead === '|' || $lead === '!' || $leadPair === '|+' ) {
            # This might be cell elements, td, th or captions
            if ( $leadPair === '|+' ) {
                $lead = '+';
                $line = substr( $line, 1 );
            }

            $line = substr( $line, 1 );

            if ( $lead === '!' ) {
                $line = str_replace( '!!', '||', $line );
            }

            # Split up multiple cells on the same line.
            # FIXME : This can result in improper nesting of tags processed
            # by earlier parser steps, but should avoid splitting up eg
            # attribute values containing literal "||".
            $cells = StringUtils::explodeMarkup( '||', $line );

            $isCaption = ( $lead === '+' );
            $newTag = isset( $tagByLead[$lead] ) ? $tagByLead[$lead] : '';

            $rendered = '';

            # Loop through each table cell
            foreach ( $cells as $cell ) {
                $prefix = '';
                if ( !$isCaption ) {
                    # Open a row first unless one is open already
                    $pendingAttribs = array_pop( $rowAttribs );
                    if ( !array_pop( $rowOpen ) ) {
                        $prefix = "<tr{$pendingAttribs}>\n";
                    }
                    $rowOpen[] = true;
                    $rowAttribs[] = '';
                    array_pop( $rowSeen );
                    $rowSeen[] = true;
                }

                # Close the previous cell, if any
                $openTag = array_pop( $cellTag );
                if ( array_pop( $cellOpen ) ) {
                    $prefix = "</{$openTag}>\n{$prefix}";
                }

                $cellTag[] = $newTag;

                # A cell could contain both parameters and data
                $cellParts = explode( '|', $cell, 2 );

                # Bug 553: Note that a '|' inside an invalid link should not
                # be mistaken as delimiting cell parameters
                if ( strpos( $cellParts[0], '[[' ) !== false ) {
                    $cell = "{$prefix}<{$newTag}>{$cell}";
                } elseif ( count( $cellParts ) == 1 ) {
                    $cell = "{$prefix}<{$newTag}>{$cellParts[0]}";
                } else {
                    $attributes = $this->mStripState->unstripBoth( $cellParts[0] );
                    $attributes = Sanitizer::fixTagAttributes( $attributes, $newTag );
                    $cell = "{$prefix}<{$newTag}{$attributes}>{$cellParts[1]}";
                }

                $rendered .= $cell;
                $cellOpen[] = true;
            }
        }
        $result .= $rendered . "\n";
    }

    # Closing open td, tr && table
    # Note that an open cell is always closed with </td> here, whatever tag
    # opened it.
    while ( count( $cellOpen ) > 0 ) {
        if ( array_pop( $cellOpen ) ) {
            $result .= "</td>\n";
        }
        if ( array_pop( $rowOpen ) ) {
            $result .= "</tr>\n";
        }
        if ( !array_pop( $rowSeen ) ) {
            $result .= "<tr><td></td></tr>\n";
        }

        $result .= "</table>\n";
    }

    # Remove trailing line-ending (b/c)
    if ( substr( $result, -1 ) === "\n" ) {
        $result = substr( $result, 0, -1 );
    }

    # special case: don't return empty table
    if ( $result === "<table>\n<tr><td></td></tr>\n</table>" ) {
        $result = '';
    }

    wfProfileOut( __METHOD__ );

    return $result;
}
1179 
/**
 * Helper for parse() and recursiveTagParse() that does most of the
 * wikitext to HTML transformation, short of block levels.
 *
 * A ParserBeforeInternalParse handler returning false suspends the parser
 * here; the text is then returned as the hook left it.
 *
 * @param string $text
 * @param bool $isMain Passed through to formatHeadings()
 * @param PPFrame|bool $frame Frame to expand the text in, or false to use
 *  replaceVariables()
 * @return string
 */
function internalParse( $text, $isMain = true, $frame = false ) {
    wfProfileIn( __METHOD__ );

    # Kept for formatHeadings(), which needs the unmodified input
    $origText = $text;

    # Hook to suspend the parser in this state
    $proceed = wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) );
    if ( !$proceed ) {
        wfProfileOut( __METHOD__ );
        return $text;
    }

    if ( !$frame ) {
        # if $frame is not provided, then use old-style replaceVariables
        $text = $this->replaceVariables( $text );
    } else {
        # if $frame is provided, then use $frame for replacing any variables.
        # use frame depth to infer how include/noinclude tags should be handled
        # depth=0 means this is the top-level document; otherwise it's an included document
        $flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
        $dom = $this->preprocessToDom( $text, $flag );
        $text = $frame->expand( $dom );
    }

    wfRunHooks( 'InternalParseBeforeSanitize', array( &$this, &$text, &$this->mStripState ) );
    $text = Sanitizer::removeHTMLtags(
        $text,
        array( &$this, 'attributeStripCallback' ),
        false,
        array_keys( $this->mTransparentTagHooks )
    );
    wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) );

    # Tables need to come after variable replacement for things to work
    # properly; putting them before other transformations should keep
    # exciting things like link expansions from showing up in surprising
    # places.
    $text = $this->doTableStuff( $text );

    # Four or more dashes at the start of a line become a horizontal rule
    $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

    $text = $this->doDoubleUnderscore( $text );

    $text = $this->doHeadings( $text );
    $text = $this->replaceInternalLinks( $text );
    $text = $this->doAllQuotes( $text );
    $text = $this->replaceExternalLinks( $text );

    # replaceInternalLinks may sometimes leave behind
    # absolute URLs, which have to be masked to hide them from replaceExternalLinks
    $noParseMarker = $this->mUniqPrefix . 'NOPARSE';
    $text = str_replace( $noParseMarker, '', $text );

    $text = $this->doMagicLinks( $text );
    $text = $this->formatHeadings( $text, $origText, $isMain );

    wfProfileOut( __METHOD__ );
    return $text;
}
1248 
/**
 * Replace "magic" links in the text: free external URLs, "RFC n", "PMID n"
 * and "ISBN n". Existing anchors and the inside of HTML tags are matched
 * first so that they are skipped. Every match is handed to
 * magicLinkCallback().
 *
 * @param string $text
 * @return string
 */
function doMagicLinks( $text ) {
    wfProfileIn( __METHOD__ );

    $prots = wfUrlProtocolsWithoutProtRel();
    $urlChar = self::EXT_LINK_URL_CLASS;

    # Extended (/x) pattern: whitespace is ignored and '#' starts a comment.
    # Only the third line is double-quoted, to interpolate the protocols and
    # the URL character class.
    $pattern = '!(?: # Start cases
    (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
    (<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
    (\\b(?i:$prots)$urlChar+) | # m[3]: Free external links" . '
    (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number
    ISBN\s+(\b # m[5]: ISBN, capture number
    (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
    (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
    [0-9Xx] # check digit
    \b)
    )!xu';

    $replaced = preg_replace_callback( $pattern, array( &$this, 'magicLinkCallback' ), $text );

    wfProfileOut( __METHOD__ );
    return $replaced;
}
1279 
/**
 * Callback for doMagicLinks(): decide what a single regex match becomes.
 *
 * @param array $m Match array; index 1 = anchor, 2 = HTML tag, 3 = free URL,
 *   4 = RFC/PMID number, 5 = ISBN number
 * @throws MWException If an RFC/PMID match starts with neither keyword
 * @return string Replacement text (HTML)
 */
function magicLinkCallback( $m ) {
	$whole = $m[0];

	# Anchors and other HTML elements are passed through untouched
	if ( ( isset( $m[1] ) && $m[1] !== '' ) || ( isset( $m[2] ) && $m[2] !== '' ) ) {
		return $whole;
	}

	# Free external link
	if ( isset( $m[3] ) && $m[3] !== '' ) {
		return $this->makeFreeExternalLink( $whole );
	}

	# RFC or PMID
	if ( isset( $m[4] ) && $m[4] !== '' ) {
		# keyword => array( URL message key, CSS class )
		$kinds = array(
			'RFC' => array( 'rfcurl', 'mw-magiclink-rfc' ),
			'PMID' => array( 'pubmedurl', 'mw-magiclink-pmid' ),
		);
		$keyword = null;
		foreach ( $kinds as $candidate => $unused ) {
			if ( strncmp( $whole, $candidate, strlen( $candidate ) ) === 0 ) {
				$keyword = $candidate;
				break;
			}
		}
		if ( $keyword === null ) {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$urlmsg = $kinds[$keyword][0];
		$cssClass = $kinds[$keyword][1];
		$id = $m[4];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass );
	}

	# ISBN
	if ( isset( $m[5] ) && $m[5] !== '' ) {
		$isbn = $m[5];
		# Normalise for the Booksources title: no separators, upper-case check digit
		$num = str_replace( array( '-', ' ', 'x' ), array( '', '', 'X' ), $isbn );
		$titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
		$href = htmlspecialchars( $titleObj->getLocalURL() );
		return '<a href="' . $href . '" class="internal mw-magiclink-isbn">ISBN ' . $isbn . '</a>';
	}

	# Nothing recognisable captured; leave the text alone
	return $whole;
}
1329 
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Text from the first "&lt;" / "&gt;" onwards and trailing punctuation are
 * split off the URL and appended after the generated link. If the URL is
 * accepted as an external image, an image is produced instead of a link.
 *
 * @param string $url
 * @return string HTML followed by the split-off trail
 */
function makeFreeExternalLink( $url ) {
	wfProfileIn( __METHOD__ );

	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	$m2 = array();
	if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
		$trail = substr( $url, $m2[0][1] ) . $trail;
		$url = substr( $url, 0, $m2[0][1] );
	}

	# Move trailing punctuation to $trail
	$sep = ',;\.:!?';
	# If there is no left bracket, then consider right brackets fair game too
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$numSepChars = strspn( strrev( $url ), $sep );

	# Don't break a trailing character reference such as "&amp;" by moving
	# its terminating ';' into $trail: if the punctuation run starts with a
	# ';' that closes an entity, keep that ';' as part of the URL.
	if ( $numSepChars
		&& substr( $url, -$numSepChars, 1 ) === ';'
		&& preg_match( '/&(?:[a-z]+|#x[\da-f]+|#\d+)$/i', substr( $url, 0, -$numSepChars ) )
	) {
		$numSepChars--;
	}

	# Guard against 0: substr( $url, -0 ) would return the whole string
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$text = $this->maybeMakeExternalImage( $url );
	if ( $text === false ) {
		# Not an image, make a link
		$text = Linker::makeExternalLink( $url,
			$this->getConverterLanguage()->markNoConversion( $url, true ),
			true, 'free',
			$this->getExternalLinkAttribs( $url ) );
		# Register it in the output object...
		# Replace unnecessary URL escape codes with their equivalent characters
		$pasteurized = self::replaceUnusualEscapes( $url );
		$this->mOutput->addExternalLink( $pasteurized );
	}
	wfProfileOut( __METHOD__ );
	return $text . $trail;
}
1383 
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from six equals signs down to one, so the longest
 * run of "=" on a line is consumed first.
 *
 * @param string $text
 * @return string
 */
function doHeadings( $text ) {
	wfProfileIn( __METHOD__ );
	$level = 6;
	while ( $level >= 1 ) {
		$marker = str_repeat( '=', $level );
		$pattern = '/^' . $marker . '(.+)' . $marker . '\\s*$/m';
		$replacement = '<h' . $level . '>\\1</h' . $level . '>';
		$text = preg_replace( $pattern, $replacement, $text );
		$level--;
	}
	wfProfileOut( __METHOD__ );
	return $text;
}
1402 
/**
 * Apply doQuotes() to every line of the text independently and join the
 * results back together with newlines.
 *
 * @param string $text
 * @return string
 */
function doAllQuotes( $text ) {
	wfProfileIn( __METHOD__ );
	$converted = array();
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	$outtext = implode( "\n", $converted );
	wfProfileOut( __METHOD__ );
	return $outtext;
}
1422 
/**
 * Convert runs of apostrophes in a single line into <i> / <b> markup.
 *
 * Two apostrophes toggle italics, three toggle bold, five toggle both.
 * Runs of four, or of more than five, give their surplus leading
 * apostrophes back to the preceding text. Tags still open at the end of
 * the line are closed.
 *
 * @param string $text One line of text
 * @return string The line with apostrophe markup replaced by HTML tags
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold the apostrophe runs, even indexes the text between them
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		# No apostrophe runs at all
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				# Last and second-to-last bytes of the text before this bold marker
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					if ( $firstsingleletterword == -1 ) {
						$firstsingleletterword = $i;
						// if $firstsingleletterword is set, we don't
						// look at the other options, so we can bail early.
						break;
					}
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state names the currently open tags in opening order ('', 'i', 'b',
	// 'ib', 'bi'), or is 'both' after a ''''' whose tag order is not yet
	// known; in that case the text is held back in $buffer.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			# Plain text segment
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			# Apostrophe run
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a buffer of "0" is falsy in PHP and
	// would otherwise be silently dropped from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1606 
/**
 * Replace bracketed external links ("[url text]") with HTML links.
 *
 * The text is split with $this->mExtLinkBracketedRegex; each match
 * contributes four pieces (URL, protocol, link text, following text).
 * Every generated link is also registered in the parser output.
 *
 * @param string $text
 * @throws MWException If preg_split() fails
 * @return string
 */
function replaceExternalLinks( $text ) {
	wfProfileIn( __METHOD__ );

	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $pieces === false ) {
		wfProfileOut( __METHOD__ );
		throw new MWException( "PCRE needs to be compiled with --enable-unicode-properties in order for MediaWiki to function" );
	}

	# Everything before the first link
	$html = array_shift( $pieces );

	for ( $pos = 0; $pos < count( $pieces ); $pos += 4 ) {
		$url = $pieces[$pos];
		# $pieces[$pos + 1] is the protocol, which is not needed here
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# Escaped '<' and '>' (from removeHTMLtags()) must not be part of
		# the URL, per RFC 2396; push them and what follows into the label.
		$found = array();
		if ( preg_match( '/&(lt|gt);/', $url, $found, PREG_OFFSET_CAPTURE ) ) {
			$cut = $found[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# A label that is itself an image URL becomes an <img> tag. This
		# started as an accident in the original parser but is relied upon.
		$image = $this->maybeMakeExternalImage( $label );
		if ( $image !== false ) {
			$label = $image;
		}

		# Link type for CSS: a link whose label equals its URL is essentially free
		$linktype = ( $label === $url ) ? 'free' : 'text';
		$insideTrail = '';

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber it
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s;
			# split the link trail off the following text
			list( $insideTrail, $trail ) = Linker::splitTrail( $trail );
		}

		$label = $this->getConverterLanguage()->markNoConversion( $label );
		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL, so users can paste URLs directly into the text
		$html .= Linker::makeExternalLink(
			$url, $label, false, $linktype, $this->getExternalLinkAttribs( $url )
		) . $insideTrail . $trail;

		# Register the link with unnecessary escapes decoded, so spammers
		# cannot hide links from the filters
		$this->mOutput->addExternalLink( self::replaceUnusualEscapes( $url ) );
	}

	wfProfileOut( __METHOD__ );
	return $html;
}
1691 
/**
 * Get the rel attribute for a particular external link.
 *
 * @param string|bool $url URL checked against $wgNoFollowDomainExceptions
 * @param Title|null $title Title whose namespace is checked against
 *   $wgNoFollowNsExceptions; may be omitted
 * @return string|null 'nofollow', or null if the link should not get a rel value
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}

	# Only consult the namespace exceptions when a title is available.
	# Searching for a placeholder "false" would loosely match namespace 0
	# (false == 0) and wrongly drop nofollow for title-less calls.
	if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
		return null;
	}

	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
1711 
/**
 * Build the HTML attributes to use for an external link.
 *
 * @param string|bool $url Passed on to getExternalLinkRel()
 * @return array Always has 'rel'; has 'target' when the parser options
 *   provide a non-empty external link target
 */
function getExternalLinkAttribs( $url = false ) {
	$attribs = array(
		'rel' => self::getExternalLinkRel( $url, $this->mTitle ),
	);

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
	}

	return $attribs;
}
1731 
/**
 * Run every %XX escape in a URL through replaceUnusualEscapesCallback(),
 * which decides whether it is decoded or left as it is.
 *
 * @param string $url
 * @return string
 */
static function replaceUnusualEscapes( $url ) {
	$escapePattern = '/%[0-9A-Fa-f]{2}/';
	$handler = array( __CLASS__, 'replaceUnusualEscapesCallback' );
	return preg_replace_callback( $escapePattern, $handler, $url );
}
1747 
/**
 * Callback for replaceUnusualEscapes(): decode one %XX escape unless the
 * character it stands for is unsafe or reserved.
 *
 * @param array $matches Regex match; $matches[0] is the %XX sequence
 * @return string The decoded character, or the original escape
 */
private static function replaceUnusualEscapesCallback( $matches ) {
	$escaped = $matches[0];
	$char = urldecode( $escaped );
	$ord = ord( $char );

	# Control characters, space, DEL and non-ASCII bytes stay escaped
	if ( $ord <= 32 || $ord >= 127 ) {
		return $escaped;
	}

	# Unsafe or HTTP reserved characters (RFC 1738) stay escaped too
	if ( strpos( '<>"#{}|\^~[]`;/?', $char ) !== false ) {
		return $escaped;
	}

	# Anything else did not need escaping in the first place
	return $char;
}
1768 
/**
 * Make an image tag for a URL if external images are allowed for it.
 *
 * A URL qualifies when it matches EXT_IMAGE_REGEX and either external
 * images are allowed in general, or it starts with one of the configured
 * "allow from" prefixes, or (when the whitelist is enabled) it matches a
 * line of the 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|bool Image HTML, or false if no image was made
 */
function maybeMakeExternalImage( $url ) {
	$options = $this->mOptions;

	# The "allow from" setting may be one prefix or an array of prefixes
	$allowedFrom = $options->getAllowExternalImagesFrom();
	$prefixMatches = false;
	if ( !empty( $allowedFrom ) ) {
		$prefixes = is_array( $allowedFrom ) ? $allowedFrom : array( $allowedFrom );
		foreach ( $prefixes as $candidate ) {
			if ( strpos( $url, $candidate ) === 0 ) {
				$prefixMatches = true;
				break;
			}
		}
	}

	$html = false;
	if ( ( $options->getAllowExternalImages() || $prefixMatches )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( !$html && $options->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$entries = explode( "\n", wfMessage( 'external_image_whitelist' )->inContentLanguage()->text() );
		foreach ( $entries as $entry ) {
			# Blank lines and '#' comment lines are not patterns
			if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
				continue;
			}
			# Each entry is a regex fragment, matched case-insensitively
			$entryPattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
			if ( preg_match( $entryPattern, $url ) ) {
				# Image matches a whitelist entry
				$html = Linker::makeExternalImage( $url );
				break;
			}
		}
	}

	return $html;
}
1820 
/**
 * Process [[ ]] wikilinks: run replaceInternalLinks2() on the text and
 * merge the link holders it returns into $this->mLinkHolders.
 *
 * @param string $s
 * @return string Processed text
 */
function replaceInternalLinks( $s ) {
	# $s is passed by reference and rewritten in place
	$newHolders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $newHolders );
	return $s;
}
1834 
/**
 * Process [[ ]] wikilinks in $s, rewriting $s in place.
 *
 * The text is split on "[[" and each piece is handled in turn: language
 * links, images, categories, self links and media links are dealt with
 * directly, everything else becomes either a known link or a link holder.
 *
 * @param string &$s Text to process; modified in place
 * @throws MWException If $this->mTitle is null
 * @return LinkHolderArray The holders created while processing $s
 */
function replaceInternalLinks2( &$s ) {
	wfProfileIn( __METHOD__ );

	wfProfileIn( __METHOD__ . '-setup' );
	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		# $wgContLang must be imported, otherwise it is an undefined local here.
		global $wgContLang;
		$charset = $wgContLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( is_null( $this->mTitle ) ) {
		wfProfileOut( __METHOD__ . '-setup' );
		wfProfileOut( __METHOD__ );
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = array();
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();
	wfProfileOut( __METHOD__ . '-setup' );

	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			wfProfileIn( __METHOD__ . '-prefixhandling' );
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
			wfProfileOut( __METHOD__ . '-prefixhandling' );
		}

		$might_be_img = false;

		wfProfileIn( __METHOD__ . "-e1" );
		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match;
			# the real problem is with the $e1 regex
			# See bug 1300.
			#
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			#
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( array( '<', '>' ), array( '&lt;', '&gt;' ), rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = rawurldecode( $m[1] );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			wfProfileOut( __METHOD__ . "-e1" );
			continue;
		}
		wfProfileOut( __METHOD__ . "-e1" );
		wfProfileIn( __METHOD__ . "-misc" );

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $m[1] ) ) {
			$s .= $prefix . '[[' . $line;
			wfProfileOut( __METHOD__ . "-misc" );
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $m[1], $text );
		} else {
			$link = $m[1];
		}

		# A leading ':' forces a plain link (no image/category/language-link handling)
		$noforce = ( substr( $m[1], 0, 1 ) !== ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		wfProfileOut( __METHOD__ . "-misc" );
		wfProfileIn( __METHOD__ . "-title" );
		$nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) );
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			wfProfileOut( __METHOD__ . "-title" );
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();
		wfProfileOut( __METHOD__ . "-title" );

		if ( $might_be_img ) { # if this is actually an invalid link
			wfProfileIn( __METHOD__ . "-might_be_img" );
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					wfProfileOut( __METHOD__ . "-might_be_img" );
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				wfProfileOut( __METHOD__ . "-might_be_img" );
				continue;
			}
			wfProfileOut( __METHOD__ . "-might_be_img" );
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
		} else {
			# Bug 4598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce ) {
			# Interwikis
			wfProfileIn( __METHOD__ . "-interwiki" );
			if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && Language::fetchLanguageName( $iw, null, 'mw' ) ) {
				// XXX: the above check prevents links to sites with identifiers that are not language codes

				# Bug 24502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				$s = rtrim( $s . $prefix );
				$s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
				wfProfileOut( __METHOD__ . "-interwiki" );
				continue;
			}
			wfProfileOut( __METHOD__ . "-interwiki" );

			if ( $ns == NS_FILE ) {
				wfProfileIn( __METHOD__ . "-image" );
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
				} else {
					$s .= $prefix . $trail;
				}
				wfProfileOut( __METHOD__ . "-image" );
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				wfProfileIn( __METHOD__ . "-category" );
				$s = rtrim( $s . "\n" ); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				# Drop the prefix and trail if they consist of newlines only
				$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

				wfProfileOut( __METHOD__ . "-category" );
				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			wfProfileIn( __METHOD__ . "-media" );
			# Give extensions a chance to select the file revision for us
			$options = array();
			$descQuery = false;
			wfRunHooks( 'BeforeParserFetchFileAndTitle',
				array( $this, $nt, &$options, &$descQuery ) );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			wfProfileOut( __METHOD__ . "-media" );
			continue;
		}

		wfProfileIn( __METHOD__ . "-always_known" );
		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		#
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix );
		}
		wfProfileOut( __METHOD__ . "-always_known" );
	}
	wfProfileOut( __METHOD__ );
	return $holders;
}
2170 
/**
 * Render a link to a title that is known to exist, with URLs in the
 * result armoured against later external-link replacement.
 *
 * @param Title $nt
 * @param string $text Link text; the escaped prefixed title is used when empty
 * @param array|string $query Query parameters; a string is converted with wfCgiToArray()
 * @param string $trail Text after the link; its link-trail part is pulled inside the link
 * @param string $prefix Text placed inside the link before $text
 * @return string HTML
 */
function makeKnownLinkHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
	$split = Linker::splitTrail( $trail );
	$inside = $split[0];
	$trail = $split[1];

	$queryArray = is_string( $query ) ? wfCgiToArray( $query ) : $query;
	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$html = Linker::linkKnown( $nt, $prefix . $label . $inside, array(), $queryArray );

	return $this->armorLinks( $html ) . $trail;
}
2199 
/**
 * Insert a NOPARSE marker (prefixed with $this->mUniqPrefix) in front of
 * every URL protocol in the text, matched case-insensitively at a word
 * boundary.
 *
 * @param string $text
 * @return string
 */
function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$armoured = $this->mUniqPrefix . 'NOPARSE$1';
	return preg_replace( $protocolPattern, $armoured, $text );
}
2214 
/**
 * Return whether the namespace of the current title has subpages,
 * according to MWNamespace::hasSubpages().
 *
 * @return bool
 */
function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$namespace = $this->mTitle->getNamespace();
	return MWNamespace::hasSubpages( $namespace );
}
2223 
/**
 * Normalise a possible subpage link relative to the current title by
 * delegating to Linker::normalizeSubpageLink().
 *
 * @param string $target Link target
 * @param string &$text Link text; handed on by reference
 * @return string Value returned by Linker::normalizeSubpageLink()
 */
function maybeDoSubpageLink( $target, &$text ) {
	$normalized = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
	return $normalized;
}
2235 
/**
 * Close the block section recorded in $this->mLastSection, if any, and
 * reset the section and <pre> tracking state.
 *
 * @return string Closing tag plus newline, or '' if no section was open
 */
function closeParagraph() {
	$closing = ( $this->mLastSection != '' )
		? '</' . $this->mLastSection . ">\n"
		: '';
	$this->mLastSection = '';
	$this->mInPre = false;
	return $closing;
}
2251 
/**
 * Return the length of the longest common prefix of two strings,
 * compared byte by byte.
 *
 * @param string $st1
 * @param string $st2
 * @return int
 */
function getCommon( $st1, $st2 ) {
	$limit = min( strlen( $st1 ), strlen( $st2 ) );
	$matched = 0;
	while ( $matched < $limit && $st1[$matched] == $st2[$matched] ) {
		$matched++;
	}
	return $matched;
}
2276 
/**
 * Close any open paragraph and open the list that belongs to a prefix
 * character: '*' => ul, '#' => ol, ':' => dl/dd, ';' => dl/dt.
 *
 * @param string $char Prefix character
 * @return string HTML; for an unknown character only an error comment
 */
function openList( $char ) {
	$openers = array(
		'*' => "<ul>\n<li>",
		'#' => "<ol>\n<li>",
		':' => "<dl>\n<dd>",
		';' => "<dl>\n<dt>",
	);

	# The paragraph is closed even when the character turns out to be unknown
	$closed = $this->closeParagraph();

	if ( !is_string( $char ) || !isset( $openers[$char] ) ) {
		return '<!-- ERR 1 -->';
	}

	if ( $char === ';' ) {
		# Remember that a <dt> is open
		$this->mDTopen = true;
	}

	return $closed . $openers[$char];
}
2304 
/**
 * Close the current list item and open the next one at the same level.
 *
 * For definition lists the closing tag depends on whether a <dt> is
 * currently open ($this->mDTopen), and that flag is updated to reflect
 * the newly opened item.
 *
 * @param string $char Prefix character ('*', '#', ':' or ';')
 * @return string HTML, or an error comment for an unknown character
 */
function nextItem( $char ) {
	if ( $char === '*' || $char === '#' ) {
		return "</li>\n<li>";
	}

	if ( $char !== ':' && $char !== ';' ) {
		return '<!-- ERR 2 -->';
	}

	$close = $this->mDTopen ? "</dt>\n" : "</dd>\n";
	$isTerm = ( $char === ';' );
	$this->mDTopen = $isTerm;

	return $close . ( $isTerm ? '<dt>' : '<dd>' );
}
2330 
/**
 * Close the list that belongs to a prefix character.
 *
 * Only '*', '#' and ':' are recognised; for ':' the item being closed is
 * a <dt> if one is marked open in $this->mDTopen, otherwise a <dd>.
 *
 * @param string $char Prefix character
 * @return string Closing HTML followed by a newline, or an error comment
 *   (without newline) for any other character
 */
function closeList( $char ) {
	if ( $char === '*' ) {
		return "</li>\n</ul>" . "\n";
	}

	if ( $char === '#' ) {
		return "</li>\n</ol>" . "\n";
	}

	if ( $char !== ':' ) {
		return '<!-- ERR 3 -->';
	}

	if ( $this->mDTopen ) {
		$this->mDTopen = false;
		return "</dt>\n</dl>" . "\n";
	}

	return "</dd>\n</dl>" . "\n";
}
/**
 * Turn a chunk of text into block-level HTML, working line by line:
 * list markup from lines starting with * # ; :, paragraphs, and <pre>
 * sections from lines starting with a space.
 *
 * Reads and updates $this->mInPre, $this->mDTopen and $this->mLastSection
 * while it runs; a section still open at the end is closed and
 * $this->mLastSection is reset to ''.
 *
 * @param string $text text to process; it is split on "\n"
 * @param bool $linestart when false, the first line is copied to the output
 *   untouched (no block-level handling, no trailing newline) and normal
 *   processing starts with the following line
 * @return string the processed text
 */
2365  function doBlockLevels( $text, $linestart ) {
2366  wfProfileIn( __METHOD__ );
2367 
2368  # Parsing through the text line by line. The main thing
2369  # happening here is handling of block-level elements p, pre,
2370  # and making lists from lines starting with * # : etc.
2371  #
2372  $textLines = StringUtils::explode( "\n", $text );
2373 
2374  $lastPrefix = $output = '';
2375  $this->mDTopen = $inBlockElem = false;
2376  $prefixLength = 0;
# Either false, or a string of paragraph tags ('<p>' or '</p><p>') queued by a
# blank line and written out when a later line is processed.
2377  $paragraphStack = false;
# True while the most recent <blockquote> tag seen was an opening tag.
2378  $inBlockquote = false;
2379 
2380  foreach ( $textLines as $oLine ) {
2381  # Fix up $linestart
2382  if ( !$linestart ) {
2383  $output .= $oLine;
2384  $linestart = true;
2385  continue;
2386  }
2387  # * = ul
2388  # # = ol
2389  # ; = dt
2390  # : = dd
2391 
2392  $lastPrefixLength = strlen( $lastPrefix );
2393  $preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
2394  $preOpenMatch = preg_match( '/<pre/i', $oLine );
2395  # If not in a <pre> element, scan for and figure out what prefixes are there.
2396  if ( !$this->mInPre ) {
2397  # Multiple prefixes may abut each other for nested lists.
2398  $prefixLength = strspn( $oLine, '*#:;' );
2399  $prefix = substr( $oLine, 0, $prefixLength );
2400 
2401  # eh?
2402  # ; and : are both from definition-lists, so they're equivalent
2403  # for the purposes of determining whether or not we need to open/close
2404  # elements.
2405  $prefix2 = str_replace( ';', ':', $prefix );
2406  $t = substr( $oLine, $prefixLength );
# A '<pre' on this line switches to pre mode; this is undone at the bottom of
# the loop body if the same line also contains '</pre'.
2407  $this->mInPre = (bool)$preOpenMatch;
2408  } else {
2409  # Don't interpret any other prefixes in preformatted text
2410  $prefixLength = 0;
2411  $prefix = $prefix2 = '';
2412  $t = $oLine;
2413  }
2414 
2415  # List generation
2416  if ( $prefixLength && $lastPrefix === $prefix2 ) {
2417  # Same as the last item, so no need to deal with nesting or opening stuff
2418  $output .= $this->nextItem( substr( $prefix, -1 ) );
2419  $paragraphStack = false;
2420 
2421  if ( substr( $prefix, -1 ) === ';' ) {
2422  # The one nasty exception: definition lists work like this:
2423  # ; title : definition text
2424  # So we check for : in the remainder text to split up the
2425  # title and definition, without b0rking links.
2426  $term = $t2 = '';
2427  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2428  $t = $t2;
2429  $output .= $term . $this->nextItem( ':' );
2430  }
2431  }
2432  } elseif ( $prefixLength || $lastPrefixLength ) {
2433  # We need to open or close prefixes, or both.
2434 
2435  # Either open or close a level...
2436  $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
2437  $paragraphStack = false;
2438 
2439  # Close all the prefixes which aren't shared.
2440  while ( $commonPrefixLength < $lastPrefixLength ) {
2441  $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
2442  --$lastPrefixLength;
2443  }
2444 
2445  # Continue the current prefix if appropriate.
2446  if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
2447  $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
2448  }
2449 
2450  # Open prefixes where appropriate.
2451  while ( $prefixLength > $commonPrefixLength ) {
2452  $char = substr( $prefix, $commonPrefixLength, 1 );
2453  $output .= $this->openList( $char );
2454 
2455  if ( ';' === $char ) {
2456  # @todo FIXME: This is dupe of code above
2457  if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
2458  $t = $t2;
2459  $output .= $term . $this->nextItem( ':' );
2460  }
2461  }
2462  ++$commonPrefixLength;
2463  }
# Remember the normalised prefix (';' replaced by ':') for the next line.
2464  $lastPrefix = $prefix2;
2465  }
2466 
2467  # If we have no prefixes, go to paragraph mode.
2468  if ( 0 == $prefixLength ) {
2469  wfProfileIn( __METHOD__ . "-paragraph" );
2470  # No prefix (not in list)--go to paragraph mode
2471  # XXX: use a stack for nestable elements like span, table and div
2472  $openmatch = preg_match( '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
2473  $closematch = preg_match(
2474  '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|' .
2475  '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|' . $this->mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t );
2476  if ( $openmatch or $closematch ) {
2477  $paragraphStack = false;
2478  # TODO bug 5718: paragraph closed
2479  $output .= $this->closeParagraph();
2480  if ( $preOpenMatch and !$preCloseMatch ) {
2481  $this->mInPre = true;
2482  }
# Walk over every <blockquote> / </blockquote> tag on the line; the last one
# found decides the value of $inBlockquote.
2483  $bqOffset = 0;
2484  while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset ) ) {
2485  $inBlockquote = !$bqMatch[1][0]; // is this a close tag?
2486  $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
2487  }
# Any match of the closing pattern on this line counts as being outside a
# block element again, even if an opening pattern matched too.
2488  $inBlockElem = !$closematch;
2489  } elseif ( !$inBlockElem && !$this->mInPre ) {
2490  if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) and !$inBlockquote ) {
2491  # pre
2492  if ( $this->mLastSection !== 'pre' ) {
2493  $paragraphStack = false;
2494  $output .= $this->closeParagraph() . '<pre>';
2495  $this->mLastSection = 'pre';
2496  }
# Drop the leading space that marked the line as preformatted.
2497  $t = substr( $t, 1 );
2498  } else {
2499  # paragraph
2500  if ( trim( $t ) === '' ) {
2501  if ( $paragraphStack ) {
2502  $output .= $paragraphStack . '<br />';
2503  $paragraphStack = false;
2504  $this->mLastSection = 'p';
2505  } else {
2506  if ( $this->mLastSection !== 'p' ) {
2507  $output .= $this->closeParagraph();
2508  $this->mLastSection = '';
2509  $paragraphStack = '<p>';
2510  } else {
2511  $paragraphStack = '</p><p>';
2512  }
2513  }
2514  } else {
2515  if ( $paragraphStack ) {
2516  $output .= $paragraphStack;
2517  $paragraphStack = false;
2518  $this->mLastSection = 'p';
2519  } elseif ( $this->mLastSection !== 'p' ) {
2520  $output .= $this->closeParagraph() . '<p>';
2521  $this->mLastSection = 'p';
2522  }
2523  }
2524  }
2525  }
2526  wfProfileOut( __METHOD__ . "-paragraph" );
2527  }
2528  # somewhere above we forget to get out of pre block (bug 785)
2529  if ( $preCloseMatch && $this->mInPre ) {
2530  $this->mInPre = false;
2531  }
# Emit the line, unless it was a blank line that only queued paragraph tags
# in $paragraphStack.
2532  if ( $paragraphStack === false ) {
2533  $output .= $t . "\n";
2534  }
2535  }
# Close whatever lists are still open after the last line.
2536  while ( $prefixLength ) {
2537  $output .= $this->closeList( $prefix2[$prefixLength - 1] );
2538  --$prefixLength;
2539  }
# Close the section (named by mLastSection, e.g. 'p' or 'pre') that is still open.
2540  if ( $this->mLastSection != '' ) {
2541  $output .= '</' . $this->mLastSection . '>';
2542  $this->mLastSection = '';
2543  }
2544 
2545  wfProfileOut( __METHOD__ );
2546  return $output;
2547  }
2548 
/**
 * Split a string at the first ':' that is not nested inside an HTML-style
 * tag pair, tag or comment. Used for "; term : definition" lines so that
 * colons inside markup do not split the line.
 *
 * On success $before and $after receive the text on either side of the
 * colon; on failure they are left untouched.
 *
 * @param string $str the string to split
 * @param string &$before set to everything before the ':'
 * @param string &$after set to everything after the ':'
 * @throws MWException if the state machine reaches an unknown state
 * @return int|bool the position of the ':', or false if none was found
 */
function findColonNoLinks( $str, &$before, &$after ) {
	wfProfileIn( __METHOD__ );

	$pos = strpos( $str, ':' );
	if ( $pos === false ) {
		# Nothing to find!
		wfProfileOut( __METHOD__ );
		return false;
	}

	$lt = strpos( $str, '<' );
	if ( $lt === false || $lt > $pos ) {
		# Easy; no tag nesting to worry about
		$before = substr( $str, 0, $pos );
		$after = substr( $str, $pos + 1 );
		wfProfileOut( __METHOD__ );
		return $pos;
	}

	# Ugly state machine to walk through avoiding tags.
	# $stack counts the currently open (unclosed) tags.
	$state = self::COLON_STATE_TEXT;
	$stack = 0;
	$len = strlen( $str );
	for ( $i = 0; $i < $len; $i++ ) {
		$c = $str[$i];

		switch ( $state ) {
			# (Using the number is a performance hack for common cases)
			case 0: # self::COLON_STATE_TEXT:
				switch ( $c ) {
					case "<":
						# Could be either a <start> tag or an </end> tag
						$state = self::COLON_STATE_TAGSTART;
						break;
					case ":":
						if ( $stack == 0 ) {
							# We found it!
							$before = substr( $str, 0, $i );
							$after = substr( $str, $i + 1 );
							wfProfileOut( __METHOD__ );
							return $i;
						}
						# Embedded in a tag; don't break it.
						break;
					default:
						# Skip ahead looking for something interesting
						$colon = strpos( $str, ':', $i );
						if ( $colon === false ) {
							# Nothing else interesting
							wfProfileOut( __METHOD__ );
							return false;
						}
						$lt = strpos( $str, '<', $i );
						if ( $stack === 0 ) {
							if ( $lt === false || $colon < $lt ) {
								# We found it! The split happens at $colon, so that
								# (not the current scan index $i) is the position
								# to report.
								$before = substr( $str, 0, $colon );
								$after = substr( $str, $colon + 1 );
								wfProfileOut( __METHOD__ );
								return $colon;
							}
						}
						if ( $lt === false ) {
							# Nothing else interesting to find; abort!
							# We're nested, but there's no close tags left. Abort!
							# Note: this leaves both switch statements but not the
							# for loop, so scanning goes on character by character;
							# $stack stays above zero, so the result is still false.
							break 2;
						}
						# Skip ahead to next tag start
						$i = $lt;
						$state = self::COLON_STATE_TAGSTART;
				}
				break;
			case 1: # self::COLON_STATE_TAG:
				# In a <tag>
				switch ( $c ) {
					case ">":
						$stack++;
						$state = self::COLON_STATE_TEXT;
						break;
					case "/":
						# Slash may be followed by >?
						$state = self::COLON_STATE_TAGSLASH;
						break;
					default:
						# ignore
				}
				break;
			case 2: # self::COLON_STATE_TAGSTART:
				switch ( $c ) {
					case "/":
						$state = self::COLON_STATE_CLOSETAG;
						break;
					case "!":
						$state = self::COLON_STATE_COMMENT;
						break;
					case ">":
						# Illegal early close? This shouldn't happen D:
						$state = self::COLON_STATE_TEXT;
						break;
					default:
						$state = self::COLON_STATE_TAG;
				}
				break;
			case 3: # self::COLON_STATE_CLOSETAG:
				# In a </tag>
				if ( $c === ">" ) {
					$stack--;
					if ( $stack < 0 ) {
						wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
						wfProfileOut( __METHOD__ );
						return false;
					}
					$state = self::COLON_STATE_TEXT;
				}
				break;
			case self::COLON_STATE_TAGSLASH:
				if ( $c === ">" ) {
					# Yes, a self-closed tag <blah/>
					$state = self::COLON_STATE_TEXT;
				} else {
					# Probably we're jumping the gun, and this is an attribute
					$state = self::COLON_STATE_TAG;
				}
				break;
			case 5: # self::COLON_STATE_COMMENT:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASH;
				}
				break;
			case self::COLON_STATE_COMMENTDASH:
				if ( $c === "-" ) {
					$state = self::COLON_STATE_COMMENTDASHDASH;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			case self::COLON_STATE_COMMENTDASHDASH:
				if ( $c === ">" ) {
					$state = self::COLON_STATE_TEXT;
				} else {
					$state = self::COLON_STATE_COMMENT;
				}
				break;
			default:
				wfProfileOut( __METHOD__ );
				throw new MWException( "State machine error in " . __METHOD__ );
		}
	}
	if ( $stack > 0 ) {
		wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
		wfProfileOut( __METHOD__ );
		return false;
	}
	wfProfileOut( __METHOD__ );
	return false;
}
2715 
/**
 * Return the value of a magic variable (such as 'pagename' or 'currentday').
 *
 * Values computed in the main switch are stored in $this->mVarCache under
 * $index and served from there on later calls, unless a
 * ParserGetVariableValueVarCache hook handler returns false. The cases that
 * return directly (articlepath, sitename, server, servername, scriptpath,
 * stylepath, directionmark, contentlanguage and anything handled by the
 * ParserGetVariableValueSwitch hook) are not cached here.
 *
 * Several cases have side effects on $this->mOutput: the revision-related
 * variables, and 'pageid' for a page with article ID 0, set the
 * 'vary-revision' flag.
 *
 * @param string $index magic variable identifier, as matched in the switch below
 * @param bool|PPFrame $frame only passed on to the ParserGetVariableValueSwitch hook
 * @throws MWException if no title is set on the parser
 * @return string|int|null the variable's value; null for an unknown variable
 *   that no hook handles, and for 'pageid' when the article ID is 0
 */
function getVariableValue( $index, $frame = false ) {
	global $wgContLang, $wgSitename, $wgServer;
	global $wgArticlePath, $wgScriptPath, $wgStylePath;

	if ( is_null( $this->mTitle ) ) {
		// If no title set, bad things are going to happen
		// later. Title should always be set since this
		// should only be called in the middle of a parse
		// operation (but the unit-tests do funky stuff)
		throw new MWException( __METHOD__ . ' Should only be '
			. ' called while parsing (no title set)' );
	}

	# Hook handlers may pre-fill or veto the variable cache.
	if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) {
		if ( isset( $this->mVarCache[$index] ) ) {
			return $this->mVarCache[$index];
		}
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) );

	$pageLang = $this->getFunctionLang();

	switch ( $index ) {
		case 'currentmonth':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
			break;
		case 'currentmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentday':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
			break;
		case 'currentday2':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
			break;
		case 'localmonth':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
			break;
		case 'localmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localday':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
			break;
		case 'localday2':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
			break;
		case 'pagename':
			$value = wfEscapeWikiText( $this->mTitle->getText() );
			break;
		case 'pagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
			break;
		case 'fullpagename':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
			break;
		case 'subpagename':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
			break;
		case 'subpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$value = wfEscapeWikiText( $this->mTitle->getRootText() );
			break;
		case 'rootpagenamee':
			$value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getRootText() ) ) );
			break;
		case 'basepagename':
			$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
			break;
		case 'basepagenamee':
			$value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) );
			break;
		case 'talkpagename':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
			} else {
				$value = '';
			}
			break;
		case 'talkpagenamee':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
			} else {
				$value = '';
			}
			break;
		case 'subjectpagename':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
			break;
		case 'pageid': // requested in bug 23427
			$pageid = $this->getTitle()->getArticleID();
			if ( $pageid == 0 ) {
				# 0 means the page doesn't exist in the database,
				# which means the user is previewing a new page.
				# The vary-revision flag must be set, because the magic word
				# will have a different value once the page is saved.
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
			}
			$value = $pageid ? $pageid : null;
			break;
		case 'revisionid':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" );
			$value = $this->mRevisionId;
			break;
		case 'revisionday':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
			break;
		case 'revisionday2':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 6, 2 );
			break;
		case 'revisionmonth':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 4, 2 );
			break;
		case 'revisionmonth1':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
			break;
		case 'revisionyear':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 0, 4 );
			break;
		case 'revisiontimestamp':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
			$value = $this->getRevisionTimestamp();
			break;
		case 'revisionuser':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
			$value = $this->getRevisionUser();
			break;
		case 'revisionsize':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
			$value = $this->getRevisionSize();
			break;
		case 'namespace':
			$value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacee':
			$value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacenumber':
			$value = $this->mTitle->getNamespace();
			break;
		case 'talkspace':
			$value = $this->mTitle->canTalk() ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() ) : '';
			break;
		case 'talkspacee':
			$value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
			break;
		case 'subjectspace':
			$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
			break;
		case 'currentdayname':
			$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
			break;
		case 'currentyear':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
			break;
		case 'currenttime':
			$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
			break;
		case 'currentweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
			break;
		case 'currentdow':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
			break;
		case 'localdayname':
			$value = $pageLang->getWeekdayName( (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1 );
			break;
		case 'localyear':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
			break;
		case 'localtime':
			$value = $pageLang->time( MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ), false, false );
			break;
		case 'localhour':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
			break;
		case 'localweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
			break;
		case 'localdow':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
			break;
		case 'numberofarticles':
			$value = $pageLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $pageLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $pageLang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$value = $pageLang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$value = $pageLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$value = $pageLang->formatNum( SiteStats::edits() );
			break;
		case 'numberofviews':
			global $wgDisableCounters;
			$value = !$wgDisableCounters ? $pageLang->formatNum( SiteStats::views() ) : '';
			break;
		case 'currenttimestamp':
			$value = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
			break;
		case 'currentversion':
			# Without this assignment $value would be undefined below.
			$value = SpecialVersion::getVersion();
			break;
		case 'articlepath':
			return $wgArticlePath;
		case 'sitename':
			return $wgSitename;
		case 'server':
			return $wgServer;
		case 'servername':
			$serverParts = wfParseUrl( $wgServer );
			return $serverParts && isset( $serverParts['host'] ) ? $serverParts['host'] : $wgServer;
		case 'scriptpath':
			return $wgScriptPath;
		case 'stylepath':
			return $wgStylePath;
		case 'directionmark':
			return $pageLang->getDirMark();
		case 'contentlanguage':
			global $wgLanguageCode;
			return $wgLanguageCode;
		case 'cascadingsources':
			# Without this assignment $value would be undefined below.
			$value = CoreParserFunctions::cascadingsources( $this );
			break;
		default:
			# Unknown here: give extensions a chance to supply the value.
			$ret = null;
			wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$this->mVarCache, &$index, &$ret, &$frame ) );
			return $ret;
	}

	if ( $index ) {
		$this->mVarCache[$index] = $value;
	}

	return $value;
}
3055 
/**
 * Build the MagicWordArray objects used to recognise variables and
 * subst-style prefixes, storing them in $this->mVariables and
 * $this->mSubstWords.
 */
function initialiseVariables() {
	wfProfileIn( __METHOD__ );

	# Fetch both ID lists first, then wrap each in a MagicWordArray.
	$idsForVariables = MagicWord::getVariableIDs();
	$idsForSubst = MagicWord::getSubstIDs();
	$this->mVariables = new MagicWordArray( $idsForVariables );
	$this->mSubstWords = new MagicWordArray( $idsForSubst );

	wfProfileOut( __METHOD__ );
}
3070 
/**
 * Run the text through the parser's preprocessor and return the result of
 * its preprocessToObj() call.
 *
 * @param string $text the text to preprocess
 * @param int $flags flags handed on to the preprocessor unchanged
 *   (self::PTD_FOR_INCLUSION is defined for this purpose)
 * @return mixed whatever Preprocessor::preprocessToObj() returns
 */
function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3097 
/**
 * Split a string into its leading whitespace, its trimmed content and its
 * trailing whitespace. Whitespace here means whatever ltrim()/rtrim() strip
 * by default.
 *
 * All three elements are always strings; an empty piece is ''. (On PHP
 * versions before 8.0, substr() returns false rather than '' when called on
 * an empty string, so the leading part is guarded the same way as the
 * trailing part.)
 *
 * @param string $s the string to split
 * @return array array( leading whitespace, trimmed text, trailing whitespace )
 */
public static function splitWhitespace( $s ) {
	$ltrimmed = ltrim( $s );
	$leadLength = strlen( $s ) - strlen( $ltrimmed );
	$w1 = $leadLength > 0 ? substr( $s, 0, $leadLength ) : '';

	$trimmed = rtrim( $ltrimmed );
	$trailLength = strlen( $ltrimmed ) - strlen( $trimmed );
	# substr( $x, -0 ) would return the whole string, hence the guard.
	$w2 = $trailLength > 0 ? substr( $ltrimmed, -$trailLength ) : '';

	return array( $w1, $trimmed, $w2 );
}
3117 
/**
 * Preprocess the text and expand it in the given frame.
 *
 * Empty text, and text longer than the max include size from the parser
 * options, is returned unchanged.
 *
 * @param string $text the text to transform
 * @param bool|PPFrame|array $frame a PPFrame to expand in; false creates a
 *   new frame, any other non-PPFrame value is handed to newCustomFrame()
 * @param bool $argsOnly when true, expansion runs with PPFrame::NO_TEMPLATES
 * @return string the expanded text
 */
function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text? Also, Prevent too big inclusions!
	$length = strlen( $text );
	if ( $length < 1 || $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}
	wfProfileIn( __METHOD__ );

	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			wfDebug( __METHOD__ . " called using plain parameters instead of a PPFrame instance. Creating custom frame.\n" );
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$tree = $this->preprocessToDom( $text );
	$expandFlags = 0;
	if ( $argsOnly ) {
		$expandFlags = PPFrame::NO_TEMPLATES;
	}
	$expanded = $frame->expand( $tree, $expandFlags );

	wfProfileOut( __METHOD__ );
	return $expanded;
}
3158 
/**
 * Turn a list of template-style arguments into an associative array.
 *
 * An argument containing '=' is split at the first '='; both sides are
 * trimmed and stored as name => value (a later duplicate name overwrites an
 * earlier one). An argument without '=' is stored unchanged under the next
 * positional index, counting from 1.
 *
 * @param array $args list of argument strings
 * @return array the arguments keyed by name or position
 */
static function createAssocArgs( $args ) {
	$assocArgs = array();
	$index = 1;
	foreach ( $args as $arg ) {
		$eqpos = strpos( $arg, '=' );
		if ( $eqpos === false ) {
			$assocArgs[$index++] = $arg;
			continue;
		}
		# trim() always returns a string, so neither side needs a check
		# against false.
		$name = trim( substr( $arg, 0, $eqpos ) );
		$assocArgs[$name] = trim( substr( $arg, $eqpos + 1 ) );
	}

	return $assocArgs;
}
3187 
/**
 * Record a parser-limit warning on the output and add the matching
 * tracking category.
 *
 * The warning text comes from the message "<limitationType>-warning",
 * rendered in the user language from the parser options; the category is
 * "<limitationType>-category".
 *
 * @param string $limitationType message key prefix identifying the limit
 * @param string|int $current current value, passed as a numeric message
 *   parameter (harmless if the message does not use it)
 * @param string|int $max maximum allowed value, passed the same way
 */
function limitationWarn( $limitationType, $current = '', $max = '' ) {
	$message = wfMessage( $limitationType . '-warning' )->numParams( $current, $max );
	$message = $message->inLanguage( $this->mOptions->getUserLangObj() );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( $limitationType . '-category' );
}
3219 
3233  function braceSubstitution( $piece, $frame ) {
3234  wfProfileIn( __METHOD__ );
3235  wfProfileIn( __METHOD__ . '-setup' );
3236 
3237  # Flags
3238  $found = false; # $text has been filled
3239  $nowiki = false; # wiki markup in $text should be escaped
3240  $isHTML = false; # $text is HTML, armour it against wikitext transformation
3241  $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered
3242  $isChildObj = false; # $text is a DOM node needing expansion in a child frame
3243  $isLocalObj = false; # $text is a DOM node needing expansion in the current frame
3244 
3245  # Title object, where $text came from
3246  $title = false;
3247 
3248  # $part1 is the bit before the first |, and must contain only title characters.
3249  # Various prefixes will be stripped from it later.
3250  $titleWithSpaces = $frame->expand( $piece['title'] );
3251  $part1 = trim( $titleWithSpaces );
3252  $titleText = false;
3253 
3254  # Original title text preserved for various purposes
3255  $originalTitle = $part1;
3256 
3257  # $args is a list of argument nodes, starting from index 0, not including $part1
3258  # @todo FIXME: If piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object
3259  $args = ( null == $piece['parts'] ) ? array() : $piece['parts'];
3260  wfProfileOut( __METHOD__ . '-setup' );
3261 
3262  $titleProfileIn = null; // profile templates
3263 
3264  # SUBST
3265  wfProfileIn( __METHOD__ . '-modifiers' );
3266  if ( !$found ) {
3267 
3268  $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3269 
3270  # Possibilities for substMatch: "subst", "safesubst" or FALSE
3271  # Decide whether to expand template or keep wikitext as-is.
3272  if ( $this->ot['wiki'] ) {
3273  if ( $substMatch === false ) {
3274  $literal = true; # literal when in PST with no prefix
3275  } else {
3276  $literal = false; # expand when in PST with subst: or safesubst:
3277  }
3278  } else {
3279  if ( $substMatch == 'subst' ) {
3280  $literal = true; # literal when not in PST with plain subst:
3281  } else {
3282  $literal = false; # expand when not in PST with safesubst: or no prefix
3283  }
3284  }
3285  if ( $literal ) {
3286  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3287  $isLocalObj = true;
3288  $found = true;
3289  }
3290  }
3291 
3292  # Variables
3293  if ( !$found && $args->getLength() == 0 ) {
3294  $id = $this->mVariables->matchStartToEnd( $part1 );
3295  if ( $id !== false ) {
3296  $text = $this->getVariableValue( $id, $frame );
3297  if ( MagicWord::getCacheTTL( $id ) > -1 ) {
3298  $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
3299  }
3300  $found = true;
3301  }
3302  }
3303 
3304  # MSG, MSGNW and RAW
3305  if ( !$found ) {
3306  # Check for MSGNW:
3307  $mwMsgnw = MagicWord::get( 'msgnw' );
3308  if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3309  $nowiki = true;
3310  } else {
3311  # Remove obsolete MSG:
3312  $mwMsg = MagicWord::get( 'msg' );
3313  $mwMsg->matchStartAndRemove( $part1 );
3314  }
3315 
3316  # Check for RAW:
3317  $mwRaw = MagicWord::get( 'raw' );
3318  if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3319  $forceRawInterwiki = true;
3320  }
3321  }
3322  wfProfileOut( __METHOD__ . '-modifiers' );
3323 
3324  # Parser functions
3325  if ( !$found ) {
3326  wfProfileIn( __METHOD__ . '-pfunc' );
3327 
3328  $colonPos = strpos( $part1, ':' );
3329  if ( $colonPos !== false ) {
3330  $func = substr( $part1, 0, $colonPos );
3331  $funcArgs = array( trim( substr( $part1, $colonPos + 1 ) ) );
3332  for ( $i = 0; $i < $args->getLength(); $i++ ) {
3333  $funcArgs[] = $args->item( $i );
3334  }
3335  try {
3336  $result = $this->callParserFunction( $frame, $func, $funcArgs );
3337  } catch ( Exception $ex ) {
3338  wfProfileOut( __METHOD__ . '-pfunc' );
3339  wfProfileOut( __METHOD__ );
3340  throw $ex;
3341  }
3342 
3343  # The interface for parser functions allows for extracting
3344  # flags into the local scope. Extract any forwarded flags
3345  # here.
3346  extract( $result );
3347  }
3348  wfProfileOut( __METHOD__ . '-pfunc' );
3349  }
3350 
3351  # Finish mangling title and then check for loops.
3352  # Set $title to a Title object and $titleText to the PDBK
3353  if ( !$found ) {
3354  $ns = NS_TEMPLATE;
3355  # Split the title into page and subpage
3356  $subpage = '';
3357  $relative = $this->maybeDoSubpageLink( $part1, $subpage );
3358  if ( $part1 !== $relative ) {
3359  $part1 = $relative;
3360  $ns = $this->mTitle->getNamespace();
3361  }
3362  $title = Title::newFromText( $part1, $ns );
3363  if ( $title ) {
3364  $titleText = $title->getPrefixedText();
3365  # Check for language variants if the template is not found
3366  if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
3367  $this->getConverterLanguage()->findVariantLink( $part1, $title, true );
3368  }
3369  # Do recursion depth check
3370  $limit = $this->mOptions->getMaxTemplateDepth();
3371  if ( $frame->depth >= $limit ) {
3372  $found = true;
3373  $text = '<span class="error">'
3374  . wfMessage( 'parser-template-recursion-depth-warning' )
3375  ->numParams( $limit )->inContentLanguage()->text()
3376  . '</span>';
3377  }
3378  }
3379  }
3380 
3381  # Load from database
3382  if ( !$found && $title ) {
3383  if ( !Profiler::instance()->isPersistent() ) {
3384  # Too many unique items can kill profiling DBs/collectors
3385  $titleProfileIn = __METHOD__ . "-title-" . $title->getPrefixedDBkey();
3386  wfProfileIn( $titleProfileIn ); // template in
3387  }
3388  wfProfileIn( __METHOD__ . '-loadtpl' );
3389  if ( !$title->isExternal() ) {
3390  if ( $title->isSpecialPage()
3391  && $this->mOptions->getAllowSpecialInclusion()
3392  && $this->ot['html']
3393  ) {
3394  // Pass the template arguments as URL parameters.
3395  // "uselang" will have no effect since the Language object
3396  // is forced to the one defined in ParserOptions.
3397  $pageArgs = array();
3398  for ( $i = 0; $i < $args->getLength(); $i++ ) {
3399  $bits = $args->item( $i )->splitArg();
3400  if ( strval( $bits['index'] ) === '' ) {
3401  $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3402  $value = trim( $frame->expand( $bits['value'] ) );
3403  $pageArgs[$name] = $value;
3404  }
3405  }
3406 
3407  // Create a new context to execute the special page
3408  $context = new RequestContext;
3409  $context->setTitle( $title );
3410  $context->setRequest( new FauxRequest( $pageArgs ) );
3411  $context->setUser( $this->getUser() );
3412  $context->setLanguage( $this->mOptions->getUserLangObj() );
3414  if ( $ret ) {
3415  $text = $context->getOutput()->getHTML();
3416  $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3417  $found = true;
3418  $isHTML = true;
3419  $this->disableCache();
3420  }
3421  } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
3422  $found = false; # access denied
3423  wfDebug( __METHOD__ . ": template inclusion denied for " .
3424  $title->getPrefixedDBkey() . "\n" );
3425  } else {
3426  list( $text, $title ) = $this->getTemplateDom( $title );
3427  if ( $text !== false ) {
3428  $found = true;
3429  $isChildObj = true;
3430  }
3431  }
3432 
3433  # If the title is valid but undisplayable, make a link to it
3434  if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3435  $text = "[[:$titleText]]";
3436  $found = true;
3437  }
3438  } elseif ( $title->isTrans() ) {
3439  # Interwiki transclusion
3440  if ( $this->ot['html'] && !$forceRawInterwiki ) {
3441  $text = $this->interwikiTransclude( $title, 'render' );
3442  $isHTML = true;
3443  } else {
3444  $text = $this->interwikiTransclude( $title, 'raw' );
3445  # Preprocess it like a template
3446  $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3447  $isChildObj = true;
3448  }
3449  $found = true;
3450  }
3451 
3452  # Do infinite loop check
3453  # This has to be done after redirect resolution to avoid infinite loops via redirects
3454  if ( !$frame->loopCheck( $title ) ) {
3455  $found = true;
3456  $text = '<span class="error">'
3457  . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3458  . '</span>';
3459  wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
3460  }
3461  wfProfileOut( __METHOD__ . '-loadtpl' );
3462  }
3463 
3464  # If we haven't found text to substitute by now, we're done
3465  # Recover the source wikitext and return it
3466  if ( !$found ) {
3467  $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3468  if ( $titleProfileIn ) {
3469  wfProfileOut( $titleProfileIn ); // template out
3470  }
3471  wfProfileOut( __METHOD__ );
3472  return array( 'object' => $text );
3473  }
3474 
3475  # Expand DOM-style return values in a child frame
3476  if ( $isChildObj ) {
3477  # Clean up argument array
3478  $newFrame = $frame->newChild( $args, $title );
3479 
3480  if ( $nowiki ) {
3481  $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3482  } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3483  # Expansion is eligible for the empty-frame cache
3484  if ( isset( $this->mTplExpandCache[$titleText] ) ) {
3485  $text = $this->mTplExpandCache[$titleText];
3486  } else {
3487  $text = $newFrame->expand( $text );
3488  $this->mTplExpandCache[$titleText] = $text;
3489  }
3490  } else {
3491  # Uncached expansion
3492  $text = $newFrame->expand( $text );
3493  }
3494  }
3495  if ( $isLocalObj && $nowiki ) {
3496  $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3497  $isLocalObj = false;
3498  }
3499 
3500  if ( $titleProfileIn ) {
3501  wfProfileOut( $titleProfileIn ); // template out
3502  }
3503 
3504  # Replace raw HTML by a placeholder
3505  if ( $isHTML ) {
3506  $text = $this->insertStripItem( $text );
3507  } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3508  # Escape nowiki-style return values
3509  $text = wfEscapeWikiText( $text );
3510  } elseif ( is_string( $text )
3511  && !$piece['lineStart']
3512  && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3513  ) {
3514  # Bug 529: if the template begins with a table or block-level
3515  # element, it should be treated as beginning a new line.
3516  # This behavior is somewhat controversial.
3517  $text = "\n" . $text;
3518  }
3519 
3520  if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3521  # Error, oversize inclusion
3522  if ( $titleText !== false ) {
3523  # Make a working, properly escaped link if possible (bug 23588)
3524  $text = "[[:$titleText]]";
3525  } else {
3526  # This will probably not be a working link, but at least it may
3527  # provide some hint of where the problem is
3528  preg_replace( '/^:/', '', $originalTitle );
3529  $text = "[[:$originalTitle]]";
3530  }
3531  $text .= $this->insertStripItem( '<!-- WARNING: template omitted, post-expand include size too large -->' );
3532  $this->limitationWarn( 'post-expand-template-inclusion' );
3533  }
3534 
3535  if ( $isLocalObj ) {
3536  $ret = array( 'object' => $text );
3537  } else {
3538  $ret = array( 'text' => $text );
3539  }
3540 
3541  wfProfileOut( __METHOD__ );
3542  return $ret;
3543  }
3544 
/**
 * Call a registered parser function and normalise what its hook returns.
 *
 * The function name is first looked up among the case-sensitive synonyms,
 * then (lower-cased with the content language) among the case-insensitive
 * ones. If neither matches, array( 'found' => false ) is returned.
 *
 * Otherwise the hook is invoked and its return value is turned into an
 * array: a non-array result becomes the 'text' element, an array result has
 * its element 0 moved to 'text' (unless 'text' is already set), and 'found'
 * defaults to true. Any other keys returned by the hook are passed through.
 * If the hook returns 'noparse' => false, 'text' is run through
 * preprocessToDom() (using 'preprocessFlags' if given) and
 * 'isChildObj' => true is added.
 *
 * @param object $frame Frame used to expand PPNode arguments
 * @param string $function Function name or synonym
 * @param array $args Arguments. For hooks registered with SFH_OBJECT_ARGS,
 *   non-PPNode values (except key 0) are converted to part nodes; for other
 *   hooks every value is expanded/trimmed to a string and non-integer keys
 *   are rendered as "key=value".
 * @return array
 * @throws MWException If the registered hook is not callable
 */
public function callParserFunction( $frame, $function, array $args = array() ) {
	# Needed for the case-insensitive lookup below
	global $wgContLang;

	wfProfileIn( __METHOD__ );

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			wfProfileOut( __METHOD__ );
			return array( 'found' => false );
		}
	}

	wfProfileIn( __METHOD__ . '-pfunc-' . $function );
	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		wfProfileOut( __METHOD__ . '-pfunc-' . $function );
		wfProfileOut( __METHOD__ );
		throw new MWException( "Function hook for $function is not callable\n" );
	}

	# Hooks receive the parser by reference as their first argument. A local
	# alias is used because newer PHP versions do not accept a reference
	# taken directly from $this.
	$parser = $this;
	$allArgs = array( &$parser );
	if ( $flags & SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = array();
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->mPreprocessor->newPartNodeArray( array( $k => $v ) )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = array(
			'found' => true,
			'text' => $result,
		);
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# Array union: only fills in 'found' when the hook did not set it
		$result += array(
			'found' => true,
		);
	}

	# By default the hook output is NOT preprocessed
	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}
	wfProfileOut( __METHOD__ . '-pfunc-' . $function );
	wfProfileOut( __METHOD__ );

	return $result;
}
3658 
/**
 * Get the preprocessed DOM of a template, using the per-parser caches.
 *
 * If the title has a known redirect target in mTplRedirCache, that target
 * is used for the lookup. A hit in mTplDomCache is returned directly;
 * otherwise the text is fetched with fetchTemplateAndTitle(), preprocessed
 * for inclusion and stored in mTplDomCache. A failed fetch is cached as
 * false.
 *
 * @param Title $title Title of the template to load
 * @return array Two elements: the DOM (or false if the text could not be
 *   fetched) and the title that was finally used
 */
function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return array( $this->mTplDomCache[$titleText], $title );
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		$this->mTplDomCache[$titleText] = false;
		return array( false, $title );
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	# The fetch ended on a different title: remember the mapping
	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			array( $title->getNamespace(), $title->getDBkey() );
	}

	return array( $dom, $title );
}
3698 
/**
 * Fetch the unparsed text of a template through the configured template
 * callback and register every dependency it reports with the parser output.
 *
 * @param Title $title
 * @return array Two elements: the text returned by the callback and the
 *   final title (the callback's 'finalTitle' if set, otherwise $title)
 */
function fetchTemplateAndTitle( $title ) {
	# Defaults to Parser::statelessFetchTemplate()
	$fetcher = $this->mOptions->getTemplateCallback();
	$fetched = call_user_func( $fetcher, $title, $this );

	$text = $fetched['text'];

	if ( isset( $fetched['finalTitle'] ) ) {
		$finalTitle = $fetched['finalTitle'];
	} else {
		$finalTitle = $title;
	}

	$dependencies = isset( $fetched['deps'] ) ? $fetched['deps'] : array();
	foreach ( $dependencies as $dependency ) {
		$this->mOutput->addTemplate(
			$dependency['title'],
			$dependency['page_id'],
			$dependency['rev_id']
		);
		if ( $dependency['title']->equals( $this->getTitle() ) ) {
			// The page transcludes itself, so the rendered result
			// depends on the revision being saved
			$this->mOutput->setFlag( 'vary-revision' );
		}
	}

	return array( $text, $finalTitle );
}
3721 
/**
 * Fetch only the unparsed text of a template; the title element returned
 * by fetchTemplateAndTitle() is discarded.
 *
 * @param Title $title
 * @return mixed First element of fetchTemplateAndTitle()'s result
 */
function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );
	return $text;
}
3731 
/**
 * Static template fetcher which does not use or modify parser state.
 *
 * Loads the text of $title, following at most one redirect. Every title
 * looked at is recorded in the returned dependency list, including the
 * revision's own title when it differs from the requested one. The
 * 'BeforeParserFetchTemplateAndtitle' hook may skip the fetch or select a
 * specific revision ID.
 *
 * @param Title $title
 * @param Parser|bool $parser Passed through to the hook
 * @return array With keys:
 *   - 'text': the wikitext, or false if none could be loaded
 *   - 'finalTitle': the last title whose content was examined
 *   - 'deps': list of array( 'title', 'page_id', 'rev_id' ) entries
 */
static function statelessFetchTemplate( $title, $parser = false ) {
	# Needed for the MediaWiki-namespace message fallback below
	global $wgContLang;

	$text = $skip = false;
	$finalTitle = $title;
	$deps = array();

	# Loop to fetch the article, with up to 1 redirect
	for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		wfRunHooks( 'BeforeParserFetchTemplateAndtitle',
			array( $parser, $title, &$skip, &$id ) );

		if ( $skip ) {
			$text = false;
			$deps[] = array(
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			);
			break;
		}
		# Get the revision
		$rev = $id
			? Revision::newFromId( $id )
			: Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
		$rev_id = $rev ? $rev->getId() : 0;
		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			$linkCache = LinkCache::singleton();
			$linkCache->addBadLinkObj( $title );
		}

		$deps[] = array(
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $rev_id );
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = array(
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $rev_id );
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision: fall back to the interface message of that name
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}
	return array(
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps );
}
3817 
/**
 * Fetch a file; the title element returned by fetchFileAndTitle() is
 * discarded.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileAndTitle()
 * @return mixed First element of fetchFileAndTitle()'s result
 */
function fetchFile( $title, $options = array() ) {
	list( $file ) = $this->fetchFileAndTitle( $title, $options );
	return $file;
}
3829 
/**
 * Fetch a file and register it as an image dependency of the parser output.
 * When the file found carries a different title than the one requested,
 * that title is registered as well and returned in place of the original.
 *
 * @param Title $title
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (or a false value) and its title
 */
function fetchFileAndTitle( $title, $options = array() ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	# Timestamp and hash are only known when a file was actually found
	if ( $file ) {
		$timestamp = $file->getTimestamp();
		$hash = $file->getSha1();
	} else {
		$timestamp = false;
		$hash = false;
	}

	# Register the requested name as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );

	# ...and the name of the file actually found, if it differs
	if ( $file && !$title->equals( $file->getTitle() ) ) {
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );
	}

	return array( $file, $title );
}
3851 
/**
 * Helper for fetchFileAndTitle(): look a file up without registering it
 * with the parser output.
 *
 * @param Title $title
 * @param array $options Recognised keys, as read here:
 *   - 'broken': if set, no lookup is done and false is returned
 *   - 'sha1': if set, the file is looked up by this key
 *   The whole array is also passed on to the repository lookup.
 * @return mixed The lookup result, or false for a forced broken file
 */
protected function fetchFileNoRegister( $title, $options = array() ) {
	if ( isset( $options['broken'] ) ) {
		$file = false; // broken thumbnail forced by hook
	} elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
		$file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	} else { // get by (name,timestamp)
		$file = RepoGroup::singleton()->findFile( $title, $options );
	}
	return $file;
}
3872 
/**
 * Transclude a page from another wiki.
 *
 * Returns a content-language error message instead of the page when
 * $wgEnableScaryTranscluding is off or the request URL is longer than
 * 255 bytes.
 *
 * @param Title $title
 * @param string $action Value of the 'action' URL parameter
 * @return string
 */
function interwikiTransclude( $title, $action ) {
	global $wgEnableScaryTranscluding;

	if ( $wgEnableScaryTranscluding ) {
		$url = $title->getFullURL( array( 'action' => $action ) );
		if ( strlen( $url ) <= 255 ) {
			return $this->fetchScaryTemplateMaybeFromCache( $url );
		}
		$errorKey = 'scarytranscludetoolong';
	} else {
		$errorKey = 'scarytranscludedisabled';
	}

	return wfMessage( $errorKey )->inContentLanguage()->text();
}
3895 
/**
 * Return the content behind a URL, preferring a row in the 'transcache'
 * table that is not older than $wgTranscludeCacheExpiry seconds. On a
 * cache miss the URL is requested over HTTP and a successful response is
 * written back to the table. A failed request yields a content-language
 * error message and is not cached.
 *
 * @param string $url
 * @return string
 */
function fetchScaryTemplateMaybeFromCache( $url ) {
	global $wgTranscludeCacheExpiry;

	# Try the cache first
	$dbr = wfGetDB( DB_SLAVE );
	$oldestAllowed = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
	$conds = array(
		'tc_url' => $url,
		"tc_time >= " . $dbr->addQuotes( $oldestAllowed )
	);
	$row = $dbr->selectRow( 'transcache', array( 'tc_time', 'tc_contents' ), $conds );
	if ( $row ) {
		return $row->tc_contents;
	}

	# Cache miss: fetch over HTTP
	$req = MWHttpRequest::factory( $url );
	$status = $req->execute(); // Status object
	if ( !$status->isOK() ) {
		if ( $req->getStatus() != 200 ) {
			# Report the HTTP status we got back
			return wfMessage( 'scarytranscludefailed-httpstatus', $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
		}
		# Failed although the HTTP status was 200, so the status is not worth reporting
		return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
	}
	$text = $req->getContent();

	# Store the fresh copy
	$dbw = wfGetDB( DB_MASTER );
	$cacheRow = array(
		'tc_url' => $url,
		'tc_time' => $dbw->timestamp( time() ),
		'tc_contents' => $text
	);
	$dbw->replace( 'transcache', array( 'tc_url' ), $cacheRow );

	return $text;
}
3928 
/**
 * Triple brace replacement -- used for template arguments.
 *
 * Looks the argument up in the frame. If it is not there and a default
 * part was supplied, the default's child nodes are returned (in HTML and
 * preprocess output, or in wiki output inside a template frame). If there
 * is neither, the original {{{...}}} source is reconstructed.
 *
 * @param array $piece Uses the 'title' and 'parts' elements
 * @param object $frame Frame to expand in and read arguments from
 * @return array Either array( 'object' => ... ) or array( 'text' => ... )
 */
function argSubstitution( $piece, $frame ) {
	wfProfileIn( __METHOD__ );

	$parts = $piece['parts'];
	$rawName = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $rawName ) );
	$object = false;
	$warning = false;

	if ( $text === false && $parts->getLength() > 0 ) {
		if ( $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		) {
			# Nothing in the frame: fall back to the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	# Account for the argument size; note the limit violation if any
	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$warning = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# Neither a value nor a default: give back the source
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $rawName, $parts );
	}
	if ( $warning !== false ) {
		$text .= $warning;
	}

	$ret = ( $object !== false )
		? array( 'object' => $object )
		: array( 'text' => $text );

	wfProfileOut( __METHOD__ );
	return $ret;
}
3977 
/**
 * Return the text to be used for a given extension tag.
 *
 * In HTML output (or for function tag hooks in HTML/preprocess output) the
 * registered hook is called and its output is used; otherwise the tag is
 * rebuilt as source text. Unless the marker type is 'none', the output is
 * stored in the strip state and a strip marker is returned in its place.
 *
 * A hook may return an array: element 0 is the output and the remaining
 * keys are extract()ed into local scope, which is how a hook can override
 * $markerType. The local variable names in this method are therefore part
 * of that contract.
 *
 * @param array $params Elements read here:
 *   - 'name': node holding the tag name
 *   - 'attr': optional node holding the raw attribute text
 *   - 'inner': optional node holding the tag content
 *   - 'close': optional node holding the closing tag
 *   - 'attributes': optional array of extra attributes
 * @param object $frame Frame used to expand the nodes
 * @return string Strip marker, or the raw output for marker type 'none'
 * @throws MWException If a hook is not callable or the marker type is invalid
 */
function extensionSubstitution( $params, $frame ) {
	$name = $frame->expand( $params['name'] );
	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
	$marker = "{$this->mUniqPrefix}-$name-" . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	if ( $isFunctionTag ) {
		$markerType = 'none';
	} else {
		$markerType = 'general';
	}
	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			# Array union: attributes parsed from the text take precedence
			$attributes = $attributes + $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $this->mTagHooks[$name] ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}
			$output = call_user_func_array( $this->mTagHooks[$name],
				array( $content, $attributes, $this, $frame ) );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}

			# Function tag hooks receive the parser by reference. A local
			# alias is used because newer PHP versions do not accept a
			# reference taken directly from $this.
			$parser = $this;
			$output = call_user_func_array( $callback, array( &$parser, $frame, $content, $attributes ) );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			# Extract flags to local scope (to override $markerType)
			$flags = $output;
			$output = $flags[0];
			unset( $flags[0] );
			extract( $flags );
		}
	} else {
		# Not rendering the tag: rebuild its source text
		if ( is_null( $attrText ) ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			# isset() treats a missing 'close' key like a null one, without a notice
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4068 
/**
 * Add $size to the running include-size counter of the given type, unless
 * that would push the counter above the configured maximum include size.
 * The counter is left untouched when the limit would be exceeded.
 *
 * @param string $type Key into mIncludeSizes
 * @param int $size Size to add
 * @return bool True if the size was added, false if the limit was hit
 */
function incrementIncludeSize( $type, $size ) {
	$fits = $this->mIncludeSizes[$type] + $size <= $this->mOptions->getMaxIncludeSize();
	if ( $fits ) {
		$this->mIncludeSizes[$type] += $size;
	}
	return $fits;
}
4084 
/**
 * Count one more expensive parser function call. The counter is always
 * incremented, even when the limit has already been passed.
 *
 * @return bool False once the count exceeds the configured limit
 */
function incrementExpensiveFunctionCount() {
	$count = ++$this->mExpensiveFunctionCount;
	return $count <= $this->mOptions->getExpensiveParserFunctionLimit();
}
4094 
/**
 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__ from the
 * text and act on them.
 *
 * The first __TOC__ is replaced by a <!--MWTOC--> placeholder and any
 * further ones are removed. All other double-underscore words are removed
 * and recorded in mDoubleUnderscores; each recorded key is also stored as
 * a property on the parser output.
 *
 * @param string $text
 * @return string The text with the double-underscore words removed
 */
function doDoubleUnderscore( $text ) {
	wfProfileIn( __METHOD__ );

	# The position of __TOC__ needs to be recorded
	$mw = MagicWord::get( 'toc' );
	if ( $mw->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $mw->replace( '<!--MWTOC-->', $text, 1 );

		# Only keep the first one.
		$text = $mw->replace( '', $text );
	}

	# Now match and remove the rest of them
	$mwa = MagicWord::getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $mwa->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] ) && $this->mTitle->getNamespace() == NS_CATEGORY ) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}
	# (bug 8068) Allow control over whether robots index a page.
	#
	# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
	# is not desirable, the last one on the page should win.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	wfProfileOut( __METHOD__ );
	return $text;
}
4153 
/**
 * Add the page to a tracking category whose name is given by an interface
 * message. Nothing is added for pages in the Special namespace, when the
 * message text is "-", or when the text does not form a valid category
 * title.
 *
 * @param string $msg Message key
 * @return bool Whether the category was added
 */
public function addTrackingCategory( $msg ) {
	if ( $this->mTitle->getNamespace() === NS_SPECIAL ) {
		wfDebug( __METHOD__ . ": Not adding tracking category $msg to special page!\n" );
		return false;
	}

	// Important to parse with correct title (bug 31469)
	$categoryName = wfMessage( $msg )->title( $this->getTitle() )->inContentLanguage()->text();

	# Allow tracking categories to be disabled by setting them to "-"
	if ( $categoryName === '-' ) {
		return false;
	}

	$categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $categoryName );
	if ( !$categoryTitle ) {
		wfDebug( __METHOD__ . ": [[MediaWiki:$msg]] is not a valid title!\n" );
		return false;
	}

	$this->mOutput->addCategory( $categoryTitle->getDBkey(), $this->getDefaultSort() );
	return true;
}
4190 
4207  function formatHeadings( $text, $origText, $isMain = true ) {
4208  global $wgMaxTocLevel, $wgExperimentalHtmlIds;
4209 
4210  # Inhibit editsection links if requested in the page
4211  if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4212  $maybeShowEditLink = $showEditLink = false;
4213  } else {
4214  $maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
4215  $showEditLink = $this->mOptions->getEditSection();
4216  }
4217  if ( $showEditLink ) {
4218  $this->mOutput->setEditSectionTokens( true );
4219  }
4220 
4221  # Get all headlines for numbering them and adding funky stuff like [edit]
4222  # links - this is for later, but we need the number of headlines right now
4223  $matches = array();
4224  $numMatches = preg_match_all( '/<H(?P<level>[1-6])(?P<attrib>.*?' . '>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i', $text, $matches );
4225 
4226  # if there are fewer than 4 headlines in the article, do not show TOC
4227  # unless it's been explicitly enabled.
4228  $enoughToc = $this->mShowToc &&
4229  ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4230 
4231  # Allow user to stipulate that a page should have a "new section"
4232  # link added via __NEWSECTIONLINK__
4233  if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4234  $this->mOutput->setNewSection( true );
4235  }
4236 
4237  # Allow user to remove the "new section"
4238  # link via __NONEWSECTIONLINK__
4239  if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4240  $this->mOutput->hideNewSection( true );
4241  }
4242 
4243  # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4244  # override above conditions and always show TOC above first header
4245  if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4246  $this->mShowToc = true;
4247  $enoughToc = true;
4248  }
4249 
4250  # headline counter
4251  $headlineCount = 0;
4252  $numVisible = 0;
4253 
4254  # Ugh .. the TOC should have neat indentation levels which can be
4255  # passed to the skin functions. These are determined here
4256  $toc = '';
4257  $full = '';
4258  $head = array();
4259  $sublevelCount = array();
4260  $levelCount = array();
4261  $level = 0;
4262  $prevlevel = 0;
4263  $toclevel = 0;
4264  $prevtoclevel = 0;
4265  $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-" . self::MARKER_SUFFIX;
4266  $baseTitleText = $this->mTitle->getPrefixedDBkey();
4267  $oldType = $this->mOutputType;
4268  $this->setOutputType( self::OT_WIKI );
4269  $frame = $this->getPreprocessor()->newFrame();
4270  $root = $this->preprocessToDom( $origText );
4271  $node = $root->getFirstChild();
4272  $byteOffset = 0;
4273  $tocraw = array();
4274  $refers = array();
4275 
4276  foreach ( $matches[3] as $headline ) {
4277  $isTemplate = false;
4278  $titleText = false;
4279  $sectionIndex = false;
4280  $numbering = '';
4281  $markerMatches = array();
4282  if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4283  $serial = $markerMatches[1];
4284  list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4285  $isTemplate = ( $titleText != $baseTitleText );
4286  $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4287  }
4288 
4289  if ( $toclevel ) {
4290  $prevlevel = $level;
4291  }
4292  $level = $matches[1][$headlineCount];
4293 
4294  if ( $level > $prevlevel ) {
4295  # Increase TOC level
4296  $toclevel++;
4297  $sublevelCount[$toclevel] = 0;
4298  if ( $toclevel < $wgMaxTocLevel ) {
4299  $prevtoclevel = $toclevel;
4300  $toc .= Linker::tocIndent();
4301  $numVisible++;
4302  }
4303  } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4304  # Decrease TOC level, find level to jump to
4305 
4306  for ( $i = $toclevel; $i > 0; $i-- ) {
4307  if ( $levelCount[$i] == $level ) {
4308  # Found last matching level
4309  $toclevel = $i;
4310  break;
4311  } elseif ( $levelCount[$i] < $level ) {
4312  # Found first matching level below current level
4313  $toclevel = $i + 1;
4314  break;
4315  }
4316  }
4317  if ( $i == 0 ) {
4318  $toclevel = 1;
4319  }
4320  if ( $toclevel < $wgMaxTocLevel ) {
4321  if ( $prevtoclevel < $wgMaxTocLevel ) {
4322  # Unindent only if the previous toc level was shown :p
4323  $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4324  $prevtoclevel = $toclevel;
4325  } else {
4326  $toc .= Linker::tocLineEnd();
4327  }
4328  }
4329  } else {
4330  # No change in level, end TOC line
4331  if ( $toclevel < $wgMaxTocLevel ) {
4332  $toc .= Linker::tocLineEnd();
4333  }
4334  }
4335 
4336  $levelCount[$toclevel] = $level;
4337 
4338  # count number of headlines for each level
4339  $sublevelCount[$toclevel]++;
4340  $dot = 0;
4341  for ( $i = 1; $i <= $toclevel; $i++ ) {
4342  if ( !empty( $sublevelCount[$i] ) ) {
4343  if ( $dot ) {
4344  $numbering .= '.';
4345  }
4346  $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4347  $dot = 1;
4348  }
4349  }
4350 
4351  # The safe header is a version of the header text safe to use for links
4352 
4353  # Remove link placeholders by the link text.
4354  # <!--LINK number-->
4355  # turns into
4356  # link text with suffix
4357  # Do this before unstrip since link text can contain strip markers
4358  $safeHeadline = $this->replaceLinkHoldersText( $headline );
4359 
4360  # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4361  $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4362 
4363  # Strip out HTML (first regex removes any tag not allowed)
4364  # Allowed tags are:
4365  # * <sup> and <sub> (bug 8393)
4366  # * <i> (bug 26375)
4367  # * <b> (r105284)
4368  # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
4369  #
4370  # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4371  # to allow setting directionality in toc items.
4372  $tocline = preg_replace(
4373  array( '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?' . '>#', '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?' . '>#' ),
4374  array( '', '<$1>' ),
4375  $safeHeadline
4376  );
4377  $tocline = trim( $tocline );
4378 
4379  # For the anchor, strip out HTML-y stuff period
4380  $safeHeadline = preg_replace( '/<.*?' . '>/', '', $safeHeadline );
4381  $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4382 
4383  # Save headline for section edit hint before it's escaped
4384  $headlineHint = $safeHeadline;
4385 
4386  if ( $wgExperimentalHtmlIds ) {
4387  # For reverse compatibility, provide an id that's
4388  # HTML4-compatible, like we used to.
4389  #
4390  # It may be worth noting, academically, that it's possible for
4391  # the legacy anchor to conflict with a non-legacy headline
4392  # anchor on the page. In this case likely the "correct" thing
4393  # would be to either drop the legacy anchors or make sure
4394  # they're numbered first. However, this would require people
4395  # to type in section names like "abc_.D7.93.D7.90.D7.A4"
4396  # manually, so let's not bother worrying about it.
4397  $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
4398  array( 'noninitial', 'legacy' ) );
4399  $safeHeadline = Sanitizer::escapeId( $safeHeadline );
4400 
4401  if ( $legacyHeadline == $safeHeadline ) {
4402  # No reason to have both (in fact, we can't)
4403  $legacyHeadline = false;
4404  }
4405  } else {
4406  $legacyHeadline = false;
4407  $safeHeadline = Sanitizer::escapeId( $safeHeadline,
4408  'noninitial' );
4409  }
4410 
4411  # HTML names must be case-insensitively unique (bug 10721).
4412  # This does not apply to Unicode characters per
4413  # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison
4414  # @todo FIXME: We may be changing them depending on the current locale.
4415  $arrayKey = strtolower( $safeHeadline );
4416  if ( $legacyHeadline === false ) {
4417  $legacyArrayKey = false;
4418  } else {
4419  $legacyArrayKey = strtolower( $legacyHeadline );
4420  }
4421 
4422  # count how many in assoc. array so we can track dupes in anchors
4423  if ( isset( $refers[$arrayKey] ) ) {
4424  $refers[$arrayKey]++;
4425  } else {
4426  $refers[$arrayKey] = 1;
4427  }
4428  if ( isset( $refers[$legacyArrayKey] ) ) {
4429  $refers[$legacyArrayKey]++;
4430  } else {
4431  $refers[$legacyArrayKey] = 1;
4432  }
4433 
4434  # Don't number the heading if it is the only one (looks silly)
4435  if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4436  # the two are different if the line contains a link
4437  $headline = Html::element( 'span', array( 'class' => 'mw-headline-number' ), $numbering ) . ' ' . $headline;
4438  }
4439 
4440  # Create the anchor for linking from the TOC to the section
4441  $anchor = $safeHeadline;
4442  $legacyAnchor = $legacyHeadline;
4443  if ( $refers[$arrayKey] > 1 ) {
4444  $anchor .= '_' . $refers[$arrayKey];
4445  }
4446  if ( $legacyHeadline !== false && $refers[$legacyArrayKey] > 1 ) {
4447  $legacyAnchor .= '_' . $refers[$legacyArrayKey];
4448  }
4449  if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
4450  $toc .= Linker::tocLine( $anchor, $tocline,
4451  $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4452  }
4453 
4454  # Add the section to the section tree
4455  # Find the DOM node for this header
4456  $noOffset = ( $isTemplate || $sectionIndex === false );
4457  while ( $node && !$noOffset ) {
4458  if ( $node->getName() === 'h' ) {
4459  $bits = $node->splitHeading();
4460  if ( $bits['i'] == $sectionIndex ) {
4461  break;
4462  }
4463  }
4464  $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4465  $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4466  $node = $node->getNextSibling();
4467  }
4468  $tocraw[] = array(
4469  'toclevel' => $toclevel,
4470  'level' => $level,
4471  'line' => $tocline,
4472  'number' => $numbering,
4473  'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4474  'fromtitle' => $titleText,
4475  'byteoffset' => ( $noOffset ? null : $byteOffset ),
4476  'anchor' => $anchor,
4477  );
4478 
4479  # give headline the correct <h#> tag
4480  if ( $maybeShowEditLink && $sectionIndex !== false ) {
4481  // Output edit section links as markers with styles that can be customized by skins
4482  if ( $isTemplate ) {
4483  # Put a T flag in the section identifier, to indicate to extractSections()
4484  # that sections inside <includeonly> should be counted.
4485  $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
4486  } else {
4487  $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint );
4488  }
4489  // We use a bit of pesudo-xml for editsection markers. The language converter is run later on
4490  // Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff
4491  // And trying to insert strip tags fails too. At this point all real inputted tags have already been escaped
4492  // so we don't have to worry about a user trying to input one of these markers directly.
4493  // We use a page and section attribute to stop the language converter from converting these important bits
4494  // of data, but put the headline hint inside a content block because the language converter is supposed to
4495  // be able to convert that piece of data.
4496  $editlink = '<mw:editsection page="' . htmlspecialchars( $editlinkArgs[0] );
4497  $editlink .= '" section="' . htmlspecialchars( $editlinkArgs[1] ) . '"';
4498  if ( isset( $editlinkArgs[2] ) ) {
4499  $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
4500  } else {
4501  $editlink .= '/>';
4502  }
4503  } else {
4504  $editlink = '';
4505  }
4506  $head[$headlineCount] = Linker::makeHeadline( $level,
4507  $matches['attrib'][$headlineCount], $anchor, $headline,
4508  $editlink, $legacyAnchor );
4509 
4510  $headlineCount++;
4511  }
4512 
4513  $this->setOutputType( $oldType );
4514 
4515  # Never ever show TOC if no headers
4516  if ( $numVisible < 1 ) {
4517  $enoughToc = false;
4518  }
4519 
4520  if ( $enoughToc ) {
4521  if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
4522  $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4523  }
4524  $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4525  $this->mOutput->setTOCHTML( $toc );
4526  $toc = self::TOC_START . $toc . self::TOC_END;
4527  }
4528 
4529  if ( $isMain ) {
4530  $this->mOutput->setSections( $tocraw );
4531  }
4532 
4533  # split up and insert constructed headlines
4534  $blocks = preg_split( '/<H[1-6].*?' . '>[\s\S]*?<\/H[1-6]>/i', $text );
4535  $i = 0;
4536 
4537  // build an array of document sections
4538  $sections = array();
4539  foreach ( $blocks as $block ) {
4540  // $head is zero-based, sections aren't.
4541  if ( empty( $head[$i - 1] ) ) {
4542  $sections[$i] = $block;
4543  } else {
4544  $sections[$i] = $head[$i - 1] . $block;
4545  }
4546 
4557  wfRunHooks( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) );
4558 
4559  $i++;
4560  }
4561 
4562  if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4563  // append the TOC at the beginning
4564  // Top anchor now in skin
4565  $sections[0] = $sections[0] . $toc . "\n";
4566  }
4567 
4568  $full .= join( '', $sections );
4569 
4570  if ( $this->mForceTocPosition ) {
4571  return str_replace( '<!--MWTOC-->', $toc, $full );
4572  } else {
4573  return $full;
4574  }
4575  }
4576 
/**
 * Transform wiki markup when saving a page: normalise line endings and,
 * when the parser options request it, run the pre-save pass (pstPass2)
 * over the text.
 *
 * @param string $text The text to transform
 * @param Title $title Title the text belongs to
 * @param User $user User on whose behalf the text is saved
 * @param ParserOptions $options Parsing options
 * @param bool $clearState Whether to clear the parser state first
 * @return string The transformed wiki markup
 */
public function preSaveTransform( $text, Title $title, User $user, ParserOptions $options, $clearState = true ) {
	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	# Convert Windows line endings to plain newlines
	$text = str_replace( "\r\n", "\n", $text );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}

	$transformed = $this->mStripState->unstripBoth( $text );

	# Drop the user override again
	$this->setUser( null );

	return $transformed;
}
4605 
/**
 * Pre-save transform helper: substitutes variables, expands signatures
 * (~~~, ~~~~, ~~~~~) and resolves pipe tricks in text about to be saved.
 *
 * @param string $text Wikitext to transform
 * @param User $user User whose signature is inserted
 * @return string Transformed wikitext, with trailing whitespace removed
 */
private function pstPass2( $text, $user ) {
	global $wgContLang;

	# Note: This is the timestamp saved as hardcoded wikitext to
	# the database, we use $wgContLang here in order to give
	# everyone the same signature and use the default one rather
	# than the one selected in each user's preferences.
	# (see also bug 12815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->format( 'T' ); # might vary on DST changeover!

	# Allow translation of timezones through wiki. format() can return
	# whatever crap the system uses, localised or not, so we cannot
	# ship premade translations.
	$key = 'timezone-' . strtolower( trim( $tzMsg ) );
	$msg = wfMessage( $key )->inContentLanguage();
	if ( $msg->exists() ) {
		$tzMsg = $msg->text();
	}

	$d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	$sigText = $this->getUserSig( $user );
	$text = strtr( $text, array(
		'~~~~~' => $d,
		'~~~~' => "$sigText $d",
		'~~~' => $sigText
	) );

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# The double-width characters are written as UTF-8 byte escapes so that
	# they cannot be silently folded into their ASCII look-alikes:
	# \xEF\xBC\x88 = U+FF08, \xEF\xBC\x89 = U+FF09, \xEF\xBC\x8C = U+FF0C
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/"; # [[ns:page (context)|]]
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\xEF\xBC\x88$tc+\xEF\xBC\x89)\\|]]/"; # same, with double-width brackets (added in r40257)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |\xEF\xBC\x8C)$tc+|)\\|]]/"; # [[ns:page (context), context|]] (using either single or double-width comma)
	$p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] (reverse pipe trick: add context from page title)

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->mTitle->getText();
	$m = array();
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	# Trim trailing whitespace
	$text = rtrim( $text );

	return $text;
}
4683 
/**
 * Build the wikitext signature for a user.
 *
 * A "fancy" signature that validates is cleaned and returned as-is;
 * otherwise the nickname is wrapped in the 'signature' (or
 * 'signature-anon') message so that it links to the user.
 *
 * @param User &$user User to sign for
 * @param string|bool $nickname Nickname to use, or false to read the
 *   'nickname' user option
 * @param bool|null $fancySig Whether the nickname is the complete
 *   signature, or null to read the 'fancysig' user option
 * @return string Signature wikitext
 */
function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	global $wgMaxSigChars;

	$username = $user->getName();

	# Fill in anything the caller left unspecified from the user options
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( is_null( $fancySig ) ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# No nickname configured: sign with the plain user name
	if ( $nickname == null ) {
		$nickname = $username;
	}

	$tooLong = mb_strlen( $nickname ) > $wgMaxSigChars;
	if ( $tooLong ) {
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
	} elseif ( $fancySig !== false ) {
		# The signature may carry markup, so it has to validate first
		if ( $this->validateSig( $nickname ) !== false ) {
			# Valid: tidy it up (if enabled) and use it verbatim
			return $this->cleanSig( $nickname, true );
		}
		# Invalid: revert to the default signature
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
	}

	# A nickname must not itself contain signature tildes
	$nickname = self::cleanSigInSig( $nickname );

	# Default signature: link the (escaped) nickname to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	if ( $user->isAnon() ) {
		$msgName = 'signature-anon';
	} else {
		$msgName = 'signature';
	}

	return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()->title( $this->getTitle() )->text();
}
4739 
/**
 * Check that a custom signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|bool The unchanged text when it is well-formed, false otherwise
 */
function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}
	return false;
}
4749 
/**
 * Clean up signature text: force template invocations to be substituted,
 * strip nested signature tildes and run the result through the preprocessor.
 *
 * @param string $text Signature text
 * @param bool $parsing True when called in the middle of a parse; when
 *   false a fresh parse is started on $wgTitle and the result is unstripped
 * @return string Signature text
 */
public function cleanSig( $text, $parsing = false ) {
	global $wgTitle;

	if ( !$parsing ) {
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Signature cleaning can be switched off through the parser options
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = MagicWord::get( 'subst' );
	# Matches every "{{" that is not already followed by the subst keyword
	$needsSubst = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$withSubst = '{{' . $subst->getSynonym( 0 );

	$text = preg_replace( $needsSubst, $withSubst, $text );
	$text = self::cleanSigInSig( $text );

	$dom = $this->preprocessToDom( $text );
	$frame = $this->getPreprocessor()->newFrame();
	$text = $frame->expand( $dom );

	return $parsing ? $text : $this->mStripState->unstripBoth( $text );
}
4789 
/**
 * Remove every run of three to five tildes from a signature, so that a
 * signature cannot contain another signature.
 *
 * @param string $text Signature text
 * @return string Signature text without ~~~ / ~~~~ / ~~~~~
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4800 
/**
 * Public entry point for setting up the parser for use outside parse():
 * forwards all of its arguments to startParse().
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to clear the parser state
 */
public function startExternalParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
	# Thin public wrapper around the private set-up routine
	$this->startParse( $title, $options, $outputType, $clearState );
}
4813 
/**
 * Set the title, options and output type for an upcoming parse and
 * optionally clear the parser state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to call clearState()
 */
private function startParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4828 
/**
 * Run the preprocessor over a message text.
 *
 * Re-entrant calls (made while an outer call is still running) return
 * the text untouched, to guard against infinite recursion.
 *
 * @param string $text Text to preprocess
 * @param ParserOptions $options Options handed through to preprocess()
 * @param Title|null $title Title to parse against; falls back to $wgTitle
 * @return string Preprocessed text
 * @throws Exception Anything thrown by preprocess() is re-thrown
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	wfProfileIn( __METHOD__ );
	if ( !$title ) {
		global $wgTitle;
		$title = $wgTitle;
	}

	try {
		$text = $this->preprocess( $text, $title, $options );
	} catch ( Exception $e ) {
		# Release the guard and close the profiling section before
		# propagating; otherwise every later call would return its input
		# untransformed.
		$executing = false;
		wfProfileOut( __METHOD__ );
		throw $e;
	}

	$executing = false;
	wfProfileOut( __METHOD__ );
	return $text;
}
4858 
/**
 * Register a callback for an extension tag and add the tag to the strip
 * list. The tag name is lower-cased before it is stored.
 *
 * @param string $tag Tag name; must not contain <, >, CR or LF
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains an invalid character
 * @return callable|null The previously registered callback, or null
 */
public function setHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;

	# Strict comparison: a loose in_array() treats numeric-looking names
	# such as '10' and '1e1' as equal and would leave this tag unstripped.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4896 
/**
 * Register a callback for a "transparent" tag. The tag name is
 * lower-cased before it is stored; unlike setHook() the strip list is
 * not touched.
 *
 * @param string $tag Tag name; must not contain <, >, CR or LF
 * @param callable $callback Callback stored for the tag
 * @throws MWException If the tag name contains an invalid character
 * @return callable|null The previously registered callback, or null
 */
function setTransparentTagHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# Report the real method name so the failing call can be found
		throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
4924 
/**
 * Remove all tag hooks and function tag hooks and reset the strip list
 * to its default contents.
 */
function clearTagHooks() {
	# Forget every registered callback ...
	$this->mTagHooks = array();
	$this->mFunctionTagHooks = array();
	# ... and strip only the default tags again
	$this->mStripList = $this->mDefaultStripList;
}
4933 
/**
 * Register a parser function callback under a magic word ID and index
 * all synonyms of that magic word for lookup.
 *
 * @param string $id Magic word ID
 * @param callable $callback Callback stored for the function
 * @param int $flags Bit field of self::SFH_* flags; with SFH_NO_HASH set
 *   the synonyms are registered without a leading "#"
 * @throws MWException If $id does not resolve to a magic word
 * @return callable|null The previously registered callback, or null
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
	global $wgContLang;

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = array( $callback, $flags );

	# Add to function cache
	$mw = MagicWord::get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $mw->getSynonyms();
	# Used as index into mFunctionSynonyms: 0 = case-insensitive, 1 = case-sensitive
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $wgContLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
5009 
/**
 * List the IDs of all registered parser function hooks.
 *
 * @return array Keys of the function hook registry
 */
function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}
5018 
/**
 * Register a function tag hook and add the tag to the strip list.
 * The tag name is lower-cased before it is stored.
 *
 * @param string $tag Tag name; must not contain <, >, CR or LF
 * @param callable $callback Callback stored for the tag
 * @param int $flags Flags stored alongside the callback
 * @throws MWException If the tag name contains an invalid character
 * @return array|null The previous array( callback, flags ) entry, or null
 */
function setFunctionTagHook( $tag, $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}
	$old = isset( $this->mFunctionTagHooks[$tag] ) ?
		$this->mFunctionTagHooks[$tag] : null;
	$this->mFunctionTagHooks[$tag] = array( $callback, $flags );

	# Strict comparison: a loose in_array() treats numeric-looking names
	# such as '10' and '1e1' as equal and would leave this tag unstripped.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
5044 
/**
 * Replace link placeholders in the text by delegating to the link
 * holder collection of this parser.
 *
 * @param string &$text Text handed by reference to mLinkHolders->replace()
 * @param int $options Not used by this method
 * @return mixed Whatever mLinkHolders->replace() returns
 */
function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	return $holders->replace( $text );
}
5058 
/**
 * Replace link placeholders by their text form, by delegating to the
 * link holder collection of this parser.
 *
 * @param string $text
 * @return mixed Whatever mLinkHolders->replaceText() returns
 */
function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;
	return $holders->replaceText( $text );
}
5069 
/**
 * Render an image gallery from text holding one image per line, e.g.
 *
 *   Image:someimage.jpg|This is some image
 *
 * Everything after the first pipe is the caption; it may carry alt=,
 * link= and handler-specific parameters separated by further pipes.
 *
 * @param string $text Gallery lines, separated by "\n"
 * @param array $params Gallery tag attributes; the keys read here are
 *   mode, showfilename, caption, perrow, widths and heights
 * @return string Result of the gallery object's toHTML()
 */
function renderImageGallery( $text, $params ) {
	wfProfileIn( __METHOD__ );

	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( MWException $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

	// File names are shown only when the attribute is present at all
	$ig->setShowFilename( isset( $params['showfilename'] ) );
	if ( isset( $params['caption'] ) ) {
		$caption = $params['caption'];
		$caption = htmlspecialchars( $caption );
		$caption = $this->replaceInternalLinks( $caption );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = array();
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		# Resolve the file name; names without a namespace default to NS_FILE
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = array();
		$descQuery = false;
		wfRunHooks( 'BeforeParserFetchFileAndTitle',
			array( $this, $title, &$options, &$descQuery ) );
		# Don't register it now, as ImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		wfProfileIn( __METHOD__ . '-getMagicWord' );
		$paramMap = array(
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		);
		if ( $handler ) {
			$paramMap = $paramMap + $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = new MagicWordArray( array_keys( $paramMap ) );
		wfProfileOut( __METHOD__ . '-getMagicWord' );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = array();
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			$parameterMatches = StringUtils::explode( '|', $matches[3] );

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
					case 'gallery-internal-alt':
						$alt = $this->stripAltText( $match, false );
						break;
					case 'gallery-internal-link':
						$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
						$chars = self::EXT_LINK_URL_CLASS;
						$prots = $this->mUrlProtocols;
						// check to see if link matches an absolute url, if not then it must be a wiki link.
						if ( preg_match( "/^($prots)$chars+$/u", $linkValue ) ) {
							$link = $linkValue;
						} else {
							$localLinkTitle = Title::newFromText( $linkValue );
							if ( $localLinkTitle !== null ) {
								$link = $localLinkTitle->getLocalURL();
							}
						}
						break;
					default:
						// Must be a handler specific parameter.
						if ( $handler->validateParam( $paramName, $match ) ) {
							$handlerOptions[$paramName] = $match;
						} else {
							// Guess not. Append it to the caption.
							wfDebug( "$parameterMatch failed parameter validation\n" );
							$label .= '|' . $parameterMatch;
						}
					}

				} else {
					// concatenate all other pipes
					$label .= '|' . $parameterMatch;
				}
			}
			// remove the first pipe
			$label = substr( $label, 1 );
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	wfProfileOut( __METHOD__ );
	return $html;
}
5238 
/**
 * Build (and cache per handler class) the map of image parameter magic
 * words together with a MagicWordArray that matches them.
 *
 * @param object|bool $handler Media handler providing getParamMap(), or
 *   a false value when there is none
 * @return array Two elements: the parameter map
 *   (magic word ID => array( type, name )) and the MagicWordArray
 */
function getImageParams( $handler ) {
	# Parameters every image accepts, grouped by the type they belong to
	static $internalParamNames = array(
		'horizAlign' => array( 'left', 'right', 'center', 'none' ),
		'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ),
		'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ),
	);
	static $internalParamMap;

	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$handlerClass] ) ) {
		# Build the handler-independent part once per request
		if ( !$internalParamMap ) {
			$internalParamMap = array();
			foreach ( $internalParamNames as $type => $names ) {
				foreach ( $names as $name ) {
					# Magic word IDs use underscores where the names have hyphens
					$magicName = str_replace( '-', '_', "img_$name" );
					$internalParamMap[$magicName] = array( $type, $name );
				}
			}
		}

		# Layer the handler's own parameters on top
		$paramMap = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magic => $paramName ) {
				$paramMap[$magic] = array( 'handler', $paramName );
			}
		}

		$this->mImageParams[$handlerClass] = $paramMap;
		$this->mImageParamsMagicArray[$handlerClass] = new MagicWordArray( array_keys( $paramMap ) );
	}

	return array( $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] );
}
5282 
/**
 * Parse the option text of an image link and hand the result to
 * Linker::makeImageLink().
 *
 * The option text has the form "options|alt text". Recognised options:
 *  - thumbnail    thumbnail with enlarge-icon and caption, alignment depends on lang
 *  - left         no resizing, just left align; label is used for alt= only
 *  - right        same, but right aligned
 *  - none         same, but not aligned
 *  - ___px        scale to ___ pixels width, no aligning (e.g. in a taxobox)
 *  - center       center the image
 *  - frame        keep original image size, no magnify-button
 *  - framed       same as "frame"
 *  - frameless    like 'thumb' but without a frame; keeps user preferences for width
 *  - upright      reduce width for upright images, rounded to full __0 px
 *  - border       draw a 1px border around the image
 *  - alt          text for the HTML alt attribute (defaults to empty)
 *  - class        set a class for the img node
 *  - link         target of the image link: external, interwiki, or local
 * vertical-align values (no % or length right now): baseline, sub, super,
 * top, text-top, middle, bottom, text-bottom
 *
 * @param Title $title Title of the file
 * @param string $options Pipe-separated option text
 * @param object|bool $holders Object providing replaceText(), handed on
 *   to stripAltText(); false to use the parser's own link holders
 * @return string Result of Linker::makeImageLink()
 */
function makeImage( $title, $options, $holders = false ) {
	$parts = StringUtils::explode( "|", $options );

	# Let extensions pick the file revision
	$options = array();
	$descQuery = false;
	wfRunHooks( 'BeforeParserFetchFileAndTitle',
		array( $this, $title, &$options, &$descQuery ) );
	# Fetch and register the file (hooks may have changed the file title)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Parameter map for this file's handler
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Walk over the input parameters
	$caption = '';
	$params = array(
		'frame' => array(),
		'handler' => array(),
		'horizAlign' => array(),
		'vertAlign' => array(),
	);
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			if ( $type === 'handler' && $paramName === 'width' ) {
				# Special case: width and height arrive in a single value
				$parsedWidthParam = $this->parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- bug 13436
			} else {
				if ( $type === 'handler' ) {
					# The handler validates its own parameters
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Internal parameters are validated here
					switch ( $paramName ) {
					case 'manualthumb':
					case 'alt':
					case 'class':
						# @todo FIXME: Possibly check validity here for
						# manualthumb? downstream behavior seems odd with
						# missing manual thumbs.
						$validated = true;
						$value = $this->stripAltText( $value, $holders );
						break;
					case 'link':
						$chars = self::EXT_LINK_URL_CLASS;
						$prots = $this->mUrlProtocols;
						if ( $value === '' ) {
							# Empty link= means: no link at all
							$paramName = 'no-link';
							$value = true;
							$validated = true;
						} elseif ( preg_match( "/^(?i)$prots/", $value ) ) {
							# Starts with a URL protocol: accept only a complete URL
							if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
								$paramName = 'link-url';
								$this->mOutput->addExternalLink( $value );
								if ( $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
								$validated = true;
							}
						} else {
							# Anything else is treated as a page title
							$linkTitle = Title::newFromText( $value );
							if ( $linkTitle ) {
								$paramName = 'link-title';
								$value = $linkTitle;
								$this->mOutput->addLink( $linkTitle );
								$validated = true;
							}
						}
						break;
					default:
						# Most other things appear to be empty or numeric...
						$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		if ( !$validated ) {
			# Whatever did not validate as a parameter becomes the caption
			$caption = $part;
		}
	}

	# Alignment: the key of the first entry names the chosen alignment
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Is the image shown in a frame, with the caption underneath?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# Historically [[Image:Foo|text...]] set the alt text; later the same
	# text also became the caption and the title attribute, which makes
	# screen readers repeat it.
	#
	# With a dedicated alt parameter the alt text should not duplicate the
	# caption (the framed/thumbnail case). Where there is no caption, the
	# unnamed parameter still doubles as alt text for the time being,
	# provided the alt parameter is not set.
	#
	# Possible future tweaks: a title= parameter, ignoring the unnamed
	# parameter for images without a caption, an explicit caption=
	# parameter keeping the unnamed one for BC; ...
	$hasAlt = isset( $params['frame']['alt'] );
	if ( $imageIsFramed ) {
		# Framed image
		if ( $caption === '' && !$hasAlt ) {
			# Neither caption nor alt text: use the file name so that
			# screen readers get at least some description
			$params['frame']['alt'] = $title->getText();
		}
		# $params['frame']['title'] stays unset: tooltips make no sense
		# for framed images
	} else {
		# Inline image
		if ( !$hasAlt ) {
			# No alt text: use the caption, or failing that the file name
			$params['frame']['alt'] = ( $caption !== '' )
				? $this->stripAltText( $caption, $holders )
				: $title->getText();
		}
		# The caption is the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}

	wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params, $this ) );

	# Linker does the rest
	$time = isset( $options['time'] ) ? $options['time'] : false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Let the handler adjust the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5493 
/**
 * Turn caption text into plain text usable in an attribute (alt text,
 * tooltip): link placeholders are replaced by text, strip markers are
 * expanded and all tags are removed.
 *
 * @param string $caption
 * @param object|bool $holders Object providing replaceText(), or a
 *   false value to use the parser's own link holders
 * @return string
 */
protected function stripAltText( $caption, $holders ) {
	# replaceLinkHoldersText() alone is not enough: when called from
	# replaceInternalLinks2(), mLinkHolders is not up to date yet, so an
	# explicitly supplied holder array takes precedence.
	$tooltip = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# Attributes must not contain placeholders that are expanded to HTML
	# later on, so expand them now and throw the tags away
	$expanded = $this->mStripState->unstripBoth( $tooltip );

	return Sanitizer::stripAllTags( $expanded );
}
5517 
/**
 * Mark the output of the current parse as uncacheable.
 *
 * @throws MWException When there is no parser output, i.e. when called
 *   outside of a parse
 */
function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	$output = $this->mOutput;
	if ( !$output ) {
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}

	// old style, for compatibility
	$output->setCacheTime( -1 );
	// new style, for consistency
	$output->updateCacheExpiry( 0 );
}
5531 
/**
 * Expand variables and strip markers in attribute text. The argument is
 * taken by reference and is updated in place as well as returned.
 *
 * @param string &$text
 * @param object|bool $frame Frame handed on to replaceVariables()
 * @return string The processed text
 */
function attributeStripCallback( &$text, $frame = false ) {
	# Variables first ...
	$text = $this->replaceVariables( $text, $frame );
	# ... then put the stripped content back
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5545 
/**
 * List the names of all registered tags: transparent tag hooks first,
 * then tag hooks, then function tag hooks.
 *
 * @return array
 */
function getTags() {
	$transparent = array_keys( $this->mTransparentTagHooks );
	$regular = array_keys( $this->mTagHooks );
	$functional = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparent, $regular, $functional );
}
5554 
/**
 * Replace transparent tags in the text with the output of their hooks.
 * Tags are extracted into markers first; each marker is then replaced by
 * the hook output or, when no hook is registered, by the original tag.
 *
 * @param string $text
 * @return string
 */
function replaceTransparentTags( $text ) {
	$found = array();
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $found, $this->mUniqPrefix );

	$replacements = array();
	foreach ( $found as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;
		$tagName = strtolower( $element );

		if ( !isset( $this->mTransparentTagHooks[$tagName] ) ) {
			# No hook for this tag: restore the original tag text
			$replacements[$marker] = $tag;
			continue;
		}

		$replacements[$marker] = call_user_func_array(
			$this->mTransparentTagHooks[$tagName],
			array( $content, $params, $this )
		);
	}

	return strtr( $text, $replacements );
}
5583 
/**
 * Shared worker behind getSection() and replaceSection().
 *
 * Walks the top-level nodes of the preprocessed text, locates the heading
 * whose index matches the requested section, and either collects that
 * section (including nested sub-sections) or rebuilds the whole text with
 * the section swapped for $newText.
 *
 * @param string $text Page wikitext
 * @param string $section Section identifier: a section index, optionally
 *   preceded by '-'-separated flags. The flag 'T' turns on
 *   self::PTD_FOR_INCLUSION for preprocessing.
 * @param string $mode Either 'get' or 'replace'
 * @param string $newText In 'replace' mode the replacement text; in 'get'
 *   mode the value returned when the section does not exist
 * @return string In 'get' mode the section text (or $newText if it was not
 *   found); in 'replace' mode the modified text (or $text unchanged if the
 *   section was not found)
 */
private function extractSections( $text, $section, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$result = '';
	$ppFrame = $this->getPreprocessor()->newFrame();

	$isGet = ( $mode === 'get' );
	$isReplace = ( $mode === 'replace' );

	# The last '-'-separated piece is the section index, everything before it is a flag
	$pieces = explode( '-', $section );
	$sectionIndex = array_pop( $pieces );
	$flags = in_array( 'T', $pieces, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Empty input: only sections 0 and T-0 exist in an empty document
	if ( strval( $text ) === '' ) {
		if ( $sectionIndex == 0 ) {
			return $isGet ? '' : $newText;
		}
		return $isGet ? $newText : $text;
	}

	# Preprocess the text.
	# <h> nodes indicate section breaks. They can only occur at the top level,
	# so iterating over the root's children is enough to find them.
	$dom = $this->preprocessToDom( $text, $flags );
	$node = $dom->getFirstChild();

	# Locate the start of the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			# When replacing, everything ahead of the target is copied through
			if ( $isReplace ) {
				$result .= $ppFrame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	# Ran off the end of the document: the section does not exist
	if ( !$node ) {
		return $isGet ? $newText : $text;
	}

	# Advance to the end of the section, nested sections included: stop at the
	# first other heading whose level is not deeper than the target's
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			if ( $heading['i'] != $sectionIndex && $heading['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $isGet ) {
			$result .= $ppFrame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Replace mode: emit the new section text and then the rest of the page
	if ( $isReplace ) {
		# Two newlines are appended because trailing whitespace in $newText is
		# conventionally stripped by the editor, so both are needed to restore
		# the paragraph gap. Nothing is appended when there is no new text.
		if ( $newText != "" ) {
			$result .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$result .= $ppFrame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $result ) ) {
		# Re-insert stripped tags
		$result = rtrim( $this->mStripState->unstripBoth( $result ) );
	}

	return $result;
}
5721 
/**
 * Fetch the text of one section of a page.
 *
 * Thin wrapper that runs extractSections() in 'get' mode.
 *
 * @param string $text Page wikitext
 * @param string $section Section identifier, as accepted by extractSections()
 * @param string $deftext Value returned when the section does not exist
 * @return string Section text, or $deftext if the section was not found
 */
public function getSection( $text, $section, $deftext = '' ) {
	$sectionText = $this->extractSections( $text, $section, 'get', $deftext );
	return $sectionText;
}
5737 
/**
 * Replace one section of a page with new text.
 *
 * Thin wrapper that runs extractSections() in 'replace' mode.
 *
 * @param string $oldtext Existing page wikitext
 * @param string $section Section identifier, as accepted by extractSections()
 * @param string $text Replacement text for the section
 * @return string The page text with the section replaced
 */
public function replaceSection( $oldtext, $section, $text ) {
	$updated = $this->extractSections( $oldtext, $section, 'replace', $text );
	return $updated;
}
5751 
/**
 * Get the revision ID currently stored on the parser.
 *
 * @return mixed The value of $this->mRevisionId (may be null)
 */
function getRevisionId() {
	$revisionId = $this->mRevisionId;
	return $revisionId;
}
5760 
/**
 * Get the revision object for $this->mRevisionId, loading it on first use.
 *
 * A previously stored object is returned as is. If no object is stored and
 * no revision ID is set, null is returned. Otherwise the revision is fetched
 * with Revision::newFromId() and remembered for subsequent calls.
 *
 * @return Revision|null
 */
public function getRevisionObject() {
	$needsLoad = is_null( $this->mRevisionObject ) && !is_null( $this->mRevisionId );
	if ( $needsLoad ) {
		$this->mRevisionObject = Revision::newFromId( $this->mRevisionId );
	}

	return $this->mRevisionObject;
}
5778 
/**
 * Get the timestamp associated with the current revision, adjusted to the
 * site-default timezone offset.
 *
 * The value is computed once and cached in $this->mRevisionTimestamp. When
 * no revision object is available the current time (wfTimestampNow()) is
 * used instead.
 *
 * @return string Timestamp as returned by $wgContLang->userAdjust()
 */
function getRevisionTimestamp() {
	if ( is_null( $this->mRevisionTimestamp ) ) {
		wfProfileIn( __METHOD__ );

		# The content language object lives in the global scope; without this
		# declaration $wgContLang would be an undefined local variable below.
		global $wgContLang;

		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		#
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );

		wfProfileOut( __METHOD__ );
	}
	return $this->mRevisionTimestamp;
}
5804 
/**
 * Get the name of the user that made the current revision.
 *
 * The result is cached in $this->mRevisionUser. If there is no revision
 * object, the current user's name is used, but only in 'wiki' output mode
 * or when previewing; otherwise the cached value is left untouched.
 *
 * @return string|null User name, or null if none could be determined
 */
function getRevisionUser() {
	if ( !is_null( $this->mRevisionUser ) ) {
		return $this->mRevisionUser;
	}

	$rev = $this->getRevisionObject();

	# if this template is subst: the revision id will be blank,
	# so just use the current user's name
	if ( $rev ) {
		$this->mRevisionUser = $rev->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
5824 
/**
 * Get the size of the current revision.
 *
 * The result is cached in $this->mRevisionSize. If there is no revision
 * object, the parser input size is used, but only in 'wiki' output mode or
 * when previewing; otherwise the cached value is left untouched.
 *
 * @return int|null Size, or null if none could be determined
 */
function getRevisionSize() {
	if ( !is_null( $this->mRevisionSize ) ) {
		return $this->mRevisionSize;
	}

	$rev = $this->getRevisionObject();

	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the substitution
	# itself will change the size.
	if ( $rev ) {
		$this->mRevisionSize = $rev->getSize();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionSize = $this->mInputSize;
	}

	return $this->mRevisionSize;
}
5845 
/**
 * Set the default sort key.
 *
 * The key is stored on the parser and also recorded on the parser output as
 * the 'defaultsort' property.
 *
 * @param string $sort New default sort key
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$output = $this->mOutput;
	$output->setProperty( 'defaultsort', $sort );
}
5855 
/**
 * Get the default sort key, or an empty string when none has been set.
 *
 * "Not set" means $this->mDefaultSort is exactly false.
 *
 * @return string
 */
public function getDefaultSort() {
	return ( $this->mDefaultSort === false ) ? '' : $this->mDefaultSort;
}
5873 
/**
 * Get the raw default sort key exactly as stored.
 *
 * Unlike getDefaultSort(), no substitution is made when the key is unset,
 * so the stored value (including false) is returned unchanged.
 *
 * @return string|bool
 */
public function getCustomDefaultSort() {
	$stored = $this->mDefaultSort;
	return $stored;
}
5883 
/**
 * Guess the anchor for a section heading given as wikitext.
 *
 * The heading is reduced to plain text with stripSectionName(), its
 * whitespace is normalized, and the result is escaped as an ID.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped section ID
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$text = $this->stripSectionName( $text );
	# Normalize whitespace left over from stripping before escaping
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );
	return '#' . Sanitizer::escapeId( $text, 'noninitial' );
}
5899 
/**
 * Guess the legacy-style anchor for a section heading given as wikitext.
 *
 * Same steps as guessSectionNameFromWikiText(), except that the ID is
 * escaped with both the 'noninitial' and 'legacy' options.
 *
 * @param string $text Heading wikitext
 * @return string '#' followed by the escaped section ID
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links(they break the anchor)
	$text = $this->stripSectionName( $text );
	# Normalize whitespace left over from stripping before escaping
	$text = Sanitizer::normalizeSectionNameWhitespace( $text );
	return '#' . Sanitizer::escapeId( $text, array( 'noninitial', 'legacy' ) );
}
5914 
/**
 * Reduce a section heading to plain text.
 *
 * Applied in order: internal link markup is replaced by the link's label
 * (or its target when there is no label), labelled external links are
 * replaced by their label, wikitext quotes are run through doQuotes(), and
 * finally everything between '<' and '>' is removed.
 *
 * @param string $text Heading text
 * @return string Stripped heading text
 */
public function stripSectionName( $text ) {
	# Strip internal link markup: piped links keep the label, others the target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$stripped = preg_replace( $pipedLink, '$2', $text );
	$stripped = preg_replace( $plainLink, '$1', $stripped );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$labelledExternalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$stripped = preg_replace( $labelledExternalLink, '$2', $stripped );

	# Parse wikitext quotes (italics & bold)
	$stripped = $this->doQuotes( $stripped );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $stripped );
}
5947 
/**
 * Testing helper: start a parse, expand variables, unstrip, and remove
 * HTML tags via the Sanitizer.
 *
 * @param string $text Input text
 * @param Title $title Title to parse against
 * @param ParserOptions $options Parser options
 * @param int $outputType One of the self::OT_* constants
 * @return string Processed text
 */
function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) {
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
5966 
/**
 * Testing helper: run preSaveTransform() using the user from $options.
 *
 * @param string $text Input text
 * @param Title $title Title to transform against
 * @param ParserOptions $options Parser options; also supplies the user
 * @return string Result of preSaveTransform()
 */
function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $title, $user, $options );
}
5976 
/**
 * Testing helper: run testSrvus() with the output type forced to
 * self::OT_PREPROCESS.
 *
 * @param string $text Input text
 * @param Title $title Title to parse against
 * @param ParserOptions $options Parser options
 * @return string Result of testSrvus()
 */
function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;
	return $this->testSrvus( $text, $title, $options, $outputType );
}
5986 
/**
 * Apply a callback to the parts of a string that lie outside strip markers.
 *
 * A marker starts at $this->mUniqPrefix and runs through the next
 * self::MARKER_SUFFIX. Markers are copied to the output untouched; the text
 * between them is passed through $callback, whose return value is used in
 * its place. If a marker prefix has no closing suffix, the rest of the
 * string from that prefix onwards is copied verbatim.
 *
 * @param string $s Input string
 * @param callable $callback Called with one text segment; returns its replacement
 * @return string
 */
function markerSkipCallback( $s, $callback ) {
	$result = '';
	$pos = 0;
	$suffixLength = strlen( self::MARKER_SUFFIX );

	while ( $pos < strlen( $s ) ) {
		$start = strpos( $s, $this->mUniqPrefix, $pos );
		if ( $start === false ) {
			# No more markers: the remainder is ordinary text
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Ordinary text in front of the marker
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$suffixAt = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $suffixAt === false ) {
			# Unterminated marker: pass the rest through unchanged
			$result .= substr( $s, $start );
			break;
		}

		# Copy the complete marker verbatim and continue just after it
		$end = $suffixAt + $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
6026 
/**
 * Remove strip markers from the given text by delegating to the strip
 * state's killMarkers().
 *
 * @param string $text Input text
 * @return string Result of StripState::killMarkers()
 */
function killMarkers( $text ) {
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
6036 
/**
 * Package half-parsed text together with the parser state it depends on.
 *
 * The returned array holds the text itself, the half-parsed format version,
 * the sub-state of the strip state relevant to the text, and the sub-array
 * of link holders relevant to the text. It is the input expected by
 * unserializeHalfParsedText().
 *
 * @param string $text Half-parsed text
 * @return array With keys 'text', 'version', 'stripState' and 'linkHolders'
 */
function serializeHalfParsedText( $text ) {
	wfProfileIn( __METHOD__ );

	$stripSubState = $this->mStripState->getSubState( $text );
	$linkSubArray = $this->mLinkHolders->getSubArray( $text );

	wfProfileOut( __METHOD__ );

	return array(
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $stripSubState,
		'linkHolders' => $linkSubArray
	);
}
6064 
/**
 * Load data produced by serializeHalfParsedText() into this parser.
 *
 * The serialized strip state is merged into this parser's strip state and
 * the serialized link holders into its link holder array; both merges
 * return the text updated to match, and the final text is returned.
 *
 * @param array $data Array as produced by serializeHalfParsedText()
 * @throws MWException If the 'version' key is missing or does not match
 *   self::HALF_PARSED_VERSION
 * @return string The text, ready for further processing by this parser
 */
function unserializeHalfParsedText( $data ) {
	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# Both merge steps operate on a list of texts; only one text is involved here
	$textList = array( $data['text'] );

	# First, extract the strip state.
	$textList = $this->mStripState->merge( $data['stripState'], $textList );

	# Now renumber links
	$textList = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $textList );

	# Should be good to go.
	return $textList[0];
}
6095 
/**
 * Check whether $data carries the half-parsed format version this parser
 * understands.
 *
 * Only the 'version' key is inspected; it is compared loosely against
 * self::HALF_PARSED_VERSION.
 *
 * @param array $data Candidate data, as produced by serializeHalfParsedText()
 * @return bool
 */
function isValidHalfParsedText( $data ) {
	if ( !isset( $data['version'] ) ) {
		return false;
	}
	return $data['version'] == self::HALF_PARSED_VERSION;
}
6108 
/**
 * Parse a width or width-by-height value such as "100", "100px",
 * "100x200" or "100x200px".
 *
 * @param string $value Raw parameter value
 * @return array Empty if $value is '' or matches neither form. Otherwise
 *   it has an integer 'width', plus an integer 'height' when the "WxH"
 *   form was used. A missing number in a matched form yields 0.
 */
public function parseWidthParam( $value ) {
	if ( $value === '' ) {
		return array();
	}

	# (bug 13500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$dims = array();
	if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $dims ) ) {
		return array(
			'width' => intval( $dims[1] ),
			'height' => intval( $dims[2] ),
		);
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return array( 'width' => intval( $value ) );
	}

	return array();
}
6136 }
OT_MSG
const OT_MSG
Definition: Defines.php:233
SiteStats\articles
static articles()
Definition: SiteStats.php:124
ParserOptions
Set options of the Parser.
Definition: ParserOptions.php:31
Title\makeTitle
static & makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:398
save
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a save
Definition: deferred.txt:4
MagicWordArray
Class for handling an array of magic words.
Definition: MagicWord.php:676
object
globals will be eliminated from MediaWiki replaced by an application object which would be passed to constructors Whether that would be an convenient solution remains to be but certainly PHP makes such object oriented programming models easier than they were in previous versions For the time being MediaWiki programmers will have to work in an environment with some global context At the time of globals were initialised on startup by MediaWiki of these were configuration which are documented in DefaultSettings php There is no comprehensive documentation for the remaining however some of the most important ones are listed below They are typically initialised either in index php or in Setup php For a description of the see design txt $wgTitle Title object created from the request URL $wgOut OutputPage object for HTTP response $wgUser User object for the user associated with the current request $wgLang Language object selected by user preferences $wgContLang Language object associated with the wiki being viewed $wgParser Parser object Parser extensions register their hooks here $wgRequest WebRequest object
Definition: globals.txt:25
$result
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message. Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item. $reader:XMLReader object $logInfo:Array of information Return false to stop further processing of the tag 'ImportHandlePageXMLTag':When parsing a XML tag in a page. $reader:XMLReader object $pageInfo:Array of information Return false to stop further processing of the tag 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision. $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information Return false to stop further processing of the tag 'ImportHandleToplevelXMLTag':When parsing a top level XML tag. $reader:XMLReader object Return false to stop further processing of the tag 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload. $reader:XMLReader object $revisionInfo:Array of information Return false to stop further processing of the tag 'InfoAction':When building information to display on the action=info page. $context:IContextSource object & $pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect. $title:Title object for the current page $request:WebRequest $ignoreRedirect:boolean to skip redirect check $target:Title/string of redirect target $article:Article object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not. Return true without providing an interwiki to continue interwiki search. $prefix:interwiki prefix we are looking for. & $iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 'InternalParseBeforeSanitize':during Parser 's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings. 
Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InternalParseBeforeLinks':during Parser 's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InvalidateEmailComplete':Called after a user 's email has been invalidated successfully. $user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification. Callee may modify $url and $query, URL will be constructed as $url . $query & $url:URL to index.php & $query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) $article:article(object) being checked 'IsTrustedProxy':Override the result of wfIsTrustedProxy() $ip:IP being check $result:Change this value to override the result of wfIsTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from & $allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of User::isValidEmailAddr(), for instance to return false if the domain name doesn 't match your organization. 
$addr:The e-mail address entered by the user & $result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user & $result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we 're looking for a messages file for & $file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces. Do not use this hook to add namespaces. Use CanonicalNamespaces for that. & $namespaces:Array of namespaces indexed by their numbers 'LanguageGetMagic':DEPRECATED, use $magicWords in a file listed in $wgExtensionMessagesFiles instead. Use this to define synonyms of magic words depending of the language $magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetSpecialPageAliases':DEPRECATED, use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead. Use to define aliases of special pages names depending of the language $specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names. & $names:array of language code=> language name $code language of the preferred translations 'LanguageLinks':Manipulate a page 's language links. This is called in various places to allow extensions to define the effective language links for a page. $title:The page 's Title. & $links:Associative array mapping language codes to prefixed links of the form "language:title". & $linkFlags:Associative array mapping prefixed links to arrays of flags. Currently unused, but planned to provide support for marking individual language links in the UI, e.g. for featured articles. 
'LinkBegin':Used when generating internal and interwiki links in Linker::link(), before processing starts. Return false to skip default processing and return $ret. See documentation for Linker::link() for details on the expected meanings of parameters. $skin:the Skin object $target:the Title that the link is pointing to & $html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1528
FauxRequest
WebRequest clone which takes values from a provided array.
Definition: WebRequest.php:1275
$time
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition: hooks.txt:1358
ID
occurs before session is loaded can be modified ID
Definition: hooks.txt:2818
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:189
PPFrame\STRIP_COMMENTS
const STRIP_COMMENTS
Definition: Preprocessor.php:75
DB_MASTER
const DB_MASTER
Definition: Defines.php:56
MWNamespace\isNonincludable
static isNonincludable( $index)
It is not possible to use pages from this namespace as template?
Definition: Namespace.php:417
of
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
Definition: globals.txt:10
RepoGroup\singleton
static singleton()
Get a RepoGroup instance.
Definition: RepoGroup.php:53
ParserOutput
Definition: ParserOutput.php:24
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
Revision\newFromId
static newFromId( $id, $flags=0)
Load a page revision from a given revision ID number.
Definition: Revision.php:88
SiteStats\users
static users()
Definition: SiteStats.php:140
$html
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1530
SiteStats\activeUsers
static activeUsers()
Definition: SiteStats.php:148
Profiler\instance
static instance()
Singleton.
Definition: Profiler.php:127
is
We use the convention $dbr for read and $dbw for write to help you keep track of whether the database object is a the world will explode Or to be a subsequent write query which succeeded on the master may fail when replicated to the slave due to a unique key collision Replication on the slave will stop and it may take hours to repair the database and get it back online Setting read_only in my cnf on the slave will avoid this but given the dire we prefer to have as many checks as possible We provide a but the wrapper functions like please read the documentation for except in special pages derived from QueryPage It s a common pitfall for new developers to submit code containing SQL queries which examine huge numbers of rows Remember that COUNT * is(N), counting rows in atable is like counting beans in a bucket.------------------------------------------------------------------------ Replication------------------------------------------------------------------------The largest installation of MediaWiki, Wikimedia, uses a large set ofslave MySQL servers replicating writes made to a master MySQL server. Itis important to understand the issues associated with this setup if youwant to write code destined for Wikipedia.It 's often the case that the best algorithm to use for a given taskdepends on whether or not replication is in use. Due to our unabashedWikipedia-centrism, we often just use the replication-friendly version, but if you like, you can use wfGetLB() ->getServerCount() > 1 tocheck to see if replication is in use.===Lag===Lag primarily occurs when large write queries are sent to the master.Writes on the master are executed in parallel, but they are executed inserial when they are replicated to the slaves. The master writes thequery to the binlog when the transaction is committed. The slaves pollthe binlog and start executing the query as soon as it appears. 
They canservice reads while they are performing a write query, but will not readanything more from the binlog and thus will perform no more writes. Thismeans that if the write query runs for a long time, the slaves will lagbehind the master for the time it takes for the write query to complete.Lag can be exacerbated by high read load. MediaWiki 's load balancer willstop sending reads to a slave when it is lagged by more than 30 seconds.If the load ratios are set incorrectly, or if there is too much loadgenerally, this may lead to a slave permanently hovering around 30seconds lag.If all slaves are lagged by more than 30 seconds, MediaWiki will stopwriting to the database. All edits and other write operations will berefused, with an error returned to the user. This gives the slaves achance to catch up. Before we had this mechanism, the slaves wouldregularly lag by several minutes, making review of recent editsdifficult.In addition to this, MediaWiki attempts to ensure that the user seesevents occurring on the wiki in chronological order. A few seconds of lagcan be tolerated, as long as the user sees a consistent picture fromsubsequent requests. This is done by saving the master binlog positionin the session, and then at the start of each request, waiting for theslave to catch up to that position before doing any reads from it. Ifthis wait times out, reads are allowed anyway, but the request isconsidered to be in "lagged slave mode". Lagged slave mode can bechecked by calling wfGetLB() ->getLaggedSlaveMode(). The onlypractical consequence at present is a warning displayed in the pagefooter.===Lag avoidance===To avoid excessive lag, queries which write large numbers of rows shouldbe split up, generally to write one row at a time. Multi-row INSERT ...SELECT queries are the worst offenders should be avoided altogether.Instead do the select first and then the insert.===Working with lag===Despite our best efforts, it 's not practical to guarantee a low-lagenvironment. 
Lag will usually be less than one second, but mayoccasionally be up to 30 seconds. For scalability, it 's very importantto keep load on the master low, so simply sending all your queries tothe master is not the answer. So when you have a genuine need forup-to-date data, the following approach is advised:1) Do a quick query to the master for a sequence number or timestamp 2) Run the full query on the slave and check if it matches the data you gotfrom the master 3) If it doesn 't, run the full query on the masterTo avoid swamping the master every time the slaves lag, use of thisapproach should be kept to a minimum. In most cases you should just readfrom the slave and let the user deal with the delay.------------------------------------------------------------------------ Lock contention------------------------------------------------------------------------Due to the high write rate on Wikipedia(and some other wikis), MediaWiki developers need to be very careful to structure their writesto avoid long-lasting locks. By default, MediaWiki opens a transactionat the first query, and commits it before the output is sent. Locks willbe held from the time when the query is done until the commit. So youcan reduce lock time by doing as much processing as possible before youdo your write queries.Often this approach is not good enough, and it becomes necessary toenclose small groups of queries in their own transaction. Use thefollowing syntax:$dbw=wfGetDB(DB_MASTER
Linker\makeSelfLinkObj
static makeSelfLinkObj( $nt, $html='', $query='', $trail='', $prefix='')
Make appropriate markup for a link to the current article.
Definition: Linker.php:409
PPFrame\NO_ARGS
const NO_ARGS
Definition: Preprocessor.php:73
wfSetVar
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest If source is NULL, it just returns the val...
Definition: GlobalFunctions.php:2139
Linker\tocIndent
static tocIndent()
Add another level to the Table of Contents.
Definition: Linker.php:1615
wfGetDB
& wfGetDB( $db, $groups=array(), $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:3659
text
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
$timestamp
if( $limit) $timestamp
Definition: importImages.php:104
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:2483
wiki
Prior to maintenance scripts were a hodgepodge of code that had no cohesion or formal method of action Beginning maintenance scripts have been cleaned up to use a unified class Directory structure How to run a script How to write your own DIRECTORY STRUCTURE The maintenance directory of a MediaWiki installation contains several all of which have unique purposes HOW TO RUN A SCRIPT Ridiculously just call php someScript php that s in the top level maintenance directory if not default wiki
Definition: maintenance.txt:1
SiteStats\pages
static pages()
Definition: SiteStats.php:132
wfDebugLog
wfDebugLog( $logGroup, $text, $dest='all')
Send a line to a supplementary debug log file, if configured, or main debug log if not.
Definition: GlobalFunctions.php:1040
wfProfileIn
wfProfileIn( $functionname)
Begin profiling of a function.
Definition: Profiler.php:33
$n
$n
Definition: RandomTest.php:76
$ret
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1530
wfUrlencode
wfUrlencode( $s)
We want some things to be included as literal characters in our title URLs for prettiness,...
Definition: GlobalFunctions.php:330
SiteStats\numberingroup
static numberingroup( $group)
Find the number of users in a given user group.
Definition: SiteStats.php:166
normal
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of normal(non-web) applications -- they might conflict with distributors' policies
SFH_OBJECT_ARGS
const SFH_OBJECT_ARGS
Definition: Defines.php:241
MagicWord\get
static & get( $id)
Factory: creates an object representing an ID.
Definition: MagicWord.php:238
$fname
if(!defined( 'MEDIAWIKI')) $fname
This file is not a valid entry point, perform no further processing unless MEDIAWIKI is defined.
Definition: Setup.php:35
Sanitizer\normalizeSectionNameWhitespace
static normalizeSectionNameWhitespace( $section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(),...
Definition: Sanitizer.php:1297
OT_PREPROCESS
const OT_PREPROCESS
Definition: Defines.php:232
NS_FILE
const NS_FILE
Definition: Defines.php:85
ImageGalleryBase\factory
static factory( $mode=false)
Get a new image gallery.
Definition: ImageGalleryBase.php:66
OT_PLAIN
const OT_PLAIN
Definition: Defines.php:234
$params
$params
Definition: styleTest.css.php:40
$limit
if( $sleep) $limit
Definition: importImages.php:99
NS_TEMPLATE
const NS_TEMPLATE
Definition: Defines.php:89
$s
$s
Definition: mergeMessageFileList.php:156
SpecialPage\getTitleFor
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name.
Definition: SpecialPage.php:74
$wgContLang
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the content language as $wgContLang
Definition: design.txt:56
StripState
Definition: StripState.php:28
Makefile.open
open
Definition: Makefile.py:14
$flags
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2113
$link
set to $title object and return false for a match for latest after cache objects are set use the ContentHandler facility to handle CSS and JavaScript for highlighting & $link
Definition: hooks.txt:2149
cache
you have access to all of the normal MediaWiki so you can get a DB use the cache
Definition: maintenance.txt:52
Linker\tocLine
static tocLine( $anchor, $tocline, $tocnumber, $level, $sectionIndex=false)
parameter level defines if we are on an indentation level
Definition: Linker.php:1633
Sanitizer\stripAllTags
static stripAllTags( $text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed,...
Definition: Sanitizer.php:1735
FakeTitle
Fake title class that triggers an error if any members are called.
Definition: FakeTitle.php:26
Linker\linkKnown
static linkKnown( $target, $html=null, $customAttribs=array(), $query=array(), $options=array( 'known', 'noclasses'))
Identical to link(), except $options defaults to 'known'.
Definition: Linker.php:264
pre
</p > ! end ! test Empty pre
Definition: parserTests.txt:1579
Linker\makeExternalLink
static makeExternalLink( $url, $text, $escape=true, $linktype='', $attribs=array(), $title=null)
Make an external link.
Definition: Linker.php:1034
PPFrame\NO_TEMPLATES
const NO_TEMPLATES
Definition: Preprocessor.php:74
later
If you want to remove the page from your watchlist later
Definition: All_system_messages.txt:361
$dbr
$dbr
Definition: testCompression.php:48
SiteStats\images
static images()
Definition: SiteStats.php:156
Revision
Definition: Revision.php:26
key
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
title
to move a page</td >< td > &*You are moving the page across *A non empty talk page already exists under the new or *You uncheck the box below In those you will have to move or merge the page manually if desired</td >< td > be sure to &You are responsible for making sure that links continue to point where they are supposed to go Note that the page will &a page at the new title
Definition: All_system_messages.txt:2703
StringUtils\explodeMarkup
static explodeMarkup( $separator, $text)
More or less "markup-safe" explode() Ignores any instances of the separator inside <....
Definition: StringUtils.php:270
NS_SPECIAL
const NS_SPECIAL
Definition: Defines.php:68
RequestContext\setTitle
setTitle( $t)
Set the Title object.
Definition: RequestContext.php:116
wfParseUrl
wfParseUrl( $url)
parse_url() work-alike, but non-broken.
Definition: GlobalFunctions.php:755
MagicWord\getVariableIDs
static getVariableIDs()
Get an array of parser variable IDs.
Definition: MagicWord.php:252
MWException
MediaWiki exception.
Definition: MWException.php:26
$out
$out
Definition: UtfNormalGenerate.php:167
MagicWord\getCacheTTL
static getCacheTTL( $id)
Allow external reads of TTL array.
Definition: MagicWord.php:275
table
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition: deferred.txt:11
so
I won t presume to tell you how to I m just describing the methods I chose to use for myself If you do choose to follow these it will probably be easier for you to collaborate with others on the but if you want to contribute without by all means do so(and don 't be surprised if I reformat your code). - I have the code indented with tabs to save file size and so that users can set their tab stops to any depth they like. I use 4-space tab stops
Html\element
static element( $element, $attribs=array(), $contents='')
Identical to rawElement(), but HTML-escapes $contents (like Xml::element()).
Definition: Html.php:148
there
has been added to your &Future changes to this page and its associated Talk page will be listed there
Definition: All_system_messages.txt:357
wfUrlProtocolsWithoutProtRel
wfUrlProtocolsWithoutProtRel()
Like wfUrlProtocols(), but excludes '//' from the protocol list.
Definition: GlobalFunctions.php:740
Linker\tocList
static tocList( $toc, $lang=false)
Wraps the TOC in a table and provides the hide/collapse javascript.
Definition: Linker.php:1661
$parser
do that in ParserLimitReportFormat instead $parser
Definition: hooks.txt:1956
CoreTagHooks\register
static register( $parser)
Definition: CoreTagHooks.php:33
directly
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database etc For and for historical it also represents a few features of articles that don t involve their such as access rights See also title txt Article Encapsulates access to the page table of the database The object represents a an and maintains state such as etc Revision Encapsulates individual page revision data and access to the revision text blobs storage system Higher level code should never touch text storage directly
Definition: design.txt:34
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:310
PPNode
There are three types of nodes:
Definition: Preprocessor.php:183
LinkHolderArray
Definition: LinkHolderArray.php:27
PPFrame\RECOVER_ORIG
const RECOVER_ORIG
Definition: Preprocessor.php:79
wfProfileOut
wfProfileOut( $functionname='missing')
Stop profiling of a function.
Definition: Profiler.php:46
wfMessage
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
Linker\tocLineEnd
static tocLineEnd()
End a Table Of Contents line.
Definition: Linker.php:1650
wfRunHooks
wfRunHooks( $event, array $args=array(), $deprecatedVersion=null)
Call hook functions defined in $wgHooks.
Definition: GlobalFunctions.php:4010
MWNamespace\hasSubpages
static hasSubpages( $index)
Does the namespace allow subpages?
Definition: Namespace.php:325
wfCgiToArray
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
Definition: GlobalFunctions.php:412
$lines
$lines
Definition: router.php:65
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
MWTimestamp\getInstance
static getInstance( $ts=false)
Get a timestamp instance in GMT.
Definition: MWTimestamp.php:387
add
An extension or a will often add custom code to the function with or without a global variable For someone wanting email notification when an article is shown may add
Definition: hooks.txt:51
OT_WIKI
const OT_WIKI
Definition: Defines.php:231
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
form
null means default in associative array form
Definition: hooks.txt:1530
wfTimestampNow
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Definition: GlobalFunctions.php:2514
NS_CATEGORY
const NS_CATEGORY
Definition: Defines.php:93
RequestContext
Group all the pieces relevant to the context of a request into one instance.
Definition: RequestContext.php:30
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
false
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:188
$sort
$sort
Definition: profileinfo.php:301
Linker\splitTrail
static splitTrail( $trail)
Split a link trail, return the "inside" portion and the remainder of the trail as a two-element array...
Definition: Linker.php:1731
Sanitizer\escapeId
static escapeId( $id, $options=array())
Given a value, escape it so that it can be used in an id attribute and return it.
Definition: Sanitizer.php:1099
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1530
$section
$section
Definition: Utf8Test.php:88
wfUrlProtocols
wfUrlProtocols( $includeProtocolRelative=true)
Returns a regular expression of url protocols.
Definition: GlobalFunctions.php:695
$line
$line
Definition: cdb.php:57
TS_MW
const TS_MW
MediaWiki concatenated string timestamp (YYYYMMDDHHMMSS)
Definition: GlobalFunctions.php:2431
wfDebug
wfDebug( $text, $dest='all')
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:933
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:422
$title
presenting them properly to the user as errors is done by the caller $title
Definition: hooks.txt:1324
CoreParserFunctions\register
static register( $parser)
Definition: CoreParserFunctions.php:33
see
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their please see
Definition: database.txt:2
gt
b→ & gt
Definition: parserTests.txt:893
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:336
$matches
if(!defined( 'MEDIAWIKI')) if(!isset( $wgVersion)) $matches
Definition: NoLocalSettings.php:33
$size
$size
Definition: RandomTest.php:75
$value
$value
Definition: styleTest.css.php:45
NS_MEDIA
const NS_MEDIA
Definition: Defines.php:67
variable
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control variable
Definition: memcached.txt:78
Sanitizer\validateTagAttributes
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:691
PPFrame
Definition: Preprocessor.php:72
up
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title after the basic globals have been set up
Definition: hooks.txt:1679
wfEscapeWikiText
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking,...
Definition: GlobalFunctions.php:2077
Linker\makeHeadline
static makeHeadline( $level, $attribs, $anchor, $html, $link, $legacyAnchor=false)
Create a headline for content.
Definition: Linker.php:1715
SpecialPageFactory\capturePath
static capturePath(Title $title, IContextSource $context)
Just like executePath() but will override global variables and execute the page in "inclusion" mode.
Definition: SpecialPageFactory.php:524
Sanitizer\cleanUrl
static cleanUrl( $url)
Definition: Sanitizer.php:1768
$user
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a account $user
Definition: hooks.txt:237
tags
pre inside other HTML tags(bug 54946) !! wikitext a< div >< pre > foo</pre ></div >< pre ></pre > !! html< p >a</p >< div >< pre > foo</pre ></div >< pre ></pre > !! end !! test HTML pre followed by indent-pre !! wikitext< pre >foo</pre > bar !! html< pre >foo</pre >< pre >bar</pre > !! end !!test Block tag pre !!options parsoid !! wikitext< p >< pre >foo</pre ></p > !! html< p data-parsoid
only
published in in Madrid In the first edition of the Vocabolario for was published In in Rotterdam was the Dictionnaire Universel ! html< p > The first monolingual dictionary written in a Romance language was< i > Sebastián Covarrubias</i >< i > Tesoro de la lengua castellana o published in in Madrid In the first edition of the< i > Vocabolario dell< a href="/index.php?title=Accademia_della_Crusca&amp;action=edit&amp;redlink=1" class="new" title="Accademia della Crusca (page does not exist)"> Accademia della Crusca</a ></i > for was published In in Rotterdam was the< i > Dictionnaire Universel</i ></p > ! end ! test Italics and ! wikitext foo ! html< p >< i > foo</i ></p > !end ! test Italics and ! wikitext foo ! html< p >< i > foo</i ></p > !end ! test Italics and ! wikitext foo ! html< p >< i > foo</i ></p > !end ! test Italics and ! wikitext foo ! html php< p >< i > foo</i ></p > ! html parsoid< p >< i > foo</i >< b ></b ></p > !end ! test Italics and ! wikitext foo ! html< p >< i > foo</i ></p > !end ! test Italics and ! wikitext foo ! html< p >< b > foo</b ></p > !end ! test Italics and ! wikitext foo ! html< p >< b > foo</b ></p > !end ! test Italics and ! wikitext foo ! html php< p >< b > foo</b ></p > ! html parsoid< p >< b > foo</b >< i ></i ></p > !end ! test Italics and ! wikitext foo ! html< p >< i > foo</i ></p > !end ! test Italics and ! wikitext foo ! html< p >< b > foo</b ></p > !end ! test Italics and ! wikitext foo ! html< p >< b > foo</b ></p > !end ! test Italics and ! wikitext foo ! html php< p >< b > foo</b ></p > ! html parsoid< p >< b > foo</b >< i ></i ></p > !end ! test Italics and ! options ! wikitext foo ! html< p >< b >< i > foo</i ></b ></p > !end ! test Italics and ! wikitext foo ! html< p >< i >< b > foo</b ></i ></p > !end ! test Italics and ! wikitext foo ! html< p >< i >< b > foo</b ></i ></p > !end ! test Italics and ! wikitext foo ! html< p >< i >< b > foo</b ></i ></p > !end ! test Italics and ! wikitext foo bar ! 
html< p >< i > foo< b > bar</b ></i ></p > !end ! test Italics and ! wikitext foo bar ! html< p >< i > foo< b > bar</b ></i ></p > !end ! test Italics and ! wikitext foo bar ! html< p >< i > foo< b > bar</b ></i ></p > !end ! test Italics and ! wikitext foo bar ! html php< p >< b > foo</b > bar</p > ! html parsoid< p >< b > foo</b > bar< i ></i ></p > !end ! test Italics and ! wikitext foo bar ! html php< p >< b > foo</b > bar</p > ! html parsoid< p >< b > foo</b > bar< b ></b ></p > !end ! test Italics and ! wikitext this is about foo s family ! html< p >< i > this is about< b > foo s family</b ></i ></p > !end ! test Italics and ! wikitext this is about foo s family ! html< p >< i > this is about< b > foo s</b > family</i ></p > !end ! test Italics and ! wikitext this is about foo s family ! html< p >< b > this is about< i > foo</i ></b >< i > s family</i ></p > !end ! test Italics and ! options ! wikitext this is about foo s family ! html< p >< i > this is about</i > foo< b > s family</b ></p > !end ! test Italics and ! wikitext this is about foo s family ! html< p >< b > this is about< i > foo s</i > family</b ></p > !end ! test Italicized possessive ! wikitext The s talk page ! html< p > The< i >< a href="/wiki/Main_Page" title="Main Page"> Main Page</a ></i > s talk page</p > ! end ! test Parsoid only
Definition: parserTests.txt:396
SFH_NO_HASH
const SFH_NO_HASH
Definition: Defines.php:240
$file
if(PHP_SAPI !='cli') $file
Definition: UtfNormalTest2.php:30
$wgArticlePath
$wgArticlePath
Definition: img_auth.php:48
$rev
presenting them properly to the user as errors is done by the caller return true use this to change the list i e etc $rev
Definition: hooks.txt:1337
it
=Architecture==Two class hierarchies are used to provide the functionality associated with the different content models:*Content interface(and AbstractContent base class) define functionality that acts on the concrete content of a page, and *ContentHandler base class provides functionality specific to a content model, but not acting on concrete content. The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content. These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text. All manipulation and analysis of page content must be done via the appropriate methods of the Content object. For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers. The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id). Also Title, WikiPage and Revision now have getContentHandler() methods for convenience. ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page. ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type. However, it is recommended to instead use WikiPage::getContent() resp. Revision::getContent() to get a page 's content as a Content object. These two methods should be the ONLY way in which page content is accessed. Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides(). This is similar to what WikiPage::getActionOverrides() was already doing.==Serialization==With the ContentHandler facility, page content no longer has to be text based. Objects implementing the Content interface are used to represent and handle the content internally. 
For storage and data exchange, each content model supports at least one serialization format via ContentHandler::serializeContent($content). The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats(). Content serialization formats are identified using MIME type like strings. The following formats are built in:*text/x-wiki - wikitext *text/javascript - for js pages *text/css - for css pages *text/plain - for future use, e.g. with plain text messages. *text/html - for future use, e.g. with plain html messages. *application/vnd.php.serialized - for future use with the api and for extensions *application/json - for future use with the api, and for use by extensions *application/xml - for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant. Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model and format of the content should always be handled explicitly. Without that information, interpretation of the provided content is not reliable. The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export. Also note that the API will provide encapsulated, serialized content - so if the API was called with format=json, and contentformat is also json(or rather, application/json), the page content is represented as a string containing an escaped json structure. Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form.==Compatibility==The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content. 
However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page 's content model, and will now generate warnings when used. Most importantly, the following functions have been deprecated:*Revisions::getText() and Revisions::getRawText() is deprecated in favor Revisions::getContent() *WikiPage::getText() is deprecated in favor WikiPage::getContent() Also, the old Article::getContent()(which returns text) is superceded by Article::getContentObject(). However, both methods should be avoided since they do not provide clean access to the page 's actual content. For instance, they may return a system message for non-existing pages. Use WikiPage::getContent() instead. Code that relies on a textual representation of the page content should eventually be rewritten. However, ContentHandler::getContentText() provides a stop-gap that can be used to get text for a page. Its behavior is controlled by $wgContentHandlerTextFallback it
Definition: contenthandler.txt:107
broken
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped broken
Definition: hooks.txt:1530
$args
if( $line===false) $args
Definition: cdb.php:62
OT_HTML
const OT_HTML
Definition: Defines.php:230
DB_SLAVE
const DB_SLAVE
Definition: Defines.php:55
Title
Represents a title within MediaWiki.
Definition: Title.php:35
CoreParserFunctions\cascadingsources
static cascadingsources( $parser, $title='')
Returns the sources of any cascading protection acting on a specified page.
Definition: CoreParserFunctions.php:1170
like
For a write use something like
Definition: database.txt:26
type
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
Definition: postgres.txt:22
MagicWord\getSubstIDs
static getSubstIDs()
Get an array of parser substitution modifier IDs.
Definition: MagicWord.php:265
wfMatchesDomainList
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
Definition: GlobalFunctions.php:902
Sanitizer\fixTagAttributes
static fixTagAttributes( $text, $element)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML,...
Definition: Sanitizer.php:1004
output
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling output() to send it all. It could be easily changed to send incrementally if that becomes useful
Xml\isWellFormedXmlFragment
static isWellFormedXmlFragment( $text)
Check if a string is a well-formed XML fragment.
Definition: Xml.php:716
$term
the value to return A Title object or null whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2125
in
Prior to maintenance scripts were a hodgepodge of code that had no cohesion or formal method of action Beginning in
Definition: maintenance.txt:1
on
We ve cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else It s much easier for someone working with this code to see what s _really_ going on
Definition: hooks.txt:86
things
magicword txt Magic Words are some phrases used in the wikitext They are used for two things
Definition: magicword.txt:4
TS_UNIX
const TS_UNIX
Unix time - the number of seconds since 1970-01-01 00:00:00 UTC.
Definition: GlobalFunctions.php:2426
used
you don t have to do a grep find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:117
$output
& $output
Definition: hooks.txt:375
format
if the prop value should be in the metadata multi language array format
Definition: hooks.txt:1230
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
Linker\tocUnindent
static tocUnindent( $level)
Finish one or more sublevels on the Table of Contents.
Definition: Linker.php:1624
wfFindFile
wfFindFile( $title, $options=array())
Find a file.
Definition: GlobalFunctions.php:3702
Linker\makeMediaLinkFile
static makeMediaLinkFile(Title $title, $file, $html='')
Create a direct link to a given uploaded file.
Definition: Linker.php:993
StringUtils\delimiterReplace
static delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags='')
Perform an operation equivalent to.
Definition: StringUtils.php:256
Linker\makeImageLink
static makeImageLink($parser, Title $title, $file, $frameParams=array(), $handlerParams=array(), $time=false, $query="", $widthOption=null)
Given parameters derived from [[Image:Foo|options...]], generate the HTML that that syntax inserts in...
Definition: Linker.php:539
ParserLimitReportPrepare
namespace are movable Hooks may change this value to override the return value of MWNamespace::isMovable(). 'NewRevisionFromEditComplete' if it s text intended for display in a monospaced font $report should be output in English ParserLimitReportPrepare
Definition: hooks.txt:1746
MagicWord\getDoubleUnderscoreArray
static getDoubleUnderscoreArray()
Get a MagicWordArray of double-underscore entities.
Definition: MagicWord.php:288
Sanitizer\normalizeCharReferences
static normalizeCharReferences( $text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1316
Linker\normalizeSubpageLink
static normalizeSubpageLink( $contextTitle, $target, &$text)
Definition: Linker.php:1474
SiteStats\views
static views()
Definition: SiteStats.php:108
Sanitizer\decodeTagAttributes
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1183
from
Please log in again after you receive it</td >< td > s a saved copy from
Definition: All_system_messages.txt:3297
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:87
that
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
Definition: deferred.txt:11
$t
$t
Definition: testCompression.php:65
Title\legalChars
static legalChars()
Get a regex character class describing the legal characters in a link.
Definition: Title.php:529
Sanitizer\decodeCharReferences
static decodeCharReferences( $text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string.
Definition: Sanitizer.php:1413
$error
usually copyright or history_copyright This message must be in HTML not wikitext $subpages will be ignored and the rest of subPageSubtitle() will run. 'SkinTemplateBuildNavUrlsNav_urlsAfterPermalink' whether MediaWiki currently thinks this is a CSS JS page Hooks may change this value to override the return value of Title::isCssOrJsPage(). 'TitleIsAlwaysKnown' whether MediaWiki currently thinks this page is known isMovable() always returns false. $title whether MediaWiki currently thinks this page is movable Hooks may change this value to override the return value of Title::isMovable(). 'TitleIsWikitextPage' whether MediaWiki currently thinks this is a wikitext page Hooks may change this value to override the return value of Title::isWikitextPage() 'TitleMove' use UploadVerification and UploadVerifyFile instead where the first element is the message key and the remaining elements are used as parameters to the message based on mime etc Preferred in most cases over UploadVerification object with all info about the upload string as detected by MediaWiki Handlers will typically only apply for specific mime types object & $error
Definition: hooks.txt:2573
$e
if( $useReadline) $e
Definition: eval.php:66
$query
return true to allow those checks to and false if checking is done use this to change the tables headers temp or archived zone change it to an object instance and return false override the list derivative used the name of the old file when set the default code will be skipped add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1105
$attribs
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1530
User
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:59
$res
$res
Definition: database.txt:21
LinkCache\singleton
static & singleton()
Get an instance of this class.
Definition: LinkCache.php:49
MWTimestamp\getLocalInstance
static getLocalInstance( $ts=false)
Get a timestamp instance in the server local timezone ($wgLocaltimezone)
Definition: MWTimestamp.php:373
wfGetCaller
wfGetCaller( $level=2)
Get the name of the function which called this function wfGetCaller( 1 ) is the function with the wfG...
Definition: GlobalFunctions.php:1941
MWHttpRequest\factory
static factory( $url, $options=null)
Generate a new request object.
Definition: HttpFunctions.php:284
Linker\makeExternalImage
static makeExternalImage( $url, $alt='')
Return the code for images which were added via external links, via Parser::maybeMakeExternalImage().
Definition: Linker.php:487
lt
div & lt
Definition: hooks.txt:1631
SiteStats\edits
static edits()
Definition: SiteStats.php:116
if
if(!function_exists('version_compare')||version_compare(phpversion(), '5.3.2')< 0)
Definition: api.php:37
line
I won t presume to tell you how to I m just describing the methods I chose to use for myself If you do choose to follow these it will probably be easier for you to collaborate with others on the but if you want to contribute without by all means do which work well I also use K &R brace matching style I know that s a religious issue for so if you want to use a style that puts opening braces on the next line
Definition: design.txt:79
SpecialVersion\getVersion
static getVersion( $flags='')
Return a string of the MediaWiki version with SVN revision if available.
Definition: SpecialVersion.php:246
$wgTitle
if(! $wgRequest->checkUrlExtension()) if(! $wgEnableAPI) $wgTitle
Definition: api.php:63
page
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values my talk page
Definition: hooks.txt:1956
wfRandomString
wfRandomString( $length=32)
Get a random string containing a number of pseudo-random hex characters.
Definition: GlobalFunctions.php:300
Sanitizer\removeHTMLtags
static removeHTMLtags( $text, $processCallback=null, $args=array(), $extratags=array(), $removetags=array())
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments.
Definition: Sanitizer.php:366
$type
$type
Definition: testCompression.php:46
MWTidy\tidy
static tidy( $text)
Interface with html tidy, used if $wgUseTidy = true.
Definition: Tidy.php:126