MediaWiki  master
Sanitizer.php
Go to the documentation of this file.
1 <?php
31 class Sanitizer {
/**
 * Regular expression matching the character references handled by
 * normalizeCharReferences() and decodeCharReferences().
 *
 * Capture groups: 1 = named entity, 2 = decimal code point,
 * 3 = hexadecimal code point, 4 = a bare ampersand.
 * The /x modifier makes the line breaks inside the pattern insignificant.
 */
const CHAR_REFS_REGEX =
	'/&([A-Za-z0-9\x80-\xff]+);
	 |&\#([0-9]+);
	 |&\#[xX]([0-9A-Fa-f]+);
	 |(&)/x';
41 
46  const ELEMENT_BITS_REGEX = '!^(/?)([A-Za-z][^\t\n\v />\0]*+)([^>]*?)(/?>)([^<]*)$!';
47 
56  const EVIL_URI_PATTERN = '!(^|\s|\*/\s*)(javascript|vbscript)([^\w]|$)!i';
57  const XMLNS_ATTRIBUTE_PATTERN = "/^xmlns:[:A-Z_a-z-.0-9]+$/";
58 
64  private static $htmlEntities = array(
65  'Aacute' => 193,
66  'aacute' => 225,
67  'Acirc' => 194,
68  'acirc' => 226,
69  'acute' => 180,
70  'AElig' => 198,
71  'aelig' => 230,
72  'Agrave' => 192,
73  'agrave' => 224,
74  'alefsym' => 8501,
75  'Alpha' => 913,
76  'alpha' => 945,
77  'amp' => 38,
78  'and' => 8743,
79  'ang' => 8736,
80  'apos' => 39, // New in XHTML & HTML 5; avoid in output for compatibility with IE.
81  'Aring' => 197,
82  'aring' => 229,
83  'asymp' => 8776,
84  'Atilde' => 195,
85  'atilde' => 227,
86  'Auml' => 196,
87  'auml' => 228,
88  'bdquo' => 8222,
89  'Beta' => 914,
90  'beta' => 946,
91  'brvbar' => 166,
92  'bull' => 8226,
93  'cap' => 8745,
94  'Ccedil' => 199,
95  'ccedil' => 231,
96  'cedil' => 184,
97  'cent' => 162,
98  'Chi' => 935,
99  'chi' => 967,
100  'circ' => 710,
101  'clubs' => 9827,
102  'cong' => 8773,
103  'copy' => 169,
104  'crarr' => 8629,
105  'cup' => 8746,
106  'curren' => 164,
107  'dagger' => 8224,
108  'Dagger' => 8225,
109  'darr' => 8595,
110  'dArr' => 8659,
111  'deg' => 176,
112  'Delta' => 916,
113  'delta' => 948,
114  'diams' => 9830,
115  'divide' => 247,
116  'Eacute' => 201,
117  'eacute' => 233,
118  'Ecirc' => 202,
119  'ecirc' => 234,
120  'Egrave' => 200,
121  'egrave' => 232,
122  'empty' => 8709,
123  'emsp' => 8195,
124  'ensp' => 8194,
125  'Epsilon' => 917,
126  'epsilon' => 949,
127  'equiv' => 8801,
128  'Eta' => 919,
129  'eta' => 951,
130  'ETH' => 208,
131  'eth' => 240,
132  'Euml' => 203,
133  'euml' => 235,
134  'euro' => 8364,
135  'exist' => 8707,
136  'fnof' => 402,
137  'forall' => 8704,
138  'frac12' => 189,
139  'frac14' => 188,
140  'frac34' => 190,
141  'frasl' => 8260,
142  'Gamma' => 915,
143  'gamma' => 947,
144  'ge' => 8805,
145  'gt' => 62,
146  'harr' => 8596,
147  'hArr' => 8660,
148  'hearts' => 9829,
149  'hellip' => 8230,
150  'Iacute' => 205,
151  'iacute' => 237,
152  'Icirc' => 206,
153  'icirc' => 238,
154  'iexcl' => 161,
155  'Igrave' => 204,
156  'igrave' => 236,
157  'image' => 8465,
158  'infin' => 8734,
159  'int' => 8747,
160  'Iota' => 921,
161  'iota' => 953,
162  'iquest' => 191,
163  'isin' => 8712,
164  'Iuml' => 207,
165  'iuml' => 239,
166  'Kappa' => 922,
167  'kappa' => 954,
168  'Lambda' => 923,
169  'lambda' => 955,
170  'lang' => 9001,
171  'laquo' => 171,
172  'larr' => 8592,
173  'lArr' => 8656,
174  'lceil' => 8968,
175  'ldquo' => 8220,
176  'le' => 8804,
177  'lfloor' => 8970,
178  'lowast' => 8727,
179  'loz' => 9674,
180  'lrm' => 8206,
181  'lsaquo' => 8249,
182  'lsquo' => 8216,
183  'lt' => 60,
184  'macr' => 175,
185  'mdash' => 8212,
186  'micro' => 181,
187  'middot' => 183,
188  'minus' => 8722,
189  'Mu' => 924,
190  'mu' => 956,
191  'nabla' => 8711,
192  'nbsp' => 160,
193  'ndash' => 8211,
194  'ne' => 8800,
195  'ni' => 8715,
196  'not' => 172,
197  'notin' => 8713,
198  'nsub' => 8836,
199  'Ntilde' => 209,
200  'ntilde' => 241,
201  'Nu' => 925,
202  'nu' => 957,
203  'Oacute' => 211,
204  'oacute' => 243,
205  'Ocirc' => 212,
206  'ocirc' => 244,
207  'OElig' => 338,
208  'oelig' => 339,
209  'Ograve' => 210,
210  'ograve' => 242,
211  'oline' => 8254,
212  'Omega' => 937,
213  'omega' => 969,
214  'Omicron' => 927,
215  'omicron' => 959,
216  'oplus' => 8853,
217  'or' => 8744,
218  'ordf' => 170,
219  'ordm' => 186,
220  'Oslash' => 216,
221  'oslash' => 248,
222  'Otilde' => 213,
223  'otilde' => 245,
224  'otimes' => 8855,
225  'Ouml' => 214,
226  'ouml' => 246,
227  'para' => 182,
228  'part' => 8706,
229  'permil' => 8240,
230  'perp' => 8869,
231  'Phi' => 934,
232  'phi' => 966,
233  'Pi' => 928,
234  'pi' => 960,
235  'piv' => 982,
236  'plusmn' => 177,
237  'pound' => 163,
238  'prime' => 8242,
239  'Prime' => 8243,
240  'prod' => 8719,
241  'prop' => 8733,
242  'Psi' => 936,
243  'psi' => 968,
244  'quot' => 34,
245  'radic' => 8730,
246  'rang' => 9002,
247  'raquo' => 187,
248  'rarr' => 8594,
249  'rArr' => 8658,
250  'rceil' => 8969,
251  'rdquo' => 8221,
252  'real' => 8476,
253  'reg' => 174,
254  'rfloor' => 8971,
255  'Rho' => 929,
256  'rho' => 961,
257  'rlm' => 8207,
258  'rsaquo' => 8250,
259  'rsquo' => 8217,
260  'sbquo' => 8218,
261  'Scaron' => 352,
262  'scaron' => 353,
263  'sdot' => 8901,
264  'sect' => 167,
265  'shy' => 173,
266  'Sigma' => 931,
267  'sigma' => 963,
268  'sigmaf' => 962,
269  'sim' => 8764,
270  'spades' => 9824,
271  'sub' => 8834,
272  'sube' => 8838,
273  'sum' => 8721,
274  'sup' => 8835,
275  'sup1' => 185,
276  'sup2' => 178,
277  'sup3' => 179,
278  'supe' => 8839,
279  'szlig' => 223,
280  'Tau' => 932,
281  'tau' => 964,
282  'there4' => 8756,
283  'Theta' => 920,
284  'theta' => 952,
285  'thetasym' => 977,
286  'thinsp' => 8201,
287  'THORN' => 222,
288  'thorn' => 254,
289  'tilde' => 732,
290  'times' => 215,
291  'trade' => 8482,
292  'Uacute' => 218,
293  'uacute' => 250,
294  'uarr' => 8593,
295  'uArr' => 8657,
296  'Ucirc' => 219,
297  'ucirc' => 251,
298  'Ugrave' => 217,
299  'ugrave' => 249,
300  'uml' => 168,
301  'upsih' => 978,
302  'Upsilon' => 933,
303  'upsilon' => 965,
304  'Uuml' => 220,
305  'uuml' => 252,
306  'weierp' => 8472,
307  'Xi' => 926,
308  'xi' => 958,
309  'Yacute' => 221,
310  'yacute' => 253,
311  'yen' => 165,
312  'Yuml' => 376,
313  'yuml' => 255,
314  'Zeta' => 918,
315  'zeta' => 950,
316  'zwj' => 8205,
317  'zwnj' => 8204
318  );
319 
323  private static $htmlEntityAliases = array(
324  'רלמ' => 'rlm',
325  'رلم' => 'rlm',
326  );
327 
331  private static $attribsRegex;
332 
/**
 * Return the regular expression used to pick attribute name/value pairs
 * out of the raw text between a tag name and its closing bracket.
 *
 * The pattern is assembled on first use and cached in self::$attribsRegex.
 * Capture groups: 1 = attribute name, 2 = the whole "= value" part,
 * 3 = double-quoted value, 4 = single-quoted value, 5 = unquoted value,
 * 6 = unquoted "#hex" colour value.
 *
 * @return string PCRE pattern
 */
static function getAttribsRegex() {
	if ( self::$attribsRegex !== null ) {
		return self::$attribsRegex;
	}

	$nameStart = '[:A-Z_a-z0-9]';
	$nameChar = '[:A-Z_a-z-.0-9]';
	$ws = '[\x09\x0a\x0d\x20]';

	self::$attribsRegex =
		"/(?:^|$ws)({$nameStart}{$nameChar}*)
		  ($ws*=$ws*
			(?:
			 # The attribute value: quoted or alone
			  \"([^<\"]*)\"
			 | '([^<']*)'
			 |  ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
			 |  (\#[0-9a-fA-F]+) # Technically wrong, but lots of
								 # colors are specified like this.
								 # We'll be normalizing it.
			)
		)?(?=$ws|\$)/sx";

	return self::$attribsRegex;
}
360 
/**
 * Clean up HTML: strip HTML comments, escape every tag that is not on the
 * allowed list, and reduce the attributes of allowed tags to the approved set.
 *
 * The text is split on '<'. Each fragment is matched against
 * self::ELEMENT_BITS_REGEX; fragments that are not a well-formed, allowed tag
 * are emitted with the '<' escaped as '&lt;'. When $wgUseTidy is off, a tag
 * stack is also maintained so that mismatched close tags are escaped and
 * still-open tags are closed at the end of the text.
 *
 * @param string $text
 * @param callable|null $processCallback Called as ( &$params, $args ) on the
 *   raw attribute text of each allowed tag before validation
 * @param array $args Passed through to $processCallback
 * @param array $extratags Additional tag names to allow (treated as paired tags)
 * @param array $removetags Tag names to disallow
 * @return string
 */
public static function removeHTMLtags( $text, $processCallback = null,
	$args = array(), $extratags = array(), $removetags = array()
) {
	global $wgUseTidy, $wgAllowMicrodataAttributes, $wgAllowImageTag;

	static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
		$htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;

	// Base our staticInitialised variable off of the global config state so that if the globals
	// are changed (like in the screwed up test system) we will re-initialise the settings.
	$globalContext = implode( '-', compact( 'wgAllowMicrodataAttributes', 'wgAllowImageTag' ) );
	if ( !$staticInitialised || $staticInitialised != $globalContext ) {

		$htmlpairsStatic = array( # Tags that must be closed
			'b', 'bdi', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
			'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
			'strike', 'strong', 'tt', 'var', 'div', 'center',
			'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
			'ruby', 'rb', 'rp', 'rt', 'rtc', 'p', 'span', 'abbr', 'dfn',
			'kbd', 'samp', 'data', 'time', 'mark'
		);
		$htmlsingle = array(
			'br', 'wbr', 'hr', 'li', 'dt', 'dd'
		);
		$htmlsingleonly = array( # Elements that cannot have close tags
			'br', 'wbr', 'hr'
		);
		if ( $wgAllowMicrodataAttributes ) {
			$htmlsingle[] = $htmlsingleonly[] = 'meta';
			$htmlsingle[] = $htmlsingleonly[] = 'link';
		}
		$htmlnest = array( # Tags that can be nested--??
			'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
			'li', 'dl', 'dt', 'dd', 'font', 'big', 'small', 'sub', 'sup', 'span',
			'var', 'kbd', 'samp', 'em', 'strong', 'q', 'ruby', 'bdo'
		);
		$tabletags = array( # Can only appear inside table, we will close them
			'td', 'th', 'tr',
		);
		$htmllist = array( # Tags used by list
			'ul', 'ol',
		);
		$listtags = array( # Tags that can appear in a list
			'li',
		);

		if ( $wgAllowImageTag ) {
			$htmlsingle[] = 'img';
			$htmlsingleonly[] = 'img';
		}

		$htmlsingleallowed = array_unique( array_merge( $htmlsingle, $tabletags ) );
		$htmlelementsStatic = array_unique( array_merge( $htmlsingle, $htmlpairsStatic, $htmlnest ) );

		# Convert them all to hashtables for faster lookup
		$vars = array( 'htmlpairsStatic', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags',
			'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelementsStatic' );
		foreach ( $vars as $var ) {
			$$var = array_flip( $$var );
		}
		$staticInitialised = $globalContext;
	}
	# Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays
	$extratags = array_flip( $extratags );
	$removetags = array_flip( $removetags );
	$htmlpairs = array_merge( $extratags, $htmlpairsStatic );
	$htmlelements = array_diff_key( array_merge( $extratags, $htmlelementsStatic ), $removetags );

	# Remove HTML comments
	$text = Sanitizer::removeHTMLcomments( $text );
	$bits = explode( '<', $text );
	# Everything before the first '<' is plain text
	$text = str_replace( '>', '&gt;', array_shift( $bits ) );
	if ( !$wgUseTidy ) {
		$tagstack = $tablestack = array();
		foreach ( $bits as $x ) {
			$regs = array();
			# $slash: Does the current element start with a '/'?
			# $t: Current element name
			# $params: String between element name and >
			# $brace: Ending '>' or '/>'
			# $rest: Everything until the next element of $bits
			if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs ) ) {
				list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
			} else {
				$slash = $t = $params = $brace = $rest = null;
			}

			$badtag = false;
			# A failed match leaves $t null; the cast yields '', which is never an allowed element
			$t = strtolower( (string)$t );
			if ( isset( $htmlelements[$t] ) ) {
				# Check our stack
				if ( $slash && isset( $htmlsingleonly[$t] ) ) {
					$badtag = true;
				} elseif ( $slash ) {
					# Closing a tag... is it the one we just opened?
					$ot = array_pop( $tagstack );

					if ( $ot != $t ) {
						if ( isset( $htmlsingleallowed[$ot] ) ) {
							# Pop all elements with an optional close tag
							# and see if we find a match below them
							$optstack = array();
							array_push( $optstack, $ot );
							$ot = array_pop( $tagstack );
							while ( $ot != $t && isset( $htmlsingleallowed[$ot] ) ) {
								array_push( $optstack, $ot );
								$ot = array_pop( $tagstack );
							}
							if ( $t != $ot ) {
								# No match. Push the optional elements back again
								$badtag = true;
								$ot = array_pop( $optstack );
								while ( $ot ) {
									array_push( $tagstack, $ot );
									$ot = array_pop( $optstack );
								}
							}
						} else {
							array_push( $tagstack, $ot );

							# <li> can be nested in <ul> or <ol>, skip those cases:
							if ( !isset( $htmllist[$ot] ) || !isset( $listtags[$t] ) ) {
								$badtag = true;
							}
						}
					} else {
						if ( $t == 'table' ) {
							# Leaving a table: resume the stack saved when it was opened
							$tagstack = array_pop( $tablestack );
						}
					}
					$newparams = '';
				} else {
					# Keep track for later
					if ( isset( $tabletags[$t] ) && !in_array( 'table', $tagstack ) ) {
						$badtag = true;
					} elseif ( in_array( $t, $tagstack ) && !isset( $htmlnest[$t] ) ) {
						$badtag = true;
					# Is it a self closed htmlpair ? (bug 5487)
					} elseif ( $brace == '/>' && isset( $htmlpairs[$t] ) ) {
						$badtag = true;
					} elseif ( isset( $htmlsingleonly[$t] ) ) {
						# Hack to force empty tag for unclosable elements
						$brace = '/>';
					} elseif ( isset( $htmlsingle[$t] ) ) {
						# Hack to not close $htmlsingle tags
						$brace = null;
						# Still need to push this optionally-closed tag to
						# the tag stack so that we can match end tags
						# instead of marking them as bad.
						array_push( $tagstack, $t );
					} elseif ( isset( $tabletags[$t] ) && in_array( $t, $tagstack ) ) {
						// New table tag but forgot to close the previous one
						$text .= "</$t>";
					} else {
						if ( $t == 'table' ) {
							# Entering a table: save the current stack and start a fresh one
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}

					# Replace any variables or template parameters with
					# plaintext results.
					if ( is_callable( $processCallback ) ) {
						call_user_func_array( $processCallback, array( &$params, $args ) );
					}

					if ( !Sanitizer::validateTag( $params, $t ) ) {
						$badtag = true;
					}

					# Strip non-approved attributes from the tag
					$newparams = Sanitizer::fixTagAttributes( $params, $t );
				}
				if ( !$badtag ) {
					$rest = str_replace( '>', '&gt;', $rest );
					$close = ( $brace == '/>' && !$slash ) ? ' /' : '';
					$text .= "<$slash$t$newparams$close>$rest";
					continue;
				}
			}
			# Not an acceptable tag: emit the fragment as escaped text
			$text .= '&lt;' . str_replace( '>', '&gt;', $x );
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ( $t = array_pop( $tagstack ) ) ) {
			$text .= "</$t>\n";
			if ( $t == 'table' ) {
				$tagstack = array_pop( $tablestack );
			}
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			$regs = array();
			# Only destructure on a successful match, as in the non-tidy branch;
			# a fragment such as " b" (from "a < b") does not match and must
			# simply be escaped rather than raise undefined-offset errors.
			if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs ) ) {
				list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
			} else {
				$slash = $t = $params = $brace = $rest = null;
			}

			$badtag = false;
			$t = strtolower( (string)$t );
			if ( isset( $htmlelements[$t] ) ) {
				if ( is_callable( $processCallback ) ) {
					call_user_func_array( $processCallback, array( &$params, $args ) );
				}

				if ( !Sanitizer::validateTag( $params, $t ) ) {
					$badtag = true;
				}

				$newparams = Sanitizer::fixTagAttributes( $params, $t );
				if ( !$badtag ) {
					$rest = str_replace( '>', '&gt;', $rest );
					$text .= "<$slash$t$newparams$brace$rest";
					continue;
				}
			}
			$text .= '&lt;' . str_replace( '>', '&gt;', $x );
		}
	}
	return $text;
}
604 
/**
 * Strip every terminated HTML comment from the text.
 *
 * When a comment sits alone on its line (only spaces between it and the
 * newline on each side), the comment, those spaces and one of the two
 * newlines are removed together. An unterminated comment and everything
 * after it is left as is.
 *
 * @param string $text
 * @return string
 */
public static function removeHTMLcomments( $text ) {
	$open = strpos( $text, '<!--' );
	while ( $open !== false ) {
		$close = strpos( $text, '-->', $open + 4 );
		if ( $close === false ) {
			# Unterminated comment; stop here
			break;
		}
		# Offset of the first character after "-->"
		$after = $close + 3;

		# Start from the character just before the comment and widen the
		# span over any run of spaces on either side.
		$from = max( $open - 1, 0 );
		$length = $after - $from;
		for ( ; $from > 0 && substr( $text, $from, 1 ) === ' '; $from-- ) {
			$length++;
		}
		while ( substr( $text, $from + $length, 1 ) === ' ' ) {
			$length++;
		}

		$aloneOnLine = substr( $text, $from, 1 ) === "\n"
			&& substr( $text, $from + $length, 1 ) === "\n";
		if ( $aloneOnLine ) {
			# Drop comment, padding and both newlines; put one newline back
			$text = substr_replace( $text, "\n", $from, $length + 1 );
		} else {
			# Drop the comment only
			$text = substr_replace( $text, '', $open, $after - $open );
		}

		$open = strpos( $text, '<!--' );
	}
	return $text;
}
647 
/**
 * Check that a tag carries the attributes it needs to be acceptable.
 *
 * Only <meta> and <link> have requirements: both need an itemprop
 * attribute, <meta> additionally needs content, and <link> needs href.
 * Every other element passes.
 *
 * @param string|array $params Raw attribute text of the tag, as handed over
 *   by removeHTMLtags(); an already decoded name => value array is accepted too
 * @param string $element Lower-case element name
 * @return bool
 */
static function validateTag( $params, $element ) {
	# The checks below index $params by attribute name, so raw attribute
	# text has to be turned into an array first.
	if ( !is_array( $params ) ) {
		$params = Sanitizer::decodeTagAttributes( $params );
	}

	if ( $element == 'meta' || $element == 'link' ) {
		if ( !isset( $params['itemprop'] ) ) {
			// <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content
			return false;
		}
		if ( $element == 'meta' && !isset( $params['content'] ) ) {
			// <meta> must have a content="" for the itemprop
			return false;
		}
		if ( $element == 'link' && !isset( $params['href'] ) ) {
			// <link> must have an associated href=""
			return false;
		}
	}

	return true;
}
680 
/**
 * Filter an attribute array against the whitelist of the given element.
 *
 * Delegates to validateAttributes() with the list returned by
 * attributeWhitelist() for $element.
 *
 * @param array $attribs Attribute name => value pairs
 * @param string $element Element name
 * @return array The attributes that passed validation
 */
static function validateTagAttributes( $attribs, $element ) {
	return Sanitizer::validateAttributes( $attribs,
		Sanitizer::attributeWhitelist( $element ) );
}
700 
/**
 * Filter an attribute array down to the entries that are allowed and safe.
 *
 * Rules applied per attribute, in order:
 * - xmlns:* declarations are kept when $wgAllowRdfaAttributes is on and the
 *   value does not match self::EVIL_URI_PATTERN;
 * - anything that is neither "data-*" nor on the whitelist is dropped;
 * - style values are passed through checkCss();
 * - id values are passed through escapeId() with the 'noninitial' option;
 * - role is kept only when it is exactly "presentation";
 * - RDFa / microdata / rel / rev values matching self::EVIL_URI_PATTERN are dropped;
 * - href and src must start with one of the protocols from wfUrlProtocols().
 *
 * @param array $attribs Attribute name => value pairs
 * @param array $whitelist List of allowed attribute names
 * @return array The attributes that passed validation
 */
static function validateAttributes( $attribs, $whitelist ) {
	global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes;

	$whitelist = array_flip( $whitelist );
	$hrefExp = '/^(' . wfUrlProtocols() . ')[^\s]+$/';

	$out = array();
	foreach ( $attribs as $attribute => $value ) {
		#allow XML namespace declaration if RDFa is enabled
		if ( $wgAllowRdfaAttributes && preg_match( self::XMLNS_ATTRIBUTE_PATTERN, $attribute ) ) {
			if ( !preg_match( self::EVIL_URI_PATTERN, $value ) ) {
				$out[$attribute] = $value;
			}

			continue;
		}

		# Allow any attribute beginning with "data-"
		if ( !preg_match( '/^data-/i', $attribute ) && !isset( $whitelist[$attribute] ) ) {
			continue;
		}

		# Strip javascript "expression" from stylesheets.
		# http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
		if ( $attribute == 'style' ) {
			$value = Sanitizer::checkCss( $value );
		}

		if ( $attribute === 'id' ) {
			$value = Sanitizer::escapeId( $value, 'noninitial' );
		}

		# WAI-ARIA
		# http://www.w3.org/TR/wai-aria/
		# http://www.whatwg.org/html/elements.html#wai-aria
		# For now we only support role="presentation" until we work out what roles should be
		# usable by content and we ensure that our code explicitly rejects patterns that
		# violate HTML5's ARIA restrictions.
		if ( $attribute === 'role' && $value !== 'presentation' ) {
			continue;
		}

		// RDFa and microdata properties allow URLs, URIs and/or CURIs.
		// Check them for sanity.
		if ( $attribute === 'rel' || $attribute === 'rev'
			# RDFa
			|| $attribute === 'about' || $attribute === 'property'
			|| $attribute === 'resource' || $attribute === 'datatype'
			|| $attribute === 'typeof'
			# HTML5 microdata
			|| $attribute === 'itemid' || $attribute === 'itemprop'
			|| $attribute === 'itemref' || $attribute === 'itemscope'
			|| $attribute === 'itemtype'
		) {
			//Paranoia. Allow "simple" values but suppress javascript
			if ( preg_match( self::EVIL_URI_PATTERN, $value ) ) {
				continue;
			}
		}

		# NOTE: even though elements using href/src are not allowed directly, supply
		#       validation code that can be used by tag hook handlers, etc
		if ( $attribute === 'href' || $attribute === 'src' ) {
			if ( !preg_match( $hrefExp, $value ) ) {
				continue; //drop any href or src attributes not using an allowed protocol.
				// NOTE: this also drops all relative URLs
			}
		}

		// If this attribute was previously set, override it.
		// Output should only have one attribute of each name.
		$out[$attribute] = $value;
	}

	if ( $wgAllowMicrodataAttributes ) {
		# itemtype, itemid, itemref don't make sense without itemscope
		if ( !array_key_exists( 'itemscope', $out ) ) {
			unset( $out['itemtype'] );
			unset( $out['itemid'] );
			unset( $out['itemref'] );
		}
		# TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
	}
	return $out;
}
801 
/**
 * Merge two attribute arrays; entries of $b win over entries of $a.
 *
 * The one exception is "class": when both arrays carry a different string
 * value for it, the two class lists are concatenated ($a first) and
 * de-duplicated instead of $b replacing $a.
 *
 * @param array $a
 * @param array $b
 * @return array
 */
static function mergeAttributes( $a, $b ) {
	$merged = array_merge( $a, $b );

	if ( !isset( $a['class'] ) || !isset( $b['class'] ) ) {
		return $merged;
	}

	$first = $a['class'];
	$second = $b['class'];
	if ( !is_string( $first ) || !is_string( $second ) || $first === $second ) {
		return $merged;
	}

	$names = preg_split( '/\s+/', "$first $second", -1, PREG_SPLIT_NO_EMPTY );
	$merged['class'] = implode( ' ', array_unique( $names ) );

	return $merged;
}
824 
/**
 * Normalize a CSS value so that later keyword checks see what a browser
 * would effectively see.
 *
 * Steps, in order: decode character references; decode CSS backslash
 * escapes and line continuations; map fullwidth forms and a few look-alike
 * letters to ASCII; replace comments with spaces (unless the whole value
 * is a single comment) and cut the value at an unterminated comment start;
 * turn "s" followed by a repeat/iteration/prolonged-sound mark into "ss".
 *
 * @param string $value
 * @return string
 */
public static function normalizeCss( $value ) {

	// Decode character references like &#123;
	$value = Sanitizer::decodeCharReferences( $value );

	// Decode escape sequences and line continuation
	// See the grammar in the CSS 2 spec, appendix D.
	// This has to be done AFTER decoding character references.
	// This means it isn't possible for this function to return
	// unsanitized escape sequences. It is possible to manufacture
	// input that contains character references that decode to
	// escape sequences that decode to character references, but
	// it's OK for the return value to contain character references
	// because the caller is supposed to escape those anyway.
	static $decodeRegex;
	if ( !$decodeRegex ) {
		$space = '[\\x20\\t\\r\\n\\f]';
		$nl = '(?:\\n|\\r\\n|\\r|\\f)';
		$backslash = '\\\\';
		$decodeRegex = "/ $backslash
			(?:
				($nl) |  # 1. Line continuation
				([0-9A-Fa-f]{1,6})$space? |  # 2. character number
				(.) | # 3. backslash cancelling special meaning
				() | # 4. backslash at end of string
			)/xu";
	}
	$value = preg_replace_callback( $decodeRegex,
		array( __CLASS__, 'cssDecodeCallback' ), $value );

	// Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat as ascii.
	// The range is written with code point escapes so that it cannot be
	// corrupted by character-set conversions of this source file.
	$value = preg_replace_callback(
		'/[\x{FF01}-\x{FF3B}\x{FF3D}-\x{FF5A}]/u', // U+FF01 to U+FF5A, excluding U+FF3C (bug 58088)
		function ( $matches ) {
			$cp = utf8ToCodepoint( $matches[0] );
			if ( $cp === false ) {
				return '';
			}
			return chr( $cp - 65248 ); // ASCII range \x21-\x7A
		},
		$value
	);

	// Convert more characters IE6 might treat as ascii
	// U+0280, U+0274, U+207F, U+029F, U+026A, U+207D, U+208D
	$value = str_replace(
		array( 'ʀ', 'ɴ', 'ⁿ', 'ʟ', 'ɪ', '⁽', '₍' ),
		array( 'r', 'n', 'n', 'l', 'i', '(', '(' ),
		$value
	);

	// Let the value through if it's nothing but a single comment, to
	// allow other functions which may reject it to pass some error
	// message through.
	if ( !preg_match( '! ^ \s* /\* [^*\\/]* \*/ \s* $ !x', $value ) ) {
		// Remove any comments; IE gets token splitting wrong
		// This must be done AFTER decoding character references and
		// escape sequences, because those steps can introduce comments
		// This step cannot introduce character references or escape
		// sequences, because it replaces comments with spaces rather
		// than removing them completely.
		$value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value );

		// Remove anything after a comment-start token, to guard against
		// incorrect client implementations.
		$commentPos = strpos( $value, '/*' );
		if ( $commentPos !== false ) {
			$value = substr( $value, 0, $commentPos );
		}
	}

	// S followed by repeat, iteration, or prolonged sound marks,
	// which IE will treat as "ss"
	$value = preg_replace(
		'/s(?:
			\xE3\x80\xB1 | # U+3031
			\xE3\x82\x9D | # U+309D
			\xE3\x83\xBC | # U+30FC
			\xE3\x83\xBD | # U+30FD
			\xEF\xB9\xBC | # U+FE7C
			\xEF\xB9\xBD | # U+FE7D
			\xEF\xBD\xB0   # U+FF70
		)/ix',
		'ss',
		$value
	);

	return $value;
}
923 
924 
/**
 * Normalize a CSS value and reject it if it looks unsafe.
 *
 * The value is first run through normalizeCss(). If the result contains a
 * control character, or one of the constructs listed in the pattern below
 * (expression, filter:, accelerator:, the -o-link family, url(, image(,
 * image-set(), a CSS comment naming the problem is returned instead of
 * the value.
 *
 * @param string $value
 * @return string The normalized value, or a "/* ... *" + "/" marker comment
 */
static function checkCss( $value ) {
	$value = self::normalizeCss( $value );

	// Reject control characters
	$hasControlChar = preg_match( '/[\000-\010\013\016-\037\177]/', $value );
	if ( $hasControlChar ) {
		return '/* invalid control char */';
	}

	// Reject problematic keywords
	$insecure = '! expression
			| filter\s*:
			| accelerator\s*:
			| -o-link\s*:
			| -o-link-source\s*:
			| -o-replace\s*:
			| url\s*\(
			| image\s*\(
			| image-set\s*\(
		!ix';
	if ( preg_match( $insecure, $value ) ) {
		return '/* insecure input */';
	}

	return $value;
}
964 
/**
 * preg_replace_callback handler that resolves one CSS backslash escape.
 *
 * Group 1 is a line continuation (removed), group 2 a hexadecimal
 * character number, group 3 a single escaped character; if none of them is
 * set, the match was a lone backslash. Newline, both quote characters and
 * the backslash are re-emitted as a hex escape followed by a space; any
 * other character is returned unescaped.
 *
 * @param array $matches
 * @return string
 */
static function cssDecodeCallback( $matches ) {
	if ( $matches[1] !== '' ) {
		// Line continuation
		return '';
	}

	if ( $matches[2] !== '' ) {
		$decoded = codepointToUtf8( hexdec( $matches[2] ) );
	} elseif ( $matches[3] !== '' ) {
		$decoded = $matches[3];
	} else {
		$decoded = '\\';
	}

	// Loose comparison on purpose, mirroring the == checks this replaces
	$needsEscape = in_array( $decoded, array( "\n", '"', "'", '\\' ) );
	if ( !$needsEscape ) {
		// Decode unnecessary escape
		return $decoded;
	}

	// These characters need to be escaped in strings
	// Clean up the escape sequence to avoid parsing errors by clients
	return '\\' . dechex( ord( $decoded ) ) . ' ';
}
989 
/**
 * Turn the raw attribute text of a tag into safe, re-encoded attribute text.
 *
 * The text is decoded into name/value pairs, filtered for the given
 * element, and encoded again. Blank input yields an empty string.
 *
 * @param string $text Raw attribute text
 * @param string $element Element name
 * @return string
 */
static function fixTagAttributes( $text, $element ) {
	if ( trim( $text ) == '' ) {
		return '';
	}

	return Sanitizer::safeEncodeTagAttributes(
		Sanitizer::validateTagAttributes(
			Sanitizer::decodeTagAttributes( $text ),
			$element
		)
	);
}
1019 
/**
 * Encode a string for use as an HTML attribute value.
 *
 * Besides the htmlspecialchars() escaping (quotes included), newline,
 * carriage return and tab are written as numeric character references,
 * because attribute decoding normalizes whitespace and would otherwise
 * not preserve them.
 *
 * @param string $text
 * @return string
 */
static function encodeAttribute( $text ) {
	$whitespaceRefs = array(
		"\n" => '&#10;',
		"\r" => '&#13;',
		"\t" => '&#9;',
	);

	return strtr( htmlspecialchars( $text, ENT_QUOTES ), $whitespaceRefs );
}
1039 
/**
 * Encode an attribute value and additionally armor it against later
 * wikitext parsing passes.
 *
 * On top of encodeAttribute(), characters and words that could start a
 * template, link, magic word or table cell are replaced by character
 * references, and the colon of every URL protocol returned by
 * wfUrlProtocols() is replaced through armorLinksCallback().
 *
 * @param string $text
 * @return string
 */
static function safeEncodeAttribute( $text ) {
	# Templates and links may be expanded in later parsing,
	# creating invalid or dangerous output. Suppress this.
	$armor = array(
		'<' => '&lt;', // This should never happen,
		'>' => '&gt;', // we've received invalid input
		'"' => '&quot;', // which should have been escaped.
		'{' => '&#123;',
		'[' => '&#91;',
		"''" => '&#39;&#39;',
		'ISBN' => '&#73;SBN',
		'RFC' => '&#82;FC',
		'PMID' => '&#80;MID',
		'|' => '&#124;',
		'__' => '&#95;_',
	);
	$armored = strtr( Sanitizer::encodeAttribute( $text ), $armor );

	# Stupid hack
	return preg_replace_callback(
		'/((?i)' . wfUrlProtocols() . ')/',
		array( 'Sanitizer', 'armorLinksCallback' ),
		$armored
	);
}
1072 
/**
 * Turn arbitrary text into a string usable as an HTML id.
 *
 * Character references are decoded first. With $wgExperimentalHtmlIds on
 * (and no 'legacy' option) runs of whitespace, underscores, quotes and
 * "&#%" collapse to a single underscore. Otherwise spaces become
 * underscores, the text is URL-encoded, "%3A" becomes ":" and every other
 * "%" becomes "."; unless 'noninitial' is given, an "x" is prepended when
 * the result does not start with an ASCII letter.
 *
 * @param string $id
 * @param string|array $options 'noninitial' and/or 'legacy'
 * @return string
 */
static function escapeId( $id, $options = array() ) {
	global $wgExperimentalHtmlIds;
	$options = (array)$options;

	$id = Sanitizer::decodeCharReferences( $id );

	$html5Style = $wgExperimentalHtmlIds && !in_array( 'legacy', $options );
	if ( $html5Style ) {
		$id = trim( preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id ), '_' );
		// An empty result must have been all whitespace to start with.
		return $id === '' ? '_' : $id;
	}

	// HTML4-style escaping
	$id = urlencode( strtr( $id, ' ', '_' ) );
	$id = str_replace( array( '%3A', '%' ), array( ':', '.' ), $id );

	$mayStartWithAnything = in_array( 'noninitial', $options );
	if ( !$mayStartWithAnything && !preg_match( '/^[a-zA-Z]/', $id ) ) {
		// Initial character must be a letter!
		$id = "x$id";
	}

	return $id;
}
1136 
/**
 * Turn arbitrary text into a string usable as a CSS class name.
 *
 * A leading digit or hyphen, control characters, space, ASCII punctuation
 * and the no-break space are each replaced by an underscore; runs of
 * underscores are then collapsed and trailing underscores removed.
 *
 * @param string $class
 * @return string
 */
static function escapeClass( $class ) {
	// Convert ugly stuff to underscores ...
	$class = preg_replace(
		'/(^[0-9\\-])|[\\x00-\\x20!"#$%&\'()*+,.\\/:;<=>?@[\\]^`{|}~]|\\xC2\\xA0/',
		'_',
		$class
	);
	// ... and kill underscores in ugly places
	$class = preg_replace( '/_+/', '_', $class );

	return rtrim( $class, '_' );
}
1155 
/**
 * Escape text for HTML output without double-escaping entities that are
 * already present in it.
 *
 * Character references are decoded first, so that "&amp;" or "&#160;" in
 * the input end up escaped exactly once rather than as "&amp;amp;".
 *
 * @param string $html
 * @return string
 */
static function escapeHtmlAllowEntities( $html ) {
	$html = Sanitizer::decodeCharReferences( $html );
	# It seems wise to escape ' as well as ", as a matter of course. Can't
	# hurt.
	$html = htmlspecialchars( $html, ENT_QUOTES );
	return $html;
}
1170 
/**
 * preg_replace_callback handler: replace every colon in the first capture
 * group with the character reference "&#58;".
 *
 * @param array $matches
 * @return string
 */
private static function armorLinksCallback( $matches ) {
	$protocol = $matches[1];

	return strtr( $protocol, array( ':' => '&#58;' ) );
}
1179 
/**
 * Parse the raw attribute text of a tag into a name => value array.
 *
 * Attribute names are lower-cased. Each value has its whitespace runs
 * collapsed to single spaces, is trimmed, and has its character
 * references decoded. If a name occurs more than once, the last
 * occurrence wins. Blank or unparsable text yields an empty array.
 *
 * @param string $text
 * @return array
 */
public static function decodeTagAttributes( $text ) {
	if ( trim( $text ) == '' ) {
		return array();
	}

	$attribs = array();
	$pairs = array();
	if ( !preg_match_all(
		self::getAttribsRegex(),
		$text,
		$pairs,
		PREG_SET_ORDER ) ) {
		return $attribs;
	}

	foreach ( $pairs as $set ) {
		$attribute = strtolower( $set[1] );
		# Pick whichever value capture group matched for this attribute
		$value = Sanitizer::getTagAttributeCallback( $set );

		// Normalize whitespace
		$value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
		$value = trim( $value );

		// Decode character references
		$attribs[$attribute] = Sanitizer::decodeCharReferences( $value );
	}
	return $attribs;
}
1216 
/**
 * Serialize an attribute array back into tag text.
 *
 * Every pair is written as name="value", each preceded by one space; the
 * name goes through htmlspecialchars() and the value through
 * safeEncodeAttribute(). An empty array yields an empty string.
 *
 * @param array $assoc_array Attribute name => value pairs
 * @return string
 */
public static function safeEncodeTagAttributes( $assoc_array ) {
	$html = '';
	foreach ( $assoc_array as $attribute => $value ) {
		$html .= ' ' . htmlspecialchars( $attribute )
			. '="' . Sanitizer::safeEncodeAttribute( $value ) . '"';
	}

	return $html;
}
1234 
/**
 * Pick the attribute value out of one match set produced by the
 * attribute regular expression.
 *
 * The value capture groups are probed from the last to the first:
 * 6 = unquoted "#hex" colour, 5 = unquoted, 4 = single-quoted,
 * 3 = double-quoted. An attribute written without any value gets its own
 * name as value.
 *
 * @param array $set One PREG_SET_ORDER match
 * @throws MWException When a "= value" part matched but no value group did
 * @return string
 */
private static function getTagAttributeCallback( $set ) {
	for ( $group = 6; $group >= 3; $group-- ) {
		if ( isset( $set[$group] ) ) {
			return $set[$group];
		}
	}

	if ( !isset( $set[2] ) ) {
		# In XHTML, attributes must have a value.
		# For 'reduced' form, return explicitly the attribute name here.
		return $set[1];
	}

	throw new MWException( "Tag conditions not met. This should never happen and is a bug." );
}
1264 
/**
 * Replace every CR LF pair, and every single space, carriage return,
 * line feed or tab, with one space character. Runs are not collapsed.
 *
 * @param string $text
 * @return string
 */
private static function normalizeWhitespace( $text ) {
	$whitespace = '/\r\n|[\x20\x0d\x0a\x09]/';

	return preg_replace( $whitespace, ' ', $text );
}
1275 
/**
 * Collapse every run of spaces and underscores in a section name into a
 * single space and trim the result.
 *
 * NOTE(review): the declaration line of this method was missing from the
 * file, leaving the body orphaned; the name and signature are restored
 * from the upstream class - confirm against callers.
 *
 * @param string $section
 * @return string
 */
static function normalizeSectionNameWhitespace( $section ) {
	return trim( preg_replace( '/[ _]+/', ' ', $section ) );
}
1287 
/**
 * Rewrite every character reference (and bare ampersand) in the text by
 * running each match of self::CHAR_REFS_REGEX through
 * normalizeCharReferencesCallback().
 *
 * @param string $text
 * @return string
 */
static function normalizeCharReferences( $text ) {
	$callback = array( 'Sanitizer', 'normalizeCharReferencesCallback' );

	return preg_replace_callback( self::CHAR_REFS_REGEX, $callback, $text );
}
1309 
/**
 * preg_replace_callback handler for normalizeCharReferences().
 *
 * Dispatches on the capture group of self::CHAR_REFS_REGEX that matched:
 * named entities go to normalizeEntity(), decimal references to
 * decCharReference(), hexadecimal references to hexCharReference(). When
 * none applies (a bare ampersand) or the helper rejects the reference by
 * returning null, the matched text is HTML-escaped instead.
 *
 * @param array $matches
 * @return string
 */
static function normalizeCharReferencesCallback( $matches ) {
	$ret = null;
	if ( $matches[1] != '' ) {
		$ret = Sanitizer::normalizeEntity( $matches[1] );
	} elseif ( $matches[2] != '' ) {
		$ret = Sanitizer::decCharReference( $matches[2] );
	} elseif ( $matches[3] != '' ) {
		$ret = Sanitizer::hexCharReference( $matches[3] );
	}
	if ( is_null( $ret ) ) {
		return htmlspecialchars( $matches[0] );
	} else {
		return $ret;
	}
}
1329 
/**
 * Normalize one named entity (given without "&" and ";").
 *
 * Aliases from self::$htmlEntityAliases are rewritten to their canonical
 * name; lt, gt, amp and quot are kept as named entities; every other name
 * found in self::$htmlEntities becomes a decimal character reference; an
 * unknown name is returned with its ampersand escaped.
 *
 * @param string $name
 * @return string
 */
static function normalizeEntity( $name ) {
	if ( isset( self::$htmlEntityAliases[$name] ) ) {
		$canonical = self::$htmlEntityAliases[$name];
		return '&' . $canonical . ';';
	}

	$keptAsNamed = array( 'lt', 'gt', 'amp', 'quot' );
	if ( in_array( $name, $keptAsNamed ) ) {
		return "&$name;";
	}

	if ( isset( self::$htmlEntities[$name] ) ) {
		$codepoint = self::$htmlEntities[$name];
		return '&#' . $codepoint . ';';
	}

	return "&amp;$name;";
}
1351 
/**
 * Build a normalized decimal character reference.
 *
 * @param string|int $codepoint Decimal code point
 * @return string|null "&#N;", or null when validateCodepoint() rejects it
 */
static function decCharReference( $codepoint ) {
	$point = intval( $codepoint );

	return Sanitizer::validateCodepoint( $point )
		? sprintf( '&#%d;', $point )
		: null;
}
1364 
/**
 * Build a normalized hexadecimal character reference (lower-case digits).
 *
 * @param string $codepoint Hexadecimal code point, without prefix
 * @return string|null "&#xN;", or null when validateCodepoint() rejects it
 */
static function hexCharReference( $codepoint ) {
	$point = hexdec( $codepoint );

	return Sanitizer::validateCodepoint( $point )
		? sprintf( '&#x%x;', $point )
		: null;
}
1377 
/**
 * Tell whether a code point is acceptable in a character reference.
 *
 * Accepted: tab, line feed, carriage return, U+0020..U+D7FF,
 * U+E000..U+FFFD and U+10000..U+10FFFF.
 *
 * @param int $codepoint
 * @return bool
 */
private static function validateCodepoint( $codepoint ) {
	// The three permitted control characters
	if ( $codepoint == 0x09 || $codepoint == 0x0a || $codepoint == 0x0d ) {
		return true;
	}
	// Basic plane below the surrogate block
	if ( $codepoint >= 0x20 && $codepoint <= 0xd7ff ) {
		return true;
	}
	// Basic plane above the surrogate block, without U+FFFE / U+FFFF
	if ( $codepoint >= 0xe000 && $codepoint <= 0xfffd ) {
		return true;
	}
	// Supplementary planes
	return $codepoint >= 0x10000 && $codepoint <= 0x10ffff;
}
1391 
/**
 * Replace every character reference in the text by running each match of
 * self::CHAR_REFS_REGEX through decodeCharReferencesCallback().
 *
 * @param string $text
 * @return string
 */
public static function decodeCharReferences( $text ) {
	$callback = array( 'Sanitizer', 'decodeCharReferencesCallback' );

	return preg_replace_callback( self::CHAR_REFS_REGEX, $callback, $text );
}
1405 
/**
 * Decode character references like decodeCharReferences() does and, if at
 * least one replacement was made, pass the result through
 * $wgContLang->normalize().
 *
 * Text in which the pattern matched nothing is returned unchanged and
 * without the normalize() call.
 *
 * @param string $text
 * @return string
 */
public static function decodeCharReferencesAndNormalize( $text ) {
	# $wgContLang lives in the global scope and must be imported explicitly
	global $wgContLang;
	$text = preg_replace_callback(
		self::CHAR_REFS_REGEX,
		array( 'Sanitizer', 'decodeCharReferencesCallback' ),
		$text, /* limit */ -1, $count );

	if ( $count ) {
		return $wgContLang->normalize( $text );
	} else {
		return $text;
	}
}
1429 
/**
 * Callback for preg_replace_callback() over self::CHAR_REFS_REGEX.
 *
 * @param array $matches Match groups: [1] named entity, [2] decimal digits,
 *   [3] hexadecimal digits; all empty for a lone ampersand
 * @return string Replacement text for the whole match
 */
static function decodeCharReferencesCallback( $matches ) {
	if ( $matches[1] != '' ) {
		# &name;
		return Sanitizer::decodeEntity( $matches[1] );
	} elseif ( $matches[2] != '' ) {
		# &#123;
		return Sanitizer::decodeChar( intval( $matches[2] ) );
	} elseif ( $matches[3] != '' ) {
		# &#x1f;
		return Sanitizer::decodeChar( hexdec( $matches[3] ) );
	}
	# Last case should be an ampersand by itself
	return $matches[0];
}
1445 
/**
 * Return the UTF-8 string for a codepoint if validateCodepoint() accepts it,
 * otherwise the UTF8_REPLACEMENT constant.
 *
 * @param int $codepoint
 * @return string
 */
static function decodeChar( $codepoint ) {
	return Sanitizer::validateCodepoint( $codepoint )
		? codepointToUtf8( $codepoint )
		: UTF8_REPLACEMENT;
}
1460 
/**
 * Decode a named entity to its UTF-8 character.
 *
 * The name is first mapped through self::$htmlEntityAliases. If the
 * resulting name is not in self::$htmlEntities, the entity is returned in
 * its "&name;" form, using the alias target when one was applied.
 *
 * @param string $name Entity name without '&' and ';'
 * @return string UTF-8 character, or "&name;" for unknown entities
 */
static function decodeEntity( $name ) {
	$resolved = isset( self::$htmlEntityAliases[$name] )
		? self::$htmlEntityAliases[$name]
		: $name;

	if ( !isset( self::$htmlEntities[$resolved] ) ) {
		# Unknown entity: leave it as written
		return "&$resolved;";
	}

	return codepointToUtf8( self::$htmlEntities[$resolved] );
}
1479 
/**
 * Fetch the whitelist of acceptable attributes for a given element name.
 *
 * @param string $element Element name, used as a key into the array
 *   returned by setupAttributeWhitelist()
 * @return array Allowed attribute names; empty array for elements that
 *   have no entry
 */
static function attributeWhitelist( $element ) {
	# $list must be populated here; otherwise every lookup falls through to
	# the empty-array default.
	$list = self::setupAttributeWhitelist();

	return isset( $list[$element] )
		? $list[$element]
		: array();
}
1492 
/**
 * Foreach array key (an allowed HTML element), return an array of allowed
 * attributes.
 *
 * The table is kept in a function-static variable and rebuilt only when the
 * combination of $wgAllowRdfaAttributes and $wgAllowMicrodataAttributes
 * differs from the one it was last built with.
 *
 * @return array Map of element name => list of allowed attribute names
 */
1498  static function setupAttributeWhitelist() {
1499  global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes;
1500  static $whitelist, $staticInitialised;
1501 
# String built from the current values of the two globals; stored in
# $staticInitialised below and compared against it to detect config changes.
1502  $globalContext = implode( '-', compact( 'wgAllowRdfaAttributes', 'wgAllowMicrodataAttributes' ) );
1503 
# Reuse the previously built table if the globals have not changed since.
1504  if ( $whitelist !== null && $staticInitialised == $globalContext ) {
1505  return $whitelist;
1506  }
1507 
# Attributes merged into most element entries below.
1508  $common = array(
1509  # HTML
1510  'id',
1511  'class',
1512  'style',
1513  'lang',
1514  'dir',
1515  'title',
1516 
1517  # WAI-ARIA
1518  'role',
1519  );
1520 
1521  if ( $wgAllowRdfaAttributes ) {
1522  # RDFa attributes as specified in section 9 of
1523  # http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
1524  $common = array_merge( $common, array(
1525  'about', 'property', 'resource', 'datatype', 'typeof',
1526  ) );
1527  }
1528 
1529  if ( $wgAllowMicrodataAttributes ) {
1530  # add HTML5 microdata tags as specified by
1531  # http://www.whatwg.org/html/microdata.html#the-microdata-model
1532  $common = array_merge( $common, array(
1533  'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype'
1534  ) );
1535  }
1536 
# Shared building blocks for the per-element lists:
# $block = $common plus 'align'; $tablealign and $tablecell are merged
# into the table row/cell entries.
1537  $block = array_merge( $common, array( 'align' ) );
1538  $tablealign = array( 'align', 'valign' );
1539  $tablecell = array(
1540  'abbr',
1541  'axis',
1542  'headers',
1543  'scope',
1544  'rowspan',
1545  'colspan',
1546  'nowrap', # deprecated
1547  'width', # deprecated
1548  'height', # deprecated
1549  'bgcolor', # deprecated
1550  );
1551 
1552  # Numbers refer to sections in HTML 4.01 standard describing the element.
1553  # See: http://www.w3.org/TR/html4/
1554  $whitelist = array(
1555  # 7.5.4
1556  'div' => $block,
1557  'center' => $common, # deprecated
1558  'span' => $common,
1559 
1560  # 7.5.5
1561  'h1' => $block,
1562  'h2' => $block,
1563  'h3' => $block,
1564  'h4' => $block,
1565  'h5' => $block,
1566  'h6' => $block,
1567 
1568  # 7.5.6
1569  # address
1570 
1571  # 8.2.4
1572  'bdo' => $common,
1573 
1574  # 9.2.1
1575  'em' => $common,
1576  'strong' => $common,
1577  'cite' => $common,
1578  'dfn' => $common,
1579  'code' => $common,
1580  'samp' => $common,
1581  'kbd' => $common,
1582  'var' => $common,
1583  'abbr' => $common,
1584  # acronym
1585 
1586  # 9.2.2
1587  'blockquote' => array_merge( $common, array( 'cite' ) ),
1588  'q' => array_merge( $common, array( 'cite' ) ),
1589 
1590  # 9.2.3
1591  'sub' => $common,
1592  'sup' => $common,
1593 
1594  # 9.3.1
1595  'p' => $block,
1596 
1597  # 9.3.2
1598  'br' => array_merge( $common, array( 'clear' ) ),
1599 
1600  # http://www.whatwg.org/html/text-level-semantics.html#the-wbr-element
1601  'wbr' => $common,
1602 
1603  # 9.3.4
1604  'pre' => array_merge( $common, array( 'width' ) ),
1605 
1606  # 9.4
1607  'ins' => array_merge( $common, array( 'cite', 'datetime' ) ),
1608  'del' => array_merge( $common, array( 'cite', 'datetime' ) ),
1609 
1610  # 10.2
1611  'ul' => array_merge( $common, array( 'type' ) ),
1612  'ol' => array_merge( $common, array( 'type', 'start' ) ),
1613  'li' => array_merge( $common, array( 'type', 'value' ) ),
1614 
1615  # 10.3
1616  'dl' => $common,
1617  'dd' => $common,
1618  'dt' => $common,
1619 
1620  # 11.2.1
1621  'table' => array_merge( $common,
1622  array( 'summary', 'width', 'border', 'frame',
1623  'rules', 'cellspacing', 'cellpadding',
1624  'align', 'bgcolor',
1625  ) ),
1626 
1627  # 11.2.2
1628  'caption' => $block,
1629 
1630  # 11.2.3
1631  'thead' => $common,
1632  'tfoot' => $common,
1633  'tbody' => $common,
1634 
1635  # 11.2.4
1636  'colgroup' => array_merge( $common, array( 'span' ) ),
1637  'col' => array_merge( $common, array( 'span' ) ),
1638 
1639  # 11.2.5
1640  'tr' => array_merge( $common, array( 'bgcolor' ), $tablealign ),
1641 
1642  # 11.2.6
1643  'td' => array_merge( $common, $tablecell, $tablealign ),
1644  'th' => array_merge( $common, $tablecell, $tablealign ),
1645 
1646  # 12.2
1647  # NOTE: <a> is not allowed directly, but the attrib
1648  # whitelist is used from the Parser object
1649  'a' => array_merge( $common, array( 'href', 'rel', 'rev' ) ), # rel/rev esp. for RDFa
1650 
1651  # 13.2
1652  # Not usually allowed, but may be used for extension-style hooks
1653  # such as <math> when it is rasterized, or if $wgAllowImageTag is
1654  # true
1655  'img' => array_merge( $common, array( 'alt', 'src', 'width', 'height' ) ),
1656 
1657  # 15.2.1
1658  'tt' => $common,
1659  'b' => $common,
1660  'i' => $common,
1661  'big' => $common,
1662  'small' => $common,
1663  'strike' => $common,
1664  's' => $common,
1665  'u' => $common,
1666 
1667  # 15.2.2
1668  'font' => array_merge( $common, array( 'size', 'color', 'face' ) ),
1669  # basefont
1670 
1671  # 15.3
1672  'hr' => array_merge( $common, array( 'width' ) ),
1673 
1674  # HTML Ruby annotation text module, simple ruby only.
1675  # http://www.whatwg.org/html/text-level-semantics.html#the-ruby-element
1676  'ruby' => $common,
1677  # rbc
1678  'rb' => $common,
1679  'rp' => $common,
1680  'rt' => $common, #array_merge( $common, array( 'rbspan' ) ),
1681  'rtc' => $common,
1682 
1683  # MathML root element, where used for extensions
1684  # 'title' may not be 100% valid here; it's XHTML
1685  # http://www.w3.org/TR/REC-MathML/
1686  'math' => array( 'class', 'style', 'id', 'title' ),
1687 
1688  # HTML 5 section 4.6
1689  'bdi' => $common,
1690 
1691  # HTML5 elements, defined by:
1692  # http://www.whatwg.org/html/
1693  'data' => array_merge( $common, array( 'value' ) ),
1694  'time' => array_merge( $common, array( 'datetime' ) ),
1695  'mark' => $common,
1696 
1697  // meta and link are only permitted by removeHTMLtags when Microdata
1698  // is enabled so we don't bother adding a conditional to hide these
1699  // Also meta and link are only valid in WikiText as Microdata elements
1700  // (ie: validateTag rejects tags missing the attributes needed for Microdata)
1701  // So we don't bother including $common attributes that have no purpose.
1702  'meta' => array( 'itemprop', 'content' ),
1703  'link' => array( 'itemprop', 'href' ),
1704  );
1705 
# Remember which configuration this table was built for.
1706  $staticInitialised = $globalContext;
1707 
1708  return $whitelist;
1709  }
1710 
/**
 * Take a fragment of (potentially invalid) HTML and return a version with
 * any tags removed, encoded as plain text.
 *
 * @param string $text HTML fragment
 * @return string Text with '<'...'>' spans removed, character references
 *   decoded and whitespace normalized
 */
static function stripAllTags( $text ) {
	# Remove everything delimited by '<' and '>'
	$tagless = StringUtils::delimiterReplace( '<', '>', '', $text );

	# Decode &entities;, then normalize whitespace in the decoded text
	return self::normalizeWhitespace( self::decodeCharReferences( $tagless ) );
}
1731 
/**
 * Hack up a private DOCTYPE with HTML's standard entity declarations.
 *
 * One <!ENTITY> declaration is emitted for every entry of
 * self::$htmlEntities, mapping the name to its decimal character reference.
 *
 * @return string DOCTYPE declaration with an internal subset
 */
static function hackDocType() {
	$declarations = '';
	foreach ( self::$htmlEntities as $entity => $codepoint ) {
		$declarations .= "<!ENTITY $entity \"&#$codepoint;\">";
	}

	return "<!DOCTYPE html [\n" . $declarations . "]>\n";
}
1749 
/**
 * Clean up a URL: decode character references, percent-encode control
 * characters and delimiters, and strip characters ignored in IDNs from the
 * host part.
 *
 * @param string $url
 * @return string The cleaned URL. When the input does not start with a
 *   "protocol:" prefix, only the decode and escape steps are applied.
 */
static function cleanUrl( $url ) {
	# Normalize any HTML entities in input. They will be
	# re-escaped by makeExternalLink().
	$decoded = Sanitizer::decodeCharReferences( $url );

	# Escape any control characters introduced by the above step
	$escaped = preg_replace_callback(
		'/[\][<>"\\x00-\\x20\\x7F\|]/',
		array( __CLASS__, 'cleanUrlCallback' ),
		$decoded
	);

	# Split into protocol, optional "//host" part and the remainder
	$parts = array();
	if ( !preg_match( '!^([^:]+:)(//[^/]+)?(.*)$!iD', $escaped, $parts ) ) {
		# No protocol prefix; nothing further to validate
		return $escaped;
	}

	// Characters that will be ignored in IDNs.
	// http://tools.ietf.org/html/3454#section-3.1
	// Strip them before further processing so blacklists and such work.
	$ignorable = "/
		\\s|          # general whitespace
		\xc2\xad|     # 00ad SOFT HYPHEN
		\xe1\xa0\x86| # 1806 MONGOLIAN TODO SOFT HYPHEN
		\xe2\x80\x8b| # 200b ZERO WIDTH SPACE
		\xe2\x81\xa0| # 2060 WORD JOINER
		\xef\xbb\xbf| # feff ZERO WIDTH NO-BREAK SPACE
		\xcd\x8f|     # 034f COMBINING GRAPHEME JOINER
		\xe1\xa0\x8b| # 180b MONGOLIAN FREE VARIATION SELECTOR ONE
		\xe1\xa0\x8c| # 180c MONGOLIAN FREE VARIATION SELECTOR TWO
		\xe1\xa0\x8d| # 180d MONGOLIAN FREE VARIATION SELECTOR THREE
		\xe2\x80\x8c| # 200c ZERO WIDTH NON-JOINER
		\xe2\x80\x8d| # 200d ZERO WIDTH JOINER
		[\xef\xb8\x80-\xef\xb8\x8f] # fe00-fe0f VARIATION SELECTOR-1-16
		/xuD";

	# $parts: [1] protocol, [2] host part, [3] rest
	$cleanHost = preg_replace( $ignorable, '', $parts[2] );

	// @todo FIXME: Validate hostnames here

	return $parts[1] . $cleanHost . $parts[3];
}
1796 
/**
 * Callback used by cleanUrl(): URL-encode the matched text.
 *
 * @param array $matches Regex match array; only element 0 is used
 * @return string urlencode()d form of the whole match
 */
static function cleanUrlCallback( $matches ) {
	$matched = $matches[0];

	return urlencode( $matched );
}
1804 
/**
 * Check whether a string looks like a valid e-mail address.
 *
 * The 'isValidEmailAddr' hook runs first; if it returns false, whatever the
 * hook stored in $result is returned and the built-in check is skipped.
 * Otherwise the address must consist of a user part, an '@' and one or
 * more dot-separated domain labels, matched case-insensitively.
 *
 * @param string $addr E-mail address to check
 * @return bool|mixed True or false from the built-in check, or the value a
 *   hook handler placed in $result (null if it set nothing)
 */
public static function validateEmail( $addr ) {
	$result = null;
	if ( !Hooks::run( 'isValidEmailAddr', array( $addr, &$result ) ) ) {
		return $result;
	}

	// Please note the strings below are enclosed in brackets [] in the
	// pattern, which makes the hyphen "-" a range indicator. Hence it is
	// double backslashed below.
	// See bug 26948
	$rfc5322_atext = "a-z0-9!#$%&'*+\\-\/=?^_`{|}~";
	$rfc1034_ldh_str = "a-z0-9\\-";

	// The D modifier makes "$" match only at the very end of the subject.
	// Without it "$" also matches just before a final newline, so an
	// address followed by a line break would be accepted.
	$html5_email_regexp = "/
	^                         # start of string
	[$rfc5322_atext\\.]+       # user part which is liberal :p
	@                         # the at sign
	[$rfc1034_ldh_str]+        # First domain part
	(\\.[$rfc1034_ldh_str]+)*  # Following part prefixed with a dot
	$                         # End of string
	/ixD"; // case Insensitive, eXtended, Dollar end only

	return (bool)preg_match( $html5_email_regexp, $addr );
}
1856 }
utf8ToCodepoint($char)
Determine the Unicode codepoint of a single-character UTF-8 sequence.
static decCharReference($codepoint)
Definition: Sanitizer.php:1356
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1694
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1188
static normalizeEntity($name)
If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD, return the equivalent numeric entity re...
Definition: Sanitizer.php:1340
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of and they certainly aren t ideal for someone who s installing MediaWiki as root
or
false for read/write
static safeEncodeTagAttributes($assoc_array)
Build a partial tag string from an associative array of attribute names and values as returned by dec...
Definition: Sanitizer.php:1224
static normalizeCharReferencesCallback($matches)
Definition: Sanitizer.php:1314
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:322
static removeHTMLtags($text, $processCallback=null, $args=array(), $extratags=array(), $removetags=array())
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:372
static setupAttributeWhitelist()
Foreach array key (an allowed HTML element), return an array of allowed attributes.
Definition: Sanitizer.php:1498
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1694
=Architecture==Two class hierarchies are used to provide the functionality associated with the different content models:*Content interface(and AbstractContent base class) define functionality that acts on the concrete content of a page, and *ContentHandler base class provides functionality specific to a content model, but not acting on concrete content.The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content.These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text.All manipulation and analysis of page content must be done via the appropriate methods of the Content object.For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers.The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id).Also Title, WikiPage and Revision now have getContentHandler() methods for convenience.ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page.ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type.However, it is recommended to instead use WikiPage::getContent() resp.Revision::getContent() to get a page's content as a Content object.These two methods should be the ONLY way in which page content is accessed.Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides().This is similar to what WikiPage::getActionOverrides() was already doing.==Serialization==With the ContentHandler facility, page content no longer has to be text based.Objects implementing the Content interface are used to represent and handle the content internally.For storage and data exchange, each content model supports at least one serialization format via 
ContentHandler::serializeContent($content).The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats().Content serialization formats are identified using MIME type like strings.The following formats are built in:*text/x-wiki-wikitext *text/javascript-for js pages *text/css-for css pages *text/plain-for future use, e.g.with plain text messages.*text/html-for future use, e.g.with plain html messages.*application/vnd.php.serialized-for future use with the api and for extensions *application/json-for future use with the api, and for use by extensions *application/xml-for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant.Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model and format of the content should always be handled explicitly.Without that information, interpretation of the provided content is not reliable.The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export.Also note that the API will provide encapsulated, serialized content-so if the API was called with format=json, and contentformat is also json(or rather, application/json), the page content is represented as a string containing an escaped json structure.Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form.==Compatibility==The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content.However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page's content model, and will now generate warnings when used.Most importantly, the following functions have been deprecated:*Revisions::getText() and Revisions::getRawText() is 
deprecated in favor of Revisions::getContent() *WikiPage::getText() is deprecated in favor of WikiPage::getContent() Also, the old Article::getContent() (which returns text) is superseded by Article::getContentObject(). However, both methods should be avoided since they do not provide clean access to the page's actual content. For instance, they may return a system message for non-existing pages. Use WikiPage::getContent() instead. Code that relies on a textual representation of the page content should eventually be rewritten. However, ContentHandler::getContentText() provides a stop-gap that can be used to get text for a page. Its behavior is controlled by $wgContentHandlerTextFallback it
static decodeCharReferencesCallback($matches)
Definition: Sanitizer.php:1434
static cssDecodeCallback($matches)
Definition: Sanitizer.php:969
$value
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by similarly to how extensions are installed You can then make that skin the default by adding
Definition: skin.txt:57
and how to run hooks for an and one after Each event has a preferably in CamelCase For ArticleDelete hook A clump of code and data that should be run when an event happens This can be either a function and a chunk of data
Definition: hooks.txt:6
static escapeClass($class)
Given a value, escape it so that it can be used as a CSS class and return it.
Definition: Sanitizer.php:1148
static hackDocType()
Hack up a private DOCTYPE with HTML's standard entity declarations.
Definition: Sanitizer.php:1741
static cleanUrl($url)
Definition: Sanitizer.php:1754
static stripAllTags($text)
Take a fragment of (potentially invalid) HTML and return a version with any tags removed, encoded as plain text.
Definition: Sanitizer.php:1721
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
const UTF8_REPLACEMENT
static hexCharReference($codepoint)
Definition: Sanitizer.php:1369
and how to run hooks for an and one after Each event has a preferably in CamelCase For ArticleDelete hook A clump of code and data that should be run when an event happens This can be either a function and a chunk of or an object and a method hook function The function part of a third party developers and local administrators to define code that will be run at certain points in the mainline and to modify the data run by that mainline code Hooks can keep mainline code simple
Definition: hooks.txt:23
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:696
MEETEI MAYEK LETTER WAI
static normalizeWhitespace($text)
Definition: Sanitizer.php:1269
Apache License January http
it sets a lot of them automatically from query and such
Definition: design.txt:93
if($line===false) $args
Definition: cdb.php:64
static decodeCharReferences($text)
Decode any character references, numeric or named entities, in the text and return a UTF-8 string...
Definition: Sanitizer.php:1399
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if that
pull multiple revisions may often pull multiple times from the same blob.
Definition: deferred.txt:11
static validateCodepoint($codepoint)
Returns true if a given Unicode codepoint is a valid character in XML.
Definition: Sanitizer.php:1383
const EVIL_URI_PATTERN
Blacklist for evil uris like javascript: WARNING: DO NOT use this in any place that actually requires...
Definition: Sanitizer.php:56
const ELEMENT_BITS_REGEX
Acceptable tag name charset from HTML5 parsing spec http://www.w3.org/TR/html5/syntax.html#tag-open-state.
Definition: Sanitizer.php:46
static decodeChar($codepoint)
Return UTF-8 string for a codepoint if that is a valid character reference, otherwise U+FFFD REPLACEM...
Definition: Sanitizer.php:1453
static normalizeSectionNameWhitespace($section)
Normalizes whitespace in a section name, such as might be returned by Parser::stripSectionName(), for use in the id's that are used for section links.
Definition: Sanitizer.php:1284
Some quick notes on the file repository architecture Functionality is
Definition: README:3
QUARTER NOTE
static $htmlEntities
List of all named character entities defined in HTML 4.01 http://www.w3.org/TR/html4/sgml/entities.html As well as &apos; which is only defined starting in XHTML1.
Definition: Sanitizer.php:64
static attributeWhitelist($element)
Fetch the whitelist of acceptable attributes for a given element name.
Definition: Sanitizer.php:1486
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.$reader:XMLReader object $logInfo:Array of information Return false to stop further processing of the tag 'ImportHandlePageXMLTag':When parsing a XML tag in a page.$reader:XMLReader object $pageInfo:Array of information Return false to stop further processing of the tag 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.$reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information Return false to stop further processing of the tag 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.$reader:XMLReader object Return false to stop further processing of the tag 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.$reader:XMLReader object $revisionInfo:Array of information Return false to stop further processing of the tag 'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.$title:Title object for the current page $request:WebRequest $ignoreRedirect:boolean to skip redirect check $target:Title/string of redirect target $article:Article object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) $article:article(object) being checked 'IsTrustedProxy':Override the result of wfIsTrustedProxy() $ip:IP being check $result:Change this value to override the result of wfIsTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 
'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetMagic':DEPRECATED, use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language $magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetSpecialPageAliases':DEPRECATED, use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language $specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 
'LinkBegin':Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1692
namespace and then decline to actually register it file or subcat img or subcat RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions instead of letting the login form give the generic error message that the account does not exist For when the account has been renamed or deleted or an array to pass a message key and parameters but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition: hooks.txt:949
static escapeHtmlAllowEntities($html)
Given HTML input, escape with htmlspecialchars but un-escape entities.
Definition: Sanitizer.php:1163
static validateTag($params, $element)
Takes attribute names and values for a tag and the tag name and validates that the tag is allowed to ...
Definition: Sanitizer.php:660
static run($event, array $args=array(), $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:137
static mergeAttributes($a, $b)
Merge two sets of HTML attributes.
Definition: Sanitizer.php:812
MediaWiki exception.
Definition: MWException.php:26
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable from
const CHAR_REFS_REGEX
Regular expression to match various types of character references in Sanitizer::normalizeCharReferenc...
Definition: Sanitizer.php:36
$params
</td >< td > &</td >< td > t want your writing to be edited mercilessly and redistributed at will
be sent.
static validateAttributes($attribs, $whitelist)
Take an array of attribute names and values and normalize or discard illegal values for the given whi...
Definition: Sanitizer.php:716
static decodeCharReferencesAndNormalize($text)
Decode any character references, numeric or named entities, in the text and normalize the resulting s...
Definition: Sanitizer.php:1416
Using a hook running we can avoid having all this option specific stuff in our mainline code Using hooks
Definition: hooks.txt:73
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable or merely the Work and Derivative Works thereof Contribution shall mean any work of including the original version of the Work and any modifications or additions to that Work or Derivative Works that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner For the purposes of this submitted means any form of or written communication sent to the Licensor or its including but not limited to communication on electronic mailing source code control and issue tracking systems that are managed by
static escapeId($id, $options=array())
Given a value, escape it so that it can be used in an id attribute and return it. ...
Definition: Sanitizer.php:1104
static cleanUrlCallback($matches)
Definition: Sanitizer.php:1801
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
wfSuppressWarnings($end=false)
Reference-counted warning suppression.
usually copyright or history_copyright This message must be in HTML not wikitext if the section is included from a template $section
Definition: hooks.txt:2476
#define the
Prior to maintenance scripts were a hodgepodge of code that had no cohesion or formal method of action Beginning in
Definition: maintenance.txt:1
static armorLinksCallback($matches)
Regex replace callback for armoring links against further processing.
Definition: Sanitizer.php:1176
wfUrlProtocols($includeProtocolRelative=true)
Returns a regular expression of url protocols.
static normalizeCss($value)
Normalize CSS into a format we can easily search for hostile input.
Definition: Sanitizer.php:834
const XMLNS_ATTRIBUTE_PATTERN
Definition: Sanitizer.php:57
The ContentHandler facility adds support for arbitrary content types on wiki instead of relying on wikitext for everything It was introduced in MediaWiki Each kind of and so on Built in content types as usual *javascript user provided javascript code *json simple implementation for use by extensions
static $htmlEntityAliases
Character entity aliases accepted by MediaWiki.
Definition: Sanitizer.php:323
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition: deferred.txt:11
static normalizeCharReferences($text)
Ensure that any entities and character references are legal for XML and XHTML specifically.
Definition: Sanitizer.php:1303
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database etc For and for historical it also represents a few features of articles that don t involve their such as access rights See also title txt Article Encapsulates access to the page table of the database The object represents a an and maintains state such as etc Revision Encapsulates individual page revision data and access to the revision text blobs storage system Higher level code should never touch text storage directly
Definition: design.txt:34
Bar style
wfRestoreWarnings()
Restore error level to previous value.
to move a page</td >< td > &*You are moving the page across *A non empty talk page already exists under the new or *You uncheck the box below In those you will have to move or merge the page manually if desired</td >< td > be sure to &You are responsible for making sure that links continue to point where they are supposed to go Note that the page will &a page at the new title
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
or there are no hooks to or false if it shouldn't (an error occurred, or one of the hooks handled the action already). Checking the return value matters more for "before" hooks than for "complete" hooks. Note that hook parameters are passed in an array
static fixTagAttributes($text, $element)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML...
Definition: Sanitizer.php:1009
static removeHTMLcomments($text)
Remove '<!--', '-->', and everything between.
Definition: Sanitizer.php:614
static getAttribsRegex()
Regular expression to match HTML/XML attribute pairs within a tag.
Definition: Sanitizer.php:339
codepointToUtf8($codepoint)
Return UTF-8 sequence for a given Unicode code point.
static validateEmail($addr)
Does a string look like an e-mail address?
Definition: Sanitizer.php:1833
you don't have to do a grep-find to see where the $wgReverseTitle variable is used
Definition: hooks.txt:117
static checkCss($value)
Pick apart some CSS and check it for forbidden or unsafe structures.
Definition: Sanitizer.php:943
static getTagAttributeCallback($set)
Pick the appropriate attribute value from a match set from the attribs regex matches.
Definition: Sanitizer.php:1243
HTML sanitizer for MediaWiki.
Definition: Sanitizer.php:31
maintenance dev scripts can help quickly setup a local MediaWiki for development purposes Wikis setup in this way are NOT meant to be publicly available They use a development database not acceptable for use in production Place a sqlite database in an unsafe location a real wiki should never place it in And use predictable default logins for the initial administrator user Running maintenance dev install sh will download and install a local copy of php
Definition: README:5
static decodeEntity($name)
If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD, return the UTF-8 encoding of that chara...
Definition: Sanitizer.php:1469
static safeEncodeAttribute($text)
Encode an attribute value for HTML tags, with extra armoring against further wiki processing...
Definition: Sanitizer.php:1046
static configuration should be added through ResourceLoaderGetConfigVars instead & $vars
Definition: hooks.txt:1870
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition: Parser.php:67
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1694
static $attribsRegex
Lazy-initialised attributes regex, see getAttribsRegex()
Definition: Sanitizer.php:331
static encodeAttribute($text)
Encode an attribute value for HTML output.
Definition: Sanitizer.php:1025
$matches