MediaWiki  1.33.0
SanitizerTest.php
Go to the documentation of this file.
1 <?php
2 
9 class SanitizerTest extends MediaWikiTestCase {
10 
/**
 * Resets the tidy driver after each test.
 *
 * Several tests in this class install a driver through
 * MWTidy::setInstance( false ); destroying the singleton here keeps that
 * override from leaking into tests that run afterwards.
 */
protected function tearDown() {
	MWTidy::destroySingleton();
	parent::tearDown();
}
15 
/**
 * A named character reference (&eacute;) is decoded to its UTF-8 bytes.
 */
public function testDecodeNamedEntities() {
	$decoded = Sanitizer::decodeCharReferences( '&eacute;cole' );

	$this->assertEquals( "\xc3\xa9cole", $decoded, 'decode named entities' );
}
26 
/**
 * Hexadecimal (&#x108;) and decimal (&#233;) references are both decoded.
 */
public function testDecodeNumericEntities() {
	$encoded = "&#x108;io bonas dans l'&#233;cole!";
	$expected = "\xc4\x88io bonas dans l'\xc3\xa9cole!";

	$this->assertEquals(
		$expected,
		Sanitizer::decodeCharReferences( $encoded ),
		'decode numeric entities'
	);
}
37 
/**
 * Numeric and named references appearing in the same string are both decoded.
 */
public function testDecodeMixedEntities() {
	$encoded = "&#x108;io bonas dans l'&eacute;cole!";
	$expected = "\xc4\x88io bonas dans l'\xc3\xa9cole!";

	$this->assertEquals(
		$expected,
		Sanitizer::decodeCharReferences( $encoded ),
		'decode mixed numeric/named entities'
	);
}
48 
/**
 * References whose ampersand is itself escaped (&amp;#x108; and &#38;eacute;)
 * are decoded by one level only, leaving literal reference text in the output.
 */
public function testDecodeMixedComplexEntities() {
	$encoded = "&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)";
	$expected = "\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)";

	$this->assertEquals(
		$expected,
		Sanitizer::decodeCharReferences( $encoded ),
		'decode mixed complex entities'
	);
}
61 
/**
 * A bare ampersand that does not start a reference is returned unchanged.
 */
public function testInvalidAmpersand() {
	$text = 'a & b';

	$this->assertEquals( 'a & b', Sanitizer::decodeCharReferences( $text ), 'Invalid ampersand' );
}
72 
/**
 * An unknown named reference (&foo;) is returned unchanged.
 */
public function testInvalidEntities() {
	$text = '&foo;';

	$this->assertEquals( '&foo;', Sanitizer::decodeCharReferences( $text ), 'Invalid named entity' );
}
83 
/**
 * A numeric reference with an out-of-range value decodes to the UTF-8
 * replacement constant.
 */
public function testInvalidNumberedEntities() {
	$decoded = Sanitizer::decodeCharReferences( "&#88888888888888;" );

	$this->assertEquals(
		UtfNormal\Constants::UTF8_REPLACEMENT,
		$decoded,
		'Invalid numbered entity'
	);
}
94 
/**
 * Checks how Sanitizer::removeHTMLtags() treats a lone opening HTML5 tag:
 * it is either kept (and closed) or escaped.
 *
 * @dataProvider provideHtml5Tags
 *
 * @param string $tag Name of an HTML5 tag
 * @param bool $escaped Whether the tag is expected to be escaped
 */
public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
	// Run with tidy disabled; both calls below trigger deprecation notices
	$this->hideDeprecated( 'disabling tidy' );
	$this->hideDeprecated( 'MWTidy::setInstance' );
	MWTidy::setInstance( false );

	$expected = $escaped
		? "&lt;$tag&gt;"
		: "<$tag></$tag>\n";

	$this->assertEquals( $expected, Sanitizer::removeHTMLtags( "<$tag>" ) );
}
117 
/**
 * Data provider: HTML5 tag names with the expected sanitizer treatment.
 *
 * @return array[] Rows of [ tag name, bool whether the tag should be escaped ]
 */
public static function provideHtml5Tags() {
	// true: we want the tag to be escaped
	$escaped = true;
	// false: we want to keep the tag
	$verbatim = false;

	$cases = [];
	foreach ( [ 'data', 'mark', 'time' ] as $keptTag ) {
		$cases[] = [ $keptTag, $verbatim ];
	}
	$cases[] = [ 'video', $escaped ];

	return $cases;
}
131 
/**
 * Data provider for Sanitizer::removeHTMLtags() round-trip cases.
 *
 * Declared public static like the other providers in this class.
 *
 * @return array[] Rows of [ input HTML, expected output, message ]
 */
public static function dataRemoveHTMLtags() {
	return [
		// former testSelfClosingTag
		[
			'<div>Hello world</div />',
			'<div>Hello world</div>',
			'Self-closing closing div'
		],
		// Make sure special nested HTML5 semantics are not broken
		// https://html.spec.whatwg.org/multipage/semantics.html#the-kbd-element
		[
			'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
			'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
			'Nested <kbd>.'
		],
		// https://html.spec.whatwg.org/multipage/semantics.html#the-sub-and-sup-elements
		[
			'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
			'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
			'Nested <var>.'
		],
		// https://html.spec.whatwg.org/multipage/semantics.html#the-dfn-element
		[
			'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
			'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
			'<abbr> inside <dfn>',
		],
	];
}
161 
/**
 * Sanitizer::removeHTMLtags() produces the expected markup with tidy disabled.
 *
 * @dataProvider dataRemoveHTMLtags
 *
 * @param string $input HTML handed to the sanitizer
 * @param string $output Expected sanitized HTML
 * @param string|null $msg Optional assertion message
 */
public function testRemoveHTMLtags( $input, $output, $msg = null ) {
	// Run with tidy disabled; both calls below trigger deprecation notices
	$this->hideDeprecated( 'disabling tidy' );
	$this->hideDeprecated( 'MWTidy::setInstance' );
	MWTidy::setInstance( false );

	$sanitized = Sanitizer::removeHTMLtags( $input );
	$this->assertEquals( $output, $sanitized, $msg );
}
172 
/**
 * Sanitizer::decodeTagAttributes() parses an attribute string into the
 * expected name => value map.
 *
 * @dataProvider provideTagAttributesToDecode
 *
 * @param array $expected Expected attribute map
 * @param string $attributes Raw attribute text
 * @param string $message Optional assertion message
 */
public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
	$decoded = Sanitizer::decodeTagAttributes( $attributes );

	$this->assertEquals( $expected, $decoded, $message );
}
183 
/**
 * Data provider for Sanitizer::decodeTagAttributes().
 *
 * @return array[] Rows of [ expected attribute map, raw attribute text, message ]
 */
public static function provideTagAttributesToDecode() {
	return [
		[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
		[ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
		[ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
		[ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
		// "n" followed by U+0301 COMBINING ACUTE ACCENT, spelled as explicit bytes
		// so that Unicode normalization of this file cannot silently turn it into
		// the precomposed letter and change what the case exercises.
		[ [], "n\xcc\x81gh=bar", 'Combining accent is not allowed' ],
		[ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
		[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
		[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
		// NOTE(review): the next three cases are identical as written here;
		// presumably they were meant to differ in the whitespace separating
		// the two attributes - confirm against the canonical file.
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			'foo=\'bar\' baz="foo"',
			'Several attributes'
		],
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			'foo=\'bar\' baz="foo"',
			'Several attributes'
		],
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			'foo=\'bar\' baz="foo"',
			'Several attributes'
		],
		[ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
		[ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
		[ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
		[ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],

		// Invalid beginning
		[ [], '-foo=bar', 'Leading - is forbidden' ],
		[ [], '.foo=bar', 'Leading . is forbidden' ],
		[ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
		[ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed inside the attribute' ],
		[ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
		[ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
		[ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],

		// This bit is more relaxed than XML rules, but some extensions use
		// it, like ProofreadPage (see T29539)
		[ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
		[ [], 'foo$=baz', 'Symbols are not allowed' ],
		[ [], 'foo@=baz', 'Symbols are not allowed' ],
		[ [], 'foo~=baz', 'Symbols are not allowed' ],
		[
			[ 'foo' => '1[#^`*%w/(' ],
			'foo=1[#^`*%w/(',
			'All kind of characters are allowed as values'
		],
		// The value is wrapped in double quotes and contains a single quote
		[
			[ 'foo' => '1[#^`*%\'w/(' ],
			'foo="1[#^`*%\'w/("',
			'Single quotes are allowed if quoted by double quotes'
		],
		// The value is wrapped in single quotes and contains a double quote
		[
			[ 'foo' => '1[#^`*%"w/(' ],
			'foo=\'1[#^`*%"w/(\'',
			'Double quotes are allowed if quoted by single quotes'
		],
		[ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
		[ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
	];
}
248 
/**
 * Sanitizer::fixTagAttributes() returns a deprecated presentational
 * attribute unchanged, prefixed with a single space.
 *
 * @dataProvider provideDeprecatedAttributes
 *
 * @param string $inputAttr Attribute text, e.g. 'align="left"'
 * @param string $inputEl Element name the attribute belongs to
 * @param string $message Optional assertion message
 */
public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
	$fixed = Sanitizer::fixTagAttributes( $inputAttr, $inputEl );

	$this->assertEquals( " $inputAttr", $fixed, $message );
}
259 
/**
 * Data provider: deprecated presentational attributes and the element that
 * carries them.
 *
 * @return array[] Rows of [ attribute text, element name ]
 */
public static function provideDeprecatedAttributes() {
	$cases = [
		// Line breaks
		[ 'clear="left"', 'br' ],
		[ 'clear="all"', 'br' ],
		// Table cells
		[ 'width="100"', 'td' ],
		[ 'nowrap="true"', 'td' ],
		[ 'nowrap=""', 'td' ],
		[ 'align="right"', 'td' ],
		// Alignment on other elements
		[ 'align="center"', 'table' ],
		[ 'align="left"', 'tr' ],
		[ 'align="center"', 'div' ],
		[ 'align="left"', 'h1' ],
		[ 'align="left"', 'p' ],
	];

	return $cases;
}
276 
/**
 * Sanitizer::checkCss() returns the expected result for each CSS fixture.
 *
 * @dataProvider provideCssCommentsFixtures
 *
 * @param string $expected Expected return value of checkCss()
 * @param string $css CSS text to check
 * @param string $message Optional assertion message
 */
public function testCssCommentsChecking( $expected, $css, $message = '' ) {
	$checked = Sanitizer::checkCss( $css );

	$this->assertEquals( $expected, $checked, $message );
}
287 
/**
 * Data provider for Sanitizer::checkCss() fixtures.
 *
 * @return array[] Rows of [ expected result, CSS input, optional message ]
 */
public static function provideCssCommentsFixtures() {
	// Expected result shared by every rejected input below
	$rejected = '/* insecure input */';

	$fixtures = [
		// Valid comments spanning entire input
		[ '/**/', '/**/' ],
		[ '/* comment */', '/* comment */' ],
		// Weird stuff
		[ ' ', '/****/' ],
		[ ' ', '/* /* */' ],
		[ 'display: block;', "display:/* foo */block;" ],
		[
			'display: block;',
			"display:\\2f\\2a foo \\2a\\2f block;",
			'Backslash-escaped comments must be stripped (T30450)'
		],
		[
			'',
			'/* unfinished comment structure',
			'Remove anything after a comment-start token'
		],
		[
			'',
			"\\2f\\2a unifinished comment'",
			'Remove anything after a backslash-escaped comment-start token'
		],
		// Inputs the sanitizer is expected to reject
		[
			$rejected,
			'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
				. '(src=\'asdf.png\',sizingMethod=\'scale\');'
		],
		[
			$rejected,
			'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
				. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
		],
		[ $rejected, 'width: expression(1+1);' ],
		[ $rejected, 'background-image: image(asdf.png);' ],
		[ $rejected, 'background-image: -webkit-image(asdf.png);' ],
		[ $rejected, 'background-image: -moz-image(asdf.png);' ],
		[ $rejected, 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
		[ $rejected, 'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);' ],
		[ $rejected, 'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);' ],
		[ $rejected, 'foo: attr( title, url );' ],
		[ $rejected, 'foo: attr( title url );' ],
		[ $rejected, 'foo: var(--evil-attribute)' ],
	];

	return $fixtures;
}
332 
/**
 * Sanitizer::escapeHtmlAllowEntities() produces the expected escaped text.
 *
 * @dataProvider provideEscapeHtmlAllowEntities
 *
 * @param string $expected Expected escaped output
 * @param string $html Input text
 */
public function testEscapeHtmlAllowEntities( $expected, $html ) {
	$escaped = Sanitizer::escapeHtmlAllowEntities( $html );

	$this->assertEquals( $expected, $escaped );
}
343 
/**
 * Data provider for Sanitizer::escapeHtmlAllowEntities().
 *
 * @return array[] Rows of [ expected output, input text ]
 */
public static function provideEscapeHtmlAllowEntities() {
	$cases = [];

	// Plain text
	$cases[] = [ 'foo', 'foo' ];
	// A numeric character reference in the input
	$cases[] = [ 'a¡b', 'a&#161;b' ];
	// A single quote in the input
	$cases[] = [ 'foo&#039;bar', "foo'bar" ];
	// Markup in the input
	$cases[] = [ '&lt;script&gt;foo&lt;/script&gt;', '<script>foo</script>' ];

	return $cases;
}
352 
/**
 * Sanitizer::escapeId() with the 'noninitial' and 'legacy' options yields
 * the expected encoded ID.
 *
 * @dataProvider provideEscapeId
 *
 * @param string $input Raw ID
 * @param string $output Expected escaped ID
 */
public function testEscapeId( $input, $output ) {
	$escaped = Sanitizer::escapeId( $input, [ 'noninitial', 'legacy' ] );

	$this->assertEquals( $output, $escaped );
}
365 
/**
 * Data provider for Sanitizer::escapeId().
 *
 * @return array[] Rows of [ raw ID, expected escaped ID ]
 */
public static function provideEscapeId() {
	$cases = [
		// Single characters
		[ '+', '.2B' ],
		[ '&', '.26' ],
		[ '=', '.3D' ],
		[ ':', ':' ],
		[ ';', '.3B' ],
		[ '@', '.40' ],
		[ '$', '.24' ],
		[ '-_.', '-_.' ],
		[ '!', '.21' ],
		[ '*', '.2A' ],
		[ '/', '.2F' ],
		[ '[]', '.5B.5D' ],
		[ '<>', '.3C.3E' ],
		[ '\'', '.27' ],
		[ '§', '.C2.A7' ],
		// Longer inputs
		[ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ],
		[
			'A&B&amp;C&amp;amp;D&amp;amp;amp;E',
			'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE'
		],
	];

	return $cases;
}
387 
/**
 * Escaping a space-separated list of two IDs gives the same result as
 * escaping each ID on its own and joining them with a single space.
 *
 * @dataProvider provideEscapeIdReferenceList
 *
 * @param string $referenceList Space-separated pair of IDs
 * @param string $id1 First ID of the pair
 * @param string $id2 Second ID of the pair
 */
public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
	// Expected value goes first so failure output labels both sides correctly
	$expected = Sanitizer::escapeIdForAttribute( $id1 )
		. ' '
		. Sanitizer::escapeIdForAttribute( $id2 );

	$this->assertEquals( $expected, Sanitizer::escapeIdReferenceList( $referenceList ) );
}
402 
/**
 * Data provider for ID reference lists.
 *
 * @return array[] Rows of [ reference list, first ID, second ID ]
 */
public static function provideEscapeIdReferenceList() {
	$cases = [];

	$cases[] = [ 'foo bar', 'foo', 'bar' ];
	$cases[] = [ '#1 #2', '#1', '#2' ];
	$cases[] = [ '+1 +2', '+1', '+2' ];

	return $cases;
}
411 
/**
 * Sanitizer::isReservedDataAttribute() returns exactly the expected boolean.
 *
 * @dataProvider provideIsReservedDataAttribute
 *
 * @param string $attr Attribute name
 * @param bool $expected Whether the attribute is expected to be reserved
 */
public function testIsReservedDataAttribute( $attr, $expected ) {
	$isReserved = Sanitizer::isReservedDataAttribute( $attr );

	$this->assertSame( $expected, $isReserved );
}
419 
/**
 * Data provider for Sanitizer::isReservedDataAttribute().
 *
 * @return array[] Rows of [ attribute name, bool expected result ]
 */
public static function provideIsReservedDataAttribute() {
	$cases = [
		// Expected not to be reserved
		[ 'foo', false ],
		[ 'data', false ],
		[ 'data-foo', false ],
		// Expected to be reserved
		[ 'data-mw', true ],
		[ 'data-ooui', true ],
		[ 'data-parsoid', true ],
		[ 'data-mw-foo', true ],
		[ 'data-ooui-foo', true ],
		// could be false but this is how it's implemented currently
		[ 'data-mwfoo', true ],
	];

	return $cases;
}
433 
/**
 * Exercises Sanitizer::escapeIdFor<stuff>() under a given fragment-mode
 * configuration.
 *
 * @dataProvider provideEscapeIdForStuff
 *
 * @param string $stuff Suffix of the Sanitizer::escapeIdFor* method to call
 * @param string[] $config Fragment modes; the last element is used for
 *   $wgExternalInterwikiFragmentMode, the remainder for $wgFragmentMode
 * @param string $id ID to escape
 * @param string|bool $expected Expected return value
 * @param int|null $mode Second argument passed to the method under test
 */
public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
	// Split the interwiki mode off the end; what remains is $wgFragmentMode
	$interwikiMode = array_pop( $config );
	$this->setMwGlobals( [
		'wgFragmentMode' => $config,
		'wgExternalInterwikiFragmentMode' => $interwikiMode,
	] );

	$method = "Sanitizer::escapeIdFor{$stuff}";
	$escaped = call_user_func( $method, $id, $mode );

	$this->assertEquals( $expected, $escaped );
}
458 
/**
 * Data provider for the Sanitizer::escapeIdFor*() family.
 *
 * Declared static like the other providers in this class.
 *
 * @return array[] Rows of [ method suffix, fragment-mode settings, input ID,
 *   expected result, optional mode constant ]
 */
public static function provideEscapeIdForStuff() {
	// Test inputs and outputs
	$text = 'foo тест_#%!\'()[]:<>&&amp;&amp;amp;';
	$legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' .
		'.26.26amp.3B.26amp.3Bamp.3B';
	$html5Encoded = 'foo_тест_#%!\'()[]:<>&&amp;&amp;amp;';

	// Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
	$legacy = [ 'legacy', 'legacy' ];
	$legacyNew = [ 'legacy', 'html5', 'legacy' ];
	$newLegacy = [ 'html5', 'legacy', 'legacy' ];
	$new = [ 'html5', 'legacy' ];
	$allNew = [ 'html5', 'html5' ];

	return [
		// Pure legacy: how MW worked before 2017
		[ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $legacy, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $legacy, $text, $legacyEncoded ],
		[ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],

		// Transition to a new world: legacy links with HTML5 fallback
		[ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer::ID_FALLBACK ],
		[ 'Link', $legacyNew, $text, $legacyEncoded ],
		[ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],

		// New world: HTML5 links, legacy fallbacks
		[ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
		[ 'Link', $newLegacy, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],

		// Distant future: no legacy fallbacks, but still linking to legacy wikis
		[ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $new, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $new, $text, $legacyEncoded ],

		// Just before the heat death of universe: external interwikis are also HTML5 \m/
		[ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $allNew, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $allNew, $text, $html5Encoded ],
	];
}
505 
/**
 * Sanitizer::stripAllTags() reduces HTML to the expected plain text.
 *
 * @dataProvider provideStripAllTags
 *
 * @param string $input HTML input
 * @param string $expected Expected plain-text result
 */
public function testStripAllTags( $input, $expected ) {
	$stripped = Sanitizer::stripAllTags( $input );

	$this->assertEquals( $expected, $stripped );
}
518 
/**
 * Data provider for Sanitizer::stripAllTags().
 *
 * Declared static like the other providers in this class.
 *
 * @return array[] Rows of [ HTML input, expected plain text ]
 */
public static function provideStripAllTags() {
	return [
		[ '<p>Foo</p>', 'Foo' ],
		[ '<p id="one">Foo</p><p id="two">Bar</p>', 'Foo Bar' ],
		[ "<p>Foo</p>\n<p>Bar</p>", 'Foo Bar' ],
		// Character references are decoded in the result
		[ '<p>Hello &lt;strong&gt; wor&#x6c;&#100; caf&eacute;</p>', 'Hello <strong> world café' ],
		// Attribute values containing quotes and angle brackets
		[
			'<p><small data-foo=\'bar"&lt;baz>quux\'><a href="./Foo">Bar</a></small> Whee!</p>',
			'Bar Whee!'
		],
		[ '1<span class="<?php">2</span>3', '123' ],
		[ '1<span class="<?">2</span>3', '123' ],
		[ '<th>1</th><td>2</td>', '1 2' ],
	];
}
534 
/**
 * An unknown fragment mode in $wgFragmentMode must make ID escaping throw.
 *
 * The expectation is declared explicitly so the test passes only when an
 * exception is actually raised.
 */
public function testInvalidFragmentThrows() {
	$this->setMwGlobals( 'wgFragmentMode', [ 'boom!' ] );

	// NOTE(review): exception class assumed from Sanitizer's handling of
	// unknown modes - confirm against the implementation.
	$this->expectException( InvalidArgumentException::class );
	Sanitizer::escapeIdForAttribute( 'This should throw' );
}
543 
/**
 * A $wgFragmentMode array with no entry at the primary index must make
 * Sanitizer::escapeIdForAttribute() throw.
 *
 * The expectation is declared explicitly so the test passes only when an
 * exception is actually raised.
 */
public function testNoPrimaryFragmentModeThrows() {
	// The only configured mode sits at key 666
	$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );

	// NOTE(review): exception class assumed from Sanitizer's handling of a
	// missing primary mode - confirm against the implementation.
	$this->expectException( UnexpectedValueException::class );
	Sanitizer::escapeIdForAttribute( 'This should throw' );
}
552 
/**
 * A $wgFragmentMode array with no entry at the primary index must make
 * Sanitizer::escapeIdForLink() throw.
 *
 * The expectation is declared explicitly so the test passes only when an
 * exception is actually raised.
 */
public function testNoPrimaryFragmentModeThrows2() {
	// The only configured mode sits at key 666
	$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );

	// NOTE(review): exception class assumed from Sanitizer's handling of a
	// missing primary mode - confirm against the implementation.
	$this->expectException( UnexpectedValueException::class );
	Sanitizer::escapeIdForLink( 'This should throw' );
}
561 }
false
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
$html
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1985
$css
$css
Definition: styleTest.css.php:54
$input
if(is_array( $mode)) switch( $mode) $input
Definition: postprocess-phan.php:141
MediaWikiTestCase\setMwGlobals
setMwGlobals( $pairs, $value=null)
Sets a global, maintaining a stashed version of the previous global to be restored in tearDown.
Definition: MediaWikiTestCase.php:709
MediaWikiTestCase
Definition: MediaWikiTestCase.php:17
MediaWikiTestCase\hideDeprecated
hideDeprecated( $function)
Don't throw a warning if $function is deprecated and called later.
Definition: MediaWikiTestCase.php:1974
$output
$output
Definition: SyntaxHighlight.php:334
array
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
MWTidy\setInstance
static setInstance( $instance)
Set the driver to be used.
Definition: MWTidy.php:85
tag
</code > tag
Definition: citeParserTests.txt:225
true
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition: hooks.txt:1985
MWTidy\destroySingleton
static destroySingleton()
Destroy the current singleton instance.
Definition: MWTidy.php:93
MediaWikiTestCase\tearDown
tearDown()
Definition: MediaWikiTestCase.php:547