MediaWiki  1.30.0
SanitizerTest.php
Go to the documentation of this file.
1 <?php
2 
9 class SanitizerTest extends MediaWikiTestCase {
10 
/**
 * Reset the shared tidy driver after each test.
 *
 * Several tests in this class replace the driver through
 * MWTidy::setInstance( false ); destroying the singleton here keeps that
 * override from leaking into whichever test runs next.
 */
protected function tearDown() {
	MWTidy::destroySingleton();
	parent::tearDown();
}
15 
/**
 * A named character reference is decoded into the matching UTF-8 bytes.
 */
public function testDecodeNamedEntities() {
	$decoded = Sanitizer::decodeCharReferences( '&eacute;cole' );

	$this->assertEquals( "\xc3\xa9cole", $decoded, 'decode named entities' );
}
26 
/**
 * Hexadecimal and decimal numeric references are both decoded to UTF-8.
 */
public function testDecodeNumericEntities() {
	$encoded = "&#x108;io bonas dans l'&#233;cole!";
	$expected = "\xc4\x88io bonas dans l'\xc3\xa9cole!";

	$decoded = Sanitizer::decodeCharReferences( $encoded );

	$this->assertEquals( $expected, $decoded, 'decode numeric entities' );
}
37 
/**
 * Numeric and named references may appear together in one string.
 */
public function testDecodeMixedEntities() {
	$encoded = "&#x108;io bonas dans l'&eacute;cole!";
	$expected = "\xc4\x88io bonas dans l'\xc3\xa9cole!";

	$decoded = Sanitizer::decodeCharReferences( $encoded );

	$this->assertEquals( $expected, $decoded, 'decode mixed numeric/named entities' );
}
48 
/**
 * Decoding happens exactly once: an escaped ampersand (&amp; or &#38;)
 * becomes a literal "&" and the text following it is left as it is.
 */
public function testDecodeMixedComplexEntities() {
	$encoded =
		"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)";
	$expected =
		"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)";

	$decoded = Sanitizer::decodeCharReferences( $encoded );

	$this->assertEquals( $expected, $decoded, 'decode mixed complex entities' );
}
61 
/**
 * A bare ampersand that starts no character reference passes through
 * unchanged.
 */
public function testInvalidAmpersand() {
	$text = 'a & b';

	$decoded = Sanitizer::decodeCharReferences( 'a & b' );

	$this->assertEquals( $text, $decoded, 'Invalid ampersand' );
}
72 
/**
 * An unknown named reference is left exactly as written.
 */
public function testInvalidEntities() {
	$text = '&foo;';

	$decoded = Sanitizer::decodeCharReferences( '&foo;' );

	$this->assertEquals( $text, $decoded, 'Invalid named entity' );
}
83 
/**
 * A numeric reference far outside the Unicode range must not decode to a
 * real character.
 *
 * Without an explicit expected value the decoded string would be compared
 * against the message text, so the expectation is spelled out here.
 *
 * NOTE(review): the expected value is reconstructed as U+FFFD REPLACEMENT
 * CHARACTER (presumably what the UTF8_REPLACEMENT constant holds) — confirm
 * against Sanitizer's handling of invalid code points.
 */
public function testInvalidNumberedEntities() {
	$this->assertEquals(
		// U+FFFD, UTF-8 encoded
		"\xef\xbf\xbd",
		Sanitizer::decodeCharReferences( "&#88888888888888;" ),
		'Invalid numbered entity'
	);
}
94 
/**
 * Each HTML5 tag from the provider is either kept (and closed) or escaped
 * by Sanitizer::removeHTMLtags().
 *
 * @dataProvider provideHtml5Tags
 * @param string $tag Name of the tag, without angle brackets
 * @param bool $escaped True when the tag is expected to come out escaped
 */
public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
	// Override the tidy driver with false for this test (presumably
	// switching tidy off — confirm against MWTidy::setInstance()).
	MWTidy::setInstance( false );

	// A kept tag comes back with its closing tag and a trailing newline.
	$expected = $escaped ? "&lt;$tag&gt;" : "<$tag></$tag>\n";

	$this->assertEquals( $expected, Sanitizer::removeHTMLtags( "<$tag>" ) );
}
115 
/**
 * Data for testRemovehtmltagsOnHtml5Tags().
 *
 * @return array[] Rows of [ tag name, whether the tag should be escaped ]
 */
public static function provideHtml5Tags() {
	$keep = false; // the tag is expected to be kept
	$escape = true; // the tag is expected to be escaped

	$rows = [];
	foreach ( [ 'data', 'mark', 'time' ] as $tag ) {
		$rows[] = [ $tag, $keep ];
	}
	$rows[] = [ 'video', $escape ];

	return $rows;
}
129 
/**
 * Data for testRemoveHTMLtags().
 *
 * Declared public static like the other providers in this class; it does
 * not touch instance state.
 *
 * @return array[] Rows of [ input HTML, expected output, assertion message ]
 */
public static function dataRemoveHTMLtags() {
	return [
		// former testSelfClosingTag
		[
			'<div>Hello world</div />',
			'<div>Hello world</div>',
			'Self-closing closing div'
		],
		// Make sure special nested HTML5 semantics are not broken
		// https://html.spec.whatwg.org/multipage/semantics.html#the-kbd-element
		[
			'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
			'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
			'Nested <kbd>.'
		],
		// https://html.spec.whatwg.org/multipage/semantics.html#the-sub-and-sup-elements
		[
			'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
			'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
			'Nested <var>.'
		],
		// https://html.spec.whatwg.org/multipage/semantics.html#the-dfn-element
		[
			'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
			'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
			'<abbr> inside <dfn>',
		],
	];
}
159 
/**
 * Sanitizer::removeHTMLtags() turns each provided input into the expected
 * output.
 *
 * @dataProvider dataRemoveHTMLtags
 * @param string $input HTML handed to the sanitizer
 * @param string $output Expected sanitized HTML
 * @param string|null $msg Assertion message
 */
public function testRemoveHTMLtags( $input, $output, $msg = null ) {
	// Override the tidy driver with false for this test (presumably
	// switching tidy off — confirm against MWTidy::setInstance()).
	MWTidy::setInstance( false );

	$sanitized = Sanitizer::removeHTMLtags( $input );

	$this->assertEquals( $output, $sanitized, $msg );
}
168 
/**
 * Sanitizer::decodeTagAttributes() parses an attribute string into the
 * expected name => value map.
 *
 * @dataProvider provideTagAttributesToDecode
 * @param array $expected Expected attribute map
 * @param string $attributes Raw attribute text
 * @param string $message Assertion message
 */
public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
	$decoded = Sanitizer::decodeTagAttributes( $attributes );

	$this->assertEquals( $expected, $decoded, $message );
}
179 
/**
 * Data for testDecodeTagAttributes().
 *
 * @return array[] Rows of [ expected attribute map, raw attribute text, message ]
 */
public static function provideTagAttributesToDecode() {
	return [
		[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
		[ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
		[ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
		[ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
		// "n" followed by U+0301 COMBINING ACUTE ACCENT, written as explicit
		// bytes so that Unicode normalization of this file cannot silently
		// turn it into a single precomposed letter.
		[ [], "n\xcc\x81gh=bar", 'Combining accent is not allowed' ],
		[ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
		[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
		[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
		// NOTE(review): the next three cases are identical as written;
		// presumably they were meant to vary the whitespace between the
		// two attributes — confirm.
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			'foo=\'bar\' baz="foo"',
			'Several attributes'
		],
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			'foo=\'bar\' baz="foo"',
			'Several attributes'
		],
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			'foo=\'bar\' baz="foo"',
			'Several attributes'
		],
		[ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
		[ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
		[ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
		[ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],

		# Invalid beginning
		[ [], '-foo=bar', 'Leading - is forbidden' ],
		[ [], '.foo=bar', 'Leading . is forbidden' ],
		[ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
		[ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed inside the attribute' ],
		[ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
		[ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
		[ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],

		# This bit is more relaxed than XML rules, but some extensions use
		# it, like ProofreadPage (see T29539)
		[ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
		[ [], 'foo$=baz', 'Symbols are not allowed' ],
		[ [], 'foo@=baz', 'Symbols are not allowed' ],
		[ [], 'foo~=baz', 'Symbols are not allowed' ],
		[
			[ 'foo' => '1[#^`*%w/(' ],
			'foo=1[#^`*%w/(',
			'All kind of characters are allowed as values'
		],
		// The messages of the next two cases were swapped: this value holds
		// a single quote and is wrapped in double quotes.
		[
			[ 'foo' => '1[#^`*%\'w/(' ],
			'foo="1[#^`*%\'w/("',
			'Single quotes are allowed if quoted by double quotes'
		],
		// ...and this value holds a double quote, wrapped in single quotes.
		[
			[ 'foo' => '1[#^`*%"w/(' ],
			'foo=\'1[#^`*%"w/(\'',
			'Double quotes are allowed if quoted by single quotes'
		],
		[ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
		[ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
	];
}
244 
/**
 * Sanitizer::fixTagAttributes() hands the provided attribute back
 * unchanged, prefixed with a single space.
 *
 * @dataProvider provideDeprecatedAttributes
 * @param string $inputAttr Attribute text, e.g. 'clear="left"'
 * @param string $inputEl Name of the element carrying the attribute
 * @param string $message Assertion message
 */
public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
	$fixed = Sanitizer::fixTagAttributes( $inputAttr, $inputEl );

	$this->assertEquals( " $inputAttr", $fixed, $message );
}
255 
/**
 * Data for testDeprecatedAttributesUnaltered().
 *
 * @return array[] Rows of [ attribute text, element name ]
 */
public static function provideDeprecatedAttributes() {
	$fixtures = [
		// clear on line breaks
		[ 'clear="left"', 'br' ],
		[ 'clear="all"', 'br' ],
		// sizing, wrapping and alignment on table cells
		[ 'width="100"', 'td' ],
		[ 'nowrap="true"', 'td' ],
		[ 'nowrap=""', 'td' ],
		[ 'align="right"', 'td' ],
		// align on other elements
		[ 'align="center"', 'table' ],
		[ 'align="left"', 'tr' ],
		[ 'align="center"', 'div' ],
		[ 'align="left"', 'h1' ],
		[ 'align="left"', 'p' ],
	];

	return $fixtures;
}
272 
/**
 * Sanitizer::checkCss() turns each provided CSS snippet into the expected
 * result.
 *
 * @dataProvider provideCssCommentsFixtures
 * @param string $expected Expected return value of checkCss()
 * @param string $css CSS text to check
 * @param string $message Assertion message
 */
public function testCssCommentsChecking( $expected, $css, $message = '' ) {
	$checked = Sanitizer::checkCss( $css );

	$this->assertEquals( $expected, $checked, $message );
}
283 
/**
 * Data for testCssCommentsChecking().
 *
 * @return array[] Rows of [ expected result, CSS input, optional message ]
 */
public static function provideCssCommentsFixtures() {
	// Result expected for every input in the second half of the list.
	$insecure = '/* insecure input */';

	$fixtures = [
		// Valid comments spanning entire input
		[ '/**/', '/**/' ],
		[ '/* comment */', '/* comment */' ],
		// Weird stuff
		[ ' ', '/****/' ],
		[ ' ', '/* /* */' ],
		[ 'display: block;', "display:/* foo */block;" ],
		[
			'display: block;',
			"display:\\2f\\2a foo \\2a\\2f block;",
			'Backslash-escaped comments must be stripped (T30450)'
		],
		[
			'',
			'/* unfinished comment structure',
			'Remove anything after a comment-start token'
		],
		[
			'',
			"\\2f\\2a unifinished comment'",
			'Remove anything after a backslash-escaped comment-start token'
		],
		// Inputs expected to yield the "insecure input" result
		[
			$insecure,
			'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
				. '(src=\'asdf.png\',sizingMethod=\'scale\');'
		],
		[
			$insecure,
			'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
				. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
		],
		[ $insecure, 'width: expression(1+1);' ],
		[ $insecure, 'background-image: image(asdf.png);' ],
		[ $insecure, 'background-image: -webkit-image(asdf.png);' ],
		[ $insecure, 'background-image: -moz-image(asdf.png);' ],
		[ $insecure, 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
		[ $insecure, 'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);' ],
		[ $insecure, 'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);' ],
		[ $insecure, 'foo: attr( title, url );' ],
		[ $insecure, 'foo: attr( title url );' ],
	];

	return $fixtures;
}
327 
/**
 * Sanitizer::escapeHtmlAllowEntities() produces the expected escaped text.
 *
 * @dataProvider provideEscapeHtmlAllowEntities
 * @param string $expected Expected escaped output
 * @param string $html Input text
 */
public function testEscapeHtmlAllowEntities( $expected, $html ) {
	$escaped = Sanitizer::escapeHtmlAllowEntities( $html );

	$this->assertEquals( $expected, $escaped );
}
338 
/**
 * Data for testEscapeHtmlAllowEntities().
 *
 * @return array[] Rows of [ expected output, input ]
 */
public static function provideEscapeHtmlAllowEntities() {
	$fixtures = [
		// plain text
		[ 'foo', 'foo' ],
		// numeric character reference in the input
		[ 'a¡b', 'a&#161;b' ],
		// apostrophe
		[ 'foo&#039;bar', "foo'bar" ],
		// markup
		[ '&lt;script&gt;foo&lt;/script&gt;', '<script>foo</script>' ],
	];

	return $fixtures;
}
347 
/**
 * Sanitizer::escapeId(), called with the 'noninitial' and 'legacy' options,
 * encodes each input as expected.
 *
 * @dataProvider provideEscapeId
 * @param string $input Raw ID text
 * @param string $output Expected escaped ID
 */
public function testEscapeId( $input, $output ) {
	$escaped = Sanitizer::escapeId( $input, [ 'noninitial', 'legacy' ] );

	$this->assertEquals( $output, $escaped );
}
360 
/**
 * Data for testEscapeId().
 *
 * @return array[] Rows of [ raw ID text, expected escaped ID ]
 */
public static function provideEscapeId() {
	// raw text => expected escaped form
	$expectations = [
		'+' => '.2B',
		'&' => '.26',
		'=' => '.3D',
		':' => ':',
		';' => '.3B',
		'@' => '.40',
		'$' => '.24',
		'-_.' => '-_.',
		'!' => '.21',
		'*' => '.2A',
		'/' => '.2F',
		'[]' => '.5B.5D',
		'<>' => '.3C.3E',
		'\'' => '.27',
		'§' => '.C2.A7',
		'Test:A & B/Here' => 'Test:A_.26_B.2FHere',
		'A&B&amp;C&amp;amp;D&amp;amp;amp;E' =>
			'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE',
	];

	$rows = [];
	foreach ( $expectations as $raw => $escaped ) {
		$rows[] = [ $raw, $escaped ];
	}

	return $rows;
}
382 
/**
 * Escaping a space-separated list of IDs gives the same result as escaping
 * its two members individually and joining them with a space.
 *
 * @dataProvider provideEscapeIdReferenceList
 * @param string $referenceList Two IDs separated by a space
 * @param string $id1 First ID of the list
 * @param string $id2 Second ID of the list
 */
public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
	$expected = Sanitizer::escapeIdForAttribute( $id1 )
		. ' '
		. Sanitizer::escapeIdForAttribute( $id2 );

	// The list escaper is the unit under test, so its result goes in the
	// "actual" slot; a failure then reports the values the right way round.
	$this->assertEquals(
		$expected,
		Sanitizer::escapeIdReferenceList( $referenceList, 'noninitial' )
	);
}
397 
/**
 * Data for testEscapeIdReferenceList().
 *
 * @return array[] Rows of [ 'id1 id2', id1, id2 ]
 */
public static function provideEscapeIdReferenceList() {
	$pairs = [
		[ 'foo', 'bar' ],
		[ '#1', '#2' ],
		[ '+1', '+2' ],
	];

	$rows = [];
	foreach ( $pairs as $pair ) {
		// The reference list is the two IDs joined by one space.
		$rows[] = [ $pair[0] . ' ' . $pair[1], $pair[0], $pair[1] ];
	}

	return $rows;
}
406 
/**
 * Sanitizer::isReservedDataAttribute() returns exactly the expected boolean
 * for each attribute name.
 *
 * @dataProvider provideIsReservedDataAttribute
 * @param string $attr Attribute name
 * @param bool $expected Expected return value
 */
public function testIsReservedDataAttribute( $attr, $expected ) {
	$reserved = Sanitizer::isReservedDataAttribute( $attr );

	$this->assertSame( $expected, $reserved );
}
413 
/**
 * Data for testIsReservedDataAttribute().
 *
 * @return array[] Rows of [ attribute name, expected return value ]
 */
public static function provideIsReservedDataAttribute() {
	// attribute name => expected return value
	$expectations = [
		'foo' => false,
		'data' => false,
		'data-foo' => false,
		'data-mw' => true,
		'data-ooui' => true,
		'data-parsoid' => true,
		'data-mw-foo' => true,
		'data-ooui-foo' => true,
		// could be false but this is how it's implemented currently
		'data-mwfoo' => true,
	];

	$rows = [];
	foreach ( $expectations as $attr => $reserved ) {
		$rows[] = [ $attr, $reserved ];
	}

	return $rows;
}
427 
/**
 * Calls Sanitizer::escapeIdFor<$stuff>() under the given fragment-mode
 * configuration and checks the result.
 *
 * @dataProvider provideEscapeIdForStuff
 * @param string $stuff Suffix of the Sanitizer::escapeIdFor* method to call
 * @param string[] $config Value for $wgFragmentMode, followed by the value
 *   for $wgExternalInterwikiFragmentMode as its last element
 * @param string $id Text to escape
 * @param string|bool $expected Expected return value
 * @param mixed $mode Second argument passed to the escaping method (one of
 *   the Sanitizer::ID_* constants in the provider), or null
 */
public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
	$func = "Sanitizer::escapeIdFor{$stuff}";

	// The last element configures external interwikis; what is left after
	// popping it is the $wgFragmentMode list.
	$iwFlavor = array_pop( $config );
	$this->setMwGlobals( [
		'wgFragmentMode' => $config,
		'wgExternalInterwikiFragmentMode' => $iwFlavor,
	] );

	$escaped = call_user_func( $func, $id, $mode );

	$this->assertEquals( $expected, $escaped );
}
452 
/**
 * Data for testEscapeIdForStuff().
 *
 * Declared static like the other providers in this class; it does not
 * touch instance state.
 *
 * @return array[] Rows of [ method suffix, configuration, text, expected
 *   result, optional mode ]
 */
public static function provideEscapeIdForStuff() {
	// Test inputs and outputs
	$text = 'foo тест_#%!\'()[]:<>&&amp;&amp;amp;';
	$legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' .
		'.26.26amp.3B.26amp.3Bamp.3B';
	$html5Encoded = 'foo_тест_#%!\'()[]:<>&&amp;&amp;amp;';
	$html5Experimental = 'foo_тест_!_()[]:<>_amp;_amp;amp;';

	// Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
	$legacy = [ 'legacy', 'legacy' ];
	$legacyNew = [ 'legacy', 'html5', 'legacy' ];
	$newLegacy = [ 'html5', 'legacy', 'legacy' ];
	$new = [ 'html5', 'legacy' ];
	$allNew = [ 'html5', 'html5' ];
	$experimentalLegacy = [ 'html5-legacy', 'legacy', 'legacy' ];
	$newExperimental = [ 'html5', 'html5-legacy', 'legacy' ];

	return [
		// Pure legacy: how MW worked before 2017
		[ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $legacy, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $legacy, $text, $legacyEncoded ],
		[ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],

		// Transition to a new world: legacy links with HTML5 fallback
		[ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer::ID_FALLBACK ],
		[ 'Link', $legacyNew, $text, $legacyEncoded ],
		[ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],

		// New world: HTML5 links, legacy fallbacks
		[ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
		[ 'Link', $newLegacy, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],

		// Distant future: no legacy fallbacks, but still linking to legacy wikis
		[ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $new, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $new, $text, $legacyEncoded ],

		// Just before the heat death of universe: external interwikis are also HTML5 \m/
		[ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $allNew, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $allNew, $text, $html5Encoded ],

		// Someone flipped $wgExperimentalHtmlIds on
		[ 'Attribute', $experimentalLegacy, $text, $html5Experimental, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $experimentalLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
		[ 'Link', $experimentalLegacy, $text, $html5Experimental ],
		[ 'ExternalInterwiki', $experimentalLegacy, $text, $legacyEncoded ],

		// Migration from $wgExperimentalHtmlIds to modern HTML5
		[ 'Attribute', $newExperimental, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $newExperimental, $text, $html5Experimental, Sanitizer::ID_FALLBACK ],
		[ 'Link', $newExperimental, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $newExperimental, $text, $legacyEncoded ],
	];
}
514 
/**
 * An unrecognised entry in $wgFragmentMode must make ID escaping throw.
 *
 * Without a declared expectation the exception would surface as a test
 * error instead of a pass.
 *
 * NOTE(review): the exception class is not visible in this file; it is
 * taken from Sanitizer's handling of unknown modes — confirm.
 *
 * @expectedException InvalidArgumentException
 */
public function testInvalidFragmentThrows() {
	$this->setMwGlobals( 'wgFragmentMode', [ 'boom!' ] );
	Sanitizer::escapeIdForAttribute( 'This should throw' );
}
523 
/**
 * Escaping an attribute ID must throw when $wgFragmentMode has an entry
 * only under key 666, i.e. presumably no primary mode.
 *
 * Without a declared expectation the exception would surface as a test
 * error instead of a pass.
 *
 * NOTE(review): the exception class is not visible in this file; it is
 * taken from Sanitizer::escapeIdForAttribute() — confirm.
 *
 * @expectedException UnexpectedValueException
 */
public function testNoPrimaryFragmentModeThrows() {
	$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
	Sanitizer::escapeIdForAttribute( 'This should throw' );
}
532 
/**
 * Escaping a link fragment must throw when $wgFragmentMode has an entry
 * only under key 666, i.e. presumably no primary mode.
 *
 * Without a declared expectation the exception would surface as a test
 * error instead of a pass.
 *
 * NOTE(review): the exception class is not visible in this file; it is
 * taken from Sanitizer::escapeIdForLink() — confirm.
 *
 * @expectedException UnexpectedValueException
 */
public function testNoPrimaryFragmentModeThrows2() {
	$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
	Sanitizer::escapeIdForLink( 'This should throw' );
}
541 }
false
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
$output
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title after the basic globals have been set but before ordinary actions take place $output
Definition: hooks.txt:2198
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
$html
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition: hooks.txt:1965
$css
$css
Definition: styleTest.css.php:50
$input
if(is_array( $mode)) switch( $mode) $input
Definition: postprocess-phan.php:141
MediaWikiTestCase\setMwGlobals
setMwGlobals( $pairs, $value=null)
Sets a global, maintaining a stashed version of the previous global to be restored in tearDown.
Definition: MediaWikiTestCase.php:672
UtfNormal
Unicode normalization routines for working with UTF-8 strings.
Definition: UtfNormal.php:48
MediaWikiTestCase
Definition: MediaWikiTestCase.php:15
MWTidy\setInstance
static setInstance( $instance)
Set the driver to be used.
Definition: MWTidy.php:156
tag
</code > tag
Definition: citeParserTests.txt:219
UTF8_REPLACEMENT
const UTF8_REPLACEMENT
Definition: UtfNormalDefines.php:145
true
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition: hooks.txt:1965
MWTidy\destroySingleton
static destroySingleton()
Destroy the current singleton instance.
Definition: MWTidy.php:163
MediaWikiTestCase\tearDown
tearDown()
Definition: MediaWikiTestCase.php:528
array
the array() calling protocol came about after MediaWiki 1.4rc1.