MediaWiki REL1_32
SanitizerTest.php
Go to the documentation of this file.
1<?php
2
/**
 * Unit tests for the static helpers of the Sanitizer class: character
 * reference decoding, HTML tag and attribute sanitizing, CSS checking and
 * ID/fragment escaping.
 */
class SanitizerTest extends MediaWikiTestCase {

	/**
	 * Undo the MWTidy::setInstance( false ) calls made by the removeHTMLtags
	 * tests so that the disabled driver does not leak into later tests.
	 */
	protected function tearDown() {
		MWTidy::destroySingleton();
		parent::tearDown();
	}

	/**
	 * A named entity is decoded to its UTF-8 byte sequence.
	 */
	public function testDecodeNamedEntities() {
		$this->assertEquals(
			"\xc3\xa9cole",
			Sanitizer::decodeCharReferences( '&eacute;cole' ),
			'decode named entities'
		);
	}

	/**
	 * Hexadecimal and decimal numeric references are decoded.
	 */
	public function testDecodeNumericEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&#233;cole!" ),
			'decode numeric entities'
		);
	}

	/**
	 * Numeric and named references may be mixed in one string.
	 */
	public function testDecodeMixedEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&eacute;cole!" ),
			'decode mixed numeric/named entities'
		);
	}

	/**
	 * Decoding is a single pass: an escaped ampersand followed by
	 * entity-like text yields that text literally and is not decoded again.
	 */
	public function testDecodeMixedComplexEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
			Sanitizer::decodeCharReferences(
				"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)"
			),
			'decode mixed complex entities'
		);
	}

	/**
	 * A bare ampersand that does not start a reference is left untouched.
	 */
	public function testInvalidAmpersand() {
		$this->assertEquals(
			'a & b',
			Sanitizer::decodeCharReferences( 'a & b' ),
			'Invalid ampersand'
		);
	}

	/**
	 * An unknown named entity is left untouched.
	 */
	public function testInvalidEntities() {
		$this->assertEquals(
			'&foo;',
			Sanitizer::decodeCharReferences( '&foo;' ),
			'Invalid named entity'
		);
	}

	/**
	 * An out-of-range numeric reference decodes to the UTF-8 replacement
	 * constant.
	 */
	public function testInvalidNumberedEntities() {
		$this->assertEquals(
			UtfNormal\Constants::UTF8_REPLACEMENT,
			Sanitizer::decodeCharReferences( "&#88888888888888;" ),
			'Invalid numbered entity'
		);
	}

	/**
	 * @dataProvider provideHtml5Tags
	 *
	 * @param string $tag Name of an HTML5 tag
	 * @param bool $escaped Whether the tag is expected to be escaped
	 *  (true) or kept and closed (false)
	 */
	public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
		MWTidy::setInstance( false );

		if ( $escaped ) {
			$this->assertEquals( "&lt;$tag&gt;",
				Sanitizer::removeHTMLtags( "<$tag>" )
			);
		} else {
			$this->assertEquals( "<$tag></$tag>\n",
				Sanitizer::removeHTMLtags( "<$tag>" )
			);
		}
	}

	/**
	 * Provider for testRemovehtmltagsOnHtml5Tags().
	 *
	 * @return array[] Rows of [ tag name, whether it is expected to be escaped ]
	 */
	public static function provideHtml5Tags() {
		$ESCAPED = true; # We want tag to be escaped
		$VERBATIM = false; # We want to keep the tag
		return [
			[ 'data', $VERBATIM ],
			[ 'mark', $VERBATIM ],
			[ 'time', $VERBATIM ],
			[ 'video', $ESCAPED ],
		];
	}

	/**
	 * Provider for testRemoveHTMLtags().
	 *
	 * @return array[] Rows of [ input HTML, expected output, assertion message ]
	 */
	public static function dataRemoveHTMLtags() {
		return [
			// former testSelfClosingTag
			[
				'<div>Hello world</div />',
				'<div>Hello world</div>',
				'Self-closing closing div'
			],
			// Make sure special nested HTML5 semantics are not broken
			// https://html.spec.whatwg.org/multipage/semantics.html#the-kbd-element
			[
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'Nested <kbd>.'
			],
			// https://html.spec.whatwg.org/multipage/semantics.html#the-sub-and-sup-elements
			[
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'Nested <var>.'
			],
			// https://html.spec.whatwg.org/multipage/semantics.html#the-dfn-element
			[
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<abbr> inside <dfn>',
			],
		];
	}

	/**
	 * @dataProvider dataRemoveHTMLtags
	 *
	 * @param string $input HTML handed to Sanitizer::removeHTMLtags()
	 * @param string $output Expected sanitized HTML
	 * @param string|null $msg Assertion message
	 */
	public function testRemoveHTMLtags( $input, $output, $msg = null ) {
		MWTidy::setInstance( false );
		$this->assertEquals( $output, Sanitizer::removeHTMLtags( $input ), $msg );
	}

	/**
	 * @dataProvider provideTagAttributesToDecode
	 *
	 * @param array $expected Expected attribute name => value map
	 * @param string $attributes Raw attribute string
	 * @param string $message Assertion message
	 */
	public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::decodeTagAttributes( $attributes ),
			$message
		);
	}

	/**
	 * Provider for testDecodeTagAttributes().
	 *
	 * @return array[] Rows of [ expected map, raw attribute string, message ]
	 */
	public static function provideTagAttributesToDecode() {
		return [
			[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
			[ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
			[ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
			[ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
			// "n" followed by U+0301 COMBINING ACUTE ACCENT, written as explicit
			// bytes so that editors or normalizers cannot silently turn it into
			// the precomposed letter, which would change what is being tested.
			[ [], "n\xcc\x81gh=bar", 'Combining accent is not allowed' ],
			[ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
			[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
			[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			],
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\'   baz="foo"',
				'Several attributes separated by multiple spaces'
			],
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				"foo='bar'\nbaz=\"foo\"",
				'Several attributes separated by a newline'
			],
			[ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
			[ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
			[ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
			[ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],

			# Invalid beginning
			[ [], '-foo=bar', 'Leading - is forbidden' ],
			[ [], '.foo=bar', 'Leading . is forbidden' ],
			[ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
			[ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed inside the attribute' ],
			[ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
			[ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
			[ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],

			# This bit is more relaxed than XML rules, but some extensions use
			# it, like ProofreadPage (see T29539)
			[ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
			[ [], 'foo$=baz', 'Symbols are not allowed' ],
			[ [], 'foo@=baz', 'Symbols are not allowed' ],
			[ [], 'foo~=baz', 'Symbols are not allowed' ],
			[
				[ 'foo' => '1[#^`*%w/(' ],
				'foo=1[#^`*%w/(',
				'All kind of characters are allowed as values'
			],
			[
				[ 'foo' => '1[#^`*%\'w/(' ],
				'foo="1[#^`*%\'w/("',
				'Single quotes are allowed if quoted by double quotes'
			],
			[
				[ 'foo' => '1[#^`*%"w/(' ],
				'foo=\'1[#^`*%"w/(\'',
				'Double quotes are allowed if quoted by single quotes'
			],
			[ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
			[ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
		];
	}

	/**
	 * Deprecated presentational attributes must pass through
	 * Sanitizer::fixTagAttributes() unchanged, prefixed by a single space.
	 *
	 * @dataProvider provideDeprecatedAttributes
	 *
	 * @param string $inputAttr Attribute string, e.g. 'align="left"'
	 * @param string $inputEl Element name the attribute belongs to
	 * @param string $message Assertion message
	 */
	public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
		$this->assertEquals( " $inputAttr",
			Sanitizer::fixTagAttributes( $inputAttr, $inputEl ),
			$message
		);
	}

	/**
	 * Provider for testDeprecatedAttributesUnaltered().
	 *
	 * @return array[] Rows of [ attribute string, element name ]
	 */
	public static function provideDeprecatedAttributes() {
		return [
			[ 'clear="left"', 'br' ],
			[ 'clear="all"', 'br' ],
			[ 'width="100"', 'td' ],
			[ 'nowrap="true"', 'td' ],
			[ 'nowrap=""', 'td' ],
			[ 'align="right"', 'td' ],
			[ 'align="center"', 'table' ],
			[ 'align="left"', 'tr' ],
			[ 'align="center"', 'div' ],
			[ 'align="left"', 'h1' ],
			[ 'align="left"', 'p' ],
		];
	}

	/**
	 * @dataProvider provideCssCommentsFixtures
	 *
	 * @param string $expected Expected result of Sanitizer::checkCss()
	 * @param string $css CSS text to check
	 * @param string $message Assertion message
	 */
	public function testCssCommentsChecking( $expected, $css, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::checkCss( $css ),
			$message
		);
	}

	/**
	 * Provider for testCssCommentsChecking().
	 *
	 * @return array[] Rows of [ expected output, input CSS, optional message ]
	 */
	public static function provideCssCommentsFixtures() {
		return [
			// Valid comments spanning entire input
			[ '/**/', '/**/' ],
			[ '/* comment */', '/* comment */' ],
			// Weird stuff
			[ ' ', '/****/' ],
			[ ' ', '/* /* */' ],
			[ 'display: block;', "display:/* foo */block;" ],
			[ 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
				'Backslash-escaped comments must be stripped (T30450)' ],
			[ '', '/* unfinished comment structure',
				'Remove anything after a comment-start token' ],
			[ '', "\\2f\\2a unifinished comment'",
				'Remove anything after a backslash-escaped comment-start token' ],
			[
				'/* insecure input */',
				'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\');'
			],
			[
				'/* insecure input */',
				'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
			],
			[ '/* insecure input */', 'width: expression(1+1);' ],
			[ '/* insecure input */', 'background-image: image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: -moz-image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
			[
				'/* insecure input */',
				'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
			],
			[
				'/* insecure input */',
				'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
			],
			[ '/* insecure input */', 'foo: attr( title, url );' ],
			[ '/* insecure input */', 'foo: attr( title url );' ],
			[ '/* insecure input */', 'foo: var(--evil-attribute)' ],
		];
	}

	/**
	 * @dataProvider provideEscapeHtmlAllowEntities
	 *
	 * @param string $expected Expected escaped output
	 * @param string $html Input handed to Sanitizer::escapeHtmlAllowEntities()
	 */
	public function testEscapeHtmlAllowEntities( $expected, $html ) {
		$this->assertEquals(
			$expected,
			Sanitizer::escapeHtmlAllowEntities( $html )
		);
	}

	/**
	 * Provider for testEscapeHtmlAllowEntities().
	 *
	 * @return array[] Rows of [ expected output, input ]
	 */
	public static function provideEscapeHtmlAllowEntities() {
		return [
			[ 'foo', 'foo' ],
			[ 'a¡b', 'a&#161;b' ],
			[ 'foo&#039;bar', "foo'bar" ],
			[ '&lt;script&gt;foo&lt;/script&gt;', '<script>foo</script>' ],
		];
	}

	/**
	 * Sanitizer::escapeId() with the 'noninitial' and 'legacy' options.
	 *
	 * @dataProvider provideEscapeId
	 *
	 * @param string $input Raw ID
	 * @param string $output Expected escaped ID
	 */
	public function testEscapeId( $input, $output ) {
		$this->assertEquals(
			$output,
			Sanitizer::escapeId( $input, [ 'noninitial', 'legacy' ] )
		);
	}

	/**
	 * Provider for testEscapeId().
	 *
	 * @return array[] Rows of [ raw ID, expected escaped ID ]
	 */
	public static function provideEscapeId() {
		return [
			[ '+', '.2B' ],
			[ '&', '.26' ],
			[ '=', '.3D' ],
			[ ':', ':' ],
			[ ';', '.3B' ],
			[ '@', '.40' ],
			[ '$', '.24' ],
			[ '-_.', '-_.' ],
			[ '!', '.21' ],
			[ '*', '.2A' ],
			[ '/', '.2F' ],
			[ '[]', '.5B.5D' ],
			[ '<>', '.3C.3E' ],
			[ '\'', '.27' ],
			[ '§', '.C2.A7' ],
			[ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ],
			[ 'A&B&amp;C&amp;amp;D&amp;amp;amp;E', 'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE' ],
		];
	}

	/**
	 * Escaping a space-separated reference list must give the same result
	 * as escaping each ID on its own and joining the results with a space.
	 *
	 * @dataProvider provideEscapeIdReferenceList
	 *
	 * @param string $referenceList Space-separated list of two IDs
	 * @param string $id1 First ID of the list
	 * @param string $id2 Second ID of the list
	 */
	public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
		// Expected value first, value under test second, so that a failure
		// report labels the two sides correctly.
		$this->assertEquals(
			Sanitizer::escapeIdForAttribute( $id1 )
				. ' '
				. Sanitizer::escapeIdForAttribute( $id2 ),
			Sanitizer::escapeIdReferenceList( $referenceList )
		);
	}

	/**
	 * Provider for testEscapeIdReferenceList().
	 *
	 * @return array[] Rows of [ reference list, first ID, second ID ]
	 */
	public static function provideEscapeIdReferenceList() {
		return [
			[ 'foo bar', 'foo', 'bar' ],
			[ '#1 #2', '#1', '#2' ],
			[ '+1 +2', '+1', '+2' ],
		];
	}

	/**
	 * @dataProvider provideIsReservedDataAttribute
	 *
	 * @param string $attr Attribute name
	 * @param bool $expected Expected return value (compared strictly)
	 */
	public function testIsReservedDataAttribute( $attr, $expected ) {
		$this->assertSame( $expected, Sanitizer::isReservedDataAttribute( $attr ) );
	}

	/**
	 * Provider for testIsReservedDataAttribute().
	 *
	 * @return array[] Rows of [ attribute name, expected result ]
	 */
	public static function provideIsReservedDataAttribute() {
		return [
			[ 'foo', false ],
			[ 'data', false ],
			[ 'data-foo', false ],
			[ 'data-mw', true ],
			[ 'data-ooui', true ],
			[ 'data-parsoid', true ],
			[ 'data-mw-foo', true ],
			[ 'data-ooui-foo', true ],
			[ 'data-mwfoo', true ], // could be false but this is how it's implemented currently
		];
	}

	/**
	 * Exercises Sanitizer::escapeIdFor<Stuff>() under a given fragment-mode
	 * configuration.
	 *
	 * @dataProvider provideEscapeIdForStuff
	 *
	 * @param string $stuff Suffix of the method to call: 'Attribute', 'Link'
	 *  or 'ExternalInterwiki'
	 * @param array $config Fragment modes; the last element is used for
	 *  $wgExternalInterwikiFragmentMode, the remaining ones for $wgFragmentMode
	 * @param string $id ID to escape
	 * @param string|bool $expected Expected return value
	 * @param int|null $mode Passed through as the second argument of the
	 *  called method
	 */
	public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
		$func = "Sanitizer::escapeIdFor{$stuff}";
		// Split the interwiki flavour off; what is left is $wgFragmentMode.
		$iwFlavor = array_pop( $config );
		$this->setMwGlobals( [
			'wgFragmentMode' => $config,
			'wgExternalInterwikiFragmentMode' => $iwFlavor,
		] );
		$escaped = call_user_func( $func, $id, $mode );
		$this->assertEquals( $expected, $escaped );
	}

	/**
	 * Provider for testEscapeIdForStuff().
	 *
	 * @return array[] Rows of [ method suffix, config, input ID, expected
	 *  output, optional mode ]
	 */
	public static function provideEscapeIdForStuff() {
		// Test inputs and outputs
		$text = 'foo тест_#%!\'()[]:<>&&amp;&amp;amp;';
		$legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' .
			'.26.26amp.3B.26amp.3Bamp.3B';
		$html5Encoded = 'foo_тест_#%!\'()[]:<>&&amp;&amp;amp;';

		// Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
		$legacy = [ 'legacy', 'legacy' ];
		$legacyNew = [ 'legacy', 'html5', 'legacy' ];
		$newLegacy = [ 'html5', 'legacy', 'legacy' ];
		$new = [ 'html5', 'legacy' ];
		$allNew = [ 'html5', 'html5' ];

		return [
			// Pure legacy: how MW worked before 2017
			[ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $legacy, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $legacy, $text, $legacyEncoded ],
			[ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],

			// Transition to a new world: legacy links with HTML5 fallback
			[ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer::ID_FALLBACK ],
			[ 'Link', $legacyNew, $text, $legacyEncoded ],
			[ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],

			// New world: HTML5 links, legacy fallbacks
			[ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
			[ 'Link', $newLegacy, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],

			// Distant future: no legacy fallbacks, but still linking to legacy wikis
			[ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $new, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $new, $text, $legacyEncoded ],

			// Just before the heat death of universe: external interwikis are also HTML5 \m/
			[ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $allNew, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $allNew, $text, $html5Encoded ],
		];
	}

	/**
	 * @dataProvider provideStripAllTags
	 *
	 * @param string $input HTML handed to Sanitizer::stripAllTags()
	 * @param string $expected Expected plain text
	 */
	public function testStripAllTags( $input, $expected ) {
		$this->assertEquals( $expected, Sanitizer::stripAllTags( $input ) );
	}

	/**
	 * Provider for testStripAllTags().
	 *
	 * @return array[] Rows of [ input HTML, expected plain text ]
	 */
	public static function provideStripAllTags() {
		return [
			[ '<p>Foo</p>', 'Foo' ],
			[ '<p id="one">Foo</p><p id="two">Bar</p>', 'Foo Bar' ],
			[ "<p>Foo</p>\n<p>Bar</p>", 'Foo Bar' ],
			[ '<p>Hello &lt;strong&gt; wor&#x6c;&#100; caf&eacute;</p>', 'Hello <strong> world café' ],
			[
				'<p><small data-foo=\'bar"&lt;baz>quux\'><a href="./Foo">Bar</a></small> Whee!</p>',
				'Bar Whee!'
			],
			[ '1<span class="<?php">2</span>3', '123' ],
			[ '1<span class="<?">2</span>3', '123' ],
		];
	}

	/**
	 * An unknown fragment mode must make escapeIdForAttribute() throw.
	 */
	public function testInvalidFragmentThrows() {
		$this->setMwGlobals( 'wgFragmentMode', [ 'boom!' ] );
		$this->assertCallThrows(
			function () {
				Sanitizer::escapeIdForAttribute( 'This should throw' );
			},
			'escapeIdForAttribute() must throw on an unknown fragment mode'
		);
	}

	/**
	 * A $wgFragmentMode without a primary entry must make
	 * escapeIdForAttribute() throw.
	 */
	public function testNoPrimaryFragmentModeThrows() {
		$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
		$this->assertCallThrows(
			function () {
				Sanitizer::escapeIdForAttribute( 'This should throw' );
			},
			'escapeIdForAttribute() must throw when no primary fragment mode is configured'
		);
	}

	/**
	 * A $wgFragmentMode without a primary entry must make
	 * escapeIdForLink() throw.
	 */
	public function testNoPrimaryFragmentModeThrows2() {
		$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
		$this->assertCallThrows(
			function () {
				Sanitizer::escapeIdForLink( 'This should throw' );
			},
			'escapeIdForLink() must throw when no primary fragment mode is configured'
		);
	}

	/**
	 * Asserts that invoking $call raises an exception.
	 *
	 * The flag is checked outside the try block so that the assertion's own
	 * failure exception can never be swallowed by the catch.
	 *
	 * NOTE(review): this accepts any Exception; narrow it to the concrete
	 * class thrown by Sanitizer once that has been confirmed.
	 *
	 * @param callable $call Code expected to throw
	 * @param string $message Assertion message used when nothing is thrown
	 */
	private function assertCallThrows( callable $call, $message ) {
		$thrown = false;
		try {
			$call();
		} catch ( Exception $e ) {
			$thrown = true;
		}
		$this->assertTrue( $thrown, $message );
	}
}
static setInstance( $instance)
Set the driver to be used.
Definition MWTidy.php:132
static destroySingleton()
Destroy the current singleton instance.
Definition MWTidy.php:139
static escapeIdForLink( $id)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid URL...
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition Sanitizer.php:74
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition Sanitizer.php:66
Unicode normalization routines for working with UTF-8 strings.
Definition UtfNormal.php:48
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition hooks.txt:2055
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition hooks.txt:2062
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place $output
Definition hooks.txt:2317
processing should stop and the error should be shown to the user * false
Definition hooks.txt:187
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
if(is_array($mode)) switch( $mode) $input