MediaWiki REL1_30
SanitizerTest.php
Go to the documentation of this file.
1<?php
2
10
/**
 * Reset shared state after each test.
 *
 * Several tests in this class call MWTidy::setInstance( false ). The
 * singleton is destroyed here so that override cannot leak into tests
 * that run afterwards.
 */
protected function tearDown() {
	MWTidy::destroySingleton();
	parent::tearDown();
}
15
/**
 * A named entity (&eacute;) must decode to its UTF-8 byte sequence.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeNamedEntities() {
	$decoded = Sanitizer::decodeCharReferences( '&eacute;cole' );

	$this->assertEquals( "\xc3\xa9cole", $decoded, 'decode named entities' );
}
26
/**
 * Hexadecimal (&#x108;) and decimal (&#233;) numeric references must both
 * decode to UTF-8.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeNumericEntities() {
	$decoded = Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&#233;cole!" );

	$this->assertEquals(
		"\xc4\x88io bonas dans l'\xc3\xa9cole!",
		$decoded,
		'decode numeric entities'
	);
}
37
/**
 * Numeric and named references may be mixed within one string.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeMixedEntities() {
	$decoded = Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&eacute;cole!" );

	$this->assertEquals(
		"\xc4\x88io bonas dans l'\xc3\xa9cole!",
		$decoded,
		'decode mixed numeric/named entities'
	);
}
48
/**
 * Decoding happens exactly once: an escaped ampersand (&amp; or &#38;)
 * becomes a literal "&", and the reference-looking text that follows it
 * must not be decoded a second time.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeMixedComplexEntities() {
	$this->assertEquals(
		"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
		Sanitizer::decodeCharReferences(
			"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)"
		),
		'decode mixed complex entities'
	);
}
61
/**
 * A bare ampersand that does not start a character reference must pass
 * through decoding unchanged.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testInvalidAmpersand() {
	$decoded = Sanitizer::decodeCharReferences( 'a & b' );

	$this->assertEquals( 'a & b', $decoded, 'Invalid ampersand' );
}
72
/**
 * An unknown named entity must be left as written.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testInvalidEntities() {
	$decoded = Sanitizer::decodeCharReferences( '&foo;' );

	$this->assertEquals( '&foo;', $decoded, 'Invalid named entity' );
}
83
/**
 * The out-of-range numeric reference used here must decode to the
 * UTF-8 replacement character.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testInvalidNumberedEntities() {
	$decoded = Sanitizer::decodeCharReferences( "&#88888888888888;" );

	$this->assertEquals(
		UtfNormal\Constants::UTF8_REPLACEMENT,
		$decoded,
		'Invalid numbered entity'
	);
}
94
/**
 * Check how Sanitizer::removeHTMLtags() treats a lone opening HTML5 tag.
 *
 * @dataProvider provideHtml5Tags
 * @covers Sanitizer::removeHTMLtags
 *
 * @param string $tag Name of an HTML5 tag
 * @param bool $escaped Whether the sanitizer is expected to escape the
 *  tag (true) or to keep it and append the closing tag (false)
 */
public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
	MWTidy::setInstance( false );

	// Escaped tags come back as entities; kept tags are closed and
	// followed by a newline.
	$expected = $escaped ? "&lt;$tag&gt;" : "<$tag></$tag>\n";

	$this->assertEquals( $expected, Sanitizer::removeHTMLtags( "<$tag>" ) );
}
115
/**
 * Provide HTML5 tags.
 *
 * @return array[] Rows of [ tag name, whether the tag must be escaped ]
 */
public static function provideHtml5Tags() {
	// Readable names for the second column.
	$escaped = true; // the tag is expected to come back escaped
	$verbatim = false; // the tag is expected to be kept

	$cases = [];
	foreach ( [ 'data', 'mark', 'time' ] as $keptTag ) {
		$cases[] = [ $keptTag, $verbatim ];
	}
	$cases[] = [ 'video', $escaped ];

	return $cases;
}
129
/**
 * Data provider for testRemoveHTMLtags().
 *
 * @return array[] Rows of [ input HTML, expected output, message ]
 */
public static function dataRemoveHTMLtags() {
	return [
		// former testSelfClosingTag
		[
			'<div>Hello world</div />',
			'<div>Hello world</div>',
			'Self-closing closing div'
		],
		// Make sure special nested HTML5 semantics are not broken
		// https://html.spec.whatwg.org/multipage/semantics.html#the-kbd-element
		[
			'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
			'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
			'Nested <kbd>.'
		],
		// https://html.spec.whatwg.org/multipage/semantics.html#the-sub-and-sup-elements
		[
			'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
			'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
			'Nested <var>.'
		],
		// https://html.spec.whatwg.org/multipage/semantics.html#the-dfn-element
		[
			'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
			'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
			'<abbr> inside <dfn>',
		],
	];
}
159
/**
 * Run Sanitizer::removeHTMLtags() over each fixture and compare the
 * result with the expected markup.
 *
 * @dataProvider dataRemoveHTMLtags
 * @covers Sanitizer::removeHTMLtags
 *
 * @param string $input HTML handed to the sanitizer
 * @param string $output Expected sanitized HTML
 * @param string|null $msg Assertion message
 */
public function testRemoveHTMLtags( $input, $output, $msg = null ) {
	MWTidy::setInstance( false );
	$this->assertEquals( $output, Sanitizer::removeHTMLtags( $input ), $msg );
}
168
/**
 * Parse a raw attribute string and compare the resulting attribute map.
 *
 * @dataProvider provideTagAttributesToDecode
 * @covers Sanitizer::decodeTagAttributes
 *
 * @param array $expected Expected attribute name => value map
 * @param string $attributes Raw attribute text as it would appear in a tag
 * @param string $message Assertion message
 */
public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
	$decoded = Sanitizer::decodeTagAttributes( $attributes );

	$this->assertEquals( $expected, $decoded, $message );
}
179
/**
 * Data provider for testDecodeTagAttributes().
 *
 * @return array[] Rows of [ expected attribute map, raw attribute string, message ]
 */
public static function provideTagAttributesToDecode() {
	return [
		[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
		[ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
		[ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
		[ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
		// "n" followed by U+0301 COMBINING ACUTE ACCENT, spelled as bytes so
		// that Unicode normalization of this file cannot silently turn it
		// into the precomposed letter.
		[ [], "n\xcc\x81gh=bar", 'Combining accent is not allowed' ],
		[ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
		[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
		[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],

		# Several attributes, with different whitespace between them
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			'foo=\'bar\' baz="foo"',
			'Several attributes'
		],
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			'foo=\'bar\'   baz="foo"',
			'Several attributes separated by multiple spaces'
		],
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			"foo='bar'\tbaz=\"foo\"",
			'Several attributes separated by a tab'
		],
		[ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
		[ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
		[ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
		[ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],

		# Invalid beginning
		[ [], '-foo=bar', 'Leading - is forbidden' ],
		[ [], '.foo=bar', 'Leading . is forbidden' ],

		# The same characters are fine after the first position
		[ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
		[ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed inside the attribute' ],
		[ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
		[ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
		[ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],

		# This bit is more relaxed than XML rules, but some extensions use
		# it, like ProofreadPage (see T29539)
		[ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
		[ [], 'foo$=baz', 'Symbols are not allowed' ],
		[ [], 'foo@=baz', 'Symbols are not allowed' ],
		[ [], 'foo~=baz', 'Symbols are not allowed' ],
		[
			[ 'foo' => '1[#^`*%w/(' ],
			'foo=1[#^`*%w/(',
			'All kind of characters are allowed as values'
		],
		[
			// The value contains a single quote; the attribute is double-quoted
			[ 'foo' => '1[#^`*%\'w/(' ],
			'foo="1[#^`*%\'w/("',
			'Single quotes are allowed if quoted by double quotes'
		],
		[
			// The value contains a double quote; the attribute is single-quoted
			[ 'foo' => '1[#^`*%"w/(' ],
			'foo=\'1[#^`*%"w/(\'',
			'Double quotes are allowed if quoted by single quotes'
		],
		[ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
		[ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
	];
}
244
/**
 * Deprecated presentational attributes must be returned unchanged by
 * Sanitizer::fixTagAttributes(), prefixed with a single space.
 *
 * @dataProvider provideDeprecatedAttributes
 * @covers Sanitizer::fixTagAttributes
 *
 * @param string $inputAttr Attribute text, e.g. 'align="left"'
 * @param string $inputEl Element name the attribute is attached to
 * @param string $message Assertion message
 */
public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
	$fixed = Sanitizer::fixTagAttributes( $inputAttr, $inputEl );

	$this->assertEquals( " $inputAttr", $fixed, $message );
}
255
/**
 * Data provider for testDeprecatedAttributesUnaltered().
 *
 * @return array[] Rows of [ attribute, element ]
 */
public static function provideDeprecatedAttributes() {
	// Element name => attributes to try on that element. Iteration order
	// determines the order of the generated rows.
	$attributesByElement = [
		'br' => [ 'clear="left"', 'clear="all"' ],
		'td' => [ 'width="100"', 'nowrap="true"', 'nowrap=""', 'align="right"' ],
		'table' => [ 'align="center"' ],
		'tr' => [ 'align="left"' ],
		'div' => [ 'align="center"' ],
		'h1' => [ 'align="left"' ],
		'p' => [ 'align="left"' ],
	];

	$cases = [];
	foreach ( $attributesByElement as $element => $attributes ) {
		foreach ( $attributes as $attribute ) {
			$cases[] = [ $attribute, $element ];
		}
	}

	return $cases;
}
272
/**
 * Feed CSS through Sanitizer::checkCss() and compare the result.
 *
 * @dataProvider provideCssCommentsFixtures
 * @covers Sanitizer::checkCss
 *
 * @param string $expected Expected return value of checkCss()
 * @param string $css CSS text to check
 * @param string $message Assertion message
 */
public function testCssCommentsChecking( $expected, $css, $message = '' ) {
	$checked = Sanitizer::checkCss( $css );

	$this->assertEquals( $expected, $checked, $message );
}
283
/**
 * Data provider for testCssCommentsChecking().
 *
 * @return array[] Rows of [ expected output, CSS input, message (optional) ]
 */
public static function provideCssCommentsFixtures() {
	$cases = [
		// Valid comments spanning entire input
		[ '/**/', '/**/' ],
		[ '/* comment */', '/* comment */' ],
		// Weird stuff
		[ ' ', '/****/' ],
		[ ' ', '/* /* */' ],
		[ 'display: block;', "display:/* foo */block;" ],
		[
			'display: block;',
			"display:\\2f\\2a foo \\2a\\2f block;",
			'Backslash-escaped comments must be stripped (T30450)'
		],
		[
			'',
			'/* unfinished comment structure',
			'Remove anything after a comment-start token'
		],
		[
			'',
			"\\2f\\2a unifinished comment'",
			'Remove anything after a backslash-escaped comment-start token'
		],
	];

	// Every input below is expected to be replaced wholesale by the
	// "insecure input" marker.
	$insecureInputs = [
		'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
			. '(src=\'asdf.png\',sizingMethod=\'scale\');',
		'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
			. '(src=\'asdf.png\',sizingMethod=\'scale\')";',
		'width: expression(1+1);',
		'background-image: image(asdf.png);',
		'background-image: -webkit-image(asdf.png);',
		'background-image: -moz-image(asdf.png);',
		'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);',
		'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);',
		'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);',
		'foo: attr( title, url );',
		'foo: attr( title url );',
		'foo: var(--evil-attribute)',
	];
	foreach ( $insecureInputs as $css ) {
		$cases[] = [ '/* insecure input */', $css ];
	}

	return $cases;
}
328
/**
 * Compare Sanitizer::escapeHtmlAllowEntities() output with the expected
 * escaped text.
 *
 * @dataProvider provideEscapeHtmlAllowEntities
 * @covers Sanitizer::escapeHtmlAllowEntities
 *
 * @param string $expected Expected escaped output
 * @param string $html Input text
 */
public function testEscapeHtmlAllowEntities( $expected, $html ) {
	$escaped = Sanitizer::escapeHtmlAllowEntities( $html );

	$this->assertEquals( $expected, $escaped );
}
339
/**
 * Data provider for testEscapeHtmlAllowEntities().
 *
 * @return array[] Rows of [ expected output, input ]
 */
public static function provideEscapeHtmlAllowEntities() {
	// Input => expected output; flipped into [ expected, input ] rows below.
	$expectedByInput = [
		'foo' => 'foo',
		'a&#161;b' => 'a¡b',
		"foo'bar" => 'foo&#039;bar',
		'<script>foo</script>' => '&lt;script&gt;foo&lt;/script&gt;',
	];

	$cases = [];
	foreach ( $expectedByInput as $input => $expected ) {
		$cases[] = [ $expected, $input ];
	}

	return $cases;
}
348
/**
 * Test Sanitizer::escapeId with the 'noninitial' and 'legacy' options.
 *
 * @dataProvider provideEscapeId
 * @covers Sanitizer::escapeId
 *
 * @param string $input Raw id
 * @param string $output Expected escaped id
 */
public function testEscapeId( $input, $output ) {
	$escaped = Sanitizer::escapeId( $input, [ 'noninitial', 'legacy' ] );

	$this->assertEquals( $output, $escaped );
}
361
/**
 * Data provider for testEscapeId().
 *
 * @return array[] Rows of [ raw id, expected escaped id ]
 */
public static function provideEscapeId() {
	// Raw id => expected escaped id
	$escapedByInput = [
		'+' => '.2B',
		'&' => '.26',
		'=' => '.3D',
		':' => ':',
		';' => '.3B',
		'@' => '.40',
		'$' => '.24',
		'-_.' => '-_.',
		'!' => '.21',
		'*' => '.2A',
		'/' => '.2F',
		'[]' => '.5B.5D',
		'<>' => '.3C.3E',
		'\'' => '.27',
		'§' => '.C2.A7',
		'Test:A & B/Here' => 'Test:A_.26_B.2FHere',
		'A&B&amp;C&amp;amp;D&amp;amp;amp;E' =>
			'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE',
	];

	$cases = [];
	foreach ( $escapedByInput as $input => $escaped ) {
		$cases[] = [ $input, $escaped ];
	}

	return $cases;
}
383
/**
 * Test escapeIdReferenceList for consistency with escapeIdForAttribute.
 *
 * @dataProvider provideEscapeIdReferenceList
 * @covers Sanitizer::escapeIdReferenceList
 *
 * @param string $referenceList Two ids separated by a space
 * @param string $id1 The first id of the list
 * @param string $id2 The second id of the list
 */
public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
	// The list escaper is the subject under test, so the ids escaped one
	// by one form the expected value.
	$expected = Sanitizer::escapeIdForAttribute( $id1 )
		. ' '
		. Sanitizer::escapeIdForAttribute( $id2 );

	$this->assertEquals(
		$expected,
		Sanitizer::escapeIdReferenceList( $referenceList, 'noninitial' )
	);
}
398
/**
 * Data provider for testEscapeIdReferenceList().
 *
 * @return array[] Rows of [ space-separated list, first id, second id ]
 */
public static function provideEscapeIdReferenceList() {
	$idPairs = [
		[ 'foo', 'bar' ],
		[ '#1', '#2' ],
		[ '+1', '+2' ],
	];

	$cases = [];
	foreach ( $idPairs as $pair ) {
		// The reference list is the two ids joined by one space.
		$cases[] = [ implode( ' ', $pair ), $pair[0], $pair[1] ];
	}

	return $cases;
}
407
/**
 * Check which attribute names Sanitizer::isReservedDataAttribute()
 * reports as reserved. assertSame is used so the result must be a real
 * boolean.
 *
 * @dataProvider provideIsReservedDataAttribute
 * @covers Sanitizer::isReservedDataAttribute
 *
 * @param string $attr Attribute name
 * @param bool $expected Whether the name is expected to be reserved
 */
public function testIsReservedDataAttribute( $attr, $expected ) {
	$isReserved = Sanitizer::isReservedDataAttribute( $attr );

	$this->assertSame( $expected, $isReserved );
}
414
/**
 * Data provider for testIsReservedDataAttribute().
 *
 * @return array[] Rows of [ attribute name, expected result ]
 */
public static function provideIsReservedDataAttribute() {
	// Attribute name => whether it is expected to be reserved
	$reservedByAttribute = [
		'foo' => false,
		'data' => false,
		'data-foo' => false,
		'data-mw' => true,
		'data-ooui' => true,
		'data-parsoid' => true,
		'data-mw-foo' => true,
		'data-ooui-foo' => true,
		// could be false but this is how it's implemented currently
		'data-mwfoo' => true,
	];

	$cases = [];
	foreach ( $reservedByAttribute as $attribute => $isReserved ) {
		$cases[] = [ $attribute, $isReserved ];
	}

	return $cases;
}
428
/**
 * Exercise Sanitizer::escapeIdFor<Stuff>() under a given fragment mode
 * configuration.
 *
 * @dataProvider provideEscapeIdForStuff
 *
 * @param string $stuff Suffix of the method to call: 'Attribute', 'Link'
 *  or 'ExternalInterwiki'
 * @param string[] $config Fragment mode settings; the last element is
 *  used for $wgExternalInterwikiFragmentMode, the rest for $wgFragmentMode
 * @param string $id Id to escape
 * @param string|bool $expected Expected return value
 * @param int|null $mode Second argument handed to the method; the
 *  provider only sets it for 'Attribute' rows
 */
public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
	// Split the interwiki flavor off the end; what remains is the list
	// for $wgFragmentMode.
	$iwFlavor = array_pop( $config );
	$this->setMwGlobals( [
		'wgFragmentMode' => $config,
		'wgExternalInterwikiFragmentMode' => $iwFlavor,
	] );

	$func = "Sanitizer::escapeIdFor{$stuff}";
	$escaped = call_user_func( $func, $id, $mode );

	$this->assertEquals( $expected, $escaped );
}
453
/**
 * Data provider for testEscapeIdForStuff().
 *
 * Static like every other provider in this class.
 *
 * @return array[] Rows of [ method suffix, config, id, expected, mode (optional) ]
 */
public static function provideEscapeIdForStuff() {
	// Test inputs and outputs
	$text = 'foo тест_#%!\'()[]:<>&&amp;&amp;amp;';
	$legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' .
		'.26.26amp.3B.26amp.3Bamp.3B';
	$html5Encoded = 'foo_тест_#%!\'()[]:<>&&amp;&amp;amp;';
	$html5Experimental = 'foo_тест_!_()[]:<>_amp;_amp;amp;';

	// Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
	$legacy = [ 'legacy', 'legacy' ];
	$legacyNew = [ 'legacy', 'html5', 'legacy' ];
	$newLegacy = [ 'html5', 'legacy', 'legacy' ];
	$new = [ 'html5', 'legacy' ];
	$allNew = [ 'html5', 'html5' ];
	$experimentalLegacy = [ 'html5-legacy', 'legacy', 'legacy' ];
	$newExperimental = [ 'html5', 'html5-legacy', 'legacy' ];

	return [
		// Pure legacy: how MW worked before 2017
		[ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $legacy, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $legacy, $text, $legacyEncoded ],
		[ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],

		// Transition to a new world: legacy links with HTML5 fallback
		[ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer::ID_FALLBACK ],
		[ 'Link', $legacyNew, $text, $legacyEncoded ],
		[ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],

		// New world: HTML5 links, legacy fallbacks
		[ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
		[ 'Link', $newLegacy, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],

		// Distant future: no legacy fallbacks, but still linking to legacy wikis
		[ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $new, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $new, $text, $legacyEncoded ],

		// Just before the heat death of universe: external interwikis are also HTML5 \m/
		[ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $allNew, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $allNew, $text, $html5Encoded ],

		// Someone flipped $wgExperimentalHtmlIds on
		[ 'Attribute', $experimentalLegacy, $text, $html5Experimental, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $experimentalLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
		[ 'Link', $experimentalLegacy, $text, $html5Experimental ],
		[ 'ExternalInterwiki', $experimentalLegacy, $text, $legacyEncoded ],

		// Migration from $wgExperimentalHtmlIds to modern HTML5
		[ 'Attribute', $newExperimental, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $newExperimental, $text, $html5Experimental, Sanitizer::ID_FALLBACK ],
		[ 'Link', $newExperimental, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $newExperimental, $text, $legacyEncoded ],
	];
}
515
/**
 * An unknown fragment mode name must make the escaper throw.
 *
 * @expectedException InvalidArgumentException
 * @covers Sanitizer::escapeIdInternal()
 */
public function testInvalidFragmentThrows() {
	$this->setMwGlobals( 'wgFragmentMode', [ 'boom!' ] );
	Sanitizer::escapeIdForAttribute( 'This should throw' );
}
524
/**
 * $wgFragmentMode without an entry at index 0 must make
 * escapeIdForAttribute() throw.
 *
 * @expectedException UnexpectedValueException
 * @covers Sanitizer::escapeIdForAttribute()
 */
public function testNoPrimaryFragmentModeThrows() {
	// The only mode is stored under key 666, so index 0 is unset.
	$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
	Sanitizer::escapeIdForAttribute( 'This should throw' );
}
533
/**
 * $wgFragmentMode without an entry at index 0 must make
 * escapeIdForLink() throw.
 *
 * @expectedException UnexpectedValueException
 * @covers Sanitizer::escapeIdForLink()
 */
public function testNoPrimaryFragmentModeThrows2() {
	// The only mode is stored under key 666, so index 0 is unset.
	$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
	Sanitizer::escapeIdForLink( 'This should throw' );
}
542}
static setInstance( $instance)
Set the driver to be used.
Definition MWTidy.php:156
static destroySingleton()
Destroy the current singleton instance.
Definition MWTidy.php:163
setMwGlobals( $pairs, $value=null)
Sets a global, maintaining a stashed version of the previous global to be restored in tearDown.
testIsReservedDataAttribute( $attr, $expected)
provideIsReservedDataAttribute
testDecodeMixedEntities()
Sanitizer::decodeCharReferences.
testNoPrimaryFragmentModeThrows2()
Expects UnexpectedValueException; covers Sanitizer::escapeIdForLink().
static provideHtml5Tags()
Provide HTML5 tags.
testInvalidEntities()
Sanitizer::decodeCharReferences.
testDecodeTagAttributes( $expected, $attributes, $message='')
provideTagAttributesToDecode Sanitizer::decodeTagAttributes
testRemoveHTMLtags( $input, $output, $msg=null)
dataRemoveHTMLtags Sanitizer::removeHTMLtags
static provideDeprecatedAttributes()
testInvalidAmpersand()
Sanitizer::decodeCharReferences.
testEscapeHtmlAllowEntities( $expected, $html)
provideEscapeHtmlAllowEntities Sanitizer::escapeHtmlAllowEntities
static provideEscapeHtmlAllowEntities()
testDecodeNamedEntities()
Sanitizer::decodeCharReferences.
testDecodeMixedComplexEntities()
Sanitizer::decodeCharReferences.
testInvalidNumberedEntities()
Sanitizer::decodeCharReferences.
testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message='')
provideDeprecatedAttributes Sanitizer::fixTagAttributes
static provideCssCommentsFixtures()
static provideEscapeIdReferenceList()
static provideTagAttributesToDecode()
testDecodeNumericEntities()
Sanitizer::decodeCharReferences.
testEscapeIdReferenceList( $referenceList, $id1, $id2)
Test escapeIdReferenceList for consistency with escapeIdForAttribute.
testNoPrimaryFragmentModeThrows()
Expects UnexpectedValueException; covers Sanitizer::escapeIdForAttribute().
static provideEscapeId()
static provideIsReservedDataAttribute()
testEscapeId( $input, $output)
Test Sanitizer::escapeId.
testInvalidFragmentThrows()
Expects InvalidArgumentException; covers Sanitizer::escapeIdInternal().
testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode=null)
provideEscapeIdForStuff
testRemovehtmltagsOnHtml5Tags( $tag, $escaped)
Covers Sanitizer::removeHTMLtags; data provider: provideHtml5Tags.
testCssCommentsChecking( $expected, $css, $message='')
provideCssCommentsFixtures Sanitizer::checkCss
Unicode normalization routines for working with UTF-8 strings.
Definition UtfNormal.php:48
the array() calling protocol came about after MediaWiki 1.4rc1.
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title after the basic globals have been set but before ordinary actions take place $output
Definition hooks.txt:2225
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition hooks.txt:1976
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition hooks.txt:1983
processing should stop and the error should be shown to the user * false
Definition hooks.txt:187
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
if(is_array($mode)) switch( $mode) $input