MediaWiki REL1_33
SanitizerTest.php
Go to the documentation of this file.
1<?php
2
/**
 * PHPUnit test case for the static helpers of Sanitizer: character reference
 * decoding, HTML tag and attribute sanitizing, CSS checking, and ID/fragment
 * escaping.
 *
 * NOTE(review): this class was reconstructed from a rendered source listing.
 * Runs of whitespace inside string literals may have been collapsed by the
 * renderer - confirm whitespace-sensitive fixtures against the upstream file.
 */
class SanitizerTest extends MediaWikiTestCase {

	/**
	 * Reset the tidy driver after every test.
	 *
	 * Several tests replace the driver through MWTidy::setInstance( false ).
	 * Destroying the singleton here keeps that override from leaking into
	 * whichever test runs next.
	 */
	protected function tearDown() {
		MWTidy::destroySingleton();
		parent::tearDown();
	}

	/**
	 * A named character reference is decoded to its UTF-8 character.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNamedEntities() {
		$this->assertEquals(
			"\xc3\xa9cole",
			Sanitizer::decodeCharReferences( '&eacute;cole' ),
			'decode named entities'
		);
	}

	/**
	 * Hexadecimal and decimal numeric references are both decoded.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNumericEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&#233;cole!" ),
			'decode numeric entities'
		);
	}

	/**
	 * Numeric and named references can be mixed in one string.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&eacute;cole!" ),
			'decode mixed numeric/named entities'
		);
	}

	/**
	 * Decoding happens exactly once: an escaped ampersand followed by what
	 * looks like a reference yields the literal reference text.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedComplexEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
			Sanitizer::decodeCharReferences(
				"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)"
			),
			'decode mixed complex entities'
		);
	}

	/**
	 * A bare ampersand that does not start a reference is left untouched.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidAmpersand() {
		$this->assertEquals(
			'a & b',
			Sanitizer::decodeCharReferences( 'a & b' ),
			'Invalid ampersand'
		);
	}

	/**
	 * An unknown named reference is left untouched.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidEntities() {
		$this->assertEquals(
			'&foo;',
			Sanitizer::decodeCharReferences( '&foo;' ),
			'Invalid named entity'
		);
	}

	/**
	 * An out-of-range numeric reference decodes to the UTF-8 replacement
	 * character.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidNumberedEntities() {
		$this->assertEquals(
			UtfNormal\Constants::UTF8_REPLACEMENT,
			Sanitizer::decodeCharReferences( "&#88888888888888;" ),
			'Invalid numbered entity'
		);
	}

	/**
	 * Check whether a given HTML5 tag is kept or escaped by removeHTMLtags().
	 *
	 * @dataProvider provideHtml5Tags
	 * @covers Sanitizer::removeHTMLtags
	 *
	 * @param string $tag Name of an HTML5 tag
	 * @param bool $escaped Whether the sanitizer is expected to escape the tag
	 */
	public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
		$this->hideDeprecated( 'disabling tidy' );
		$this->hideDeprecated( 'MWTidy::setInstance' );
		MWTidy::setInstance( false );

		if ( $escaped ) {
			$this->assertEquals( "&lt;$tag&gt;",
				Sanitizer::removeHTMLtags( "<$tag>" )
			);
		} else {
			$this->assertEquals( "<$tag></$tag>\n",
				Sanitizer::removeHTMLtags( "<$tag>" )
			);
		}
	}

	/**
	 * Provider for testRemovehtmltagsOnHtml5Tags().
	 *
	 * @return array[] Rows of [ tag name, whether the tag should be escaped ]
	 */
	public static function provideHtml5Tags() {
		$ESCAPED = true; # We want tag to be escaped
		$VERBATIM = false; # We want to keep the tag
		return [
			[ 'data', $VERBATIM ],
			[ 'mark', $VERBATIM ],
			[ 'time', $VERBATIM ],
			[ 'video', $ESCAPED ],
		];
	}

	/**
	 * Provider for testRemoveHTMLtags().
	 *
	 * @return array[] Rows of [ input, expected output, message ]
	 */
	public static function dataRemoveHTMLtags() {
		return [
			// former testSelfClosingTag
			[
				'<div>Hello world</div />',
				'<div>Hello world</div>',
				'Self-closing closing div'
			],
			// Make sure special nested HTML5 semantics are not broken
			// https://html.spec.whatwg.org/multipage/semantics.html#the-kbd-element
			[
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'Nested <kbd>.'
			],
			// https://html.spec.whatwg.org/multipage/semantics.html#the-sub-and-sup-elements
			[
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'Nested <var>.'
			],
			// https://html.spec.whatwg.org/multipage/semantics.html#the-dfn-element
			[
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<abbr> inside <dfn>',
			],
		];
	}

	/**
	 * removeHTMLtags() turns each input into the expected markup.
	 *
	 * @dataProvider dataRemoveHTMLtags
	 * @covers Sanitizer::removeHTMLtags
	 *
	 * @param string $input HTML handed to the sanitizer
	 * @param string $output Expected sanitized HTML
	 * @param string|null $msg Assertion message
	 */
	public function testRemoveHTMLtags( $input, $output, $msg = null ) {
		$this->hideDeprecated( 'disabling tidy' );
		$this->hideDeprecated( 'MWTidy::setInstance' );
		MWTidy::setInstance( false );
		$this->assertEquals( $output, Sanitizer::removeHTMLtags( $input ), $msg );
	}

	/**
	 * decodeTagAttributes() parses an attribute string into a name => value map.
	 *
	 * @dataProvider provideTagAttributesToDecode
	 * @covers Sanitizer::decodeTagAttributes
	 *
	 * @param array $expected Expected attribute map
	 * @param string $attributes Raw attribute text
	 * @param string $message Assertion message
	 */
	public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::decodeTagAttributes( $attributes ),
			$message
		);
	}

	/**
	 * Provider for testDecodeTagAttributes().
	 *
	 * @return array[] Rows of [ expected map, attribute text, message ]
	 */
	public static function provideTagAttributesToDecode() {
		return [
			[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
			[ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
			[ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
			[ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
			// "n" followed by U+0301 COMBINING ACUTE ACCENT, written as explicit
			// UTF-8 bytes so that an editor or tool normalizing the file to a
			// precomposed letter cannot silently change what this case tests.
			[ [], "n\xcc\x81gh=bar", 'Combining accent is not allowed' ],
			[ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
			[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
			[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
			// NOTE(review): the next three rows are identical as listed; the
			// separator between the two attributes presumably differed (e.g.
			// several spaces, tab, newline) before whitespace was collapsed -
			// confirm against upstream.
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			],
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			],
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			],
			[ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
			[ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
			[ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
			[ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],

			# Invalid beginning
			[ [], '-foo=bar', 'Leading - is forbidden' ],
			[ [], '.foo=bar', 'Leading . is forbidden' ],
			[ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
			[ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed inside the attribute' ],
			[ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
			[ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
			[ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],

			# This bit is more relaxed than XML rules, but some extensions use
			# it, like ProofreadPage (see T29539)
			[ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
			[ [], 'foo$=baz', 'Symbols are not allowed' ],
			[ [], 'foo@=baz', 'Symbols are not allowed' ],
			[ [], 'foo~=baz', 'Symbols are not allowed' ],
			[
				[ 'foo' => '1[#^`*%w/(' ],
				'foo=1[#^`*%w/(',
				'All kind of characters are allowed as values'
			],
			[
				[ 'foo' => '1[#^`*%\'w/(' ],
				'foo="1[#^`*%\'w/("',
				'Double quotes are allowed if quoted by single quotes'
			],
			[
				[ 'foo' => '1[#^`*%"w/(' ],
				'foo=\'1[#^`*%"w/(\'',
				'Single quotes are allowed if quoted by double quotes'
			],
			[ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
			[ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
		];
	}

	/**
	 * fixTagAttributes() returns deprecated presentational attributes
	 * unchanged, prefixed with a single space.
	 *
	 * @dataProvider provideDeprecatedAttributes
	 * @covers Sanitizer::fixTagAttributes
	 *
	 * @param string $inputAttr Attribute text
	 * @param string $inputEl Element name the attribute belongs to
	 * @param string $message Assertion message
	 */
	public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
		$this->assertEquals( " $inputAttr",
			Sanitizer::fixTagAttributes( $inputAttr, $inputEl ),
			$message
		);
	}

	/**
	 * Provider for testDeprecatedAttributesUnaltered().
	 *
	 * @return array[] Rows of [ attribute, element ]
	 */
	public static function provideDeprecatedAttributes() {
		return [
			[ 'clear="left"', 'br' ],
			[ 'clear="all"', 'br' ],
			[ 'width="100"', 'td' ],
			[ 'nowrap="true"', 'td' ],
			[ 'nowrap=""', 'td' ],
			[ 'align="right"', 'td' ],
			[ 'align="center"', 'table' ],
			[ 'align="left"', 'tr' ],
			[ 'align="center"', 'div' ],
			[ 'align="left"', 'h1' ],
			[ 'align="left"', 'p' ],
		];
	}

	/**
	 * checkCss() strips comments and rejects insecure constructs.
	 *
	 * @dataProvider provideCssCommentsFixtures
	 * @covers Sanitizer::checkCss
	 *
	 * @param string $expected Expected sanitized CSS
	 * @param string $css CSS handed to the sanitizer
	 * @param string $message Assertion message
	 */
	public function testCssCommentsChecking( $expected, $css, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::checkCss( $css ),
			$message
		);
	}

	/**
	 * Provider for testCssCommentsChecking().
	 *
	 * @return array[] Rows of [ expected, css, message (optional) ]
	 */
	public static function provideCssCommentsFixtures() {
		return [
			// Valid comments spanning entire input
			[ '/**/', '/**/' ],
			[ '/* comment */', '/* comment */' ],
			// Weird stuff
			[ ' ', '/****/' ],
			[ ' ', '/* /* */' ],
			[ 'display: block;', "display:/* foo */block;" ],
			[ 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
				'Backslash-escaped comments must be stripped (T30450)' ],
			[ '', '/* unfinished comment structure',
				'Remove anything after a comment-start token' ],
			[ '', "\\2f\\2a unifinished comment'",
				'Remove anything after a backslash-escaped comment-start token' ],
			[
				'/* insecure input */',
				'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\');'
			],
			[
				'/* insecure input */',
				'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
			],
			[ '/* insecure input */', 'width: expression(1+1);' ],
			[ '/* insecure input */', 'background-image: image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: -moz-image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
			[
				'/* insecure input */',
				'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
			],
			[
				'/* insecure input */',
				'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
			],
			[ '/* insecure input */', 'foo: attr( title, url );' ],
			[ '/* insecure input */', 'foo: attr( title url );' ],
			[ '/* insecure input */', 'foo: var(--evil-attribute)' ],
		];
	}

	/**
	 * escapeHtmlAllowEntities() escapes HTML while honouring character
	 * references present in the input.
	 *
	 * @dataProvider provideEscapeHtmlAllowEntities
	 * @covers Sanitizer::escapeHtmlAllowEntities
	 *
	 * @param string $expected Expected escaped output
	 * @param string $html Input text
	 */
	public function testEscapeHtmlAllowEntities( $expected, $html ) {
		$this->assertEquals(
			$expected,
			Sanitizer::escapeHtmlAllowEntities( $html )
		);
	}

	/**
	 * Provider for testEscapeHtmlAllowEntities().
	 *
	 * @return array[] Rows of [ expected, input ]
	 */
	public static function provideEscapeHtmlAllowEntities() {
		return [
			[ 'foo', 'foo' ],
			[ 'a¡b', 'a&#161;b' ],
			[ 'foo&#039;bar', "foo'bar" ],
			[ '&lt;script&gt;foo&lt;/script&gt;', '<script>foo</script>' ],
		];
	}

	/**
	 * escapeId() with the 'noninitial' and 'legacy' options produces the
	 * expected dot-encoded ID.
	 *
	 * @dataProvider provideEscapeId
	 * @covers Sanitizer::escapeId
	 *
	 * @param string $input ID to escape
	 * @param string $output Expected escaped ID
	 */
	public function testEscapeId( $input, $output ) {
		$this->assertEquals(
			$output,
			Sanitizer::escapeId( $input, [ 'noninitial', 'legacy' ] )
		);
	}

	/**
	 * Provider for testEscapeId().
	 *
	 * @return array[] Rows of [ input, expected output ]
	 */
	public static function provideEscapeId() {
		return [
			[ '+', '.2B' ],
			[ '&', '.26' ],
			[ '=', '.3D' ],
			[ ':', ':' ],
			[ ';', '.3B' ],
			[ '@', '.40' ],
			[ '$', '.24' ],
			[ '-_.', '-_.' ],
			[ '!', '.21' ],
			[ '*', '.2A' ],
			[ '/', '.2F' ],
			[ '[]', '.5B.5D' ],
			[ '<>', '.3C.3E' ],
			[ '\'', '.27' ],
			[ '§', '.C2.A7' ],
			[ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ],
			[ 'A&B&amp;C&amp;amp;D&amp;amp;amp;E', 'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE' ],
		];
	}

	/**
	 * Escaping a space-separated list of two IDs gives the same result as
	 * escaping each ID with escapeIdForAttribute() and joining with a space.
	 *
	 * @dataProvider provideEscapeIdReferenceList
	 * @covers Sanitizer::escapeIdReferenceList
	 *
	 * @param string $referenceList Space-separated list of two IDs
	 * @param string $id1 First ID of the list
	 * @param string $id2 Second ID of the list
	 */
	public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
		// Expected value first, value under test second, so that a failure
		// message labels the two sides correctly.
		$this->assertEquals(
			Sanitizer::escapeIdForAttribute( $id1 )
				. ' '
				. Sanitizer::escapeIdForAttribute( $id2 ),
			Sanitizer::escapeIdReferenceList( $referenceList )
		);
	}

	/**
	 * Provider for testEscapeIdReferenceList().
	 *
	 * @return array[] Rows of [ reference list, first ID, second ID ]
	 */
	public static function provideEscapeIdReferenceList() {
		return [
			[ 'foo bar', 'foo', 'bar' ],
			[ '#1 #2', '#1', '#2' ],
			[ '+1 +2', '+1', '+2' ],
		];
	}

	/**
	 * isReservedDataAttribute() classifies attribute names as expected.
	 *
	 * @dataProvider provideIsReservedDataAttribute
	 * @covers Sanitizer::isReservedDataAttribute
	 *
	 * @param string $attr Attribute name
	 * @param bool $expected Expected return value (compared strictly)
	 */
	public function testIsReservedDataAttribute( $attr, $expected ) {
		$this->assertSame( $expected, Sanitizer::isReservedDataAttribute( $attr ) );
	}

	/**
	 * Provider for testIsReservedDataAttribute().
	 *
	 * @return array[] Rows of [ attribute name, expected result ]
	 */
	public static function provideIsReservedDataAttribute() {
		return [
			[ 'foo', false ],
			[ 'data', false ],
			[ 'data-foo', false ],
			[ 'data-mw', true ],
			[ 'data-ooui', true ],
			[ 'data-parsoid', true ],
			[ 'data-mw-foo', true ],
			[ 'data-ooui-foo', true ],
			[ 'data-mwfoo', true ], // could be false but this is how it's implemented currently
		];
	}

	/**
	 * Call one of the Sanitizer::escapeIdFor* methods under a given fragment
	 * mode configuration and compare the result.
	 *
	 * @dataProvider provideEscapeIdForStuff
	 * @covers Sanitizer::escapeIdForAttribute
	 * @covers Sanitizer::escapeIdForLink
	 * @covers Sanitizer::escapeIdForExternalInterwiki
	 *
	 * @param string $stuff Suffix appended to "Sanitizer::escapeIdFor" to pick
	 *  the method under test
	 * @param string[] $config Fragment modes; the last element becomes
	 *  $wgExternalInterwikiFragmentMode, the remainder $wgFragmentMode
	 * @param string $id ID to escape
	 * @param string|bool $expected Expected return value
	 * @param mixed $mode One of the Sanitizer::ID_* constants, or null; passed
	 *  as the second argument of the method under test
	 */
	public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
		$func = "Sanitizer::escapeIdFor{$stuff}";
		// array_pop() removes the interwiki mode, leaving only $wgFragmentMode
		$iwFlavor = array_pop( $config );
		$this->setMwGlobals( [
			'wgFragmentMode' => $config,
			'wgExternalInterwikiFragmentMode' => $iwFlavor,
		] );
		$escaped = call_user_func( $func, $id, $mode );
		$this->assertEquals( $expected, $escaped );
	}

	/**
	 * Provider for testEscapeIdForStuff().
	 *
	 * @return array[] Rows of [ method suffix, config, ID, expected, mode (optional) ]
	 */
	public static function provideEscapeIdForStuff() {
		// Test inputs and outputs
		$text = 'foo тест_#%!\'()[]:<>&&amp;&amp;amp;';
		$legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' .
			'.26.26amp.3B.26amp.3Bamp.3B';
		$html5Encoded = 'foo_тест_#%!\'()[]:<>&&amp;&amp;amp;';

		// Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
		$legacy = [ 'legacy', 'legacy' ];
		$legacyNew = [ 'legacy', 'html5', 'legacy' ];
		$newLegacy = [ 'html5', 'legacy', 'legacy' ];
		$new = [ 'html5', 'legacy' ];
		$allNew = [ 'html5', 'html5' ];

		return [
			// Pure legacy: how MW worked before 2017
			[ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $legacy, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $legacy, $text, $legacyEncoded ],
			[ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],

			// Transition to a new world: legacy links with HTML5 fallback
			[ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer::ID_FALLBACK ],
			[ 'Link', $legacyNew, $text, $legacyEncoded ],
			[ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],

			// New world: HTML5 links, legacy fallbacks
			[ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
			[ 'Link', $newLegacy, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],

			// Distant future: no legacy fallbacks, but still linking to legacy wikis
			[ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $new, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $new, $text, $legacyEncoded ],

			// Just before the heat death of universe: external interwikis are also HTML5 \m/
			[ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $allNew, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $allNew, $text, $html5Encoded ],
		];
	}

	/**
	 * stripAllTags() reduces markup to the expected plain text.
	 *
	 * @dataProvider provideStripAllTags
	 * @covers Sanitizer::stripAllTags
	 *
	 * @param string $input HTML handed to the sanitizer
	 * @param string $expected Expected plain text
	 */
	public function testStripAllTags( $input, $expected ) {
		$this->assertEquals( $expected, Sanitizer::stripAllTags( $input ) );
	}

	/**
	 * Provider for testStripAllTags().
	 *
	 * @return array[] Rows of [ input, expected ]
	 */
	public static function provideStripAllTags() {
		return [
			[ '<p>Foo</p>', 'Foo' ],
			[ '<p id="one">Foo</p><p id="two">Bar</p>', 'Foo Bar' ],
			[ "<p>Foo</p>\n<p>Bar</p>", 'Foo Bar' ],
			[ '<p>Hello &lt;strong&gt; wor&#x6c;&#100; caf&eacute;</p>', 'Hello <strong> world café' ],
			[
				'<p><small data-foo=\'bar"&lt;baz>quux\'><a href="./Foo">Bar</a></small> Whee!</p>',
				'Bar Whee!'
			],
			[ '1<span class="<?php">2</span>3', '123' ],
			[ '1<span class="<?">2</span>3', '123' ],
			[ '<th>1</th><td>2</td>', '1 2' ],
		];
	}

	/**
	 * Configure an unknown fragment mode and escape an ID for an attribute.
	 *
	 * NOTE(review): the name says this must throw, but no expected exception
	 * is declared in the visible code; presumably it was declared through an
	 * annotation that is not shown here - confirm and restore it.
	 */
	public function testInvalidFragmentThrows() {
		$this->setMwGlobals( 'wgFragmentMode', [ 'boom!' ] );
		Sanitizer::escapeIdForAttribute( 'This should throw' );
	}

	/**
	 * Configure $wgFragmentMode with only key 666 and escape an ID for an
	 * attribute.
	 *
	 * NOTE(review): the name says this must throw, but no expected exception
	 * is declared in the visible code; presumably it was declared through an
	 * annotation that is not shown here - confirm and restore it.
	 */
	public function testNoPrimaryFragmentModeThrows() {
		$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
		Sanitizer::escapeIdForAttribute( 'This should throw' );
	}

	/**
	 * Configure $wgFragmentMode with only key 666 and escape an ID for a link.
	 *
	 * NOTE(review): the name says this must throw, but no expected exception
	 * is declared in the visible code; presumably it was declared through an
	 * annotation that is not shown here - confirm and restore it.
	 */
	public function testNoPrimaryFragmentModeThrows2() {
		$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
		Sanitizer::escapeIdForLink( 'This should throw' );
	}
}
static setInstance( $instance)
Set the driver to be used.
Definition MWTidy.php:85
static destroySingleton()
Destroy the current singleton instance.
Definition MWTidy.php:93
static escapeIdForLink( $id)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid URL...
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition Sanitizer.php:74
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition Sanitizer.php:66
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition hooks.txt:2004
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses & $html
Definition hooks.txt:2011
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place $output
Definition hooks.txt:2272
processing should stop and the error should be shown to the user * false
Definition hooks.txt:187
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
if(is_array($mode)) switch( $mode) $input