MediaWiki  master
SanitizerTest.php
Go to the documentation of this file.
1 <?php
2 
4 
9 
/**
 * Undo per-test global state once a test has finished.
 *
 * Several tests in this class replace the tidy driver through
 * MWTidy::setInstance( false ). The singleton is destroyed here so that
 * the override does not carry over into tests that run afterwards.
 */
protected function tearDown() {
	MWTidy::destroySingleton();
	parent::tearDown();
}
14 
/**
 * Check how Sanitizer::removeHTMLtags() treats a bare HTML5 opening tag.
 *
 * @covers Sanitizer::removeHTMLtags
 * @dataProvider provideHtml5Tags
 *
 * @param string $tag Name of an HTML tag, without angle brackets
 * @param bool $escaped True when the tag is expected to come back
 *  entity-escaped, false when it is expected to be kept and closed
 */
public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
	// Tidy is switched off on purpose; silence the related deprecation notices.
	$this->hideDeprecated( 'disabling tidy' );
	$this->hideDeprecated( 'MWTidy::setInstance' );
	MWTidy::setInstance( false );

	$expected = $escaped
		? "&lt;$tag&gt;"
		: "<$tag></$tag>\n";

	$this->assertEquals( $expected, Sanitizer::removeHTMLtags( "<$tag>" ) );
}
37 
/**
 * Provide HTML5 tags.
 *
 * @return array[] Rows of [ string $tag, bool $escaped ]
 */
public static function provideHtml5Tags() {
	$keepTag = false; # The tag should be kept
	$escapeTag = true; # The tag should be escaped

	$cases = [];
	foreach ( [ 'data', 'mark', 'time' ] as $allowedTag ) {
		$cases[] = [ $allowedTag, $keepTag ];
	}
	$cases[] = [ 'video', $escapeTag ];

	return $cases;
}
51 
/**
 * Data sets for testRemoveHTMLtags().
 *
 * Declared public static like the other providers in this class.
 *
 * @return array[] Rows of [ string $input, string $output, string $msg ]
 */
public static function dataRemoveHTMLtags() {
	return [
		// former testSelfClosingTag
		[
			'<div>Hello world</div />',
			'<div>Hello world</div>',
			'Self-closing closing div'
		],
		// Make sure special nested HTML5 semantics are not broken
		// https://html.spec.whatwg.org/multipage/semantics.html#the-kbd-element
		[
			'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
			'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
			'Nested <kbd>.'
		],
		// https://html.spec.whatwg.org/multipage/semantics.html#the-sub-and-sup-elements
		[
			'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
			'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
			'Nested <var>.'
		],
		// https://html.spec.whatwg.org/multipage/semantics.html#the-dfn-element
		[
			'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
			'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
			'<abbr> inside <dfn>',
		],
	];
}
81 
/**
 * Run Sanitizer::removeHTMLtags() on a fragment and compare with the
 * expected markup.
 *
 * @dataProvider dataRemoveHTMLtags
 * @covers Sanitizer::removeHTMLtags
 *
 * @param string $input HTML passed to the sanitizer
 * @param string $output Markup the sanitizer is expected to return
 * @param string|null $msg Optional message shown when the assertion fails
 */
public function testRemoveHTMLtags( $input, $output, $msg = null ) {
	// Tidy is switched off on purpose; silence the related deprecation notices.
	$this->hideDeprecated( 'disabling tidy' );
	$this->hideDeprecated( 'MWTidy::setInstance' );
	MWTidy::setInstance( false );

	$sanitized = Sanitizer::removeHTMLtags( $input );
	$this->assertEquals( $output, $sanitized, $msg );
}
92 
/**
 * Deprecated presentational attributes must pass through
 * Sanitizer::fixTagAttributes() unchanged, prefixed with a single space.
 *
 * @dataProvider provideDeprecatedAttributes
 * @covers Sanitizer::fixTagAttributes
 * @covers Sanitizer::validateTagAttributes
 *
 * @param string $inputAttr Attribute text, e.g. 'clear="left"'
 * @param string $inputEl Name of the element carrying the attribute
 * @param string $message Optional message shown when the assertion fails
 */
public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
	$fixed = Sanitizer::fixTagAttributes( $inputAttr, $inputEl );

	$this->assertEquals( " $inputAttr", $fixed, $message );
}
105 
/**
 * Data sets for testDeprecatedAttributesUnaltered().
 *
 * @return array[] Rows of [ string $attribute, string $element ]
 */
public static function provideDeprecatedAttributes() {
	// Attributes grouped by the element they are tried on; the rows are
	// emitted in exactly this order.
	$attributesByElement = [
		'br' => [ 'clear="left"', 'clear="all"' ],
		'td' => [ 'width="100"', 'nowrap="true"', 'nowrap=""', 'align="right"' ],
		'table' => [ 'align="center"' ],
		'tr' => [ 'align="left"' ],
		'div' => [ 'align="center"' ],
		'h1' => [ 'align="left"' ],
		'p' => [ 'align="left"' ],
	];

	$cases = [];
	foreach ( $attributesByElement as $element => $attributes ) {
		foreach ( $attributes as $attribute ) {
			$cases[] = [ $attribute, $element ];
		}
	}

	return $cases;
}
122 
/**
 * Compare the attributes kept by Sanitizer::validateTagAttributes() with
 * the expected set.
 *
 * @dataProvider provideValidateTagAttributes
 * @covers Sanitizer::validateTagAttributes
 * @covers Sanitizer::validateAttributes
 *
 * @param string $element Element name the attributes belong to
 * @param array $attribs Attribute name => value pairs given to the sanitizer
 * @param array $expected Attribute name => value pairs expected back
 */
public function testValidateTagAttributes( $element, $attribs, $expected ) {
	$this->assertArrayEquals(
		$expected,
		Sanitizer::validateTagAttributes( $attribs, $element ),
		false,
		true
	);
}
132 
/**
 * Data sets for testValidateTagAttributes().
 *
 * @return array[] Rows of [ string $element, array $attribs, array $expected ]
 */
public static function provideValidateTagAttributes() {
	$mathCase = [
		'math',
		[ 'id' => 'foo bar', 'bogus' => 'stripped', 'data-foo' => 'bar' ],
		[ 'id' => 'foo_bar', 'data-foo' => 'bar' ],
	];

	$metaCase = [
		'meta',
		[ 'id' => 'foo bar', 'itemprop' => 'foo', 'content' => 'bar' ],
		[ 'itemprop' => 'foo', 'content' => 'bar' ],
	];

	return [ $mathCase, $metaCase ];
}
145 
/**
 * Check the attribute names returned by the deprecated public
 * Sanitizer::attributeWhitelist() accessor.
 *
 * @dataProvider provideAttributeWhitelist
 * @covers Sanitizer::attributeWhitelist
 *
 * @param string $element Element name to look up
 * @param string[] $attribs Attribute names expected for that element
 */
public function testAttributeWhitelist( $element, $attribs ) {
	// The method under test is deprecated; silence the notices it raises.
	$this->hideDeprecated( 'Sanitizer::attributeWhitelist' );
	$this->hideDeprecated( 'Sanitizer::setupAttributeWhitelist' );

	$this->assertArrayEquals( $attribs, Sanitizer::attributeWhitelist( $element ) );
}
156 
/**
 * Check the keys of the array returned by
 * Sanitizer::attributeWhitelistInternal(), reached through
 * TestingAccessWrapper.
 *
 * @dataProvider provideAttributeWhitelist
 * @covers Sanitizer::attributeWhitelistInternal
 *
 * @param string $element Element name to look up
 * @param string[] $attribs Attribute names expected as keys of the result
 */
public function testAttributeWhitelistInternal( $element, $attribs ) {
	$wrapped = TestingAccessWrapper::newFromClass( Sanitizer::class );
	$whitelist = $wrapped->attributeWhitelistInternal( $element );

	$this->assertArrayEquals( $attribs, array_keys( $whitelist ) );
}
166 
/**
 * Data sets shared by testAttributeWhitelist() and
 * testAttributeWhitelistInternal().
 *
 * Declared static like the other providers in this class.
 *
 * @return array[] Rows of [ string $element, string[] $attributeNames ]
 */
public static function provideAttributeWhitelist() {
	return [
		[ 'math', [ 'class', 'style', 'id', 'title' ] ],
		[ 'meta', [ 'itemprop', 'content' ] ],
		[ 'link', [ 'itemprop', 'href', 'title' ] ],
	];
}
175 
/**
 * Exercise one of the Sanitizer::escapeIdFor*() methods under a given
 * fragment mode configuration.
 *
 * @dataProvider provideEscapeIdForStuff
 *
 * @param string $stuff Suffix selecting the method under test:
 *  Sanitizer::escapeIdFor{$stuff}
 * @param string[] $config Fragment modes; the last element is used as
 *  $wgExternalInterwikiFragmentMode and the remaining ones as $wgFragmentMode
 * @param string $id Raw ID to escape
 * @param string|bool $expected Expected return value of the method
 * @param int|null $mode Second argument handed to the method under test
 */
public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
	// Split off the interwiki flavour; what is left is $wgFragmentMode.
	$iwFlavor = array_pop( $config );
	$this->setMwGlobals( [
		'wgFragmentMode' => $config,
		'wgExternalInterwikiFragmentMode' => $iwFlavor,
	] );

	$func = "Sanitizer::escapeIdFor{$stuff}";
	$escaped = call_user_func( $func, $id, $mode );

	$this->assertEquals( $expected, $escaped );
}
200 
/**
 * Data sets for testEscapeIdForStuff().
 *
 * Declared static like the other providers in this class.
 *
 * @return array[] Rows of [ string $stuff, string[] $config, string $id,
 *  string|bool $expected, int $mode (optional) ]
 */
public static function provideEscapeIdForStuff() {
	// Test inputs and outputs
	$text = 'foo тест_#%!\'()[]:<>&&amp;&amp;amp;';
	$legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' .
		'.26.26amp.3B.26amp.3Bamp.3B';
	$html5Encoded = 'foo_тест_#%!\'()[]:<>&&amp;&amp;amp;';

	// Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
	$legacy = [ 'legacy', 'legacy' ];
	$legacyNew = [ 'legacy', 'html5', 'legacy' ];
	$newLegacy = [ 'html5', 'legacy', 'legacy' ];
	$new = [ 'html5', 'legacy' ];
	$allNew = [ 'html5', 'html5' ];

	return [
		// Pure legacy: how MW worked before 2017
		[ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $legacy, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $legacy, $text, $legacyEncoded ],
		[ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],

		// Transition to a new world: legacy links with HTML5 fallback
		[ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer::ID_FALLBACK ],
		[ 'Link', $legacyNew, $text, $legacyEncoded ],
		[ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],

		// New world: HTML5 links, legacy fallbacks
		[ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
		[ 'Link', $newLegacy, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],

		// Distant future: no legacy fallbacks, but still linking to legacy wikis
		[ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $new, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $new, $text, $legacyEncoded ],

		// Just before the heat death of universe: external interwikis are also HTML5 \m/
		[ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
		[ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
		[ 'Link', $allNew, $text, $html5Encoded ],
		[ 'ExternalInterwiki', $allNew, $text, $html5Encoded ],
	];
}
247 
/**
 * An unknown fragment mode name in $wgFragmentMode must be rejected with
 * an InvalidArgumentException.
 *
 * @covers Sanitizer::escapeIdInternal
 */
public function testInvalidFragmentThrows() {
	$this->setMwGlobals( 'wgFragmentMode', [ 'boom!' ] );

	// Declare the expectation before the call that is meant to throw.
	$this->expectException( InvalidArgumentException::class );
	Sanitizer::escapeIdForAttribute( 'This should throw' );
}
256 
262  $this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
263  Sanitizer::escapeIdForAttribute( 'This should throw' );
264  }
265 
271  $this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
272  Sanitizer::escapeIdForLink( 'This should throw' );
273  }
274 
/**
 * Test escapeIdReferenceList for consistency with escapeIdForAttribute.
 *
 * Escaping a space-separated list of two IDs must give the same string as
 * escaping each ID on its own and joining the results with a single space.
 *
 * @dataProvider provideEscapeIdReferenceList
 * @covers Sanitizer::escapeIdReferenceList
 *
 * @param string $referenceList Two IDs separated by a space
 * @param string $id1 First ID of the list
 * @param string $id2 Second ID of the list
 */
public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
	$expected = Sanitizer::escapeIdForAttribute( $id1 )
		. ' '
		. Sanitizer::escapeIdForAttribute( $id2 );

	$this->assertEquals(
		$expected,
		Sanitizer::escapeIdReferenceList( $referenceList )
	);
}
289 
/**
 * Data sets for testEscapeIdReferenceList().
 *
 * @return array[] Rows of [ string $referenceList, string $id1, string $id2 ]
 */
public static function provideEscapeIdReferenceList() {
	$idPairs = [
		[ 'foo', 'bar' ],
		[ '#1', '#2' ],
		[ '+1', '+2' ],
	];

	$cases = [];
	foreach ( $idPairs as list( $first, $second ) ) {
		// The reference list is the two IDs joined by one space.
		$cases[] = [ $first . ' ' . $second, $first, $second ];
	}

	return $cases;
}
298 
299 }
static setInstance( $instance)
Set the driver to be used.
Definition: MWTidy.php:85
if(is_array( $mode)) switch( $mode) $input
const ID_PRIMARY
Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
Definition: Sanitizer.php:66
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
testRemovehtmltagsOnHtml5Tags( $tag, $escaped)
Sanitizer::removeHTMLtags provideHtml5Tags.
static escapeIdReferenceList( $referenceString)
Given a string containing a space delimited list of ids, escape each id to match ids escaped by the e...
Definition: Sanitizer.php:1391
static provideValidateTagAttributes()
testNoPrimaryFragmentModeThrows()
UnexpectedValueException Sanitizer::escapeIdForAttribute()
testInvalidFragmentThrows()
InvalidArgumentException Sanitizer::escapeIdInternal()
static attributeWhitelist( $element)
Fetch the whitelist of acceptable attributes for a given element name.
Definition: Sanitizer.php:1759
testValidateTagAttributes( $element, $attribs, $expected)
provideValidateTagAttributes Sanitizer::validateTagAttributes Sanitizer::validateAttributes ...
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place $output
Definition: hooks.txt:2205
const ID_FALLBACK
Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false if no fallback...
Definition: Sanitizer.php:74
static provideHtml5Tags()
Provide HTML5 tags.
static destroySingleton()
Destroy the current singleton instance.
Definition: MWTidy.php:93
Sanitizer.
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:791
testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message='')
provideDeprecatedAttributes Sanitizer::fixTagAttributes Sanitizer::validateTagAttributes Sanitizer::v...
testEscapeIdReferenceList( $referenceList, $id1, $id2)
Test escapeIdReferenceList for consistency with escapeIdForAttribute.
static escapeIdForAttribute( $id, $mode=self::ID_PRIMARY)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid HTM...
Definition: Sanitizer.php:1295
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1972
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:767
static provideDeprecatedAttributes()
testAttributeWhitelistInternal( $element, $attribs)
provideAttributeWhitelist Sanitizer::attributeWhitelistInternal
testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode=null)
provideEscapeIdForStuff
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:52
static escapeIdForLink( $id)
Given a section name or other user-generated or otherwise unsafe string, escapes it to be a valid URL...
Definition: Sanitizer.php:1322
static provideEscapeIdReferenceList()
testNoPrimaryFragmentModeThrows2()
UnexpectedValueException Sanitizer::escapeIdForLink()
static fixTagAttributes( $text, $element, $sorted=false)
Take a tag soup fragment listing an HTML element's attributes and normalize it to well-formed XML...
Definition: Sanitizer.php:1136
static removeHTMLtags( $text, $processCallback=null, $args=[], $extratags=[], $removetags=[], $warnCallback=null)
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments. ...
Definition: Sanitizer.php:497
testRemoveHTMLtags( $input, $output, $msg=null)
dataRemoveHTMLtags Sanitizer::removeHTMLtags
testAttributeWhitelist( $element, $attribs)
provideAttributeWhitelist Sanitizer::attributeWhitelist