Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
15 / 15 |
|
100.00% |
6 / 6 |
CRAP | |
100.00% |
1 / 1 |
RemexStripTagHandler | |
100.00% |
15 / 15 |
|
100.00% |
6 / 6 |
11 | |
100.00% |
1 / 1 |
getResult | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
characters | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
startTag | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
endTag | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
isBlockLevelTag | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
isNonVisibleTag | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Parser; |
4 | |
5 | use Wikimedia\RemexHtml\Tokenizer\Attributes; |
6 | use Wikimedia\RemexHtml\Tokenizer\NullTokenHandler; |
7 | |
/**
 * Token handler that collects the visible text of a token stream while
 * discarding the markup itself. Helper class for Sanitizer::stripAllTags().
 *
 * Text found between the start and end tag of a non-visible element
 * (see NON_VISIBLE_TAGS) is skipped, and a single space is appended for
 * every start or end tag of a block-level element (see BLOCK_LEVEL_TAGS).
 *
 * @internal
 */
class RemexStripTagHandler extends NullTokenHandler {
	/**
	 * Tag names treated as block level.
	 *
	 * Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	 * retrieved on Sept 12, 2018. <br> is not block level but was added anyway.
	 * This is a complete list of all HTML block-level elements (although
	 * "block-level" is not technically defined for elements that are new
	 * in HTML5).
	 *
	 * Structured as tag => true so that membership is a plain key lookup.
	 */
	private const BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	/**
	 * Tag names whose character content must never reach the result.
	 * Structured as tag => true, like BLOCK_LEVEL_TAGS.
	 */
	private const NON_VISIBLE_TAGS = [
		'style' => true,
		'script' => true,
	];

	/** @var bool True between the start and end tag of a non-visible element */
	private $insideNonVisibleTag = false;

	/** @var string Text accumulated so far */
	private $text = '';

	/**
	 * @return string The text collected from all tokens handled so far
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Append the given slice of character data to the result, unless the
	 * handler is currently inside a non-visible element.
	 *
	 * @param string $text Buffer holding the character data
	 * @param int $start Offset of the data within $text
	 * @param int $length Length of the data within $text
	 * @param int $sourceStart Unused here
	 * @param int $sourceLength Unused here
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		if ( $this->insideNonVisibleTag ) {
			return;
		}
		$this->text .= substr( $text, $start, $length );
	}

	/**
	 * Enter non-visible mode when a non-visible element opens, and append
	 * a separating space when the tag is block level.
	 *
	 * @param string $name Tag name
	 * @param Attributes $attrs Unused here
	 * @param bool $selfClose Unused here
	 * @param int $sourceStart Unused here
	 * @param int $sourceLength Unused here
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		// Once set, the flag stays set until a matching kind of end tag arrives.
		$this->insideNonVisibleTag = $this->insideNonVisibleTag || $this->isNonVisibleTag( $name );
		$this->separateBlockLevelTag( $name );
	}

	/**
	 * Leave non-visible mode when a non-visible element closes, and append
	 * a separating space when the tag is block level.
	 *
	 * @param string $name Tag name
	 * @param int $sourceStart Unused here
	 * @param int $sourceLength Unused here
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		// Any non-visible end tag clears the flag; other end tags leave it alone.
		$this->insideNonVisibleTag = $this->insideNonVisibleTag && !$this->isNonVisibleTag( $name );
		$this->separateBlockLevelTag( $name );
	}

	/**
	 * Inject whitespace for typical block-level tags to prevent merging
	 * unrelated<br>words. Applied to start and end tags alike, and
	 * regardless of whether the handler is inside a non-visible element.
	 *
	 * @param string $name Tag name
	 */
	private function separateBlockLevelTag( $name ) {
		if ( !$this->isBlockLevelTag( $name ) ) {
			return;
		}
		$this->text .= ' ';
	}

	/**
	 * Detect block level tags. Of course CSS can make anything a block
	 * level tag, but this is still better than nothing.
	 *
	 * The name is trimmed and lower-cased before the lookup.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when the tag is an HTML block level element
	 */
	private function isBlockLevelTag( $tagName ) {
		$normalized = trim( $tagName );
		$normalized = strtolower( $normalized );
		return array_key_exists( $normalized, self::BLOCK_LEVEL_TAGS );
	}

	/**
	 * Detect tags which by default are non-visible items.
	 * Of course CSS can make anything non-visible,
	 * but this is still better than nothing.
	 *
	 * We use this primarily to hide TemplateStyles
	 * from output in notifications/emails etc.
	 *
	 * The name is trimmed and lower-cased before the lookup.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when the tag is an HTML element which should be filtered out
	 */
	private function isNonVisibleTag( $tagName ) {
		$normalized = trim( $tagName );
		$normalized = strtolower( $normalized );
		return array_key_exists( $normalized, self::NON_VISIBLE_TAGS );
	}

}