Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
15 / 15 |
|
100.00% |
6 / 6 |
CRAP | |
100.00% |
1 / 1 |
RemexStripTagHandler | |
100.00% |
15 / 15 |
|
100.00% |
6 / 6 |
11 | |
100.00% |
1 / 1 |
getResult | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
characters | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
startTag | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
endTag | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
isBlockLevelTag | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
isNonVisibleTag | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Parser; |
4 | |
5 | use Wikimedia\RemexHtml\Tokenizer\Attributes; |
6 | use Wikimedia\RemexHtml\Tokenizer\NullTokenHandler; |
7 | |
/**
 * Token handler that collects the plain text of an HTML token stream.
 *
 * Helper class for Sanitizer::stripAllTags(). Character data is appended to
 * an internal buffer, except while a non-visible element (style, script) is
 * open. A single space is emitted around typical block-level tags so that
 * words separated only by markup do not run together.
 *
 * @internal
 */
class RemexStripTagHandler extends NullTokenHandler {
	/** @var bool True between the start and end tag of a non-visible element */
	private $insideNonVisibleTag = false;
	/** @var string Text accumulated so far */
	private $text = '';

	/**
	 * Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	 * retrieved on Sept 12, 2018. <br> is not block level but was added anyway.
	 * This is the complete list of HTML block-level elements (although
	 * "block-level" is not technically defined for elements that are new in
	 * HTML5).
	 * Structured as tag => true to allow an O(1) membership test.
	 */
	private const BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	/**
	 * Elements whose character content is dropped from the result.
	 * Structured as tag => true, like BLOCK_LEVEL_TAGS.
	 */
	private const NON_VISIBLE_TAGS = [
		'style' => true,
		'script' => true,
	];

	/**
	 * @return string The text collected so far
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Append the given slice of character data to the result, unless a
	 * non-visible element is currently open.
	 *
	 * @param string $text Buffer containing the character data
	 * @param int $start Offset of the data within $text
	 * @param int $length Length of the data within $text
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		if ( $this->insideNonVisibleTag ) {
			return;
		}
		$this->text .= substr( $text, $start, $length );
	}

	/**
	 * Start suppressing text when a non-visible element opens, and pad the
	 * result with a space when the tag is block-level.
	 *
	 * @param string $name HTML tag name
	 * @param Attributes $attrs Unused
	 * @param bool $selfClose Unused
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = true;
		}
		$this->separateBlockLevelContent( $name );
	}

	/**
	 * Stop suppressing text when a non-visible element closes, and pad the
	 * result with a space when the tag is block-level.
	 *
	 * @param string $name HTML tag name
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = false;
		}
		$this->separateBlockLevelContent( $name );
	}

	/**
	 * Inject whitespace for typical block-level tags to prevent merging
	 * unrelated<br>words.
	 *
	 * @param string $tagName HTML tag name
	 */
	private function separateBlockLevelContent( $tagName ) {
		if ( $this->isBlockLevelTag( $tagName ) ) {
			$this->text .= ' ';
		}
	}

	/**
	 * Detect block-level tags. CSS can make any element block-level, so this
	 * is only a heuristic, but it is still better than nothing.
	 *
	 * The name is trimmed and lower-cased before the lookup.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when the tag is listed in BLOCK_LEVEL_TAGS
	 */
	private function isBlockLevelTag( $tagName ) {
		return isset( self::BLOCK_LEVEL_TAGS[ strtolower( trim( $tagName ) ) ] );
	}

	/**
	 * Detect tags which by default are non-visible. CSS can hide any element,
	 * so this is only a heuristic, but it is still better than nothing.
	 *
	 * We use this primarily to hide TemplateStyles from output in
	 * notifications, emails etc.
	 *
	 * The name is trimmed and lower-cased before the lookup.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when the tag is an HTML element which should be filtered out
	 */
	private function isNonVisibleTag( $tagName ) {
		return isset( self::NON_VISIBLE_TAGS[ strtolower( trim( $tagName ) ) ] );
	}

}