Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 39 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
RemexRemoveTagHandler | |
0.00% |
0 / 39 |
|
0.00% |
0 / 5 |
506 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
comment | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
validateTag | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
72 | |||
startTag | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
110 | |||
endTag | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Parser; |
4 | |
5 | use Wikimedia\RemexHtml\Tokenizer\Attributes; |
6 | use Wikimedia\RemexHtml\Tokenizer\PlainAttributes; |
7 | use Wikimedia\RemexHtml\Tokenizer\RelayTokenHandler; |
8 | use Wikimedia\RemexHtml\Tokenizer\TokenHandler; |
9 | |
10 | /**
11 |  * Helper class for Sanitizer::removeSomeTags().
12 |  *
13 |  * Relays start and end tags whose name is in the allowed set to the next
14 |  * handler, re-emits every other tag as text copied from the original
15 |  * source string, and drops comments.
16 |  *
17 |  * @internal
18 |  */
19 | class RemexRemoveTagHandler extends RelayTokenHandler {
20 |     /**
21 |      * @var string The original HTML source string (used for fallback text
22 |      * when rejecting an HTML tag).
23 |      */
24 |     private $source;
25 | 
26 |     /**
27 |      * @var array<string,true> Set of HTML tags which can be self-closed.
28 |      */
29 |     private $htmlsingle;
30 | 
31 |     /**
32 |      * @var array<string,true> Self-closed tags which are on $htmlsingle
33 |      * but not on $htmlsingleonly will be emitted as an empty element.
34 |      */
35 |     private $htmlsingleonly;
36 | 
37 |     /**
38 |      * @var array<string,true> Set of allowed HTML open/close tags.
39 |      */
40 |     private $htmlelements;
41 | 
42 |     /**
43 |      * @var ?callable(Attributes,mixed...):Attributes Callback to mutate or
44 |      * sanitize attributes.
45 |      */
46 |     private $attrCallback;
47 | 
48 |     /**
49 |      * @var array Extra arguments to provide to the $attrCallback after
50 |      * the attributes. Never null: the constructor substitutes an empty
51 |      * array when none are given.
52 |      */
53 |     private $callbackArgs;
54 | 
55 |     /**
56 |      * @param TokenHandler $nextHandler Handler to relay accepted tokens.
57 |      * @param string $source Input source string.
58 |      * @param array $tagData Information about allowed/rejected tags. The
59 |      *   'htmlsingle', 'htmlsingleonly' and 'htmlelements' keys are read.
60 |      * @param ?callable $attrCallback Attribute handler callback.
61 |      *   The full signature is ?callable(Attributes,mixed...):Attributes
62 |      * @param ?array $callbackArgs Optional arguments to attribute handler.
63 |      */
64 |     public function __construct(
65 |         TokenHandler $nextHandler,
66 |         string $source,
67 |         array $tagData,
68 |         ?callable $attrCallback,
69 |         ?array $callbackArgs
70 |     ) {
71 |         parent::__construct( $nextHandler );
72 |         $this->source = $source;
73 |         $this->htmlsingle = $tagData['htmlsingle'];
74 |         $this->htmlsingleonly = $tagData['htmlsingleonly'];
75 |         $this->htmlelements = $tagData['htmlelements'];
76 |         $this->attrCallback = $attrCallback;
77 |         // Normalize "no extra arguments" to an empty list so it can be
78 |         // spread unconditionally when the callback is invoked.
79 |         $this->callbackArgs = $callbackArgs ?? [];
80 |     }
81 | 
82 |     /**
83 |      * @inheritDoc
84 |      */
85 |     public function comment( $text, $sourceStart, $sourceLength ) {
86 |         // Don't relay comments.
87 |     }
88 | 
89 |     /**
90 |      * Checks whether a tag is allowed to be present given its attributes.
91 |      *
92 |      * This DOES NOT validate the attributes, nor does it validate the
93 |      * tags themselves. It only handles the special circumstances where a
94 |      * tag is allowed within content ONLY when it has specific attributes
95 |      * set: <meta> and <link> need itemprop="", and additionally
96 |      * content="" (for <meta>) or href="" (for <link>).
97 |      *
98 |      * @param string $element Tag name (startTag() passes it lower-cased).
99 |      * @param Attributes $attrs
100 |      * @return bool False if the tag must be rejected.
101 |      *
102 |      * @see Sanitizer::validateTag()
103 |      */
104 |     private static function validateTag( string $element, Attributes $attrs ): bool {
105 |         if ( $element !== 'meta' && $element !== 'link' ) {
106 |             return true;
107 |         }
108 | 
109 |         $params = $attrs->getValues();
110 |         if ( !isset( $params['itemprop'] ) ) {
111 |             // <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content
112 |             return false;
113 |         }
114 | 
115 |         // <meta> must have a content="" for the itemprop;
116 |         // <link> must have an associated href=""
117 |         $required = $element === 'meta' ? 'content' : 'href';
118 |         return isset( $params[$required] );
119 |     }
120 | 
121 |     /**
122 |      * Re-emits a span of the original source string as a text node on
123 |      * the next handler.
124 |      *
125 |      * @param int $sourceStart Offset of the span in the source string.
126 |      * @param int $sourceLength Length of the span.
127 |      */
128 |     private function emitSourceAsText( $sourceStart, $sourceLength ): void {
129 |         $this->nextHandler->characters( $this->source, $sourceStart, $sourceLength, $sourceStart, $sourceLength );
130 |     }
131 | 
132 |     /**
133 |      * @inheritDoc
134 |      */
135 |     public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
136 |         // Handle a start tag from the tokenizer: either relay it to the
137 |         // next stage, or re-emit it as raw text.
138 | 
139 |         $t = strtolower( $name );
140 |         if ( !isset( $this->htmlelements[$t] ) ) {
141 |             // Not an allowed element: emit this as a text node instead.
142 |             $this->emitSourceAsText( $sourceStart, $sourceLength );
143 |             return;
144 |         }
145 | 
146 |         if ( $this->attrCallback ) {
147 |             $attrs = ( $this->attrCallback )( $attrs, ...$this->callbackArgs );
148 |         }
149 |         if ( !self::validateTag( $t, $attrs ) ) {
150 |             // Rejected because of missing attributes: emit it as text.
151 |             // Returning here avoids sanitizing attributes that would be
152 |             // thrown away anyway.
153 |             $this->emitSourceAsText( $sourceStart, $sourceLength );
154 |             return;
155 |         }
156 | 
157 |         $fixedAttrs = Sanitizer::validateTagAttributes( $attrs->getValues(), $t );
158 |         $attrs = new PlainAttributes( $fixedAttrs );
159 | 
160 |         if ( $selfClose && !isset( $this->htmlsingleonly[$t] ) ) {
161 |             // Remove the self-closing slash, to be consistent with
162 |             // HTML5 semantics. T134423
163 |             $this->nextHandler->startTag( $name, $attrs, false, $sourceStart, $sourceLength );
164 |             if ( isset( $this->htmlsingle[$t] ) ) {
165 |                 // Interpret self-closing tags as empty tags even when
166 |                 // HTML5 would interpret them as start tags. Such input
167 |                 // is commonly seen on Wikimedia wikis with this intention.
168 |                 $this->nextHandler->endTag( $name, $sourceStart + $sourceLength, 0 );
169 |             }
170 |             return;
171 |         }
172 | 
173 |         $this->nextHandler->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
174 |     }
175 | 
176 |     /**
177 |      * @inheritDoc
178 |      */
179 |     public function endTag( $name, $sourceStart, $sourceLength ) {
180 |         // Handle an end tag from the tokenizer: either relay it to the
181 |         // next stage, or re-emit it as raw text.
182 | 
183 |         if ( isset( $this->htmlelements[strtolower( $name )] ) ) {
184 |             // This is a good tag, relay it.
185 |             $this->nextHandler->endTag( $name, $sourceStart, $sourceLength );
186 |         } else {
187 |             // Emit this as a text node instead.
188 |             $this->emitSourceAsText( $sourceStart, $sourceLength );
189 |         }
190 |     }
191 | }