Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 154 |
|
0.00% |
0 / 17 |
CRAP | |
0.00% |
0 / 1 |
Token | |
0.00% |
0 / 154 |
|
0.00% |
0 / 17 |
4422 | |
0.00% |
0 / 1 |
jsonSerialize | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getName | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getType | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
addAttribute | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
addNormalizedAttribute | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getAttributeV | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getAttributeKV | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasAttribute | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setAttribute | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
setShadowInfo | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
getAttributeShadowInfo | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
42 | |||
removeAttribute | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
addSpaceSeparatedAttribute | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
getWTSource | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
kvsFromArray | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
72 | |||
rebuildNestedTokens | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getToken | |
0.00% |
0 / 56 |
|
0.00% |
0 / 1 |
600 | |||
fetchExpandedAttrValue | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Tokens; |
5 | |
6 | use stdClass; |
7 | use Wikimedia\Assert\Assert; |
8 | use Wikimedia\Parsoid\Core\DomSourceRange; |
9 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
10 | use Wikimedia\Parsoid\Utils\PHPUtils; |
11 | use Wikimedia\Parsoid\Wt2Html\Frame; |
12 | |
13 | /** |
14 | * Catch-all class for all token types. |
15 | */ |
16 | abstract class Token implements \JsonSerializable { |
17 | /** @var DataParsoid|null */ |
18 | public $dataParsoid; |
19 | |
20 | /** @var KV[] */ |
21 | public $attribs; |
22 | |
/**
 * Produce the JSON-encodable representation of this token.
 * Declared abstract here: each concrete token class supplies its own
 * implementation.
 *
 * @return mixed
 */
#[\ReturnTypeWillChange]
abstract public function jsonSerialize();
28 | |
/**
 * Name of this token.
 * The base implementation simply reports the token type; subclasses
 * are free to override it.
 *
 * @return string
 */
public function getName(): string {
	$name = $this->getType();
	return $name;
}
37 | |
/**
 * String key identifying the kind of this token: the runtime class name
 * with everything up to and including the last namespace separator
 * stripped.
 *
 * @return string
 */
public function getType(): string {
	$segments = explode( '\\', static::class );
	$shortName = end( $segments );
	return $shortName;
}
46 | |
/**
 * Append a new attribute to this token.
 * An existing attribute with the same name is not looked up or
 * replaced; the new KV is always added at the end of the list.
 *
 * @param string $name
 *    Attribute name. Through this method the key is always a string;
 *    KVs with non-string keys are only built directly by the tokenizer.
 * @param string|Token|array<Token|string> $value
 * @param ?KVSourceRange $srcOffsets Source offsets handed to the new KV.
 */
public function addAttribute(
	string $name, $value, ?KVSourceRange $srcOffsets = null
): void {
	$kv = new KV( $name, $value, $srcOffsets );
	$this->attribs[] = $kv;
}
62 | |
/**
 * Append an attribute and record shadow information for it, so that the
 * original wikitext value that produced it is preserved for change
 * detection.
 *
 * @param string $name
 * @param string|Token|Token[] $value Normalized value stored on the token.
 * @param mixed $origValue Original (unnormalized) value to remember.
 */
public function addNormalizedAttribute( string $name, $value, $origValue ): void {
	// Order matters only in that both steps must happen: first the
	// attribute itself, then its shadow entry in dataParsoid.
	$this->addAttribute( $name, $value );
	$this->setShadowInfo( $name, $value, $origValue );
}
75 | |
/**
 * Fetch the value of the named attribute via KV::lookup().
 *
 * @param string $name
 * @return string|Token|Token[]|KV[]|null
 */
public function getAttributeV( string $name ) {
	$attribs = $this->attribs;
	return KV::lookup( $attribs, $name );
}
85 | |
/**
 * Fetch the KV object of the named attribute via KV::lookupKV().
 *
 * @param string $name
 * @return KV|null
 */
public function getAttributeKV( string $name ) {
	$attribs = $this->attribs;
	return KV::lookupKV( $attribs, $name );
}
95 | |
/**
 * Tell whether this token carries an attribute with the given name,
 * i.e. whether getAttributeKV() finds a KV for it.
 *
 * @param string $name
 * @return bool
 */
public function hasAttribute( string $name ): bool {
	$kv = $this->getAttributeKV( $name );
	return $kv !== null;
}
105 | |
/**
 * Set an unshadowed attribute.
 * The last attribute whose lower-cased string key equals $name gets its
 * value overwritten; if there is no such attribute a new one is appended.
 * Only the stored key is lower-cased for the comparison, $name is used
 * as given.
 *
 * @param string $name
 * @param string|Token|Token[] $value
 */
public function setAttribute( string $name, $value ): void {
	// Scan from the end so that the last matching attribute wins.
	$idx = count( $this->attribs );
	while ( $idx-- > 0 ) {
		$candidate = $this->attribs[$idx];
		$key = $candidate->k;
		if ( !is_string( $key ) || mb_strtolower( $key ) !== $name ) {
			continue;
		}
		$candidate->v = $value;
		$this->attribs[$idx] = $candidate;
		return;
	}

	// No existing attribute matched: append a fresh one.
	$this->addAttribute( $name, $value );
}
126 | |
/**
 * Remember the original value of an attribute in this token's
 * dataParsoid: the current value goes into `a`, the original into `sa`,
 * both keyed by attribute name.
 *
 * @param string $name
 * @param mixed $value
 * @param mixed $origValue
 */
public function setShadowInfo( string $name, $value, $origValue ): void {
	// Nothing to shadow when there is no original value or when it is
	// identical to the current one.
	if ( $origValue === null || $value === $origValue ) {
		return;
	}

	$dp = $this->dataParsoid;
	$dp->a ??= [];
	$dp->a[$name] = $value;
	$dp->sa ??= [];
	$dp->sa[$name] = $origValue;
}
143 | |
/**
 * Attribute info accessor for the wikitext serializer. Compares the
 * current attribute value against the shadow information stored in
 * dataParsoid (`a` / `sa`) and hands back the original source value when
 * the attribute is unchanged and one was recorded.
 *
 * @param string $name
 * @return array Information about the shadow info attached to this attribute:
 *   - value: (Token|Token[]|string)
 *     When modified is false and fromsrc is true, this is always a string.
 *   - modified: (bool)
 *   - fromsrc: (bool)
 */
public function getAttributeShadowInfo( string $name ): array {
	$curVal = $this->getAttributeV( $name );
	$dp = $this->dataParsoid;

	// No recorded value for this attribute: report the current value and
	// let dataParsoid decide whether the token counts as modified
	// (e.g. because it is a new element).
	$hasRecordedValue = property_exists( $dp, 'a' ) &&
		array_key_exists( $name, $dp->a );
	if ( !$hasRecordedValue ) {
		return [
			'value' => $curVal,
			'modified' => $dp->isModified(),
			'fromsrc' => false
		];
	}

	// The recorded value differs from the current one: attribute was edited.
	if ( $dp->a[$name] !== $curVal ) {
		return [
			'value' => $curVal,
			'modified' => true,
			'fromsrc' => false
		];
	}

	// Unchanged, but no original source value was stored.
	$hasSourceValue = property_exists( $dp, 'sa' ) &&
		array_key_exists( $name, $dp->sa );
	if ( !$hasSourceValue ) {
		return [
			'value' => $curVal,
			'modified' => false,
			'fromsrc' => false
		];
	}

	// Unchanged and an original source value exists: use it.
	return [
		'value' => $dp->sa[$name],
		'modified' => false,
		'fromsrc' => true
	];
}
191 | |
/**
 * Completely remove all attributes with this name.
 * An attribute matches when its key is a string whose lower-cased form
 * equals $name ($name itself is used as given). Attributes with
 * non-string keys can never match and are always kept. The attribute
 * list is re-indexed afterwards.
 *
 * @param string $name
 */
public function removeAttribute( string $name ): void {
	$kept = [];
	foreach ( $this->attribs as $kv ) {
		// Guard with is_string(): under strict_types, handing a non-string
		// key to mb_strtolower() would throw a TypeError. This mirrors the
		// check done in setAttribute().
		if ( is_string( $kv->k ) && mb_strtolower( $kv->k ) === $name ) {
			continue;
		}
		$kept[] = $kv;
	}
	$this->attribs = $kept;
}
205 | |
/**
 * Add one entry to a space-separated attribute value.
 * These are Parsoid-added attributes, not something present in source,
 * so only the regular ASCII space character is used as separator.
 *
 * @param string $name The attribute name
 * @param string $value The value to add to the attribute
 */
public function addSpaceSeparatedAttribute( string $name, string $value ): void {
	$kv = $this->getAttributeKV( $name );
	if ( $kv === null ) {
		// No such attribute yet: create it with just this value.
		$this->addAttribute( $name, $value );
		return;
	}

	$entries = explode( ' ', $kv->v );
	if ( in_array( $value, $entries, true ) ) {
		// Already listed, nothing to do.
		return;
	}

	// Not listed yet: append it after a single space.
	$this->setAttribute( $kv->k, $kv->v . ' ' . $value );
}
230 | |
/**
 * Get the wikitext source of a token: the slice of the frame's source
 * text covered by the token's tsr range.
 *
 * @param Frame $frame
 * @return string
 * @throws InvalidTokenException when dataParsoid carries no SourceRange in tsr
 */
public function getWTSource( Frame $frame ): string {
	$range = $this->dataParsoid->tsr ?? null;
	if ( $range instanceof SourceRange ) {
		$source = $frame->getSrcText();
		Assert::invariant( $range->end >= $range->start, 'Bad TSR' );
		return $range->substr( $source );
	}
	throw new InvalidTokenException( 'Expected token to have tsr info.' );
}
246 | |
/**
 * Build a list of KV objects from their array form.
 * Each entry may carry the keys `k`, `v`, `srcOffsets`, `ksrc` and
 * `vsrc`; absent keys are passed to the KV constructor as null.
 *
 * @param array $a
 * @return array
 */
private static function kvsFromArray( array $a ): array {
	$result = [];
	foreach ( $a as $entry ) {
		// An array-valued key is a list of token-like values.
		$key = $entry['k'] ?? null;
		if ( is_array( $key ) ) {
			self::rebuildNestedTokens( $key );
		}

		// An array value holds either nested KVs (recognised by a 'k'
		// entry in its first element) or token-like values.
		$val = $entry['v'] ?? null;
		if ( is_array( $val ) && count( $val ) > 0 ) {
			if ( is_array( $val[0] ) && array_key_exists( 'k', $val[0] ) ) {
				$val = self::kvsFromArray( $val );
			} else {
				self::rebuildNestedTokens( $val );
			}
		}

		// Falsy offsets are handed through untouched.
		$offsets = $entry['srcOffsets'] ?? null;
		$offsets = $offsets ? KVSourceRange::fromArray( $offsets ) : $offsets;

		$result[] = new KV(
			$key,
			$val,
			$offsets,
			$entry['ksrc'] ?? null,
			$entry['vsrc'] ?? null
		);
	}
	return $result;
}
284 | |
/**
 * Replace, in place, every member of $a by the result of running it
 * through getToken().
 *
 * @param iterable|stdClass|DataParsoid &$a
 */
private static function rebuildNestedTokens( &$a ): void {
	// Plain objects are not `iterable` as far as PHP's type system goes,
	// yet foreach happily walks them.
	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $a as &$member ) {
		$member = self::getToken( $member );
	}
	// Drop the dangling reference left behind by the by-reference loop.
	unset( $member );
}
296 | |
/**
 * Get a token from some PHP structure. Used by the PHPUnit tests.
 *
 * - Falsy input is returned untouched.
 * - An array with a `type` entry naming one of the known token classes
 *   (SelfclosingTagTk, TagTk, EndTagTk, NlTk, EOFTk, CommentTk) is turned
 *   into an instance of that class; an optional `dataParsoid` entry is
 *   converted into a DataParsoid object first.
 * - Any other array is kept as an array.
 * - Finally the members of the resulting array, or the `attribs` and
 *   `dataParsoid` of the resulting object, are themselves passed through
 *   this conversion.
 *
 * @param KV|Token|array|string|int|float|bool|null $input
 * @return Token|string|int|float|bool|null|array<Token|string|int|float|bool|null>
 */
public static function getToken( $input ) {
	if ( !$input ) {
		return $input;
	}

	// By default the input is handed through as is. This function returns
	// by value, so a plain copy is all that is needed here.
	$token = $input;

	if ( is_array( $input ) && isset( $input['type'] ) ) {
		$da = isset( $input['dataParsoid'] )
			? self::dataParsoidFromArray( $input['dataParsoid'] )
			: null;

		switch ( $input['type'] ) {
			case "SelfclosingTagTk":
				$token = new SelfclosingTagTk( $input['name'], self::kvsFromArray( $input['attribs'] ), $da );
				break;
			case "TagTk":
				$token = new TagTk( $input['name'], self::kvsFromArray( $input['attribs'] ), $da );
				break;
			case "EndTagTk":
				$token = new EndTagTk( $input['name'], self::kvsFromArray( $input['attribs'] ), $da );
				break;
			case "NlTk":
				$token = new NlTk( $da->tsr ?? null, $da );
				break;
			case "EOFTk":
				$token = new EOFTk();
				break;
			case "CommentTk":
				$token = new CommentTk( $input["value"], $da );
				break;
			default:
				// Looks like data-parsoid can have a 'type' property in some cases
				// We can change that usage and then throw an exception here.
				// Until then such arrays are kept as plain arrays.
				break;
		}
	}

	if ( is_array( $token ) ) {
		self::rebuildNestedTokens( $token );
	} else {
		if ( !empty( $token->attribs ) ) {
			self::rebuildNestedTokens( $token->attribs );
		}
		if ( !empty( $token->dataParsoid ) ) {
			self::rebuildNestedTokens( $token->dataParsoid );
		}
	}

	return $token;
}

/**
 * Build a DataParsoid object from its array form (helper of getToken()).
 * The source-range entries are converted with the matching fromArray()
 * factory, `tmp` entries are copied onto the object returned by
 * getTemp(), and every other entry is assigned verbatim.
 *
 * @param iterable $data
 * @return DataParsoid
 */
private static function dataParsoidFromArray( $data ): DataParsoid {
	$da = new DataParsoid;
	foreach ( $data as $key => $value ) {
		switch ( $key ) {
			case 'tmp':
				$tmp = $da->getTemp();
				foreach ( $value as $key2 => $value2 ) {
					$tmp->$key2 = $value2;
				}
				break;
			case 'dsr':
				// dsr is generally for DOM trees, not Tokens.
				$da->dsr = DomSourceRange::fromArray( $value );
				break;
			case 'tsr':
				$da->tsr = SourceRange::fromArray( $value );
				break;
			case 'extTagOffsets':
				$da->extTagOffsets = DomSourceRange::fromArray( $value );
				break;
			case 'extLinkContentOffsets':
				$da->extLinkContentOffsets =
					SourceRange::fromArray( $value );
				break;
			default:
				$da->$key = $value;
		}
	}
	return $da;
}
383 | |
/**
 * Look up the expanded HTML recorded for an attribute of this token.
 *
 * Only applies when the token's `typeof` attribute contains
 * `mw:ExpandedAttrs`. In that case the `data-mw` attribute is decoded as
 * JSON and its `attribs` list is searched for the first entry whose
 * first element has a `txt` equal to $key; the `html` of that entry's
 * second element is returned.
 *
 * @param string $key Attribute name to look for.
 * @return ?string The recorded html, or null when the token has no
 *   expanded attributes, carries no usable `data-mw`, has no entry for
 *   $key, or the matching entry has no `html`.
 */
public function fetchExpandedAttrValue( string $key ): ?string {
	$typeof = $this->getAttributeV( 'typeof' );
	if ( !is_string( $typeof ) || !preg_match( '/mw:ExpandedAttrs/', $typeof ) ) {
		return null;
	}

	// Under strict_types a missing (null) or non-string data-mw must not
	// be handed to the JSON decoder.
	$dataMw = $this->getAttributeV( 'data-mw' );
	if ( !is_string( $dataMw ) ) {
		return null;
	}

	$decoded = PHPUtils::jsonDecode( $dataMw, false );
	// Tolerate a blob without an attribs list.
	foreach ( $decoded->attribs ?? [] as $attr ) {
		if ( ( $attr[0]->txt ?? null ) === $key ) {
			return $attr[1]->html ?? null;
		}
	}
	return null;
}
399 | |
400 | } |