Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 146 |
|
0.00% |
0 / 18 |
CRAP | |
0.00% |
0 / 1 |
Token | |
0.00% |
0 / 146 |
|
0.00% |
0 / 18 |
3782 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
jsonSerialize | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getName | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getType | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
addAttribute | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
addNormalizedAttribute | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getAttributeV | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getAttributeKV | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasAttribute | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setAttribute | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
setShadowInfo | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
getAttributeShadowInfo | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
42 | |||
removeAttribute | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
addSpaceSeparatedAttribute | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
getWTSource | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
kvsFromArray | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
72 | |||
rebuildNestedTokens | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getToken | |
0.00% |
0 / 46 |
|
0.00% |
0 / 1 |
306 | |||
fetchExpandedAttrValue | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Tokens; |
5 | |
6 | use stdClass; |
7 | use Wikimedia\Assert\Assert; |
8 | use Wikimedia\JsonCodec\JsonCodec; |
9 | use Wikimedia\Parsoid\NodeData\DataMw; |
10 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
11 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
12 | use Wikimedia\Parsoid\Wt2Html\Frame; |
13 | |
14 | /** |
15 | * Catch-all class for all token types. |
16 | */ |
17 | abstract class Token implements \JsonSerializable { |
18 | public DataParsoid $dataParsoid; |
19 | public ?DataMw $dataMw = null; |
20 | |
21 | /** @var KV[] */ |
22 | public $attribs; |
23 | |
/**
 * Initialise the state shared by every token type.
 *
 * @param ?DataParsoid $dataParsoid Parsoid data for this token; when null
 *   is passed a new, empty DataParsoid is created instead.
 * @param ?DataMw $dataMw Stored unchanged; may be null.
 */
protected function __construct(
	?DataParsoid $dataParsoid, ?DataMw $dataMw
) {
	if ( $dataParsoid === null ) {
		$dataParsoid = new DataParsoid;
	}
	$this->dataParsoid = $dataParsoid;
	$this->dataMw = $dataMw;
}
30 | |
/**
 * Produce the value that json_encode() should use for this token.
 *
 * Declared abstract here, so every concrete token class has to supply
 * its own representation. The method deliberately declares no return
 * type; the ReturnTypeWillChange attribute below accompanies that.
 *
 * @inheritDoc
 */
#[\ReturnTypeWillChange]
abstract public function jsonSerialize();
36 | |
/**
 * Name of this token.
 *
 * The base implementation simply reports the token type; subclasses may
 * override it to return something more specific.
 *
 * @return string
 */
public function getName(): string {
	$type = $this->getType();
	return $type;
}
45 | |
/**
 * String key identifying the kind of token: the unqualified class name
 * of the concrete object (everything after the last namespace separator).
 *
 * @return string
 */
public function getType(): string {
	$segments = explode( '\\', static::class );
	// explode() always yields at least one element, so this is the short name.
	return array_pop( $segments );
}
54 | |
/**
 * Append an attribute to the token.
 *
 * A new KV is always added at the end of the attribute list; existing
 * entries with the same name are left untouched.
 *
 * @param string $name
 *   Always a string when attributes are added through this method.
 *   KVs with non-string keys are only created directly by the tokenizer.
 * @param string|Token|array<Token|string> $value
 * @param ?KVSourceRange $srcOffsets Source offsets handed to the KV, if known.
 */
public function addAttribute(
	string $name, $value, ?KVSourceRange $srcOffsets = null
): void {
	$kv = new KV( $name, $value, $srcOffsets );
	$this->attribs[] = $kv;
}
70 | |
/**
 * Append an attribute and remember the original value it was derived
 * from, so that later changes to the attribute can be detected.
 *
 * The attribute itself is added via addAttribute(); the pairing of
 * normalized and original value is recorded via setShadowInfo().
 *
 * @param string $name
 * @param string|Token|array<Token|string> $value Normalized value to store.
 * @param mixed $origValue Original (pre-normalization) value.
 */
public function addNormalizedAttribute( string $name, $value, $origValue ): void {
	// Store the attribute first, then its shadow information.
	$this->addAttribute( $name, $value );
	$this->setShadowInfo( $name, $value, $origValue );
}
83 | |
/**
 * Look up the value of an attribute by name.
 *
 * Delegates to KV::lookup() on this token's attribute list.
 *
 * @param string $name
 * @return string|Token|array<Token|string>|KV[]|null
 */
public function getAttributeV( string $name ) {
	$value = KV::lookup( $this->attribs, $name );
	return $value;
}
93 | |
/**
 * Look up the KV object of an attribute by name.
 *
 * Delegates to KV::lookupKV() on this token's attribute list.
 *
 * @param string $name
 * @return KV|null
 */
public function getAttributeKV( string $name ) {
	$kv = KV::lookupKV( $this->attribs, $name );
	return $kv;
}
103 | |
/**
 * Tell whether the token carries an attribute with the given name.
 *
 * @param string $name
 * @return bool True when getAttributeKV() finds a KV for $name.
 */
public function hasAttribute( string $name ): bool {
	$kv = $this->getAttributeKV( $name );
	return $kv !== null;
}
113 | |
/**
 * Set an unshadowed attribute.
 *
 * The attribute list is scanned from the end towards the start and the
 * value of the last KV whose key is a string that lower-cases to $name is
 * overwritten. When no such KV exists, including when the token has no
 * attribute list at all, the attribute is appended with addAttribute().
 *
 * @param string $name Compared against the lower-cased key, so a name
 *   containing upper-case characters never matches an existing entry.
 * @param string|Token|array<Token|string> $value
 */
public function setAttribute( string $name, $value ): void {
	// $attribs is declared without a default and the base constructor does
	// not assign it, so it can still be null here. count( null ) is a
	// warning on PHP 7 and a TypeError on PHP 8; treat it as an empty list.
	$attribs = $this->attribs ?? [];

	// Walk backwards so that the last matching attribute wins.
	for ( $i = count( $attribs ) - 1; $i >= 0; $i-- ) {
		$kv = $attribs[$i];
		if ( is_string( $kv->k ) && mb_strtolower( $kv->k ) === $name ) {
			// KV is an object, so this updates the entry held in $this->attribs;
			// no write-back into the array is needed.
			$kv->v = $value;
			return;
		}
	}

	// Nothing found, just add the attribute.
	$this->addAttribute( $name, $value );
}
134 | |
/**
 * Record the original value of an attribute in this token's dataParsoid.
 *
 * Nothing is recorded when the original value is null or strictly equal
 * to the new value. Otherwise the new value is stored under
 * dataParsoid->a[$name] and the original under dataParsoid->sa[$name].
 *
 * @param string $name
 * @param mixed $value
 * @param mixed $origValue
 */
public function setShadowInfo( string $name, $value, $origValue ): void {
	// No shadowing without an original, or when nothing actually changed.
	if ( $origValue === null || $value === $origValue ) {
		return;
	}

	$dp = $this->dataParsoid;

	$dp->a ??= [];
	$dp->a[$name] = $value;

	$dp->sa ??= [];
	$dp->sa[$name] = $origValue;
}
151 | |
/**
 * Attribute info accessor for the wikitext serializer.
 *
 * Compares the current attribute value with the shadow information kept
 * in dataParsoid ('a' holds recorded values, 'sa' the original source
 * values) and reports whether the attribute was modified and whether the
 * returned value comes from the original source.
 *
 * @param string $name
 * @return array Information about the shadow info attached to this attribute:
 *   - value: (string|Token|array<Token|string>)
 *     When modified is false and fromsrc is true, this is always a string.
 *   - modified: (bool)
 *   - fromsrc: (bool)
 */
public function getAttributeShadowInfo( string $name ): array {
	$current = $this->getAttributeV( $name );
	$dp = $this->dataParsoid;

	// No recorded value for this attribute: report the current value and
	// let dataParsoid decide whether it counts as modified (new element).
	$hasRecorded = property_exists( $dp, 'a' ) && array_key_exists( $name, $dp->a );
	if ( !$hasRecorded ) {
		return [
			"value" => $current,
			"modified" => $dp->isModified(),
			"fromsrc" => false
		];
	}

	// The value differs from what was recorded: it has been modified.
	if ( $dp->a[$name] !== $current ) {
		return [
			"value" => $current,
			"modified" => true,
			"fromsrc" => false
		];
	}

	// Unchanged, but no original source value is available.
	$hasSource = property_exists( $dp, 'sa' ) && array_key_exists( $name, $dp->sa );
	if ( !$hasSource ) {
		return [
			"value" => $current,
			"modified" => false,
			"fromsrc" => false
		];
	}

	// Unchanged and the original source value is known: hand that back.
	return [
		"value" => $dp->sa[$name],
		"modified" => false,
		"fromsrc" => true
	];
}
199 | |
/**
 * Completely remove all attributes with this name.
 *
 * Every KV whose key is a string that lower-cases to $name is dropped and
 * the remaining attributes are re-indexed from zero. KVs with non-string
 * keys never match and are always kept. A token without any attributes is
 * left untouched.
 *
 * @param string $name Compared against the lower-cased key.
 */
public function removeAttribute( string $name ): void {
	// $attribs is declared without a default, so it may be null (or empty);
	// there is nothing to remove in either case.
	if ( !$this->attribs ) {
		return;
	}

	$kept = [];
	foreach ( $this->attribs as $kv ) {
		// Keys may be non-strings (see setAttribute()); under strict_types
		// mb_strtolower() would throw a TypeError for those, so only
		// string keys are compared.
		if ( is_string( $kv->k ) && mb_strtolower( $kv->k ) === $name ) {
			continue;
		}
		$kept[] = $kv;
	}
	$this->attribs = $kept;
}
213 | |
/**
 * Add one entry to a space-separated attribute value.
 *
 * These are attributes added by Parsoid itself rather than taken from the
 * source, so entries are separated by a plain ASCII space only.
 *
 * @param string $name The attribute name
 * @param string $value The entry to add to the attribute's value
 */
public function addSpaceSeparatedAttribute( string $name, string $value ): void {
	$existing = $this->getAttributeKV( $name );

	if ( $existing === null ) {
		// No such attribute yet: create it with just this entry.
		$this->addAttribute( $name, $value );
		return;
	}

	$entries = explode( ' ', $existing->v );
	if ( in_array( $value, $entries, true ) ) {
		// Already listed, nothing to do.
		return;
	}

	// Not listed yet: append it after a single space.
	$this->setAttribute( $existing->k, $existing->v . ' ' . $value );
}
238 | |
/**
 * Get the wikitext source of a token.
 *
 * The token's tsr (read from dataParsoid) is used to cut the matching
 * piece out of the frame's source text.
 *
 * @param Frame $frame Frame whose source text the tsr refers to.
 * @return string
 * @throws InvalidTokenException When dataParsoid holds no SourceRange tsr.
 */
public function getWTSource( Frame $frame ): string {
	$tsr = $this->dataParsoid->tsr ?? null;

	if ( $tsr instanceof SourceRange ) {
		$source = $frame->getSrcText();
		Assert::invariant( $tsr->end >= $tsr->start, 'Bad TSR' );
		return $tsr->substr( $source );
	}

	throw new InvalidTokenException( 'Expected token to have tsr info.' );
}
254 | |
/**
 * Build a list of KV objects from their plain-array form.
 *
 * Each entry is read for the keys 'k', 'v', 'srcOffsets', 'ksrc' and
 * 'vsrc'. Array keys and array values have their nested tokens rebuilt;
 * a value that is itself a list of KV-like arrays is converted recursively.
 *
 * @param array $a
 * @return array
 */
private static function kvsFromArray( array $a ): array {
	$result = [];

	foreach ( $a as $entry ) {
		$key = $entry["k"] ?? null;
		if ( is_array( $key ) ) {
			self::rebuildNestedTokens( $key );
		}

		$value = $entry['v'] ?? null;
		if ( is_array( $value ) && count( $value ) > 0 ) {
			// A non-empty array is either a list of KVs or a list of tokens;
			// the first element decides which.
			$isKVList = is_array( $value[0] ) && array_key_exists( 'k', $value[0] );
			if ( $isKVList ) {
				$value = self::kvsFromArray( $value );
			} else {
				self::rebuildNestedTokens( $value );
			}
		}

		// Only a truthy srcOffsets entry is converted; anything else is
		// passed through to the KV unchanged.
		$offsets = $entry["srcOffsets"] ?? null;
		if ( $offsets ) {
			$offsets = KVSourceRange::newFromJsonArray( $offsets );
		}

		$result[] = new KV(
			$key,
			$value,
			$offsets,
			$entry["ksrc"] ?? null,
			$entry["vsrc"] ?? null
		);
	}

	return $result;
}
292 | |
/**
 * Replace, in place, every member of $a with the result of passing it
 * through getToken().
 *
 * @param iterable|stdClass|DataParsoid &$a Container whose members are rewritten.
 */
private static function rebuildNestedTokens( &$a ): void {
	// Plain objects are not "iterable" as far as PHP's type system is
	// concerned, yet foreach can walk over them all the same.
	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $a as &$member ) {
		$member = self::getToken( $member );
	}
	// Break the reference left behind by the by-reference loop.
	unset( $member );
}
304 | |
/**
 * Get a token from some PHP structure. Used by the PHPUnit tests.
 *
 * An array with a recognised 'type' entry is turned into the matching
 * token object; its 'dataParsoid' and 'dataMw' entries, when set, are
 * decoded through JsonCodec. Any other array is kept as an array. In both
 * cases nested members are converted recursively. Falsy input and
 * non-array input are returned unchanged, except that a Token instance has
 * its attribs and dataParsoid members converted as well.
 *
 * @param KV|Token|array|string|int|float|bool|null $input
 * @return Token|string|int|float|bool|null|array<Token|string|int|float|bool|null>
 */
public static function getToken( $input ) {
	// Falsy values (null, '', '0', 0, false, []) need no conversion.
	if ( !$input ) {
		return $input;
	}

	// Unless a token object is built below, the input itself is the result.
	$token = $input;

	if ( is_array( $input ) && isset( $input['type'] ) ) {
		$codec = new JsonCodec();

		$da = null;
		if ( isset( $input['dataParsoid'] ) ) {
			$da = $codec->newFromJsonArray(
				$input['dataParsoid'],
				DOMDataUtils::getCodecHints()['data-parsoid']
			);
		}

		$dmw = null;
		if ( isset( $input['dataMw'] ) ) {
			$dmw = $codec->newFromJsonArray(
				$input['dataMw'],
				DOMDataUtils::getCodecHints()['data-mw']
			);
		}

		// In theory this should be refactored to use JsonCodecable
		// and remove the ad-hoc deserialization code here.
		switch ( $input['type'] ) {
			case "SelfclosingTagTk":
				$token = new SelfclosingTagTk(
					$input['name'], self::kvsFromArray( $input['attribs'] ), $da, $dmw
				);
				break;
			case "TagTk":
				$token = new TagTk(
					$input['name'], self::kvsFromArray( $input['attribs'] ), $da, $dmw
				);
				break;
			case "EndTagTk":
				$token = new EndTagTk(
					$input['name'], self::kvsFromArray( $input['attribs'] ), $da, $dmw
				);
				break;
			case "NlTk":
				$token = new NlTk( $da->tsr ?? null, $da, $dmw );
				break;
			case "EOFTk":
				$token = new EOFTk();
				break;
			case "CommentTk":
				$token = new CommentTk( $input["value"], $da, $dmw );
				break;
			default:
				// Looks like data-parsoid can have a 'type' property in some cases.
				// We can change that usage and then throw an exception here.
				// Until then the array is kept as it is.
				break;
		}
	}

	if ( is_array( $token ) ) {
		self::rebuildNestedTokens( $token );
	} elseif ( $token instanceof Token ) {
		if ( !empty( $token->attribs ) ) {
			self::rebuildNestedTokens( $token->attribs );
		}
		self::rebuildNestedTokens( $token->dataParsoid );
	}

	return $token;
}
377 | |
/**
 * Look up the expanded value recorded in dataMw for an attribute.
 *
 * Only tokens whose 'typeof' attribute contains "mw:ExpandedAttrs" are
 * considered. The dataMw attribs are searched for the first entry whose
 * key['txt'] equals $key, and that entry's value['html'] is returned.
 *
 * @param string $key
 * @return ?string Null when the token is not marked as having expanded
 *   attributes, dataMw has no attribs, no entry matches, or the matching
 *   entry has no 'html' value.
 */
public function fetchExpandedAttrValue( string $key ): ?string {
	$typeof = $this->getAttributeV( 'typeof' ) ?? '';
	if ( !preg_match( '/mw:ExpandedAttrs/', $typeof ) ) {
		return null;
	}

	$dataMw = $this->dataMw;
	if ( !isset( $dataMw->attribs ) ) {
		return null;
	}

	foreach ( $dataMw->attribs as $expanded ) {
		$txt = $expanded->key['txt'] ?? null;
		if ( $txt === $key ) {
			return $expanded->value['html'] ?? null;
		}
	}

	return null;
}
394 | |
395 | } |