Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
10.83% |
26 / 240 |
|
5.13% |
2 / 39 |
CRAP | |
0.00% |
0 / 1 |
DOMDataUtils | |
10.83% |
26 / 240 |
|
5.13% |
2 / 39 |
7921.03 | |
0.00% |
0 / 1 |
getBag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
prepareDoc | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
prepareChildDoc | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
stashObjectInDoc | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
noAttrs | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
getNodeData | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
setNodeData | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getDataParsoid | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
setDataParsoid | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getDataMwI18n | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
setDataMwI18n | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getDataNodeI18n | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
setDataNodeI18n | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
getDataAttrI18n | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
setDataAttrI18n | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
getDataAttrI18nNames | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getDataParsoidDiff | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
setDataParsoidDiff | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getDataMw | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
setDataMw | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
validDataMw | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
validDataMwI18n | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getJSONAttribute | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
setJSONAttribute | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
setShadowInfo | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
setShadowInfoIfModified | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
42 | |||
addNormalizedAttribute | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getPageBundle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
storeInPageBundle | |
78.95% |
15 / 19 |
|
0.00% |
0 / 1 |
6.34 | |||
injectPageBundle | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
extractPageBundle | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
visitAndLoadDataAttribs | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
massageLoadedDataParsoid | |
0.00% |
0 / 42 |
|
0.00% |
0 / 1 |
420 | |||
loadDataAttribs | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
20 | |||
usedIdIndex | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
visitAndStoreDataAttribs | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
storeDataAttribs | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
240 | |||
cloneNode | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
fixClonedData | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Utils; |
5 | |
6 | use Composer\Semver\Semver; |
7 | use stdClass; |
8 | use Wikimedia\Assert\Assert; |
9 | use Wikimedia\Assert\UnreachableException; |
10 | use Wikimedia\Parsoid\Config\Env; |
11 | use Wikimedia\Parsoid\Core\DomSourceRange; |
12 | use Wikimedia\Parsoid\Core\PageBundle; |
13 | use Wikimedia\Parsoid\DOM\Document; |
14 | use Wikimedia\Parsoid\DOM\Element; |
15 | use Wikimedia\Parsoid\DOM\Node; |
16 | use Wikimedia\Parsoid\NodeData\DataBag; |
17 | use Wikimedia\Parsoid\NodeData\DataMw; |
18 | use Wikimedia\Parsoid\NodeData\DataMwI18n; |
19 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
20 | use Wikimedia\Parsoid\NodeData\I18nInfo; |
21 | use Wikimedia\Parsoid\NodeData\NodeData; |
22 | use Wikimedia\Parsoid\NodeData\ParamInfo; |
23 | use Wikimedia\Parsoid\NodeData\TempData; |
24 | use Wikimedia\Parsoid\Tokens\SourceRange; |
25 | |
26 | /** |
27 | * These helpers pertain to HTML and data attributes of a node. |
28 | */ |
29 | class DOMDataUtils { |
30 | public const DATA_OBJECT_ATTR_NAME = 'data-object-id'; |
31 | |
/**
 * Fetch the DataBag attached to a document.
 *
 * The bag is kept in `bag`, a dynamic (undeclared) property of the Document.
 *
 * @param Document $doc
 * @return DataBag
 */
public static function getBag( Document $doc ): DataBag {
	// `bag` is not a declared property, hence the phan suppression below.
	// @phan-suppress-next-line PhanUndeclaredProperty
	$bag = $doc->bag;
	return $bag;
}
43 | |
/**
 * Attach a fresh, empty DataBag to a document and warm up its head and
 * body lookups.
 *
 * @param Document $doc
 */
public static function prepareDoc( Document $doc ): void {
	$freshBag = new DataBag();
	// `bag` is a deliberate dynamic property; see DOMDataUtils::getBag()
	// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
	$doc->bag = $freshBag;

	// Called only for their side effect of caching the head and body;
	// the return values are not needed here.
	DOMCompat::getHead( $doc );
	DOMCompat::getBody( $doc );
}
53 | |
/**
 * Make a child document share the DataBag of its top-level document.
 *
 * The top-level document must already carry a bag (see prepareDoc());
 * this is enforced with an invariant.
 *
 * @param Document $topLevelDoc Document that already owns a DataBag
 * @param Document $childDoc Document that should reuse that same bag
 */
public static function prepareChildDoc( Document $topLevelDoc, Document $childDoc ) {
	// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
	$sharedBag = $topLevelDoc->bag;
	Assert::invariant( $sharedBag instanceof DataBag, 'doc bag not set' );
	// Same object, not a copy: both documents now use one bag.
	// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
	$childDoc->bag = $sharedBag;
}
64 | |
/**
 * Register $obj in the document's DataBag.
 *
 * @param Document $doc Document whose bag receives the object
 * @param NodeData $obj Object to stash
 * @return int Id handed out by the bag, usable for later retrieval
 */
public static function stashObjectInDoc( Document $doc, NodeData $obj ): int {
	$bag = self::getBag( $doc );
	return $bag->stashObject( $obj );
}
74 | |
/**
 * Is this element free of attributes?
 *
 * The internal data-object-id bookkeeping attribute does not count: an
 * element whose only attribute is that one is still reported as having
 * no attributes.
 *
 * @param Element $node
 * @return bool True when the element has no attributes besides (at most)
 *   the data-object-id attribute
 */
public static function noAttrs( Element $node ): bool {
	// The 'xmlns' attribute is "invisible" T235295, so it has to be
	// checked for explicitly.
	if ( $node->hasAttribute( 'xmlns' ) ) {
		return false;
	}
	$attrCount = count( $node->attributes );
	if ( $attrCount === 0 ) {
		return true;
	}
	if ( $attrCount !== 1 ) {
		return false;
	}
	// Exactly one attribute: fine only if it is our own bookkeeping one.
	return $node->hasAttribute( self::DATA_OBJECT_ATTR_NAME );
}
89 | |
/**
 * Get the data object associated with a node.
 *
 * If the element has no data-object-id attribute yet, an empty NodeData
 * is created, registered in the document's bag and returned.
 *
 * @param Element $node node
 * @return NodeData
 * @throws UnreachableException if the data object is marked as already
 *   stored (its `storedId` is set, see storeDataAttribs()) and has not
 *   been reloaded since
 */
public static function getNodeData( Element $node ): NodeData {
	if ( !$node->hasAttribute( self::DATA_OBJECT_ATTR_NAME ) ) {
		// Initialized on first request
		$dataObject = new NodeData;
		self::setNodeData( $node, $dataObject );
		return $dataObject;
	}

	$nodeId = DOMCompat::getAttribute( $node, self::DATA_OBJECT_ATTR_NAME );
	// The null branch only exists to keep static analysis happy; the
	// attribute was just seen by hasAttribute().
	$dataObject = $nodeId !== null
		? self::getBag( $node->ownerDocument )->getObject( (int)$nodeId )
		: null;
	Assert::invariant( isset( $dataObject ), 'Bogus nodeId given!' );
	if ( isset( $dataObject->storedId ) ) {
		// If this node's data-object id is different from storedId,
		// it will indicate that the data-parsoid object was shared
		// between nodes without getting cloned. Useful for debugging.
		// The fragments are separated explicitly so the message is legible.
		throw new UnreachableException(
			'Trying to fetch node data without loading! ' .
			'Node id: ' . $nodeId . '; ' .
			'Stored data: ' . PHPUtils::jsonEncode( $dataObject )
		);
	}
	return $dataObject;
}
123 | |
/**
 * Associate a data object with a node.
 *
 * The object is stashed in the owner document's bag and the id returned
 * by the bag is written to the node's data-object-id attribute.
 *
 * @param Element $node node
 * @param NodeData $data data
 */
public static function setNodeData( Element $node, NodeData $data ): void {
	$node->setAttribute(
		self::DATA_OBJECT_ATTR_NAME,
		(string)self::stashObjectInDoc( $node->ownerDocument, $data )
	);
}
134 | |
/**
 * Get the data-parsoid object of a node, creating an empty one on the
 * node's data object if none is set yet.
 *
 * @param Element $node node
 * @return DataParsoid
 */
public static function getDataParsoid( Element $node ): DataParsoid {
	$nodeData = self::getNodeData( $node );
	if ( !isset( $nodeData->parsoid ) ) {
		// Lazily initialized so callers always get an object back.
		$nodeData->parsoid = new DataParsoid;
	}
	return $nodeData->parsoid;
}
146 | |
/**
 * Replace the data-parsoid object of a node.
 *
 * @param Element $node node
 * @param DataParsoid $dp data-parsoid
 */
public static function setDataParsoid( Element $node, DataParsoid $dp ): void {
	self::getNodeData( $node )->parsoid = $dp;
}
157 | |
/**
 * Return the raw i18n container of a node, if any.
 *
 * Private on purpose: callers should normally go through
 * getDataNodeI18n() and getDataAttrI18n() instead.
 *
 * @param Element $node
 * @return DataMwI18n|null Null when no i18n information has been set;
 *   unlike data-parsoid, no default object is created.
 */
private static function getDataMwI18n( Element $node ): ?DataMwI18n {
	return self::getNodeData( $node )->i18n ?? null;
}
169 | |
/**
 * Replace the raw i18n container of a node.
 *
 * Private on purpose: callers should normally go through
 * setDataNodeI18n() and setDataAttrI18n() instead.
 *
 * @param Element $node
 * @param DataMwI18n $i18n
 */
private static function setDataMwI18n( Element $node, DataMwI18n $i18n ) {
	self::getNodeData( $node )->i18n = $i18n;
}
178 | |
/**
 * Retrieve the internationalization (i18n) information attached to the
 * node itself (typically used for localization).
 *
 * @param Element $node
 * @return ?I18nInfo Null when the node carries no i18n container at all;
 *   otherwise whatever the container reports as its span info
 */
public static function getDataNodeI18n( Element $node ): ?I18nInfo {
	// No default container is created when none exists.
	$container = self::getNodeData( $node )->i18n ?? null;
	return $container !== null ? $container->getSpanInfo() : null;
}
192 | |
/**
 * Set the internationalization (i18n) information of the node itself,
 * used for later localization. The i18n container is created on demand.
 *
 * @param Element $node
 * @param I18nInfo $i18n
 */
public static function setDataNodeI18n( Element $node, I18nInfo $i18n ) {
	$nodeData = self::getNodeData( $node );
	if ( !isset( $nodeData->i18n ) ) {
		$nodeData->i18n = new DataMwI18n();
	}
	$nodeData->i18n->setSpanInfo( $i18n );
}
201 | |
/**
 * Retrieve the internationalization (i18n) information recorded for one
 * attribute value of a node (typically used for localization).
 *
 * @param Element $node
 * @param string $name Attribute name
 * @return ?I18nInfo Null when the node carries no i18n container at all;
 *   otherwise whatever the container reports for that attribute
 */
public static function getDataAttrI18n( Element $node, string $name ): ?I18nInfo {
	// No default container is created when none exists.
	$container = self::getNodeData( $node )->i18n ?? null;
	return $container !== null ? $container->getAttributeInfo( $name ) : null;
}
217 | |
/**
 * Set the internationalization (i18n) information of an attribute value,
 * used for later localization. The i18n container is created on demand.
 *
 * @param Element $node
 * @param string $name Attribute name
 * @param I18nInfo $i18n
 */
public static function setDataAttrI18n( Element $node, string $name, I18nInfo $i18n ) {
	$nodeData = self::getNodeData( $node );
	if ( !isset( $nodeData->i18n ) ) {
		$nodeData->i18n = new DataMwI18n();
	}
	$nodeData->i18n->setAttributeInfo( $name, $i18n );
}
230 | |
/**
 * List the attribute names for which the node's i18n container reports
 * information.
 *
 * @param Element $node
 * @return array Empty when the node carries no i18n container
 */
public static function getDataAttrI18nNames( Element $node ): array {
	// No default container is created when none exists.
	$container = self::getNodeData( $node )->i18n ?? null;
	return $container !== null ? $container->getAttributeNames() : [];
}
243 | |
/**
 * Get the data-parsoid-diff object of a node.
 *
 * @param Element $node node
 * @return ?stdClass Null when no diff info is set; no default is created
 */
public static function getDataParsoidDiff( Element $node ): ?stdClass {
	return self::getNodeData( $node )->parsoid_diff ?? null;
}
255 | |
/**
 * Replace the data-parsoid-diff object of a node.
 *
 * @param Element $node node
 * @param ?stdClass $diffObj data-parsoid-diff object, or null for none
 */
public static function setDataParsoidDiff( Element $node, ?stdClass $diffObj ): void {
	self::getNodeData( $node )->parsoid_diff = $diffObj;
}
266 | |
/**
 * Get the data-mw object of a node, creating an empty one on the node's
 * data object if none is set yet.
 *
 * @param Element $node node
 * @return DataMw
 */
public static function getDataMw( Element $node ): DataMw {
	$nodeData = self::getNodeData( $node );
	if ( !isset( $nodeData->mw ) ) {
		// Lazily initialized so callers always get an object back.
		$nodeData->mw = new DataMw;
	}
	return $nodeData->mw;
}
278 | |
/**
 * Replace the data-mw object of a node.
 *
 * @param Element $node node
 * @param ?DataMw $dmw data-mw, or null for none
 */
public static function setDataMw( Element $node, ?DataMw $dmw ): void {
	self::getNodeData( $node )->mw = $dmw;
}
289 | |
/**
 * Check whether a node has non-empty data-mw information.
 *
 * Side effect: goes through getDataMw(), so an empty DataMw object is
 * created on the node if it had none.
 *
 * @param Element $node node
 * @return bool True when the data-mw object, cast to an array, has at
 *   least one entry
 */
public static function validDataMw( Element $node ): bool {
	$asArray = (array)self::getDataMw( $node );
	return count( $asArray ) > 0;
}
299 | |
/**
 * Check whether i18n information is attached to a node (for the node
 * itself or for its attributes).
 *
 * @param Element $node
 * @return bool True when an i18n container is set on the node
 */
public static function validDataMwI18n( Element $node ): bool {
	$container = self::getDataMwI18n( $node );
	return $container !== null;
}
308 | |
/**
 * Decode a JSON-encoded attribute of a node.
 *
 * @param Element $node node
 * @param string $name Attribute name
 * @param mixed $defaultVal Returned when the attribute is missing or
 *   when decoding yields null
 * @return mixed Decoded value (objects are decoded as objects, not as
 *   associative arrays), or $defaultVal
 */
public static function getJSONAttribute( Element $node, string $name, $defaultVal ) {
	$raw = DOMCompat::getAttribute( $node, $name );
	if ( $raw === null ) {
		return $defaultVal;
	}

	$parsed = PHPUtils::jsonDecode( $raw, false );
	if ( $parsed === null ) {
		// A null result is treated as a decoding failure: it is logged
		// and the default is used instead.
		error_log( 'ERROR: Could not decode attribute-val ' . $raw .
			' for ' . $name . ' on node ' . DOMCompat::nodeName( $node ) );
		return $defaultVal;
	}
	return $parsed;
}
331 | |
/**
 * Set an attribute on a node to the JSON encoding of a value.
 *
 * @param Element $node node
 * @param string $name Name of the attribute.
 * @param mixed $obj Value to encode and store
 */
public static function setJSONAttribute( Element $node, string $name, $obj ): void {
	if ( $obj === [] ) {
		// An empty PHP array is written as an empty JSON object.
		$json = '{}';
	} else {
		$json = PHPUtils::jsonEncode( $obj );
	}
	$node->setAttribute( $name, $json );
}
343 | |
/**
 * Set shadow info on a node; similar to the method on tokens.
 * Records a key => value pair in the `a` property of data-parsoid.
 *
 * This behaves like setShadowInfoIfModified() for the case where there
 * is no original value: $val is always recorded and nothing is written
 * to `sa` (although `sa` is initialized to an empty array if unset).
 *
 * @param Element $node node
 * @param string $name Name of the attribute.
 * @param mixed $val Value to record
 */
public static function setShadowInfo( Element $node, string $name, $val ): void {
	$dp = self::getDataParsoid( $node );
	// Both shadow maps are made to exist, even though only `a` is written.
	if ( !isset( $dp->a ) ) {
		$dp->a = [];
	}
	if ( !isset( $dp->sa ) ) {
		$dp->sa = [];
	}
	$dp->a[$name] = $val;
}
361 | |
/**
 * Set shadow info on a node; similar to the method on tokens.
 *
 * Unless $skipOrig is set, nothing is recorded when the original value
 * is null or identical (===) to the new value. Otherwise:
 * - the new value is recorded in data-parsoid->a, and
 * - the original value is recorded in data-parsoid->sa, but only if
 *   $skipOrig is false and data-parsoid->a has no entry for $name yet.
 *
 * @param Element $node node
 * @param string $name Name of the attribute.
 * @param mixed $val New value
 * @param mixed $origVal Original value; null means "no original value"
 *   and suppresses recording unless $skipOrig is true
 * @param bool $skipOrig When true, always record $val in `a` and never
 *   touch the entry in `sa`
 */
public static function setShadowInfoIfModified(
	Element $node, string $name, $val, $origVal, bool $skipOrig = false
): void {
	$trackOrig = !$skipOrig;
	if ( $trackOrig && ( $origVal === null || $val === $origVal ) ) {
		return;
	}

	$dp = self::getDataParsoid( $node );
	if ( !isset( $dp->a ) ) {
		$dp->a = [];
	}
	if ( !isset( $dp->sa ) ) {
		$dp->sa = [];
	}

	// FIXME: This is a hack to not overwrite already shadowed info.
	// We should either fix the call site that depends on this
	// behaviour to do an explicit check, or double down on this
	// by porting it to the token method as well.
	$alreadyShadowed = array_key_exists( $name, $dp->a );
	if ( $trackOrig && !$alreadyShadowed ) {
		$dp->sa[$name] = $origVal;
	}
	$dp->a[$name] = $val;
}
394 | |
/**
 * Set an attribute on a node and record the matching shadow info.
 * Similar to the method on tokens.
 *
 * The `id` attribute is written through DOMCompat::setIdAttribute();
 * every other attribute is written with a plain setAttribute().
 *
 * @param Element $node node
 * @param string $name Name of the attribute.
 * @param mixed $val New value
 * @param mixed $origVal Original value, forwarded to setShadowInfoIfModified()
 * @param bool $skipOrig Forwarded to setShadowInfoIfModified()
 */
public static function addNormalizedAttribute(
	Element $node, string $name, $val, $origVal, bool $skipOrig = false
): void {
	if ( $name !== 'id' ) {
		$node->setAttribute( $name, $val );
	} else {
		DOMCompat::setIdAttribute( $node, $val );
	}
	self::setShadowInfoIfModified( $node, $name, $val, $origVal, $skipOrig );
}
415 | |
/**
 * Get the PageBundle object held by a document's DataBag.
 *
 * @param Document $doc
 * @return PageBundle
 */
public static function getPageBundle( Document $doc ): PageBundle {
	$bag = self::getBag( $doc );
	return $bag->getPageBundle();
}
424 | |
/**
 * Record a node's data in the document's page bundle, keyed by the
 * node's id attribute.
 *
 * A user-defined id is kept when possible. If the node has no id, an
 * empty id, or an id that is already a key in the bundle, a new id of
 * the form
 * ```
 * mw<base64-encoded counter>
 * ```
 * is generated and set on the node.
 *
 * TODO: Note that $data is effectively a partial PageBundle containing
 * only the 'parsoid' and 'mw' properties.
 *
 * @param Element $node node
 * @param Env $env environment (used for logging)
 * @param stdClass $data Object with optional `parsoid` and `mw` properties
 * @param array $idIndex Index of used id attributes in the DOM
 */
public static function storeInPageBundle(
	Element $node, Env $env, stdClass $data, array $idIndex
): void {
	$uid = DOMCompat::getAttribute( $node, 'id' );
	$document = $node->ownerDocument;
	$pb = self::getPageBundle( $document );
	$docDp = &$pb->parsoid;
	$origId = $uid;
	if ( $uid !== null && array_key_exists( $uid, $docDp['ids'] ) ) {
		$uid = null;
		$env->log( 'info', 'Wikitext for this page has duplicate ids: ' . $origId );
	}
	if ( $uid === '' ) {
		$uid = null;
		$env->log( 'info', 'Bogus empty id' );
	}
	if ( $uid === null ) {
		do {
			$docDp['counter'] += 1;
			// PORT-FIXME: NOTE that we aren't updating the idIndex here because
			// we are generating unique ids that will not conflict. In any case,
			// the idIndex is a workaround for the PHP DOM's issues and we might
			// switch out of this in the future anyway.
			$uid = 'mw' . PHPUtils::counterToBase64( $docDp['counter'] );
		} while ( isset( $idIndex[$uid] ) );
		self::addNormalizedAttribute( $node, 'id', $uid, $origId );
	}
	// storeDataAttribs() can pass a $data that only carries `mw` (when
	// data-parsoid is discarded but data-mw goes to the bundle). Read the
	// property defensively so that case does not raise an
	// undefined-property notice; the id is still registered (with null),
	// which keeps the duplicate-id detection above working.
	$docDp['ids'][$uid] = $data->parsoid ?? null;
	if ( isset( $data->mw ) ) {
		$pb->mw['ids'][$uid] = $data->mw;
	}
}
473 | |
/**
 * Embed a page bundle in a document as a
 * `<script id="mw-pagebundle" type="application/x-mw-pagebundle">`
 * element appended to the head.
 *
 * @param Document $doc doc
 * @param PageBundle $pb Page bundle to serialize into the script element
 */
public static function injectPageBundle( Document $doc, PageBundle $pb ): void {
	$attrs = [
		'id' => 'mw-pagebundle',
		'type' => 'application/x-mw-pagebundle',
	];
	$script = DOMUtils::appendToHead( $doc, 'script', $attrs );
	$payload = $doc->createTextNode( $pb->encodeForHeadElement() );
	$script->appendChild( $payload );
}
485 | |
/**
 * Remove the element with id `mw-pagebundle` from a document and return
 * its decoded content.
 *
 * @param Document $doc doc
 * @return stdClass|null Decoded page bundle, or null when the document
 *   has no such element
 */
public static function extractPageBundle( Document $doc ): ?stdClass {
	$scriptElt = DOMCompat::getElementById( $doc, 'mw-pagebundle' );
	if ( !$scriptElt ) {
		return null;
	}
	$scriptElt->parentNode->removeChild( $scriptElt );
	// We actually want arrays inside the page bundle rather than
	// stdClasses, but still want property access at the top level,
	// hence the cast of the decoded value to an object.
	return (object)PHPUtils::jsonDecode( $scriptElt->textContent );
}
501 | |
/**
 * Walk the DOM from $node downward, calling loadDataAttribs() on the
 * visited nodes.
 *
 * @param Node $node Root of the walk
 * @param array $options Options forwarded to loadDataAttribs()
 */
public static function visitAndLoadDataAttribs( Node $node, array $options = [] ): void {
	$visitor = [ self::class, 'loadDataAttribs' ];
	DOMUtils::visitDOM( $node, $visitor, $options );
}
511 | |
/**
 * Convert the generic object decoded from a data-parsoid attribute into
 * a DataParsoid with properly typed properties.
 *
 * @param stdClass $stdDP Decoded data-parsoid attribute
 * @param array $options
 *   - markNew: when non-empty, the IS_NEW temp flag is set according to
 *     whether $node lacks a data-parsoid attribute
 * @param ?Element $node Node the data was read from; it is dereferenced
 *   (and so must not be null) when the markNew option is set
 * @return DataParsoid
 */
private static function massageLoadedDataParsoid(
	stdClass $stdDP, array $options = [], ?Element $node = null
): DataParsoid {
	$dp = new DataParsoid;
	foreach ( $stdDP as $prop => $raw ) {
		switch ( $prop ) {
			// Shadow attribute maps are kept as plain arrays.
			case 'a':
			case 'sa':
				$dp->$prop = (array)$raw;
				break;

			// DOM source ranges; a null value is skipped entirely.
			case 'dsr':
			case 'extTagOffsets':
				if ( $raw !== null ) {
					$dp->$prop = DomSourceRange::fromArray( $raw );
				}
				break;

			// Plain source ranges; a null value is skipped entirely.
			case 'tsr':
			case 'extLinkContentOffsets':
				if ( $raw !== null ) {
					$dp->$prop = SourceRange::fromArray( $raw );
				}
				break;

			// Each entry becomes an array; the list is reindexed from 0.
			case 'optList':
				$entries = [];
				foreach ( $raw as $entry ) {
					$entries[] = (array)$entry;
				}
				$dp->optList = $entries;
				break;

			// Two-level list whose leaves become ParamInfo objects.
			case 'pi':
				$paramInfos = [];
				foreach ( $raw as $group ) {
					$converted = [];
					foreach ( $group as $rawInfo ) {
						$converted[] = ParamInfo::newFromJson( $rawInfo );
					}
					$paramInfos[] = $converted;
				}
				$dp->pi = $paramInfos;
				break;

			case 'tmp':
				// $tmp in DataParsoid.php is lazy-initialized and can be empty
				if ( $raw ) {
					$tempData = new TempData;
					foreach ( $raw as $tmpProp => $tmpVal ) {
						$tempData->$tmpProp = $tmpVal;
					}
					$dp->tmp = $tempData;
				}
				break;

			// Anything else is copied over untouched.
			default:
				$dp->$prop = $raw;
		}
	}

	$shouldMarkNew = !empty( $options['markNew'] );
	if ( $shouldMarkNew ) {
		$hadDataParsoid = $node->hasAttribute( 'data-parsoid' );
		$dp->setTempFlag( TempData::IS_NEW, !$hadDataParsoid );
	}
	return $dp;
}
586 | |
/**
 * Load the JSON-encoded data attributes of an element (data-parsoid,
 * data-mw, data-parsoid-diff, data-mw-i18n) into a fresh NodeData object
 * and remove those attributes from the element. Nodes that are not
 * elements are ignored.
 *
 * Together with storeDataAttribs() this allows the data objects to be
 * kept in the bag while a document is processed, instead of keeping the
 * attributes up to date throughout that phase.
 *
 * Attribute values that decode to something other than a JSON object are
 * ignored for data-parsoid and data-parsoid-diff (treated as absent)
 * rather than passed on to parameters typed as stdClass.
 *
 * @param Node $node node
 * @param array $options options, forwarded to massageLoadedDataParsoid()
 *   (e.g. markNew)
 */
public static function loadDataAttribs( Node $node, array $options ): void {
	if ( !( $node instanceof Element ) ) {
		return;
	}
	// Reset the node data object's stored state, since we're reloading it
	self::setNodeData( $node, new NodeData );

	$stdDP = self::getJSONAttribute( $node, 'data-parsoid', new stdClass );
	if ( !( $stdDP instanceof stdClass ) ) {
		// Valid JSON, but not an object (e.g. "[]" or "5"): handle it like
		// an undecodable value instead of failing with a TypeError.
		error_log( 'ERROR: Ignoring non-object data-parsoid on node ' .
			DOMCompat::nodeName( $node ) );
		$stdDP = new stdClass;
	}
	// Must run before the attribute is removed: the markNew option
	// checks for the presence of data-parsoid on the node.
	$dp = self::massageLoadedDataParsoid( $stdDP, $options, $node );
	self::setDataParsoid( $node, $dp );
	$node->removeAttribute( 'data-parsoid' );

	$dmw = self::getJSONAttribute( $node, 'data-mw', null );
	self::setDataMw( $node, $dmw !== null ? new DataMw( (array)$dmw ) : null );
	$node->removeAttribute( 'data-mw' );

	$dpd = self::getJSONAttribute( $node, 'data-parsoid-diff', null );
	if ( $dpd !== null && !( $dpd instanceof stdClass ) ) {
		error_log( 'ERROR: Ignoring non-object data-parsoid-diff on node ' .
			DOMCompat::nodeName( $node ) );
		$dpd = null;
	}
	self::setDataParsoidDiff( $node, $dpd );
	$node->removeAttribute( 'data-parsoid-diff' );

	$dataI18n = DOMCompat::getAttribute( $node, 'data-mw-i18n' );
	if ( $dataI18n !== null ) {
		$i18n = DataMwI18n::fromJson( PHPUtils::jsonDecode( $dataI18n, true ) );
		self::setDataMwI18n( $node, $i18n );
		$node->removeAttribute( 'data-mw-i18n' );
	}
}
622 | |
/**
 * Build an index of the id attributes used by elements under the body
 * of the document that $node belongs to.
 *
 * @param Node $node Any node of the document, or the document itself
 * @return array Map of id => true for every element id seen
 */
public static function usedIdIndex( Node $node ): array {
	// A Document has no ownerDocument (it is null), so use the node
	// itself in that case instead of handing null to getBody().
	$doc = $node instanceof Document ? $node : $node->ownerDocument;
	$index = [];
	DOMUtils::visitDOM( DOMCompat::getBody( $doc ),
		static function ( Node $n, ?array $options = null ) use ( &$index ) {
			if ( $n instanceof Element ) {
				$id = DOMCompat::getAttribute( $n, 'id' );
				if ( $id !== null ) {
					$index[$id] = true;
				}
			}
		},
		[]
	);
	return $index;
}
643 | |
/**
 * Walk the DOM from $node downward, calling storeDataAttribs() on the
 * visited nodes.
 *
 * @param Node $node Root of the walk
 * @param array $options Options forwarded to storeDataAttribs(); when
 *   storeInPageBundle is set, an idIndex entry is added automatically
 */
public static function visitAndStoreDataAttribs( Node $node, array $options = [] ): void {
	// PORT-FIXME: Storing into the page bundle needs to know which ids are
	// already in use. PHP's `getElementById` implementation is broken, and
	// the workaround (Zest, which uses XPath) can make a lookup O(n);
	// doing that for every element visited here would be O(n^2) overall.
	// So the index of used ids is built once up front and passed along,
	// and storeInPageBundle avoids getElementById entirely.
	$usesPageBundle = !empty( $options['storeInPageBundle'] );
	if ( $usesPageBundle ) {
		$options['idIndex'] = self::usedIdIndex( $node );
	}
	$visitor = [ self::class, 'storeDataAttribs' ];
	DOMUtils::visitDOM( $node, $visitor, $options );
}
662 | |
/**
 * Write an element's data objects from the bag back out, either as
 * JSON-encoded attributes on the element or into the page bundle, and
 * remove the element's data-object-id attribute. Nodes that are not
 * elements are ignored.
 *
 * @param Node $node node
 * @param ?array $options options
 *   - discardDataParsoid: Discard DataParsoid objects instead of storing them
 *   - keepTmp: Preserve DataParsoid::$tmp
 *   - storeInPageBundle: If true, data will be stored in the page bundle
 *     instead of data-parsoid and data-mw.
 *   - env: The Env object required for various features
 *   - idIndex: Array of used ID attributes
 */
public static function storeDataAttribs( Node $node, ?array $options = null ): void {
	if ( $options === null ) {
		$options = [];
	}
	if ( !( $node instanceof Element ) ) {
		return;
	}

	$discardDataParsoid = !empty( $options['discardDataParsoid'] );
	$keepTmp = !empty( $options['keepTmp'] );
	$toPageBundle = !empty( $options['storeInPageBundle'] );
	Assert::invariant( !( $discardDataParsoid && $keepTmp ),
		'Conflicting options: discardDataParsoid and keepTmp are both enabled.' );

	$dp = self::getDataParsoid( $node );
	if ( $dp->getTempFlag( TempData::IS_NEW ) ) {
		// Only necessary to support the cite extension's getById,
		// that's already been loaded once.
		//
		// This is basically a hack to ensure that DOMUtils.isNewElt
		// continues to work since we effectively rely on the absence
		// of data-parsoid to identify new elements. But, loadDataAttribs
		// creates an empty {} if one doesn't exist. So, this hack
		// ensures that a loadDataAttribs + storeDataAttribs pair don't
		// dirty the node by introducing an empty data-parsoid attribute
		// where one didn't exist before.
		//
		// Ideally, we'll find a better solution for this edge case later.
		$discardDataParsoid = true;
	}

	// Collects whatever is destined for the page bundle; stays null when
	// nothing is.
	$data = null;
	if ( !$discardDataParsoid ) {
		if ( !$keepTmp ) {
			// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
			unset( $dp->tmp );
		}

		if ( $toPageBundle ) {
			$data = (object)[ 'parsoid' => $dp ];
		} else {
			self::setJSONAttribute( $node, 'data-parsoid', $dp );
		}
	}

	// Strip invalid data-mw attributes
	if ( self::validDataMw( $node ) ) {
		$mwGoesToBundle = $toPageBundle && isset( $options['env'] ) &&
			// The pagebundle didn't have data-mw before 999.x
			Semver::satisfies( $options['env']->getOutputContentVersion(), '^999.0.0' );
		if ( $mwGoesToBundle ) {
			if ( $data === null ) {
				$data = new stdClass;
			}
			$data->mw = self::getDataMw( $node );
		} else {
			self::setJSONAttribute( $node, 'data-mw', self::getDataMw( $node ) );
		}
	}

	// i18n information is always written as an attribute.
	if ( self::validDataMwI18n( $node ) ) {
		self::setJSONAttribute( $node, 'data-mw-i18n', self::getDataMwI18n( $node ) );
	}

	// Store pagebundle
	if ( $data !== null ) {
		self::storeInPageBundle( $node, $options['env'], $data, $options['idIndex'] );
	}

	// Indicate that this node's data has been stored so that if we try
	// to access it after the fact we're aware and remove the attribute
	// since it's no longer needed.
	$nodeData = self::getNodeData( $node );
	$objectId = DOMCompat::getAttribute( $node, self::DATA_OBJECT_ATTR_NAME );
	$nodeData->storedId = $objectId !== null ? (int)$objectId : null;
	$node->removeAttribute( self::DATA_OBJECT_ATTR_NAME );
}
745 | |
/**
 * Clone an element and give the clone its own copies of the node data
 * held in the bag.
 *
 * @param Element $elt Element to clone
 * @param bool $deep Whether descendants are cloned as well
 * @return Element The clone
 */
public static function cloneNode( Element $elt, bool $deep ): Element {
	$clone = $elt->cloneNode( $deep );
	'@phan-var Element $clone'; // @var Element $clone
	// $deep needs no special handling here: a shallow clone has no child
	// nodes, so fixing up "the whole cloned tree" covers both cases
	// (the tree may simply consist of the clone alone).
	self::fixClonedData( $clone );
	return $clone;
}
760 | |
/**
 * Recursively give every element of a cloned tree its own node data.
 *
 * A cloned element still carries the data-object-id of its source, so
 * the data is cloned and set again on the element, which assigns it a
 * new id (setNodeData does that) and avoids conflicting ids.
 *
 * @param Element $elt Root of the cloned tree
 */
private static function fixClonedData( Element $elt ) {
	if ( $elt->hasAttribute( self::DATA_OBJECT_ATTR_NAME ) ) {
		$ownCopy = self::getNodeData( $elt )->cloneNodeData();
		self::setNodeData( $elt, $ownCopy );
	}
	foreach ( $elt->childNodes as $child ) {
		if ( !( $child instanceof Element ) ) {
			// Only elements can carry node data.
			continue;
		}
		self::fixClonedData( $child );
	}
}
776 | } |