Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
12.39% |
56 / 452 |
|
5.45% |
3 / 55 |
CRAP | |
0.00% |
0 / 1 |
DOMDataUtils | |
12.39% |
56 / 452 |
|
5.45% |
3 / 55 |
17591.99 | |
0.00% |
0 / 1 |
getBag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getCodec | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
isPrepared | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isPreparedAndLoaded | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
prepareDoc | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
prepareChildDoc | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
stashObjectInDoc | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
dedupeNodeData | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
dedupeNodeDataVisitor | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
noAttrs | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
getNodeData | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
56 | |||
setNodeData | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getDataParsoid | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
setDataParsoid | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getDataMwI18n | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDataMwI18nDefault | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDataNodeI18n | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
setDataNodeI18n | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getDataAttrI18n | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
setDataAttrI18n | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getDataAttrI18nNames | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getDataParsoidDiff | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDataParsoidDiffDefault | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setDataParsoidDiff | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getDataMw | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
setDataMw | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getJSONAttribute | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
setJSONAttribute | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
setShadowInfo | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
setShadowInfoIfModified | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
42 | |||
addNormalizedAttribute | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
storeInPageBundle | |
88.24% |
15 / 17 |
|
0.00% |
0 / 1 |
6.06 | |||
getCodecHints | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
visitAndLoadDataAttribs | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
loadDataAttribs | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
42 | |||
usedIdIndex | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
visitAndStoreDataAttribs | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
storeDataAttribs | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
182 | |||
cloneNode | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
cloneDocumentFragment | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
fixClonedData | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isHtmlAttributeWithSpecialSemantics | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getAttributeObject | |
88.89% |
16 / 18 |
|
0.00% |
0 / 1 |
4.02 | |||
getAttributeObjectDefault | |
60.00% |
9 / 15 |
|
0.00% |
0 / 1 |
5.02 | |||
setAttributeObject | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
removeAttributeObject | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
nodeHasDataMw | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
removeFromExpandedAttrs | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
72 | |||
getAttributeDom | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getAttributeDomDefault | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
setAttributeDom | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
removeAttributeDom | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
loadRichAttributes | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
110 | |||
storeRichAttributes | |
0.00% |
0 / 34 |
|
0.00% |
0 / 1 |
132 | |||
dumpRichAttribs | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
110 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Utils; |
5 | |
6 | use Composer\Semver\Semver; |
7 | use InvalidArgumentException; |
8 | use stdClass; |
9 | use TypeError; |
10 | use UnexpectedValueException; |
11 | use Wikimedia\Assert\Assert; |
12 | use Wikimedia\Assert\UnreachableException; |
13 | use Wikimedia\JsonCodec\Hint; |
14 | use Wikimedia\Parsoid\Core\DomPageBundle; |
15 | use Wikimedia\Parsoid\DOM\Document; |
16 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
17 | use Wikimedia\Parsoid\DOM\Element; |
18 | use Wikimedia\Parsoid\DOM\Node; |
19 | use Wikimedia\Parsoid\NodeData\DataBag; |
20 | use Wikimedia\Parsoid\NodeData\DataMw; |
21 | use Wikimedia\Parsoid\NodeData\DataMwAttrib; |
22 | use Wikimedia\Parsoid\NodeData\DataMwI18n; |
23 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
24 | use Wikimedia\Parsoid\NodeData\DataParsoidDiff; |
25 | use Wikimedia\Parsoid\NodeData\I18nInfo; |
26 | use Wikimedia\Parsoid\NodeData\NodeData; |
27 | use Wikimedia\Parsoid\NodeData\TempData; |
28 | |
29 | /** |
30 | * These helpers pertain to HTML and data attributes of a node. |
31 | */ |
32 | class DOMDataUtils { |
33 | public const DATA_OBJECT_ATTR_NAME = 'data-object-id'; |
34 | |
35 | /** The internal property prefix used for rich attribute data. */ |
36 | private const RICH_ATTR_DATA_PREFIX = 'rich-data-'; |
37 | |
38 | /** The internal property prefix used for rich attribute type hints. */ |
39 | private const RICH_ATTR_HINT_PREFIX = 'rich-hint-'; |
40 | |
/**
 * Fetch the DataBag attached to a Document.
 *
 * The bag is stored in an undeclared (dynamic) `bag` property of the
 * document. Going through this accessor keeps the phan suppression for
 * that read in a single place.
 *
 * @param Document $doc
 * @return DataBag
 */
public static function getBag( Document $doc ): DataBag {
	// @phan-suppress-next-line PhanUndeclaredProperty
	$bag = $doc->bag;
	return $bag;
}
52 | |
/**
 * Fetch the DOMDataCodec attached to the document that owns $node.
 *
 * The codec is stored in an undeclared (dynamic) `codec` property of the
 * document; this accessor keeps the phan suppression in a single place.
 *
 * @param Node $node Any node; a Document may be passed directly.
 * @return DOMDataCodec
 */
public static function getCodec( Node $node ): DOMDataCodec {
	// ownerDocument is null only when $node is itself the Document, in
	// which case the node is the one carrying the codec.
	$owner = $node->ownerDocument;
	if ( $owner === null ) {
		$owner = $node;
	}
	// @phan-suppress-next-line PhanUndeclaredProperty
	return $owner->codec;
}
66 | |
/**
 * Report whether the dynamic `bag` property has been set on a document.
 *
 * @param Document $doc
 * @return bool True when `$doc->bag` is set and not null.
 */
public static function isPrepared( Document $doc ): bool {
	// `bag` is a deliberate dynamic property; see DOMDataUtils::getBag()
	// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
	$hasBag = isset( $doc->bag );
	return $hasBag;
}
72 | |
/**
 * Report whether a document has a data bag and that bag is flagged as
 * loaded.
 *
 * @param Document $doc
 * @return bool
 */
public static function isPreparedAndLoaded( Document $doc ): bool {
	if ( !self::isPrepared( $doc ) ) {
		// No bag at all, so it cannot have been loaded.
		return false;
	}
	return (bool)self::getBag( $doc )->loaded;
}
76 | |
/**
 * Attach a fresh DataBag and DOMDataCodec to a document.
 *
 * Both are stored in deliberate dynamic properties (`bag` and `codec`);
 * see DOMDataUtils::getBag() and DOMDataUtils::getCodec().
 *
 * @param Document $doc
 */
public static function prepareDoc( Document $doc ): void {
	$bag = new DataBag();
	// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
	$doc->bag = $bag;

	// The codec is constructed only after the bag is in place.
	$codec = new DOMDataCodec( $doc, [] );
	// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
	$doc->codec = $codec;

	// Called for their side effect only (return values are discarded):
	// this caches the head and body.
	DOMCompat::getHead( $doc );
	DOMCompat::getBody( $doc );
}
89 | |
/**
 * Make a child document share the data bag and codec of a top-level
 * document.
 *
 * The top-level document must already carry a DataBag; this is enforced
 * with an invariant check.
 *
 * @param Document $topLevelDoc Document whose bag and codec are shared
 * @param Document $childDoc Document that receives them
 */
public static function prepareChildDoc( Document $topLevelDoc, Document $childDoc ) {
	// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
	$sharedBag = $topLevelDoc->bag;
	Assert::invariant( $sharedBag instanceof DataBag, 'doc bag not set' );
	// Same object instances are assigned, not copies.
	// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
	$childDoc->bag = $sharedBag;
	// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
	$childDoc->codec = $topLevelDoc->codec;
}
102 | |
/**
 * Store $obj in the data bag of $doc.
 *
 * @param Document $doc Document whose bag receives the object
 * @param NodeData $obj Object to store
 * @return int Id under which the object can be retrieved later
 */
public static function stashObjectInDoc( Document $doc, NodeData $obj ): int {
	$bag = self::getBag( $doc );
	return $bag->stashObject( $obj );
}
112 | |
/**
 * Walk the subtree rooted at $node and make sure that no two elements
 * share the same data-object id: every repeated id gets its own clone of
 * the node data.
 *
 * @param Node $node Root of the subtree to process; may be the Document
 *   itself.
 */
public static function dedupeNodeData( Node $node ): void {
	$seen = [];
	// ownerDocument is null when $node is the Document; fall back to the
	// node itself, as getCodec() and visitAndLoadDataAttribs() do.
	$doc = $node->ownerDocument ?? $node;
	self::dedupeNodeDataVisitor( self::getBag( $doc ), $seen, $node );
}
121 | |
/**
 * Recursive worker for dedupeNodeData().
 *
 * @param DataBag $bag Bag that holds the node data objects
 * @param array &$seen Map of data-object id => true for ids already
 *   encountered during the walk; updated in place
 * @param Node $node Current node
 */
private static function dedupeNodeDataVisitor(
	DataBag $bag, array &$seen, Node $node
) {
	$attrName = self::DATA_OBJECT_ATTR_NAME;
	if ( $node instanceof Element && $node->hasAttribute( $attrName ) ) {
		$id = (int)DOMCompat::getAttribute( $node, $attrName );
		if ( !empty( $seen[$id] ) ) {
			// An earlier element already uses this id: give this
			// element a clone of the data under a newly stashed id.
			$shared = $bag->getObject( $id );
			$node->removeAttribute( $attrName );
			self::setNodeData( $node, $shared->cloneNodeData() );
		}
		$seen[$id] = true;
	}
	foreach ( $node->childNodes as $childNode ) {
		self::dedupeNodeDataVisitor( $bag, $seen, $childNode );
	}
}
139 | |
/**
 * Is this element free of attributes, ignoring the internal
 * data-object-id attribute?
 *
 * @param Element $node
 * @return bool False as soon as an 'xmlns' attribute is present;
 *   otherwise true when the element has no attributes, or exactly one
 *   attribute which is the data-object-id.
 */
public static function noAttrs( Element $node ): bool {
	// The 'xmlns' attribute is "invisible" (T235295), so it is tested
	// for explicitly rather than relying on the attribute count.
	if ( $node->hasAttribute( 'xmlns' ) ) {
		return false;
	}
	$attrCount = count( $node->attributes );
	if ( $attrCount === 0 ) {
		return true;
	}
	if ( $attrCount === 1 ) {
		return $node->hasAttribute( self::DATA_OBJECT_ATTR_NAME );
	}
	return false;
}
154 | |
/**
 * Get the NodeData object associated with an element.
 *
 * If the element has no data-object-id attribute yet, a new NodeData is
 * created and registered via setNodeData(). When a page bundle is given
 * and the element has an `id` attribute, the bundle's `parsoid` and `mw`
 * entries for that id (if any) are decoded into the new object.
 *
 * Otherwise the existing object is fetched from the owner document's bag.
 *
 * @param Element $node node
 * @param ?DomPageBundle $pb Optional source for node data
 * @return NodeData
 * @throws UnreachableException if the stored object still has `storedId`
 *   set, i.e. it is being fetched without having been loaded.
 */
public static function getNodeData( Element $node, ?DomPageBundle $pb = null ): NodeData {
	$nodeId = DOMCompat::getAttribute( $node, self::DATA_OBJECT_ATTR_NAME );
	if ( $nodeId === null ) {
		// Initialized on first request
		$nodeData = new NodeData;
		self::setNodeData( $node, $nodeData );
		$id = DOMCompat::getAttribute( $node, 'id' );
		if ( $id !== null && $pb !== null ) {
			// See if there is data-parsoid or data-mw in the page bundle
			$codec = self::getCodec( $node );
			$hints = self::getCodecHints();
			if ( isset( $pb->parsoid['ids'][$id] ) ) {
				$nodeData->parsoid = $codec->newFromJsonArray(
					$pb->parsoid['ids'][$id],
					$hints['data-parsoid']
				);
			}
			if ( isset( $pb->mw['ids'][$id] ) ) {
				$nodeData->mw = $codec->newFromJsonArray(
					$pb->mw['ids'][$id],
					$hints['data-mw']
				);
			}
		}
		return $nodeData;
	}

	$nodeData = self::getBag( $node->ownerDocument )->getObject( (int)$nodeId );
	Assert::invariant( $nodeData !== null, 'Bogus nodeId given!' );
	if ( isset( $nodeData->storedId ) ) {
		throw new UnreachableException(
			// Trailing space keeps the sentence separate from "Node id:".
			'Trying to fetch node data without loading! ' .
			// If this node's data-object id is different from storedId,
			// it will indicate that the data-parsoid object was shared
			// between nodes without getting cloned. Useful for debugging.
			'Node id: ' . $nodeId . ' ' .
			'Stored data: ' . PHPUtils::jsonEncode( $nodeData )
		);
	}
	return $nodeData;
}
205 | |
/**
 * Associate a NodeData object with an element.
 *
 * The object is stashed in the owner document's bag and the resulting id
 * is written to the element's data-object-id attribute.
 *
 * @param Element $node node
 * @param NodeData $data data
 */
public static function setNodeData( Element $node, NodeData $data ): void {
	$node->setAttribute(
		self::DATA_OBJECT_ATTR_NAME,
		(string)self::stashObjectInDoc( $node->ownerDocument, $data )
	);
}
216 | |
/**
 * Get the DataParsoid object of an element, creating an empty one (and
 * storing it in the node data) if none is set yet.
 *
 * @param Element $node node
 * @return DataParsoid
 */
public static function getDataParsoid( Element $node ): DataParsoid {
	$nodeData = self::getNodeData( $node );
	if ( !isset( $nodeData->parsoid ) ) {
		$nodeData->parsoid = new DataParsoid;
	}
	return $nodeData->parsoid;
}
228 | |
/**
 * Replace the DataParsoid object stored in an element's node data.
 *
 * @param Element $node node
 * @param DataParsoid $dp data-parsoid
 */
public static function setDataParsoid( Element $node, DataParsoid $dp ): void {
	self::getNodeData( $node )->parsoid = $dp;
}
239 | |
/**
 * Read the `data-mw-i18n` rich attribute of an element.
 *
 * Private because callers should normally use getDataNodeI18n() and
 * getDataAttrI18n() instead.
 *
 * @param Element $node
 * @return DataMwI18n|null Null if not present; no default is created.
 */
private static function getDataMwI18n( Element $node ): ?DataMwI18n {
	$hint = DataMwI18n::hint();
	return self::getAttributeObject( $node, 'data-mw-i18n', $hint );
}
250 | |
/**
 * Read the `data-mw-i18n` rich attribute of an element through
 * getAttributeObjectDefault(), so that a default value is set when the
 * attribute is missing.
 *
 * Not meant to be used directly; setDataNodeI18n() and setDataAttrI18n()
 * are the intended entry points.
 *
 * @param Element $node
 * @return DataMwI18n
 */
private static function getDataMwI18nDefault( Element $node ): DataMwI18n {
	$hint = DataMwI18n::hint();
	return self::getAttributeObjectDefault( $node, 'data-mw-i18n', $hint );
}
263 | |
/**
 * Get the node-level internationalization (i18n) information of an
 * element (typically used for localization).
 *
 * @param Element $node
 * @return ?I18nInfo Null when the element carries no i18n data.
 */
public static function getDataNodeI18n( Element $node ): ?I18nInfo {
	$i18n = self::getDataMwI18n( $node );
	return $i18n === null ? null : $i18n->getSpanInfo();
}
276 | |
/**
 * Record node-level internationalization (i18n) information on an
 * element, for later localization. The element's i18n container is
 * created if it does not exist yet.
 *
 * @param Element $node
 * @param I18nInfo $info
 * @return void
 */
public static function setDataNodeI18n( Element $node, I18nInfo $info ) {
	self::getDataMwI18nDefault( $node )->setSpanInfo( $info );
}
287 | |
/**
 * Get the internationalization (i18n) information attached to one
 * attribute value of an element (typically used for localization).
 *
 * @param Element $node
 * @param string $name Attribute name
 * @return ?I18nInfo Null when the element carries no i18n data;
 *   otherwise whatever the i18n container returns for $name.
 */
public static function getDataAttrI18n( Element $node, string $name ): ?I18nInfo {
	$i18n = self::getDataMwI18n( $node );
	return $i18n === null ? null : $i18n->getAttributeInfo( $name );
}
302 | |
/**
 * Record internationalization (i18n) information for an attribute value
 * of an element, for later localization. The element's i18n container is
 * created if it does not exist yet.
 *
 * @param Element $node
 * @param string $name Attribute name
 * @param I18nInfo $info
 * @return void
 */
public static function setDataAttrI18n( Element $node, string $name, I18nInfo $info ) {
	self::getDataMwI18nDefault( $node )->setAttributeInfo( $name, $info );
}
315 | |
/**
 * List the attribute names for which the element's i18n container
 * reports information.
 *
 * @param Element $node
 * @return array Empty when the element carries no i18n data.
 */
public static function getDataAttrI18nNames( Element $node ): array {
	$i18n = self::getDataMwI18n( $node );
	if ( $i18n !== null ) {
		return $i18n->getAttributeNames();
	}
	// Deliberately no default i18n container is created here.
	return [];
}
328 | |
/**
 * Read the `data-parsoid-diff` rich attribute of an element.
 *
 * @param Element $node node
 * @return ?DataParsoidDiff Null if not present; no default is created.
 */
public static function getDataParsoidDiff( Element $node ): ?DataParsoidDiff {
	$hint = DataParsoidDiff::hint();
	return self::getAttributeObject( $node, 'data-parsoid-diff', $hint );
}
339 | |
/**
 * Read the `data-parsoid-diff` rich attribute of an element through
 * getAttributeObjectDefault(), so that a default value is set when the
 * attribute is missing.
 *
 * @param Element $node node
 * @return DataParsoidDiff
 */
public static function getDataParsoidDiffDefault( Element $node ): DataParsoidDiff {
	$hint = DataParsoidDiff::hint();
	return self::getAttributeObjectDefault( $node, 'data-parsoid-diff', $hint );
}
349 | |
/**
 * Set or clear the `data-parsoid-diff` rich attribute of an element.
 *
 * @param Element $node node
 * @param ?DataParsoidDiff $diffObj data-parsoid-diff object; passing
 *   null removes the attribute object instead.
 */
public static function setDataParsoidDiff( Element $node, ?DataParsoidDiff $diffObj ): void {
	if ( $diffObj === null ) {
		self::removeAttributeObject( $node, 'data-parsoid-diff' );
		return;
	}
	self::setAttributeObject( $node, 'data-parsoid-diff', $diffObj, DataParsoidDiff::hint() );
}
363 | |
/**
 * Get the DataMw object of an element, creating an empty one (and
 * storing it in the node data) if none is set yet.
 *
 * @param Element $node node
 * @return DataMw
 */
public static function getDataMw( Element $node ): DataMw {
	$nodeData = self::getNodeData( $node );
	if ( !isset( $nodeData->mw ) ) {
		$nodeData->mw = new DataMw;
	}
	return $nodeData->mw;
}
375 | |
/**
 * Replace the DataMw object stored in an element's node data.
 *
 * @param Element $node node
 * @param ?DataMw $dmw data-mw; null is stored as-is.
 */
public static function setDataMw( Element $node, ?DataMw $dmw ): void {
	self::getNodeData( $node )->mw = $dmw;
}
386 | |
/**
 * Decode a JSON-encoded attribute of an element.
 *
 * @param Element $node node
 * @param string $name Attribute name
 * @param mixed $defaultVal Value returned when the attribute is absent
 *   or when decoding yields null
 * @return mixed The decoded value (decoded with the "associative" flag
 *   set to false), or $defaultVal
 */
public static function getJSONAttribute( Element $node, string $name, $defaultVal ) {
	$raw = DOMCompat::getAttribute( $node, $name );
	if ( $raw === null ) {
		return $defaultVal;
	}
	$decoded = PHPUtils::jsonDecode( $raw, false );
	if ( $decoded === null ) {
		// A null result is treated as a decode failure: log it and
		// fall back to the default.
		error_log( 'ERROR: Could not decode attribute-val ' . $raw .
			' for ' . $name . ' on node ' . DOMCompat::nodeName( $node ) );
		return $defaultVal;
	}
	return $decoded;
}
409 | |
/**
 * Set an attribute on an element to the JSON encoding of a value.
 *
 * @param Element $node node
 * @param string $name Name of the attribute.
 * @param mixed $obj Value to encode. An empty array is written as the
 *   empty JSON object `{}`.
 */
public static function setJSONAttribute( Element $node, string $name, $obj ): void {
	if ( $obj === [] ) {
		$json = '{}';
	} else {
		$json = PHPUtils::jsonEncode( $obj );
	}
	$node->setAttribute( $name, $json );
}
421 | |
422 | // Shadow attributes should probably be unified with rich attributes |
423 | // at some point. [CSA 2024-10-15] |
424 | |
/**
 * Set shadow info on a node; similar to the method on tokens.
 *
 * Records $name => $val in the `a` property of the element's
 * data-parsoid. Unlike setShadowInfoIfModified() there is no original
 * value to compare against, so the value is always recorded and nothing
 * is written to `sa` (it is only initialized to an empty array if unset).
 *
 * @param Element $node node
 * @param string $name Name of the attribute.
 * @param mixed $val val
 */
public static function setShadowInfo( Element $node, string $name, $val ): void {
	$dataParsoid = self::getDataParsoid( $node );
	// Make sure both shadow maps exist before writing.
	$dataParsoid->sa ??= [];
	$dataParsoid->a ??= [];
	$dataParsoid->a[$name] = $val;
}
442 | |
/**
 * Set shadow info on a node; similar to the method on tokens.
 *
 * Unless $skipOrig is set, nothing is recorded when $val is identical to
 * $origVal or when $origVal is null. Otherwise:
 *  - the new value is recorded in data-parsoid->a, and
 *  - the original value is recorded in data-parsoid->sa, but only when
 *    $skipOrig is false and $name has no entry in data-parsoid->a yet.
 *
 * @param Element $node node
 * @param string $name Name of the attribute.
 * @param mixed $val val
 * @param mixed $origVal original value
 * @param bool $skipOrig When true, always record $val and never touch
 *   the original-value entry.
 */
public static function setShadowInfoIfModified(
	Element $node, string $name, $val, $origVal, bool $skipOrig = false
): void {
	if ( !$skipOrig ) {
		if ( $origVal === null || $val === $origVal ) {
			return;
		}
	}
	$dataParsoid = self::getDataParsoid( $node );
	$dataParsoid->a ??= [];
	$dataParsoid->sa ??= [];
	// FIXME: This is a hack to not overwrite already shadowed info.
	// We should either fix the call site that depends on this
	// behaviour to do an explicit check, or double down on this
	// by porting it to the token method as well.
	$alreadyShadowed = array_key_exists( $name, $dataParsoid->a );
	if ( !$skipOrig && !$alreadyShadowed ) {
		$dataParsoid->sa[$name] = $origVal;
	}
	$dataParsoid->a[$name] = $val;
}
475 | |
/**
 * Set an attribute on an element and record shadow info for it.
 * Similar to the method on tokens.
 *
 * The `id` attribute is written through DOMCompat::setIdAttribute();
 * every other attribute through Element::setAttribute().
 *
 * @param Element $node node
 * @param string $name Name of the attribute.
 * @param mixed $val value
 * @param mixed $origVal original value
 * @param bool $skipOrig Passed through to setShadowInfoIfModified()
 */
public static function addNormalizedAttribute(
	Element $node, string $name, $val, $origVal, bool $skipOrig = false
): void {
	if ( $name !== 'id' ) {
		$node->setAttribute( $name, $val );
	} else {
		DOMCompat::setIdAttribute( $node, $val );
	}
	self::setShadowInfoIfModified( $node, $name, $val, $origVal, $skipOrig );
}
496 | |
/**
 * Migrates node data to the given DomPageBundle, keyed by the element's
 * id. If the element has no usable id, a unique one is generated with
 * the following format:
 * ```
 * mw<base64-encoded counter>
 * ```
 * User-defined ids are kept unless they are empty or already used as a
 * key in the bundle's parsoid ids.
 *
 * TODO: Note that $data is effectively a partial PageBundle containing
 * only the 'parsoid' and 'mw' properties.
 *
 * @param DomPageBundle $pb
 * @param Element $node node
 * @param stdClass $data data
 * @param array $idIndex Index of used id attributes in the DOM
 */
public static function storeInPageBundle(
	DomPageBundle $pb, Element $node, stdClass $data, array $idIndex
): void {
	$hints = self::getCodecHints();
	$origId = DOMCompat::getAttribute( $node, 'id' );
	$codec = self::getCodec( $node );
	// Reference, so counter and id updates land in the bundle itself.
	$parsoidBundle = &$pb->parsoid;

	$uid = $origId;
	// Forcibly reset the id if it is invalid (empty) or conflicts with
	// an id already stored in the bundle.
	if (
		$uid === '' ||
		( $uid !== null && array_key_exists( $uid, $parsoidBundle['ids'] ) )
	) {
		$uid = null;
	}
	if ( $uid === null ) {
		do {
			$parsoidBundle['counter'] += 1;
			// PORT-FIXME: NOTE that we aren't updating the idIndex here because
			// we are generating unique ids that will not conflict. In any case,
			// the idIndex is a workaround for the PHP DOM's issues and we might
			// switch out of this in the future anyway.
			$uid = 'mw' . PHPUtils::counterToBase64( $parsoidBundle['counter'] );
		} while ( isset( $idIndex[$uid] ) );
		self::addNormalizedAttribute( $node, 'id', $uid, $origId );
	}
	// Convert from DataParsoid/DataMw objects to associative array
	$parsoidBundle['ids'][$uid] = $codec->toJsonArray( $data->parsoid, $hints['data-parsoid'] );
	if ( isset( $data->mw ) ) {
		$pb->mw['ids'][$uid] = $codec->toJsonArray( $data->mw, $hints['data-mw'] );
	}
}
546 | |
/**
 * Return the JsonCodec Hint objects used for `data-parsoid` and
 * `data-mw`. The hints are built once and memoized in a static variable.
 *
 * @return array<Hint> Keyed by 'data-parsoid' and 'data-mw'
 */
public static function getCodecHints(): array {
	static $cachedHints = null;
	$cachedHints ??= [
		'data-parsoid' => Hint::build( DataParsoid::class, Hint::ALLOW_OBJECT ),
		'data-mw' => Hint::build( DataMw::class, Hint::ALLOW_OBJECT ),
	];
	return $cachedHints;
}
561 | |
/**
 * Walk the DOM from $node downward, calling loadDataAttribs() on the
 * visited nodes.
 *
 * The document must have been prepared. When $node is the document body,
 * the bag must not already be flagged as loaded.
 *
 * @param Node $node node
 * @param array $options options; also recorded on the document's codec
 */
public static function visitAndLoadDataAttribs( Node $node, array $options = [] ): void {
	$doc = $node->ownerDocument ?? $node;
	Assert::invariant( self::isPrepared( $doc ), "document should be prepared" );
	$startsAtBody = ( $node === DOMCompat::getBody( $doc ) );
	if ( $startsAtBody ) {
		Assert::invariant( !self::getBag( $doc )->loaded, "redundant load" );
	}
	// If the 'markNew' flag is passed, it needs to be recorded in the
	// Document codec's options, so that we can use this flag when
	// loading embedded document fragments.
	$codec = self::getCodec( $node );
	$codec->setOptions( $options );
	DOMUtils::visitDOM( $node, [ self::class, 'loadDataAttribs' ], $options );
}
580 | |
/**
 * Move the inline `data-parsoid` and `data-mw` attributes of an element
 * into its node data, removing the attributes from the element.
 * Non-element nodes are ignored.
 *
 * This is intended to be used on a document together with
 * storeDataAttribs(), so that the data objects are transparently
 * reloaded here and written back there, rather than keeping the
 * attributes up-to-date in between. Prefer higher-level helpers over
 * calling this directly.
 *
 * @param Node $node node
 * @param array $options options
 *   - loadFromPageBundle: optional page bundle passed to getNodeData()
 *   - markNew: when non-empty, the IS_NEW temp flag is set on the
 *     DataParsoid to record whether it had to be freshly created
 * @throws UnexpectedValueException if decoding `data-mw` raises a
 *   TypeError
 */
public static function loadDataAttribs( Node $node, array $options ): void {
	if ( !( $node instanceof Element ) ) {
		return;
	}
	$nodeData = self::getNodeData( $node, $options['loadFromPageBundle'] ?? null );
	$codec = self::getCodec( $node );
	$hints = self::getCodecHints();

	$inlineDp = DOMCompat::getAttribute( $node, 'data-parsoid' );
	if ( $inlineDp !== null ) {
		$isNewDp = false;
		$dp = $codec->newFromJsonString( $inlineDp, $hints['data-parsoid'] );
	} else {
		// data-parsoid might have come from page bundle
		$isNewDp = ( $nodeData->parsoid === null );
		$dp = self::getDataParsoid( $node );
	}
	if ( !empty( $options['markNew'] ) ) {
		$dp->setTempFlag( TempData::IS_NEW, $isNewDp );
	}
	self::setDataParsoid( $node, $dp );
	$node->removeAttribute( 'data-parsoid' );

	// note that data-mw might already be present in node data from
	// page bundle, but inline attribute takes precedence
	$inlineMw = DOMCompat::getAttribute( $node, 'data-mw' );
	if ( $inlineMw !== null ) {
		try {
			$dmw = $codec->newFromJsonString( $inlineMw, $hints['data-mw'] );
		} catch ( TypeError $e ) {
			// improve debuggability
			throw new UnexpectedValueException( "Unable to decode JsonString [$inlineMw]", 0, $e );
		}
		self::setDataMw( $node, $dmw );
		$node->removeAttribute( 'data-mw' );
	}

	// We don't load rich attributes here: that will be done lazily as
	// getAttributeObject()/etc methods are called because we don't
	// know the true types of the rich values yet. In the future
	// we might have a schema or self-labelling of values which would
	// allow us to load rich attributes here as well.
}
637 | |
/**
 * Builds an index of the `id` attributes seen on elements under the body
 * of the document that owns $node.
 *
 * @param Node $node Any node of the document, or the Document itself
 * @return array Map of id value => true
 */
public static function usedIdIndex( Node $node ): array {
	$index = [];
	// ownerDocument is null when $node is the Document; fall back to the
	// node itself so callers such as visitAndStoreDataAttribs() may pass
	// a Document.
	$doc = $node->ownerDocument ?? $node;
	DOMUtils::visitDOM( DOMCompat::getBody( $doc ),
		static function ( Node $n, ?array $options = null ) use ( &$index ) {
			if ( $n instanceof Element ) {
				$id = DOMCompat::getAttribute( $n, 'id' );
				if ( $id !== null ) {
					$index[$id] = true;
				}
			}
		},
		[]
	);
	return $index;
}
658 | |
/**
 * Walk the DOM from $node downward, calling storeDataAttribs() on the
 * visited nodes.
 *
 * The document's bag must be flagged as loaded. The codec's options are
 * switched to $options for the duration of the walk and restored
 * afterwards.
 *
 * @param Node $node node
 * @param array $options options; see storeDataAttribs(). When
 *   'storeInPageBundle' is non-empty, an 'idIndex' entry is added.
 */
public static function visitAndStoreDataAttribs( Node $node, array $options = [] ): void {
	$doc = $node->ownerDocument ?? $node;
	Assert::invariant( self::getBag( $doc )->loaded, "store without load" );
	// PORT-FIXME: storeDataAttribs calls storeInPageBundle which calls getElementById.
	// PHP's `getElementById` implementation is broken, and we work around that by
	// using Zest which uses XPath. So, getElementById call can be O(n) and calling it
	// on every element of the DOM via visitDOM here makes it O(n^2) instead of O(n).
	// So, we work around that by building an index and avoiding getElementById entirely
	// in storeInPageBundle.
	if ( !empty( $options['storeInPageBundle'] ) ) {
		$options['idIndex'] = self::usedIdIndex( $node );
	}
	// Set the "storage options" and save the "loading options"
	$codec = self::getCodec( $node );
	$loadingOptions = $codec->setOptions( $options );

	DOMUtils::visitDOM( $node, [ self::class, 'storeDataAttribs' ], $options );

	// Restore the "loading options"
	$codec->setOptions( $loadingOptions );
}
686 | |
687 | /** |
688 | * Copy data attributes from the bag to either JSON-encoded attributes on |
689 | * each node, or the page bundle, erasing the data-object-id attributes. |
690 | * |
691 | * @param Node $node node |
692 | * @param ?array $options options |
693 | * - discardDataParsoid: Discard DataParsoid objects instead of storing them |
694 | * - keepTmp: Preserve DataParsoid::$tmp |
695 | * - storeInPageBundle: If set to a DomPageBundle, data will be stored |
696 | * in the given page bundle instead of data-parsoid and data-mw. |
697 | * - outputContentVersion: Version of output we're storing. The page bundle |
698 | * didn't have data-mw before 999.x |
699 | * - idIndex: Array of used ID attributes |
700 | */ |
701 | public static function storeDataAttribs( Node $node, ?array $options = null ): void { |
702 | $hints = self::getCodecHints(); |
703 | $options ??= []; |
704 | if ( !( $node instanceof Element ) ) { |
705 | return; |
706 | } |
707 | |
708 | // Store rich attributes. Note that, at present, rich attributes may |
709 | // be serialized into the data-mw attributes which are serialized in |
710 | // the pagebundle; thus we need to serialize all the "attributes |
711 | // with special HTML semantics" (which will get added to data-mw) |
712 | // *before* we handle the other attributes and the page bundle. (The array union keeps 'onlySpecial' => true even if $options has that key.) |
713 | self::storeRichAttributes( $node, [ 'onlySpecial' => true ] + $options ); |
714 | |
715 | Assert::invariant( empty( $options['discardDataParsoid'] ) || empty( $options['keepTmp'] ), |
716 | 'Conflicting options: discardDataParsoid and keepTmp are both enabled.' ); |
717 | $codec = self::getCodec( $node ); |
718 | $dp = self::getDataParsoid( $node ); |
719 | $discardDataParsoid = !empty( $options['discardDataParsoid'] ); |
720 | if ( $dp->getTempFlag( TempData::IS_NEW ) && !$dp->isModified() ) { |
721 | // This hack ensures that a loadDataAttribs + storeDataAttribs pair |
722 | // doesn't dirty the node by introducing an empty data-parsoid attribute |
723 | // where one didn't exist before. |
724 | // |
725 | // Ideally, we'll find a better solution for this edge case later. |
726 | $discardDataParsoid = true; |
727 | } |
728 | $data = null; |
729 | if ( !$discardDataParsoid ) { |
730 | // FIXME: $dp->toJsonArray drops tmp so it's discarded regardless |
731 | // of this flag |
732 | if ( empty( $options['keepTmp'] ) ) { |
733 | // @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty |
734 | unset( $dp->tmp ); |
735 | } |
736 | |
737 | if ( !empty( $options['storeInPageBundle'] ) ) { |
738 | $data ??= new stdClass; |
739 | $data->parsoid = $dp; |
740 | } else { |
741 | $node->setAttribute( |
742 | 'data-parsoid', |
743 | PHPUtils::jsonEncode( |
744 | $codec->toJsonArray( $dp, $hints['data-parsoid'] ) |
745 | ) |
746 | ); |
747 | } |
748 | } |
749 | |
750 | // Special handling for data-mw. This should eventually go away |
751 | // and be replaced with the standard "rich attribute" handling: |
752 | // (a) now that DataMw is a class type, we should never actually |
753 | // have "invalid" data mw objects in practice; |
754 | // (b) eventually we can remove support for output content version |
755 | // older than 999.x. |
756 | |
757 | // Strip empty data-mw attributes: data-mw is only stored when it is non-empty |
758 | $dmw = self::getDataMw( $node ); |
759 | if ( !$dmw->isEmpty() ) { |
760 | if ( |
761 | !empty( $options['storeInPageBundle'] ) && |
762 | // The pagebundle didn't have data-mw before 999.x |
763 | Semver::satisfies( $options['outputContentVersion'] ?? '0.0.0', '^999.0.0' ) |
764 | ) { |
765 | $data ??= new stdClass; |
766 | $data->mw = $dmw; |
767 | } else { |
768 | $node->setAttribute( |
769 | 'data-mw', |
770 | PHPUtils::jsonEncode( |
771 | $codec->toJsonArray( $dmw, $hints['data-mw'] ) |
772 | ) |
773 | ); |
774 | } |
775 | } |
776 | |
777 | // Serialize the rest of the rich attributes |
778 | // (This will eventually include data-mw.) |
779 | self::storeRichAttributes( $node, $options ); |
780 | |
781 | // Store in the page bundle whatever was collected in $data above (data-parsoid and/or data-mw) |
782 | if ( $data !== null ) { |
783 | self::storeInPageBundle( $options['storeInPageBundle'], $node, $data, $options['idIndex'] ); |
784 | } |
785 | |
786 | // Record the node's data-object-id (if any) as storedId on its NodeData, |
787 | // so that a later access can tell this node's data has already been |
788 | // stored, then drop the attribute from the DOM since it's no longer needed. |
789 | $nd = self::getNodeData( $node ); |
790 | $id = DOMCompat::getAttribute( $node, self::DATA_OBJECT_ATTR_NAME ); |
791 | $nd->storedId = $id !== null ? intval( $id ) : null; |
792 | $node->removeAttribute( self::DATA_OBJECT_ATTR_NAME ); |
793 | } |
794 | |
795 | /** |
796 | * Clone an element; the clone and any cloned descendant elements that carry node data get their own copy of it. |
797 | * @param Element $elt The element to clone |
798 | * @param bool $deep Whether descendants are cloned as well (passed through to the DOM cloneNode call) |
799 | * @return Element The clone |
800 | */ |
801 | public static function cloneNode( Element $elt, bool $deep ): Element { |
802 | $clone = $elt->cloneNode( $deep ); |
803 | '@phan-var Element $clone'; // @var Element $clone |
804 | // We do not need to worry about $deep because a shallow clone does not have child nodes, |
805 | // so fixClonedData always works on exactly the tree that was cloned (whose child list may be empty). |
806 | self::fixClonedData( $clone ); |
807 | return $clone; |
808 | } |
809 | |
810 | /** |
811 | * Deep-clone a DocumentFragment; elements in the clone that carry node data get their own copy of it. |
812 | */ |
813 | public static function cloneDocumentFragment( DocumentFragment $df ): DocumentFragment { |
814 | $clone = $df->cloneNode( true ); |
815 | '@phan-var DocumentFragment $clone'; // @var DocumentFragment $clone |
816 | foreach ( $clone->childNodes as $child ) { |
817 | if ( $child instanceof Element ) { |
818 | self::fixClonedData( $child ); |
819 | } |
820 | } |
821 | return $clone; |
822 | } |
823 | |
824 | /** |
825 | * Give $elt and every descendant element that carries node data its own clone of that data; to avoid element ID conflicts the clone is stored with setNodeData, which sets a new element ID. |
826 | */ |
827 | private static function fixClonedData( Element $elt ): void { |
828 | if ( $elt->hasAttribute( self::DATA_OBJECT_ATTR_NAME ) ) { |
829 | $ownData = clone self::getNodeData( $elt ); |
830 | self::setNodeData( $elt, $ownData ); |
831 | } |
832 | foreach ( $elt->childNodes as $kid ) { |
833 | if ( !( $kid instanceof Element ) ) { |
834 | continue; |
835 | } |
836 | self::fixClonedData( $kid ); |
837 | } |
838 | } |
839 | |
840 | // This is a generic (and somewhat optimistic) interface for |
841 | // complex-valued attributes in a DOM tree. The object and DOM |
842 | // values are "live"; that is, they are passed by-reference and |
843 | // mutations to the object and DOM persist in the document. |
844 | // These values are only "frozen" into a standards-compliant |
845 | // HTML5 attribute representation when the document is serialized. |
846 | // (A corresponding 'parse' stage needs to occur on a new document |
847 | // to "thaw out" the HTML5 attribute representations.) |
848 | |
849 | // Note that although we are expanding the possible attribute *values* |
850 | // we are still deliberately keeping attribute *names* restricted. |
851 | // This is a deliberate design choice. Dynamically-generated |
852 | // attribute names are best handled by the "key value pair" |
853 | // fragment datatype, which is one of the fragment types from which |
854 | // the output document can be composed -- but that composition |
855 | // mechanism and the way the fragment composition is reflected in |
856 | // the DOM is out-of-scope for this API. This just provides a |
857 | // richer way to embed complex information of that sort into a |
858 | // DOM document. |
859 | |
860 | // An important design decision here was not to embed type information |
861 | // for attributes into the representation, which is done to avoid |
862 | // HTML bloat. This leads directly to a "lazy load" implementation, |
863 | // as we can't actually load an attribute value until we know what |
864 | // its class type is, and that's only provided when the call to |
865 | // ::getAttributeObject() is made. In order to implement an "eager |
866 | // load" implementation, we would need a schema for the document |
867 | // which maps every named attribute to an appropriate type. This |
868 | // is possible if eager loading is desired in the future, or because |
869 | // you like the added structural documentation provided by a schema. |
870 | |
871 | // Certain attributes have semantics given by HTML. For example, |
872 | // the `class` and `alt` attributes shouldn't be serialized as a |
873 | // JSON blob, even if you want to store a rich value. For these |
874 | // "HTML attributes with special semantics" (everything not |
875 | // starting with data-* at the moment) we tolerate a bit of bloat |
876 | // and store a flattened string representation of the rich value |
877 | // in the direct HTML attribute, and store the serialized rich |
878 | // value elsewhere. This value is used to provide the appropriate |
879 | // HTML semantics (ie, the browser will apply CSS styling to the |
880 | // flattened `class`, use the flattened `href` to navigate) but |
881 | // should not be used by clients /of the MediaWiki DOM spec/ |
882 | // (including Parsoid), which should ignore the flattened value |
883 | // and consistently use the rich value in order to avoid |
884 | // losing/overwriting data. |
885 | |
886 | // The JSON representation of a rich valued attribute can be |
887 | // customized using the mechanisms provided by the wikimedia/json-codec |
888 | // library; in particular you will want to use the "implicit typing" |
889 | // mechanism provided by the library to avoid bloating the output |
890 | // with explicit references to the PHP implementation classes. |
891 | |
892 | // See |
893 | // https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec/Rich_Attributes |
894 | // for a more detailed discussion of this design. The present |
895 | // implementation corresponds to "proposal 1a", the first step in |
896 | // the full proposal. |
897 | |
898 | /** |
899 | * Report whether the given attribute name has "special" HTML semantics. |
900 | * |
901 | * For these attributes a "stringified", flattened version of the value |
902 | * is kept in the attribute itself, for semantic compatibility with |
903 | * browsers etc., while the "rich" form of the value is stored in a |
904 | * separate attribute. |
905 | * |
906 | * In theory the set could be minimized by looking at the attribute |
907 | * names explicitly reserved for each tag name in the HTML spec. At |
908 | * this time we are conservative instead: every attribute is assumed |
909 | * to have "special" semantics that we should preserve, except for |
910 | * attributes whose names begin with `data-` (compared without regard |
911 | * to case). The tag name does not currently influence the answer. |
912 | * |
913 | * The set may be tweaked in the future, either to reduce unnecessary |
914 | * bloat (ie storing flattened values that will never be used) or to |
915 | * include flattened values for certain data-* attributes (for example, |
916 | * if a gadget were to rely on a flattened value in `data-time`). |
917 | * |
918 | * @param string $tagName The tag name of the Element containing the |
919 | * attribute |
920 | * @param string $attrName The name of the attribute |
921 | * @return bool True if the named attribute has special HTML semantics |
922 | */ |
923 | private static function isHtmlAttributeWithSpecialSemantics( string $tagName, string $attrName ): bool { |
924 | $isDataAttribute = preg_match( '/^data-/i', $attrName ) === 1; |
925 | return !$isDataAttribute; |
926 | } |
927 | |
928 | /** |
929 | * Fetch the value of a rich attribute as a live (by-reference) object, |
930 | * decoding it with $classHint first if it has not been decoded yet. |
931 | * |
932 | * @phan-template T |
933 | * @param Element $node The node on which the attribute is to be found. |
934 | * @param string $name The name of the attribute. |
935 | * @param class-string<T>|Hint<T> $classHint |
936 | * @return ?T The attribute value, or null if not present. |
937 | */ |
938 | public static function getAttributeObject( |
939 | Element $node, string $name, $classHint |
940 | ): ?object { |
941 | // Lazy load: pick up any serialized form of this attribute first. |
942 | self::loadRichAttributes( $node, $name ); |
943 | if ( !$node->hasAttribute( self::DATA_OBJECT_ATTR_NAME ) ) { |
944 | // No node data for this node; don't create an empty one needlessly. |
945 | return null; |
946 | } |
947 | $bag = self::getNodeData( $node ); |
948 | $dataKey = self::RICH_ATTR_DATA_PREFIX . $name; |
949 | $stored = $bag->$dataKey ?? null; |
950 | // Rich values are decoded lazily because $classHint is needed to decode. |
951 | // Undecoded values are wrapped in an array; anything else is final. |
952 | if ( !is_array( $stored ) ) { |
953 | return $stored; |
954 | } |
955 | $codec = self::getCodec( $node ); |
956 | $decoded = $codec->newFromJsonArray( $stored[0], $classHint ); |
957 | if ( is_array( $decoded ) ) { |
958 | // JsonCodec allows class hints to indicate that the value |
959 | // is an array of some object type, but for our purposes |
960 | // the result must always be an object so that it is live. |
961 | $decoded = (object)$decoded; |
962 | } |
963 | // A decoded value must not be an array, since an array is what |
964 | // marks a value as still undecoded. |
965 | Assert::invariant( |
966 | !is_array( $decoded ), "rich attribute can't be array" |
967 | ); |
968 | $bag->$dataKey = $decoded; |
969 | $hintKey = self::RICH_ATTR_HINT_PREFIX . $name; |
970 | $bag->$hintKey = $classHint; |
971 | return $decoded; |
972 | } |
973 | |
974 | /** |
975 | * Return the value of a rich attribute as a live (by-reference) |
976 | * object. This also serves as an assertion that there are not |
977 | * conflicting types. If the value is not present, a default value |
978 | * will be created using `$codec->defaultValue()` falling back to |
979 | * `$className::defaultValue()` (and finally to `new $className`), |
980 | * and stored as the value of the attribute. |
981 | * |
982 | * @note The $className should be JsonCodecable (either directly |
983 | * or via a custom JsonClassCodec). Hints with the LIST or STDCLASS modifier are rejected by an assertion. |
984 | * |
985 | * @phan-template T |
986 | * @param Element $node The node on which the attribute is to be found. |
987 | * @param string $name The name of the attribute. |
988 | * @param class-string<T>|Hint<T> $classHint |
989 | * @return ?T The attribute value; a default is created and stored if it was not present. |
990 | */ |
991 | public static function getAttributeObjectDefault( |
992 | Element $node, string $name, $classHint |
993 | ): ?object { |
994 | $value = self::getAttributeObject( $node, $name, $classHint ); |
995 | if ( $value === null ) { |
996 | $className = $classHint; |
997 | while ( $className instanceof Hint ) { |
998 | Assert::invariant( |
999 | $className->modifier !== Hint::LIST && |
1000 | $className->modifier !== Hint::STDCLASS, |
1001 | "Can't create default value for list or object" |
1002 | ); |
1003 | $className = $className->parent; |
1004 | } |
1005 | '@phan-var string $className'; |
1006 | $codec = self::getCodec( $node ); |
1007 | $value = $codec->defaultValue( $className ); |
1008 | $value ??= new $className; |
1009 | self::setAttributeObject( $node, $name, $value, $classHint ); |
1010 | } |
1011 | return $value; |
1012 | } |
1013 | |
1014 | /** |
1015 | * Set the value of a rich attribute, overwriting any previous |
1016 | * value. Generally mutating the result returned by the |
1017 | * `::getAttribute*Default()` methods should be done instead of |
1018 | * using this method, since the objects returned are live. |
1019 | * |
1020 | * @note For attribute names where |
1021 | * `::isHtmlAttributeWithSpecialSemantics()` returns `true` you |
1022 | * can customize the "flattened" representation used for HTML |
1023 | * semantics via `$codec->flatten()` which falls back to |
1024 | * `$className::flatten()`. |
1025 | * |
1026 | * @phan-template T |
1027 | * @param Element $node The node on which the attribute is to be found. |
1028 | * @param string $name The name of the attribute. |
1029 | * @phan-suppress-next-line PhanTypeMismatchDeclaredParam |
1030 | * @param T $value The new (object) value for the attribute |
1031 | * @param class-string<T>|Hint<T>|null $classHint Optional serialization hint; if null and $value is RichCodecable, its class's ::hint() is used |
1032 | * @phpcs:ignore MediaWiki.Commenting.FunctionAnnotations.UnrecognizedAnnotation |
1033 | * @phan-suppress-next-next-line PhanTemplateTypeNotUsedInFunctionReturn |
1034 | */ |
1035 | public static function setAttributeObject( |
1036 | Element $node, string $name, object $value, $classHint = null |
1037 | ): void { |
1038 | // Remove attribute from DOM; will be rewritten from node data during |
1039 | // serialization. (This also clears any previously stored value and hint.) |
1040 | self::removeAttributeObject( $node, $name ); |
1041 | $nodeData = self::getNodeData( $node ); |
1042 | $propName = self::RICH_ATTR_DATA_PREFIX . $name; |
1043 | $nodeData->$propName = $value; |
1044 | if ( $classHint === null && is_a( $value, RichCodecable::class ) ) { |
1045 | $className = get_class( $value ); |
1046 | $classHint = $className::hint(); |
1047 | } |
1048 | $hintName = self::RICH_ATTR_HINT_PREFIX . $name; |
1049 | $nodeData->$hintName = $classHint; |
1050 | } |
1051 | |
1052 | /** |
1053 | * Delete a rich attribute from a node, together with the value and hint stored for it. |
1054 | * |
1055 | * @param Element $node The node on which the attribute is to be found. |
1056 | * @param string $name The name of the attribute. |
1057 | */ |
1058 | public static function removeAttributeObject( |
1059 | Element $node, string $name |
1060 | ): void { |
1061 | $node->removeAttribute( $name ); |
1062 | self::removeFromExpandedAttrs( $node, $name ); |
1063 | if ( !$node->hasAttribute( self::DATA_OBJECT_ATTR_NAME ) ) { |
1064 | return; // no node data, so no stored value or hint to clear |
1065 | } |
1066 | $bag = self::getNodeData( $node ); |
1067 | foreach ( [ self::RICH_ATTR_DATA_PREFIX, self::RICH_ATTR_HINT_PREFIX ] as $prefix ) { |
1068 | unset( $bag->{$prefix . $name} ); |
1069 | } |
1070 | } |
1071 | |
1072 | /** |
1073 | * Readability helper: report whether a node on which loadDataAttribs |
1074 | * has already been called has an existing data-mw value, checking for |
1075 | * node data first rather than calling getNodeData() unconditionally. |
1076 | */ |
1077 | private static function nodeHasDataMw( Element $node ): bool { |
1078 | // loadDataAttribs would have created the DATA_OBJECT_ATTR_NAME |
1079 | // attribute (for the associated NodeData) if data-mw were present, |
1080 | // so a node lacking that attribute cannot have data-mw. The |
1081 | // short-circuit below means getNodeData() is only reached for |
1082 | // nodes that do have the attribute. |
1083 | $hasNodeData = $node->hasAttribute( self::DATA_OBJECT_ATTR_NAME ); |
1084 | return $hasNodeData && self::getNodeData( $node )->mw !== null; |
1085 | } |
1086 | |
1087 | /** |
1088 | * Helper function to remove any entries from data-mw.attribs which match |
1089 | * this attribute name. They will be rewritten during rich attribute |
1090 | * serialization if necessary. Does nothing when the node has no data-mw or the attribute lacks special HTML semantics. |
1091 | * @param Element $node The node whose data-mw.attribs list is filtered |
1092 | * @param string $name The attribute name whose entries are removed |
1093 | */ |
1094 | private static function removeFromExpandedAttrs( |
1095 | Element $node, string $name |
1096 | ): void { |
1097 | // Don't create a new data-mw yet if we don't need one. |
1098 | if ( !self::nodeHasDataMw( $node ) ) { |
1099 | return; |
1100 | } |
1101 | if ( !self::isHtmlAttributeWithSpecialSemantics( $node->tagName, $name ) ) { |
1102 | return; |
1103 | } |
1104 | // If there was a data-mw.attribs for this attribute, remove it |
1105 | // (it will be rewritten during serialization later) |
1106 | $dataMw = self::getDataMw( $node ); |
1107 | $dataMw->attribs = array_values( array_filter( |
1108 | $dataMw->attribs ?? [], |
1109 | static function ( $a ) use ( $name ) { |
1110 | if ( !( $a instanceof DataMwAttrib ) ) { |
1111 | return true; // Keep entries of any other type untouched |
1112 | } |
1113 | $key = $a->key; |
1114 | if ( $key === $name ) { |
1115 | return false; // Remove this entry |
1116 | } |
1117 | if ( is_array( $key ) && ( $key['txt'] ?? null ) == $name ) { |
1118 | return false; // Remove this entry |
1119 | } |
1120 | return true; |
1121 | } |
1122 | ) ); |
1123 | if ( count( $dataMw->attribs ) === 0 ) { // No entries are left at all |
1124 | unset( $dataMw->attribs ); |
1125 | DOMUtils::removeTypeOf( $node, 'mw:ExpandedAttrs' ); |
1126 | } |
1127 | } |
1128 | |
1129 | /** |
1130 | * Fetch the value of a rich attribute as a live `DocumentFragment`. |
1131 | * This also serves as an assertion that there are not conflicting types. |
1132 | * |
1133 | * @note An attribute whose value is a plain string comes back as a |
1134 | * DocumentFragment holding a single Text node, which is what lets |
1135 | * 'simple' DocumentFragments be serialized efficiently as strings. |
1136 | * |
1137 | * @param Element $node The node on which the attribute is to be found. |
1138 | * @param string $name The name of the attribute. |
1139 | * @return ?DocumentFragment The attribute value, or null if not present. |
1140 | */ |
1141 | public static function getAttributeDom( |
1142 | Element $node, string $name |
1143 | ): ?DocumentFragment { |
1144 | // Nothing DocumentFragment-specific is needed at this level: the |
1145 | // generic accessor is used and all the differences are in the codec. |
1146 | $hint = DocumentFragment::class; |
1147 | $fragment = self::getAttributeObject( $node, $name, $hint ); |
1148 | return $fragment; |
1149 | } |
1150 | |
1151 | /** |
1152 | * Return the value of a rich attribute as a `DocumentFragment`, |
1153 | * creating a new document fragment and setting the attribute if the |
1154 | * attribute was not previously present. The new fragment is created by the node's owner document. |
1155 | * |
1156 | * @param Element $node The node on which the attribute is to be found. |
1157 | * @param string $name The name of the attribute. |
1158 | * @return DocumentFragment The attribute value. |
1159 | */ |
1160 | public static function getAttributeDomDefault( |
1161 | Element $node, string $name |
1162 | ): DocumentFragment { |
1163 | $value = self::getAttributeDom( $node, $name ); |
1164 | if ( $value === null ) { |
1165 | $value = $node->ownerDocument->createDocumentFragment(); |
1166 | self::setAttributeDom( $node, $name, $value ); |
1167 | } |
1168 | return $value; |
1169 | } |
1170 | |
1171 | /** |
1172 | * Store a `DocumentFragment` as the value of a rich attribute, |
1173 | * replacing whatever value was there before. Since the fragments |
1174 | * handed out by the `::getAttribute*Default()` methods are live, |
1175 | * mutating one of those is generally preferable to calling this. |
1176 | * |
1177 | * @param Element $node The node on which the attribute is to be found. |
1178 | * @param string $name The name of the attribute. |
1179 | * @param DocumentFragment $value The new value for the attribute |
1180 | */ |
1181 | public static function setAttributeDom( |
1182 | Element $node, string $name, DocumentFragment $value |
1183 | ): void { |
1184 | // A DocumentFragment is stored like any other rich object value, |
1185 | // with DocumentFragment::class recorded as its hint, so use the |
1186 | // generic setter. That setter first removes the attribute from |
1187 | // the DOM (it will be rewritten from node data during |
1188 | // serialization) and then stores the value and the hint. |
1189 | self::setAttributeObject( |
1190 | $node, $name, $value, DocumentFragment::class |
1191 | ); |
1192 | } |
1193 | |
1194 | /** |
1195 | * Remove a rich attribute whose value was a `DocumentFragment`. |
1196 | * |
1197 | * Removal does not depend on the type of the stored value, so this |
1198 | * performs exactly the steps of ::removeAttributeObject(): the attribute |
1199 | * is removed from the DOM, ::removeFromExpandedAttrs() is called for it, |
1200 | * and, if the node has node data, the stored value and its hint are |
1201 | * unset. |
1202 | * |
1203 | * @param Element $node The node on which the attribute is to be found. |
1204 | * @param string $name The name of the attribute. |
1205 | */ |
1206 | public static function removeAttributeDom( |
1207 | Element $node, string $name |
1208 | ): void { |
1209 | // The attribute is removed from the DOM as part of the shared |
1210 | // steps; if a value is set again later it will be rewritten from |
1211 | // node data during serialization. Share the generic implementation |
1212 | // rather than repeating it here. |
1213 | self::removeAttributeObject( $node, $name ); |
1214 | } |
1215 | |
1216 | // Serialization/deserialization support for rich attributes. |
1217 | |
1218 | // There are many possible serializations which could be used. |
1219 | // For the moment we've chosen the simplest possible one, which |
1220 | // embeds big JSON blobs in attribute values. For "attributes |
1221 | // with special HTML semantics" the JSON blobs are stored in |
1222 | // data-mw.attribs and the straight HTML attribute value is a |
1223 | // flattened form of the true value. |
1224 | |
1225 | /** |
1226 | * Internal function to lazy-load rich attribute data from the HTML |
1227 | * DOM representation. Loaded values are stored undecoded in NodeData as `[ $json ]` or `[ $json, $flattened ]`. |
1228 | * @param Element $node The node possibly containing the rich attribute |
1229 | * @param string $name The attribute name we are going to load values for |
1230 | */ |
1231 | private static function loadRichAttributes( |
1232 | Element $node, string $name |
1233 | ): void { |
1234 | // Because we don't have a complete schema for the document which |
1235 | // identifies which attributes are 'rich' and which are not, we |
1236 | // lazily-load attributes one-by-one once we know their names and types |
1237 | // instead of trying to preload them in bulk. |
1238 | |
1239 | // *However* in order to avoid O(N^2) manipulation of the |
1240 | // data-mw.attribs list, we do move all the values from data-mw.attribs |
1241 | // into NodeData, even those not matching our given name. We can't |
1242 | // decode those yet: they will be decoded once getAttributeObject() |
1243 | // is called on them to provide the proper type hint (or else they |
1244 | // will eventually be reserialized in their undecoded form). |
1245 | |
1246 | $flatValue = DOMCompat::getAttribute( $node, $name ); |
1247 | if ( $flatValue === null ) { |
1248 | // Use the presence of the attribute in the DOM to indicate |
1249 | // whether this attribute has been loaded; this avoids (for |
1250 | // example) traversing AttributeExpander entries in |
1251 | // data-mw.attribs multiple times looking for the name of a |
1252 | // rich attribute. If the attribute is not in the DOM either |
1253 | // there is no attribute of this name or it has already been |
1254 | // loaded. |
1255 | return; |
1256 | } |
1257 | |
1258 | if ( self::isHtmlAttributeWithSpecialSemantics( $node->tagName, $name ) ) { |
1259 | // Look aside at data-mw for attributes with special semantics |
1260 | if ( !self::nodeHasDataMw( $node ) ) { |
1261 | // No data-mw, so no rich value for this attribute |
1262 | return; |
1263 | } |
1264 | $dataMw = self::getDataMw( $node ); |
1265 | // Load all attribute values from $dataMw->attribs to avoid O(N^2) |
1266 | // loading of list |
1267 | if ( $dataMw->attribs ?? false ) { |
1268 | $unused = []; |
1269 | foreach ( $dataMw->attribs as $a ) { |
1270 | if ( $a instanceof DataMwAttrib ) { |
1271 | $key = $a->key; |
1272 | $value = $a->value; |
1273 | // Attribute expander may use array values for |
1274 | // key, since it supports rich key values. |
1275 | // Ignore any entries created this way, since |
1276 | // we can't preserve their values: they will be |
1277 | // added to $unused and replaced. |
1278 | if ( is_string( $key ) || is_numeric( $key ) ) { |
1279 | $keyName = (string)$key; |
1280 | $propName = self::RICH_ATTR_DATA_PREFIX . $keyName; |
1281 | $nodeData = self::getNodeData( $node ); |
1282 | // Wrap $value in an array to mark it as not yet decoded, and keep |
1283 | // the flattened value of *this* attribute (which is not necessarily |
1284 | // $name) alongside it in case we round-trip without modifying it. |
1285 | $flat = $keyName === $name ? $flatValue : DOMCompat::getAttribute( $node, $keyName ); |
1286 | $nodeData->$propName = [ $value, $flat ]; |
1287 | // Signal that the value has been moved to NodeData (this will also |
1288 | // short cut this iteration over data-mw.attribs in future calls) |
1289 | $node->removeAttribute( $keyName ); |
1290 | continue; |
1291 | } |
1292 | } |
1293 | $unused[] = $a; |
1294 | } |
1295 | if ( count( $unused ) === 0 ) { |
1296 | unset( $dataMw->attribs ); |
1297 | } else { |
1298 | $dataMw->attribs = $unused; |
1299 | } |
1300 | } |
1301 | return; |
1302 | } |
1303 | // The attribute does not have "special HTML semantics" |
1304 | $decoded = json_decode( $flatValue, false ); |
1305 | // $decoded is the 'non-string' form of the value; we can't finish |
1306 | // deserializing it into an object until we know the appropriate type |
1307 | // hint. |
1308 | self::removeAttributeObject( $node, $name ); |
1309 | $nodeData = self::getNodeData( $node ); |
1310 | $propName = self::RICH_ATTR_DATA_PREFIX . $name; |
1311 | // Mark this as undecoded by wrapping it as an array, |
1312 | // since decoded values will always be objects. |
1313 | // (Attribute values without "special HTML semantics" do not |
1314 | // have flattened versions, so 2nd element to this array isn't |
1315 | // needed.) |
1316 | $nodeData->$propName = [ $decoded ]; |
1317 | } |
1318 | |
1319 | /** |
1320 | * Internal function to encode rich attribute data into an HTML |
1321 | * DOM representation. Each stored value is removed from NodeData once it has been written out. |
1322 | * @param Element $node The node possibly containing the rich attribute |
1323 | * @param array $options The options provided to ::storeDataAttribs(); only 'onlySpecial' is consulted here |
1324 | */ |
1325 | private static function storeRichAttributes( Element $node, array $options ): void { |
1326 | if ( !$node->hasAttribute( self::DATA_OBJECT_ATTR_NAME ) ) { |
1327 | return; // No rich attributes here |
1328 | } |
1329 | $tagName = $node->tagName; |
1330 | $nodeData = self::getNodeData( $node ); |
1331 | $codec = self::getCodec( $node ); |
1332 | foreach ( get_object_vars( $nodeData ) as $k => $v ) { |
1333 | // Look for dynamic properties with names w/ the proper prefix |
1334 | if ( str_starts_with( $k, self::RICH_ATTR_DATA_PREFIX ) ) { |
1335 | $attrName = substr( $k, strlen( self::RICH_ATTR_DATA_PREFIX ) ); |
1336 | if ( |
1337 | ( $options['onlySpecial'] ?? false ) && |
1338 | !self::isHtmlAttributeWithSpecialSemantics( $tagName, $attrName ) |
1339 | ) { |
1340 | continue; // skip this for now |
1341 | } |
1342 | $flat = null; |
1343 | if ( is_array( $v ) ) { |
1344 | // If $v is an array, it was never decoded: reuse the JSON (and flattened value, if any) as loaded. |
1345 | $json = $v[0]; |
1346 | $flat = $v[1] ?? null; |
1347 | } else { |
1348 | $hintName = self::RICH_ATTR_HINT_PREFIX . $attrName; |
1349 | $classHint = $nodeData->$hintName ?? null; |
1350 | if ( is_a( $v, RichCodecable::class ) ) { |
1351 | $classHint ??= $v::hint(); |
1352 | } |
1353 | $classHint ??= get_class( $v ); |
1354 | try { |
1355 | // NOTE: call 'flatten()' before 'toJsonArray()' since |
1356 | // the latter may have side effects on $v. |
1357 | $flat = $codec->flatten( $v ); |
1358 | $json = $codec->toJsonArray( $v, $classHint ); |
1359 | } catch ( InvalidArgumentException $e ) { |
1360 | // For better debuggability, include the attribute name (keeping the original exception as `previous`) |
1361 | throw new InvalidArgumentException( "$attrName: " . $e->getMessage(), 0, $e ); |
1362 | } |
1363 | } |
1364 | if ( !self::isHtmlAttributeWithSpecialSemantics( $tagName, $attrName ) ) { |
1365 | $encoded = PHPUtils::jsonEncode( $json ); |
1366 | $node->setAttribute( $attrName, $encoded ); |
1367 | } else { |
1368 | // For compatibility, store the rich value in data-mw.attribs |
1369 | // and store a flattened version in the $attrName. |
1370 | if ( $flat !== null ) { |
1371 | $node->setAttribute( $attrName, $flat ); |
1372 | } else { |
1373 | $node->removeAttribute( $attrName ); |
1374 | } |
1375 | $dataMw = self::getDataMw( $node ); |
1376 | $dataMw->attribs[] = new DataMwAttrib( $attrName, $json ); |
1377 | DOMUtils::addTypeOf( $node, 'mw:ExpandedAttrs' ); |
1378 | } |
1379 | unset( $nodeData->$k ); |
1380 | } |
1381 | } |
1382 | } |
1383 | |
1384 | /** |
1385 | * Modify the attribute array, replacing data-object-id with JSON |
1386 | * encoded data. This is just a debugging hack, not to be confused with |
1387 | * DOMDataUtils::storeDataAttribs(), and does not store flattened |
1388 | * versions of attributes. The codec's options are reset while encoding and restored afterwards, even on failure. |
1389 | * |
1390 | * @param Element $node The node whose node data is dumped; nothing is done if it has no data-object-id |
1391 | * @param array &$attrs Attribute array (name => value) to modify in place |
1392 | * @param bool $keepTmp If false, `tmp` is dropped from a copy of the DataParsoid before encoding |
1393 | * @param bool $storeDiffMark If false, any 'data-parsoid-diff' entry is removed from $attrs |
1394 | */ |
1395 | public static function dumpRichAttribs( Element $node, array &$attrs, bool $keepTmp, bool $storeDiffMark ): void { |
1396 | if ( !$node->hasAttribute( self::DATA_OBJECT_ATTR_NAME ) ) { |
1397 | return; // No rich attributes here |
1398 | } |
1399 | $nodeData = self::getNodeData( $node ); |
1400 | $codec = self::getCodec( $node ); |
1401 | // Reset to a default set of codec options |
1402 | // (in particular, make sure 'useFragmentBank' is not set) |
1403 | $oldOptions = $codec->setOptions( [] ); |
1404 | try { |
1405 | foreach ( get_object_vars( $nodeData ) as $k => $v ) { |
1406 | // Look for dynamic properties with names w/ the proper prefix |
1407 | if ( str_starts_with( $k, self::RICH_ATTR_DATA_PREFIX ) ) { |
1408 | $attrName = substr( $k, strlen( self::RICH_ATTR_DATA_PREFIX ) ); |
1409 | if ( is_array( $v ) ) { |
1410 | // If $v is an array, it was never decoded. |
1411 | $json = $v[0]; |
1412 | } else { |
1413 | $hintName = self::RICH_ATTR_HINT_PREFIX . $attrName; |
1414 | $classHint = $nodeData->$hintName ?? null; |
1415 | if ( is_a( $v, RichCodecable::class ) ) { |
1416 | $classHint ??= $v::hint(); |
1417 | } |
1418 | $classHint ??= get_class( $v ); |
1419 | $json = $codec->toJsonArray( $v, $classHint ); |
1420 | } |
1421 | $attrs[$attrName] = PHPUtils::jsonEncode( $json ); |
1422 | } |
1423 | } |
1424 | $dp = $nodeData->parsoid; |
1425 | if ( $dp ) { |
1426 | if ( !$keepTmp ) { |
1427 | $dp = clone $dp; // work on a copy so the live object keeps its tmp |
1428 | // @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty |
1429 | unset( $dp->tmp ); |
1430 | } |
1431 | $attrs['data-parsoid'] = $codec->toJsonString( |
1432 | $dp, self::getCodecHints()['data-parsoid'] |
1433 | ); |
1434 | } |
1435 | $dmw = $nodeData->mw; |
1436 | if ( $dmw ) { |
1437 | $attrs['data-mw'] = $codec->toJsonString( $dmw, self::getCodecHints()['data-mw'] ); |
1438 | } |
1439 | } finally { |
1440 | // Restore codec options, even if encoding threw |
1441 | $codec->setOptions( $oldOptions ); |
1442 | } |
1443 | if ( !$storeDiffMark ) { |
1444 | unset( $attrs['data-parsoid-diff'] ); |
1445 | } |
1446 | unset( $attrs[self::DATA_OBJECT_ATTR_NAME] ); |
1447 | } |
1448 | } |