Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
77.25% |
129 / 167 |
|
67.86% |
19 / 28 |
CRAP | |
0.00% |
0 / 1 |
DOMCompat | |
77.25% |
129 / 167 |
|
67.86% |
19 / 28 |
104.84 | |
0.00% |
0 / 1 |
newDocument | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
nodeName | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getBody | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
5.05 | |||
getHead | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
getTitle | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
setTitle | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
getParentElement | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
getElementById | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
setIdAttribute | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getElementsByTagName | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
getFirstElementChild | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
getLastElementChild | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
querySelector | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
querySelectorAll | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
getPreviousElementSibling | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
getNextElementSibling | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
append | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
3 | |||
remove | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
getInnerHTML | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setInnerHTML | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
1 | |||
getOuterHTML | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getAttribute | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getClassList | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
stripAndCollapseASCIIWhitespace | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
stripEmptyTextNodes | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
normalize | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
replaceChildren | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
20 | |||
or | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Utils; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\DOM\CharacterData; |
8 | use Wikimedia\Parsoid\DOM\Document; |
9 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
10 | use Wikimedia\Parsoid\DOM\Element; |
11 | use Wikimedia\Parsoid\DOM\Node; |
12 | use Wikimedia\Parsoid\DOM\Text; |
13 | use Wikimedia\Parsoid\Utils\DOMCompat\TokenList; |
14 | use Wikimedia\Parsoid\Wt2Html\XMLSerializer; |
15 | use Wikimedia\RemexHtml\DOM\DOMBuilder; |
16 | use Wikimedia\RemexHtml\HTMLData; |
17 | use Wikimedia\RemexHtml\Tokenizer\Tokenizer; |
18 | use Wikimedia\RemexHtml\TreeBuilder\Dispatcher; |
19 | use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder; |
20 | use Wikimedia\Zest\Zest; |
21 | |
22 | /** |
23 | * Helper class that provides missing DOM level 3 methods for the PHP DOM classes. |
24 | * For a DOM method $node->foo( $bar) the equivalent helper is DOMCompat::foo( $node, $bar ). |
25 | * For a DOM property $node->foo there is a DOMCompat::getFoo( $node ) and |
26 | * DOMCompat::setFoo( $node, $value ). |
27 | * |
28 | * Only implements the methods that are actually used by Parsoid. |
29 | * |
30 | * Because this class may be used by code outside Parsoid it tries to |
31 | * be relatively tolerant of object types: you can call it either with |
32 | * PHP's DOM* types or with a "proper" DOM implementation, and it will |
33 | * attempt to Do The Right Thing regardless. As a result there are |
34 | * generally not parameter type hints for DOM object types, and the |
35 | * return types will be broad enough to accommodate the value a "real" |
36 | * DOM implementation would return, as well as the values our |
37 | * thunk will return. (For instance, we can't create a "real" NodeList |
38 | * in our compatibility thunk.) |
39 | */ |
40 | class DOMCompat { |
41 | |
42 | /** |
43 | * Tab, LF, FF, CR, space |
44 | * @see https://infra.spec.whatwg.org/#ascii-whitespace |
45 | */ |
46 | private const ASCII_WHITESPACE = "\t\r\f\n "; |
47 | |
/**
 * Build a fresh, empty document.
 *
 * Construction is routed through this helper because the procedure
 * differs slightly between Dodo and DOMDocument, and phan gets
 * confused by that difference.
 *
 * @param bool $isHtml Part of the signature, but not consulted by this
 *   implementation.
 * @return Document
 */
public static function newDocument( bool $isHtml ) {
	// @phan-suppress-next-line PhanParamTooMany,PhanTypeInstantiateInterface
	$emptyDocument = new Document( "1.0", "UTF-8" );
	return $emptyDocument;
}
60 | |
/**
 * Give the node's name folded to lower case (HTML says the name should
 * be capitalized, so callers comparing against lower-case tag names
 * use this instead of reading the property directly).
 *
 * @param Node $node
 * @return string The value of `$node->nodeName`, lower-cased.
 */
public static function nodeName( Node $node ): string {
	$rawName = $node->nodeName;
	return strtolower( $rawName );
}
70 | |
/**
 * Get document body.
 * Unlike the spec we return it as a native PHP DOM object.
 *
 * The first child of the document element whose lower-cased node name
 * is `body` or `frameset` is returned and remembered on the document's
 * `body` property, so later calls short-circuit.
 *
 * @param Document $document
 * @return Element|null The body (or frameset) element, or null when the
 *   document has no document element or no such child.
 * @see https://html.spec.whatwg.org/multipage/dom.html#dom-document-body
 */
public static function getBody( $document ) {
	// WARNING: this will not be updated if (for some reason) the
	// document body changes.
	if ( $document->body !== null ) {
		return $document->body;
	}
	// An empty document has no document element; there is nothing to
	// search, so bail out instead of dereferencing null.
	$root = $document->documentElement;
	if ( $root === null ) {
		return null;
	}
	foreach ( $root->childNodes as $element ) {
		/** @var Element $element */
		$nodeName = self::nodeName( $element );
		if ( $nodeName === 'body' || $nodeName === 'frameset' ) {
			// Caching!
			$document->body = $element;
			// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
			return $element;
		}
	}
	return null;
}
96 | |
/**
 * Get document head.
 * Unlike the spec we return it as a native PHP DOM object.
 *
 * The first child of the document element whose lower-cased node name
 * is `head` is returned and remembered on the document, so later calls
 * short-circuit.
 *
 * @param Document $document
 * @return Element|null The head element, or null when the document has
 *   no document element or no such child.
 * @see https://html.spec.whatwg.org/multipage/dom.html#dom-document-head
 */
public static function getHead( $document ) {
	// Use an undeclared dynamic property as a cache.
	// WARNING: this will not be updated if (for some reason) the
	// document head changes.
	if ( isset( $document->head ) ) {
		return $document->head;
	}
	// An empty document has no document element; there is nothing to
	// search, so bail out instead of dereferencing null.
	$root = $document->documentElement;
	if ( $root === null ) {
		return null;
	}
	foreach ( $root->childNodes as $element ) {
		/** @var Element $element */
		if ( self::nodeName( $element ) === 'head' ) {
			$document->head = $element; // Caching!
			// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
			return $element;
		}
	}
	return null;
}
121 | |
/**
 * Read the document title.
 *
 * Looks up the first element matching the `title` selector and returns
 * its text content with ASCII whitespace stripped and collapsed.
 *
 * @param Document $document
 * @return string The normalized title, or the empty string when no
 *   title element is found.
 * @see https://html.spec.whatwg.org/multipage/dom.html#document.title
 */
public static function getTitle( $document ): string {
	$titleElement = self::querySelector( $document, 'title' );
	if ( !$titleElement ) {
		return '';
	}
	return self::stripAndCollapseASCIIWhitespace( $titleElement->textContent );
}
132 | |
/**
 * Write the document title.
 *
 * An existing title element is reused. When there is none, a new one is
 * appended to the head, provided the document has a head. When neither
 * exists the call does nothing.
 *
 * @param Document $document
 * @param string $title
 * @see https://html.spec.whatwg.org/multipage/dom.html#document.title
 */
public static function setTitle( $document, string $title ): void {
	$target = self::querySelector( $document, 'title' );
	if ( !$target && self::getHead( $document ) ) {
		// No title yet, but there is a head to host one.
		$target = DOMUtils::appendToHead( $document, 'title' );
	}
	if ( !$target ) {
		return;
	}
	$target->textContent = $title;
}
151 | |
/**
 * Give the node's parent when that parent is an element.
 *
 * @param Node $node
 * @return Element|null The parent node if its node type is
 *   XML_ELEMENT_NODE; null when there is no parent or the parent is
 *   some other kind of node.
 * @see https://dom.spec.whatwg.org/#dom-node-parentelement
 */
public static function getParentElement( $node ) {
	$candidate = $node->parentNode;
	if ( !$candidate || $candidate->nodeType !== XML_ELEMENT_NODE ) {
		return null;
	}
	/** @var Element $candidate */
	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $candidate;
}
167 | |
/**
 * Find the descendant carrying the given ID.
 *
 * Works around https://bugs.php.net/bug.php?id=77686 and other issues
 * related to inconsistent indexing behavior by delegating the lookup
 * to Zest.
 * XXX: 77686 is fixed in php 8.1.21
 *
 * @param Document|DocumentFragment $node
 * @param string $id
 * @return Element|null The first match reported by Zest, or null when
 *   there is none.
 * @see https://dom.spec.whatwg.org/#dom-nonelementparentnode-getelementbyid
 */
public static function getElementById( $node, string $id ) {
	$acceptedTypes = self::or(
		Document::class, DocumentFragment::class,
		// For compatibility with code which might call this from
		// outside Parsoid.
		\DOMDocument::class, \DOMDocumentFragment::class
	);
	Assert::parameterType( $acceptedTypes, $node, '$node' );
	// @phan-suppress-next-line PhanTypeMismatchArgument Zest is declared to take DOMDocument\DOMElement
	$matches = Zest::getElementsById( $node, $id );
	// @phan-suppress-next-line PhanTypeMismatchReturn
	return $matches[0] ?? null;
}
192 | |
/**
 * Assign an `id` attribute and register it as an ID.
 *
 * PHP's Document::getElementById() does not index the 'id' attribute
 * unless the non-standard `Element::setIdAttribute` method is called
 * after the attribute has been set; see
 * https://www.php.net/manual/en/domdocument.getelementbyid.php
 * for more details. This helper performs both steps in that order.
 *
 * @param Element $element
 * @param string $id The desired value for the `id` attribute on $element.
 * @see https://phabricator.wikimedia.org/T232390
 */
public static function setIdAttribute( $element, string $id ): void {
	$attributeName = 'id';
	$element->setAttribute( $attributeName, $id );
	// phab:T232390 -- must come after the attribute exists.
	$element->setIdAttribute( $attributeName, true );
}
208 | |
/**
 * Collect every descendant with the given tag name.
 *
 * Workaround for PHP's getElementsByTagName being inexplicably slow in
 * some situations and the lack of Element::getElementsByTagName(); the
 * lookup is delegated to Zest.
 *
 * @param Document|Element $node
 * @param string $tagName
 * @return (iterable<Element>&\Countable)|array<Element> Either an array or an HTMLCollection object
 * @see https://dom.spec.whatwg.org/#dom-document-getelementsbytagname
 * @see https://dom.spec.whatwg.org/#dom-element-getelementsbytagname
 * @note Note that unlike the spec this method is not guaranteed to return a NodeList
 *  (which cannot be freely constructed in PHP), just a traversable containing Elements.
 */
public static function getElementsByTagName( $node, string $tagName ): iterable {
	$acceptedTypes = self::or(
		Document::class, Element::class,
		// For compatibility with code which might call this from
		// outside Parsoid.
		\DOMDocument::class, \DOMElement::class
	);
	Assert::parameterType( $acceptedTypes, $node, '$node' );
	// @phan-suppress-next-line PhanTypeMismatchArgument Zest is declared to take DOMDocument\DOMElement
	$result = Zest::getElementsByTagName( $node, $tagName );
	'@phan-var array<Element> $result'; // @var array<Element> $result
	return $result;
}
235 | |
/**
 * Find the first child of the node that is an Element.
 *
 * Children are scanned from `firstChild` forward via `nextSibling`.
 *
 * @param Document|DocumentFragment|Element $node
 * @return Element|null The first element child, or null when the node
 *   has no element children.
 * @see https://dom.spec.whatwg.org/#dom-parentnode-firstelementchild
 * @note This property was added to PHP in 8.0.0, and won't be needed
 *  once our minimum required version >= 8.0.0
 */
public static function getFirstElementChild( $node ) {
	$acceptedTypes = self::or(
		Document::class, DocumentFragment::class, Element::class,
		// For compatibility with code which might call this from
		// outside Parsoid.
		\DOMDocument::class, \DOMDocumentFragment::class, \DOMElement::class
	);
	Assert::parameterType( $acceptedTypes, $node, '$node' );
	for ( $child = $node->firstChild; $child; $child = $child->nextSibling ) {
		if ( $child->nodeType === XML_ELEMENT_NODE ) {
			// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
			return $child;
		}
	}
	return null;
}
261 | |
/**
 * Find the last child of the node that is an Element.
 *
 * Children are scanned from `lastChild` backward via `previousSibling`.
 *
 * @param Document|DocumentFragment|Element $node
 * @return Element|null The last element child, or null when the node
 *   has no element children.
 * @see https://dom.spec.whatwg.org/#dom-parentnode-lastelementchild
 * @note This property was added to PHP in 8.0.0, and won't be needed
 *  once our minimum required version >= 8.0.0
 */
public static function getLastElementChild( $node ) {
	$acceptedTypes = self::or(
		Document::class, DocumentFragment::class, Element::class,
		// For compatibility with code which might call this from
		// outside Parsoid.
		\DOMDocument::class, \DOMDocumentFragment::class, \DOMElement::class
	);
	Assert::parameterType( $acceptedTypes, $node, '$node' );
	for ( $child = $node->lastChild; $child; $child = $child->previousSibling ) {
		if ( $child->nodeType === XML_ELEMENT_NODE ) {
			// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
			return $child;
		}
	}
	return null;
}
286 | |
/**
 * Return the first element matching a selector.
 *
 * This is the first item yielded by querySelectorAll() for the same
 * arguments.
 *
 * @param Document|DocumentFragment|Element $node
 * @param string $selector
 * @return Element|null The first match, or null when nothing matches.
 * @see https://dom.spec.whatwg.org/#dom-parentnode-queryselector
 */
public static function querySelector( $node, string $selector ) {
	$matches = self::querySelectorAll( $node, $selector );
	// The result may be an array or another iterable, so take the first
	// item by iterating rather than by index.
	foreach ( $matches as $firstMatch ) {
		return $firstMatch;
	}
	return null;
}
299 | |
/**
 * Return every element matching a selector; the search is delegated
 * to Zest.
 *
 * @param Document|DocumentFragment|Element $node
 * @param string $selector
 * @return (iterable<Element>&\Countable)|array<Element> Either a NodeList or an array
 * @see https://dom.spec.whatwg.org/#dom-parentnode-queryselectorall
 * @note Note that unlike the spec this method is not guaranteed to return a NodeList
 *  (which cannot be freely constructed in PHP), just a traversable containing Elements.
 */
public static function querySelectorAll( $node, string $selector ): iterable {
	$acceptedTypes = self::or(
		Document::class, DocumentFragment::class, Element::class,
		// For compatibility with code which might call this from
		// outside Parsoid.
		\DOMDocument::class, \DOMDocumentFragment::class, \DOMElement::class
	);
	Assert::parameterType( $acceptedTypes, $node, '$node' );
	// @phan-suppress-next-line PhanTypeMismatchArgument DOMNode
	$found = Zest::find( $selector, $node );
	return $found;
}
320 | |
/**
 * Find the closest preceding sibling of the node that is an element.
 *
 * @param Element|CharacterData $node Asserted to be an element or
 *   character-data node (Parsoid or native PHP DOM flavour).
 * @return Element|null The nearest earlier sibling whose node type is
 *   XML_ELEMENT_NODE, or null when there is none.
 * @see https://dom.spec.whatwg.org/#dom-nondocumenttypechildnode-previouselementsibling
 */
public static function getPreviousElementSibling( $node ) {
	$acceptedTypes = self::or(
		Element::class, CharacterData::class,
		// For compatibility with code which might call this from
		// outside Parsoid.
		\DOMElement::class, \DOMCharacterData::class
	);
	Assert::parameterType( $acceptedTypes, $node, '$node' );
	for ( $sibling = $node->previousSibling; $sibling; $sibling = $sibling->previousSibling ) {
		if ( $sibling->nodeType === XML_ELEMENT_NODE ) {
			// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
			return $sibling;
		}
	}
	return null;
}
343 | |
/**
 * Find the closest following sibling of the node that is an element.
 *
 * @param Element|CharacterData $node Asserted to be an element or
 *   character-data node (Parsoid or native PHP DOM flavour).
 * @return Element|null The nearest later sibling whose node type is
 *   XML_ELEMENT_NODE, or null when there is none.
 * @see https://dom.spec.whatwg.org/#dom-nondocumenttypechildnode-nextelementsibling
 */
public static function getNextElementSibling( $node ) {
	$acceptedTypes = self::or(
		Element::class, CharacterData::class,
		// For compatibility with code which might call this from
		// outside Parsoid.
		\DOMElement::class, \DOMCharacterData::class
	);
	Assert::parameterType( $acceptedTypes, $node, '$node' );
	for ( $sibling = $node->nextSibling; $sibling; $sibling = $sibling->nextSibling ) {
		if ( $sibling->nodeType === XML_ELEMENT_NODE ) {
			// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
			return $sibling;
		}
	}
	return null;
}
366 | |
/**
 * Append nodes (or strings, converted to text nodes) to the parent node,
 * in argument order.
 *
 * @param Document|DocumentFragment|Element $parentNode
 * @param Node|string ...$nodes Nodes are appended as-is; strings are
 *   first turned into text nodes.
 * @see https://dom.spec.whatwg.org/#dom-parentnode-append
 * @note This method was added in PHP 8.0.0
 */
public static function append( $parentNode, ...$nodes ): void {
	Assert::parameterType(
		self::or(
			Document::class, DocumentFragment::class, Element::class,
			// For compatibility with code which might call this from
			// outside Parsoid.
			\DOMDocument::class, \DOMDocumentFragment::class, \DOMElement::class
		),
		$parentNode, '$parentNode'
	);
	// A Document's ownerDocument is null, so when the parent is itself
	// a Document it has to serve as the text-node factory.
	$textFactory = $parentNode->ownerDocument ?? $parentNode;
	foreach ( $nodes as $node ) {
		if ( is_string( $node ) ) {
			$node = $textFactory->createTextNode( $node );
		}
		$parentNode->appendChild( $node );
	}
}
390 | |
/**
 * Detach the node from its parent.
 *
 * A node that has no parent is left untouched.
 *
 * @param Element|CharacterData $node
 * @see https://dom.spec.whatwg.org/#dom-childnode-remove
 */
public static function remove( $node ): void {
	$acceptedTypes = self::or(
		Element::class, CharacterData::class,
		// For compatibility with code which might call this from
		// outside Parsoid.
		\DOMElement::class, \DOMCharacterData::class
	);
	Assert::parameterType( $acceptedTypes, $node, '$node' );
	if ( !$node->parentNode ) {
		return;
	}
	$node->parentNode->removeChild( $node );
}
409 | |
/**
 * Serialize the element's contents (innerHTML).
 *
 * Delegates to XMLSerializer with the `innerXML` option and returns the
 * `html` entry of its result.
 *
 * @see DOMUtils::getFragmentInnerHTML() for the fragment version
 * @param Element $element
 * @return string
 * @see https://w3c.github.io/DOM-Parsing/#dom-innerhtml-innerhtml
 */
public static function getInnerHTML( $element ): string {
	$serialized = XMLSerializer::serialize( $element, [ 'innerXML' => true ] );
	return $serialized['html'];
}
420 | |
/**
 * Replace the element's contents with parsed HTML (innerHTML setter).
 *
 * The markup is parsed by RemexHtml as a fragment, using the element's
 * lower-cased node name as the fragment context. The element is then
 * emptied and the children of the parsed fragment are migrated into it.
 *
 * @see https://w3c.github.io/DOM-Parsing/#dom-innerhtml-innerhtml
 * @see DOMUtils::setFragmentInnerHTML() for the fragment version
 * @param Element $element
 * @param string $html
 */
public static function setInnerHTML( $element, string $html ): void {
	// A DOMBuilder whose documents come from DOMCompat::newDocument().
	$builder = new class( [
		'suppressHtmlNamespace' => true,
	] ) extends DOMBuilder {
		/** @inheritDoc */
		protected function createDocument(
			?string $doctypeName = null,
			?string $public = null,
			?string $system = null
		) {
			// @phan-suppress-next-line PhanTypeMismatchReturn
			return DOMCompat::newDocument( $doctypeName === 'html' );
		}
	};
	$parser = new Tokenizer(
		new Dispatcher( new TreeBuilder( $builder ) ),
		$html,
		[ 'ignoreErrors' => true ]
	);
	$fragmentContext = [
		'fragmentNamespace' => HTMLData::NS_HTML,
		'fragmentName' => self::nodeName( $element ),
	];
	$parser->execute( $fragmentContext );

	// Empty the element before moving the new content in.
	self::replaceChildren( $element );

	$frag = $builder->getFragment();
	'@phan-var Node $frag'; // @var Node $frag
	DOMUtils::migrateChildrenBetweenDocs( $frag, $element );
}
460 | |
/**
 * Serialize the element itself (outerHTML).
 *
 * Delegates to XMLSerializer with `addDoctype` disabled and returns the
 * `html` entry of its result.
 *
 * @param Element $element
 * @return string
 * @see https://w3c.github.io/DOM-Parsing/#dom-element-outerhtml
 */
public static function getOuterHTML( $element ): string {
	$serialized = XMLSerializer::serialize( $element, [ 'addDoctype' => false ] );
	return $serialized['html'];
}
470 | |
/**
 * Read an attribute from an element, spec-style.
 *
 * Unlike PHP's own getAttribute(), a missing attribute yields `null`,
 * so callers can tell "present with an empty value" apart from
 * "not present at all".
 *
 * @param Element $element
 * @param string $attributeName
 * @return ?string The attribute value, or `null` if the attribute does
 *   not exist on the element.
 * @see https://dom.spec.whatwg.org/#dom-element-getattribute
 */
public static function getAttribute( $element, string $attributeName ): ?string {
	return $element->hasAttribute( $attributeName )
		? $element->getAttribute( $attributeName )
		: null;
}
491 | |
/**
 * Wrap the element in a TokenList representing its class list.
 *
 * A new TokenList is constructed on every call.
 *
 * @param Element $node
 * @return TokenList
 * @see https://dom.spec.whatwg.org/#dom-element-classlist
 */
public static function getClassList( $node ): TokenList {
	$classList = new TokenList( $node );
	return $classList;
}
501 | |
/**
 * Strip leading and trailing ASCII whitespace, then collapse every
 * remaining run of ASCII whitespace into a single space.
 *
 * @param string $text
 * @return string
 * @see https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace
 */
private static function stripAndCollapseASCIIWhitespace( string $text ): string {
	$ws = self::ASCII_WHITESPACE;
	$trimmed = trim( $text, $ws );
	return preg_replace( "/[$ws]+/", ' ', $trimmed );
}
511 | |
/**
 * Recursively delete Text children whose value is the empty string.
 *
 * Element children are descended into; every other kind of child is
 * left alone.
 *
 * @param Element|DocumentFragment $e Root of the subtree to clean.
 */
private static function stripEmptyTextNodes( $e ): void {
	for ( $child = $e->firstChild; $child; $child = $following ) {
		// Read the sibling link up front, before $child may be removed.
		$following = $child->nextSibling;
		if ( $child instanceof Element ) {
			self::stripEmptyTextNodes( $child );
			continue;
		}
		if ( $child instanceof Text && $child->nodeValue === '' ) {
			$e->removeChild( $child );
		}
	}
}
529 | |
/**
 * Normalize a DOM subtree and clean up after PHP's normalizer.
 *
 * @param Element|DocumentFragment $elt root of the DOM tree that
 *   needs to be normalized
 */
public static function normalize( $elt ): void {
	// Step 1: the DOM's own normalization.
	$elt->normalize();

	// Step 2: sweep the tree rooted at $elt for stray empty text nodes.
	// Contrary to https://www.w3.org/TR/DOM-Level-2-Core/core.html#ID-normalize,
	// PHP's normalization can leave up to 1 empty text node behind.
	// See https://bugs.php.net/bug.php?id=78221
	self::stripEmptyTextNodes( $elt );
}
543 | |
/**
 * ParentNode.replaceChildren()
 * https://developer.mozilla.org/en-US/docs/Web/API/ParentNode/replaceChildren
 *
 * Removes every existing child of the parent, then inserts the given
 * nodes in argument order. Calling it with no nodes simply empties the
 * parent.
 *
 * @param Document|DocumentFragment|Element $parentNode
 * @param string|Node ...$nodes Nodes are inserted as-is; strings are
 *   first turned into text nodes.
 */
public static function replaceChildren(
	$parentNode, ...$nodes
): void {
	Assert::parameterType(
		self::or(
			Document::class, DocumentFragment::class, Element::class,
			// For compatibility with code which might call this from
			// outside Parsoid.
			\DOMDocument::class, \DOMDocumentFragment::class, \DOMElement::class
		),
		$parentNode, '$parentNode'
	);
	while ( $parentNode->firstChild ) {
		$parentNode->removeChild( $parentNode->firstChild );
	}
	// A Document's ownerDocument is null, so when the parent is itself
	// a Document it has to serve as the text-node factory.
	$textFactory = $parentNode->ownerDocument ?? $parentNode;
	foreach ( $nodes as $node ) {
		if ( is_string( $node ) ) {
			$node = $textFactory->createTextNode( $node );
		}
		// A null reference node means "insert at the end".
		$parentNode->insertBefore( $node, null );
	}
}
573 | |
/**
 * Combine class names into the `A|B|C` alternative form accepted by
 * Assert::parameterType.
 *
 * @param class-string ...$args
 * @return string The names joined with `|`; empty when none are given.
 */
private static function or( ...$args ) {
	$separator = '|';
	return implode( $separator, $args );
}
582 | } |