Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
73.97% |
108 / 146 |
|
65.38% |
17 / 26 |
CRAP | |
0.00% |
0 / 1 |
| DOMCompat | |
73.97% |
108 / 146 |
|
65.38% |
17 / 26 |
108.34 | |
0.00% |
0 / 1 |
| newDocument | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| nodeName | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getBody | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
5.05 | |||
| getHead | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
| getTitle | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| setTitle | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
| getParentElement | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
| getElementById | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
| setIdAttribute | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getElementsByTagName | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
| getLastElementChild | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
| querySelector | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| querySelectorAll | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
| getPreviousElementSibling | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
| getNextElementSibling | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
| remove | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
| getInnerHTML | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| setInnerHTML | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
1 | |||
| getOuterHTML | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getAttribute | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| getClassList | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| stripAndCollapseASCIIWhitespace | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| stripEmptyTextNodes | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
30 | |||
| normalize | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| replaceChildren | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
20 | |||
| or | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Utils; |
| 5 | |
| 6 | use Wikimedia\Assert\Assert; |
| 7 | use Wikimedia\Parsoid\DOM\CharacterData; |
| 8 | use Wikimedia\Parsoid\DOM\Document; |
| 9 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
| 10 | use Wikimedia\Parsoid\DOM\Element; |
| 11 | use Wikimedia\Parsoid\DOM\Node; |
| 12 | use Wikimedia\Parsoid\DOM\Text; |
| 13 | use Wikimedia\Parsoid\Utils\DOMCompat\TokenList; |
| 14 | use Wikimedia\Parsoid\Wt2Html\XMLSerializer; |
| 15 | use Wikimedia\RemexHtml\DOM\DOMBuilder; |
| 16 | use Wikimedia\RemexHtml\HTMLData; |
| 17 | use Wikimedia\RemexHtml\Tokenizer\Tokenizer; |
| 18 | use Wikimedia\RemexHtml\TreeBuilder\Dispatcher; |
| 19 | use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder; |
| 20 | use Wikimedia\Zest\Zest; |
| 21 | |
| 22 | /** |
| 23 | * Helper class that provides missing DOM level 3 methods for the PHP DOM classes. |
| 24 | * For a DOM method $node->foo( $bar) the equivalent helper is DOMCompat::foo( $node, $bar ). |
| 25 | * For a DOM property $node->foo there is a DOMCompat::getFoo( $node ) and |
| 26 | * DOMCompat::setFoo( $node, $value ). |
| 27 | * |
| 28 | * Only implements the methods that are actually used by Parsoid. |
| 29 | * |
| 30 | * Because this class may be used by code outside Parsoid it tries to |
| 31 | * be relatively tolerant of object types: you can call it either with |
| 32 | * PHP's DOM* types or with a "proper" DOM implementation, and it will |
| 33 | * attempt to Do The Right Thing regardless. As a result there are |
| 34 | * generally not parameter type hints for DOM object types, and the |
| 35 | * return types will be broad enough to accommodate the value a "real" |
| 36 | * DOM implementation would return, as well as the values our |
| 37 | * thunk will return. (For instance, we can't create a "real" NodeList |
| 38 | * in our compatibility thunk.) |
| 39 | */ |
class DOMCompat {

	/**
	 * Tab, LF, FF, CR, space
	 * @see https://infra.spec.whatwg.org/#ascii-whitespace
	 */
	private const ASCII_WHITESPACE = "\t\r\f\n ";

	/**
	 * Create a new empty document.
	 * This is abstracted because the process is a little different depending
	 * on whether we're using Dodo or DOMDocument, and phan gets a little
	 * confused by this.
	 * @param bool $isHtml Accepted for API compatibility; this implementation
	 *   does not currently consult it and always builds the document the
	 *   same way.
	 * @return Document
	 */
	public static function newDocument( bool $isHtml ) {
		// @phan-suppress-next-line PhanParamTooMany,PhanTypeInstantiateInterface
		return new Document( "1.0", "UTF-8" );
	}

	/**
	 * Return the lower-case version of the node name (HTML says this should
	 * be capitalized).
	 * @param Node $node
	 * @return string
	 */
	public static function nodeName( Node $node ): string {
		return strtolower( $node->nodeName );
	}

	/**
	 * Get document body.
	 * Unlike the spec we return it as a native PHP DOM object.
	 *
	 * The result is cached on the `body` property of the document, so
	 * subsequent calls return the cached element without searching again.
	 *
	 * @param Document $document
	 * @return Element|null The first `body` or `frameset` child of the
	 *   document element, or null if there is none (including when the
	 *   document has no document element at all).
	 * @see https://html.spec.whatwg.org/multipage/dom.html#dom-document-body
	 */
	public static function getBody( $document ) {
		// WARNING: this will not be updated if (for some reason) the
		// document body changes.
		// isset() is used (as in getHead()) so that document objects which
		// do not declare a `body` property don't trigger an "undefined
		// property" warning; for a declared-but-null property the result
		// is the same as a `!== null` comparison.
		if ( isset( $document->body ) ) {
			return $document->body;
		}
		$root = $document->documentElement;
		if ( $root === null ) {
			// Empty document: there is nothing to search.
			return null;
		}
		foreach ( $root->childNodes as $element ) {
			/** @var Element $element */
			$nodeName = self::nodeName( $element );
			if ( $nodeName === 'body' || $nodeName === 'frameset' ) {
				// Caching!
				$document->body = $element;
				// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
				return $element;
			}
		}
		return null;
	}

	/**
	 * Get document head.
	 * Unlike the spec we return it as a native PHP DOM object.
	 *
	 * The result is cached on the `head` property of the document.
	 *
	 * @param Document $document
	 * @return Element|null The first `head` child of the document element,
	 *   or null if there is none (including when the document has no
	 *   document element at all).
	 * @see https://html.spec.whatwg.org/multipage/dom.html#dom-document-head
	 */
	public static function getHead( $document ) {
		// Use an undeclared dynamic property as a cache.
		// WARNING: this will not be updated if (for some reason) the
		// document head changes.
		if ( isset( $document->head ) ) {
			return $document->head;
		}
		$root = $document->documentElement;
		if ( $root === null ) {
			// Empty document: there is nothing to search.
			return null;
		}
		foreach ( $root->childNodes as $element ) {
			/** @var Element $element */
			if ( self::nodeName( $element ) === 'head' ) {
				$document->head = $element; // Caching!
				// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
				return $element;
			}
		}
		return null;
	}

	/**
	 * Get document title.
	 * @param Document $document
	 * @return string The text content of the first `title` element with
	 *   ASCII whitespace stripped and collapsed, or the empty string if the
	 *   document has no `title` element.
	 * @see https://html.spec.whatwg.org/multipage/dom.html#document.title
	 */
	public static function getTitle( $document ): string {
		$titleElement = self::querySelector( $document, 'title' );
		return $titleElement ? self::stripAndCollapseASCIIWhitespace( $titleElement->textContent ) : '';
	}

	/**
	 * Set document title.
	 *
	 * If the document has no `title` element, one is appended to the head;
	 * if there is no head either, this method does nothing.
	 *
	 * @param Document $document
	 * @param string $title
	 * @see https://html.spec.whatwg.org/multipage/dom.html#document.title
	 */
	public static function setTitle( $document, string $title ): void {
		$titleElement = self::querySelector( $document, 'title' );
		if ( !$titleElement ) {
			$headElement = self::getHead( $document );
			if ( $headElement ) {
				$titleElement = DOMUtils::appendToHead( $document, 'title' );
			}
		}
		if ( $titleElement ) {
			$titleElement->textContent = $title;
		}
	}

	/**
	 * Return the parent element, or null if the parent is not an element.
	 * @param Node $node
	 * @return Element|null
	 * @see https://dom.spec.whatwg.org/#dom-node-parentelement
	 */
	public static function getParentElement( $node ) {
		$parent = $node->parentNode;
		if ( $parent && $parent->nodeType === XML_ELEMENT_NODE ) {
			/** @var Element $parent */
			// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
			return $parent;
		}
		return null;
	}

	/**
	 * Return the descendant with the specified ID.
	 * Workaround for https://bugs.php.net/bug.php?id=77686 and other issues related to
	 * inconsistent indexing behavior.
	 * XXX: 77686 is fixed in php 8.1.21
	 * @param Document|DocumentFragment $node
	 * @param string $id
	 * @return Element|null The first match reported by Zest, or null if
	 *   there is no match.
	 * @see https://dom.spec.whatwg.org/#dom-nonelementparentnode-getelementbyid
	 */
	public static function getElementById( $node, string $id ) {
		Assert::parameterType(
			self::or(
				Document::class, DocumentFragment::class,
				// For compatibility with code which might call this from
				// outside Parsoid.
				\DOMDocument::class, \DOMDocumentFragment::class
			),
			$node, '$node' );
		// @phan-suppress-next-line PhanTypeMismatchArgument Zest is declared to take DOMDocument\DOMElement
		$elements = Zest::getElementsById( $node, $id );
		// @phan-suppress-next-line PhanTypeMismatchReturn
		return $elements[0] ?? null;
	}

	/**
	 * Workaround bug in PHP's Document::getElementById() which doesn't
	 * actually index the 'id' attribute unless you use the non-standard
	 * `Element::setIdAttribute` method after the attribute is set;
	 * see https://www.php.net/manual/en/domdocument.getelementbyid.php
	 * for more details.
	 *
	 * @param Element $element
	 * @param string $id The desired value for the `id` attribute on $element.
	 * @see https://phabricator.wikimedia.org/T232390
	 */
	public static function setIdAttribute( $element, string $id ): void {
		$element->setAttribute( 'id', $id );
		$element->setIdAttribute( 'id', true );// phab:T232390
	}

	/**
	 * Return all descendants with the specified tag name.
	 * Workaround for PHP's getElementsByTagName being inexplicably slow in some situations
	 * and the lack of Element::getElementsByTagName().
	 * @param Document|Element $node
	 * @param string $tagName
	 * @return (iterable<Element>&\Countable)|array<Element> Either an array or an HTMLCollection object
	 * @see https://dom.spec.whatwg.org/#dom-document-getelementsbytagname
	 * @see https://dom.spec.whatwg.org/#dom-element-getelementsbytagname
	 * @note Note that unlike the spec this method is not guaranteed to return a NodeList
	 *  (which cannot be freely constructed in PHP), just a traversable containing Elements.
	 */
	public static function getElementsByTagName( $node, string $tagName ): iterable {
		Assert::parameterType(
			self::or(
				Document::class, Element::class,
				// For compatibility with code which might call this from
				// outside Parsoid.
				\DOMDocument::class, \DOMElement::class
			),
			$node, '$node' );
		// @phan-suppress-next-line PhanTypeMismatchArgument Zest is declared to take DOMDocument\DOMElement
		$result = Zest::getElementsByTagName( $node, $tagName );
		'@phan-var array<Element> $result'; // @var array<Element> $result
		return $result;
	}

	/**
	 * Return the last child of the node that is an Element, or null otherwise.
	 * @param Document|DocumentFragment|Element $node
	 * @return Element|null
	 * @see https://dom.spec.whatwg.org/#dom-parentnode-lastelementchild
	 */
	public static function getLastElementChild( $node ) {
		Assert::parameterType(
			self::or(
				Document::class, DocumentFragment::class, Element::class,
				// For compatibility with code which might call this from
				// outside Parsoid.
				\DOMDocument::class, \DOMDocumentFragment::class, \DOMElement::class
			),
			$node, '$node' );
		// Walk backwards from the last child until an element is found.
		$lastChild = $node->lastChild;
		while ( $lastChild && $lastChild->nodeType !== XML_ELEMENT_NODE ) {
			$lastChild = $lastChild->previousSibling;
		}
		// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
		return $lastChild;
	}

	/**
	 * Return the first element matching the selector, or null if none does.
	 * @param Document|DocumentFragment|Element $node
	 * @param string $selector
	 * @return Element|null
	 * @see https://dom.spec.whatwg.org/#dom-parentnode-queryselector
	 */
	public static function querySelector( $node, string $selector ) {
		// Return on the first iteration: works for both arrays and
		// other iterables returned by querySelectorAll().
		foreach ( self::querySelectorAll( $node, $selector ) as $el ) {
			return $el;
		}
		return null;
	}

	/**
	 * Return all elements matching the selector, as found by Zest.
	 * @param Document|DocumentFragment|Element $node
	 * @param string $selector
	 * @return (iterable<Element>&\Countable)|array<Element> Either a NodeList or an array
	 * @see https://dom.spec.whatwg.org/#dom-parentnode-queryselectorall
	 * @note Note that unlike the spec this method is not guaranteed to return a NodeList
	 *  (which cannot be freely constructed in PHP), just a traversable containing Elements.
	 */
	public static function querySelectorAll( $node, string $selector ): iterable {
		Assert::parameterType(
			self::or(
				Document::class, DocumentFragment::class, Element::class,
				// For compatibility with code which might call this from
				// outside Parsoid.
				\DOMDocument::class, \DOMDocumentFragment::class, \DOMElement::class
			),
			$node, '$node' );
		// @phan-suppress-next-line PhanTypeMismatchArgument DOMNode
		return Zest::find( $selector, $node );
	}

	/**
	 * Return the last preceding sibling of the node that is an element, or null otherwise.
	 * @param Node $node
	 * @return Element|null
	 * @see https://dom.spec.whatwg.org/#dom-nondocumenttypechildnode-previouselementsibling
	 */
	public static function getPreviousElementSibling( $node ) {
		Assert::parameterType(
			self::or(
				Element::class, CharacterData::class,
				// For compatibility with code which might call this from
				// outside Parsoid.
				\DOMElement::class, \DOMCharacterData::class
			),
			$node, '$node' );
		$previousSibling = $node->previousSibling;
		while ( $previousSibling && $previousSibling->nodeType !== XML_ELEMENT_NODE ) {
			$previousSibling = $previousSibling->previousSibling;
		}
		// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
		return $previousSibling;
	}

	/**
	 * Return the first following sibling of the node that is an element, or null otherwise.
	 * @param Node $node
	 * @return Element|null
	 * @see https://dom.spec.whatwg.org/#dom-nondocumenttypechildnode-nextelementsibling
	 */
	public static function getNextElementSibling( $node ) {
		Assert::parameterType(
			self::or(
				Element::class, CharacterData::class,
				// For compatibility with code which might call this from
				// outside Parsoid.
				\DOMElement::class, \DOMCharacterData::class
			),
			$node, '$node' );
		$nextSibling = $node->nextSibling;
		while ( $nextSibling && $nextSibling->nodeType !== XML_ELEMENT_NODE ) {
			$nextSibling = $nextSibling->nextSibling;
		}
		// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
		return $nextSibling;
	}

	/**
	 * Removes the node from the document.
	 * Does nothing if the node has no parent.
	 * @param Element|CharacterData $node
	 * @see https://dom.spec.whatwg.org/#dom-childnode-remove
	 */
	public static function remove( $node ): void {
		Assert::parameterType(
			self::or(
				Element::class, CharacterData::class,
				// For compatibility with code which might call this from
				// outside Parsoid.
				\DOMElement::class, \DOMCharacterData::class
			),
			$node, '$node' );
		if ( $node->parentNode ) {
			$node->parentNode->removeChild( $node );
		}
	}

	/**
	 * Get innerHTML.
	 * @see DOMUtils::getFragmentInnerHTML() for the fragment version
	 * @param Element $element
	 * @return string
	 * @see https://w3c.github.io/DOM-Parsing/#dom-innerhtml-innerhtml
	 */
	public static function getInnerHTML( $element ): string {
		return XMLSerializer::serialize( $element, [ 'innerXML' => true ] )['html'];
	}

	/**
	 * Set innerHTML.
	 *
	 * The HTML is parsed as a fragment in the context of $element's tag
	 * name; the existing children of $element are then removed and the
	 * parsed nodes are migrated into it.
	 *
	 * @see https://w3c.github.io/DOM-Parsing/#dom-innerhtml-innerhtml
	 * @see DOMUtils::setFragmentInnerHTML() for the fragment version
	 * @param Element $element
	 * @param string $html
	 */
	public static function setInnerHTML( $element, string $html ): void {
		// Subclass the Remex DOM builder so that the scratch document it
		// parses into is created through DOMCompat::newDocument().
		$domBuilder = new class( [
			'suppressHtmlNamespace' => true,
		] ) extends DOMBuilder {
			/** @inheritDoc */
			protected function createDocument(
				?string $doctypeName = null,
				?string $public = null,
				?string $system = null
			) {
				// @phan-suppress-next-line PhanTypeMismatchReturn
				return DOMCompat::newDocument( $doctypeName === 'html' );
			}
		};
		$treeBuilder = new TreeBuilder( $domBuilder );
		$dispatcher = new Dispatcher( $treeBuilder );
		$tokenizer = new Tokenizer( $dispatcher, $html, [ 'ignoreErrors' => true ] );

		$tokenizer->execute( [
			'fragmentNamespace' => HTMLData::NS_HTML,
			'fragmentName' => self::nodeName( $element ),
		] );

		// Empty the element
		self::replaceChildren( $element );

		$frag = $domBuilder->getFragment();
		'@phan-var Node $frag'; // @var Node $frag
		DOMUtils::migrateChildrenBetweenDocs(
			$frag, $element
		);
	}

	/**
	 * Get outerHTML.
	 * @param Element $element
	 * @return string
	 * @see https://w3c.github.io/DOM-Parsing/#dom-element-outerhtml
	 */
	public static function getOuterHTML( $element ): string {
		return XMLSerializer::serialize( $element, [ 'addDoctype' => false ] )['html'];
	}

	/**
	 * Return the value of an element attribute.
	 *
	 * Unlike PHP's version, this is spec-compliant and returns `null` if
	 * the attribute is not present, allowing the caller to distinguish
	 * between "the attribute exists but has the empty string as its value"
	 * and "the attribute does not exist".
	 *
	 * @param Element $element
	 * @param string $attributeName
	 * @return ?string The attribute value, or `null` if the attribute does
	 *   not exist on the element.
	 * @see https://dom.spec.whatwg.org/#dom-element-getattribute
	 */
	public static function getAttribute( $element, string $attributeName ): ?string {
		if ( !$element->hasAttribute( $attributeName ) ) {
			return null;
		}
		return $element->getAttribute( $attributeName );
	}

	/**
	 * Return the class list of this element.
	 * @param Element $node
	 * @return TokenList
	 * @see https://dom.spec.whatwg.org/#dom-element-classlist
	 */
	public static function getClassList( $node ): TokenList {
		return new TokenList( $node );
	}

	/**
	 * Trim leading/trailing ASCII whitespace and collapse every internal
	 * run of ASCII whitespace into a single space.
	 * @param string $text
	 * @return string
	 * @see https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace
	 */
	private static function stripAndCollapseASCIIWhitespace( string $text ): string {
		$ws = self::ASCII_WHITESPACE;
		return preg_replace( "/[$ws]+/", ' ', trim( $text, $ws ) );
	}

	/**
	 * Recursively remove empty-string text nodes from the subtree rooted
	 * at $e.
	 * @param Element|DocumentFragment $e
	 */
	private static function stripEmptyTextNodes( $e ): void {
		$c = $e->firstChild;
		while ( $c ) {
			// Grab the sibling first: $c may be detached below.
			$next = $c->nextSibling;
			if ( $c instanceof Text ) {
				if ( $c->nodeValue === '' ) {
					$e->removeChild( $c );
				}
			} elseif ( $c instanceof Element ) {
				self::stripEmptyTextNodes( $c );
			}
			$c = $next;
		}
	}

	/**
	 * Normalize the tree and then remove any empty text nodes that the
	 * native normalization left behind.
	 * @param Element|DocumentFragment $elt root of the DOM tree that
	 *   needs to be normalized
	 */
	public static function normalize( $elt ): void {
		$elt->normalize();

		// Now traverse the tree rooted at $elt and remove any stray empty text nodes
		// Unlike what https://www.w3.org/TR/DOM-Level-2-Core/core.html#ID-normalize says,
		// the PHP DOM's normalization leaves behind up to 1 empty text node.
		// See https://bugs.php.net/bug.php?id=78221
		self::stripEmptyTextNodes( $elt );
	}

	/**
	 * ParentNode.replaceChildren()
	 * https://developer.mozilla.org/en-US/docs/Web/API/ParentNode/replaceChildren
	 *
	 * Removes every existing child of $parentNode, then appends each of
	 * $nodes in order. String arguments are converted to text nodes.
	 *
	 * @param Document|DocumentFragment|Element $parentNode
	 * @param string|Node ...$nodes
	 */
	public static function replaceChildren(
		$parentNode, ...$nodes
	): void {
		Assert::parameterType(
			self::or(
				Document::class, DocumentFragment::class, Element::class,
				// For compatibility with code which might call this from
				// outside Parsoid.
				\DOMDocument::class, \DOMDocumentFragment::class, \DOMElement::class
			),
			$parentNode, '$parentNode'
		);
		while ( $parentNode->firstChild ) {
			$parentNode->removeChild( $parentNode->firstChild );
		}
		foreach ( $nodes as $node ) {
			if ( is_string( $node ) ) {
				// A document node has a null ownerDocument; in that case
				// the parent itself is the factory for new nodes.
				$doc = $parentNode->ownerDocument ?? $parentNode;
				$node = $doc->createTextNode( $node );
			}
			$parentNode->insertBefore( $node, null );
		}
	}

	/**
	 * Join class names together in a form suitable for Assert::parameterType.
	 * @param class-string ...$args
	 * @return string The class names separated by `|`.
	 */
	private static function or( ...$args ): string {
		return implode( '|', $args );
	}
}