Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
10.34% |
6 / 58 |
|
20.00% |
1 / 5 |
CRAP | |
0.00% |
0 / 1 |
| DOMTraverser | |
10.34% |
6 / 58 |
|
20.00% |
1 / 5 |
723.55 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| addHandler | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| callHandlers | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
72 | |||
| traverse | |
50.00% |
2 / 4 |
|
0.00% |
0 / 1 |
2.50 | |||
| traverseInternal | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
380 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Utils; |
| 5 | |
| 6 | use Wikimedia\Parsoid\Config\SiteConfig; |
| 7 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
| 8 | use Wikimedia\Parsoid\DOM\Element; |
| 9 | use Wikimedia\Parsoid\DOM\Node; |
| 10 | use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; |
| 11 | |
| 12 | /** |
| 13 | * Class for helping us traverse the DOM. |
| 14 | * |
| 15 | * This class currently does a pre-order depth-first traversal. |
| 16 | * See {@link DOMPostOrder} for post-order traversal. |
| 17 | */ |
| 18 | class DOMTraverser { |
| 19 | /** |
| 20 | * List of handlers to call on each node. Each handler is an array with the following fields: |
| 21 | * - action: a callable to call |
| 22 | * - nodeName: if non-null, only call the action on nodes with this name |
| 23 | * @var array<array{action:callable,nodeName:?string}> |
| 24 | * @see addHandler() |
| 25 | */ |
| 26 | private $handlers = []; |
| 27 | |
| 28 | /** |
| 29 | * Should the handlers also be called on HTML embedded in element attributes? (see callHandlers()) |
| 30 | */ |
| 31 | private bool $applyToAttributeEmbeddedHTML; |
| 32 | |
| 33 | /** |
| 34 | * @var bool Whether traverseInternal() should record encapsulation info in $state->tplInfo |
| 35 | */ |
| 36 | private $traverseWithTplInfo; |
| 37 | |
| 38 | /** |
| 39 | * @param bool $traverseWithTplInfo If true, $state->tplInfo is set/cleared while traversing |
| 40 | * @param bool $applyToAttributeEmbeddedHTML If true, also traverse attribute-embedded HTML |
| 41 | */ |
| 42 | public function __construct( bool $traverseWithTplInfo = false, bool $applyToAttributeEmbeddedHTML = false ) { |
| 43 | $this->traverseWithTplInfo = $traverseWithTplInfo; |
| 44 | $this->applyToAttributeEmbeddedHTML = $applyToAttributeEmbeddedHTML; |
| 45 | } |
| 46 | |
| 47 | /** |
| 48 | * Add a handler to the DOM traverser. Handlers are called in the order they were added. |
| 49 | * |
| 50 | * @param ?string $nodeName Only call $action on nodes with this name; null matches every node |
| 51 | * @param callable $action A callback, called on each node we traverse that matches nodeName. |
| 52 | * Will be called with the following parameters: |
| 53 | * - Node $node: the node being processed |
| 54 | * - ?DTState $state: the traversal state (null if the traversal |
| 55 | * was started without one). |
| 56 | * Return value: Node|null|true. |
| 57 | * - true: proceed normally |
| 58 | * - Node: traversal will continue on the new node (further handlers will not be called |
| 59 | * on the current node); after processing it and its siblings, it will continue with the |
| 60 | * next sibling of the closest ancestor which has one. |
| 61 | * - null: like the Node case, except there is no new node to process before continuing. |
| 62 | */ |
| 63 | public function addHandler( ?string $nodeName, callable $action ): void { |
| 64 | $this->handlers[] = [ |
| 65 | 'action' => $action, |
| 66 | 'nodeName' => $nodeName, |
| 67 | ]; |
| 68 | } |
| 69 | |
| 70 | /** |
| 71 | * @param Node $node The node to run the registered handlers on |
| 72 | * @param ?SiteConfig $siteConfig Forwarded to attribute-embedded-HTML processing and nested traversals |
| 73 | * @param DTState|null $state Passed as the second argument to every handler |
| 74 | * @return bool|mixed true if all matching handlers returned true, else the first non-true handler result |
| 75 | */ |
| 76 | private function callHandlers( Node $node, ?SiteConfig $siteConfig, ?DTState $state ) { |
| 77 | $name = DOMUtils::nodeName( $node ); |
| 78 | |
| 79 | // Process embedded HTML first since the handlers below might |
| 80 | // return a different node which aborts processing. By processing |
| 81 | // attributes first, we ensure attributes are always processed. |
| 82 | if ( $node instanceof Element && $this->applyToAttributeEmbeddedHTML ) { |
| 83 | ContentUtils::processAttributeEmbeddedDom( |
| 84 | $siteConfig, |
| 85 | $node, |
| 86 | function ( DocumentFragment $dom ) use ( $siteConfig, $state ) { |
| 87 | // We are processing a nested document (which by definition |
| 88 | // is not a top-level document). |
| 89 | // FIXME: |
| 90 | // 1. This argument replicates existing behavior but is it sound? |
| 91 | // In any case, we should first replicate existing behavior |
| 92 | // and revisit this later. |
| 93 | // 2. It is not clear if creating a *new* state is the right thing |
| 94 | // or if reusing *parts* of the old state is the right thing. |
| 95 | // One of the places where this matters is around the use of |
| 96 | // $state->tplInfo. One could probably find arguments for either |
| 97 | // direction. But, "independent parsing" semantics which Parsoid |
| 98 | // is aiming for would lead us to use a new state or even a new |
| 99 | // traversal object here and that feels a little bit "more correct" |
| 100 | // than reusing partial state. |
| 101 | $newState = $state ? new DTState( $state->env, $state->options, false ) : null; |
| 102 | $this->traverse( $siteConfig, $dom, $newState ); |
| 103 | return true; // $dom might have been changed |
| 104 | } |
| 105 | ); |
| 106 | } |
| 107 | |
| 108 | foreach ( $this->handlers as $handler ) { |
| 109 | if ( $handler['nodeName'] === null || $handler['nodeName'] === $name ) { |
| 110 | $result = $handler['action']( $node, $state ); |
| 111 | if ( $result !== true ) { |
| 112 | // Abort processing for this node: remaining handlers are skipped |
| 113 | return $result; |
| 114 | } |
| 115 | } |
| 116 | } |
| 117 | return true; |
| 118 | } |
| 119 | |
| 120 | /** |
| 121 | * Traverse the DOM and fire the handlers that are registered. |
| 122 | * |
| 123 | * Handlers can return |
| 124 | * - the next node to process: aborts processing for current node (ie. no further handlers are |
| 125 | * called) and continues processing on returned node. Essentially, that node and its siblings |
| 126 | * replace the current node and its siblings for the purposes of the traversal; after they |
| 127 | * are fully processed, the algorithm moves back to the parent of $workNode to look for |
| 128 | * the next sibling. |
| 129 | * - `null`: same as above, except it continues from the next sibling of the parent (or if |
| 130 | * that does not exist, the next sibling of the grandparent etc). This is so that returning |
| 131 | * `$workNode->nextSibling` works even when workNode is a last child of its parent. |
| 132 | * - `true`: continues regular processing on current node. |
| 133 | * |
| 134 | * @param SiteConfig|ParsoidExtensionAPI|null $siteConfig |
| 135 | * Passing ParsoidExtensionAPI here is deprecated. |
| 136 | * @param Node $workNode The starting node for the traversal. |
| 137 | * The traversal could go beyond the subtree rooted at $workNode if |
| 138 | * the handlers called during traversal return an arbitrary node elsewhere |
| 139 | * in the DOM in which case the traversal scope can be pretty much the whole |
| 140 | * DOM that $workNode is present in. This behavior would be confusing but |
| 141 | * there is nothing in the traversal code to prevent that. |
| 142 | * @param DTState|null $state Passed to every handler. NOTE(review): with $traverseWithTplInfo set, $state->tplInfo is written, so null looks unsafe; confirm. |
| 143 | */ |
| 144 | public function traverse( $siteConfig, Node $workNode, ?DTState $state = null ): void { |
| 145 | if ( $siteConfig instanceof ParsoidExtensionAPI ) { |
| 146 | $siteConfig = $siteConfig->getSiteConfig(); |
| 147 | $siteConfig->deprecated( __METHOD__ . ' with ParsoidExtensionAPI', '0.22' ); |
| 148 | } |
| 149 | $this->traverseInternal( true, $siteConfig, $workNode, $state ); |
| 150 | } |
| 151 | |
| 152 | /** |
| 153 | * @param bool $isRootNode True while processing the traversal's starting node, whose nextSibling is not followed |
| 154 | * @param ?SiteConfig $siteConfig Forwarded unchanged to callHandlers() and to recursive calls |
| 155 | * @param Node $workNode First node to process; the loop then follows whatever the handlers/siblings yield |
| 156 | * @param DTState|null $state Passed to the handlers; also holds tplInfo when $traverseWithTplInfo is set |
| 157 | */ |
| 158 | private function traverseInternal( |
| 159 | bool $isRootNode, ?SiteConfig $siteConfig, Node $workNode, ?DTState $state |
| 160 | ): void { |
| 161 | while ( $workNode !== null ) { |
| 162 | if ( $this->traverseWithTplInfo && $workNode instanceof Element ) { |
| 163 | // Identify the first template/extension node. |
| 164 | // You'd think the !tplInfo check isn't necessary since |
| 165 | // we don't have nested transclusions, however, you can |
| 166 | // get extensions in transclusions. |
| 167 | if ( |
| 168 | !( $state->tplInfo ?? null ) && WTUtils::isFirstEncapsulationWrapperNode( $workNode ) |
| 169 | // Ensure this isn't just a meta marker, since we might |
| 170 | // not be traversing after encapsulation. Note that the |
| 171 | // nonempty data-mw assertion is the same test as used in |
| 172 | // cleanup. |
| 173 | && ( !WTUtils::isTplMarkerMeta( $workNode ) || !DOMDataUtils::getDataMw( $workNode )->isEmpty() ) |
| 174 | // Encapsulation info on sections should not be used to |
| 175 | // traverse with since it's designed to be dropped and |
| 176 | // may have expanded ranges. |
| 177 | && !WTUtils::isParsoidSectionTag( $workNode ) |
| 178 | ) { |
| 179 | $about = DOMCompat::getAttribute( $workNode, 'about' ); |
| 180 | $aboutSiblings = WTUtils::getAboutSiblings( $workNode, $about ); |
| 181 | $state->tplInfo = (object)[ |
| 182 | 'first' => $workNode, |
| 183 | 'last' => end( $aboutSiblings ), |
| 184 | 'clear' => false, |
| 185 | ]; |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | // Call the handlers on this workNode (DocumentFragments themselves are skipped) |
| 190 | if ( $workNode instanceof DocumentFragment ) { |
| 191 | $possibleNext = true; |
| 192 | } else { |
| 193 | $possibleNext = $this->callHandlers( $workNode, $siteConfig, $state ); |
| 194 | } |
| 195 | |
| 196 | // We may have walked past the last about sibling or want to |
| 197 | // ignore the template info in future processing. |
| 198 | // In any case, it's up to the handler returning a possible next |
| 199 | // to figure out. |
| 200 | if ( $this->traverseWithTplInfo && ( $state->tplInfo->clear ?? false ) ) { |
| 201 | $state->tplInfo = null; |
| 202 | } |
| 203 | |
| 204 | if ( $possibleNext === true ) { |
| 205 | // The 'continue processing' case: descend into children, then move to the next sibling |
| 206 | if ( $workNode->hasChildNodes() ) { |
| 207 | $this->traverseInternal( |
| 208 | false, $siteConfig, $workNode->firstChild, $state |
| 209 | ); |
| 210 | } |
| 211 | if ( $isRootNode ) { |
| 212 | // Confine the traverse to the tree rooted at the root node. |
| 213 | // `$workNode->nextSibling` would take us outside that. |
| 214 | $possibleNext = null; |
| 215 | } else { |
| 216 | $possibleNext = $workNode->nextSibling; |
| 217 | } |
| 218 | } elseif ( $isRootNode && $possibleNext !== $workNode ) { |
| 219 | $isRootNode = false; |
| 220 | } |
| 221 | |
| 222 | // Clear the template info after reaching the last about sibling. |
| 223 | if ( |
| 224 | $this->traverseWithTplInfo && |
| 225 | ( ( $state->tplInfo->last ?? null ) === $workNode ) |
| 226 | ) { |
| 227 | $state->tplInfo = null; |
| 228 | } |
| 229 | |
| 230 | $workNode = $possibleNext; |
| 231 | } |
| 232 | } |
| 233 | } |