Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
8.77% |
5 / 57 |
|
40.00% |
2 / 5 |
CRAP | |
0.00% |
0 / 1 |
DOMTraverser | |
8.77% |
5 / 57 |
|
40.00% |
2 / 5 |
713.33 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
addHandler | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
callHandlers | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
72 | |||
traverse | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
traverseInternal | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
380 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Utils; |
5 | |
6 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
7 | use Wikimedia\Parsoid\DOM\Element; |
8 | use Wikimedia\Parsoid\DOM\Node; |
9 | use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; |
10 | |
11 | /** |
12 | * Class for helping us traverse the DOM. |
13 | * |
14 | * This class currently does a pre-order depth-first traversal. |
15 | * See {@link DOMPostOrder} for post-order traversal. |
16 | */ |
class DOMTraverser {
	/**
	 * List of handlers to call on each node, in registration order.
	 * Each handler is an array with the following fields:
	 * - action: a callable to call
	 * - nodeName: if non-null, only call it on nodes with this name
	 * @var array<array{action:callable,nodeName:?string}>
	 * @see addHandler()
	 */
	private array $handlers = [];

	/**
	 * Should the handlers be called on attribute-embedded-HTML strings?
	 */
	private bool $applyToAttributeEmbeddedHTML;

	/**
	 * Should the traversal maintain $state->tplInfo (first/last node of the
	 * template/extension encapsulation currently being walked)?
	 */
	private bool $traverseWithTplInfo;

	/**
	 * @param bool $traverseWithTplInfo Track encapsulation info in $state->tplInfo
	 *   while traversing.
	 * @param bool $applyToAttributeEmbeddedHTML Also run the traversal over HTML
	 *   embedded in element attributes.
	 */
	public function __construct( bool $traverseWithTplInfo = false, bool $applyToAttributeEmbeddedHTML = false ) {
		$this->traverseWithTplInfo = $traverseWithTplInfo;
		$this->applyToAttributeEmbeddedHTML = $applyToAttributeEmbeddedHTML;
	}

	/**
	 * Add a handler to the DOM traverser.
	 *
	 * Handlers are called in the order in which they were added.
	 *
	 * @param ?string $nodeName An optional node name filter
	 * @param callable $action A callback, called on each node we traverse that matches nodeName.
	 *   Will be called with the following parameters:
	 *   - Node $node: the node being processed
	 *   - ?DTState $state: the state passed to traverse() (may be null).
	 *   Return value: Node|null|true.
	 *   - true: proceed normally
	 *   - Node: traversal will continue on the new node (further handlers will not be called
	 *     on the current node); after processing it and its siblings, it will continue with the
	 *     next sibling of the closest ancestor which has one.
	 *   - null: like the Node case, except there is no new node to process before continuing.
	 */
	public function addHandler( ?string $nodeName, callable $action ): void {
		$this->handlers[] = [
			'action' => $action,
			'nodeName' => $nodeName,
		];
	}

	/**
	 * Run every matching handler on $node, stopping at the first handler
	 * that returns something other than `true`.
	 *
	 * @param Node $node
	 * @param ?ParsoidExtensionAPI $extAPI Dereferenced when attribute-embedded
	 *   HTML processing is enabled, so it must be non-null in that mode.
	 * @param DTState|null $state
	 * @return Node|null|true|mixed `true` if all handlers asked to proceed normally,
	 *   otherwise whatever the aborting handler returned.
	 */
	private function callHandlers( Node $node, ?ParsoidExtensionAPI $extAPI, ?DTState $state ) {
		$name = DOMCompat::nodeName( $node );

		// Process embedded HTML first since the handlers below might
		// return a different node which aborts processing. By processing
		// attributes first, we ensure attributes are always processed.
		if ( $node instanceof Element && $this->applyToAttributeEmbeddedHTML ) {
			// The closure is static, so hand it the traverser explicitly.
			$self = $this;
			ContentUtils::processAttributeEmbeddedHTML(
				$extAPI,
				$node,
				static function ( string $html ) use ( $self, $extAPI, $state ) {
					$dom = $extAPI->htmlToDom( $html );
					// We are processing a nested document (which by definition
					// is not a top-level document).
					// FIXME:
					// 1. This argument replicates existing behavior but is it sound?
					//    In any case, we should first replicate existing behavior
					//    and revisit this later.
					// 2. It is not clear if creating a *new* state is the right thing
					//    or if reusing *parts* of the old state is the right thing.
					//    One of the places where this matters is around the use of
					//    $state->tplInfo. One could probably find arguments for either
					//    direction. But, "independent parsing" semantics which Parsoid
					//    is aiming for would lead us to use a new state or even a new
					//    traversal object here and that feels a little bit "more correct"
					//    than reusing partial state.
					$newState = $state ? new DTState( $state->env, $state->options, false ) : null;
					$self->traverse( $extAPI, $dom, $newState );
					return $extAPI->domToHtml( $dom, true, true );
				}
			);
		}

		foreach ( $this->handlers as $handler ) {
			$filter = $handler['nodeName'];
			if ( $filter !== null && $filter !== $name ) {
				continue;
			}
			$result = ( $handler['action'] )( $node, $state );
			if ( $result !== true ) {
				// Abort processing for this node
				return $result;
			}
		}
		return true;
	}

	/**
	 * Traverse the DOM and fire the handlers that are registered.
	 *
	 * Handlers can return
	 * - the next node to process: aborts processing for current node (ie. no further handlers are
	 *   called) and continues processing on returned node. Essentially, that node and its siblings
	 *   replace the current node and its siblings for the purposes of the traversal; after they
	 *   are fully processed, the algorithm moves back to the parent of $workNode to look for
	 *   the next sibling.
	 * - `null`: same as above, except it continues from the next sibling of the parent (or if
	 *   that does not exist, the next sibling of the grandparent etc). This is so that returning
	 *   `$workNode->nextSibling` works even when workNode is a last child of its parent.
	 * - `true`: continues regular processing on current node.
	 *
	 * @param ?ParsoidExtensionAPI $extAPI
	 * @param Node $workNode The starting node for the traversal.
	 *   The traversal could go beyond the subtree rooted at $workNode if
	 *   the handlers called during traversal return an arbitrary node elsewhere
	 *   in the DOM in which case the traversal scope can be pretty much the whole
	 *   DOM that $workNode is present in. This behavior would be confusing but
	 *   there is nothing in the traversal code to prevent that.
	 * @param DTState|null $state State object handed to every handler. Template
	 *   info can only be tracked when this is non-null.
	 */
	public function traverse( ?ParsoidExtensionAPI $extAPI, Node $workNode, ?DTState $state = null ): void {
		$this->traverseInternal( true, $extAPI, $workNode, $state );
	}

	/**
	 * Pre-order walk over $workNode and its following siblings, recursing
	 * into children.
	 *
	 * @param bool $isRootNode Whether $workNode is the node traverse() was called
	 *   on; the walk does not move on to the siblings of the root node.
	 * @param ?ParsoidExtensionAPI $extAPI
	 * @param Node $workNode
	 * @param DTState|null $state
	 */
	private function traverseInternal(
		bool $isRootNode, ?ParsoidExtensionAPI $extAPI, Node $workNode, ?DTState $state
	): void {
		while ( $workNode !== null ) {
			if ( $this->traverseWithTplInfo && $workNode instanceof Element ) {
				// Identify the first template/extension node.
				// You'd think the !tplInfo check isn't necessary since
				// we don't have nested transclusions, however, you can
				// get extensions in transclusions.
				if (
					// Without a state object there is nowhere to record the
					// info; every read of $state->tplInfo below tolerates a
					// null state, so the write has to as well.
					$state !== null
					&& !( $state->tplInfo ?? null )
					&& WTUtils::isFirstEncapsulationWrapperNode( $workNode )
					// Ensure this isn't just a meta marker, since we might
					// not be traversing after encapsulation.  Note that the
					// valid data-mw assertion is the same test as used in
					// cleanup.
					&& ( !WTUtils::isTplMarkerMeta( $workNode ) || DOMDataUtils::validDataMw( $workNode ) )
					// Encapsulation info on sections should not be used to
					// traverse with since it's designed to be dropped and
					// may have expanded ranges.
					&& !WTUtils::isParsoidSectionTag( $workNode )
				) {
					$about = DOMCompat::getAttribute( $workNode, 'about' );
					$aboutSiblings = WTUtils::getAboutSiblings( $workNode, $about );
					$state->tplInfo = (object)[
						'first' => $workNode,
						'last' => end( $aboutSiblings ),
						'clear' => false,
					];
				}
			}

			// Call the handlers on this workNode
			if ( $workNode instanceof DocumentFragment ) {
				// Handlers are not run on fragments themselves, only on their contents.
				$possibleNext = true;
			} else {
				$possibleNext = $this->callHandlers( $workNode, $extAPI, $state );
			}

			// We may have walked past the last about sibling or want to
			// ignore the template info in future processing.
			// In any case, it's up to the handler returning a possible next
			// to figure out.
			if ( $this->traverseWithTplInfo && ( $state->tplInfo->clear ?? false ) ) {
				$state->tplInfo = null;
			}

			if ( $possibleNext === true ) {
				// The 'continue processing' case
				if ( $workNode->hasChildNodes() ) {
					$this->traverseInternal(
						false, $extAPI, $workNode->firstChild, $state
					);
				}
				if ( $isRootNode ) {
					// Confine the traverse to the tree rooted at the root node.
					// `$workNode->nextSibling` would take us outside that.
					$possibleNext = null;
				} else {
					$possibleNext = $workNode->nextSibling;
				}
			} elseif ( $isRootNode && $possibleNext !== $workNode ) {
				// A handler redirected the traversal away from the root node,
				// so the node we continue with is no longer the root.
				$isRootNode = false;
			}

			// Clear the template info after reaching the last about sibling.
			if (
				$this->traverseWithTplInfo &&
				( ( $state->tplInfo->last ?? null ) === $workNode )
			) {
				$state->tplInfo = null;
			}

			$workNode = $possibleNext;
		}
	}
}