Code Coverage for /src/src/Utils/DOMTraverser.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	8.77% covered (danger)	8.77%	5 / 57	40.00% covered (danger)	40.00%	2 / 5	CRAP	0.00% covered (danger)	0.00%	0 / 1
DOMTraverser	8.77% covered (danger)	8.77%	5 / 57	40.00% covered (danger)	40.00%	2 / 5	713.33	0.00% covered (danger)	0.00%	0 / 1
__construct	0.00% covered (danger)	0.00%	0 / 2	0.00% covered (danger)	0.00%	0 / 1	2
addHandler	100.00% covered (success)	100.00%	4 / 4	100.00% covered (success)	100.00%	1 / 1	1
callHandlers	0.00% covered (danger)	0.00%	0 / 19	0.00% covered (danger)	0.00%	0 / 1	72
traverse	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
traverseInternal	0.00% covered (danger)	0.00%	0 / 31	0.00% covered (danger)	0.00%	0 / 1	380

1	<?php
2	declare( strict_types = 1 );
3
4	namespace Wikimedia\Parsoid\Utils;
5
6	use Wikimedia\Parsoid\DOM\DocumentFragment;
7	use Wikimedia\Parsoid\DOM\Element;
8	use Wikimedia\Parsoid\DOM\Node;
9	use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
10
/**
 * Class for helping us traverse the DOM.
 *
 * This class currently does a pre-order depth-first traversal.
 * See {@link DOMPostOrder} for post-order traversal.
 */
class DOMTraverser {
	/**
	 * List of handlers to call on each node. Each handler is an array with the following fields:
	 * - action: a callable to call
	 * - nodeName: if non-null, only call it on nodes with this name
	 * @var array<array{action:callable,nodeName:?string}>
	 * @see addHandler()
	 */
	private array $handlers = [];

	/**
	 * Should the handlers be called on attribute-embedded-HTML strings?
	 */
	private bool $applyToAttributeEmbeddedHTML;

	/**
	 * Should the traversal maintain `$state->tplInfo` (first/last wrapper node
	 * of the template/extension encapsulation currently being walked)?
	 */
	private bool $traverseWithTplInfo;

	/**
	 * @param bool $traverseWithTplInfo Track encapsulation info in `$state->tplInfo`
	 *   while traversing. This only has an effect when a non-null state is
	 *   passed to {@link traverse()}.
	 * @param bool $applyToAttributeEmbeddedHTML Also run the handlers on HTML
	 *   embedded in element attributes.
	 */
	public function __construct( bool $traverseWithTplInfo = false, bool $applyToAttributeEmbeddedHTML = false ) {
		$this->traverseWithTplInfo = $traverseWithTplInfo;
		$this->applyToAttributeEmbeddedHTML = $applyToAttributeEmbeddedHTML;
	}

	/**
	 * Add a handler to the DOM traverser.
	 *
	 * Handlers are called in the order in which they were added.
	 *
	 * @param ?string $nodeName An optional node name filter
	 * @param callable $action A callback, called on each node we traverse that matches nodeName.
	 *   Will be called with the following parameters:
	 *   - Node $node: the node being processed
	 *   - ?DTState $state: the state passed to traverse() (may be null).
	 *   Return value: Node|null|true.
	 *   - true: proceed normally
	 *   - Node: traversal will continue on the new node (further handlers will not be called
	 *     on the current node); after processing it and its siblings, it will continue with the
	 *     next sibling of the closest ancestor which has one.
	 *   - null: like the Node case, except there is no new node to process before continuing.
	 */
	public function addHandler( ?string $nodeName, callable $action ): void {
		$this->handlers[] = [
			'action' => $action,
			'nodeName' => $nodeName,
		];
	}

	/**
	 * Run the registered handlers on a single node.
	 *
	 * Handlers are run in registration order; the first handler that returns
	 * anything other than `true` stops the chain and its result is returned.
	 *
	 * @param Node $node
	 * @param ?ParsoidExtensionAPI $extAPI
	 * @param DTState|null $state
	 * @return Node|null|true `true` if every matching handler returned `true`,
	 *   otherwise the first non-`true` handler result.
	 */
	private function callHandlers( Node $node, ?ParsoidExtensionAPI $extAPI, ?DTState $state ) {
		$name = DOMCompat::nodeName( $node );

		// Process embedded HTML first since the handlers below might
		// return a different node which aborts processing. By processing
		// attributes first, we ensure attributes are always processed.
		if ( $node instanceof Element && $this->applyToAttributeEmbeddedHTML ) {
			$self = $this;
			ContentUtils::processAttributeEmbeddedHTML(
				$extAPI,
				$node,
				static function ( string $html ) use ( $self, $extAPI, $state ) {
					$dom = $extAPI->htmlToDom( $html );
					// We are processing a nested document (which by definition
					// is not a top-level document).
					// FIXME:
					// 1. This argument replicates existing behavior but is it sound?
					//    In any case, we should first replicate existing behavior
					//    and revisit this later.
					// 2. It is not clear if creating a new state is the right thing
					//    or if reusing parts of the old state is the right thing.
					//    One of the places where this matters is around the use of
					//    $state->tplInfo. One could probably find arguments for either
					//    direction. But, "independent parsing" semantics which Parsoid
					//    is aiming for would lead us to use a new state or even a new
					//    traversal object here and that feels a little bit "more correct"
					//    than reusing partial state.
					$newState = $state ? new DTState( $state->env, $state->options, false ) : null;
					$self->traverse( $extAPI, $dom, $newState );
					return $extAPI->domToHtml( $dom, true, true );
				}
			);
		}

		foreach ( $this->handlers as $handler ) {
			// A null nodeName filter matches every node.
			if ( $handler['nodeName'] === null || $handler['nodeName'] === $name ) {
				$result = $handler['action']( $node, $state );
				if ( $result !== true ) {
					// Abort processing for this node
					return $result;
				}
			}
		}
		return true;
	}

	/**
	 * Traverse the DOM and fire the handlers that are registered.
	 *
	 * Handlers can return
	 * - the next node to process: aborts processing for current node (ie. no further handlers are
	 *   called) and continues processing on returned node. Essentially, that node and its siblings
	 *   replace the current node and its siblings for the purposes of the traversal; after they
	 *   are fully processed, the algorithm moves back to the parent of $workNode to look for
	 *   the next sibling.
	 * - `null`: same as above, except it continues from the next sibling of the parent (or if
	 *   that does not exist, the next sibling of the grandparent etc). This is so that returning
	 *   `$workNode->nextSibling` works even when workNode is a last child of its parent.
	 * - `true`: continues regular processing on current node.
	 *
	 * @param ?ParsoidExtensionAPI $extAPI
	 * @param Node $workNode The starting node for the traversal.
	 *   The traversal could go beyond the subtree rooted at $workNode if
	 *   the handlers called during traversal return an arbitrary node elsewhere
	 *   in the DOM in which case the traversal scope can be pretty much the whole
	 *   DOM that $workNode is present in. This behavior would be confusing but
	 *   there is nothing in the traversal code to prevent that.
	 * @param DTState|null $state
	 */
	public function traverse( ?ParsoidExtensionAPI $extAPI, Node $workNode, ?DTState $state = null ): void {
		$this->traverseInternal( true, $extAPI, $workNode, $state );
	}

	/**
	 * Walk $workNode, its descendants and (unless it is the root node) its
	 * following siblings, calling the handlers on each node.
	 *
	 * @param bool $isRootNode Whether $workNode is the node traverse() was
	 *   called with; siblings of the root node are not visited.
	 * @param ?ParsoidExtensionAPI $extAPI
	 * @param Node $workNode
	 * @param DTState|null $state
	 */
	private function traverseInternal(
		bool $isRootNode, ?ParsoidExtensionAPI $extAPI, Node $workNode, ?DTState $state
	): void {
		while ( $workNode !== null ) {
			// Template info lives on the state object, so without a state
			// there is nowhere to record it.
			if ( $this->traverseWithTplInfo && $state !== null && $workNode instanceof Element ) {
				// Identify the first template/extension node.
				// You'd think the !tplInfo check isn't necessary since
				// we don't have nested transclusions, however, you can
				// get extensions in transclusions.
				if (
					!( $state->tplInfo ?? null ) && WTUtils::isFirstEncapsulationWrapperNode( $workNode )
					// Ensure this isn't just a meta marker, since we might
					// not be traversing after encapsulation. Note that the
					// valid data-mw assertion is the same test as used in
					// cleanup.
					&& ( !WTUtils::isTplMarkerMeta( $workNode ) || DOMDataUtils::validDataMw( $workNode ) )
					// Encapsulation info on sections should not be used to
					// traverse with since it's designed to be dropped and
					// may have expanded ranges.
					&& !WTUtils::isParsoidSectionTag( $workNode )
				) {
					$about = DOMCompat::getAttribute( $workNode, 'about' );
					$aboutSiblings = WTUtils::getAboutSiblings( $workNode, $about );
					$state->tplInfo = (object)[
						'first' => $workNode,
						'last' => end( $aboutSiblings ),
						'clear' => false,
					];
				}
			}

			// Call the handlers on this workNode
			if ( $workNode instanceof DocumentFragment ) {
				// Handlers are never called on the fragment itself, only on its contents.
				$possibleNext = true;
			} else {
				$possibleNext = $this->callHandlers( $workNode, $extAPI, $state );
			}

			// We may have walked past the last about sibling or want to
			// ignore the template info in future processing.
			// In any case, it's up to the handler returning a possible next
			// to figure out.
			if ( $this->traverseWithTplInfo && ( $state->tplInfo->clear ?? false ) ) {
				$state->tplInfo = null;
			}

			if ( $possibleNext === true ) {
				// The 'continue processing' case
				if ( $workNode->hasChildNodes() ) {
					$this->traverseInternal(
						false, $extAPI, $workNode->firstChild, $state
					);
				}
				if ( $isRootNode ) {
					// Confine the traverse to the tree rooted at the root node.
					// `$workNode->nextSibling` would take us outside that.
					$possibleNext = null;
				} else {
					$possibleNext = $workNode->nextSibling;
				}
			} elseif ( $isRootNode && $possibleNext !== $workNode ) {
				// A handler redirected the traversal to a different node, so the
				// node processed next is no longer the root node and its
				// siblings will be visited.
				$isRootNode = false;
			}

			// Clear the template info after reaching the last about sibling.
			if (
				$this->traverseWithTplInfo &&
				( ( $state->tplInfo->last ?? null ) === $workNode )
			) {
				$state->tplInfo = null;
			}

			$workNode = $possibleNext;
		}
	}
}