Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
82.95% |
107 / 129 |
|
52.63% |
10 / 19 |
CRAP | |
0.00% |
0 / 1 |
Serializer | |
82.95% |
107 / 129 |
|
52.63% |
10 / 19 |
68.46 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getResult | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getRootNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getParentNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getLastChild | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
startDocument | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
endDocument | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 | |||
interpretPlacement | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
5.68 | |||
characters | |
85.71% |
12 / 14 |
|
0.00% |
0 / 1 |
5.07 | |||
insertElement | |
84.00% |
21 / 25 |
|
0.00% |
0 / 1 |
6.15 | |||
endTag | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
5 | |||
serializeNode | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
5 | |||
doctype | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
comment | |
69.23% |
9 / 13 |
|
0.00% |
0 / 1 |
5.73 | |||
error | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
mergeAttributes | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
removeNode | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
3.02 | |||
reparentChildren | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
dump | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace Wikimedia\RemexHtml\Serializer; |
4 | |
5 | use Wikimedia\RemexHtml\PropGuard; |
6 | use Wikimedia\RemexHtml\Tokenizer\Attributes; |
7 | use Wikimedia\RemexHtml\Tokenizer\PlainAttributes; |
8 | use Wikimedia\RemexHtml\TreeBuilder\Element; |
9 | use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder; |
10 | |
/**
 * A TreeHandler which builds a serialized representation of a document, by
 * encoding elements when the end tags are seen. This is faster than building
 * a DOM and then serializing it, even if you use DOMDocument::saveHTML().
 *
 * Every node keeps a list of children whose entries are either strings
 * (output that has already been encoded) or SerializerNode objects (elements
 * that have not been serialized yet).
 */
class Serializer implements AbstractSerializer {
	use PropGuard;

	/**
	 * A node corresponding to the Document. It is created by startDocument()
	 * and reset to null by endDocument().
	 *
	 * @var SerializerNode
	 */
	private $root;

	/**
	 * The error callback
	 *
	 * @var callable|null
	 */
	private $errorCallback;

	/**
	 * The Formatter implementation
	 *
	 * @var Formatter
	 */
	private $formatter;

	/**
	 * All active SerializerNode objects in an array, so that they can be
	 * referred to by integer indexes. This is a way to emulate weak references,
	 * to avoid circular references, allowing nodes to be freed.
	 *
	 * @var SerializerNode[]
	 * @internal
	 */
	protected $nodes = [];

	/**
	 * True if we are parsing a fragment. The children of the <html> element
	 * will be serialized, instead of the whole document.
	 *
	 * @var bool
	 */
	private $isFragment;

	/**
	 * The result string
	 *
	 * @var string
	 */
	private $result = '';

	/**
	 * Constructor
	 *
	 * @param Formatter $formatter
	 * @param callable|null $errorCallback A function which is called with the
	 *   details of each parse error
	 */
	public function __construct( Formatter $formatter, $errorCallback = null ) {
		$this->formatter = $formatter;
		$this->errorCallback = $errorCallback;
	}

	/**
	 * Get the final string. This can only be called after endDocument() is received.
	 *
	 * @return string
	 */
	public function getResult() {
		return $this->result;
	}

	/**
	 * Get the root SerializerNode.
	 *
	 * The root only exists between startDocument() and endDocument(); outside
	 * of that window the underlying property is null.
	 *
	 * @return SerializerNode
	 */
	public function getRootNode() {
		return $this->root;
	}

	/**
	 * Get the parent SerializerNode of a given SerializerNode
	 *
	 * The parent is looked up in the table of active nodes by the node's
	 * parentId.
	 *
	 * @param SerializerNode $node
	 * @return SerializerNode
	 */
	public function getParentNode( SerializerNode $node ) {
		return $this->nodes[$node->parentId];
	}

	/**
	 * Get the last child of a given SerializerNode
	 *
	 * @param SerializerNode $node
	 * @return SerializerNode|string|null The last entry of the node's child
	 *   list, or null if the list is empty
	 */
	public function getLastChild( SerializerNode $node ) {
		$children = $node->children;
		$lastChildIndex = count( $children ) - 1;
		return $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;
	}

	/**
	 * Reset the serializer state for a new document: create the root node,
	 * register it as the only active node, record whether a fragment is being
	 * parsed, and start the result with the formatter's document prefix.
	 *
	 * @param string|null $fragmentNamespace Non-null when parsing a fragment
	 * @param string|null $fragmentName
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		$this->root = new SerializerNode( 0, 0, '', '', new PlainAttributes, false );
		$this->nodes = [ $this->root ];
		$this->isFragment = $fragmentNamespace !== null;
		$this->result = $this->formatter->startDocument( $fragmentNamespace, $fragmentName );
	}

	/**
	 * Serialize everything that is still pending and append it to the result,
	 * then release the root node and the table of active nodes.
	 *
	 * @param int $pos
	 */
	public function endDocument( $pos ) {
		if ( $this->isFragment ) {
			// For a fragment, only the contents of the first child of the
			// document node are emitted
			$root = $this->root->children[0];
		} else {
			$root = $this->root;
		}
		foreach ( $root->children as $child ) {
			if ( is_string( $child ) ) {
				$this->result .= $child;
			} else {
				$this->result .= $this->serializeNode( $root, $child, false );
			}
		}
		// @phan-suppress-next-line PhanTypeMismatchPropertyProbablyReal
		$this->root = null;
		$this->nodes = [];
	}

	/**
	 * Translate a preposition and reference element into the node that will
	 * receive the new child, and the reference node.
	 *
	 * @param int $preposition
	 * @param Element|SerializerNode|null $refElement
	 * @return array A two-element list [ parent node, reference node ]. The
	 *   reference node is null for TreeBuilder::ROOT. For TreeBuilder::BEFORE
	 *   the parent is the reference node's parent; otherwise the reference
	 *   node itself is the parent.
	 * @throws SerializerError If the reference element cannot be resolved to
	 *   a SerializerNode
	 */
	protected function interpretPlacement( $preposition, $refElement ) {
		if ( $preposition === TreeBuilder::ROOT ) {
			return [ $this->root, null ];
		}
		if ( $refElement instanceof Element ) {
			$refNode = $refElement->userData;
			if ( !( $refNode instanceof SerializerNode ) ) {
				// The element was never passed to insertElement(), so there is
				// nothing to insert relative to. Fail here with a clear
				// message instead of on a later null property access.
				throw new SerializerError( "Ref element has no serializer node" );
			}
		} elseif ( $refElement instanceof SerializerNode ) {
			$refNode = $refElement;
		} else {
			throw new SerializerError( "Invalid type of ref element" );
		}
		if ( $preposition === TreeBuilder::BEFORE ) {
			return [ $this->nodes[$refNode->parentId], $refNode ];
		} else {
			return [ $refNode, $refNode ];
		}
	}

	/**
	 * Encode a run of text with the formatter and add it to the child list
	 * of the node selected by $preposition and $refElement.
	 *
	 * @param int $preposition
	 * @param Element|SerializerNode|null $refElement
	 * @param string $text
	 * @param int $start
	 * @param int $length
	 * @param int $sourceStart
	 * @param int $sourceLength
	 * @throws SerializerError If an insertion before the reference node is
	 *   requested but the reference node is not the parent's last child
	 */
	public function characters( $preposition, $refElement, $text, $start, $length,
		$sourceStart, $sourceLength
	) {
		[ $parent, $refNode ] = $this->interpretPlacement( $preposition, $refElement );
		$encoded = (string)$this->formatter->characters( $parent, $text, $start, $length );

		$children =& $parent->children;
		$lastChildIndex = count( $children ) - 1;
		$lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;

		if ( $preposition === TreeBuilder::BEFORE ) {
			// Insert before element. Only insertion directly before the last
			// child is supported.
			if ( $lastChild !== $refNode ) {
				$refIndex = array_search( $refNode, $children, true );
				throw new SerializerError( "invalid insert position $refIndex/$lastChildIndex" );
			}
			$children[$lastChildIndex] = $encoded;
			$children[$lastChildIndex + 1] = $refNode;
		} else {
			// Append to the list of children, merging with a trailing string
			if ( is_string( $lastChild ) ) {
				$children[$lastChildIndex] .= $encoded;
			} else {
				$children[] = $encoded;
			}
		}
	}

	/**
	 * Insert an element
	 *
	 * If the element already carries a SerializerNode in its userData, this is
	 * treated as a reparenting operation: the node's slot in the old parent is
	 * blanked and its parent pointer is updated. Otherwise a new
	 * SerializerNode is created and registered under the element's uid.
	 *
	 * @param int $preposition
	 * @param Element|SerializerNode|null $refElement
	 * @param Element $element
	 * @param bool $void
	 * @param int $sourceStart
	 * @param int $sourceLength
	 * @throws SerializerError If a reparented node cannot be found in its old
	 *   parent, or if an insertion before the reference node is requested but
	 *   the reference node is not the parent's last child
	 */
	public function insertElement( $preposition, $refElement, Element $element, $void,
		$sourceStart, $sourceLength
	) {
		[ $parent, $refNode ] = $this->interpretPlacement( $preposition, $refElement );
		$children =& $parent->children;
		$lastChildIndex = count( $children ) - 1;
		$lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;

		if ( $element->userData ) {
			// This element has already been inserted, this is a reparenting operation
			$self = $element->userData;
			'@phan-var SerializerNode $self'; /** @var SerializerNode $self */
			$oldParent = $this->nodes[$self->parentId];
			$oldChildren =& $oldParent->children;
			$oldChildIndex = array_search( $self, $oldChildren, true );
			if ( $oldChildIndex === false ) {
				throw new SerializerError( "cannot find node to reparent: " .
					$element->getDebugTag() );
			}
			// Remove from the old parent, update parent pointer. The slot is
			// replaced by an empty string rather than spliced out, so the
			// indexes of the other children do not change.
			$oldChildren[$oldChildIndex] = '';
			$self->parentId = $parent->id;
		} else {
			// Inserting an element which has not been seen before
			$id = $element->uid;
			$self = new SerializerNode( $id, $parent->id, $element->namespace,
				$element->name, $element->attrs, $void );
			$this->nodes[$id] = $element->userData = $self;
		}

		if ( $preposition === TreeBuilder::BEFORE ) {
			// Insert before element. Only insertion directly before the last
			// child is supported.
			if ( $lastChild !== $refNode ) {
				$refIndex = array_search( $refNode, $children, true );
				throw new SerializerError( "invalid insert position $refIndex/$lastChildIndex" );
			}
			$children[$lastChildIndex] = $self;
			$children[$lastChildIndex + 1] = $refNode;
		} else {
			// Append to the list of children
			$children[] = $self;
		}
	}

	/**
	 * Handle the end of an element by replacing its node in the parent's
	 * child list with its serialized string, releasing the node and its
	 * descendants from the table of active nodes.
	 *
	 * Nothing is done for <head> elements or for virtual elements.
	 *
	 * @param Element $element
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function endTag( Element $element, $sourceStart, $sourceLength ) {
		if ( $element->htmlName === 'head' || $element->isVirtual ) {
			// <head> elements are immortal
			return;
		}
		$self = $element->userData;
		$parent = $this->nodes[$self->parentId];
		$children =& $parent->children;
		// Search from the end of the child list
		for ( $index = count( $children ) - 1; $index >= 0; $index-- ) {
			if ( $children[$index] === $self ) {
				$children[$index] = $this->serializeNode( $parent, $self, true );
				return;
			}
		}
		// Ignore requests to end non-existent elements (this happens sometimes)
	}

	/**
	 * Serialize a specific node
	 *
	 * The contents passed to the formatter are null for a void node, and
	 * otherwise the concatenation of the node's string children and the
	 * recursively serialized node children.
	 *
	 * @param SerializerNode $parent The parent of $node
	 * @param SerializerNode $node The node to serialize
	 * @param bool $destroy If true, the node and its descendants will be removed from $this->nodes
	 * @return string
	 */
	private function serializeNode( SerializerNode $parent, SerializerNode $node, $destroy ) {
		if ( $node->void ) {
			$contents = null;
		} else {
			$contents = '';
			foreach ( $node->children as $child ) {
				if ( is_string( $child ) ) {
					$contents .= $child;
				} else {
					$contents .= $this->serializeNode( $node, $child, $destroy );
				}
			}
		}
		if ( $destroy ) {
			unset( $this->nodes[$node->id] );
		}
		return $this->formatter->element( $parent, $node, $contents );
	}

	/**
	 * Append the formatter's encoding of a doctype directly to the result.
	 *
	 * @param string $name
	 * @param string $public
	 * @param string $system
	 * @param int $quirks
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		$this->result .= $this->formatter->doctype( $name, $public, $system );
	}

	/**
	 * Encode a comment with the formatter and add it to the child list of the
	 * node selected by $preposition and $refElement.
	 *
	 * @param int $preposition
	 * @param Element|SerializerNode|null $refElement
	 * @param string $text
	 * @param int $sourceStart
	 * @param int $sourceLength
	 * @throws SerializerError If an insertion before the reference node is
	 *   requested but the reference node is not the parent's last child
	 */
	public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
		[ $parent, $refNode ] = $this->interpretPlacement( $preposition, $refElement );
		// Cast to string, as characters() does, so that a formatter which
		// returns null cannot leave a non-string, non-node entry in the child
		// list, which serializeNode() would reject later.
		$encoded = (string)$this->formatter->comment( $parent, $text );
		$children =& $parent->children;
		$lastChildIndex = count( $children ) - 1;
		$lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;

		if ( $preposition === TreeBuilder::BEFORE ) {
			// Insert before element. Only insertion directly before the last
			// child is supported.
			if ( $lastChild !== $refNode ) {
				throw new SerializerError( "invalid insert position" );
			}
			$children[$lastChildIndex] = $encoded;
			$children[$lastChildIndex + 1] = $refNode;
		} else {
			// Append to the list of children, merging with a trailing string
			if ( is_string( $lastChild ) ) {
				$children[$lastChildIndex] .= $encoded;
			} else {
				$children[] = $encoded;
			}
		}
	}

	/**
	 * Forward a parse error to the error callback, if one was supplied to
	 * the constructor.
	 *
	 * @param string $text
	 * @param int $pos
	 */
	public function error( $text, $pos ) {
		if ( $this->errorCallback ) {
			( $this->errorCallback )( $text, $pos );
		}
	}

	/**
	 * Merge attributes into an element, and point the element's
	 * SerializerNode (if it has one) at the element's attribute object.
	 *
	 * @param Element $element
	 * @param Attributes $attrs
	 * @param int $sourceStart
	 */
	public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
		$element->attrs->merge( $attrs );
		if ( $element->userData instanceof SerializerNode ) {
			$element->userData->attrs = $element->attrs;
		}
	}

	/**
	 * Remove an element's node from its parent by blanking its slot in the
	 * parent's child list. The node stays in the table of active nodes.
	 *
	 * @param Element $element
	 * @param int $sourceStart
	 * @throws SerializerError If the node is not in its parent's child list
	 */
	public function removeNode( Element $element, $sourceStart ) {
		$self = $element->userData;
		$parent = $this->nodes[$self->parentId];
		$children =& $parent->children;
		// Search from the end of the child list
		for ( $index = count( $children ) - 1; $index >= 0; $index-- ) {
			if ( $children[$index] === $self ) {
				$children[$index] = '';
				return;
			}
		}
		throw new SerializerError( "cannot find element to remove" );
	}

	/**
	 * Move all children of $element to $newParent, and insert $newParent as
	 * the only child of $element.
	 *
	 * @param Element $element
	 * @param Element $newParent
	 * @param int $sourceStart
	 */
	public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
		$self = $element->userData;
		// Detach the child list first, so that the insertion below appends
		// to an empty list
		$children = $self->children;
		$self->children = [];
		$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
		$newParentNode = $newParent->userData;
		$newParentId = $newParentNode->id;
		foreach ( $children as $child ) {
			// String children have no parent pointer to update
			if ( is_object( $child ) ) {
				$child->parentId = $newParentId;
			}
		}
		$newParentNode->children = $children;
	}

	/**
	 * Get a text representation of the current state of the serializer, for
	 * debugging.
	 *
	 * The root node is serialized without destroying anything, then the first
	 * two and last three bytes are dropped.
	 * NOTE(review): the offsets presumably strip the "<>" and "</>" wrapper
	 * that a formatter emits for the nameless root node -- confirm against
	 * the Formatter in use.
	 *
	 * @return string
	 */
	public function dump() {
		$s = $this->serializeNode( $this->root, $this->root, false );
		return substr( $s, 2, -3 ) . "\n";
	}
}