MediaWiki REL1_31
RemexCompatMunger.php
Go to the documentation of this file.
1<?php
2
3namespace MediaWiki\Tidy;
4
5use RemexHtml\HTMLData;
6use RemexHtml\Serializer\Serializer;
7use RemexHtml\Serializer\SerializerNode;
8use RemexHtml\Tokenizer\Attributes;
9use RemexHtml\Tokenizer\PlainAttributes;
10use RemexHtml\TreeBuilder\TreeBuilder;
11use RemexHtml\TreeBuilder\TreeHandler;
12use RemexHtml\TreeBuilder\Element;
13
/**
 * TreeHandler which sits in front of a RemexHtml Serializer and rewrites the
 * incoming tree-construction events before forwarding them.
 *
 * Bare non-blank text and inline elements that appear directly under a node
 * flagged as needing p-wrapping (html, body, blockquote, or the document
 * root) are placed inside a synthetic <mw:p-wrap> element. When a block
 * element arrives inside a p-wrapper, or inline content arrives inside
 * formatting elements that are in block context, the open formatting
 * elements are cloned ("tag stack split") so that the p-wrapper never ends
 * up containing a block element.
 *
 * Per-node bookkeeping is kept in a RemexMungerData object stored in the
 * snData property of each SerializerNode.
 */
class RemexCompatMunger implements TreeHandler {
	/**
	 * Element names which are treated as inline by insertElement(). Anything
	 * not listed here is treated as a block for p-wrapping purposes.
	 *
	 * @var array<string,bool>
	 */
	private static $onlyInlineElements = [
		"a" => true,
		"abbr" => true,
		"acronym" => true,
		"applet" => true,
		"b" => true,
		"basefont" => true,
		"bdo" => true,
		"big" => true,
		"br" => true,
		"button" => true,
		"cite" => true,
		"code" => true,
		"del" => true,
		"dfn" => true,
		"em" => true,
		"font" => true,
		"i" => true,
		"iframe" => true,
		"img" => true,
		"input" => true,
		"ins" => true,
		"kbd" => true,
		"label" => true,
		"legend" => true,
		"map" => true,
		"object" => true,
		"param" => true,
		"q" => true,
		"rb" => true,
		"rbc" => true,
		"rp" => true,
		"rt" => true,
		"rtc" => true,
		"ruby" => true,
		"s" => true,
		"samp" => true,
		"select" => true,
		"small" => true,
		"span" => true,
		"strike" => true,
		"strong" => true,
		"sub" => true,
		"sup" => true,
		"textarea" => true,
		"tt" => true,
		"u" => true,
		"var" => true,
		// Those defined in tidy.conf
		"video" => true,
		"audio" => true,
		"bdi" => true,
		"data" => true,
		"time" => true,
		"mark" => true,
	];

	/**
	 * Element names which are marked as splittable when they are inserted
	 * under a p-wrapper or under another splittable element. Only splittable
	 * elements are cloned by splitTagStack().
	 *
	 * @var array<string,bool>
	 */
	private static $formattingElements = [
		'a' => true,
		'b' => true,
		'big' => true,
		'code' => true,
		'em' => true,
		'font' => true,
		'i' => true,
		'nobr' => true,
		's' => true,
		'small' => true,
		'strike' => true,
		'strong' => true,
		'tt' => true,
		'u' => true,
	];

	/**
	 * The downstream serializer which receives the munged events.
	 *
	 * Declared explicitly so that the constructor does not create a dynamic
	 * property (deprecated as of PHP 8.2).
	 *
	 * @var Serializer
	 */
	private $serializer;

	/**
	 * @param Serializer $serializer The serializer to forward events to
	 */
	public function __construct( Serializer $serializer ) {
		$this->serializer = $serializer;
	}

	/**
	 * Forward the start of the document and flag the root node as needing
	 * p-wrapping, so that bare text or inline elements at the top level get
	 * wrapped.
	 *
	 * @param string|null $fragmentNamespace
	 * @param string|null $fragmentName
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		$this->serializer->startDocument( $fragmentNamespace, $fragmentName );
		$root = $this->serializer->getRootNode();
		$root->snData = new RemexMungerData;
		$root->snData->needsPWrapping = true;
	}

	/**
	 * Forward the end of the document unchanged.
	 *
	 * @param int $pos
	 */
	public function endDocument( $pos ) {
		$this->serializer->endDocument( $pos );
	}

	/**
	 * Work out where an insertion should really go, taking into account any
	 * diversion into a p-wrapper or into cloned formatting elements.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element supplied by the
	 *   tree builder
	 * @return array A two-element list: the SerializerNode which will be the
	 *   parent of the inserted node, and the reference node to pass to the
	 *   serializer (null for TreeBuilder::ROOT)
	 */
	private function getParentForInsert( $preposition, $refElement ) {
		if ( $preposition === TreeBuilder::ROOT ) {
			return [ $this->serializer->getRootNode(), null ];
		} elseif ( $preposition === TreeBuilder::BEFORE ) {
			$refNode = $refElement->userData;
			return [ $this->serializer->getParentNode( $refNode ), $refNode ];
		} else {
			$refNode = $refElement->userData;
			$refData = $refNode->snData;
			if ( $refData->currentCloneElement ) {
				// Follow a chain of clone links if necessary
				$origRefData = $refData;
				while ( $refData->currentCloneElement ) {
					$refElement = $refData->currentCloneElement;
					$refNode = $refElement->userData;
					$refData = $refNode->snData;
				}
				// Cache the end of the chain in the requested element
				$origRefData->currentCloneElement = $refElement;
			} elseif ( $refData->childPElement ) {
				// A p-wrapper is open under this node, so divert into it
				$refElement = $refData->childPElement;
				$refNode = $refElement->userData;
			}
			return [ $refNode, $refNode ];
		}
	}

	/**
	 * Insert a p-wrapper.
	 *
	 * Creates an <mw:p-wrap> element under $parent, records $parent as its
	 * wrap base, and registers it as the open p-wrapper of $parent so that
	 * subsequent insertions under $parent are diverted into it.
	 *
	 * @param SerializerNode $parent The node to insert the wrapper under
	 * @param int $sourceStart
	 * @return SerializerNode The node of the new p-wrapper
	 */
	private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
		$pWrap = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
		$this->serializer->insertElement( TreeBuilder::UNDER, $parent, $pWrap, false,
			$sourceStart, 0 );
		$data = new RemexMungerData;
		$data->isPWrapper = true;
		$data->wrapBaseNode = $parent;
		$pWrap->userData->snData = $data;
		$parent->snData->childPElement = $pWrap;
		return $pWrap->userData;
	}

	/**
	 * Insert a run of characters, creating a p-wrapper or splitting the tag
	 * stack first if the text would otherwise end up in block context.
	 *
	 * @param int $preposition
	 * @param Element|null $refElement
	 * @param string $text The buffer containing the text
	 * @param int $start Offset of the text within $text
	 * @param int $length Length of the text within $text
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function characters( $preposition, $refElement, $text, $start, $length,
		$sourceStart, $sourceLength
	) {
		// Blank means the whole range consists of tab, LF, FF, CR or space
		$isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;

		list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
		$parentData = $parent->snData;

		if ( $preposition === TreeBuilder::UNDER ) {
			if ( $parentData->needsPWrapping && !$isBlank ) {
				// Add a p-wrapper for bare text under body/blockquote
				$refNode = $this->insertPWrapper( $refNode, $sourceStart );
				$parent = $refNode;
				$parentData = $parent->snData;
			} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
				// The parent is splittable and in block mode, so split the tag stack
				$refNode = $this->splitTagStack( $refNode, true, $sourceStart );
				$parent = $refNode;
				$parentData = $parent->snData;
			}
		}

		if ( !$isBlank ) {
			// Non-whitespace characters detected
			$parentData->nonblankNodeCount++;
		}
		$this->serializer->characters( $preposition, $refNode, $text, $start,
			$length, $sourceStart, $sourceLength );
	}

	/**
	 * Debugging hook. This is a no-op unless the echo statement in the body
	 * is uncommented.
	 *
	 * @param string $msg
	 */
	private function trace( $msg ) {
		// echo "[RCM] $msg\n";
	}

	/**
	 * Insert or reparent an element. Create p-wrappers or split the tag
	 * stack as necessary.
	 *
	 * The bracketed labels in the comments below name the situation being
	 * handled as "parent state / element type", where the element type is
	 * b (block) or i (inline, i.e. listed in self::$onlyInlineElements) and
	 * the parent state is:
	 *
	 *   - A: a node which needs p-wrapping (needsPWrapping)
	 *   - B: a p-wrapper (isPWrapper)
	 *   - C: a descendant of a p-wrapper (ancestorPNode is set); CS if the
	 *     parent is splittable, CU if it is not
	 *   - D: not a descendant of a p-wrapper; DS if the parent is
	 *     splittable, DU if it is not
	 *
	 * @param int $preposition
	 * @param Element|null $refElement
	 * @param Element $element The element to insert
	 * @param bool $void
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function insertElement( $preposition, $refElement, Element $element, $void,
		$sourceStart, $sourceLength
	) {
		list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
		$parentData = $parent->snData;
		$elementName = $element->htmlName;

		$inline = isset( self::$onlyInlineElements[$elementName] );
		$under = $preposition === TreeBuilder::UNDER;

		if ( $under && $parentData->isPWrapper && !$inline ) {
			// [B/b] The element is non-inline and the parent is a p-wrapper,
			// close the parent and insert into its parent instead
			$this->trace( 'insert B/b' );
			$newParent = $this->serializer->getParentNode( $parent );
			$parent = $newParent;
			$parentData = $parent->snData;
			// Cancel the diversion into the p-wrapper
			$parentData->childPElement = null;
			$newRef = $refElement->userData;
		} elseif ( $under && $parentData->isSplittable
			&& (bool)$parentData->ancestorPNode !== $inline
		) {
			// [CS/b, DS/i] The parent is splittable and the current element is
			// inline in block context, or if the current element is a block
			// under a p-wrapper, split the tag stack.
			$this->trace( $inline ? 'insert DS/i' : 'insert CS/b' );
			$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
			$parent = $newRef;
			$parentData = $parent->snData;
		} elseif ( $under && $parentData->needsPWrapping && $inline ) {
			// [A/i] If the element is inline and we are in body/blockquote,
			// we need to create a p-wrapper
			$this->trace( 'insert A/i' );
			$newRef = $this->insertPWrapper( $newRef, $sourceStart );
			$parent = $newRef;
			$parentData = $parent->snData;
		} elseif ( $parentData->ancestorPNode && !$inline ) {
			// [CU/b] If the element is non-inline and (despite attempting to
			// split above) there is still an ancestor p-wrap, disable that
			// p-wrap
			$this->trace( 'insert CU/b' );
			$this->disablePWrapper( $parent, $sourceStart );
		} else {
			// [A/b, B/i, C/i, D/b, DU/i] insert as normal
			$this->trace( 'insert normal' );
		}

		// An element with element children is a non-blank element
		$parentData->nonblankNodeCount++;

		// Insert the element downstream and so initialise its userData
		$this->serializer->insertElement( $preposition, $newRef,
			$element, $void, $sourceStart, $sourceLength );

		// Initialise snData
		if ( !$element->userData->snData ) {
			$elementData = $element->userData->snData = new RemexMungerData;
		} else {
			$elementData = $element->userData->snData;
		}
		if ( ( $parentData->isPWrapper || $parentData->isSplittable )
			&& isset( self::$formattingElements[$elementName] )
		) {
			$elementData->isSplittable = true;
		}
		if ( $parentData->isPWrapper ) {
			$elementData->ancestorPNode = $parent;
		} elseif ( $parentData->ancestorPNode ) {
			$elementData->ancestorPNode = $parentData->ancestorPNode;
		}
		if ( $parentData->wrapBaseNode ) {
			$elementData->wrapBaseNode = $parentData->wrapBaseNode;
		} elseif ( $parentData->needsPWrapping ) {
			$elementData->wrapBaseNode = $parent;
		}
		if ( $elementName === 'body'
			|| $elementName === 'blockquote'
			|| $elementName === 'html'
		) {
			$elementData->needsPWrapping = true;
		}
	}

	/**
	 * Clone nodes in a stack range and return the new parent.
	 *
	 * The range runs from $parentNode up to, but not including, the
	 * ancestor p-wrapper if there is one, or otherwise the wrap base. The
	 * clones are inserted under a new p-wrapper (inline mode) or directly
	 * under the wrap base (block mode). Original nodes in the range which
	 * have no non-blank content are removed afterwards.
	 *
	 * @param SerializerNode $parentNode The innermost node of the range
	 * @param bool $inline True to place the clones under a new p-wrapper,
	 *   false to place them under the wrap base
	 * @param int $pos The source position
	 * @return SerializerNode The innermost cloned node, or the p-wrapper or
	 *   wrap base if the range was empty
	 * @throws \Exception If the root is reached before the end of the range
	 */
	private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
		$parentData = $parentNode->snData;
		$wrapBase = $parentData->wrapBaseNode;
		$pWrap = $parentData->ancestorPNode;
		// Stop at the open p-wrapper if there is one, otherwise at the wrap base
		$cloneEnd = $pWrap ?: $wrapBase;

		$serializer = $this->serializer;
		$node = $parentNode;
		$root = $serializer->getRootNode();
		$nodes = [];
		$removableNodes = [];
		while ( $node !== $cloneEnd ) {
			$nextParent = $serializer->getParentNode( $node );
			if ( $nextParent === $root ) {
				throw new \Exception( 'Did not find end of clone range' );
			}
			$nodes[] = $node;
			if ( $node->snData->nonblankNodeCount === 0 ) {
				// The original would be left empty, so schedule it for
				// removal and stop counting it as content of its parent
				$removableNodes[] = $node;
				$nextParent->snData->nonblankNodeCount--;
			}
			$node = $nextParent;
		}

		if ( $inline ) {
			$pWrap = $this->insertPWrapper( $wrapBase, $pos );
			$node = $pWrap;
		} else {
			if ( $pWrap ) {
				// End the p-wrap which was open, cancel the diversion
				$wrapBase->snData->childPElement = null;
			}
			$pWrap = null;
			$node = $wrapBase;
		}

		// $nodes is ordered innermost first, so walk it backwards to
		// recreate the stack from the outside in
		for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
			$oldNode = $nodes[$i];
			$oldData = $oldNode->snData;
			$nodeParent = $node;
			$element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
			$this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
				$element, false, $pos, 0 );
			// Divert future insertions under the old node into the clone
			$oldData->currentCloneElement = $element;

			$newNode = $element->userData;
			$newData = $newNode->snData = new RemexMungerData;
			if ( $pWrap ) {
				$newData->ancestorPNode = $pWrap;
			}
			$newData->isSplittable = true;
			$newData->wrapBaseNode = $wrapBase;
			$newData->isPWrapper = $oldData->isPWrapper;

			$nodeParent->snData->nonblankNodeCount++;

			$node = $newNode;
		}
		foreach ( $removableNodes as $rNode ) {
			// removeNode() takes an Element, so wrap the node in a fake one
			$fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs );
			$fakeElement->userData = $rNode;
			$this->serializer->removeNode( $fakeElement, $pos );
		}
		return $node;
	}

	/**
	 * Find the ancestor of $node which is a child of a p-wrapper, and
	 * reparent that node so that it is placed under the parent of the
	 * p-wrapper instead. Does nothing if the p-wrapper is not the last
	 * child of its parent.
	 *
	 * @param SerializerNode $node A descendant of a p-wrapper
	 * @param int $sourceStart
	 */
	private function disablePWrapper( SerializerNode $node, $sourceStart ) {
		$nodeData = $node->snData;
		$pWrapNode = $nodeData->ancestorPNode;
		$newParent = $this->serializer->getParentNode( $pWrapNode );
		if ( $pWrapNode !== $this->serializer->getLastChild( $newParent ) ) {
			// Fostering or something? Abort!
			return;
		}

		// Walk up to the direct child of the p-wrapper, detaching every node
		// on the way from the p-wrapper
		$nextParent = $node;
		do {
			$victim = $nextParent;
			$victim->snData->ancestorPNode = null;
			$nextParent = $this->serializer->getParentNode( $victim );
		} while ( $nextParent !== $pWrapNode );

		// Make a fake Element to use in a reparenting operation
		$victimElement = new Element( $victim->namespace, $victim->name, $victim->attrs );
		$victimElement->userData = $victim;

		// Reparent
		$this->serializer->insertElement( TreeBuilder::UNDER, $newParent, $victimElement,
			false, $sourceStart, 0 );

		// Decrement nonblank node count
		$pWrapNode->snData->nonblankNodeCount--;

		// Cancel the diversion so that no more elements are inserted under this p-wrap
		$newParent->snData->childPElement = null;
	}

	/**
	 * End an element. Any p-wrapper still open under the element is ended
	 * first, then the end tag is forwarded and the links from the element
	 * to its serializer node and munger data are cleared.
	 *
	 * @param Element $element
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function endTag( Element $element, $sourceStart, $sourceLength ) {
		$data = $element->userData->snData;
		if ( $data->childPElement ) {
			$this->endTag( $data->childPElement, $sourceStart, 0 );
		}
		$this->serializer->endTag( $element, $sourceStart, $sourceLength );
		$element->userData->snData = null;
		$element->userData = null;
	}

	/**
	 * Forward a doctype unchanged.
	 *
	 * @param string $name
	 * @param string $public
	 * @param string $system
	 * @param int $quirks
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		$this->serializer->doctype( $name, $public, $system, $quirks,
			$sourceStart, $sourceLength );
	}

	/**
	 * Insert a comment at the (possibly diverted) insertion location.
	 * Comments never trigger p-wrapping or tag stack splitting.
	 *
	 * @param int $preposition
	 * @param Element|null $refElement
	 * @param string $text
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
		// Only the reference node is needed here, not the parent
		list( , $refNode ) = $this->getParentForInsert( $preposition, $refElement );
		$this->serializer->comment( $preposition, $refNode, $text,
			$sourceStart, $sourceLength );
	}

	/**
	 * Forward a parse error unchanged.
	 *
	 * @param string $text
	 * @param int $pos
	 */
	public function error( $text, $pos ) {
		$this->serializer->error( $text, $pos );
	}

	/**
	 * Forward an attribute merge unchanged.
	 *
	 * @param Element $element
	 * @param Attributes $attrs
	 * @param int $sourceStart
	 */
	public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
		$this->serializer->mergeAttributes( $element, $attrs, $sourceStart );
	}

	/**
	 * Forward a node removal unchanged.
	 *
	 * @param Element $element
	 * @param int $sourceStart
	 */
	public function removeNode( Element $element, $sourceStart ) {
		$this->serializer->removeNode( $element, $sourceStart );
	}

	/**
	 * Insert $newParent under $element and move all existing children of
	 * $element so that they become children of $newParent.
	 *
	 * @param Element $element The current parent of the children
	 * @param Element $newParent The element to move the children under
	 * @param int $sourceStart
	 */
	public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
		$self = $element->userData;
		if ( $self->snData->childPElement ) {
			// Reparent under the p-wrapper instead, so that e.g.
			//   <blockquote><mw:p-wrap>...</mw:p-wrap></blockquote>
			// becomes
			//   <blockquote><mw:p-wrap><i>...</i></mw:p-wrap></blockquote>

			// The formatting element should not be the parent of the p-wrap.
			// Without this special case, the insertElement() of the <i> below
			// would be diverted into the p-wrapper, causing infinite recursion
			// (T178632)
			$this->reparentChildren( $self->snData->childPElement, $newParent, $sourceStart );
			return;
		}

		// Detach the children before inserting the new parent, then attach
		// them to the new parent's node
		$children = $self->children;
		$self->children = [];
		$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
		$newParentNode = $newParent->userData;
		$newParentId = $newParentNode->id;
		foreach ( $children as $child ) {
			// Only object children have their parentId updated
			if ( is_object( $child ) ) {
				$this->trace( "reparent <{$child->name}>" );
				$child->parentId = $newParentId;
			}
		}
		$newParentNode->children = $children;
	}
}
disablePWrapper(SerializerNode $node, $sourceStart)
Find the ancestor of $node which is a child of a p-wrapper, and reparent that node so that it is plac...
characters( $preposition, $refElement, $text, $start, $length, $sourceStart, $sourceLength)
mergeAttributes(Element $element, Attributes $attrs, $sourceStart)
doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength)
insertElement( $preposition, $refElement, Element $element, $void, $sourceStart, $sourceLength)
Insert or reparent an element.
removeNode(Element $element, $sourceStart)
getParentForInsert( $preposition, $refElement)
splitTagStack(SerializerNode $parentNode, $inline, $pos)
Clone nodes in a stack range and return the new parent.
__construct(Serializer $serializer)
insertPWrapper(SerializerNode $parent, $sourceStart)
Insert a p-wrapper.
comment( $preposition, $refElement, $text, $sourceStart, $sourceLength)
startDocument( $fragmentNamespace, $fragmentName)
reparentChildren(Element $element, Element $newParent, $sourceStart)
endTag(Element $element, $sourceStart, $sourceLength)
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user: for example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred update object and putting those objects on a global list.
Definition deferred.txt:11