1 <?php
3 namespace MediaWiki\Tidy;
5 use RemexHtml\HTMLData;
6 use RemexHtml\Serializer\Serializer;
7 use RemexHtml\Serializer\SerializerNode;
8 use RemexHtml\Tokenizer\Attributes;
9 use RemexHtml\Tokenizer\PlainAttributes;
10 use RemexHtml\TreeBuilder\TreeBuilder;
11 use RemexHtml\TreeBuilder\TreeHandler;
12 use RemexHtml\TreeBuilder\Element;
/**
 * Tree handler which sits in front of a Serializer and rewrites the stream of
 * tree construction events before forwarding them.
 *
 * Two kinds of rewriting are done here:
 *  - Non-blank text and inline elements inserted directly under a node which
 *    has needsPWrapping set (the root, and html/body/blockquote elements) are
 *    placed inside a synthetic "mw:p-wrap" element.
 *  - When block and inline content meet inside a chain of splittable
 *    formatting elements, the chain is cloned ("split") so that the new
 *    content ends up either inside or outside a p-wrapper, as appropriate.
 *
 * Per-node state is kept in a RemexMungerData object stored in the snData
 * property of each SerializerNode.
 */
class RemexCompatMunger implements TreeHandler {
	/**
	 * Element names which insertElement() treats as inline. Any element whose
	 * name is not a key of this array is handled as non-inline.
	 */
	private static $onlyInlineElements = [
		"a" => true,
		"abbr" => true,
		"acronym" => true,
		"applet" => true,
		"b" => true,
		"basefont" => true,
		"bdo" => true,
		"big" => true,
		"br" => true,
		"button" => true,
		"cite" => true,
		"code" => true,
		"dfn" => true,
		"em" => true,
		"font" => true,
		"i" => true,
		"iframe" => true,
		"img" => true,
		"input" => true,
		"kbd" => true,
		"label" => true,
		"legend" => true,
		"map" => true,
		"object" => true,
		"param" => true,
		"q" => true,
		"rb" => true,
		"rbc" => true,
		"rp" => true,
		"rt" => true,
		"rtc" => true,
		"ruby" => true,
		"s" => true,
		"samp" => true,
		"select" => true,
		"small" => true,
		"span" => true,
		"strike" => true,
		"strong" => true,
		"sub" => true,
		"sup" => true,
		"textarea" => true,
		"tt" => true,
		"u" => true,
		"var" => true,
	];

	/**
	 * Element names which are marked as splittable when they are inserted
	 * under a p-wrapper or under another splittable element.
	 */
	private static $formattingElements = [
		'a' => true,
		'b' => true,
		'big' => true,
		'code' => true,
		'em' => true,
		'font' => true,
		'i' => true,
		'nobr' => true,
		's' => true,
		'small' => true,
		'strike' => true,
		'strong' => true,
		'tt' => true,
		'u' => true,
	];

	/**
	 * The downstream serializer which receives the rewritten events.
	 *
	 * Declared explicitly: it was previously created as a dynamic property by
	 * the constructor, which is deprecated as of PHP 8.2.
	 *
	 * @var Serializer
	 */
	private $serializer;

	/**
	 * @param Serializer $serializer The serializer to forward events to
	 */
	public function __construct( Serializer $serializer ) {
		$this->serializer = $serializer;
	}

	/**
	 * Forward the start of the document and attach munger data to the root
	 * node. The root is flagged as needing p-wrapping.
	 *
	 * @param string|null $fragmentNamespace Passed through to the serializer
	 * @param string|null $fragmentName Passed through to the serializer
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		$this->serializer->startDocument( $fragmentNamespace, $fragmentName );
		$root = $this->serializer->getRootNode();
		$root->snData = new RemexMungerData;
		$root->snData->needsPWrapping = true;
	}

	/**
	 * Forward the end of the document to the serializer unchanged.
	 *
	 * @param int $pos The source position
	 */
	public function endDocument( $pos ) {
		$this->serializer->endDocument( $pos );
	}

	/**
	 * Work out where an insertion should really go, taking into account
	 * clones made by splitTagStack() and open p-wrappers.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element given by the
	 *   tree builder; not used when the preposition is TreeBuilder::ROOT
	 * @return array A two-element list: the SerializerNode which will be the
	 *   parent of the inserted node, and the reference node to hand to the
	 *   serializer (null for TreeBuilder::ROOT)
	 */
	private function getParentForInsert( $preposition, $refElement ) {
		if ( $preposition === TreeBuilder::ROOT ) {
			return [ $this->serializer->getRootNode(), null ];
		}

		$refNode = $refElement->userData;
		if ( $preposition === TreeBuilder::BEFORE ) {
			return [ $this->serializer->getParentNode( $refNode ), $refNode ];
		}

		// Any other preposition: the insertion goes under the reference node
		// itself, unless it has been cloned or has an open p-wrapper child.
		$refData = $refNode->snData;
		if ( $refData->currentCloneElement ) {
			// Follow a chain of clone links if necessary
			$origRefData = $refData;
			while ( $refData->currentCloneElement ) {
				$refElement = $refData->currentCloneElement;
				$refNode = $refElement->userData;
				$refData = $refNode->snData;
			}
			// Cache the end of the chain in the requested element
			$origRefData->currentCloneElement = $refElement;
		} elseif ( $refData->childPElement ) {
			// Divert the insertion into the open p-wrapper
			$refElement = $refData->childPElement;
			$refNode = $refElement->userData;
		}
		return [ $refNode, $refNode ];
	}

	/**
	 * Insert a p-wrapper ("mw:p-wrap" element) under the given node and
	 * register it as that node's current childPElement.
	 *
	 * @param SerializerNode $parent The node which will contain the wrapper;
	 *   it is recorded as the wrapper's wrapBaseNode
	 * @param int $sourceStart The source position; the wrapper is inserted
	 *   with a source length of zero
	 * @return SerializerNode The serializer node of the new p-wrapper
	 */
	private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
		$pWrap = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
		$this->serializer->insertElement( TreeBuilder::UNDER, $parent, $pWrap, false,
			$sourceStart, 0 );
		$data = new RemexMungerData;
		$data->isPWrapper = true;
		$data->wrapBaseNode = $parent;
		$pWrap->userData->snData = $data;
		$parent->snData->childPElement = $pWrap;
		return $pWrap->userData;
	}

	/**
	 * Handle a text node, p-wrapping it or splitting the tag stack first if
	 * required, then forward it to the serializer.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element
	 * @param string $text The string containing the text
	 * @param int $start Offset of the text within $text
	 * @param int $length Length of the text within $text
	 * @param int $sourceStart Passed through to the serializer
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function characters( $preposition, $refElement, $text, $start, $length,
		$sourceStart, $sourceLength
	) {
		// Blank means the selected range consists only of tab, LF, FF, CR and space
		$isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;

		list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
		$parentData = $parent->snData;

		if ( $preposition === TreeBuilder::UNDER ) {
			if ( $parentData->needsPWrapping && !$isBlank ) {
				// Add a p-wrapper for bare text under body/blockquote
				$refNode = $this->insertPWrapper( $refNode, $sourceStart );
				$parent = $refNode;
				$parentData = $parent->snData;
			} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
				// The parent is splittable and in block mode, so split the tag stack
				$refNode = $this->splitTagStack( $refNode, true, $sourceStart );
				$parent = $refNode;
				$parentData = $parent->snData;
			}
		}

		if ( !$isBlank ) {
			// Non-whitespace characters detected
			$parentData->nonblankNodeCount++;
		}
		$this->serializer->characters( $preposition, $refNode, $text, $start,
			$length, $sourceStart, $sourceLength );
	}

	/**
	 * Handle insertion of an element. Depending on whether the element is
	 * inline and on the state of its prospective parent, this may close or
	 * create a p-wrapper, split the tag stack, or disable an ancestor
	 * p-wrapper before the element is forwarded to the serializer. The new
	 * node's munger data is then initialised from its parent's.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element
	 * @param Element $element The element being inserted
	 * @param bool $void Passed through to the serializer
	 * @param int $sourceStart The source position
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function insertElement( $preposition, $refElement, Element $element, $void,
		$sourceStart, $sourceLength
	) {
		list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
		$parentData = $parent->snData;
		$elementName = $element->htmlName;

		$inline = isset( self::$onlyInlineElements[$elementName] );
		$under = $preposition === TreeBuilder::UNDER;

		if ( $under && $parentData->isPWrapper && !$inline ) {
			// [B/b] The element is non-inline and the parent is a p-wrapper,
			// close the parent and insert into its parent instead
			$newParent = $this->serializer->getParentNode( $parent );
			$parent = $newParent;
			$parentData = $parent->snData;
			$pElement = $parentData->childPElement;
			$parentData->childPElement = null;
			$newRef = $refElement->userData;
			$this->endTag( $pElement, $sourceStart, 0 );
		} elseif ( $under && $parentData->isSplittable
			&& (bool)$parentData->ancestorPNode !== $inline
		) {
			// [CS/b, DS/i] The parent is splittable and the current element is
			// inline in block context, or if the current element is a block
			// under a p-wrapper, split the tag stack.
			$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
			$parent = $newRef;
			$parentData = $parent->snData;
		} elseif ( $under && $parentData->needsPWrapping && $inline ) {
			// [A/i] If the element is inline and we are in body/blockquote,
			// we need to create a p-wrapper
			$newRef = $this->insertPWrapper( $newRef, $sourceStart );
			$parent = $newRef;
			$parentData = $parent->snData;
		} elseif ( $parentData->ancestorPNode && !$inline ) {
			// [CU/b] If the element is non-inline and (despite attempting to
			// split above) there is still an ancestor p-wrap, disable that
			// p-wrap
			$this->disablePWrapper( $parent, $sourceStart );
		}
		// else [A/b, B/i, C/i, D/b, DU/i] insert as normal

		// An element with element children is a non-blank element
		$parentData->nonblankNodeCount++;

		// Insert the element downstream and so initialise its userData
		$this->serializer->insertElement( $preposition, $newRef,
			$element, $void, $sourceStart, $sourceLength );

		// Initialise snData
		if ( !$element->userData->snData ) {
			$elementData = $element->userData->snData = new RemexMungerData;
		} else {
			$elementData = $element->userData->snData;
		}
		if ( ( $parentData->isPWrapper || $parentData->isSplittable )
			&& isset( self::$formattingElements[$elementName] )
		) {
			$elementData->isSplittable = true;
		}
		// Propagate the nearest p-wrapper down to the new node
		if ( $parentData->isPWrapper ) {
			$elementData->ancestorPNode = $parent;
		} elseif ( $parentData->ancestorPNode ) {
			$elementData->ancestorPNode = $parentData->ancestorPNode;
		}
		// Propagate the node under which p-wrappers are created
		if ( $parentData->wrapBaseNode ) {
			$elementData->wrapBaseNode = $parentData->wrapBaseNode;
		} elseif ( $parentData->needsPWrapping ) {
			$elementData->wrapBaseNode = $parent;
		}
		if ( $elementName === 'body'
			|| $elementName === 'blockquote'
			|| $elementName === 'html'
		) {
			$elementData->needsPWrapping = true;
		}
	}

	/**
	 * Clone nodes in a stack range and return the new parent.
	 *
	 * The nodes from $parentNode up to (but not including) the enclosing
	 * p-wrapper, or the wrap base if there is no p-wrapper, are cloned. The
	 * clones are inserted under a freshly created p-wrapper when $inline is
	 * true, or directly under the wrap base otherwise. Each original node is
	 * linked to its clone through currentCloneElement, and original nodes
	 * with a zero nonblankNodeCount are removed from the serializer.
	 *
	 * @param SerializerNode $parentNode The old parent node
	 * @param bool $inline True if we are inserting an inline element
	 * @param int $pos The source position
	 * @return SerializerNode The innermost clone; if there was nothing to
	 *   clone, the new p-wrapper or the wrap base
	 * @throws \Exception If the root is reached before the end of the range
	 */
	private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
		$parentData = $parentNode->snData;
		$wrapBase = $parentData->wrapBaseNode;
		$pWrap = $parentData->ancestorPNode;
		// Stop at the p-wrapper if there is one, otherwise at the wrap base
		$cloneEnd = $pWrap ?: $wrapBase;

		$serializer = $this->serializer;
		$node = $parentNode;
		$root = $serializer->getRootNode();
		$nodes = [];
		$removableNodes = [];
		// Collect the nodes to clone, innermost first
		while ( $node !== $cloneEnd ) {
			$nextParent = $serializer->getParentNode( $node );
			if ( $nextParent === $root ) {
				throw new \Exception( 'Did not find end of clone range' );
			}
			$nodes[] = $node;
			if ( $node->snData->nonblankNodeCount === 0 ) {
				// Nothing but whitespace inside: drop it once the clones are
				// in place, and stop counting it as content of its parent
				$removableNodes[] = $node;
				$nextParent->snData->nonblankNodeCount--;
			}
			$node = $nextParent;
		}

		if ( $inline ) {
			$pWrap = $this->insertPWrapper( $wrapBase, $pos );
			$node = $pWrap;
		} else {
			if ( $pWrap ) {
				// End the p-wrap which was open, cancel the diversion
				$wrapBase->snData->childPElement = null;
			}
			$pWrap = null;
			$node = $wrapBase;
		}

		// Recreate the collected nodes, outermost first
		for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
			$oldNode = $nodes[$i];
			$oldData = $oldNode->snData;
			$nodeParent = $node;
			$element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
			$this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
				$element, false, $pos, 0 );
			// Redirect future insertions under the old node to its clone
			$oldData->currentCloneElement = $element;

			$newNode = $element->userData;
			$newData = $newNode->snData = new RemexMungerData;
			if ( $pWrap ) {
				$newData->ancestorPNode = $pWrap;
			}
			$newData->isSplittable = true;
			$newData->wrapBaseNode = $wrapBase;
			$newData->isPWrapper = $oldData->isPWrapper;

			$nodeParent->snData->nonblankNodeCount++;

			$node = $newNode;
		}
		foreach ( $removableNodes as $rNode ) {
			// removeNode() takes an Element, so wrap the node in a fake one
			$fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs );
			$fakeElement->userData = $rNode;
			$this->serializer->removeNode( $fakeElement, $pos );
		}
		return $node;
	}

	/**
	 * Find the ancestor of $node which is a child of a p-wrapper, and
	 * reparent that node under the p-wrapper's own parent. The ancestorPNode
	 * link is cleared on $node and on each ancestor up to that child.
	 *
	 * Nothing is done if the p-wrapper is not the last child of its parent.
	 *
	 * @param SerializerNode $node A node whose ancestorPNode is set
	 * @param int $sourceStart The source position
	 */
	private function disablePWrapper( SerializerNode $node, $sourceStart ) {
		$nodeData = $node->snData;
		$pWrapNode = $nodeData->ancestorPNode;
		$newParent = $this->serializer->getParentNode( $pWrapNode );
		if ( $pWrapNode !== $this->serializer->getLastChild( $newParent ) ) {
			// Fostering or something? Abort!
			return;
		}

		// Walk up from $node until the direct child of the p-wrapper is found
		$nextParent = $node;
		do {
			$victim = $nextParent;
			$victim->snData->ancestorPNode = null;
			$nextParent = $this->serializer->getParentNode( $victim );
		} while ( $nextParent !== $pWrapNode );

		// Make a fake Element to use in a reparenting operation
		$victimElement = new Element( $victim->namespace, $victim->name, $victim->attrs );
		$victimElement->userData = $victim;

		// Reparent
		$this->serializer->insertElement( TreeBuilder::UNDER, $newParent, $victimElement,
			false, $sourceStart, 0 );

		// Decrement nonblank node count
		$pWrapNode->snData->nonblankNodeCount--;

		// Cancel the diversion so that no more elements are inserted under this p-wrap
		$newParent->snData->childPElement = null;
	}

	/**
	 * Handle an end tag. Any p-wrapper still open under the element is ended
	 * first, then the end tag is forwarded and the element is detached from
	 * its serializer node and munger data.
	 *
	 * @param Element $element The element being ended
	 * @param int $sourceStart The source position
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function endTag( Element $element, $sourceStart, $sourceLength ) {
		$data = $element->userData->snData;
		if ( $data->childPElement ) {
			$this->endTag( $data->childPElement, $sourceStart, 0 );
		}
		$this->serializer->endTag( $element, $sourceStart, $sourceLength );
		$element->userData->snData = null;
		$element->userData = null;
	}

	/**
	 * Forward a doctype to the serializer unchanged.
	 *
	 * @param string $name
	 * @param string $public
	 * @param string $system
	 * @param int $quirks
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		$this->serializer->doctype( $name, $public, $system, $quirks,
			$sourceStart, $sourceLength );
	}

	/**
	 * Forward a comment to the serializer, using the same clone/p-wrapper
	 * aware reference node as other insertions. Comments never trigger
	 * p-wrapping or splitting themselves.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element
	 * @param string $text The comment text
	 * @param int $sourceStart Passed through to the serializer
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
		// Only the reference node is needed here, not the parent
		list( , $refNode ) = $this->getParentForInsert( $preposition, $refElement );
		$this->serializer->comment( $preposition, $refNode, $text,
			$sourceStart, $sourceLength );
	}

	/**
	 * Forward a parse error to the serializer unchanged.
	 *
	 * @param string $text The error message
	 * @param int $pos The source position
	 */
	public function error( $text, $pos ) {
		$this->serializer->error( $text, $pos );
	}

	/**
	 * Forward an attribute merge to the serializer unchanged.
	 *
	 * @param Element $element
	 * @param Attributes $attrs
	 * @param int $sourceStart
	 */
	public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
		$this->serializer->mergeAttributes( $element, $attrs, $sourceStart );
	}

	/**
	 * Forward a node removal to the serializer unchanged.
	 *
	 * @param Element $element
	 * @param int $sourceStart
	 */
	public function removeNode( Element $element, $sourceStart ) {
		$this->serializer->removeNode( $element, $sourceStart );
	}

	/**
	 * Move all children of $element into $newParent, and make $newParent a
	 * child of $element.
	 *
	 * The insertion of $newParent goes through this class's insertElement(),
	 * so the usual p-wrapping and splitting rules apply to it. The children
	 * are moved by editing the serializer nodes directly rather than by
	 * calling the serializer.
	 *
	 * @param Element $element The current parent of the children
	 * @param Element $newParent The element which will adopt them
	 * @param int $sourceStart The source position
	 */
	public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
		$self = $element->userData;
		// Detach the children before inserting, so that $newParent becomes
		// the only child of $element
		$children = $self->children;
		$self->children = [];
		$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
		$newParentNode = $newParent->userData;
		$newParentId = $newParentNode->id;
		foreach ( $children as $child ) {
			// Only object children carry a parentId to update
			if ( is_object( $child ) ) {
				$child->parentId = $newParentId;
			}
		}
		$newParentNode->children = $children;
	}
}
