MediaWiki  1.34.0
RemexCompatMunger.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Tidy;
4 
5 use RemexHtml\HTMLData;
6 use RemexHtml\Serializer\Serializer;
7 use RemexHtml\Serializer\SerializerNode;
8 use RemexHtml\Tokenizer\Attributes;
9 use RemexHtml\Tokenizer\PlainAttributes;
10 use RemexHtml\TreeBuilder\TreeBuilder;
11 use RemexHtml\TreeBuilder\TreeHandler;
12 use RemexHtml\TreeBuilder\Element;
13 
17 class RemexCompatMunger implements TreeHandler {
/**
 * Lookup set of HTML element names that insertElement() classifies as
 * inline. Only the keys matter: membership is tested with isset().
 *
 * @var array<string,bool>
 */
private static $onlyInlineElements = [
	'a' => true,
	'abbr' => true,
	'acronym' => true,
	'applet' => true,
	'b' => true,
	'basefont' => true,
	'bdo' => true,
	'big' => true,
	'br' => true,
	'button' => true,
	'cite' => true,
	'code' => true,
	'del' => true,
	'dfn' => true,
	'em' => true,
	'font' => true,
	'i' => true,
	'iframe' => true,
	'img' => true,
	'input' => true,
	'ins' => true,
	'kbd' => true,
	'label' => true,
	'legend' => true,
	'map' => true,
	'object' => true,
	'param' => true,
	'q' => true,
	'rb' => true,
	'rbc' => true,
	'rp' => true,
	'rt' => true,
	'rtc' => true,
	'ruby' => true,
	's' => true,
	'samp' => true,
	'select' => true,
	'small' => true,
	'span' => true,
	'strike' => true,
	'strong' => true,
	'sub' => true,
	'sup' => true,
	'textarea' => true,
	'tt' => true,
	'u' => true,
	'var' => true,
	// Those defined in tidy.conf
	'video' => true,
	'audio' => true,
	'bdi' => true,
	'data' => true,
	'time' => true,
	'mark' => true,
];
74 
/**
 * Lookup set of element names that insertElement() inserts as-is, before
 * any of its p-wrapping or tag-stack-splitting rules are considered.
 * Only the keys matter: membership is tested with isset().
 *
 * @var array<string,bool>
 */
private static $metadataElements = [
	'link' => true,
	'meta' => true,
	'script' => true,
	'style' => true,
];
89 
/**
 * Lookup set of element names that insertElement() flags as splittable
 * when they are inserted under a p-wrapper or under another splittable
 * element. Only the keys matter: membership is tested with isset().
 *
 * @var array<string,bool>
 */
private static $formattingElements = [
	'a' => true, 'b' => true, 'big' => true, 'code' => true,
	'em' => true, 'font' => true, 'i' => true, 'nobr' => true,
	's' => true, 'small' => true, 'strike' => true, 'strong' => true,
	'tt' => true, 'u' => true,
];
106 
/** @var Serializer Downstream serializer that every event is forwarded to */
private $serializer;

/** @var bool When true, trace() sends its messages to wfDebug() */
private $trace;

/**
 * @param Serializer $serializer Downstream serializer to drive
 * @param bool $trace Enable debug tracing (off by default)
 */
public function __construct( Serializer $serializer, $trace = false ) {
	$this->trace = $trace;
	$this->serializer = $serializer;
}
121 
/**
 * Start the document downstream, then attach munger data to the
 * serializer's root node and flag that node as needing p-wrapping.
 *
 * @param mixed $fragmentNamespace Passed through to the serializer
 * @param mixed $fragmentName Passed through to the serializer
 */
public function startDocument( $fragmentNamespace, $fragmentName ) {
	$downstream = $this->serializer;
	$downstream->startDocument( $fragmentNamespace, $fragmentName );

	$rootNode = $downstream->getRootNode();
	$rootNode->snData = new RemexMungerData;
	$rootNode->snData->needsPWrapping = true;
}
128 
/**
 * Forward the end-of-document event to the downstream serializer.
 *
 * @param mixed $pos Position value, passed through unchanged
 */
public function endDocument( $pos ) {
	$downstream = $this->serializer;
	$downstream->endDocument( $pos );
}
132 
/**
 * Resolve the serializer nodes an insertion should actually target.
 *
 * - TreeBuilder::ROOT: the parent is the serializer's root node and there
 *   is no reference node.
 * - TreeBuilder::BEFORE: the reference node is $refElement's node and the
 *   parent is that node's parent.
 * - Anything else (insertion under $refElement): if the reference node has
 *   been cloned, the chain of currentCloneElement links is followed to its
 *   end; otherwise, if it has an open child p-wrapper, the p-wrapper is
 *   used. The resulting node is both parent and reference.
 *
 * @param mixed $preposition A TreeBuilder preposition constant
 * @param Element|null $refElement Reference element; not read for ROOT
 * @return array Two-element list: [ parent node, reference node or null ]
 */
private function getParentForInsert( $preposition, $refElement ) {
	if ( $preposition === TreeBuilder::ROOT ) {
		return [ $this->serializer->getRootNode(), null ];
	}

	$targetNode = $refElement->userData;
	if ( $preposition === TreeBuilder::BEFORE ) {
		return [ $this->serializer->getParentNode( $targetNode ), $targetNode ];
	}

	$targetData = $targetNode->snData;
	if ( $targetData->currentCloneElement ) {
		// Walk to the newest clone at the end of the chain
		$firstData = $targetData;
		do {
			$refElement = $targetData->currentCloneElement;
			$targetNode = $refElement->userData;
			$targetData = $targetNode->snData;
		} while ( $targetData->currentCloneElement );
		// Remember the end of the chain on the node we started from, so
		// the next lookup takes a single hop
		$firstData->currentCloneElement = $refElement;
	} elseif ( $targetData->childPElement ) {
		// Divert the insertion into the open p-wrapper
		$targetNode = $targetData->childPElement->userData;
	}

	return [ $targetNode, $targetNode ];
}
159 
/**
 * Insert a p-wrapper: a new <mw:p-wrap> element under $parent.
 *
 * The wrapper's node is flagged as a p-wrapper whose wrap base is
 * $parent, and $parent records the wrapper as its childPElement.
 *
 * @param SerializerNode $parent Node to create the wrapper under
 * @param mixed $sourceStart Source position passed to the serializer
 * @return SerializerNode The node of the new wrapper
 */
private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
	$wrapperElement = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
	$this->serializer->insertElement(
		TreeBuilder::UNDER, $parent, $wrapperElement, false, $sourceStart, 0
	);

	$wrapperData = new RemexMungerData;
	$wrapperData->isPWrapper = true;
	$wrapperData->wrapBaseNode = $parent;
	$wrapperElement->userData->snData = $wrapperData;

	$parent->snData->childPElement = $wrapperElement;

	return $wrapperElement->userData;
}
178 
/**
 * Forward a run of text to the serializer, first opening a p-wrapper or
 * splitting the tag stack when the text is inserted under a node that
 * requires it.
 *
 * Text counts as blank when the $length bytes starting at $start consist
 * only of tab, LF, FF, CR and space.
 *
 * @param mixed $preposition A TreeBuilder preposition constant
 * @param Element|null $refElement Reference element for the insertion
 * @param string $text Buffer containing the text
 * @param int $start Offset of the text within $text
 * @param int $length Length of the text within $text
 * @param mixed $sourceStart Passed through to the serializer
 * @param mixed $sourceLength Passed through to the serializer
 */
public function characters( $preposition, $refElement, $text, $start, $length,
	$sourceStart, $sourceLength
) {
	$hasContent = strspn( $text, "\t\n\f\r ", $start, $length ) !== $length;

	list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
	$parentData = $parent->snData;

	if ( $preposition === TreeBuilder::UNDER ) {
		if ( $parentData->needsPWrapping && $hasContent ) {
			// Bare non-blank text under a node that needs p-wrapping:
			// open a p-wrapper and put the text inside it
			$refNode = $this->insertPWrapper( $refNode, $sourceStart );
			$parentData = $refNode->snData;
		} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
			// Splittable parent with no p-wrapper above it: split the
			// tag stack and put the text inside the innermost clone
			$refNode = $this->splitTagStack( $refNode, true, $sourceStart );
			$parentData = $refNode->snData;
		}
	}

	if ( $hasContent ) {
		// Non-blank text makes its parent non-blank
		$parentData->nonblankNodeCount++;
	}

	$this->serializer->characters(
		$preposition, $refNode, $text, $start, $length, $sourceStart, $sourceLength
	);
}
208 
/**
 * Send a message, prefixed with "[RCM] ", to wfDebug() when tracing was
 * enabled in the constructor; otherwise do nothing.
 *
 * @param string $msg
 */
private function trace( $msg ) {
	if ( !$this->trace ) {
		return;
	}
	wfDebug( "[RCM] $msg" );
}
214 
/**
 * Insert or reparent an element.
 *
 * Before forwarding the insertion to the serializer, decide whether the
 * target must change. The first matching rule wins:
 *  - metadata elements are inserted as-is;
 *  - a non-inline element under a p-wrapper goes into the wrapper's
 *    parent instead, and the wrapper stops receiving diverted inserts;
 *  - under a splittable parent, an inline element with no p-wrapper
 *    above, or a non-inline element with one, splits the tag stack;
 *  - an inline element under a node that needs p-wrapping gets a new
 *    p-wrapper;
 *  - a non-inline element that still has a p-wrapper above it disables
 *    that wrapper;
 *  - anything else is inserted normally.
 * Afterwards the new element's munger data is initialised from the
 * (possibly changed) parent.
 *
 * The bracketed codes in the comments below ([B/b], [CS/b], ...) are case
 * labels kept from the original code; their legend is not part of this
 * listing.
 *
 * @param mixed $preposition A TreeBuilder preposition constant
 * @param Element|null $refElement Reference element for the insertion
 * @param Element $element Element being inserted
 * @param mixed $void Passed through to the serializer
 * @param mixed $sourceStart Passed through to the serializer and helpers
 * @param mixed $sourceLength Passed through to the serializer
 */
public function insertElement( $preposition, $refElement, Element $element, $void,
	$sourceStart, $sourceLength
) {
	list( $container, $insertRef ) = $this->getParentForInsert( $preposition, $refElement );
	$containerData = $container->snData;
	$tagName = $element->htmlName;

	$isInline = isset( self::$onlyInlineElements[$tagName] );
	$isUnder = ( $preposition === TreeBuilder::UNDER );

	if ( isset( self::$metadataElements[$tagName] ) ) {
		// Metadata elements may appear in both inline and block contexts
		$this->trace( 'insert metadata' );
	} elseif ( $isUnder && $containerData->isPWrapper && !$isInline ) {
		// [B/b] Non-inline element under a p-wrapper: insert into the
		// wrapper's parent and cancel the diversion into the wrapper
		$this->trace( 'insert B/b' );
		$container = $this->serializer->getParentNode( $container );
		$containerData = $container->snData;
		$containerData->childPElement = null;
		$insertRef = $refElement->userData;
	} elseif ( $isUnder && $containerData->isSplittable
		&& (bool)$containerData->ancestorPNode !== $isInline
	) {
		// [CS/b, DS/i] Splittable parent, and either an inline element
		// with no p-wrapper above or a block element with one: split
		$this->trace( $isInline ? 'insert DS/i' : 'insert CS/b' );
		$insertRef = $this->splitTagStack( $insertRef, $isInline, $sourceStart );
		$container = $insertRef;
		$containerData = $container->snData;
	} elseif ( $isUnder && $containerData->needsPWrapping && $isInline ) {
		// [A/i] Inline element under a node that needs p-wrapping
		$this->trace( 'insert A/i' );
		$insertRef = $this->insertPWrapper( $insertRef, $sourceStart );
		$container = $insertRef;
		$containerData = $container->snData;
	} elseif ( $containerData->ancestorPNode && !$isInline ) {
		// [CU/b] Non-inline element that still has a p-wrapper above it:
		// disable that p-wrapper
		$this->trace( 'insert CU/b' );
		$this->disablePWrapper( $container, $sourceStart );
	} else {
		// [A/b, B/i, C/i, D/b, DU/i] Nothing special to do
		$this->trace( 'insert normal' );
	}

	// Gaining an element child makes the parent non-blank
	$containerData->nonblankNodeCount++;

	// The downstream insertion is what populates $element->userData
	$this->serializer->insertElement(
		$preposition, $insertRef, $element, $void, $sourceStart, $sourceLength
	);

	// Create the munger data unless the node already carries some
	if ( !$element->userData->snData ) {
		$element->userData->snData = new RemexMungerData;
	}
	$elementData = $element->userData->snData;

	$parentCanSplit = $containerData->isPWrapper || $containerData->isSplittable;
	if ( $parentCanSplit && isset( self::$formattingElements[$tagName] ) ) {
		$elementData->isSplittable = true;
	}

	// Propagate the nearest p-wrapper, if any
	if ( $containerData->isPWrapper ) {
		$elementData->ancestorPNode = $container;
	} elseif ( $containerData->ancestorPNode ) {
		$elementData->ancestorPNode = $containerData->ancestorPNode;
	}

	// Propagate the wrap base, if any
	if ( $containerData->wrapBaseNode ) {
		$elementData->wrapBaseNode = $containerData->wrapBaseNode;
	} elseif ( $containerData->needsPWrapping ) {
		$elementData->wrapBaseNode = $container;
	}

	if ( in_array( $tagName, [ 'body', 'blockquote', 'html' ], true ) ) {
		$elementData->needsPWrapping = true;
	}
}
355 
/**
 * Clone nodes in a stack range and return the new parent.
 *
 * The range runs from $parentNode upwards, stopping before the open
 * p-wrapper recorded on $parentNode or, when there is none, before its
 * wrap base. Every node in the range is cloned (same namespace, name and
 * attributes), outermost first, under either a freshly inserted p-wrapper
 * ($inline true) or the wrap base itself ($inline false). Each original
 * node records its clone in currentCloneElement so that later insertions
 * aimed at the original are redirected. Originals that never received
 * non-blank content are removed from the serializer afterwards.
 *
 * @param SerializerNode $parentNode Innermost node of the range to clone
 * @param bool $inline True to create the clones inside a new p-wrapper,
 *   false to create them directly under the wrap base and cancel the wrap
 *   base's diversion into its current p-wrapper
 * @param mixed $pos Source position passed through to the serializer
 * @return SerializerNode The innermost clone; when the range is empty,
 *   the new p-wrapper node ($inline true) or the wrap base ($inline false)
 * @throws \Exception If the walk reaches a child of the root node without
 *   having met the end of the clone range
 */
private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
	// Bind the serializer locally: the walk below calls it repeatedly
	$serializer = $this->serializer;

	$parentData = $parentNode->snData;
	$wrapBase = $parentData->wrapBaseNode;
	$pWrap = $parentData->ancestorPNode;
	// Stop at the open p-wrapper if there is one, otherwise at the wrap base
	$cloneEnd = $pWrap ?: $wrapBase;

	$node = $parentNode;
	$root = $serializer->getRootNode();
	$nodes = [];
	$removableNodes = [];
	while ( $node !== $cloneEnd ) {
		$nextParent = $serializer->getParentNode( $node );
		if ( $nextParent === $root ) {
			throw new \Exception( 'Did not find end of clone range' );
		}
		$nodes[] = $node;
		if ( $node->snData->nonblankNodeCount === 0 ) {
			// Nothing non-blank was ever put in this node, so the original
			// can be dropped once it has been cloned; it then no longer
			// counts as content of its parent
			$removableNodes[] = $node;
			$nextParent->snData->nonblankNodeCount--;
		}
		$node = $nextParent;
	}

	if ( $inline ) {
		$pWrap = $this->insertPWrapper( $wrapBase, $pos );
		$node = $pWrap;
	} else {
		if ( $pWrap ) {
			// End the p-wrap which was open, cancel the diversion
			$wrapBase->snData->childPElement = null;
		}
		$pWrap = null;
		$node = $wrapBase;
	}

	// $nodes is ordered innermost first; iterate backwards so that each
	// clone is created under the clone of its original parent
	for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
		$oldNode = $nodes[$i];
		$oldData = $oldNode->snData;
		$nodeParent = $node;
		$element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
		$serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
			$element, false, $pos, 0 );
		// Redirect future insertions under the original to its clone
		$oldData->currentCloneElement = $element;

		$newNode = $element->userData;
		$newData = $newNode->snData = new RemexMungerData;
		if ( $pWrap ) {
			$newData->ancestorPNode = $pWrap;
		}
		$newData->isSplittable = true;
		$newData->wrapBaseNode = $wrapBase;
		$newData->isPWrapper = $oldData->isPWrapper;

		$nodeParent->snData->nonblankNodeCount++;

		$node = $newNode;
	}

	foreach ( $removableNodes as $rNode ) {
		// removeNode() takes an Element, so wrap the node in a stand-in
		$fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs );
		$fakeElement->userData = $rNode;
		$serializer->removeNode( $fakeElement, $pos );
	}

	return $node;
}
433 
/**
 * Find the ancestor of $node (possibly $node itself) that is a direct
 * child of $node's p-wrapper, and re-insert that ancestor under the
 * p-wrapper's parent.
 *
 * Each node on the path from $node up to that ancestor has its
 * ancestorPNode cleared. The p-wrapper's non-blank count is decremented
 * and its parent stops diverting insertions into it. Nothing is changed
 * if the p-wrapper is not the last child of its parent.
 *
 * @param SerializerNode $node Node whose ancestorPNode is the p-wrapper
 *   to disable
 * @param mixed $sourceStart Source position passed to the serializer
 */
private function disablePWrapper( SerializerNode $node, $sourceStart ) {
	$pWrapNode = $node->snData->ancestorPNode;
	$wrapperParent = $this->serializer->getParentNode( $pWrapNode );
	if ( $this->serializer->getLastChild( $wrapperParent ) !== $pWrapNode ) {
		// The wrapper is no longer the last child of its parent (the
		// original code suspects fostering here), so leave it alone
		return;
	}

	// Climb from $node to the child of the p-wrapper, detaching every
	// node on the way from the wrapper
	$cursor = $node;
	do {
		$victim = $cursor;
		$victim->snData->ancestorPNode = null;
		$cursor = $this->serializer->getParentNode( $victim );
	} while ( $cursor !== $pWrapNode );

	// The serializer works on Elements, so wrap the existing node in a
	// stand-in Element and re-insert it under the wrapper's parent
	$standIn = new Element( $victim->namespace, $victim->name, $victim->attrs );
	$standIn->userData = $victim;
	$this->serializer->insertElement(
		TreeBuilder::UNDER, $wrapperParent, $standIn, false, $sourceStart, 0
	);

	// The wrapper has lost one non-blank child
	$pWrapNode->snData->nonblankNodeCount--;

	// No further elements may be diverted into this p-wrapper
	$wrapperParent->snData->childPElement = null;
}
468 
/**
 * End an element. If it still has an open child p-wrapper, that wrapper
 * is ended first (with a source length of 0). After the serializer has
 * been notified, the munger data and the node reference are detached
 * from the element.
 *
 * @param Element $element Element being closed
 * @param mixed $sourceStart Passed through to the serializer
 * @param mixed $sourceLength Passed through to the serializer
 */
public function endTag( Element $element, $sourceStart, $sourceLength ) {
	$openPWrap = $element->userData->snData->childPElement;
	if ( $openPWrap ) {
		$this->endTag( $openPWrap, $sourceStart, 0 );
	}

	$this->serializer->endTag( $element, $sourceStart, $sourceLength );

	$element->userData->snData = null;
	$element->userData = null;
}
478 
/**
 * Forward a doctype event to the downstream serializer unchanged.
 *
 * @param mixed $name
 * @param mixed $public
 * @param mixed $system
 * @param mixed $quirks
 * @param mixed $sourceStart
 * @param mixed $sourceLength
 */
public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
	$downstream = $this->serializer;
	$downstream->doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength );
}
483 
/**
 * Forward a comment to the serializer, using the reference node that
 * getParentForInsert() resolves for this insertion.
 *
 * @param mixed $preposition A TreeBuilder preposition constant
 * @param Element|null $refElement Reference element for the insertion
 * @param mixed $text Passed through to the serializer
 * @param mixed $sourceStart Passed through to the serializer
 * @param mixed $sourceLength Passed through to the serializer
 */
public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
	$target = $this->getParentForInsert( $preposition, $refElement );
	// Index 1 is the reference node; the parent at index 0 is not needed
	$this->serializer->comment( $preposition, $target[1], $text, $sourceStart, $sourceLength );
}
488 
/**
 * Forward an error event to the downstream serializer unchanged.
 *
 * @param mixed $text
 * @param mixed $pos
 */
public function error( $text, $pos ) {
	$downstream = $this->serializer;
	$downstream->error( $text, $pos );
}
492 
/**
 * Forward an attribute merge to the downstream serializer unchanged.
 *
 * @param Element $element
 * @param Attributes $attrs
 * @param mixed $sourceStart
 */
public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
	$downstream = $this->serializer;
	$downstream->mergeAttributes( $element, $attrs, $sourceStart );
}
496 
/**
 * Forward a node removal to the downstream serializer unchanged.
 *
 * @param Element $element
 * @param mixed $sourceStart
 */
public function removeNode( Element $element, $sourceStart ) {
	$downstream = $this->serializer;
	$downstream->removeNode( $element, $sourceStart );
}
500 
/**
 * Move all children of $element under $newParent, which is inserted as
 * the new sole child of $element.
 *
 * If $element currently has an open child p-wrapper, the operation is
 * applied to the p-wrapper instead, so that e.g.
 *   <blockquote><mw:p-wrap>...</mw:p-wrap></blockquote>
 * becomes
 *   <blockquote><mw:p-wrap><i>...</i></mw:p-wrap></blockquote>
 *
 * @param Element $element Element whose children are moved
 * @param Element $newParent Element that receives the children
 * @param mixed $sourceStart Source position passed to insertElement()
 */
public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
	$oldNode = $element->userData;
	$openPWrap = $oldNode->snData->childPElement;
	if ( $openPWrap ) {
		// The formatting element must not become the parent of the
		// p-wrap. Without this redirection, the insertElement() call
		// below would be diverted into the p-wrapper, causing infinite
		// recursion (T178632)
		$this->reparentChildren( $openPWrap, $newParent, $sourceStart );
		return;
	}

	// Detach the children first so that $newParent becomes the only child
	$movedChildren = $oldNode->children;
	$oldNode->children = [];
	$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );

	$newParentNode = $newParent->userData;
	$newParentId = $newParentNode->id;
	foreach ( $movedChildren as $child ) {
		if ( !is_object( $child ) ) {
			// Non-object children carry no parent link to update
			continue;
		}
		$this->trace( "reparent <{$child->name}>" );
		$child->parentId = $newParentId;
	}
	$newParentNode->children = $movedChildren;
}
530 }
MediaWiki\Tidy\RemexCompatMunger
Definition: RemexCompatMunger.php:17
MediaWiki\Tidy\RemexCompatMunger\$metadataElements
static array $metadataElements
For the purposes of this class, "metadata" elements are those that should neither trigger p-wrapping nor close or split an enclosing p-wrapper; they are inserted as-is in both inline and block contexts.
Definition: RemexCompatMunger.php:83
MediaWiki\Tidy\RemexCompatMunger\characters
characters( $preposition, $refElement, $text, $start, $length, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:179
MediaWiki\Tidy\RemexCompatMunger\endTag
endTag(Element $element, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:469
MediaWiki\Tidy\RemexCompatMunger\mergeAttributes
mergeAttributes(Element $element, Attributes $attrs, $sourceStart)
Definition: RemexCompatMunger.php:493
MediaWiki\Tidy\RemexCompatMunger\$onlyInlineElements
static $onlyInlineElements
Definition: RemexCompatMunger.php:18
MediaWiki\Tidy\RemexCompatMunger\startDocument
startDocument( $fragmentNamespace, $fragmentName)
Definition: RemexCompatMunger.php:122
MediaWiki\Tidy\RemexCompatMunger\doctype
doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:479
MediaWiki\Tidy\RemexCompatMunger\$trace
bool $trace
Definition: RemexCompatMunger.php:111
MediaWiki\Tidy\RemexCompatMunger\error
error( $text, $pos)
Definition: RemexCompatMunger.php:489
MediaWiki\Tidy\RemexCompatMunger\insertElement
insertElement( $preposition, $refElement, Element $element, $void, $sourceStart, $sourceLength)
Insert or reparent an element.
Definition: RemexCompatMunger.php:268
MediaWiki\Tidy\RemexCompatMunger\$serializer
Serializer $serializer
Definition: RemexCompatMunger.php:108
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:913
MediaWiki\Tidy\RemexCompatMunger\trace
trace( $msg)
Definition: RemexCompatMunger.php:209
MediaWiki\Tidy\RemexCompatMunger\getParentForInsert
getParentForInsert( $preposition, $refElement)
Definition: RemexCompatMunger.php:133
MediaWiki\Tidy\RemexCompatMunger\splitTagStack
splitTagStack(SerializerNode $parentNode, $inline, $pos)
Clone nodes in a stack range and return the new parent.
Definition: RemexCompatMunger.php:364
MediaWiki\Tidy\RemexCompatMunger\$formattingElements
static $formattingElements
Definition: RemexCompatMunger.php:90
MediaWiki\Tidy\RemexCompatMunger\removeNode
removeNode(Element $element, $sourceStart)
Definition: RemexCompatMunger.php:497
$self
$self
Definition: doMaintenance.php:55
MediaWiki\Tidy\RemexCompatMunger\insertPWrapper
insertPWrapper(SerializerNode $parent, $sourceStart)
Insert a p-wrapper.
Definition: RemexCompatMunger.php:167
MediaWiki\Tidy\RemexCompatMunger\disablePWrapper
disablePWrapper(SerializerNode $node, $sourceStart)
Find the ancestor of $node which is a child of a p-wrapper, and reparent that node so that it is placed under the p-wrapper's parent instead.
Definition: RemexCompatMunger.php:438
MediaWiki\Tidy\RemexCompatMunger\endDocument
endDocument( $pos)
Definition: RemexCompatMunger.php:129
MediaWiki\Tidy\RemexCompatMunger\comment
comment( $preposition, $refElement, $text, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:484
MediaWiki\Tidy\RemexMungerData
Definition: RemexMungerData.php:8
MediaWiki\Tidy
Definition: RemexCompatFormatter.php:3
MediaWiki\Tidy\RemexCompatMunger\__construct
__construct(Serializer $serializer, $trace=false)
Definition: RemexCompatMunger.php:117
MediaWiki\Tidy\RemexCompatMunger\reparentChildren
reparentChildren(Element $element, Element $newParent, $sourceStart)
Definition: RemexCompatMunger.php:501