MediaWiki  1.31.0
RemexCompatMunger.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Tidy;
4 
5 use RemexHtml\HTMLData;
6 use RemexHtml\Serializer\Serializer;
7 use RemexHtml\Serializer\SerializerNode;
8 use RemexHtml\Tokenizer\Attributes;
9 use RemexHtml\Tokenizer\PlainAttributes;
10 use RemexHtml\TreeBuilder\TreeBuilder;
11 use RemexHtml\TreeBuilder\TreeHandler;
12 use RemexHtml\TreeBuilder\Element;
13 
/**
 * A TreeHandler which sits in front of a Serializer and adjusts the stream of
 * tree construction events before forwarding them.
 *
 * Bare text and inline elements inserted under a node flagged as
 * "needsPWrapping" (the root node, and any html, body or blockquote element)
 * are diverted into a synthetic <mw:p-wrap> element. When block content
 * arrives while a p-wrapper is open, the p-wrapper is closed, the stack of
 * formatting elements is split (cloned), or the p-wrapper is disabled,
 * depending on the insertion context.
 *
 * Per-node bookkeeping is kept in a RemexMungerData object stored in the
 * snData property of each SerializerNode.
 */
class RemexCompatMunger implements TreeHandler {
	/**
	 * Names of elements which are treated as inline by insertElement().
	 * Any element whose name is not a key of this array is treated as a block.
	 */
	private static $onlyInlineElements = [
		"a" => true,
		"abbr" => true,
		"acronym" => true,
		"applet" => true,
		"b" => true,
		"basefont" => true,
		"bdo" => true,
		"big" => true,
		"br" => true,
		"button" => true,
		"cite" => true,
		"code" => true,
		"dfn" => true,
		"em" => true,
		"font" => true,
		"i" => true,
		"iframe" => true,
		"img" => true,
		"input" => true,
		"kbd" => true,
		"label" => true,
		"legend" => true,
		"map" => true,
		"object" => true,
		"param" => true,
		"q" => true,
		"rb" => true,
		"rbc" => true,
		"rp" => true,
		"rt" => true,
		"rtc" => true,
		"ruby" => true,
		"s" => true,
		"samp" => true,
		"select" => true,
		"small" => true,
		"span" => true,
		"strike" => true,
		"strong" => true,
		"sub" => true,
		"sup" => true,
		"textarea" => true,
		"tt" => true,
		"u" => true,
		"var" => true,
		// Those defined in tidy.conf
		"video" => true,
		"audio" => true,
		"bdi" => true,
		"data" => true,
		"time" => true,
		"mark" => true,
	];

	/**
	 * Names of formatting elements. An element with one of these names is
	 * marked as splittable by insertElement() when its parent is a p-wrapper
	 * or is itself splittable.
	 */
	private static $formattingElements = [
		'a' => true,
		'b' => true,
		'big' => true,
		'code' => true,
		'em' => true,
		'font' => true,
		'i' => true,
		'nobr' => true,
		's' => true,
		'small' => true,
		'strike' => true,
		'strong' => true,
		'tt' => true,
		'u' => true,
	];

	/**
	 * The downstream serializer which receives the adjusted events.
	 * Declared explicitly so that it is not created as a dynamic property.
	 *
	 * @var Serializer
	 */
	private $serializer;

	/**
	 * @param Serializer $serializer The serializer to forward events to
	 */
	public function __construct( Serializer $serializer ) {
		$this->serializer = $serializer;
	}

	/**
	 * Start the document downstream and flag the root node as needing
	 * p-wrapping.
	 *
	 * @param string|null $fragmentNamespace Passed through to the serializer
	 * @param string|null $fragmentName Passed through to the serializer
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		$this->serializer->startDocument( $fragmentNamespace, $fragmentName );
		$root = $this->serializer->getRootNode();
		$root->snData = new RemexMungerData;
		$root->snData->needsPWrapping = true;
	}

	/**
	 * Forward the end of the document to the serializer.
	 *
	 * @param int $pos Passed through to the serializer
	 */
	public function endDocument( $pos ) {
		$this->serializer->endDocument( $pos );
	}

	/**
	 * Work out which serializer node an insertion will actually go under,
	 * and which node should be passed to the serializer as the reference.
	 *
	 * For TreeBuilder::ROOT the parent is the root node and there is no
	 * reference node. For TreeBuilder::BEFORE the parent is the parent of the
	 * reference node. For anything else (i.e. UNDER), the insertion is
	 * diverted to the end of the reference's clone chain if it has been
	 * cloned by splitTagStack(), or else to its open p-wrapper if it has one.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element from the tree builder
	 * @return array A two-element list: [ parent node, reference node or null ]
	 */
	private function getParentForInsert( $preposition, $refElement ) {
		if ( $preposition === TreeBuilder::ROOT ) {
			return [ $this->serializer->getRootNode(), null ];
		} elseif ( $preposition === TreeBuilder::BEFORE ) {
			$refNode = $refElement->userData;
			return [ $this->serializer->getParentNode( $refNode ), $refNode ];
		} else {
			$refNode = $refElement->userData;
			$refData = $refNode->snData;
			if ( $refData->currentCloneElement ) {
				// Follow a chain of clone links if necessary
				$origRefData = $refData;
				while ( $refData->currentCloneElement ) {
					$refElement = $refData->currentCloneElement;
					$refNode = $refElement->userData;
					$refData = $refNode->snData;
				}
				// Cache the end of the chain in the requested element
				$origRefData->currentCloneElement = $refElement;
			} elseif ( $refData->childPElement ) {
				$refElement = $refData->childPElement;
				$refNode = $refElement->userData;
			}
			return [ $refNode, $refNode ];
		}
	}

	/**
	 * Insert a p-wrapper.
	 *
	 * Creates a <mw:p-wrap> element under $parent, initialises its munger
	 * data, and records it as the child p-wrapper of $parent so that later
	 * insertions under $parent are diverted into it.
	 *
	 * @param SerializerNode $parent The node to insert the p-wrapper under
	 * @param int $sourceStart Source position passed to the serializer
	 * @return SerializerNode The serializer node of the new p-wrapper
	 */
	private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
		$pWrap = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
		$this->serializer->insertElement( TreeBuilder::UNDER, $parent, $pWrap, false,
			$sourceStart, 0 );
		$data = new RemexMungerData;
		$data->isPWrapper = true;
		$data->wrapBaseNode = $parent;
		$pWrap->userData->snData = $data;
		$parent->snData->childPElement = $pWrap;
		return $pWrap->userData;
	}

	/**
	 * Insert text, creating a p-wrapper or splitting the tag stack first
	 * where the insertion context requires it.
	 *
	 * Text consisting only of tab, LF, FF, CR and space is "blank": it never
	 * triggers creation of a p-wrapper directly under a needsPWrapping node
	 * and does not count towards the parent's nonblank node count.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element
	 * @param string $text The buffer containing the text
	 * @param int $start Offset of the text within $text
	 * @param int $length Length of the text within $text
	 * @param int $sourceStart Passed through to the serializer
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function characters( $preposition, $refElement, $text, $start, $length,
		$sourceStart, $sourceLength
	) {
		$isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;

		list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
		$parentData = $parent->snData;

		if ( $preposition === TreeBuilder::UNDER ) {
			if ( $parentData->needsPWrapping && !$isBlank ) {
				// Add a p-wrapper for bare text under body/blockquote
				$refNode = $this->insertPWrapper( $refNode, $sourceStart );
				$parent = $refNode;
				$parentData = $parent->snData;
			} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
				// The parent is splittable and in block mode, so split the tag stack
				$refNode = $this->splitTagStack( $refNode, true, $sourceStart );
				$parent = $refNode;
				$parentData = $parent->snData;
			}
		}

		if ( !$isBlank ) {
			// Non-whitespace characters detected
			$parentData->nonblankNodeCount++;
		}
		$this->serializer->characters( $preposition, $refNode, $text, $start,
			$length, $sourceStart, $sourceLength );
	}

	/**
	 * Debugging hook. Does nothing unless the echo statement is uncommented.
	 *
	 * @param string $msg
	 */
	private function trace( $msg ) {
		// echo "[RCM] $msg\n";
	}

	/**
	 * Insert or reparent an element. Create p-wrappers or split the tag stack
	 * as necessary.
	 *
	 * The element is "inline" (i) if its name is in $onlyInlineElements,
	 * otherwise it is a block (b). The cases handled, using the labels that
	 * appear in the branch comments below, are:
	 *
	 *   - B/b: block inserted under a p-wrapper: the p-wrapper is ended and
	 *     the element is inserted under the p-wrapper's parent instead.
	 *   - CS/b, DS/i: the parent is splittable, and either a block is being
	 *     inserted below a p-wrapper or an inline element is being inserted
	 *     where there is no p-wrapper ancestor: the tag stack is split.
	 *   - A/i: inline element inserted under a needsPWrapping node: a
	 *     p-wrapper is created and the element goes inside it.
	 *   - CU/b: block inserted below a p-wrapper where splitting did not
	 *     apply: the p-wrapper is disabled.
	 *   - Everything else is inserted as requested.
	 *
	 * After the insertion, the new node's munger data is initialised from
	 * its (possibly adjusted) parent.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element
	 * @param Element $element The element to insert
	 * @param bool $void Passed through to the serializer
	 * @param int $sourceStart Passed through to the serializer
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function insertElement( $preposition, $refElement, Element $element, $void,
		$sourceStart, $sourceLength
	) {
		list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
		$parentData = $parent->snData;
		$elementName = $element->htmlName;

		$inline = isset( self::$onlyInlineElements[$elementName] );
		$under = $preposition === TreeBuilder::UNDER;

		if ( $under && $parentData->isPWrapper && !$inline ) {
			// [B/b] The element is non-inline and the parent is a p-wrapper,
			// close the parent and insert into its parent instead
			$this->trace( 'insert B/b' );
			$newParent = $this->serializer->getParentNode( $parent );
			$parent = $newParent;
			$parentData = $parent->snData;
			$pElement = $parentData->childPElement;
			$parentData->childPElement = null;
			$newRef = $refElement->userData;
			$this->endTag( $pElement, $sourceStart, 0 );
		} elseif ( $under && $parentData->isSplittable
			&& (bool)$parentData->ancestorPNode !== $inline
		) {
			// [CS/b, DS/i] The parent is splittable and the current element is
			// inline in block context, or if the current element is a block
			// under a p-wrapper, split the tag stack.
			$this->trace( $inline ? 'insert DS/i' : 'insert CS/b' );
			$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
			$parent = $newRef;
			$parentData = $parent->snData;
		} elseif ( $under && $parentData->needsPWrapping && $inline ) {
			// [A/i] If the element is inline and we are in body/blockquote,
			// we need to create a p-wrapper
			$this->trace( 'insert A/i' );
			$newRef = $this->insertPWrapper( $newRef, $sourceStart );
			$parent = $newRef;
			$parentData = $parent->snData;
		} elseif ( $parentData->ancestorPNode && !$inline ) {
			// [CU/b] If the element is non-inline and (despite attempting to
			// split above) there is still an ancestor p-wrap, disable that
			// p-wrap
			$this->trace( 'insert CU/b' );
			$this->disablePWrapper( $parent, $sourceStart );
		} else {
			// [A/b, B/i, C/i, D/b, DU/i] insert as normal
			$this->trace( 'insert normal' );
		}

		// An element with element children is a non-blank element
		$parentData->nonblankNodeCount++;

		// Insert the element downstream and so initialise its userData
		$this->serializer->insertElement( $preposition, $newRef,
			$element, $void, $sourceStart, $sourceLength );

		// Initialise snData
		if ( !$element->userData->snData ) {
			$elementData = $element->userData->snData = new RemexMungerData;
		} else {
			$elementData = $element->userData->snData;
		}
		if ( ( $parentData->isPWrapper || $parentData->isSplittable )
			&& isset( self::$formattingElements[$elementName] )
		) {
			$elementData->isSplittable = true;
		}
		if ( $parentData->isPWrapper ) {
			$elementData->ancestorPNode = $parent;
		} elseif ( $parentData->ancestorPNode ) {
			$elementData->ancestorPNode = $parentData->ancestorPNode;
		}
		if ( $parentData->wrapBaseNode ) {
			$elementData->wrapBaseNode = $parentData->wrapBaseNode;
		} elseif ( $parentData->needsPWrapping ) {
			$elementData->wrapBaseNode = $parent;
		}
		if ( $elementName === 'body'
			|| $elementName === 'blockquote'
			|| $elementName === 'html'
		) {
			$elementData->needsPWrapping = true;
		}
	}

	/**
	 * Clone nodes in a stack range and return the new parent.
	 *
	 * Walks up from $parentNode to the "clone end" (the ancestor p-wrapper if
	 * there is one, otherwise the wrap base node), then re-creates each node
	 * on that path, outermost first, under either a fresh p-wrapper
	 * ($inline true) or directly under the wrap base ($inline false). Each
	 * original node gets a currentCloneElement link to its clone so that
	 * later insertions are diverted by getParentForInsert(). Original nodes
	 * which have no non-blank content are removed from the serializer.
	 *
	 * @param SerializerNode $parentNode The innermost node of the range to clone
	 * @param bool $inline True to place the clones inside a new p-wrapper,
	 *   false to place them directly under the wrap base node
	 * @param int $pos Source position passed to the serializer
	 * @return SerializerNode The innermost clone, or the new p-wrapper / wrap
	 *   base node if the range was empty
	 * @throws \Exception If the walk reaches a child of the root node before
	 *   finding the end of the clone range
	 */
	private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
		$parentData = $parentNode->snData;
		$wrapBase = $parentData->wrapBaseNode;
		$pWrap = $parentData->ancestorPNode;
		// Stop at the enclosing p-wrapper if there is one, else at the wrap base
		$cloneEnd = $pWrap ? $pWrap : $wrapBase;

		$serializer = $this->serializer;
		$node = $parentNode;
		$root = $serializer->getRootNode();
		$nodes = [];
		$removableNodes = [];
		while ( $node !== $cloneEnd ) {
			$nextParent = $serializer->getParentNode( $node );
			if ( $nextParent === $root ) {
				throw new \Exception( 'Did not find end of clone range' );
			}
			$nodes[] = $node;
			if ( $node->snData->nonblankNodeCount === 0 ) {
				// The original has no content of its own, so it can be dropped
				// once it has been cloned; its parent loses a non-blank child
				$removableNodes[] = $node;
				$nextParent->snData->nonblankNodeCount--;
			}
			$node = $nextParent;
		}

		if ( $inline ) {
			$pWrap = $this->insertPWrapper( $wrapBase, $pos );
			$node = $pWrap;
		} else {
			if ( $pWrap ) {
				// End the p-wrap which was open, cancel the diversion
				$wrapBase->snData->childPElement = null;
			}
			$pWrap = null;
			$node = $wrapBase;
		}

		// $nodes is ordered innermost first, so iterate backwards to rebuild
		// the stack from the outside in
		for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
			$oldNode = $nodes[$i];
			$oldData = $oldNode->snData;
			$nodeParent = $node;
			$element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
			$this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
				$element, false, $pos, 0 );
			$oldData->currentCloneElement = $element;

			$newNode = $element->userData;
			$newData = $newNode->snData = new RemexMungerData;
			if ( $pWrap ) {
				$newData->ancestorPNode = $pWrap;
			}
			$newData->isSplittable = true;
			$newData->wrapBaseNode = $wrapBase;
			$newData->isPWrapper = $oldData->isPWrapper;

			$nodeParent->snData->nonblankNodeCount++;

			$node = $newNode;
		}
		foreach ( $removableNodes as $rNode ) {
			// The serializer's removeNode() takes an Element, so wrap the node
			$fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs );
			$fakeElement->userData = $rNode;
			$this->serializer->removeNode( $fakeElement, $pos );
		}
		return $node;
	}

	/**
	 * Find the ancestor of $node which is a child of a p-wrapper, and
	 * reparent that node so that it is placed under the p-wrapper's parent
	 * instead.
	 *
	 * Nothing is done if the p-wrapper is not the last child of its parent.
	 * Otherwise the ancestorPNode link is cleared on $node and on each of its
	 * ancestors below the p-wrapper, and the diversion into the p-wrapper is
	 * cancelled on the p-wrapper's parent.
	 *
	 * @param SerializerNode $node A descendant of a p-wrapper
	 * @param int $sourceStart Source position passed to the serializer
	 */
	private function disablePWrapper( SerializerNode $node, $sourceStart ) {
		$nodeData = $node->snData;
		$pWrapNode = $nodeData->ancestorPNode;
		$newParent = $this->serializer->getParentNode( $pWrapNode );
		if ( $pWrapNode !== $this->serializer->getLastChild( $newParent ) ) {
			// Fostering or something? Abort!
			return;
		}

		// Walk up to the direct child of the p-wrapper, detaching each node
		// on the way from the p-wrapper
		$nextParent = $node;
		do {
			$victim = $nextParent;
			$victim->snData->ancestorPNode = null;
			$nextParent = $this->serializer->getParentNode( $victim );
		} while ( $nextParent !== $pWrapNode );

		// Make a fake Element to use in a reparenting operation
		$victimElement = new Element( $victim->namespace, $victim->name, $victim->attrs );
		$victimElement->userData = $victim;

		// Reparent
		$this->serializer->insertElement( TreeBuilder::UNDER, $newParent, $victimElement,
			false, $sourceStart, 0 );

		// Decrement nonblank node count
		$pWrapNode->snData->nonblankNodeCount--;

		// Cancel the diversion so that no more elements are inserted under this p-wrap
		$newParent->snData->childPElement = null;
	}

	/**
	 * End an element. If the element has an open child p-wrapper, that is
	 * ended first. The element's link to its serializer node, and the node's
	 * munger data, are cleared afterwards.
	 *
	 * @param Element $element The element being ended
	 * @param int $sourceStart Passed through to the serializer
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function endTag( Element $element, $sourceStart, $sourceLength ) {
		$data = $element->userData->snData;
		if ( $data->childPElement ) {
			$this->endTag( $data->childPElement, $sourceStart, 0 );
		}
		$this->serializer->endTag( $element, $sourceStart, $sourceLength );
		$element->userData->snData = null;
		$element->userData = null;
	}

	/**
	 * Forward a doctype to the serializer unchanged.
	 *
	 * @param string $name
	 * @param string $public
	 * @param string $system
	 * @param int $quirks
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		$this->serializer->doctype( $name, $public, $system, $quirks,
			$sourceStart, $sourceLength );
	}

	/**
	 * Insert a comment, following the same clone / p-wrapper diversion as
	 * other insertions. Comments never create p-wrappers or split the stack.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element
	 * @param string $text The comment text
	 * @param int $sourceStart Passed through to the serializer
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
		// Only the (possibly diverted) reference node is needed here
		list( , $refNode ) = $this->getParentForInsert( $preposition, $refElement );
		$this->serializer->comment( $preposition, $refNode, $text,
			$sourceStart, $sourceLength );
	}

	/**
	 * Forward a parse error to the serializer unchanged.
	 *
	 * @param string $text
	 * @param int $pos
	 */
	public function error( $text, $pos ) {
		$this->serializer->error( $text, $pos );
	}

	/**
	 * Forward an attribute merge to the serializer unchanged.
	 *
	 * @param Element $element
	 * @param Attributes $attrs
	 * @param int $sourceStart
	 */
	public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
		$this->serializer->mergeAttributes( $element, $attrs, $sourceStart );
	}

	/**
	 * Forward a node removal to the serializer unchanged.
	 *
	 * @param Element $element
	 * @param int $sourceStart
	 */
	public function removeNode( Element $element, $sourceStart ) {
		$this->serializer->removeNode( $element, $sourceStart );
	}

	/**
	 * Move all children of $element under a new element $newParent, which
	 * is itself inserted under $element.
	 *
	 * If $element has an open child p-wrapper, the operation is applied to
	 * the p-wrapper instead, so that $newParent ends up inside it.
	 *
	 * @param Element $element The element whose children are to be moved
	 * @param Element $newParent The new element which will adopt the children
	 * @param int $sourceStart Source position passed to insertElement()
	 */
	public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
		$self = $element->userData;
		if ( $self->snData->childPElement ) {
			// Reparent under the p-wrapper instead, so that e.g.
			// <blockquote><mw:p-wrap>...</mw:p-wrap></blockquote>
			// becomes
			// <blockquote><mw:p-wrap><i>...</i></mw:p-wrap></blockquote>

			// The formatting element should not be the parent of the p-wrap.
			// Without this special case, the insertElement() of the <i> below
			// would be diverted into the p-wrapper, causing infinite recursion
			// (T178632)
			$this->reparentChildren( $self->snData->childPElement, $newParent, $sourceStart );
			return;
		}

		// Detach the children before inserting the new parent, then hand
		// them over to the new parent's serializer node
		$children = $self->children;
		$self->children = [];
		$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
		$newParentNode = $newParent->userData;
		$newParentId = $newParentNode->id;
		foreach ( $children as $child ) {
			// Only object children carry a parentId to update
			if ( is_object( $child ) ) {
				$this->trace( "reparent <{$child->name}>" );
				$child->parentId = $newParentId;
			}
		}
		$newParentNode->children = $children;
	}
}
MediaWiki\Tidy\RemexCompatMunger\__construct
__construct(Serializer $serializer)
Definition: RemexCompatMunger.php:93
MediaWiki\Tidy\RemexCompatMunger
Definition: RemexCompatMunger.php:17
MediaWiki\Tidy\RemexCompatMunger\characters
characters( $preposition, $refElement, $text, $start, $length, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:154
captcha-old.count
count
Definition: captcha-old.py:249
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
MediaWiki\Tidy\RemexCompatMunger\endTag
endTag(Element $element, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:442
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:302
MediaWiki\Tidy\RemexCompatMunger\mergeAttributes
mergeAttributes(Element $element, Attributes $attrs, $sourceStart)
Definition: RemexCompatMunger.php:467
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
MediaWiki\Tidy\RemexCompatMunger\$onlyInlineElements
static $onlyInlineElements
Definition: RemexCompatMunger.php:18
MediaWiki\Tidy\RemexCompatMunger\startDocument
startDocument( $fragmentNamespace, $fragmentName)
Definition: RemexCompatMunger.php:97
MediaWiki\Tidy\RemexCompatMunger\doctype
doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:452
MediaWiki\Tidy\RemexCompatMunger\error
error( $text, $pos)
Definition: RemexCompatMunger.php:463
MediaWiki\Tidy\RemexCompatMunger\insertElement
insertElement( $preposition, $refElement, Element $element, $void, $sourceStart, $sourceLength)
Insert or reparent an element.
Definition: RemexCompatMunger.php:241
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
MediaWiki\Tidy\RemexCompatMunger\trace
trace( $msg)
Definition: RemexCompatMunger.php:184
MediaWiki\Tidy\RemexCompatMunger\getParentForInsert
getParentForInsert( $preposition, $refElement)
Definition: RemexCompatMunger.php:108
MediaWiki\Tidy\RemexCompatMunger\splitTagStack
splitTagStack(SerializerNode $parentNode, $inline, $pos)
Clone nodes in a stack range and return the new parent.
Definition: RemexCompatMunger.php:336
MediaWiki\Tidy\RemexCompatMunger\$formattingElements
static $formattingElements
Definition: RemexCompatMunger.php:73
MediaWiki\Tidy\RemexCompatMunger\removeNode
removeNode(Element $element, $sourceStart)
Definition: RemexCompatMunger.php:471
$self
$self
Definition: doMaintenance.php:55
MediaWiki\Tidy\RemexCompatMunger\insertPWrapper
insertPWrapper(SerializerNode $parent, $sourceStart)
Insert a p-wrapper.
Definition: RemexCompatMunger.php:142
MediaWiki\Tidy\RemexCompatMunger\disablePWrapper
disablePWrapper(SerializerNode $node, $sourceStart)
Find the ancestor of $node which is a child of a p-wrapper, and reparent that node so that it is placed under the p-wrapper's parent instead.
Definition: RemexCompatMunger.php:411
MediaWiki\Tidy\RemexCompatMunger\endDocument
endDocument( $pos)
Definition: RemexCompatMunger.php:104
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
MediaWiki\Tidy\RemexCompatMunger\comment
comment( $preposition, $refElement, $text, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:457
MediaWiki\Tidy\RemexMungerData
Definition: RemexMungerData.php:8
MediaWiki\Tidy
Definition: Balancer.php:27
MediaWiki\Tidy\RemexCompatMunger\reparentChildren
reparentChildren(Element $element, Element $newParent, $sourceStart)
Definition: RemexCompatMunger.php:475