MediaWiki  1.29.1
RemexCompatMunger.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Tidy;
4 
5 use RemexHtml\HTMLData;
6 use RemexHtml\Serializer\Serializer;
7 use RemexHtml\Serializer\SerializerNode;
8 use RemexHtml\Tokenizer\Attributes;
9 use RemexHtml\Tokenizer\PlainAttributes;
10 use RemexHtml\TreeBuilder\TreeBuilder;
11 use RemexHtml\TreeBuilder\TreeHandler;
12 use RemexHtml\TreeBuilder\Element;
13 
17 class RemexCompatMunger implements TreeHandler {
/**
 * Lookup table of element names that are treated as inline content.
 *
 * insertElement() tests an element's htmlName against the keys of this
 * map; only the keys matter, the values are always true.
 */
private static $onlyInlineElements = [
	'a' => true,
	'abbr' => true,
	'acronym' => true,
	'applet' => true,
	'b' => true,
	'basefont' => true,
	'bdo' => true,
	'big' => true,
	'br' => true,
	'button' => true,
	'cite' => true,
	'code' => true,
	'dfn' => true,
	'em' => true,
	'font' => true,
	'i' => true,
	'iframe' => true,
	'img' => true,
	'input' => true,
	'kbd' => true,
	'label' => true,
	'legend' => true,
	'map' => true,
	'object' => true,
	'param' => true,
	'q' => true,
	'rb' => true,
	'rbc' => true,
	'rp' => true,
	'rt' => true,
	'rtc' => true,
	'ruby' => true,
	's' => true,
	'samp' => true,
	'select' => true,
	'small' => true,
	'span' => true,
	'strike' => true,
	'strong' => true,
	'sub' => true,
	'sup' => true,
	'textarea' => true,
	'tt' => true,
	'u' => true,
	'var' => true,
];
65 
/**
 * Lookup table of formatting element names.
 *
 * insertElement() marks a newly inserted element as splittable when its
 * htmlName is a key of this map and its parent is a p-wrapper or is itself
 * splittable. Only the keys matter; the values are always true.
 */
private static $formattingElements = [
	'a' => true,
	'b' => true,
	'big' => true,
	'code' => true,
	'em' => true,
	'font' => true,
	'i' => true,
	'nobr' => true,
	's' => true,
	'small' => true,
	'strike' => true,
	'strong' => true,
	'tt' => true,
	'u' => true,
];
82 
/**
 * The downstream serializer that every tree event is forwarded to.
 *
 * Declared explicitly so that the constructor does not create a dynamic
 * property (deprecated as of PHP 8.2).
 *
 * @var Serializer
 */
private $serializer;

/**
 * Constructor.
 *
 * @param Serializer $serializer The serializer which receives the
 *   (possibly rewritten) tree construction events
 */
public function __construct( Serializer $serializer ) {
	$this->serializer = $serializer;
}
91 
/**
 * Start the document downstream and flag the serializer's root node as
 * needing p-wrapping.
 *
 * @param string|null $fragmentNamespace Passed through to the serializer
 * @param string|null $fragmentName Passed through to the serializer
 */
public function startDocument( $fragmentNamespace, $fragmentName ) {
	$this->serializer->startDocument( $fragmentNamespace, $fragmentName );

	// The root node only exists once the serializer has started the document
	$rootNode = $this->serializer->getRootNode();
	$rootData = new RemexMungerData;
	$rootNode->snData = $rootData;
	$rootData->needsPWrapping = true;
}
98 
/**
 * Forward the end-of-document event to the serializer unchanged.
 *
 * @param int $pos Passed through to the serializer
 */
public function endDocument( $pos ) {
	$downstream = $this->serializer;
	$downstream->endDocument( $pos );
}
102 
/**
 * Work out the serializer node an insertion will really end up under,
 * and the reference node to hand to the serializer.
 *
 * For TreeBuilder::UNDER (and any preposition other than ROOT/BEFORE) the
 * requested element may have been superseded by a clone, or may currently
 * have an open p-wrapper child; in either case the insertion is diverted
 * to that node instead.
 *
 * @param int $preposition One of the TreeBuilder preposition constants
 * @param Element|null $refElement The reference element; not accessed
 *   when $preposition is TreeBuilder::ROOT
 * @return array Two-element list [ parent node, reference node ]; the
 *   reference node is null for TreeBuilder::ROOT
 */
private function getParentForInsert( $preposition, $refElement ) {
	if ( $preposition === TreeBuilder::ROOT ) {
		return [ $this->serializer->getRootNode(), null ];
	}

	$targetNode = $refElement->userData;
	if ( $preposition === TreeBuilder::BEFORE ) {
		return [ $this->serializer->getParentNode( $targetNode ), $targetNode ];
	}

	$targetData = $targetNode->snData;
	if ( $targetData->currentCloneElement ) {
		// Walk the chain of clone links until a node with no newer clone
		$requestedData = $targetData;
		do {
			$cloneElement = $targetData->currentCloneElement;
			$targetNode = $cloneElement->userData;
			$targetData = $targetNode->snData;
		} while ( $targetData->currentCloneElement );
		// Remember the end of the chain on the element that was asked for,
		// so the next lookup is a single hop
		$requestedData->currentCloneElement = $cloneElement;
	} elseif ( $targetData->childPElement ) {
		// Divert into the p-wrapper that is currently open under this node
		$targetNode = $targetData->childPElement->userData;
	}

	return [ $targetNode, $targetNode ];
}
129 
/**
 * Insert a p-wrapper: a new "mw:p-wrap" element placed under $parent.
 *
 * The new node is tagged as a p-wrapper whose wrap base is $parent, and
 * $parent records the wrapper element as its current childPElement.
 *
 * @param SerializerNode $parent The node to insert the wrapper under
 * @param int $sourceStart Source position passed to the serializer
 * @return SerializerNode The serializer node of the new wrapper
 */
private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
	$wrapperElement = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );

	// The serializer populates $wrapperElement->userData during insertion
	$this->serializer->insertElement(
		TreeBuilder::UNDER, $parent, $wrapperElement, false, $sourceStart, 0
	);

	$wrapperData = new RemexMungerData;
	$wrapperData->isPWrapper = true;
	$wrapperData->wrapBaseNode = $parent;
	$wrapperElement->userData->snData = $wrapperData;

	$parent->snData->childPElement = $wrapperElement;

	return $wrapperElement->userData;
}
148 
/**
 * Handle a run of text, creating a p-wrapper or splitting the tag stack
 * first if the insertion context requires it.
 *
 * @param int $preposition One of the TreeBuilder preposition constants
 * @param Element|null $refElement The reference element
 * @param string $text String containing the text to insert
 * @param int $start Offset of the text within $text
 * @param int $length Length of the text within $text
 * @param int $sourceStart Source position passed to the serializer
 * @param int $sourceLength Source length passed to the serializer
 */
public function characters( $preposition, $refElement, $text, $start, $length,
	$sourceStart, $sourceLength
) {
	// True when the run contains anything other than tab, LF, FF, CR or space
	$hasContent = strspn( $text, "\t\n\f\r ", $start, $length ) !== $length;

	list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
	$parentData = $parent->snData;

	if ( $preposition === TreeBuilder::UNDER ) {
		// Bare non-blank text under body/blockquote needs a p-wrapper
		$wrapText = $parentData->needsPWrapping && $hasContent;
		// Otherwise, a splittable parent in block mode has its tag stack split
		$splitStack = !$wrapText
			&& $parentData->isSplittable
			&& !$parentData->ancestorPNode;

		if ( $wrapText || $splitStack ) {
			$refNode = $wrapText
				? $this->insertPWrapper( $refNode, $sourceStart )
				: $this->splitTagStack( $refNode, true, $sourceStart );
			// The text now goes under the newly created node
			$parentData = $refNode->snData;
		}
	}

	if ( $hasContent ) {
		// Record that the (possibly new) parent has non-whitespace content
		$parentData->nonblankNodeCount++;
	}

	$this->serializer->characters(
		$preposition, $refNode, $text, $start, $length, $sourceStart, $sourceLength
	);
}
178 
/**
 * Insert or reparent an element.
 *
 * Before forwarding the insertion to the serializer, the insertion point
 * may be adjusted depending on whether the element is inline (its
 * htmlName is in self::$onlyInlineElements) and on the state of the
 * parent node: an open p-wrapper may be closed, the tag stack may be
 * split, a new p-wrapper may be created, or an ancestor p-wrapper may be
 * disabled. Afterwards the munger data (snData) of the inserted node is
 * initialised from the parent's state.
 *
 * The bracketed codes in the branch comments ([B/b], [CS/b], ...) are
 * case labels carried over from the original comments.
 *
 * @param int $preposition One of the TreeBuilder preposition constants
 * @param Element|null $refElement The reference element
 * @param Element $element The element to insert
 * @param bool $void Passed through to the serializer
 * @param int $sourceStart Source position passed to the serializer
 * @param int $sourceLength Source length passed to the serializer
 */
public function insertElement( $preposition, $refElement, Element $element, $void,
	$sourceStart, $sourceLength
) {
	list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
	$parentData = $parent->snData;
	$elementName = $element->htmlName;

	$inline = isset( self::$onlyInlineElements[$elementName] );
	$under = $preposition === TreeBuilder::UNDER;

	if ( $under && $parentData->isPWrapper && !$inline ) {
		// [B/b] The element is non-inline and the parent is a p-wrapper,
		// close the parent and insert into its parent instead
		$parent = $this->serializer->getParentNode( $parent );
		$parentData = $parent->snData;
		$pElement = $parentData->childPElement;
		$parentData->childPElement = null;
		$newRef = $refElement->userData;
		$this->endTag( $pElement, $sourceStart, 0 );
	} elseif ( $under && $parentData->isSplittable
		&& (bool)$parentData->ancestorPNode !== $inline
	) {
		// [CS/b, DS/i] The parent is splittable and the current element is
		// inline in block context, or if the current element is a block
		// under a p-wrapper, split the tag stack.
		$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
		$parent = $newRef;
		$parentData = $parent->snData;
	} elseif ( $under && $parentData->needsPWrapping && $inline ) {
		// [A/i] If the element is inline and we are in body/blockquote,
		// we need to create a p-wrapper
		$newRef = $this->insertPWrapper( $newRef, $sourceStart );
		$parent = $newRef;
		$parentData = $parent->snData;
	} elseif ( $parentData->ancestorPNode && !$inline ) {
		// [CU/b] If the element is non-inline and (despite attempting to
		// split above) there is still an ancestor p-wrap, disable that
		// p-wrap
		$this->disablePWrapper( $parent, $sourceStart );
	}
	// else [A/b, B/i, C/i, D/b, DU/i] insert as normal

	// An element with element children is a non-blank element
	$parentData->nonblankNodeCount++;

	// Insert the element downstream and so initialise its userData
	$this->serializer->insertElement( $preposition, $newRef,
		$element, $void, $sourceStart, $sourceLength );

	// Initialise snData, keeping any data object that is already attached
	// to the node
	if ( !$element->userData->snData ) {
		$element->userData->snData = new RemexMungerData;
	}
	$elementData = $element->userData->snData;

	// Formatting elements inside a p-wrapper or another splittable
	// element can themselves be split later
	if ( ( $parentData->isPWrapper || $parentData->isSplittable )
		&& isset( self::$formattingElements[$elementName] )
	) {
		$elementData->isSplittable = true;
	}

	// Propagate the nearest p-wrapper ancestor, if any
	if ( $parentData->isPWrapper ) {
		$elementData->ancestorPNode = $parent;
	} elseif ( $parentData->ancestorPNode ) {
		$elementData->ancestorPNode = $parentData->ancestorPNode;
	}

	// Propagate the node under which p-wrappers are created
	if ( $parentData->wrapBaseNode ) {
		$elementData->wrapBaseNode = $parentData->wrapBaseNode;
	} elseif ( $parentData->needsPWrapping ) {
		$elementData->wrapBaseNode = $parent;
	}

	// Inline content directly under these elements gets p-wrapped
	if ( $elementName === 'body'
		|| $elementName === 'blockquote'
		|| $elementName === 'html'
	) {
		$elementData->needsPWrapping = true;
	}
}
312 
/**
 * Clone nodes in a stack range and return the new parent.
 *
 * Starting at $parentNode, walk up the tree until the nearest p-wrapper
 * ancestor (or, if there is none, the wrap base node) is reached. Every
 * node passed on the way is cloned, outermost first, under either a fresh
 * p-wrapper ($inline true) or directly under the wrap base ($inline
 * false). Each original node is linked to its clone through
 * currentCloneElement, and originals whose nonblankNodeCount is zero are
 * removed from the serializer.
 *
 * @param SerializerNode $parentNode The innermost node of the range
 * @param bool $inline Whether the clones go under a new p-wrapper
 * @param int $pos Source position passed to the serializer
 * @return SerializerNode The innermost clone, or, when nothing was
 *   cloned, the new p-wrapper / the wrap base node
 * @throws \Exception If the parent of a walked node turns out to be the
 *   root node before the end of the range is found
 */
private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
	$parentData = $parentNode->snData;
	$wrapBase = $parentData->wrapBaseNode;
	$pWrap = $parentData->ancestorPNode;
	// The range ends at the open p-wrapper, or at the wrap base if none
	$cloneEnd = $pWrap ?: $wrapBase;

	$root = $this->serializer->getRootNode();
	$node = $parentNode;
	$nodes = [];
	$removableNodes = [];
	while ( $node !== $cloneEnd ) {
		$nextParent = $this->serializer->getParentNode( $node );
		if ( $nextParent === $root ) {
			throw new \Exception( 'Did not find end of clone range' );
		}
		$nodes[] = $node;
		if ( $node->snData->nonblankNodeCount === 0 ) {
			// A node with no non-blank content will be removed, so it no
			// longer counts as content of its parent
			$removableNodes[] = $node;
			$nextParent->snData->nonblankNodeCount--;
		}
		$node = $nextParent;
	}

	if ( $inline ) {
		$pWrap = $this->insertPWrapper( $wrapBase, $pos );
		$node = $pWrap;
	} else {
		if ( $pWrap ) {
			// End the p-wrap which was open, cancel the diversion
			$wrapBase->snData->childPElement = null;
		}
		$pWrap = null;
		$node = $wrapBase;
	}

	// $nodes was collected innermost-first; clone outermost-first so that
	// each clone is inserted under the clone of its original parent
	foreach ( array_reverse( $nodes ) as $oldNode ) {
		$oldData = $oldNode->snData;
		$nodeParent = $node;
		$element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
		$this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
			$element, false, $pos, 0 );
		// Future insertions aimed at the original are diverted to the clone
		$oldData->currentCloneElement = $element;

		$newNode = $element->userData;
		$newData = $newNode->snData = new RemexMungerData;
		if ( $pWrap ) {
			$newData->ancestorPNode = $pWrap;
		}
		$newData->isSplittable = true;
		$newData->wrapBaseNode = $wrapBase;
		$newData->isPWrapper = $oldData->isPWrapper;

		$nodeParent->snData->nonblankNodeCount++;

		$node = $newNode;
	}

	foreach ( $removableNodes as $rNode ) {
		// removeNode() takes an Element, so wrap the node in a stand-in
		$fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs );
		$fakeElement->userData = $rNode;
		$this->serializer->removeNode( $fakeElement, $pos );
	}

	return $node;
}
391 
/**
 * Find the ancestor of $node which is a child of a p-wrapper, and
 * reparent that node under the p-wrapper's parent.
 *
 * Nothing is done unless the p-wrapper is the last child of its parent.
 * On the way up, ancestorPNode is cleared on $node and each ancestor
 * below the p-wrapper. Finally the p-wrapper's nonblankNodeCount is
 * decremented and the parent's childPElement diversion is cancelled.
 *
 * @param SerializerNode $node A node whose snData->ancestorPNode is set
 * @param int $sourceStart Source position passed to the serializer
 */
private function disablePWrapper( SerializerNode $node, $sourceStart ) {
	$wrapperNode = $node->snData->ancestorPNode;
	$wrapperParent = $this->serializer->getParentNode( $wrapperNode );
	if ( $wrapperNode !== $this->serializer->getLastChild( $wrapperParent ) ) {
		// Fostering or something? Abort!
		return;
	}

	// Climb from $node until the direct child of the p-wrapper is reached,
	// detaching every visited node from the p-wrapper
	$topNode = $node;
	while ( true ) {
		$topNode->snData->ancestorPNode = null;
		$above = $this->serializer->getParentNode( $topNode );
		if ( $above === $wrapperNode ) {
			break;
		}
		$topNode = $above;
	}

	// insertElement() takes an Element, so wrap the node in a stand-in
	$movedElement = new Element( $topNode->namespace, $topNode->name, $topNode->attrs );
	$movedElement->userData = $topNode;

	// Reparent under the p-wrapper's parent
	$this->serializer->insertElement(
		TreeBuilder::UNDER, $wrapperParent, $movedElement, false, $sourceStart, 0
	);

	// The p-wrapper has lost one non-blank child
	$wrapperNode->snData->nonblankNodeCount--;

	// Cancel the diversion so that no more elements are inserted under this p-wrap
	$wrapperParent->snData->childPElement = null;
}
426 
/**
 * Close an element, first closing any p-wrapper still open beneath it,
 * and then detach the munger data and serializer node from the element.
 *
 * @param Element $element The element being closed
 * @param int $sourceStart Source position passed to the serializer
 * @param int $sourceLength Source length passed to the serializer
 */
public function endTag( Element $element, $sourceStart, $sourceLength ) {
	$openPWrapper = $element->userData->snData->childPElement;
	if ( $openPWrapper ) {
		// The p-wrapper has no end tag of its own in the source
		$this->endTag( $openPWrapper, $sourceStart, 0 );
	}

	$this->serializer->endTag( $element, $sourceStart, $sourceLength );

	// Drop the references now that the element is closed
	$element->userData->snData = null;
	$element->userData = null;
}
436 
/**
 * Forward a doctype event to the serializer unchanged.
 *
 * @param string $name Passed through to the serializer
 * @param string $public Passed through to the serializer
 * @param string $system Passed through to the serializer
 * @param int $quirks Passed through to the serializer
 * @param int $sourceStart Passed through to the serializer
 * @param int $sourceLength Passed through to the serializer
 */
public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
	$this->serializer->doctype(
		$name,
		$public,
		$system,
		$quirks,
		$sourceStart,
		$sourceLength
	);
}
441 
/**
 * Insert a comment at the position resolved by getParentForInsert(), so
 * that it follows the same clone and p-wrapper diversions as other
 * insertions.
 *
 * @param int $preposition One of the TreeBuilder preposition constants
 * @param Element|null $refElement The reference element
 * @param string $text The comment text
 * @param int $sourceStart Source position passed to the serializer
 * @param int $sourceLength Source length passed to the serializer
 */
public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
	// Only the reference node is needed here; the resolved parent is unused
	$refNode = $this->getParentForInsert( $preposition, $refElement )[1];
	$this->serializer->comment( $preposition, $refNode, $text,
		$sourceStart, $sourceLength );
}
447 
/**
 * Forward a parse error to the serializer unchanged.
 *
 * @param string $text Passed through to the serializer
 * @param int $pos Passed through to the serializer
 */
public function error( $text, $pos ) {
	$downstream = $this->serializer;
	$downstream->error( $text, $pos );
}
451 
/**
 * Forward an attribute merge to the serializer unchanged.
 *
 * @param Element $element The element receiving the attributes
 * @param Attributes $attrs The attributes to merge
 * @param int $sourceStart Passed through to the serializer
 */
public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
	$downstream = $this->serializer;
	$downstream->mergeAttributes( $element, $attrs, $sourceStart );
}
455 
/**
 * Forward a node removal to the serializer unchanged.
 *
 * @param Element $element The element to remove
 * @param int $sourceStart Passed through to the serializer
 */
public function removeNode( Element $element, $sourceStart ) {
	$downstream = $this->serializer;
	$downstream->removeNode( $element, $sourceStart );
}
459 
/**
 * Move all children of $element to $newParent, which is itself inserted
 * under $element through this class's insertElement().
 *
 * The child list is detached from $element's node before the insertion,
 * then attached to $newParent's node, with the parentId of every object
 * child updated to the new parent's id.
 *
 * @param Element $element The element whose children are moved
 * @param Element $newParent The element that receives the children
 * @param int $sourceStart Source position passed to insertElement()
 */
public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
	// Detach the existing children so that the new parent is inserted
	// into an empty node
	$sourceNode = $element->userData;
	$movedChildren = $sourceNode->children;
	$sourceNode->children = [];

	$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );

	$targetNode = $newParent->userData;
	$targetId = $targetNode->id;
	foreach ( $movedChildren as $movedChild ) {
		// Non-object entries carry no parentId to update
		if ( !is_object( $movedChild ) ) {
			continue;
		}
		$movedChild->parentId = $targetId;
	}
	$targetNode->children = $movedChildren;
}
474 }
MediaWiki\Tidy\RemexCompatMunger\__construct
__construct(Serializer $serializer)
Constructor.
Definition: RemexCompatMunger.php:88
MediaWiki\Tidy\RemexCompatMunger
Definition: RemexCompatMunger.php:17
MediaWiki\Tidy\RemexCompatMunger\characters
characters( $preposition, $refElement, $text, $start, $length, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:149
captcha-old.count
count
Definition: captcha-old.py:225
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
MediaWiki\Tidy\RemexCompatMunger\endTag
endTag(Element $element, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:427
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:304
MediaWiki\Tidy\RemexCompatMunger\mergeAttributes
mergeAttributes(Element $element, Attributes $attrs, $sourceStart)
Definition: RemexCompatMunger.php:452
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
MediaWiki\Tidy\RemexCompatMunger\$onlyInlineElements
static $onlyInlineElements
Definition: RemexCompatMunger.php:18
MediaWiki\Tidy\RemexCompatMunger\startDocument
startDocument( $fragmentNamespace, $fragmentName)
Definition: RemexCompatMunger.php:92
MediaWiki\Tidy\RemexCompatMunger\doctype
doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:437
MediaWiki\Tidy\RemexCompatMunger\error
error( $text, $pos)
Definition: RemexCompatMunger.php:448
MediaWiki\Tidy\RemexCompatMunger\insertElement
insertElement( $preposition, $refElement, Element $element, $void, $sourceStart, $sourceLength)
Insert or reparent an element.
Definition: RemexCompatMunger.php:232
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
MediaWiki\Tidy\RemexCompatMunger\getParentForInsert
getParentForInsert( $preposition, $refElement)
Definition: RemexCompatMunger.php:103
MediaWiki\Tidy\RemexCompatMunger\splitTagStack
splitTagStack(SerializerNode $parentNode, $inline, $pos)
Clone nodes in a stack range and return the new parent.
Definition: RemexCompatMunger.php:321
MediaWiki\Tidy\RemexCompatMunger\$formattingElements
static $formattingElements
Definition: RemexCompatMunger.php:66
MediaWiki\Tidy\RemexCompatMunger\removeNode
removeNode(Element $element, $sourceStart)
Definition: RemexCompatMunger.php:456
$self
$self
Definition: doMaintenance.php:56
MediaWiki\Tidy\RemexCompatMunger\insertPWrapper
insertPWrapper(SerializerNode $parent, $sourceStart)
Insert a p-wrapper.
Definition: RemexCompatMunger.php:137
MediaWiki\Tidy\RemexCompatMunger\disablePWrapper
disablePWrapper(SerializerNode $node, $sourceStart)
Find the ancestor of $node which is a child of a p-wrapper, and reparent that node so that it is placed under the p-wrapper's parent instead.
Definition: RemexCompatMunger.php:396
MediaWiki\Tidy\RemexCompatMunger\endDocument
endDocument( $pos)
Definition: RemexCompatMunger.php:99
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
MediaWiki\Tidy\RemexCompatMunger\comment
comment( $preposition, $refElement, $text, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:442
MediaWiki\Tidy\RemexMungerData
Definition: RemexMungerData.php:8
MediaWiki\Tidy
Definition: Balancer.php:26
MediaWiki\Tidy\RemexCompatMunger\reparentChildren
reparentChildren(Element $element, Element $newParent, $sourceStart)
Definition: RemexCompatMunger.php:460