MediaWiki  1.30.0
RemexCompatMunger.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Tidy;
4 
5 use RemexHtml\HTMLData;
6 use RemexHtml\Serializer\Serializer;
7 use RemexHtml\Serializer\SerializerNode;
8 use RemexHtml\Tokenizer\Attributes;
9 use RemexHtml\Tokenizer\PlainAttributes;
10 use RemexHtml\TreeBuilder\TreeBuilder;
11 use RemexHtml\TreeBuilder\TreeHandler;
12 use RemexHtml\TreeBuilder\Element;
13 
/**
 * A TreeHandler which sits in front of a Serializer and adjusts the stream of
 * tree construction events before passing them on.
 *
 * Bare non-blank text and inline elements placed directly under a node that
 * is flagged as needing p-wrapping (the root node, and any body, blockquote
 * or html element) are diverted into a generated "mw:p-wrap" element. When
 * inline and non-inline content are mixed inside formatting elements, the
 * open stack of formatting elements is cloned ("split") so that the new
 * content lands either inside or outside a p-wrapper as appropriate.
 *
 * Per-node bookkeeping is kept in a RemexMungerData object stored in the
 * snData property of each SerializerNode.
 */
class RemexCompatMunger implements TreeHandler {
	/**
	 * Names of the elements which insertElement() treats as inline. Any
	 * element whose name is not a key of this array is treated as non-inline.
	 */
	private static $onlyInlineElements = [
		"a" => true,
		"abbr" => true,
		"acronym" => true,
		"applet" => true,
		"b" => true,
		"basefont" => true,
		"bdo" => true,
		"big" => true,
		"br" => true,
		"button" => true,
		"cite" => true,
		"code" => true,
		"dfn" => true,
		"em" => true,
		"font" => true,
		"i" => true,
		"iframe" => true,
		"img" => true,
		"input" => true,
		"kbd" => true,
		"label" => true,
		"legend" => true,
		"map" => true,
		"object" => true,
		"param" => true,
		"q" => true,
		"rb" => true,
		"rbc" => true,
		"rp" => true,
		"rt" => true,
		"rtc" => true,
		"ruby" => true,
		"s" => true,
		"samp" => true,
		"select" => true,
		"small" => true,
		"span" => true,
		"strike" => true,
		"strong" => true,
		"sub" => true,
		"sup" => true,
		"textarea" => true,
		"tt" => true,
		"u" => true,
		"var" => true,
	];

	/**
	 * Names of the elements which may be marked as splittable, i.e. which
	 * splitTagStack() is allowed to clone. An element is only marked when it
	 * is inserted under a p-wrapper or under another splittable element.
	 */
	private static $formattingElements = [
		'a' => true,
		'b' => true,
		'big' => true,
		'code' => true,
		'em' => true,
		'font' => true,
		'i' => true,
		'nobr' => true,
		's' => true,
		'small' => true,
		'strike' => true,
		'strong' => true,
		'tt' => true,
		'u' => true,
	];

	/**
	 * The downstream serializer which receives the adjusted events.
	 *
	 * Declared explicitly so that the constructor does not create a dynamic
	 * property (deprecated as of PHP 8.2).
	 *
	 * @var Serializer
	 */
	private $serializer;

	/**
	 * @param Serializer $serializer The serializer to forward events to
	 */
	public function __construct( Serializer $serializer ) {
		$this->serializer = $serializer;
	}

	/**
	 * Start the document downstream, then attach munger data to the root
	 * node and flag it as needing p-wrapping.
	 *
	 * @param string|null $fragmentNamespace Passed through to the serializer
	 * @param string|null $fragmentName Passed through to the serializer
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		$this->serializer->startDocument( $fragmentNamespace, $fragmentName );
		$root = $this->serializer->getRootNode();
		$root->snData = new RemexMungerData;
		$root->snData->needsPWrapping = true;
	}

	/**
	 * Forward the end of the document to the serializer.
	 *
	 * @param int $pos Passed through to the serializer
	 */
	public function endDocument( $pos ) {
		$this->serializer->endDocument( $pos );
	}

	/**
	 * Work out which serializer node an insertion is really aimed at.
	 *
	 * For TreeBuilder::ROOT the parent is the root node and there is no
	 * reference node. For TreeBuilder::BEFORE the parent is the parent of the
	 * reference node. For any other preposition (i.e. an insertion under the
	 * reference element) the reference is redirected to the end of its chain
	 * of clones if it has been cloned, or else to its open p-wrapper child if
	 * it has one, and that node is used as both parent and reference.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element from the tree builder
	 * @return array A two-element list: the parent SerializerNode, and the
	 *   SerializerNode (or null) to pass downstream as the reference
	 */
	private function getParentForInsert( $preposition, $refElement ) {
		if ( $preposition === TreeBuilder::ROOT ) {
			return [ $this->serializer->getRootNode(), null ];
		} elseif ( $preposition === TreeBuilder::BEFORE ) {
			$refNode = $refElement->userData;
			return [ $this->serializer->getParentNode( $refNode ), $refNode ];
		} else {
			$refNode = $refElement->userData;
			$refData = $refNode->snData;
			if ( $refData->currentCloneElement ) {
				// Follow a chain of clone links if necessary
				$origRefData = $refData;
				while ( $refData->currentCloneElement ) {
					$refElement = $refData->currentCloneElement;
					$refNode = $refElement->userData;
					$refData = $refNode->snData;
				}
				// Cache the end of the chain in the requested element
				$origRefData->currentCloneElement = $refElement;
			} elseif ( $refData->childPElement ) {
				$refElement = $refData->childPElement;
				$refNode = $refElement->userData;
			}
			return [ $refNode, $refNode ];
		}
	}

	/**
	 * Insert a p-wrapper.
	 *
	 * A new "mw:p-wrap" element in the HTML namespace is inserted under
	 * $parent, and $parent is updated so that later insertions under it are
	 * diverted into the wrapper (see getParentForInsert()).
	 *
	 * @param SerializerNode $parent The node to insert the wrapper under
	 * @param int $sourceStart Source position passed to the serializer
	 * @return SerializerNode The node of the new p-wrapper
	 */
	private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
		$pWrap = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
		$this->serializer->insertElement( TreeBuilder::UNDER, $parent, $pWrap, false,
			$sourceStart, 0 );
		$data = new RemexMungerData;
		$data->isPWrapper = true;
		$data->wrapBaseNode = $parent;
		$pWrap->userData->snData = $data;
		$parent->snData->childPElement = $pWrap;
		return $pWrap->userData;
	}

	/**
	 * Insert text, adding a p-wrapper or splitting the tag stack first if
	 * the text would otherwise end up unwrapped.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element
	 * @param string $text The string containing the text
	 * @param int $start Offset of the text within $text
	 * @param int $length Length of the text within $text
	 * @param int $sourceStart Passed through to the serializer
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function characters( $preposition, $refElement, $text, $start, $length,
		$sourceStart, $sourceLength
	) {
		// Blank means the range consists solely of tab, LF, FF, CR and space
		$isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;

		list( $parent, $refNode ) = $this->getParentForInsert( $preposition, $refElement );
		$parentData = $parent->snData;

		if ( $preposition === TreeBuilder::UNDER ) {
			if ( $parentData->needsPWrapping && !$isBlank ) {
				// Add a p-wrapper for bare text under body/blockquote
				$refNode = $this->insertPWrapper( $refNode, $sourceStart );
				$parent = $refNode;
				$parentData = $parent->snData;
			} elseif ( $parentData->isSplittable && !$parentData->ancestorPNode ) {
				// The parent is splittable and in block mode, so split the tag stack
				$refNode = $this->splitTagStack( $refNode, true, $sourceStart );
				$parent = $refNode;
				$parentData = $parent->snData;
			}
		}

		if ( !$isBlank ) {
			// Non-whitespace characters detected
			$parentData->nonblankNodeCount++;
		}
		$this->serializer->characters( $preposition, $refNode, $text, $start,
			$length, $sourceStart, $sourceLength );
	}

	/**
	 * Insert or reparent an element.
	 *
	 * Depending on whether the element is inline and on the state of the
	 * effective parent, the insertion point is adjusted before the element is
	 * passed to the serializer. The bracketed tags in the branch comments
	 * below are the original author's labels for each parent-state/element
	 * combination. Afterwards the element's own munger data is initialised
	 * from that of its effective parent.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element
	 * @param Element $element The element to insert
	 * @param bool $void Passed through to the serializer
	 * @param int $sourceStart Passed through to the serializer
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function insertElement( $preposition, $refElement, Element $element, $void,
		$sourceStart, $sourceLength
	) {
		list( $parent, $newRef ) = $this->getParentForInsert( $preposition, $refElement );
		$parentData = $parent->snData;
		$elementName = $element->htmlName;

		$inline = isset( self::$onlyInlineElements[$elementName] );
		$under = $preposition === TreeBuilder::UNDER;

		if ( $under && $parentData->isPWrapper && !$inline ) {
			// [B/b] The element is non-inline and the parent is a p-wrapper,
			// close the parent and insert into its parent instead
			$newParent = $this->serializer->getParentNode( $parent );
			$parent = $newParent;
			$parentData = $parent->snData;
			$pElement = $parentData->childPElement;
			$parentData->childPElement = null;
			$newRef = $refElement->userData;
			$this->endTag( $pElement, $sourceStart, 0 );
		} elseif ( $under && $parentData->isSplittable
			&& (bool)$parentData->ancestorPNode !== $inline
		) {
			// [CS/b, DS/i] The parent is splittable and the current element is
			// inline in block context, or if the current element is a block
			// under a p-wrapper, split the tag stack.
			$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
			$parent = $newRef;
			$parentData = $parent->snData;
		} elseif ( $under && $parentData->needsPWrapping && $inline ) {
			// [A/i] If the element is inline and we are in body/blockquote,
			// we need to create a p-wrapper
			$newRef = $this->insertPWrapper( $newRef, $sourceStart );
			$parent = $newRef;
			$parentData = $parent->snData;
		} elseif ( $parentData->ancestorPNode && !$inline ) {
			// [CU/b] If the element is non-inline and (despite attempting to
			// split above) there is still an ancestor p-wrap, disable that
			// p-wrap
			$this->disablePWrapper( $parent, $sourceStart );
		}
		// else [A/b, B/i, C/i, D/b, DU/i] insert as normal

		// An element with element children is a non-blank element
		$parentData->nonblankNodeCount++;

		// Insert the element downstream and so initialise its userData
		$this->serializer->insertElement( $preposition, $newRef,
			$element, $void, $sourceStart, $sourceLength );

		// Initialise snData
		if ( !$element->userData->snData ) {
			$elementData = $element->userData->snData = new RemexMungerData;
		} else {
			// Already has munger data, e.g. the fake element that
			// disablePWrapper() builds around an existing node
			$elementData = $element->userData->snData;
		}
		if ( ( $parentData->isPWrapper || $parentData->isSplittable )
			&& isset( self::$formattingElements[$elementName] )
		) {
			$elementData->isSplittable = true;
		}
		if ( $parentData->isPWrapper ) {
			$elementData->ancestorPNode = $parent;
		} elseif ( $parentData->ancestorPNode ) {
			$elementData->ancestorPNode = $parentData->ancestorPNode;
		}
		if ( $parentData->wrapBaseNode ) {
			$elementData->wrapBaseNode = $parentData->wrapBaseNode;
		} elseif ( $parentData->needsPWrapping ) {
			$elementData->wrapBaseNode = $parent;
		}
		if ( $elementName === 'body'
			|| $elementName === 'blockquote'
			|| $elementName === 'html'
		) {
			$elementData->needsPWrapping = true;
		}
	}

	/**
	 * Clone nodes in a stack range and return the new parent.
	 *
	 * The range runs from $parentNode up to, but not including, its ancestor
	 * p-wrapper if it has one, or otherwise its wrap base node. The clones
	 * are created outermost first, nested inside each other, under either a
	 * freshly inserted p-wrapper (if $inline is true) or the wrap base node
	 * itself. Each original node is linked to its clone so that later
	 * insertions are redirected by getParentForInsert(). Originals which have
	 * no non-blank content are removed via the serializer.
	 *
	 * @param SerializerNode $parentNode The innermost node of the range
	 * @param bool $inline True to place the clones inside a new p-wrapper,
	 *   false to place them directly under the wrap base node
	 * @param int $pos Source position passed to the serializer
	 * @return SerializerNode The innermost clone, or the new p-wrapper or
	 *   the wrap base node if the range was empty
	 * @throws \Exception If the parent of a node in the range is the root
	 *   node, meaning the end of the range was not found
	 */
	private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
		$parentData = $parentNode->snData;
		$wrapBase = $parentData->wrapBaseNode;
		$pWrap = $parentData->ancestorPNode;
		// Stop at the open p-wrapper if there is one, otherwise at the wrap base
		$cloneEnd = $pWrap ?: $wrapBase;

		$serializer = $this->serializer;
		$node = $parentNode;
		$root = $serializer->getRootNode();
		$nodes = [];
		$removableNodes = [];
		while ( $node !== $cloneEnd ) {
			$nextParent = $serializer->getParentNode( $node );
			if ( $nextParent === $root ) {
				throw new \Exception( 'Did not find end of clone range' );
			}
			$nodes[] = $node;
			if ( $node->snData->nonblankNodeCount === 0 ) {
				// This node will be removed below, so it no longer counts
				// as content of its parent
				$removableNodes[] = $node;
				$nextParent->snData->nonblankNodeCount--;
			}
			$node = $nextParent;
		}

		if ( $inline ) {
			$pWrap = $this->insertPWrapper( $wrapBase, $pos );
			$node = $pWrap;
		} else {
			if ( $pWrap ) {
				// End the p-wrap which was open, cancel the diversion
				$wrapBase->snData->childPElement = null;
			}
			$pWrap = null;
			$node = $wrapBase;
		}

		// $nodes was collected innermost first, so walk it backwards to
		// recreate the nesting from the outside in
		for ( $i = count( $nodes ) - 1; $i >= 0; $i-- ) {
			$oldNode = $nodes[$i];
			$oldData = $oldNode->snData;
			$nodeParent = $node;
			$element = new Element( $oldNode->namespace, $oldNode->name, $oldNode->attrs );
			$this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
				$element, false, $pos, 0 );
			$oldData->currentCloneElement = $element;

			$newNode = $element->userData;
			$newData = $newNode->snData = new RemexMungerData;
			if ( $pWrap ) {
				$newData->ancestorPNode = $pWrap;
			}
			$newData->isSplittable = true;
			$newData->wrapBaseNode = $wrapBase;
			$newData->isPWrapper = $oldData->isPWrapper;

			$nodeParent->snData->nonblankNodeCount++;

			$node = $newNode;
		}
		foreach ( $removableNodes as $rNode ) {
			// The serializer's removeNode() takes an Element, so wrap the
			// node in a throwaway one
			$fakeElement = new Element( $rNode->namespace, $rNode->name, $rNode->attrs );
			$fakeElement->userData = $rNode;
			$this->serializer->removeNode( $fakeElement, $pos );
		}
		return $node;
	}

	/**
	 * Find the ancestor of $node which is a child of a p-wrapper, and
	 * reparent that node so that it is placed after the end of the p-wrapper.
	 *
	 * Nothing is done unless the p-wrapper is the last child of its parent.
	 * The ancestorPNode link is cleared on $node and on each of its ancestors
	 * below the p-wrapper, and the diversion into the p-wrapper is cancelled
	 * on the p-wrapper's parent.
	 *
	 * @param SerializerNode $node A node which has an ancestor p-wrapper
	 * @param int $sourceStart Source position passed to the serializer
	 */
	private function disablePWrapper( SerializerNode $node, $sourceStart ) {
		$nodeData = $node->snData;
		$pWrapNode = $nodeData->ancestorPNode;
		$newParent = $this->serializer->getParentNode( $pWrapNode );
		if ( $pWrapNode !== $this->serializer->getLastChild( $newParent ) ) {
			// Fostering or something? Abort!
			return;
		}

		// Walk up from $node; when the loop ends, $victim is the child of
		// the p-wrapper
		$nextParent = $node;
		do {
			$victim = $nextParent;
			$victim->snData->ancestorPNode = null;
			$nextParent = $this->serializer->getParentNode( $victim );
		} while ( $nextParent !== $pWrapNode );

		// Make a fake Element to use in a reparenting operation
		$victimElement = new Element( $victim->namespace, $victim->name, $victim->attrs );
		$victimElement->userData = $victim;

		// Reparent
		$this->serializer->insertElement( TreeBuilder::UNDER, $newParent, $victimElement,
			false, $sourceStart, 0 );

		// Decrement nonblank node count
		$pWrapNode->snData->nonblankNodeCount--;

		// Cancel the diversion so that no more elements are inserted under this p-wrap
		$newParent->snData->childPElement = null;
	}

	/**
	 * End an element. If the element has an open p-wrapper child, that is
	 * ended first. The element's link to its serializer node, and the node's
	 * munger data, are cleared afterwards.
	 *
	 * @param Element $element The element being ended
	 * @param int $sourceStart Passed through to the serializer
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function endTag( Element $element, $sourceStart, $sourceLength ) {
		$data = $element->userData->snData;
		if ( $data->childPElement ) {
			$this->endTag( $data->childPElement, $sourceStart, 0 );
		}
		$this->serializer->endTag( $element, $sourceStart, $sourceLength );
		$element->userData->snData = null;
		$element->userData = null;
	}

	/**
	 * Forward a doctype to the serializer unchanged.
	 *
	 * @param string $name Passed through to the serializer
	 * @param string $public Passed through to the serializer
	 * @param string $system Passed through to the serializer
	 * @param int $quirks Passed through to the serializer
	 * @param int $sourceStart Passed through to the serializer
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		$this->serializer->doctype( $name, $public, $system, $quirks,
			$sourceStart, $sourceLength );
	}

	/**
	 * Insert a comment at the (possibly redirected) reference node. Comments
	 * never trigger p-wrapping or splitting.
	 *
	 * @param int $preposition One of the TreeBuilder preposition constants
	 * @param Element|null $refElement The reference element
	 * @param string $text The comment text
	 * @param int $sourceStart Passed through to the serializer
	 * @param int $sourceLength Passed through to the serializer
	 */
	public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
		// Only the reference node is needed here, not the parent
		list( , $refNode ) = $this->getParentForInsert( $preposition, $refElement );
		$this->serializer->comment( $preposition, $refNode, $text,
			$sourceStart, $sourceLength );
	}

	/**
	 * Forward a parse error to the serializer unchanged.
	 *
	 * @param string $text Passed through to the serializer
	 * @param int $pos Passed through to the serializer
	 */
	public function error( $text, $pos ) {
		$this->serializer->error( $text, $pos );
	}

	/**
	 * Forward an attribute merge to the serializer unchanged.
	 *
	 * @param Element $element Passed through to the serializer
	 * @param Attributes $attrs Passed through to the serializer
	 * @param int $sourceStart Passed through to the serializer
	 */
	public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
		$this->serializer->mergeAttributes( $element, $attrs, $sourceStart );
	}

	/**
	 * Forward a node removal to the serializer unchanged.
	 *
	 * @param Element $element Passed through to the serializer
	 * @param int $sourceStart Passed through to the serializer
	 */
	public function removeNode( Element $element, $sourceStart ) {
		$this->serializer->removeNode( $element, $sourceStart );
	}

	/**
	 * Move all the children of $element into $newParent, and make $newParent
	 * the sole child of $element.
	 *
	 * The child list is detached from $element's node first, $newParent is
	 * then inserted under $element using this class's own insertElement(),
	 * and finally the detached children are attached to $newParent's node.
	 *
	 * @param Element $element The element whose children are moved
	 * @param Element $newParent The element which receives the children
	 * @param int $sourceStart Source position passed to insertElement()
	 */
	public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
		$self = $element->userData;
		$children = $self->children;
		$self->children = [];
		$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
		$newParentNode = $newParent->userData;
		$newParentId = $newParentNode->id;
		foreach ( $children as $child ) {
			// Only object entries have a parentId to update
			if ( is_object( $child ) ) {
				$child->parentId = $newParentId;
			}
		}
		$newParentNode->children = $children;
	}
}
MediaWiki\Tidy\RemexCompatMunger\__construct
__construct(Serializer $serializer)
Definition: RemexCompatMunger.php:86
MediaWiki\Tidy\RemexCompatMunger
Definition: RemexCompatMunger.php:17
MediaWiki\Tidy\RemexCompatMunger\characters
characters( $preposition, $refElement, $text, $start, $length, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:147
captcha-old.count
count
Definition: captcha-old.py:249
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
MediaWiki\Tidy\RemexCompatMunger\endTag
endTag(Element $element, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:425
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:302
MediaWiki\Tidy\RemexCompatMunger\mergeAttributes
mergeAttributes(Element $element, Attributes $attrs, $sourceStart)
Definition: RemexCompatMunger.php:450
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
MediaWiki\Tidy\RemexCompatMunger\$onlyInlineElements
static $onlyInlineElements
Definition: RemexCompatMunger.php:18
MediaWiki\Tidy\RemexCompatMunger\startDocument
startDocument( $fragmentNamespace, $fragmentName)
Definition: RemexCompatMunger.php:90
MediaWiki\Tidy\RemexCompatMunger\doctype
doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:435
MediaWiki\Tidy\RemexCompatMunger\error
error( $text, $pos)
Definition: RemexCompatMunger.php:446
MediaWiki\Tidy\RemexCompatMunger\insertElement
insertElement( $preposition, $refElement, Element $element, $void, $sourceStart, $sourceLength)
Insert or reparent an element.
Definition: RemexCompatMunger.php:230
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
MediaWiki\Tidy\RemexCompatMunger\getParentForInsert
getParentForInsert( $preposition, $refElement)
Definition: RemexCompatMunger.php:101
MediaWiki\Tidy\RemexCompatMunger\splitTagStack
splitTagStack(SerializerNode $parentNode, $inline, $pos)
Clone nodes in a stack range and return the new parent.
Definition: RemexCompatMunger.php:319
MediaWiki\Tidy\RemexCompatMunger\$formattingElements
static $formattingElements
Definition: RemexCompatMunger.php:66
MediaWiki\Tidy\RemexCompatMunger\removeNode
removeNode(Element $element, $sourceStart)
Definition: RemexCompatMunger.php:454
$self
$self
Definition: doMaintenance.php:56
MediaWiki\Tidy\RemexCompatMunger\insertPWrapper
insertPWrapper(SerializerNode $parent, $sourceStart)
Insert a p-wrapper.
Definition: RemexCompatMunger.php:135
MediaWiki\Tidy\RemexCompatMunger\disablePWrapper
disablePWrapper(SerializerNode $node, $sourceStart)
Find the ancestor of $node which is a child of a p-wrapper, and reparent that node so that it is placed after the end of the p-wrapper.
Definition: RemexCompatMunger.php:394
MediaWiki\Tidy\RemexCompatMunger\endDocument
endDocument( $pos)
Definition: RemexCompatMunger.php:97
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
MediaWiki\Tidy\RemexCompatMunger\comment
comment( $preposition, $refElement, $text, $sourceStart, $sourceLength)
Definition: RemexCompatMunger.php:440
MediaWiki\Tidy\RemexMungerData
Definition: RemexMungerData.php:8
MediaWiki\Tidy
Definition: Balancer.php:26
MediaWiki\Tidy\RemexCompatMunger\reparentChildren
reparentChildren(Element $element, Element $newParent, $sourceStart)
Definition: RemexCompatMunger.php:458