MediaWiki  REL1_31
Balancer.php
Go to the documentation of this file.
1 <?php
27 namespace MediaWiki\Tidy;
28 
30 use IteratorAggregate;
33 use Wikimedia\Assert\Assert;
34 use Wikimedia\Assert\ParameterAssertionException;
35 
36 // A note for future librarization[1] -- this file is a good candidate
37 // for splitting into an independent library, except that it is currently
38 // highly optimized for MediaWiki use. It only implements the portions
39 // of the HTML5 tree builder used by tags supported by MediaWiki, and
40 // does not contain a true tokenizer pass, instead relying on
41 // comment stripping, attribute normalization, and escaping done by
42 // the MediaWiki Sanitizer. It also deliberately avoids building
43 // a true DOM in memory, instead serializing elements to an output string
44 // as soon as possible (usually as soon as the tag is closed) to reduce
45 // its memory footprint.
46 
47 // We've been gradually lifting some of these restrictions to handle
48 // non-sanitized output generated by extensions, but we shortcut the tokenizer
49 // for speed (primarily by splitting on `<`) and so rely on syntactic
50 // well-formedness.
51 
52 // On the other hand, I've been pretty careful to note with comments in the
53 // code the places where this implementation omits features of the spec or
54 // depends on the MediaWiki Sanitizer. Perhaps in the future we'll want to
55 // implement the missing pieces and make this a standalone PHP HTML5 parser.
56 // In order to do so, some sort of MediaWiki-specific API will need
57 // to be added to (a) allow the Balancer to bypass the tokenizer,
58 // and (b) support on-the-fly flattening instead of DOM node creation.
59 
60 // [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
61 
70 class BalanceSets {
71  const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
72  const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
73  const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';
74 
75  public static $unsupportedSet = [
76  self::HTML_NAMESPACE => [
77  'html' => true, 'head' => true, 'body' => true, 'frameset' => true,
78  'frame' => true,
79  'plaintext' => true,
80  'xmp' => true, 'iframe' => true, 'noembed' => true,
81  'noscript' => true, 'script' => true,
82  'title' => true
83  ]
84  ];
85 
86  public static $emptyElementSet = [
87  self::HTML_NAMESPACE => [
88  'area' => true, 'base' => true, 'basefont' => true,
89  'bgsound' => true, 'br' => true, 'col' => true, 'command' => true,
90  'embed' => true, 'frame' => true, 'hr' => true, 'img' => true,
91  'input' => true, 'keygen' => true, 'link' => true, 'meta' => true,
92  'param' => true, 'source' => true, 'track' => true, 'wbr' => true
93  ]
94  ];
95 
96  public static $extraLinefeedSet = [
97  self::HTML_NAMESPACE => [
98  'pre' => true, 'textarea' => true, 'listing' => true,
99  ]
100  ];
101 
102  public static $headingSet = [
103  self::HTML_NAMESPACE => [
104  'h1' => true, 'h2' => true, 'h3' => true,
105  'h4' => true, 'h5' => true, 'h6' => true
106  ]
107  ];
108 
109  public static $specialSet = [
110  self::HTML_NAMESPACE => [
111  'address' => true, 'applet' => true, 'area' => true,
112  'article' => true, 'aside' => true, 'base' => true,
113  'basefont' => true, 'bgsound' => true, 'blockquote' => true,
114  'body' => true, 'br' => true, 'button' => true, 'caption' => true,
115  'center' => true, 'col' => true, 'colgroup' => true, 'dd' => true,
116  'details' => true, 'dir' => true, 'div' => true, 'dl' => true,
117  'dt' => true, 'embed' => true, 'fieldset' => true,
118  'figcaption' => true, 'figure' => true, 'footer' => true,
119  'form' => true, 'frame' => true, 'frameset' => true, 'h1' => true,
120  'h2' => true, 'h3' => true, 'h4' => true, 'h5' => true,
121  'h6' => true, 'head' => true, 'header' => true, 'hgroup' => true,
122  'hr' => true, 'html' => true, 'iframe' => true, 'img' => true,
123  'input' => true, 'li' => true, 'link' => true,
124  'listing' => true, 'main' => true, 'marquee' => true,
125  'menu' => true, 'meta' => true, 'nav' => true,
126  'noembed' => true, 'noframes' => true, 'noscript' => true,
127  'object' => true, 'ol' => true, 'p' => true, 'param' => true,
128  'plaintext' => true, 'pre' => true, 'script' => true,
129  'section' => true, 'select' => true, 'source' => true,
130  'style' => true, 'summary' => true, 'table' => true,
131  'tbody' => true, 'td' => true, 'template' => true,
132  'textarea' => true, 'tfoot' => true, 'th' => true, 'thead' => true,
133  'title' => true, 'tr' => true, 'track' => true, 'ul' => true,
134  'wbr' => true, 'xmp' => true
135  ],
136  self::SVG_NAMESPACE => [
137  'foreignobject' => true, 'desc' => true, 'title' => true
138  ],
139  self::MATHML_NAMESPACE => [
140  'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
141  'mtext' => true, 'annotation-xml' => true
142  ]
143  ];
144 
145  public static $addressDivPSet = [
146  self::HTML_NAMESPACE => [
147  'address' => true, 'div' => true, 'p' => true
148  ]
149  ];
150 
151  public static $tableSectionRowSet = [
152  self::HTML_NAMESPACE => [
153  'table' => true, 'thead' => true, 'tbody' => true,
154  'tfoot' => true, 'tr' => true
155  ]
156  ];
157 
158  public static $impliedEndTagsSet = [
159  self::HTML_NAMESPACE => [
160  'dd' => true, 'dt' => true, 'li' => true,
161  'menuitem' => true, 'optgroup' => true,
162  'option' => true, 'p' => true, 'rb' => true, 'rp' => true,
163  'rt' => true, 'rtc' => true
164  ]
165  ];
166 
167  public static $thoroughImpliedEndTagsSet = [
168  self::HTML_NAMESPACE => [
169  'caption' => true, 'colgroup' => true, 'dd' => true, 'dt' => true,
170  'li' => true, 'optgroup' => true, 'option' => true, 'p' => true,
171  'rb' => true, 'rp' => true, 'rt' => true, 'rtc' => true,
172  'tbody' => true, 'td' => true, 'tfoot' => true, 'th' => true,
173  'thead' => true, 'tr' => true
174  ]
175  ];
176 
177  public static $tableCellSet = [
178  self::HTML_NAMESPACE => [
179  'td' => true, 'th' => true
180  ]
181  ];
182  public static $tableContextSet = [
183  self::HTML_NAMESPACE => [
184  'table' => true, 'template' => true, 'html' => true
185  ]
186  ];
187 
188  public static $tableBodyContextSet = [
189  self::HTML_NAMESPACE => [
190  'tbody' => true, 'tfoot' => true, 'thead' => true,
191  'template' => true, 'html' => true
192  ]
193  ];
194 
195  public static $tableRowContextSet = [
196  self::HTML_NAMESPACE => [
197  'tr' => true, 'template' => true, 'html' => true
198  ]
199  ];
200 
201  // See https://html.spec.whatwg.org/multipage/forms.html#form-associated-element
202  public static $formAssociatedSet = [
203  self::HTML_NAMESPACE => [
204  'button' => true, 'fieldset' => true, 'input' => true,
205  'keygen' => true, 'object' => true, 'output' => true,
206  'select' => true, 'textarea' => true, 'img' => true
207  ]
208  ];
209 
210  public static $inScopeSet = [
211  self::HTML_NAMESPACE => [
212  'applet' => true, 'caption' => true, 'html' => true,
213  'marquee' => true, 'object' => true,
214  'table' => true, 'td' => true, 'template' => true,
215  'th' => true
216  ],
217  self::SVG_NAMESPACE => [
218  'foreignobject' => true, 'desc' => true, 'title' => true
219  ],
220  self::MATHML_NAMESPACE => [
221  'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
222  'mtext' => true, 'annotation-xml' => true
223  ]
224  ];
225 
/**
 * Lazily built copy of $inScopeSet extended with the HTML 'ol' and 'ul'
 * elements. Null until inListItemScopeSet() is first called.
 */
private static $inListItemScopeSet = null;

/**
 * Get the boundary set used for "list item scope" checks.
 *
 * The set is derived from $inScopeSet on first use and cached in a
 * private static, so later calls return the same array contents.
 *
 * @return array Map of namespace URI to (local name => true).
 */
public static function inListItemScopeSet() {
	if ( self::$inListItemScopeSet === null ) {
		// Array assignment copies by value, so $inScopeSet is not modified.
		self::$inListItemScopeSet = self::$inScopeSet;
		self::$inListItemScopeSet[self::HTML_NAMESPACE]['ol'] = true;
		self::$inListItemScopeSet[self::HTML_NAMESPACE]['ul'] = true;
	}
	// Callers pass the result straight to inSpecificScope(), so the
	// set has to be returned rather than just cached.
	return self::$inListItemScopeSet;
}
235 
/**
 * Lazily built copy of $inScopeSet extended with the HTML 'button'
 * element. Null until inButtonScopeSet() is first called.
 */
private static $inButtonScopeSet = null;

/**
 * Get the boundary set used for "button scope" checks.
 *
 * The set is derived from $inScopeSet on first use and cached in a
 * private static, so later calls return the same array contents.
 *
 * @return array Map of namespace URI to (local name => true).
 */
public static function inButtonScopeSet() {
	if ( self::$inButtonScopeSet === null ) {
		// Array assignment copies by value, so $inScopeSet is not modified.
		self::$inButtonScopeSet = self::$inScopeSet;
		self::$inButtonScopeSet[self::HTML_NAMESPACE]['button'] = true;
	}
	// Callers pass the result straight to inSpecificScope(), so the
	// set has to be returned rather than just cached.
	return self::$inButtonScopeSet;
}
244 
245  public static $inTableScopeSet = [
246  self::HTML_NAMESPACE => [
247  'html' => true, 'table' => true, 'template' => true
248  ]
249  ];
250 
251  public static $inInvertedSelectScopeSet = [
252  self::HTML_NAMESPACE => [
253  'option' => true, 'optgroup' => true
254  ]
255  ];
256 
258  self::MATHML_NAMESPACE => [
259  'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
260  'mtext' => true
261  ]
262  ];
263 
264  public static $htmlIntegrationPointSet = [
265  self::SVG_NAMESPACE => [
266  'foreignobject' => true,
267  'desc' => true,
268  'title' => true
269  ]
270  ];
271 
272  // For tidy compatibility.
273  public static $tidyPWrapSet = [
274  self::HTML_NAMESPACE => [
275  'body' => true, 'blockquote' => true,
276  // We parse with <body> as the fragment context, but the top-level
277  // element on the stack is actually <html>. We could use the
278  // "adjusted current node" everywhere to work around this, but it's
279  // easier just to add <html> to the p-wrap set.
280  'html' => true,
281  ],
282  ];
283  public static $tidyInlineSet = [
284  self::HTML_NAMESPACE => [
285  'a' => true, 'abbr' => true, 'acronym' => true, 'applet' => true,
286  'b' => true, 'basefont' => true, 'bdo' => true, 'big' => true,
287  'br' => true, 'button' => true, 'cite' => true, 'code' => true,
288  'dfn' => true, 'em' => true, 'font' => true, 'i' => true,
289  'iframe' => true, 'img' => true, 'input' => true, 'kbd' => true,
290  'label' => true, 'legend' => true, 'map' => true, 'object' => true,
291  'param' => true, 'q' => true, 'rb' => true, 'rbc' => true,
292  'rp' => true, 'rt' => true, 'rtc' => true, 'ruby' => true,
293  's' => true, 'samp' => true, 'select' => true, 'small' => true,
294  'span' => true, 'strike' => true, 'strong' => true, 'sub' => true,
295  'sup' => true, 'textarea' => true, 'tt' => true, 'u' => true,
296  'var' => true,
297  // Those defined in tidy.conf
298  'video' => true, 'audio' => true, 'bdi' => true, 'data' => true,
299  'time' => true, 'mark' => true,
300  ],
301  ];
302 }
303 
326  public $localName;
331  public $attribs;
332 
338  public $parent;
339 
347  public $children;
348 
352  private $noahKey;
353 
358  public $nextAFE;
359 
364  public $prevAFE;
365 
369  public $nextNoah;
370 
380  $this->localName = $localName;
381  $this->namespaceURI = $namespaceURI;
382  $this->attribs = $attribs;
383  $this->contents = '';
384  $this->parent = null;
385  $this->children = [];
386  }
387 
/**
 * Detach a direct child element from this node.
 *
 * The child's parent pointer is cleared and it is spliced out of
 * $this->children. Fails an assertion if this node has already been
 * flattened, or if $elt is not currently a child of this node.
 *
 * @param BalanceElement $elt The child to detach.
 */
private function removeChild( BalanceElement $elt ) {
	Assert::precondition(
		$this->parent !== 'flat', "Can't removeChild after flattening $this"
	);
	Assert::parameter(
		$elt->parent === $this, 'elt', 'must have $this as a parent'
	);
	// Strict search: children may be strings or elements, and elements
	// must be matched by identity.
	$position = array_search( $elt, $this->children, true );
	Assert::parameter( $position !== false, '$elt', 'must be a child of $this' );
	array_splice( $this->children, $position, 1 );
	$elt->parent = null;
}
404 
/**
 * Insert $b into this node's children immediately before the child $a.
 *
 * If $b is an element that already has a parent, it is detached from
 * that parent first. Strings are inserted as-is.
 *
 * @param BalanceElement $a Existing child of this node that marks the
 *   insertion point.
 * @param BalanceElement|string $b The element or text to insert.
 */
public function insertBefore( BalanceElement $a, $b ) {
	Assert::precondition(
		$this->parent !== 'flat', "Can't insertBefore after flattening."
	);
	$idx = array_search( $a, $this->children, true );
	Assert::parameter( $idx !== false, '$a', 'must be a child of $this' );
	if ( !is_string( $b ) ) {
		Assert::parameter( $b->parent !== 'flat', '$b', "Can't be flat" );
		if ( $b->parent !== null ) {
			$b->parent->removeChild( $b );
			// If $b was an earlier sibling of $a under this same parent,
			// detaching it shifted $a one slot to the left; look $a up
			// again so $b still lands directly before it. When $b is $a
			// itself the lookup fails and the original slot is reused.
			$shifted = array_search( $a, $this->children, true );
			if ( $shifted !== false ) {
				$idx = $shifted;
			}
		}
		$b->parent = $this;
	}
	array_splice( $this->children, $idx, 0, [ $b ] );
}
427 
/**
 * Add a node to the end of this element's child list.
 *
 * Text is appended directly. An element is first detached from its
 * current parent (if it has one) and then re-parented to this node.
 *
 * @param BalanceElement|string $elt The element or text to append.
 */
public function appendChild( $elt ) {
	Assert::precondition(
		$this->parent !== 'flat', "Can't appendChild after flattening."
	);
	if ( !is_string( $elt ) ) {
		$previousParent = $elt->parent;
		if ( $previousParent !== null ) {
			// Remove $elt from parent, if it had one.
			$previousParent->removeChild( $elt );
		}
		$elt->parent = $this;
	}
	$this->children[] = $elt;
}
447 
/**
 * Move every child of $elt to the end of this element's child list,
 * leaving $elt with no children.
 *
 * @param BalanceElement $elt The element whose children are taken over.
 */
public function adoptChildren( BalanceElement $elt ) {
	Assert::precondition(
		$elt->parent !== 'flat', "Can't adoptChildren after flattening."
	);
	foreach ( $elt->children as $node ) {
		if ( is_string( $node ) ) {
			$this->appendChild( $node );
			continue;
		}
		// Clearing the parent pointer up front stops appendChild() from
		// calling removeChild() on $elt for each child, which would be an
		// O(n^2) series of array_splice operations. The old child list is
		// dropped in one go below instead.
		$node->parent = null;
		$this->appendChild( $node );
	}
	$elt->children = [];
}
466 
477  public function flatten( array $config ) {
478  Assert::parameter( $this->parent !== null, '$this', 'must be a child' );
479  Assert::parameter( $this->parent !== 'flat', '$this', 'already flat' );
480  $idx = array_search( $this, $this->parent->children, true );
481  Assert::parameter(
482  $idx !== false, '$this', 'must be a child of its parent'
483  );
484  $tidyCompat = $config['tidyCompat'];
485  if ( $tidyCompat ) {
486  $blank = true;
487  foreach ( $this->children as $elt ) {
488  if ( !is_string( $elt ) ) {
489  $elt = $elt->flatten( $config );
490  }
491  if ( $blank && preg_match( '/[^\t\n\f\r ]/', $elt ) ) {
492  $blank = false;
493  }
494  }
495  if ( $this->isHtmlNamed( 'mw:p-wrap' ) ) {
496  $this->localName = 'p';
497  } elseif ( $blank ) {
498  // Add 'mw-empty-elt' class so elements can be hidden via CSS
499  // for compatibility with legacy tidy.
500  if ( !count( $this->attribs ) &&
501  ( $this->localName === 'tr' || $this->localName === 'li' )
502  ) {
503  $this->attribs = [ 'class' => "mw-empty-elt" ];
504  }
505  $blank = false;
506  } elseif (
508  count( $this->children ) > 0 &&
509  substr( $this->children[0], 0, 1 ) == "\n"
510  ) {
511  // Double the linefeed after pre/listing/textarea
512  // according to the (old) HTML5 fragment serialization
513  // algorithm (see https://github.com/whatwg/html/issues/944)
514  // to ensure this will round-trip.
515  array_unshift( $this->children, "\n" );
516  }
517  $flat = $blank ? '' : "{$this}";
518  } else {
519  $flat = "{$this}";
520  }
521  $this->parent->children[$idx] = $flat;
522  $this->parent = 'flat'; // for assertion checking
523  return $flat;
524  }
525 
/**
 * Serialize this element, its attributes and its children to an HTML
 * string.
 *
 * Attribute values are encoded with Sanitizer::encodeAttribute();
 * attribute names are emitted as given. Elements in
 * BalanceSets::$emptyElementSet are written in self-closing form and
 * must not have children.
 *
 * @return string
 */
public function __toString() {
	$encAttribs = '';
	foreach ( $this->attribs as $name => $value ) {
		$encValue = Sanitizer::encodeAttribute( $value );
		$encAttribs .= " $name=\"$encValue\"";
	}

	if ( $this->isA( BalanceSets::$emptyElementSet ) ) {
		Assert::invariant(
			count( $this->children ) === 0,
			"Empty elements shouldn't have children."
		);
		return "<{$this->localName}{$encAttribs} />";
	}

	$out = "<{$this->localName}{$encAttribs}>";
	// Children are either strings or BalanceElements; interpolation
	// stringifies the latter recursively through this same method.
	foreach ( $this->children as $elt ) {
		$out .= "{$elt}";
	}
	$out .= "</{$this->localName}>";
	return $out;
}
556 
557  // Utility functions on BalanceElements.
558 
/**
 * Test this element against an element, a tag set, or an HTML tag name.
 *
 * @param BalanceElement|array|string $set One of:
 *   - a BalanceElement: matches only if it is this very object;
 *   - an array keyed by namespace URI, then by local name: matches if
 *     this element's namespace/name pair is present;
 *   - anything else: treated as an HTML tag name.
 * @return bool
 */
public function isA( $set ) {
	if ( $set instanceof BalanceElement ) {
		return $this === $set;
	}
	if ( !is_array( $set ) ) {
		// assume this is an HTML element name.
		return $this->isHtml() && $this->localName === $set;
	}
	// A nested isset() is false when either level is missing.
	return isset( $set[$this->namespaceURI][$this->localName] );
}
578 
/**
 * Check whether this is an element in the HTML namespace with the
 * given local name.
 *
 * @param string $tagName Local name to compare against.
 * @return bool
 */
public function isHtmlNamed( $tagName ) {
	if ( $this->namespaceURI !== BalanceSets::HTML_NAMESPACE ) {
		return false;
	}
	return $this->localName === $tagName;
}
588 
594  public function isHtml() {
595  return $this->namespaceURI === BalanceSets::HTML_NAMESPACE;
596  }
597 
605  public function isMathmlTextIntegrationPoint() {
607  }
608 
/**
 * Determine whether this element is an HTML integration point.
 *
 * That is the case for members of BalanceSets::$htmlIntegrationPointSet,
 * and for a MathML 'annotation-xml' element whose 'encoding' attribute
 * is 'text/html' or 'application/xhtml+xml' (compared case-insensitively).
 *
 * @return bool
 */
public function isHtmlIntegrationPoint() {
	if ( $this->isA( BalanceSets::$htmlIntegrationPointSet ) ) {
		return true;
	}

	$isAnnotationXml =
		$this->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
		$this->localName === 'annotation-xml';
	if ( !$isAnnotationXml || !isset( $this->attribs['encoding'] ) ) {
		return false;
	}

	$encoding = $this->attribs['encoding'];
	return strcasecmp( $encoding, 'text/html' ) === 0 ||
		strcasecmp( $encoding, 'application/xhtml+xml' ) === 0;
}
631 
/**
 * Get a string key identifying this element's namespace, local name
 * and attribute set. The key is computed once and cached in
 * $this->noahKey.
 *
 * @return string
 */
public function getNoahKey() {
	if ( $this->noahKey === null ) {
		// Sort a copy by attribute name: the key then does not depend on
		// attribute order, and $this->attribs keeps its original order.
		$attribs = $this->attribs;
		ksort( $attribs );
		$this->noahKey = serialize( [ $this->namespaceURI, $this->localName, $attribs ] );
	}
	return $this->noahKey;
}
644 }
645 
661 class BalanceStack implements IteratorAggregate {
666  private $elements = [];
673  public $fosterParentMode = false;
679  private $config;
683  public $currentNode;
684 
690  public function __construct( array $config ) {
691  // always a root <html> element on the stack
692  array_push(
693  $this->elements,
695  );
696  $this->currentNode = $this->elements[0];
697  $this->config = $config;
698  }
699 
/**
 * Serialize the children of the root element at the bottom of the
 * stack, flattening any that are still elements.
 *
 * @return string
 */
public function getOutput() {
	// Don't include the outer '<html>....</html>'
	$html = '';
	foreach ( $this->elements[0]->children as $child ) {
		if ( !is_string( $child ) ) {
			$child = $child->flatten( $this->config );
		}
		$html .= $child;
	}
	return $html;
}
714 
721  public function insertComment( $value ) {
722  // Just another type of text node, except for tidy p-wrapping.
723  return $this->insertText( '<!--' . $value . '-->', true );
724  }
725 
/**
 * Insert text at the current insertion point.
 *
 * In foster-parent mode, text aimed at a table/section/row element is
 * foster-parented. In tidy-compat mode, non-comment text inside a
 * p-wrap container first opens an 'mw:p-wrap' element and is then
 * inserted into it.
 *
 * @param string $value The text to insert.
 * @param bool $isComment True when the text is a serialized comment,
 *   which is exempt from p-wrapping.
 */
public function insertText( $value, $isComment = false ) {
	$target = $this->currentNode;

	if (
		$this->fosterParentMode &&
		$target->isA( BalanceSets::$tableSectionRowSet )
	) {
		$this->fosterParent( $value );
		return;
	}

	if (
		$this->config['tidyCompat'] && !$isComment &&
		$target->isA( BalanceSets::$tidyPWrapSet )
	) {
		// Open a wrapper, then retry with the wrapper as current node.
		$this->insertHTMLElement( 'mw:p-wrap', [] );
		return $this->insertText( $value );
	}

	$target->appendChild( $value );
}
749 
/**
 * Create an element in the given namespace and insert it at the
 * current insertion point.
 *
 * @param string $namespaceURI Namespace of the new element.
 * @param string $tag Local name of the new element.
 * @param array $attribs Attributes of the new element.
 * @return BalanceElement Whatever insertElement() returns for the new
 *   element.
 */
public function insertForeignElement( $namespaceURI, $tag, $attribs ) {
	$element = new BalanceElement( $namespaceURI, $tag, $attribs );
	return $this->insertElement( $element );
}
764 
/**
 * Create an element in the HTML namespace and insert it at the
 * current insertion point.
 *
 * @param string $tag Local name of the new element.
 * @param array $attribs Attributes of the new element.
 * @return BalanceElement Whatever insertForeignElement() returns.
 */
public function insertHTMLElement( $tag, $attribs ) {
	return $this->insertForeignElement(
		BalanceSets::HTML_NAMESPACE, $tag, $attribs
	);
}
778 
786  public function insertElement( BalanceElement $elt ) {
787  if (
788  $this->currentNode->isHtmlNamed( 'mw:p-wrap' ) &&
790  ) {
791  // Tidy compatibility.
792  $this->pop();
793  }
794  if (
795  $this->fosterParentMode &&
796  $this->currentNode->isA( BalanceSets::$tableSectionRowSet )
797  ) {
798  $elt = $this->fosterParent( $elt );
799  } else {
800  $this->currentNode->appendChild( $elt );
801  }
802  Assert::invariant( $elt->parent !== null, "$elt must be in tree" );
803  Assert::invariant( $elt->parent !== 'flat', "$elt must not have been previous flattened" );
804  array_push( $this->elements, $elt );
805  $this->currentNode = $elt;
806  return $elt;
807  }
808 
815  public function inScope( $tag ) {
816  return $this->inSpecificScope( $tag, BalanceSets::$inScopeSet );
817  }
818 
825  public function inButtonScope( $tag ) {
826  return $this->inSpecificScope( $tag, BalanceSets::inButtonScopeSet() );
827  }
828 
835  public function inListItemScope( $tag ) {
836  return $this->inSpecificScope( $tag, BalanceSets::inListItemScopeSet() );
837  }
838 
845  public function inTableScope( $tag ) {
846  return $this->inSpecificScope( $tag, BalanceSets::$inTableScopeSet );
847  }
848 
/**
 * Check whether the stack has a matching element "in select scope".
 *
 * Walks the stack from the current node downwards. The walk succeeds on
 * the first element matching $tag and gives up at the first element
 * that is not in BalanceSets::$inInvertedSelectScopeSet.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return bool
 */
public function inSelectScope( $tag ) {
	// Can't use inSpecificScope to implement this, since it involves
	// *inverting* a set of tags. Implement manually.
	foreach ( $this as $node ) {
		if ( $node->isA( $tag ) ) {
			return true;
		}
		if ( !$node->isA( BalanceSets::$inInvertedSelectScopeSet ) ) {
			break;
		}
	}
	return false;
}
868 
/**
 * Check whether the stack has a matching element within the scope
 * delimited by $set.
 *
 * Walks the stack from the current node downwards. The walk succeeds on
 * the first element matching $tag and gives up at the first element
 * that is a member of $set.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @param BalanceElement|array|string $set The scope boundary, likewise
 *   tested with BalanceElement::isA().
 * @return bool
 */
public function inSpecificScope( $tag, $set ) {
	foreach ( $this as $node ) {
		if ( $node->isA( $tag ) ) {
			return true;
		}
		if ( $node->isA( $set ) ) {
			break;
		}
	}
	return false;
}
887 
/**
 * Pop elements off the stack for as long as the current node is one
 * whose end tag may be implied.
 *
 * @param string|null $butnot HTML tag name at which popping stops even
 *   though its end tag could be implied, or null for no exception.
 * @param bool $thorough Use BalanceSets::$thoroughImpliedEndTagsSet
 *   instead of BalanceSets::$impliedEndTagsSet.
 */
public function generateImpliedEndTags( $butnot = null, $thorough = false ) {
	$endTagSet = $thorough ?
		BalanceSets::$thoroughImpliedEndTagsSet :
		BalanceSets::$impliedEndTagsSet;
	while ( $this->currentNode ) {
		if ( $butnot !== null && $this->currentNode->isHtmlNamed( $butnot ) ) {
			break;
		}
		if ( !$this->currentNode->isA( $endTagSet ) ) {
			break;
		}
		$this->pop();
	}
}
908 
/**
 * Return the fragment context when one is given and the stack holds
 * only a single element; otherwise return the current node.
 *
 * @param BalanceElement|null $fragmentContext
 * @return BalanceElement|null
 */
public function adjustedCurrentNode( $fragmentContext ) {
	if ( $fragmentContext && count( $this->elements ) === 1 ) {
		return $fragmentContext;
	}
	return $this->currentNode;
}
918 
924  public function getIterator() {
925  return new ReverseArrayIterator( $this->elements );
926  }
927 
934  public function node( $idx ) {
935  return $this->elements[ $idx ];
936  }
937 
/**
 * Overwrite the stack entry at position $idx with $elt, updating the
 * current node when the top of the stack is the entry replaced.
 *
 * @param int $idx Position in the stack (0 is the bottom).
 * @param BalanceElement $elt The replacement element.
 */
public function replaceAt( $idx, BalanceElement $elt ) {
	$outgoing = $this->elements[$idx];
	Assert::precondition(
		$outgoing->parent !== 'flat',
		'Replaced element should not have already been flattened.'
	);
	Assert::precondition(
		$elt->parent !== 'flat',
		'New element should not have already been flattened.'
	);
	$this->elements[$idx] = $elt;
	$topIdx = count( $this->elements ) - 1;
	if ( $idx === $topIdx ) {
		$this->currentNode = $elt;
	}
}
957 
/**
 * Find the position of the topmost stack entry matching $tag.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return int Stack position (0 is the bottom), or -1 if nothing
 *   matches.
 */
public function indexOf( $tag ) {
	// Scan from the top of the stack towards the bottom.
	$pos = count( $this->elements );
	while ( $pos-- > 0 ) {
		if ( $this->elements[$pos]->isA( $tag ) ) {
			return $pos;
		}
	}
	return -1;
}
972 
977  public function length() {
978  return count( $this->elements );
979  }
980 
/**
 * Remove the top element from the stack and flatten it, unless it is
 * an 'mw:p-wrap' element. The current node becomes the new top of the
 * stack, or null if the stack is now empty.
 */
public function pop() {
	$popped = array_pop( $this->elements );
	$remaining = count( $this->elements );
	$this->currentNode = $remaining ?
		$this->elements[$remaining - 1] : null;
	if ( $popped->isHtmlNamed( 'mw:p-wrap' ) ) {
		return;
	}
	$popped->flatten( $this->config );
}
996 
/**
 * Pop elements until the stack holds no more than $idx entries.
 *
 * @param int $idx Stack length to shrink to.
 */
public function popTo( $idx ) {
	// Each pop() removes exactly one entry, so re-reading the length is
	// equivalent to counting down from the initial length.
	while ( count( $this->elements ) > $idx ) {
		$this->pop();
	}
}
1007 
/**
 * Pop elements off the stack up to and including the first one that
 * matches $tag. If nothing matches, the stack is emptied.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 */
public function popTag( $tag ) {
	while ( $this->currentNode ) {
		// Test before popping: pop() changes the current node.
		$matched = $this->currentNode->isA( $tag );
		$this->pop();
		if ( $matched ) {
			return;
		}
	}
}
1023 
/**
 * Pop elements until the current node is a member of $set or only the
 * bottom entry of the stack remains.
 *
 * @param BalanceElement|array|string $set Anything accepted by
 *   BalanceElement::isA().
 */
public function clearToContext( $set ) {
	// Note that we don't loop to 0. Never pop the <html> elt off.
	while (
		count( $this->elements ) > 1 &&
		!$this->currentNode->isA( $set )
	) {
		$this->pop();
	}
}
1038 
/**
 * Remove a specific element from the stack, wherever it sits.
 *
 * @param BalanceElement $elt The element to remove; must be on the
 *   stack and not yet flattened.
 * @param bool $flatten Whether to flatten $elt into its parent right
 *   away.
 */
public function removeElement( BalanceElement $elt, $flatten = true ) {
	Assert::parameter(
		$elt->parent !== 'flat',
		'$elt',
		'$elt should not already have been flattened.'
	);
	Assert::parameter(
		// A detached element has no grandparent to inspect.
		$elt->parent === null || $elt->parent->parent !== 'flat',
		'$elt',
		'The parent of $elt should not already have been flattened.'
	);
	$idx = array_search( $elt, $this->elements, true );
	Assert::parameter( $idx !== false, '$elt', 'must be in stack' );
	array_splice( $this->elements, $idx, 1 );
	if ( $idx === count( $this->elements ) ) {
		// The top of the stack was removed. Mirror pop(): the current
		// node is the new top, or null when nothing is left.
		$this->currentNode = $idx > 0 ? $this->elements[$idx - 1] : null;
	}
	if ( $flatten ) {
		// serialize $elt into its parent
		// otherwise, it will eventually serialize when the parent
		// is serialized, we just hold onto the memory for its
		// tree of objects a little longer.
		$elt->flatten( $this->config );
	}
	Assert::postcondition(
		array_search( $elt, $this->elements, true ) === false,
		'$elt should no longer be in open elements stack'
	);
}
1074 
/**
 * Insert $b into the stack directly above $a. If $a is the top of the
 * stack, $b becomes the current node.
 *
 * @param BalanceElement $a Element already on the stack.
 * @param BalanceElement $b Element to insert above $a.
 */
public function insertAfter( BalanceElement $a, BalanceElement $b ) {
	$idx = $this->indexOf( $a );
	// indexOf() reports a miss as -1, never false. Testing against
	// false would let a missing $a through and splice $b in at the
	// bottom of the stack.
	Assert::parameter( $idx >= 0, '$a', 'must be in stack' );
	if ( $idx === count( $this->elements ) - 1 ) {
		array_push( $this->elements, $b );
		$this->currentNode = $b;
	} else {
		array_splice( $this->elements, $idx + 1, 0, [ $b ] );
	}
}
1090 
1091  // Fostering and adoption.
1092 
/**
 * Foster-parent an element or text node.
 *
 * The target parent is the topmost 'template' on the stack if it sits
 * above the topmost 'table'; otherwise the parent of the topmost
 * 'table' (inserting directly before that table); otherwise the bottom
 * element of the stack. In tidy-compat mode, text is wrapped in an
 * 'mw:p-wrap' element when the parent calls for it, and an incoming
 * 'mw:p-wrap' is merged with one already sitting at the insertion
 * point.
 *
 * @param BalanceElement|string $elt The element or text to place.
 * @return BalanceElement|string $elt itself, or the existing p-wrapper
 *   that is reused in its place.
 */
private function fosterParent( $elt ) {
	$tableIdx = $this->indexOf( 'table' );
	$templateIdx = $this->indexOf( 'template' );
	$before = null;

	if ( $templateIdx >= 0 && ( $tableIdx < 0 || $templateIdx > $tableIdx ) ) {
		$parent = $this->elements[$templateIdx];
	} elseif ( $tableIdx >= 0 ) {
		$before = $this->elements[$tableIdx];
		$parent = $before->parent;
		// Assume all tables have parents, since we're not running scripts!
		Assert::invariant(
			$parent !== null, "All tables should have parents"
		);
	} else {
		$parent = $this->elements[0]; // the `html` element.
	}

	if ( $this->config['tidyCompat'] ) {
		if ( is_string( $elt ) ) {
			// We're fostering text: do we need a p-wrapper?
			if ( $parent->isA( BalanceSets::$tidyPWrapSet ) ) {
				$this->insertHTMLElement( 'mw:p-wrap', [] );
				$this->insertText( $elt );
				return $elt;
			}
		} elseif ( $elt->isHtmlNamed( 'mw:p-wrap' ) ) {
			// We're fostering an element; do we need to merge p-wrappers?
			if ( $before ) {
				$slot = array_search( $before, $parent->children, true );
			} else {
				$slot = count( $parent->children );
			}
			$preceding = $slot > 0 ? $parent->children[$slot - 1] : '';
			if (
				$preceding instanceof BalanceElement &&
				$preceding->isHtmlNamed( 'mw:p-wrap' )
			) {
				return $preceding; // Re-use existing p-wrapper.
			}
		}
	}

	if ( $before ) {
		$parent->insertBefore( $before, $elt );
		return $elt;
	}
	$parent->appendChild( $elt );
	return $elt;
}
1151 
1162  public function adoptionAgency( $tag, $afe ) {
1163  // If the current node is an HTML element whose tag name is subject,
1164  // and the current node is not in the list of active formatting
1165  // elements, then pop the current node off the stack of open
1166  // elements and abort these steps.
1167  if (
1168  $this->currentNode->isHtmlNamed( $tag ) &&
1169  !$afe->isInList( $this->currentNode )
1170  ) {
1171  $this->pop();
1172  return true; // no more handling required
1173  }
1174 
1175  // Outer loop: If outer loop counter is greater than or
1176  // equal to eight, then abort these steps.
1177  for ( $outer = 0; $outer < 8; $outer++ ) {
1178  // Let the formatting element be the last element in the list
1179  // of active formatting elements that: is between the end of
1180  // the list and the last scope marker in the list, if any, or
1181  // the start of the list otherwise, and has the same tag name
1182  // as the token.
1183  $fmtElt = $afe->findElementByTag( $tag );
1184 
1185  // If there is no such node, then abort these steps and instead
1186  // act as described in the "any other end tag" entry below.
1187  if ( !$fmtElt ) {
1188  return false; // false means handle by the default case
1189  }
1190 
1191  // Otherwise, if there is such a node, but that node is not in
1192  // the stack of open elements, then this is a parse error;
1193  // remove the element from the list, and abort these steps.
1194  $index = $this->indexOf( $fmtElt );
1195  if ( $index < 0 ) {
1196  $afe->remove( $fmtElt );
1197  return true; // true means no more handling required
1198  }
1199 
1200  // Otherwise, if there is such a node, and that node is also in
1201  // the stack of open elements, but the element is not in scope,
1202  // then this is a parse error; ignore the token, and abort
1203  // these steps.
1204  if ( !$this->inScope( $fmtElt ) ) {
1205  return true;
1206  }
1207 
1208  // Let the furthest block be the topmost node in the stack of
1209  // open elements that is lower in the stack than the formatting
1210  // element, and is an element in the special category. There
1211  // might not be one.
1212  $furthestBlock = null;
1213  $furthestBlockIndex = -1;
1214  $stackLength = $this->length();
1215  for ( $i = $index + 1; $i < $stackLength; $i++ ) {
1216  if ( $this->node( $i )->isA( BalanceSets::$specialSet ) ) {
1217  $furthestBlock = $this->node( $i );
1218  $furthestBlockIndex = $i;
1219  break;
1220  }
1221  }
1222 
1223  // If there is no furthest block, then the UA must skip the
1224  // subsequent steps and instead just pop all the nodes from the
1225  // bottom of the stack of open elements, from the current node
1226  // up to and including the formatting element, and remove the
1227  // formatting element from the list of active formatting
1228  // elements.
1229  if ( !$furthestBlock ) {
1230  $this->popTag( $fmtElt );
1231  $afe->remove( $fmtElt );
1232  return true;
1233  }
1234 
1235  // Let the common ancestor be the element immediately above
1236  // the formatting element in the stack of open elements.
1237  $ancestor = $this->node( $index - 1 );
1238 
1239  // Let a bookmark note the position of the formatting
1240  // element in the list of active formatting elements
1241  // relative to the elements on either side of it in the
1242  // list.
1243  $BOOKMARK = new BalanceElement( '[bookmark]', '[bookmark]', [] );
1244  $afe->insertAfter( $fmtElt, $BOOKMARK );
1245 
1246  // Let node and last node be the furthest block.
1247  $node = $furthestBlock;
1248  $lastNode = $furthestBlock;
1249  $nodeIndex = $furthestBlockIndex;
1250  $isAFE = false;
1251 
1252  // Inner loop
1253  for ( $inner = 1; true; $inner++ ) {
1254  // Let node be the element immediately above node in
1255  // the stack of open elements, or if node is no longer
1256  // in the stack of open elements (e.g. because it got
1257  // removed by this algorithm), the element that was
1258  // immediately above node in the stack of open elements
1259  // before node was removed.
1260  $node = $this->node( --$nodeIndex );
1261 
1262  // If node is the formatting element, then go
1263  // to the next step in the overall algorithm.
1264  if ( $node === $fmtElt ) break;
1265 
1266  // If the inner loop counter is greater than three and node
1267  // is in the list of active formatting elements, then remove
1268  // node from the list of active formatting elements.
1269  $isAFE = $afe->isInList( $node );
1270  if ( $inner > 3 && $isAFE ) {
1271  $afe->remove( $node );
1272  $isAFE = false;
1273  }
1274 
1275  // If node is not in the list of active formatting
1276  // elements, then remove node from the stack of open
1277  // elements and then go back to the step labeled inner
1278  // loop.
1279  if ( !$isAFE ) {
1280  // Don't flatten here, since we're about to relocate
1281  // parts of this $node.
1282  $this->removeElement( $node, false );
1283  continue;
1284  }
1285 
1286  // Create an element for the token for which the
1287  // element node was created with common ancestor as
1288  // the intended parent, replace the entry for node
1289  // in the list of active formatting elements with an
1290  // entry for the new element, replace the entry for
1291  // node in the stack of open elements with an entry for
1292  // the new element, and let node be the new element.
1293  $newElt = new BalanceElement(
1294  $node->namespaceURI, $node->localName, $node->attribs );
1295  $afe->replace( $node, $newElt );
1296  $this->replaceAt( $nodeIndex, $newElt );
1297  $node = $newElt;
1298 
1299  // If last node is the furthest block, then move the
1300  // aforementioned bookmark to be immediately after the
1301  // new node in the list of active formatting elements.
1302  if ( $lastNode === $furthestBlock ) {
1303  $afe->remove( $BOOKMARK );
1304  $afe->insertAfter( $newElt, $BOOKMARK );
1305  }
1306 
1307  // Insert last node into node, first removing it from
1308  // its previous parent node if any.
1309  $node->appendChild( $lastNode );
1310 
1311  // Let last node be node.
1312  $lastNode = $node;
1313  }
1314 
1315  // If the common ancestor node is a table, tbody, tfoot,
1316  // thead, or tr element, then, foster parent whatever last
1317  // node ended up being in the previous step, first removing
1318  // it from its previous parent node if any.
1319  if (
1320  $this->fosterParentMode &&
1321  $ancestor->isA( BalanceSets::$tableSectionRowSet )
1322  ) {
1323  $this->fosterParent( $lastNode );
1324  } else {
1325  // Otherwise, append whatever last node ended up being in
1326  // the previous step to the common ancestor node, first
1327  // removing it from its previous parent node if any.
1328  $ancestor->appendChild( $lastNode );
1329  }
1330 
1331  // Create an element for the token for which the
1332  // formatting element was created, with furthest block
1333  // as the intended parent.
1334  $newElt2 = new BalanceElement(
1335  $fmtElt->namespaceURI, $fmtElt->localName, $fmtElt->attribs );
1336 
1337  // Take all of the child nodes of the furthest block and
1338  // append them to the element created in the last step.
1339  $newElt2->adoptChildren( $furthestBlock );
1340 
1341  // Append that new element to the furthest block.
1342  $furthestBlock->appendChild( $newElt2 );
1343 
1344  // Remove the formatting element from the list of active
1345  // formatting elements, and insert the new element into the
1346  // list of active formatting elements at the position of
1347  // the aforementioned bookmark.
1348  $afe->remove( $fmtElt );
1349  $afe->replace( $BOOKMARK, $newElt2 );
1350 
1351  // Remove the formatting element from the stack of open
1352  // elements, and insert the new element into the stack of
1353  // open elements immediately below the position of the
1354  // furthest block in that stack.
1355  $this->removeElement( $fmtElt );
1356  $this->insertAfter( $furthestBlock, $newElt2 );
1357  }
1358 
1359  return true;
1360  }
1361 
/**
 * Build a string listing the local name of every element held in
 * $this->elements, in iteration order, separated by single spaces.
 *
 * @return string
 */
public function __toString() {
	$names = [];
	foreach ( $this->elements as $element ) {
		$names[] = $element->localName;
	}
	return implode( ' ', $names );
}
1374 }
1375 
1383  public $nextAFE;
1384  public $prevAFE;
1385 }
1386 
1398  private $tail;
1399 
1401  private $head;
1402 
1421  private $noahTableStack = [ [] ];
1422 
/**
 * Break every link between list entries (AFE links and Noah links) and
 * drop the list's own references, so that no entry keeps another alive.
 */
public function __destruct() {
	$current = $this->head;
	while ( $current ) {
		// Remember the successor before the links are wiped.
		$following = $current->nextAFE;
		$current->nextNoah = null;
		$current->nextAFE = null;
		$current->prevAFE = null;
		$current = $following;
	}
	$this->noahTableStack = null;
	$this->tail = null;
	$this->head = null;
}
1431 
/**
 * Append a new BalanceMarker to the end of the list and start a fresh,
 * empty Noah table for the entries that will follow the marker.
 */
public function insertMarker() {
	$marker = new BalanceMarker;
	$oldTail = $this->tail;
	if ( !$oldTail ) {
		// Empty list: the marker becomes the first entry as well.
		$this->head = $marker;
	} else {
		$oldTail->nextAFE = $marker;
		$marker->prevAFE = $oldTail;
	}
	$this->tail = $marker;
	$this->noahTableStack[] = [];
}
1443 
/**
 * Append an element to the end of the list.
 *
 * Applies the "Noah's Ark clause": when the current Noah table (the one
 * for entries after the most recent marker) already chains three or more
 * entries under the same Noah key, the earliest entry of that chain is
 * removed from the list before the new one is linked in.
 *
 * @param BalanceElement $elt
 * @throws ParameterAssertionException if $elt is already in the list
 */
public function push( BalanceElement $elt ) {
	// An element that is the head or has a predecessor is already linked.
	if ( $this->head === $elt || $elt->prevAFE !== null ) {
		throw new ParameterAssertionException( '$elt',
			'Cannot insert a node into the AFE list twice' );
	}

	// Register the element in the Noah chain for its key.
	$key = $elt->getNoahKey();
	$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
	if ( isset( $table[$key] ) ) {
		$first = $table[$key];
		$last = $first;
		$copies = 1;
		while ( $last->nextNoah ) {
			$last = $last->nextNoah;
			$copies++;
		}
		if ( $copies >= 3 ) {
			// Too many copies: drop the earliest one.
			$this->remove( $first );
		}
		$last->nextNoah = $elt;
	} else {
		$table[$key] = $elt;
	}

	// Link the element at the tail of the main AFE list.
	$oldTail = $this->tail;
	if ( !$oldTail ) {
		$this->head = $elt;
	} else {
		$oldTail->nextAFE = $elt;
		$elt->prevAFE = $oldTail;
	}
	$this->tail = $elt;
}
1484 
/**
 * Remove entries from the end of the list, up to and including the most
 * recent marker. If the list contains no marker, every entry is removed.
 */
public function clearToMarker() {
	// Walk backward from the tail, unlinking each non-marker entry.
	$cursor = $this->tail;
	while ( $cursor && !( $cursor instanceof BalanceMarker ) ) {
		$before = $cursor->prevAFE;
		$cursor->prevAFE = null;
		if ( $before ) {
			$before->nextAFE = null;
		}
		$cursor->nextNoah = null;
		$cursor = $before;
	}

	if ( !$cursor ) {
		// Ran off the front without meeting a marker: only the top-level
		// Noah table exists, so reset it.
		$this->noahTableStack[0] = [];
	} else {
		// Stopped on a marker: detach it and discard its Noah table.
		$before = $cursor->prevAFE;
		if ( $before ) {
			$before->nextAFE = null;
		}
		$cursor = $before;
		array_pop( $this->noahTableStack );
	}

	// Nothing left means the list has no head either.
	if ( !$cursor ) {
		$this->head = null;
	}
	$this->tail = $cursor;
}
1520 
/**
 * Search backward from the end of the list for an entry whose local name
 * equals $tag, giving up at the first marker encountered.
 *
 * @param string $tag
 * @return BalanceElement|null The matching entry, or null if none is
 *   found before a marker or the start of the list
 */
public function findElementByTag( $tag ) {
	for (
		$candidate = $this->tail;
		$candidate && !( $candidate instanceof BalanceMarker );
		$candidate = $candidate->prevAFE
	) {
		if ( $candidate->localName === $tag ) {
			return $candidate;
		}
	}
	return null;
}
1538 
/**
 * Report whether an element is currently linked into the list: it is
 * either the head, or it has a predecessor.
 *
 * @param BalanceElement $elt
 * @return bool
 */
public function isInList( BalanceElement $elt ) {
	if ( $this->head === $elt ) {
		return true;
	}
	return (bool)$elt->prevAFE;
}
1547 
/**
 * Unlink an element from the list and from its Noah chain.
 *
 * @param BalanceElement $elt
 * @throws ParameterAssertionException if $elt is not in the list
 */
public function remove( BalanceElement $elt ) {
	$before = $elt->prevAFE;
	$after = $elt->nextAFE;
	if ( !( $this->head === $elt || $before ) ) {
		throw new ParameterAssertionException( '$elt',
			"Attempted to remove an element which is not in the AFE list" );
	}
	// Move the list ends past the element if it sat at either of them.
	if ( $this->head === $elt ) {
		$this->head = $after;
	}
	if ( $this->tail === $elt ) {
		$this->tail = $before;
	}
	// Join the neighbours to each other.
	if ( $before ) {
		$before->nextAFE = $after;
	}
	if ( $after ) {
		$after->prevAFE = $before;
	}
	// A removed element must carry no links, so that isInList() and the
	// membership checks of the other methods report it as absent.
	$elt->nextAFE = null;
	$elt->prevAFE = null;
	$this->removeFromNoahList( $elt );
}
1579 
/**
 * Register an element in the topmost Noah table: either as the first
 * entry for its Noah key, or at the end of the existing chain for it.
 *
 * @param BalanceElement $elt
 */
private function addToNoahList( BalanceElement $elt ) {
	$key = $elt->getNoahKey();
	$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
	if ( isset( $table[$key] ) ) {
		// Follow the chain to its end and hang the element there.
		$last = $table[$key];
		while ( $last->nextNoah ) {
			$last = $last->nextNoah;
		}
		$last->nextNoah = $elt;
		return;
	}
	$table[$key] = $elt;
}
1593 
/**
 * Unlink an element from the chain stored under its Noah key in the
 * topmost Noah table.
 *
 * If the topmost table has no chain for the key, or the chain does not
 * contain the element, nothing is changed.
 *
 * @param BalanceElement $elt
 */
private function removeFromNoahList( BalanceElement $elt ) {
	$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
	$key = $elt->getNoahKey();
	if ( !isset( $table[$key] ) ) {
		// No chain for this key in the current table. Reading the missing
		// entry would raise an undefined-index notice and then dereference
		// null, so stop here instead.
		return;
	}
	$first = $table[$key];
	if ( $first === $elt ) {
		// The element heads the chain: promote its successor, or drop the
		// key altogether when it was the only entry.
		if ( $elt->nextNoah ) {
			$table[$key] = $elt->nextNoah;
			$elt->nextNoah = null;
		} else {
			unset( $table[$key] );
		}
		return;
	}
	// Otherwise look for the entry whose successor is the element.
	for ( $prev = $first; $prev->nextNoah; $prev = $prev->nextNoah ) {
		if ( $prev->nextNoah === $elt ) {
			$prev->nextNoah = $elt->nextNoah;
			$elt->nextNoah = null;
			return;
		}
	}
}
1618 
/**
 * Put element $b into the list position currently held by element $a.
 * $a ends up unlinked; the Noah table is updated for both elements.
 *
 * @param BalanceElement $a Element currently in the list
 * @param BalanceElement $b Element taking its place
 * @throws ParameterAssertionException if $a is not in the list
 */
public function replace( BalanceElement $a, BalanceElement $b ) {
	$before = $a->prevAFE;
	$after = $a->nextAFE;
	if ( !( $this->head === $a || $before ) ) {
		throw new ParameterAssertionException( '$a',
			"Attempted to replace an element which is not in the AFE list" );
	}
	// Redirect the list ends if $a sat at either of them.
	if ( $this->head === $a ) {
		$this->head = $b;
	}
	if ( $this->tail === $a ) {
		$this->tail = $b;
	}
	// Point the neighbours at the replacement.
	if ( $before ) {
		$before->nextAFE = $b;
	}
	if ( $after ) {
		$after->prevAFE = $b;
	}
	// Give the replacement $a's links, then clear them on $a.
	$b->prevAFE = $before;
	$b->nextAFE = $after;
	$a->prevAFE = null;
	$a->nextAFE = null;
	// Swap the Noah bookkeeping as well.
	$this->removeFromNoahList( $a );
	$this->addToNoahList( $b );
}
1652 
/**
 * Link element $b into the list immediately after element $a and
 * register $b in the topmost Noah table.
 *
 * @param BalanceElement $a Element currently in the list
 * @param BalanceElement $b Element to insert
 * @throws ParameterAssertionException if $a is not in the list
 */
public function insertAfter( BalanceElement $a, BalanceElement $b ) {
	if ( !( $this->head === $a || $a->prevAFE ) ) {
		throw new ParameterAssertionException( '$a',
			"Attempted to insert after an element which is not in the AFE list" );
	}
	$after = $a->nextAFE;
	if ( $this->tail === $a ) {
		// Inserting behind the last entry makes $b the new tail.
		$this->tail = $b;
	}
	if ( $after ) {
		$after->prevAFE = $b;
	}
	$b->nextAFE = $after;
	$b->prevAFE = $a;
	$a->nextAFE = $b;
	$this->addToNoahList( $b );
}
1675 
/**
 * Reconstruct the active formatting elements: re-create, on the given
 * stack, the trailing run of list entries that are no longer open.
 *
 * Nothing happens when the list is empty, or when its last entry is a
 * marker or is still present on the stack.
 *
 * @param BalanceStack $stack The stack of open elements
 */
public function reconstruct( $stack ) {
	$start = $this->tail;
	if (
		!$start ||
		$start instanceof BalanceMarker ||
		$stack->indexOf( $start ) >= 0
	) {
		return;
	}

	// Rewind to the earliest entry of the trailing run: stop as soon as
	// the preceding entry is a marker or an element that is still open,
	// or when the front of the list is reached.
	while ( $start->prevAFE ) {
		$probe = $start->prevAFE;
		if ( $probe instanceof BalanceMarker || $stack->indexOf( $probe ) >= 0 ) {
			break;
		}
		$start = $probe;
	}

	// Walk forward from there to the end of the list. Each entry is
	// re-created as a new HTML element inserted through the stack, and the
	// new element takes over the entry's position in this list.
	$entry = $start;
	do {
		$recreated = $stack->insertHTMLElement( $entry->localName, $entry->attribs );
		$this->replace( $entry, $recreated );
		$entry = $recreated->nextAFE;
	} while ( $entry );
}
1723 
/**
 * Dump the list, one entry per line, from head to tail. Markers are
 * printed as "MARKER"; elements are printed as their local name plus a
 * short hash of the object identity, followed by their Noah sibling if
 * any. Inconsistent back links and a wrong tail pointer are flagged.
 *
 * @return string
 */
public function __toString() {
	$out = '';
	$last = null;
	$node = $this->head;
	while ( $node ) {
		if ( $node instanceof BalanceMarker ) {
			$out .= "MARKER\n";
		} else {
			$out .= $node->localName . '#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
			if ( $node->nextNoah ) {
				$out .= " (noah sibling: {$node->nextNoah->localName}#" .
					substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
					')';
			}
			// The successor's back link must point at this node.
			if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
				$out .= " (reverse link is wrong!)";
			}
			$out .= "\n";
		}
		$last = $node;
		$node = $node->nextAFE;
	}
	// The last node visited must be the recorded tail.
	if ( $last !== $this->tail ) {
		$out .= "(tail pointer is wrong!)\n";
	}
	return $out;
}
1751 }
1752 
1804 class Balancer {
1805  private $parseMode;
1807  private $bitsIterator;
1810  private $afe;
1812  private $stack;
1813  private $strict;
1815  private $config;
1816 
1823  private $inRCDATA;
1824  private $inRAWTEXT;
1825 
1830 
1835  const VALID_COMMENT_REGEX = "~ !--
1836  ( # 1. Comment match detector
1837  > | -> | # Invalid short close
1838  ( # 2. Comment contents
1839  (?:
1840  (?! --> )
1841  (?! --!> )
1842  (?! --! \z )
1843  (?! -- \z )
1844  (?! - \z )
1845  .
1846  )*+
1847  )
1848  ( # 3. Comment close
1849  --> | # Normal close
1850  --!> | # Comment end bang
1851  ( # 4. Indicate matches requiring EOF
1852  --! | # EOF in comment end bang state
1853  -- | # EOF in comment end state
1854  - | # EOF in comment end dash state
1855  (?#nothing) # EOF in comment state
1856  )
1857  )
1858  )
1859  ([^<]*) \z # 5. Non-tag text after the comment
1860  ~xs";
1861 
/**
 * Create a new Balancer.
 *
 * Recognised $config keys and their defaults:
 *  - 'strict' (false): stored in $this->strict; when true, unsupported
 *    or malformed input trips an Assert::invariant instead of being
 *    tolerated.
 *  - 'allowedHtmlElements' (null): when not null, an array keyed by tag
 *    name; tags are then checked against it while parsing.
 *  - 'tidyCompat' (false): not read here; kept in the config array that
 *    is handed to BalanceStack.
 *  - 'allowComments' (true): whether comments are recognised.
 *
 * @param array $config
 * @throws ParameterAssertionException if 'allowedHtmlElements' names an
 *   element that the balancer does not support
 */
public function __construct( array $config = [] ) {
	// Caller-supplied values win; defaults fill in the missing keys.
	$config += [
		'strict' => false,
		'allowedHtmlElements' => null,
		'tidyCompat' => false,
		'allowComments' => true,
	];
	$this->config = $config;
	$this->allowedHtmlElements = $config['allowedHtmlElements'];
	$this->strict = $config['strict'];
	$this->allowComments = $config['allowComments'];

	if ( $this->allowedHtmlElements === null ) {
		return;
	}

	// Sanity check! Both arrays are keyed by tag name, so any key they
	// share is a whitelisted element that is in the unsupported set.
	$bad = array_intersect_key(
		$this->allowedHtmlElements,
		BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE]
	);
	if ( count( $bad ) > 0 ) {
		$badstr = implode( ',', array_keys( $bad ) );
		throw new ParameterAssertionException(
			'$config',
			'Balance attempted with sanitization including ' .
			"unsupported elements: {$badstr}"
		);
	}
}
1919 
/**
 * Balance a fragment of HTML text and return the serialized result.
 *
 * The text is split on '<'. The first piece is inserted as text, each
 * following piece is handed to advance(), and an 'eof' token ends the
 * run. Parsing starts in 'inBodyMode' with a synthetic <body> element as
 * the fragment context.
 *
 * @param string $text The text to balance
 * @param callable|null $processingCallback If callable, invoked for each
 *   accepted tag with the attribute string (by reference) and
 *   $processingArgs
 * @param array $processingArgs Passed through to $processingCallback
 * @return string The output collected by the stack
 */
public function balance( $text, $processingCallback = null, $processingArgs = [] ) {
	// Fresh per-run state.
	$this->parseMode = 'inBodyMode';
	$this->bitsIterator = new ExplodeIterator( '<', $text );
	$this->afe = new BalanceActiveFormattingElements();
	$this->stack = new BalanceStack( $this->config );
	$this->processingCallback = $processingCallback;
	$this->processingArgs = $processingArgs;

	$this->inRAWTEXT = false;
	$this->inRCDATA = false;
	$this->ignoreLinefeed = false;
	$this->textIntegrationMode = false;

	// The stack is constructed with an <html> element already on it.
	// Treat the input as a fragment parsed with <body> as the context.
	$this->fragmentContext =
		new BalanceElement( BalanceSets::HTML_NAMESPACE, 'body', [] );
	$this->resetInsertionMode();

	// The form element pointer is the nearest <form> found by walking up
	// from the context element, if there is one.
	$this->formElementPointer = null;
	$candidate = $this->fragmentContext;
	while ( $candidate != null ) {
		if ( $candidate->isHtmlNamed( 'form' ) ) {
			$this->formElementPointer = $candidate;
			break;
		}
		$candidate = $candidate->parent;
	}

	// Everything before the first '<' is plain text, not a tag.
	$leadingText = $this->bitsIterator->current();
	$this->bitsIterator->next();
	$this->insertToken( 'text', str_replace( '>', '&gt;', $leadingText ) );

	// Every remaining piece started with '<' in the input.
	while ( $this->bitsIterator->valid() ) {
		$this->advance();
	}
	$this->insertToken( 'eof', null );
	$result = $this->stack->getOutput();

	// Release the per-run objects before returning.
	$this->bitsIterator = null;
	$this->afe = null;
	$this->stack = null;
	$this->fragmentContext = null;
	$this->formElementPointer = null;
	return $result;
}
1976 
/**
 * Pass a token to the tree builder: either to the foreign-content
 * handler or to the handler for the current insertion mode.
 *
 * @param string $token Token type; the code handles 'tag', 'endtag',
 *   'text', 'comment' and 'eof'
 * @param string|null $value Tag name or text/comment contents
 * @param array|null $attribs Attributes, for tag tokens
 * @param bool $selfClose Whether a tag token was written as self-closing
 * @return bool|null Whatever the selected handler returns. False when
 *   the tag is in the unsupported set; true when a text token turned out
 *   to be empty and was dropped.
 */
private function insertToken( $token, $value, $attribs = null, $selfClose = false ) {
	// validate tags against $unsupportedSet
	if ( $token === 'tag' || $token === 'endtag' ) {
		if ( isset( BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE][$value] ) ) {
			// These tags are not supported in the balancer: fail hard in
			// strict mode, otherwise report the tag as rejected.
			Assert::invariant(
				!$this->strict,
				"Unsupported $token <$value> found."
			);
			return false;
		}
	} elseif ( $token === 'text' && $value === '' ) {
		// Don't actually inject the empty string as a text token.
		return true;
	}

	// Support pre/listing/textarea by suppressing an initial linefeed.
	// The flag applies to the very next token only, whatever its type.
	if ( $this->ignoreLinefeed ) {
		$this->ignoreLinefeed = false;
		// $value is non-empty here: empty text returned above.
		if ( $token === 'text' && $value[0] === "\n" ) {
			if ( $value === "\n" ) {
				// Nothing would be left, don't inject the empty string.
				return true;
			}
			$value = substr( $value, 1 );
		}
	}

	// Some hoops we have to jump through
	$adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );

	// The spec calls this the "tree construction dispatcher": tokens are
	// treated as foreign content unless one of the cases below applies.
	$isForeign = true;
	if (
		$this->stack->length() === 0 ||
		$adjusted->isHtml() ||
		$token === 'eof'
	) {
		$isForeign = false;
	} elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
		if ( $token === 'text' ) {
			$isForeign = false;
		} elseif (
			$token === 'tag' &&
			$value !== 'mglyph' && $value !== 'malignmark'
		) {
			$isForeign = false;
		}
	} elseif (
		$adjusted->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
		$adjusted->localName === 'annotation-xml' &&
		$token === 'tag' && $value === 'svg'
	) {
		$isForeign = false;
	} elseif (
		$adjusted->isHtmlIntegrationPoint() &&
		( $token === 'tag' || $token === 'text' )
	) {
		$isForeign = false;
	}

	if ( $isForeign ) {
		return $this->insertForeignToken( $token, $value, $attribs, $selfClose );
	}
	// The insertion mode is stored as the name of the handling method.
	$func = $this->parseMode;
	return $this->$func( $token, $value, $attribs, $selfClose );
}
2049 
/**
 * Handle a token while in foreign (non-HTML) content.
 *
 * @param string $token Token type
 * @param string|null $value Tag name or text/comment contents
 * @param array|null $attribs Attributes, for tag tokens
 * @param bool $selfClose Whether a tag token was written as self-closing
 * @return bool|null True once the token has been handled, or the result
 *   of reprocessing it as HTML; null for token types not handled here
 */
private function insertForeignToken( $token, $value, $attribs = null, $selfClose = false ) {
	// Start tags which break out of foreign content. <font> is handled
	// separately below, since it depends on the attributes.
	static $breakoutTags = [
		'b' => true, 'big' => true, 'blockquote' => true, 'body' => true,
		'br' => true, 'center' => true, 'code' => true, 'dd' => true,
		'div' => true, 'dl' => true, 'dt' => true, 'em' => true,
		'embed' => true, 'h1' => true, 'h2' => true, 'h3' => true,
		'h4' => true, 'h5' => true, 'h6' => true, 'head' => true,
		'hr' => true, 'i' => true, 'img' => true, 'li' => true,
		'listing' => true, 'menu' => true, 'meta' => true, 'nobr' => true,
		'ol' => true, 'p' => true, 'pre' => true, 'ruby' => true,
		's' => true, 'small' => true, 'span' => true, 'strong' => true,
		'strike' => true, 'sub' => true, 'sup' => true, 'table' => true,
		'tt' => true, 'u' => true, 'ul' => true, 'var' => true,
	];

	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
		return true;
	} elseif ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	} elseif ( $token === 'tag' ) {
		if ( $value === 'font' ) {
			// Per the HTML spec, <font> breaks out of foreign content
			// only when it carries a color, face or size attribute. A
			// plain <font> is an ordinary foreign start tag.
			$isBreakout = isset( $attribs['color'] )
				|| isset( $attribs['face'] )
				|| isset( $attribs['size'] );
		} else {
			$isBreakout = isset( $breakoutTags[$value] );
		}
		// With a fragment context set, breakout tags are treated like
		// any other start tag.
		if ( $isBreakout && !$this->fragmentContext ) {
			// Pop at least one element, then keep popping until the
			// current node is an integration point or an HTML element.
			do {
				$this->stack->pop();
				$node = $this->stack->currentNode;
			} while ( !(
				$node->isMathmlTextIntegrationPoint() ||
				$node->isHtmlIntegrationPoint() ||
				$node->isHtml()
			) );
			// Then reprocess the token from the new position.
			return $this->insertToken( $token, $value, $attribs, $selfClose );
		}
		// "Any other start tag": insert an element in the namespace of
		// the adjusted current node.
		$adjusted = ( $this->fragmentContext && $this->stack->length() === 1 ) ?
			$this->fragmentContext : $this->stack->currentNode;
		$this->stack->insertForeignElement(
			$adjusted->namespaceURI, $value, $attribs
		);
		if ( $selfClose ) {
			$this->stack->pop();
		}
		return true;
	} elseif ( $token === 'endtag' ) {
		$first = true;
		foreach ( $this->stack as $i => $node ) {
			if ( $node->isHtml() && !$first ) {
				// Reached an HTML element below the starting node:
				// process the end tag as HTML.
				$func = $this->parseMode;
				return $this->$func( $token, $value, $attribs, $selfClose );
			} elseif ( $i === 0 ) {
				// Reached the entry at index 0 without a match: ignore.
				return true;
			} elseif ( $node->localName === $value ) {
				$this->stack->popTag( $node );
				return true;
			}
			$first = false;
		}
	}
}
2154 
/**
 * Consume the next piece from the bits iterator (the text that followed
 * one '<' in the input) and feed the resulting tokens to insertToken().
 *
 * A piece is handled as a comment, as a tag followed by text, or, when
 * it is not an acceptable tag, as literal text.
 */
private function advance() {
	$x = $this->bitsIterator->current();
	$this->bitsIterator->next();
	$regs = [];
	// Handle comments. These won't be generated by mediawiki (they
	// are stripped in the Sanitizer) but may be generated by extensions.
	if (
		$this->allowComments &&
		!( $this->inRCDATA || $this->inRAWTEXT ) &&
		preg_match( self::VALID_COMMENT_REGEX, $x, $regs, PREG_OFFSET_CAPTURE ) &&
		// Group 4 only participates for comments terminated by end of
		// input; in that case there must be no further pieces.
		( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
	) {
		$contents = $regs[2][0];
		$rest = $regs[5][0];
		$this->insertToken( 'comment', $contents );
		$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
		return;
	}
	// $slash: Does the current element start with a '/'?
	// $t: Current element name
	// $attribStr: String between element name and >
	// $brace: Ending '>' or '/>'
	// $rest: Everything until the next element from the $bitsIterator
	if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
		list( /* $qbar */, $slash, $t, $attribStr, $brace, $rest ) = $regs;
		$t = strtolower( $t );
		if ( $this->strict ) {
			// Verify that attributes are all properly double-quoted
			Assert::invariant(
				preg_match(
					'/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
				),
				"Bad attribute string found"
			);
		}
	} else {
		Assert::invariant(
			!$this->strict, "< found which does not start a valid tag"
		);
		$slash = $t = $attribStr = $brace = $rest = null;
	}
	$goodTag = $t;
	if ( $this->inRCDATA ) {
		if ( $slash && $t === $this->inRCDATA ) {
			$this->inRCDATA = false;
		} else {
			// No tags allowed; this emulates the "rcdata" tokenizer mode.
			$goodTag = false;
		}
	}
	if ( $this->inRAWTEXT ) {
		if ( $slash && $t === $this->inRAWTEXT ) {
			$this->inRAWTEXT = false;
		} else {
			// No tags allowed, no entity-escaping done.
			$goodTag = false;
		}
	}
	$sanitize = $this->allowedHtmlElements !== null;
	if ( $sanitize ) {
		$goodTag = $t && isset( $this->allowedHtmlElements[$t] );
	}
	if ( $goodTag ) {
		if ( is_callable( $this->processingCallback ) ) {
			// The callback receives the attribute string by reference.
			call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
		}
		if ( $sanitize ) {
			$goodTag = Sanitizer::validateTag( $attribStr, $t );
		}
	}
	if ( $goodTag ) {
		$attribs = Sanitizer::decodeTagAttributes( $attribStr );
		if ( $sanitize ) {
			// When sanitizing, the decoded attributes are also validated
			// for this element.
			$attribs = Sanitizer::validateTagAttributes( $attribs, $t );
		}
		$goodTag = $this->insertToken(
			$slash ? 'endtag' : 'tag', $t, $attribs, $brace === '/>'
		);
	}
	if ( $goodTag ) {
		// The tag was consumed; what follows it is text.
		$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
	} elseif ( $this->inRAWTEXT ) {
		// Raw text: re-emit the piece verbatim, with the '<' put back.
		$this->insertToken( 'text', "<$x" );
	} else {
		// bad tag; serialize entire thing as text.
		$this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $x ) );
	}
}
2251 
/**
 * Change the current insertion mode.
 *
 * @param string $mode Name of the new mode; must end in 'Mode'
 * @return string The mode that was in effect before the call
 */
private function switchMode( $mode ) {
	$hasModeSuffix = substr( $mode, -4 ) === 'Mode';
	Assert::parameter( $hasModeSuffix, '$mode', 'should end in Mode' );
	$previous = $this->parseMode;
	$this->parseMode = $mode;
	return $previous;
}
2260 
/**
 * Change the insertion mode, then run the given token through
 * insertToken() again so that it is handled under the new mode.
 *
 * @param string $mode Name of the new mode
 * @param string $token
 * @param string|null $value
 * @param array|null $attribs
 * @param bool $selfClose
 * @return bool|null The result of insertToken()
 */
private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfClose ) {
	$this->switchMode( $mode );
	$reprocessed = $this->insertToken( $token, $value, $attribs, $selfClose );
	return $reprocessed;
}
2265 
/**
 * Choose the insertion mode that matches the current stack of open
 * elements ("reset the insertion mode appropriately" in the HTML spec).
 *
 * The stack is scanned down to index 0. At index 0 the fragment context
 * element, if set, stands in for the stack entry. The first HTML element
 * with a dedicated mode decides; otherwise 'inBodyMode' is selected.
 */
private function resetInsertionMode() {
	$last = false;
	foreach ( $this->stack as $i => $node ) {
		if ( $i === 0 ) {
			$last = true;
			if ( $this->fragmentContext ) {
				$node = $this->fragmentContext;
			}
		}
		if ( $node->isHtml() ) {
			switch ( $node->localName ) {
				case 'select':
					// Scan the ancestors of the <select>, nearest first.
					// The entry at index 0 is not examined.
					// NOTE(review): assumes node() takes the same
					// bottom-based index as the iteration key $i -
					// confirm against BalanceStack.
					for ( $j = $i - 1; $j > 0; $j-- ) {
						$ancestor = $this->stack->node( $j );
						if ( $ancestor->isHtmlNamed( 'template' ) ) {
							break;
						}
						if ( $ancestor->isHtmlNamed( 'table' ) ) {
							$this->switchMode( 'inSelectInTableMode' );
							return;
						}
					}
					$this->switchMode( 'inSelectMode' );
					return;
				case 'tr':
					$this->switchMode( 'inRowMode' );
					return;
				case 'tbody':
				case 'tfoot':
				case 'thead':
					$this->switchMode( 'inTableBodyMode' );
					return;
				case 'caption':
					$this->switchMode( 'inCaptionMode' );
					return;
				case 'colgroup':
					$this->switchMode( 'inColumnGroupMode' );
					return;
				case 'table':
					$this->switchMode( 'inTableMode' );
					return;
				case 'template':
					// Use the most recently pushed template insertion mode.
					$this->switchMode(
						array_slice( $this->templateInsertionModes, -1 )[0]
					);
					return;
				case 'body':
					$this->switchMode( 'inBodyMode' );
					return;
				// OMITTED: <frameset>
				// OMITTED: <html>
				// OMITTED: <head>
				default:
					if ( !$last ) {
						// OMITTED: <head>
						if ( $node->isA( BalanceSets::$tableCellSet ) ) {
							$this->switchMode( 'inCellMode' );
							return;
						}
					}
			}
		}
		if ( $last ) {
			$this->switchMode( 'inBodyMode' );
			return;
		}
	}
}
2335 
/**
 * Finish parsing: drop the list of active formatting elements and pop
 * the stack of open elements down to a length of one.
 */
private function stopParsing() {
	// Of the spec's "stop parsing" steps, only "pop all the nodes off the
	// stack of open elements" applies here, and even then the top-most
	// <html> element is left on the stack.

	// The AFE list goes first: otherwise its element objects would stay
	// live during serialization, potentially using O(N^2) memory. This is
	// safe because popping the stack never triggers reconstruction of the
	// active formatting elements.
	$this->afe = null;

	$this->stack->popTo( 1 );
}
2348 
/**
 * Open a raw-text element: insert it, record its tag name in
 * $this->inRAWTEXT, and switch to 'inTextMode' while remembering the
 * mode to return to.
 *
 * @param string $value Tag name of the element
 * @param array|null $attribs Attributes of the element
 * @return bool Always true
 */
private function parseRawText( $value, $attribs = null ) {
	$this->stack->insertHTMLElement( $value, $attribs );
	$this->inRAWTEXT = $value;
	$previousMode = $this->switchMode( 'inTextMode' );
	$this->originalInsertionMode = $previousMode;
	return true;
}
2355 
/**
 * Insertion mode handler for the "text" mode.
 *
 * @param string $token Token type
 * @param string|null $value Tag name or text contents
 * @param array|null $attribs
 * @param bool $selfClose
 * @return bool|null True, except for 'eof' where the result of
 *   reprocessing the token in the original insertion mode is returned
 */
private function inTextMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'eof' ) {
		// Close the open element, then let the original mode see the eof.
		$this->stack->pop();
		return $this->switchModeAndReprocess(
			$this->originalInsertionMode, $token, $value, $attribs, $selfClose
		);
	}
	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
	} elseif ( $token === 'endtag' ) {
		// Any end tag closes the element and restores the previous mode.
		$this->stack->pop();
		$this->switchMode( $this->originalInsertionMode );
	}
	// Every other token type is accepted without any effect.
	return true;
}
2372 
/**
 * Insertion mode handler for the "in head" mode.
 *
 * Tokens not handled here are passed back to insertToken() after a
 * synthetic </head> end tag has been processed.
 *
 * @param string $token Token type
 * @param string|null $value Tag name or text/comment contents
 * @param array|null $attribs Attributes, for tag tokens
 * @param bool $selfClose Whether a tag token was written as self-closing
 * @return bool|null
 */
private function inHeadMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}

	if ( $token === 'text' ) {
		// Leading whitespace is inserted as-is; only the remainder, if
		// any, is handled at the bottom of the function.
		$leading = [];
		if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $leading ) ) {
			$whitespace = $leading[0];
			$this->stack->insertText( $whitespace );
			$value = substr( $value, strlen( $whitespace ) );
		}
		if ( strlen( $value ) === 0 ) {
			return true; // All text handled.
		}
	} elseif ( $token === 'tag' ) {
		// OMITTED: <html>, <title>, <noscript>, <script>, <head>
		$voidHeadTags = [ 'meta', 'base', 'basefont', 'bgsound', 'link' ];
		if ( in_array( $value, $voidHeadTags, true ) ) {
			// OMITTED: in a full HTML parser, <meta> might change the
			// encoding.
			// These elements have no content: insert and close at once.
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;
		}
		if ( $value === 'noframes' || $value === 'style' ) {
			return $this->parseRawText( $value, $attribs );
		}
		if ( $value === 'template' ) {
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->afe->insertMarker();
			// OMITTED: frameset_ok
			$this->switchMode( 'inTemplateMode' );
			$this->templateInsertionModes[] = $this->parseMode;
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		// OMITTED: <head>, <body>, <html>
		if ( $value === 'template' ) {
			if ( $this->stack->indexOf( $value ) < 0 ) {
				return true; // Ignore the token.
			}
			$this->stack->generateImpliedEndTags( null, true /* thorough */ );
			$this->stack->popTag( $value );
			$this->afe->clearToMarker();
			array_pop( $this->templateInsertionModes );
			$this->resetInsertionMode();
			return true;
		}
		if ( $value !== 'br' ) {
			// ignore any other end tag
			return true;
		}
		// </br> is handled at the bottom of the function.
	}

	// If not handled above
	$this->inHeadMode( 'endtag', 'head' ); // synthetic </head>
	// Then redo this one
	return $this->insertToken( $token, $value, $attribs, $selfClose );
}
2442 
2443  private function inBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
2444  if ( $token === 'text' ) {
2445  $this->afe->reconstruct( $this->stack );
2446  $this->stack->insertText( $value );
2447  return true;
2448  } elseif ( $token === 'eof' ) {
2449  if ( !empty( $this->templateInsertionModes ) ) {
2450  return $this->inTemplateMode( $token, $value, $attribs, $selfClose );
2451  }
2452  $this->stopParsing();
2453  return true;
2454  } elseif ( $token === 'tag' ) {
2455  switch ( $value ) {
2456  // OMITTED: <html>
2457  case 'base':
2458  case 'basefont':
2459  case 'bgsound':
2460  case 'link':
2461  case 'meta':
2462  case 'noframes':
2463  // OMITTED: <script>
2464  case 'style':
2465  case 'template':
2466  // OMITTED: <title>
2467  return $this->inHeadMode( $token, $value, $attribs, $selfClose );
2468  // OMITTED: <body>
2469  // OMITTED: <frameset>
2470 
2471  case 'address':
2472  case 'article':
2473  case 'aside':
2474  case 'blockquote':
2475  case 'center':
2476  case 'details':
2477  case 'dialog':
2478  case 'dir':
2479  case 'div':
2480  case 'dl':
2481  case 'fieldset':
2482  case 'figcaption':
2483  case 'figure':
2484  case 'footer':
2485  case 'header':
2486  case 'hgroup':
2487  case 'main':
2488  case 'nav':
2489  case 'ol':
2490  case 'p':
2491  case 'section':
2492  case 'summary':
2493  case 'ul':
2494  if ( $this->stack->inButtonScope( 'p' ) ) {
2495  $this->inBodyMode( 'endtag', 'p' );
2496  }
2497  $this->stack->insertHTMLElement( $value, $attribs );
2498  return true;
2499 
2500  case 'menu':
2501  if ( $this->stack->inButtonScope( "p" ) ) {
2502  $this->inBodyMode( 'endtag', 'p' );
2503  }
2504  if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
2505  $this->stack->pop();
2506  }
2507  $this->stack->insertHTMLElement( $value, $attribs );
2508  return true;
2509 
2510  case 'h1':
2511  case 'h2':
2512  case 'h3':
2513  case 'h4':
2514  case 'h5':
2515  case 'h6':
2516  if ( $this->stack->inButtonScope( 'p' ) ) {
2517  $this->inBodyMode( 'endtag', 'p' );
2518  }
2519  if ( $this->stack->currentNode->isA( BalanceSets::$headingSet ) ) {
2520  $this->stack->pop();
2521  }
2522  $this->stack->insertHTMLElement( $value, $attribs );
2523  return true;
2524 
2525  case 'pre':
2526  case 'listing':
2527  if ( $this->stack->inButtonScope( 'p' ) ) {
2528  $this->inBodyMode( 'endtag', 'p' );
2529  }
2530  $this->stack->insertHTMLElement( $value, $attribs );
2531  $this->ignoreLinefeed = true;
2532  // OMITTED: frameset_ok
2533  return true;
2534 
2535  case 'form':
2536  if (
2537  $this->formElementPointer &&
2538  $this->stack->indexOf( 'template' ) < 0
2539  ) {
2540  return true; // in a form, not in a template.
2541  }
2542  if ( $this->stack->inButtonScope( "p" ) ) {
2543  $this->inBodyMode( 'endtag', 'p' );
2544  }
2545  $elt = $this->stack->insertHTMLElement( $value, $attribs );
2546  if ( $this->stack->indexOf( 'template' ) < 0 ) {
2547  $this->formElementPointer = $elt;
2548  }
2549  return true;
2550 
2551  case 'li':
2552  // OMITTED: frameset_ok
2553  foreach ( $this->stack as $node ) {
2554  if ( $node->isHtmlNamed( 'li' ) ) {
2555  $this->inBodyMode( 'endtag', 'li' );
2556  break;
2557  }
2558  if (
2559  $node->isA( BalanceSets::$specialSet ) &&
2560  !$node->isA( BalanceSets::$addressDivPSet )
2561  ) {
2562  break;
2563  }
2564  }
2565  if ( $this->stack->inButtonScope( 'p' ) ) {
2566  $this->inBodyMode( 'endtag', 'p' );
2567  }
2568  $this->stack->insertHTMLElement( $value, $attribs );
2569  return true;
2570 
2571  case 'dd':
2572  case 'dt':
2573  // OMITTED: frameset_ok
2574  foreach ( $this->stack as $node ) {
2575  if ( $node->isHtmlNamed( 'dd' ) ) {
2576  $this->inBodyMode( 'endtag', 'dd' );
2577  break;
2578  }
2579  if ( $node->isHtmlNamed( 'dt' ) ) {
2580  $this->inBodyMode( 'endtag', 'dt' );
2581  break;
2582  }
2583  if (
2584  $node->isA( BalanceSets::$specialSet ) &&
2585  !$node->isA( BalanceSets::$addressDivPSet )
2586  ) {
2587  break;
2588  }
2589  }
2590  if ( $this->stack->inButtonScope( 'p' ) ) {
2591  $this->inBodyMode( 'endtag', 'p' );
2592  }
2593  $this->stack->insertHTMLElement( $value, $attribs );
2594  return true;
2595 
2596  // OMITTED: <plaintext>
2597 
2598  case 'button':
2599  if ( $this->stack->inScope( 'button' ) ) {
2600  $this->inBodyMode( 'endtag', 'button' );
2601  return $this->insertToken( $token, $value, $attribs, $selfClose );
2602  }
2603  $this->afe->reconstruct( $this->stack );
2604  $this->stack->insertHTMLElement( $value, $attribs );
2605  return true;
2606 
2607  case 'a':
2608  $activeElement = $this->afe->findElementByTag( 'a' );
2609  if ( $activeElement ) {
2610  $this->inBodyMode( 'endtag', 'a' );
2611  if ( $this->afe->isInList( $activeElement ) ) {
2612  $this->afe->remove( $activeElement );
2613  // Don't flatten here, since when we fall
2614  // through below we might foster parent
2615  // the new <a> tag inside this one.
2616  $this->stack->removeElement( $activeElement, false );
2617  }
2618  }
2619  // Falls through
2620  case 'b':
2621  case 'big':
2622  case 'code':
2623  case 'em':
2624  case 'font':
2625  case 'i':
2626  case 's':
2627  case 'small':
2628  case 'strike':
2629  case 'strong':
2630  case 'tt':
2631  case 'u':
2632  $this->afe->reconstruct( $this->stack );
2633  $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ) );
2634  return true;
2635 
2636  case 'nobr':
2637  $this->afe->reconstruct( $this->stack );
2638  if ( $this->stack->inScope( 'nobr' ) ) {
2639  $this->inBodyMode( 'endtag', 'nobr' );
2640  $this->afe->reconstruct( $this->stack );
2641  }
2642  $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ) );
2643  return true;
2644 
2645  case 'applet':
2646  case 'marquee':
2647  case 'object':
2648  $this->afe->reconstruct( $this->stack );
2649  $this->stack->insertHTMLElement( $value, $attribs );
2650  $this->afe->insertMarker();
2651  // OMITTED: frameset_ok
2652  return true;
2653 
2654  case 'table':
2655  // The document is never in "quirks mode"; see simplifications
2656  // above.
2657  if ( $this->stack->inButtonScope( 'p' ) ) {
2658  $this->inBodyMode( 'endtag', 'p' );
2659  }
2660  $this->stack->insertHTMLElement( $value, $attribs );
2661  // OMITTED: frameset_ok
2662  $this->switchMode( 'inTableMode' );
2663  return true;
2664 
2665  case 'area':
2666  case 'br':
2667  case 'embed':
2668  case 'img':
2669  case 'keygen':
2670  case 'wbr':
2671  $this->afe->reconstruct( $this->stack );
2672  $this->stack->insertHTMLElement( $value, $attribs );
2673  $this->stack->pop();
2674  // OMITTED: frameset_ok
2675  return true;
2676 
2677  case 'input':
2678  $this->afe->reconstruct( $this->stack );
2679  $this->stack->insertHTMLElement( $value, $attribs );
2680  $this->stack->pop();
2681  // OMITTED: frameset_ok
2682  // (hence we don't need to examine the tag's "type" attribute)
2683  return true;
2684 
2685  case 'param':
2686  case 'source':
2687  case 'track':
2688  $this->stack->insertHTMLElement( $value, $attribs );
2689  $this->stack->pop();
2690  return true;
2691 
2692  case 'hr':
2693  if ( $this->stack->inButtonScope( 'p' ) ) {
2694  $this->inBodyMode( 'endtag', 'p' );
2695  }
2696  if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
2697  $this->stack->pop();
2698  }
2699  $this->stack->insertHTMLElement( $value, $attribs );
2700  $this->stack->pop();
2701  return true;
2702 
2703  case 'image':
2704  // warts!
2705  return $this->inBodyMode( $token, 'img', $attribs, $selfClose );
2706 
2707  case 'textarea':
2708  $this->stack->insertHTMLElement( $value, $attribs );
2709  $this->ignoreLinefeed = true;
2710  $this->inRCDATA = $value; // emulate rcdata tokenizer mode
2711  // OMITTED: frameset_ok
2712  return true;
2713 
2714  // OMITTED: <xmp>
2715  // OMITTED: <iframe>
2716  // OMITTED: <noembed>
2717  // OMITTED: <noscript>
2718 
2719  case 'select':
2720  $this->afe->reconstruct( $this->stack );
2721  $this->stack->insertHTMLElement( $value, $attribs );
2722  switch ( $this->parseMode ) {
2723  case 'inTableMode':
2724  case 'inCaptionMode':
2725  case 'inTableBodyMode':
2726  case 'inRowMode':
2727  case 'inCellMode':
2728  $this->switchMode( 'inSelectInTableMode' );
2729  return true;
2730  default:
2731  $this->switchMode( 'inSelectMode' );
2732  return true;
2733  }
2734 
2735  case 'optgroup':
2736  case 'option':
2737  if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
2738  $this->inBodyMode( 'endtag', 'option' );
2739  }
2740  $this->afe->reconstruct( $this->stack );
2741  $this->stack->insertHTMLElement( $value, $attribs );
2742  return true;
2743 
2744  case 'menuitem':
2745  if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
2746  $this->stack->pop();
2747  }
2748  $this->afe->reconstruct( $this->stack );
2749  $this->stack->insertHTMLElement( $value, $attribs );
2750  return true;
2751 
2752  case 'rb':
2753  case 'rtc':
2754  if ( $this->stack->inScope( 'ruby' ) ) {
2755  $this->stack->generateImpliedEndTags();
2756  }
2757  $this->stack->insertHTMLElement( $value, $attribs );
2758  return true;
2759 
2760  case 'rp':
2761  case 'rt':
2762  if ( $this->stack->inScope( 'ruby' ) ) {
2763  $this->stack->generateImpliedEndTags( 'rtc' );
2764  }
2765  $this->stack->insertHTMLElement( $value, $attribs );
2766  return true;
2767 
2768  case 'math':
2769  $this->afe->reconstruct( $this->stack );
2770  // We skip the spec's "adjust MathML attributes" and
2771  // "adjust foreign attributes" steps, since the browser will
2772  // do this later when it parses the output and it doesn't affect
2773  // balancing.
2774  $this->stack->insertForeignElement(
2776  );
2777  if ( $selfClose ) {
2778  // emit explicit </math> tag.
2779  $this->stack->pop();
2780  }
2781  return true;
2782 
2783  case 'svg':
2784  $this->afe->reconstruct( $this->stack );
2785  // We skip the spec's "adjust SVG attributes" and
2786  // "adjust foreign attributes" steps, since the browser will
2787  // do this later when it parses the output and it doesn't affect
2788  // balancing.
2789  $this->stack->insertForeignElement(
2791  );
2792  if ( $selfClose ) {
2793  // emit explicit </svg> tag.
2794  $this->stack->pop();
2795  }
2796  return true;
2797 
2798  case 'caption':
2799  case 'col':
2800  case 'colgroup':
2801  // OMITTED: <frame>
2802  case 'head':
2803  case 'tbody':
2804  case 'td':
2805  case 'tfoot':
2806  case 'th':
2807  case 'thead':
2808  case 'tr':
2809  // Ignore table tags if we're not inTableMode
2810  return true;
2811  }
2812 
2813  // Handle any other start tag here
2814  $this->afe->reconstruct( $this->stack );
2815  $this->stack->insertHTMLElement( $value, $attribs );
2816  return true;
2817  } elseif ( $token === 'endtag' ) {
2818  switch ( $value ) {
2819  // </body>,</html> are unsupported.
2820 
2821  case 'template':
2822  return $this->inHeadMode( $token, $value, $attribs, $selfClose );
2823 
2824  case 'address':
2825  case 'article':
2826  case 'aside':
2827  case 'blockquote':
2828  case 'button':
2829  case 'center':
2830  case 'details':
2831  case 'dialog':
2832  case 'dir':
2833  case 'div':
2834  case 'dl':
2835  case 'fieldset':
2836  case 'figcaption':
2837  case 'figure':
2838  case 'footer':
2839  case 'header':
2840  case 'hgroup':
2841  case 'listing':
2842  case 'main':
2843  case 'menu':
2844  case 'nav':
2845  case 'ol':
2846  case 'pre':
2847  case 'section':
2848  case 'summary':
2849  case 'ul':
2850  // Ignore if there is not a matching open tag
2851  if ( !$this->stack->inScope( $value ) ) {
2852  return true;
2853  }
2854  $this->stack->generateImpliedEndTags();
2855  $this->stack->popTag( $value );
2856  return true;
2857 
2858  case 'form':
2859  if ( $this->stack->indexOf( 'template' ) < 0 ) {
2860  $openform = $this->formElementPointer;
2861  $this->formElementPointer = null;
2862  if ( !$openform || !$this->stack->inScope( $openform ) ) {
2863  return true;
2864  }
2865  $this->stack->generateImpliedEndTags();
2866  // Don't flatten yet if we're removing a <form> element
2867  // out-of-order. (eg. `<form><div></form>`)
2868  $flatten = ( $this->stack->currentNode === $openform );
2869  $this->stack->removeElement( $openform, $flatten );
2870  } else {
2871  if ( !$this->stack->inScope( 'form' ) ) {
2872  return true;
2873  }
2874  $this->stack->generateImpliedEndTags();
2875  $this->stack->popTag( 'form' );
2876  }
2877  return true;
2878 
2879  case 'p':
2880  if ( !$this->stack->inButtonScope( 'p' ) ) {
2881  $this->inBodyMode( 'tag', 'p', [] );
2882  return $this->insertToken( $token, $value, $attribs, $selfClose );
2883  }
2884  $this->stack->generateImpliedEndTags( $value );
2885  $this->stack->popTag( $value );
2886  return true;
2887 
2888  case 'li':
2889  if ( !$this->stack->inListItemScope( $value ) ) {
2890  return true; // ignore
2891  }
2892  $this->stack->generateImpliedEndTags( $value );
2893  $this->stack->popTag( $value );
2894  return true;
2895 
2896  case 'dd':
2897  case 'dt':
2898  if ( !$this->stack->inScope( $value ) ) {
2899  return true; // ignore
2900  }
2901  $this->stack->generateImpliedEndTags( $value );
2902  $this->stack->popTag( $value );
2903  return true;
2904 
2905  case 'h1':
2906  case 'h2':
2907  case 'h3':
2908  case 'h4':
2909  case 'h5':
2910  case 'h6':
2911  if ( !$this->stack->inScope( BalanceSets::$headingSet ) ) {
2912  return true; // ignore
2913  }
2914  $this->stack->generateImpliedEndTags();
2915  $this->stack->popTag( BalanceSets::$headingSet );
2916  return true;
2917 
2918  case 'sarcasm':
2919  // Take a deep breath, then:
2920  break;
2921 
2922  case 'a':
2923  case 'b':
2924  case 'big':
2925  case 'code':
2926  case 'em':
2927  case 'font':
2928  case 'i':
2929  case 'nobr':
2930  case 's':
2931  case 'small':
2932  case 'strike':
2933  case 'strong':
2934  case 'tt':
2935  case 'u':
2936  if ( $this->stack->adoptionAgency( $value, $this->afe ) ) {
2937  return true; // If we did something, we're done.
2938  }
2939  break; // Go to the "any other end tag" case.
2940 
2941  case 'applet':
2942  case 'marquee':
2943  case 'object':
2944  if ( !$this->stack->inScope( $value ) ) {
2945  return true; // ignore
2946  }
2947  $this->stack->generateImpliedEndTags();
2948  $this->stack->popTag( $value );
2949  $this->afe->clearToMarker();
2950  return true;
2951 
2952  case 'br':
2953  // Turn </br> into <br>
2954  return $this->inBodyMode( 'tag', $value, [] );
2955  }
2956 
2957  // Any other end tag goes here
2958  foreach ( $this->stack as $i => $node ) {
2959  if ( $node->isHtmlNamed( $value ) ) {
2960  $this->stack->generateImpliedEndTags( $value );
2961  $this->stack->popTo( $i ); // including $i
2962  break;
2963  } elseif ( $node->isA( BalanceSets::$specialSet ) ) {
2964  return true; // ignore this close token.
2965  }
2966  }
2967  return true;
2968  } elseif ( $token === 'comment' ) {
2969  $this->stack->insertComment( $value );
2970  return true;
2971  } else {
2972  Assert::invariant( false, "Bad token type: $token" );
2973  }
2974  }
2975 
/**
 * Token handler for the HTML5 "in table" insertion mode.
 *
 * Table-structure tags are handled here directly; every other token is
 * passed to inBodyMode() with foster parenting switched on for the
 * duration of that call.
 *
 * @param string $token Token type; compared against 'text', 'eof', 'tag',
 *   'endtag' and 'comment'.
 * @param mixed $value Text content, tag name or comment body, depending
 *   on $token.
 * @param array|null $attribs Attributes of a start tag; the 'type' key is
 *   inspected for <input>.
 * @param bool $selfClose Forwarded unchanged to whichever handler the
 *   token is delegated to.
 * @return mixed True when the token was handled here, otherwise whatever
 *   the delegated handler returned.
 */
private function inTableMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'text' ) {
        if ( $this->textIntegrationMode ) {
            return $this->inBodyMode( $token, $value, $attribs, $selfClose );
        } elseif ( $this->stack->currentNode->isA( BalanceSets::$tableSectionRowSet ) ) {
            // Start buffering table text; remember which mode to come
            // back to once a non-text token arrives.
            $this->pendingTableText = '';
            $this->originalInsertionMode = $this->parseMode;
            return $this->switchModeAndReprocess( 'inTableTextMode',
                $token, $value, $attribs, $selfClose );
        }
        // fall through to default case.
    } elseif ( $token === 'eof' ) {
        $this->stopParsing();
        return true;
    } elseif ( $token === 'tag' ) {
        switch ( $value ) {
            case 'caption':
                // Clear the stack back to a table context first, exactly
                // as the colgroup and table-section cases below do.
                // Without this, a <caption> following foster-parented
                // content (eg. `<table><div><caption>`) would be inserted
                // under the still-open foster-parented element.
                $this->stack->clearToContext( BalanceSets::$tableContextSet );
                $this->afe->insertMarker();
                $this->stack->insertHTMLElement( $value, $attribs );
                $this->switchMode( 'inCaptionMode' );
                return true;
            case 'colgroup':
                $this->stack->clearToContext( BalanceSets::$tableContextSet );
                $this->stack->insertHTMLElement( $value, $attribs );
                $this->switchMode( 'inColumnGroupMode' );
                return true;
            case 'col':
                // Synthesize the missing <colgroup>, then reprocess <col>.
                $this->inTableMode( 'tag', 'colgroup', [] );
                return $this->insertToken( $token, $value, $attribs, $selfClose );
            case 'tbody':
            case 'tfoot':
            case 'thead':
                $this->stack->clearToContext( BalanceSets::$tableContextSet );
                $this->stack->insertHTMLElement( $value, $attribs );
                $this->switchMode( 'inTableBodyMode' );
                return true;
            case 'td':
            case 'th':
            case 'tr':
                // Synthesize the missing <tbody>, then reprocess the tag.
                $this->inTableMode( 'tag', 'tbody', [] );
                return $this->insertToken( $token, $value, $attribs, $selfClose );
            case 'table':
                if ( !$this->stack->inTableScope( $value ) ) {
                    return true; // Ignore this tag.
                }
                // A nested <table> start tag closes the open table and is
                // then reprocessed.
                $this->inTableMode( 'endtag', $value );
                return $this->insertToken( $token, $value, $attribs, $selfClose );

            case 'style':
            // OMITTED: <script>
            case 'template':
                return $this->inHeadMode( $token, $value, $attribs, $selfClose );

            case 'input':
                if ( !isset( $attribs['type'] ) || strcasecmp( $attribs['type'], 'hidden' ) !== 0 ) {
                    break; // Handle this as "everything else"
                }
                // Hidden inputs stay inside the table: insert and
                // immediately pop.
                $this->stack->insertHTMLElement( $value, $attribs );
                $this->stack->pop();
                return true;

            case 'form':
                if (
                    $this->formElementPointer ||
                    $this->stack->indexOf( 'template' ) >= 0
                ) {
                    return true; // ignore this token
                }
                $this->formElementPointer =
                    $this->stack->insertHTMLElement( $value, $attribs );
                $this->stack->popTag( $this->formElementPointer );
                return true;
        }
        // Fall through for "anything else" clause.
    } elseif ( $token === 'endtag' ) {
        switch ( $value ) {
            case 'table':
                if ( !$this->stack->inTableScope( $value ) ) {
                    return true; // Ignore.
                }
                $this->stack->popTag( $value );
                $this->resetInsertionMode();
                return true;
            // OMITTED: <body>
            case 'caption':
            case 'col':
            case 'colgroup':
            // OMITTED: <html>
            case 'tbody':
            case 'td':
            case 'tfoot':
            case 'th':
            case 'thead':
            case 'tr':
                return true; // Ignore the token.
            case 'template':
                return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }
        // Fall through for "anything else" clause.
    } elseif ( $token === 'comment' ) {
        $this->stack->insertComment( $value );
        return true;
    }
    // This is the "anything else" case: process the token with the
    // in-body rules while foster parenting is enabled.
    $this->stack->fosterParentMode = true;
    $this->inBodyMode( $token, $value, $attribs, $selfClose );
    $this->stack->fosterParentMode = false;
    return true;
}
3085 
/**
 * Token handler for the "in table text" insertion mode.
 *
 * Text tokens are accumulated in $this->pendingTableText. The first
 * non-text token flushes that buffer and is then reprocessed in the mode
 * saved in $this->originalInsertionMode.
 *
 * @param string $token Token type.
 * @param mixed $value Token payload; appended to the buffer for text.
 * @param array|null $attribs Forwarded when the token is reprocessed.
 * @param bool $selfClose Forwarded when the token is reprocessed.
 * @return mixed True for buffered text, otherwise the result of
 *   switchModeAndReprocess().
 */
private function inTableTextMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'text' ) {
        // Keep collecting; nothing is emitted yet.
        $this->pendingTableText .= $value;
        return true;
    }

    // Any other token ends the run of text: take the buffer and reset it.
    $buffered = $this->pendingTableText;
    $this->pendingTableText = '';

    $onlyWhitespace = !preg_match( '/[^\x09\x0A\x0C\x0D\x20]/', $buffered );
    if ( $onlyWhitespace ) {
        // Whitespace may stay where it is.
        $this->stack->insertText( $buffered );
    } else {
        // Same treatment as the "anything else" case of inTableMode:
        // in-body rules with foster parenting enabled.
        $this->stack->fosterParentMode = true;
        $this->inBodyMode( 'text', $buffered );
        $this->stack->fosterParentMode = false;
    }

    return $this->switchModeAndReprocess(
        $this->originalInsertionMode, $token, $value, $attribs, $selfClose
    );
}
3107 
/**
 * Helper for inCaptionMode(): close the open <caption>, if any.
 *
 * @return bool False when no caption is in table scope (nothing is
 *   changed); true after the caption has been popped, the active
 *   formatting elements cleared to the last marker, and the mode
 *   switched to inTableMode.
 */
private function endCaption() {
    if ( $this->stack->inTableScope( 'caption' ) ) {
        $this->stack->generateImpliedEndTags();
        $this->stack->popTag( 'caption' );
        $this->afe->clearToMarker();
        $this->switchMode( 'inTableMode' );
        return true;
    }
    return false;
}
3119 
/**
 * Token handler for the "in caption" insertion mode.
 *
 * Table-structure start tags and </table> close the caption and are then
 * reprocessed; stray table-structure end tags are dropped; everything
 * else is handled by inBodyMode().
 *
 * @param string $token Token type.
 * @param mixed $value Tag name for 'tag'/'endtag' tokens, otherwise the
 *   token payload.
 * @param array|null $attribs Forwarded to the delegated handler.
 * @param bool $selfClose Forwarded to the delegated handler.
 * @return mixed True when handled here, otherwise the result of
 *   inBodyMode().
 */
private function inCaptionMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'tag' ) {
        $closesCaption = [
            'caption', 'col', 'colgroup', 'tbody', 'td',
            'tfoot', 'th', 'thead', 'tr',
        ];
        if ( in_array( $value, $closesCaption, true ) ) {
            // Only reprocess when a caption was actually closed.
            if ( $this->endCaption() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
    } elseif ( $token === 'endtag' ) {
        if ( $value === 'caption' ) {
            $this->endCaption();
            return true;
        }
        if ( $value === 'table' ) {
            if ( $this->endCaption() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
        // OMITTED: <html>
        $ignoredEndTags = [
            'body', 'col', 'colgroup', 'tbody', 'td',
            'tfoot', 'th', 'thead', 'tr',
        ];
        if ( in_array( $value, $ignoredEndTags, true ) ) {
            // Ignore the token
            return true;
        }
    }

    // The Anything Else case
    return $this->inBodyMode( $token, $value, $attribs, $selfClose );
}
3166 
/**
 * Token handler for the "in column group" insertion mode.
 *
 * Leading whitespace of a text token is inserted directly. Any token not
 * handled here closes the current <colgroup> (when it is the current
 * node) and is then reprocessed.
 *
 * @param string $token Token type.
 * @param mixed $value Text, tag name or comment body, depending on $token.
 * @param array|null $attribs Attributes for start tags.
 * @param bool $selfClose Forwarded to the delegated handler.
 * @return mixed True when handled or ignored here, otherwise the result
 *   of the delegated handler.
 */
private function inColumnGroupMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'text' ) {
        if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
            // Emit the whitespace prefix and keep only the remainder.
            $leading = $matches[0];
            $this->stack->insertText( $leading );
            $value = substr( $value, strlen( $leading ) );
        }
        if ( strlen( $value ) === 0 ) {
            return true; // All text handled.
        }
        // Non-whitespace text remains: handled as "anything else" below.
    } elseif ( $token === 'tag' ) {
        // OMITTED: <html>
        if ( $value === 'col' ) {
            $this->stack->insertHTMLElement( $value, $attribs );
            $this->stack->pop();
            return true;
        }
        if ( $value === 'template' ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }
        // Other start tags: "anything else" below.
    } elseif ( $token === 'endtag' ) {
        if ( $value === 'colgroup' ) {
            if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
                $this->stack->pop();
                $this->switchMode( 'inTableMode' );
            }
            // Otherwise the token is ignored.
            return true;
        }
        if ( $value === 'col' ) {
            return true; // Ignore the token.
        }
        if ( $value === 'template' ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }
        // Other end tags: "anything else" below.
    } elseif ( $token === 'eof' ) {
        return $this->inBodyMode( $token, $value, $attribs, $selfClose );
    } elseif ( $token === 'comment' ) {
        $this->stack->insertComment( $value );
        return true;
    }

    // Anything else
    if ( !$this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
        return true; // Ignore the token.
    }
    // Act as if </colgroup> had been seen, then reprocess the token.
    $this->inColumnGroupMode( 'endtag', 'colgroup' );
    return $this->insertToken( $token, $value, $attribs, $selfClose );
}
3217 
/**
 * Helper for inTableBodyMode(): close the open table section, if any.
 *
 * @return bool False when none of tbody/thead/tfoot is in table scope
 *   (nothing is changed); true after the stack has been cleared to a
 *   table body context, the section popped, and the mode switched to
 *   inTableMode.
 */
private function endSection() {
    $sectionOpen = false;
    // Checked in the order tbody, thead, tfoot; stop at the first hit.
    foreach ( [ 'tbody', 'thead', 'tfoot' ] as $sectionTag ) {
        if ( $this->stack->inTableScope( $sectionTag ) ) {
            $sectionOpen = true;
            break;
        }
    }
    if ( !$sectionOpen ) {
        return false;
    }

    $this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
    $this->stack->pop();
    $this->switchMode( 'inTableMode' );
    return true;
}
/**
 * Token handler for the "in table body" insertion mode.
 *
 * Opens rows (synthesizing a <tr> for a bare cell), closes the current
 * section for tags that belong outside it, and defers everything else to
 * inTableMode().
 *
 * @param string $token Token type.
 * @param mixed $value Tag name for 'tag'/'endtag' tokens, otherwise the
 *   token payload.
 * @param array|null $attribs Attributes for start tags.
 * @param bool $selfClose Forwarded to the delegated handler.
 * @return mixed True when handled or ignored here, otherwise the result
 *   of inTableMode().
 */
private function inTableBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'tag' ) {
        if ( $value === 'tr' ) {
            $this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
            $this->stack->insertHTMLElement( $value, $attribs );
            $this->switchMode( 'inRowMode' );
            return true;
        }
        if ( $value === 'th' || $value === 'td' ) {
            // A cell without a row: synthesize the <tr>, then reprocess.
            $this->inTableBodyMode( 'tag', 'tr', [] );
            $this->insertToken( $token, $value, $attribs, $selfClose );
            return true;
        }
        $endsSection = [ 'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead' ];
        if ( in_array( $value, $endsSection, true ) ) {
            if ( $this->endSection() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
    } elseif ( $token === 'endtag' ) {
        if ( $value === 'table' ) {
            if ( $this->endSection() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
        if ( in_array( $value, [ 'tbody', 'tfoot', 'thead' ], true ) ) {
            // Only close when this particular section is really open.
            if ( $this->stack->inTableScope( $value ) ) {
                $this->endSection();
            }
            return true;
        }
        // OMITTED: <body>
        // OMITTED: <html>
        $ignoredEndTags = [ 'caption', 'col', 'colgroup', 'td', 'th', 'tr' ];
        if ( in_array( $value, $ignoredEndTags, true ) ) {
            return true; // Ignore the token.
        }
    }

    // Anything else:
    return $this->inTableMode( $token, $value, $attribs, $selfClose );
}
3284 
/**
 * Helper for inRowMode(): close the open <tr>, if any.
 *
 * @return bool False when no <tr> is in table scope (nothing is
 *   changed); true after the stack has been cleared to a table row
 *   context, the row popped, and the mode switched to inTableBodyMode.
 */
private function endRow() {
    if ( $this->stack->inTableScope( 'tr' ) ) {
        $this->stack->clearToContext( BalanceSets::$tableRowContextSet );
        $this->stack->pop();
        $this->switchMode( 'inTableBodyMode' );
        return true;
    }
    return false;
}
/**
 * Token handler for the "in row" insertion mode.
 *
 * Opens cells, closes the current row for tags that belong outside it,
 * and defers everything else to inTableMode().
 *
 * @param string $token Token type.
 * @param mixed $value Tag name for 'tag'/'endtag' tokens, otherwise the
 *   token payload.
 * @param array|null $attribs Attributes for start tags.
 * @param bool $selfClose Forwarded to the delegated handler.
 * @return mixed True when handled or ignored here, otherwise the result
 *   of inTableMode().
 */
private function inRowMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'tag' ) {
        if ( $value === 'th' || $value === 'td' ) {
            $this->stack->clearToContext( BalanceSets::$tableRowContextSet );
            $this->stack->insertHTMLElement( $value, $attribs );
            $this->switchMode( 'inCellMode' );
            // Marker placed for the new cell; the cell end-tag handling
            // in inCellMode() calls clearToMarker().
            $this->afe->insertMarker();
            return true;
        }
        $endsRow = [ 'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr' ];
        if ( in_array( $value, $endsRow, true ) ) {
            if ( $this->endRow() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
    } elseif ( $token === 'endtag' ) {
        if ( $value === 'tr' ) {
            $this->endRow();
            return true;
        }
        if ( $value === 'table' ) {
            if ( $this->endRow() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
        if ( in_array( $value, [ 'tbody', 'tfoot', 'thead' ], true ) ) {
            // Close the row and reprocess only if that section is open.
            if (
                $this->stack->inTableScope( $value ) &&
                $this->endRow()
            ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
        // OMITTED: <body>
        // OMITTED: <html>
        $ignoredEndTags = [ 'caption', 'col', 'colgroup', 'td', 'th' ];
        if ( in_array( $value, $ignoredEndTags, true ) ) {
            return true; // Ignore the token.
        }
    }

    // Anything else:
    return $this->inTableMode( $token, $value, $attribs, $selfClose );
}
3350 
/**
 * Helper for inCellMode(): close the open table cell, if any.
 *
 * <td> is tried before <th>; the first one found in table scope is
 * closed by feeding the matching end tag to inCellMode().
 *
 * @return bool True if a cell end tag was processed, false when neither
 *   td nor th is in table scope.
 */
private function endCell() {
    foreach ( [ 'td', 'th' ] as $cellTag ) {
        if ( $this->stack->inTableScope( $cellTag ) ) {
            $this->inCellMode( 'endtag', $cellTag );
            return true;
        }
    }
    return false;
}
/**
 * Token handler for the "in cell" insertion mode.
 *
 * Table-structure tags close the current cell (and are reprocessed);
 * everything else is handled by inBodyMode().
 *
 * @param string $token Token type.
 * @param mixed $value Tag name for 'tag'/'endtag' tokens, otherwise the
 *   token payload.
 * @param array|null $attribs Forwarded to the delegated handler.
 * @param bool $selfClose Forwarded to the delegated handler.
 * @return mixed True when handled or ignored here, otherwise the result
 *   of inBodyMode().
 */
private function inCellMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'tag' ) {
        $endsCell = [
            'caption', 'col', 'colgroup', 'tbody', 'td',
            'tfoot', 'th', 'thead', 'tr',
        ];
        if ( in_array( $value, $endsCell, true ) ) {
            if ( $this->endCell() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
    } elseif ( $token === 'endtag' ) {
        if ( $value === 'td' || $value === 'th' ) {
            if ( $this->stack->inTableScope( $value ) ) {
                $this->stack->generateImpliedEndTags();
                $this->stack->popTag( $value );
                $this->afe->clearToMarker();
                $this->switchMode( 'inRowMode' );
            }
            return true;
        }
        // OMITTED: <body>
        // OMITTED: <html>
        if ( in_array( $value, [ 'caption', 'col', 'colgroup' ], true ) ) {
            return true;
        }
        $outerTableTags = [ 'table', 'tbody', 'tfoot', 'thead', 'tr' ];
        if ( in_array( $value, $outerTableTags, true ) ) {
            if ( $this->stack->inTableScope( $value ) ) {
                // Close whichever cell is open, then let the row-level
                // handler see the end tag.
                $this->stack->generateImpliedEndTags();
                $this->stack->popTag( BalanceSets::$tableCellSet );
                $this->afe->clearToMarker();
                $this->switchMode( 'inRowMode' );
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
    }

    // Anything else:
    return $this->inBodyMode( $token, $value, $attribs, $selfClose );
}
3416 
/**
 * Token handler for the "in select" insertion mode.
 *
 * Only option/optgroup structure, text and comments are kept inside a
 * <select>; unrecognized tokens are silently dropped.
 *
 * @param string $token Token type.
 * @param mixed $value Text, tag name or comment body, depending on $token.
 * @param array|null $attribs Attributes for start tags.
 * @param bool $selfClose Forwarded to the delegated handler.
 * @return mixed True when handled or ignored here, otherwise the result
 *   of the delegated handler.
 */
private function inSelectMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'text' ) {
        $this->stack->insertText( $value );
        return true;
    }
    if ( $token === 'eof' ) {
        return $this->inBodyMode( $token, $value, $attribs, $selfClose );
    }
    if ( $token === 'comment' ) {
        $this->stack->insertComment( $value );
        return true;
    }

    if ( $token === 'tag' ) {
        // OMITTED: <html>
        if ( $value === 'option' || $value === 'optgroup' ) {
            // A new option/optgroup implicitly closes an open <option>;
            // a new optgroup additionally closes an open <optgroup>.
            if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
                $this->stack->pop();
            }
            if (
                $value === 'optgroup' &&
                $this->stack->currentNode->isHtmlNamed( 'optgroup' )
            ) {
                $this->stack->pop();
            }
            $this->stack->insertHTMLElement( $value, $attribs );
            return true;
        }
        if ( $value === 'select' ) {
            $this->inSelectMode( 'endtag', $value ); // treat it like endtag
            return true;
        }
        if ( in_array( $value, [ 'input', 'keygen', 'textarea' ], true ) ) {
            if ( !$this->stack->inSelectScope( 'select' ) ) {
                return true; // ignore token (fragment case)
            }
            // Close the select, then reprocess the form control.
            $this->inSelectMode( 'endtag', 'select' );
            return $this->insertToken( $token, $value, $attribs, $selfClose );
        }
        if ( $value === 'script' || $value === 'template' ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }
    } elseif ( $token === 'endtag' ) {
        if ( $value === 'optgroup' ) {
            // An <option> sitting directly on top of an <optgroup> is
            // closed together with it.
            $depth = $this->stack->length();
            if (
                $this->stack->currentNode->isHtmlNamed( 'option' ) &&
                $depth >= 2 &&
                $this->stack->node( $depth - 2 )->isHtmlNamed( 'optgroup' )
            ) {
                $this->stack->pop();
            }
            if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
                $this->stack->pop();
            }
            return true;
        }
        if ( $value === 'option' ) {
            if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
                $this->stack->pop();
            }
            return true;
        }
        if ( $value === 'select' ) {
            if ( !$this->stack->inSelectScope( $value ) ) {
                return true; // fragment case
            }
            $this->stack->popTag( $value );
            $this->resetInsertionMode();
            return true;
        }
        if ( $value === 'template' ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }
    }

    // anything else: just ignore the token
    return true;
}
3492 
/**
 * Token handler for the "in select in table" insertion mode.
 *
 * Table-structure tags force the <select> closed (for end tags, only if
 * the named element is in table scope) and are then reprocessed. All
 * other tokens follow the ordinary inSelectMode() rules.
 *
 * @param string $token Token type.
 * @param mixed $value Tag name for 'tag'/'endtag' tokens, otherwise the
 *   token payload.
 * @param array|null $attribs Forwarded to the delegated handler.
 * @param bool $selfClose Forwarded to the delegated handler.
 * @return mixed True when an end tag is ignored, otherwise the result of
 *   the delegated handler.
 */
private function inSelectInTableMode( $token, $value, $attribs = null, $selfClose = false ) {
    $tableTags = [
        'caption', 'table', 'tbody', 'tfoot',
        'thead', 'tr', 'td', 'th',
    ];
    $isTagToken = ( $token === 'tag' || $token === 'endtag' );

    if ( $isTagToken && in_array( $value, $tableTags, true ) ) {
        if ( $token === 'endtag' && !$this->stack->inTableScope( $value ) ) {
            return true;
        }
        // Act as if </select> had been seen, then reprocess the tag.
        $this->inSelectInTableMode( 'endtag', 'select' );
        return $this->insertToken( $token, $value, $attribs, $selfClose );
    }

    // anything else
    return $this->inSelectMode( $token, $value, $attribs, $selfClose );
}
3517 
/**
 * Token handler for the "in template" insertion mode.
 *
 * Start tags select the insertion mode appropriate for the content they
 * introduce and are reprocessed there; text and comments use the in-body
 * rules; end tags other than </template> are dropped.
 *
 * NOTE(review): the HTML5 algorithm also replaces the top entry of the
 * template insertion mode stack when switching mode on a start tag; this
 * method only switches the current mode. Confirm that this is intended.
 *
 * @param string $token Token type.
 * @param mixed $value Text, tag name or comment body, depending on $token.
 * @param array|null $attribs Forwarded to the delegated handler.
 * @param bool $selfClose Forwarded to the delegated handler.
 * @return mixed True when handled or ignored here, otherwise the result
 *   of the delegated handler.
 */
private function inTemplateMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'text' || $token === 'comment' ) {
        return $this->inBodyMode( $token, $value, $attribs, $selfClose );
    }

    if ( $token === 'eof' ) {
        if ( $this->stack->indexOf( 'template' ) >= 0 ) {
            // Unclosed <template>: close it, drop its insertion mode,
            // and let the restored mode see the EOF as well.
            $this->stack->popTag( 'template' );
            $this->afe->clearToMarker();
            array_pop( $this->templateInsertionModes );
            $this->resetInsertionMode();
            $this->insertToken( $token, $value, $attribs, $selfClose );
        } else {
            $this->stopParsing();
        }
        return true;
    }

    if ( $token === 'tag' ) {
        // OMITTED: <script>
        // OMITTED: <title>
        $headTags = [
            'base', 'basefont', 'bgsound', 'link',
            'meta', 'noframes', 'style', 'template',
        ];
        if ( in_array( $value, $headTags, true ) ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }

        // Table-related tags pick the matching table mode; any other
        // start tag is processed with the in-body rules.
        $modeForTag = [
            'caption' => 'inTableMode',
            'colgroup' => 'inTableMode',
            'tbody' => 'inTableMode',
            'tfoot' => 'inTableMode',
            'thead' => 'inTableMode',
            'col' => 'inColumnGroupMode',
            'tr' => 'inTableBodyMode',
            'td' => 'inRowMode',
            'th' => 'inRowMode',
        ];
        $nextMode = isset( $modeForTag[$value] ) ? $modeForTag[$value] : 'inBodyMode';
        return $this->switchModeAndReprocess(
            $nextMode, $token, $value, $attribs, $selfClose
        );
    }

    if ( $token === 'endtag' ) {
        if ( $value === 'template' ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }
        return true;
    }

    Assert::invariant( false, "Bad token type: $token" );
}
3584 }
MediaWiki\Tidy\BalanceStack\clearToContext
clearToContext( $set)
Pop elements off the stack not including the first element in the specified set.
Definition: Balancer.php:1029
MediaWiki\Tidy\BalanceElement\$children
$children
An array of children of this element.
Definition: Balancer.php:347
MediaWiki\Tidy\BalanceElement\$namespaceURI
$namespaceURI
The namespace of the element.
Definition: Balancer.php:321
MediaWiki\Tidy\BalanceElement\insertBefore
insertBefore(BalanceElement $a, $b)
Find $a in the list of children and insert $b before it.
Definition: Balancer.php:410
MediaWiki\Tidy\BalanceStack\inListItemScope
inListItemScope( $tag)
Determine if the stack has $tag in list item scope.
Definition: Balancer.php:835
MediaWiki\Tidy\BalanceSets\inListItemScopeSet
static inListItemScopeSet()
Definition: Balancer.php:227
MediaWiki\Tidy\BalanceStack\popTo
popTo( $idx)
Remove all nodes up to and including position $idx from the BalanceStack, flattening them in the proc...
Definition: Balancer.php:1002
MediaWiki\Tidy\BalanceStack\$config
$config
Configuration options governing flattening.
Definition: Balancer.php:679
MediaWiki\Tidy\BalanceSets\HTML_NAMESPACE
const HTML_NAMESPACE
Definition: Balancer.php:71
MediaWiki\Tidy\BalanceActiveFormattingElements\addToNoahList
addToNoahList(BalanceElement $elt)
Definition: Balancer.php:1580
MediaWiki\Tidy\Balancer\switchModeAndReprocess
switchModeAndReprocess( $mode, $token, $value, $attribs, $selfClose)
Definition: Balancer.php:2261
use
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
Definition: APACHE-LICENSE-2.0.txt:10
MediaWiki\Tidy\BalanceSets\$inTableScopeSet
static $inTableScopeSet
Definition: Balancer.php:245
MediaWiki\Tidy\BalanceElement\getNoahKey
getNoahKey()
Get a string key for the Noah's Ark algorithm.
Definition: Balancer.php:636
MediaWiki\Tidy\Balancer\switchMode
switchMode( $mode)
Definition: Balancer.php:2252
MediaWiki\Tidy\BalanceStack\node
node( $idx)
Return the BalanceElement at the given position $idx, where position 0 represents the root element.
Definition: Balancer.php:934
MediaWiki\Tidy\Balancer\inHeadMode
inHeadMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:2373
array
the array() calling protocol came about after MediaWiki 1.4rc1.
MediaWiki\Tidy\BalanceElement\isHtmlNamed
isHtmlNamed( $tagName)
Determine if this element is an HTML element with the specified name.
Definition: Balancer.php:584
MediaWiki\Tidy\Balancer\$parseMode
$parseMode
Definition: Balancer.php:1805
MediaWiki\Tidy\BalanceElement\$attribs
$attribs
Attributes for the element, in array form.
Definition: Balancer.php:331
MediaWiki\Tidy\BalanceStack\__toString
__toString()
Return the contents of the open elements stack as a string for debugging.
Definition: Balancer.php:1367
$last
$last
Definition: profileinfo.php:408
MediaWiki\Tidy\BalanceStack\fosterParent
fosterParent( $elt)
Foster parent the given $elt in the stack of open elements.
Definition: Balancer.php:1100
MediaWiki\Tidy\Balancer\$ignoreLinefeed
$ignoreLinefeed
Definition: Balancer.php:1822
MediaWiki\Tidy\BalanceSets\SVG_NAMESPACE
const SVG_NAMESPACE
Definition: Balancer.php:73
MediaWiki\Tidy\BalanceStack\$currentNode
$currentNode
Reference to the current element.
Definition: Balancer.php:683
MediaWiki\Tidy\BalanceStack\insertForeignElement
insertForeignElement( $namespaceURI, $tag, $attribs)
Insert a BalanceElement at the appropriate place, pushing it on to the open elements stack.
Definition: Balancer.php:759
MediaWiki\Tidy\Balancer\$afe
BalanceActiveFormattingElements $afe
Definition: Balancer.php:1810
MediaWiki\Tidy\BalanceStack\insertHTMLElement
insertHTMLElement( $tag, $attribs)
Insert an HTML element at the appropriate place, pushing it on to the open elements stack.
Definition: Balancer.php:773
MediaWiki\Tidy\BalanceSets\$thoroughImpliedEndTagsSet
static $thoroughImpliedEndTagsSet
Definition: Balancer.php:167
MediaWiki\Tidy\Balancer\$processingArgs
array $processingArgs
Definition: Balancer.php:1829
MediaWiki\Tidy\BalanceActiveFormattingElements\$head
$head
The first (least recent) element in the list.
Definition: Balancer.php:1401
MediaWiki\Tidy\Balancer\$pendingTableText
$pendingTableText
Definition: Balancer.php:1818
$out
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:864
MediaWiki\Tidy\BalanceSets\$tableContextSet
static $tableContextSet
Definition: Balancer.php:182
MediaWiki\Tidy\Balancer\endRow
endRow()
Definition: Balancer.php:3286
serialize
serialize()
Definition: ApiMessage.php:184
MediaWiki\Tidy\BalanceActiveFormattingElements\push
push(BalanceElement $elt)
Follow the steps required when the spec requires us to "push onto the list of active formatting eleme...
Definition: Balancer.php:1449
MediaWiki\Tidy\BalanceSets\inButtonScopeSet
static inButtonScopeSet()
Definition: Balancer.php:237
$s
$s
Definition: mergeMessageFileList.php:187
MediaWiki\Tidy\BalanceElement\__construct
__construct( $namespaceURI, $localName, array $attribs)
Make a new BalanceElement corresponding to the HTML DOM Element with the given localname,...
Definition: Balancer.php:379
MediaWiki\Tidy\BalanceActiveFormattingElements\isInList
isInList(BalanceElement $elt)
Determine whether an element is in the list of formatting elements.
Definition: Balancer.php:1544
MediaWiki\Tidy\Balancer\inTableBodyMode
inTableBodyMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3232
MediaWiki\Tidy\BalanceStack\__construct
__construct(array $config)
Create a new BalanceStack with a single BalanceElement on it, representing the root <html> node.
Definition: Balancer.php:690
MediaWiki\Tidy\BalanceStack\adjustedCurrentNode
adjustedCurrentNode( $fragmentContext)
Return the adjusted current node.
Definition: Balancer.php:914
MediaWiki\Tidy\BalanceSets\$htmlIntegrationPointSet
static $htmlIntegrationPointSet
Definition: Balancer.php:264
MediaWiki\Tidy\BalanceStack\removeElement
removeElement(BalanceElement $elt, $flatten=true)
Remove the given $elt from the BalanceStack, optionally flattening it in the process.
Definition: Balancer.php:1045
$result
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message. Please note the header message cannot receive/use parameters. 'ImgAuthModifyHeaders':Executed just before a file is streamed to a user via img_auth.php, allowing headers to be modified beforehand. $title:LinkTarget object & $headers:HTTP headers(name=> value, names are case insensitive). Two headers get special handling:If-Modified-Since(value must be a valid HTTP date) and Range(must be of the form "bytes=(\d*-\d*)") will be honored when streaming the file. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item. Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page. Return false to stop further processing of the tag $reader:XMLReader object & $pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision. Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag. Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUnknownUser':When a user doesn 't exist locally, this hook is called to give extensions an opportunity to auto-create it. If the auto-creation is successful, return false. $name:User name 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload. Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports. & $fullInterwikiPrefix:Interwiki prefix, may contain colons. & $pageTitle:String that contains page title. 
'ImportSources':Called when reading from the $wgImportSources configuration variable. Can be used to lazy-load the import sources list. & $importSources:The value of $wgImportSources. Modify as necessary. See the comment in DefaultSettings.php for the detail of how to structure this array. 'InfoAction':When building information to display on the action=info page. $context:IContextSource object & $pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect. & $title:Title object for the current page & $request:WebRequest & $ignoreRedirect:boolean to skip redirect check & $target:Title/string of redirect target & $article:Article object 'InternalParseBeforeLinks':during Parser 's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InternalParseBeforeSanitize':during Parser 's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings. Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not. Return true without providing an interwiki to continue interwiki search. $prefix:interwiki prefix we are looking for. & $iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 'InvalidateEmailComplete':Called after a user 's email has been invalidated successfully. $user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification. 
Callee may modify $url and $query, URL will be constructed as $url . $query & $url:URL to index.php & $query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) & $article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() & $ip:IP being check & $result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from & $allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn 't match your organization. $addr:The e-mail address entered by the user & $result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user & $result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we 're looking for a messages file for & $file:The messages file path, you can override this to change the location. 'LanguageGetMagic':DEPRECATED! Use $magicWords in a file listed in $wgExtensionMessagesFiles instead. Use this to define synonyms of magic words depending of the language & $magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces. Do not use this hook to add namespaces. Use CanonicalNamespaces for that. & $namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED! Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead. 
Use to define aliases of special pages names depending of the language & $specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names. & $names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page 's language links. This is called in various places to allow extensions to define the effective language links for a page. $title:The page 's Title. & $links:Array with elements of the form "language:title" in the order that they will be output. & $linkFlags:Associative array mapping prefixed links to arrays of flags. Currently unused, but planned to provide support for marking individual language links in the UI, e.g. for featured articles. 'LanguageSelector':Hook to change the language selector available on a page. $out:The output page. $cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED! Use HtmlPageLinkRendererBegin instead. Used when generating internal and interwiki links in Linker::link(), before processing starts. Return false to skip default processing and return $ret. See documentation for Linker::link() for details on the expected meanings of parameters. $skin:the Skin object $target:the Title that the link is pointing to & $html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1993
MediaWiki\Tidy\BalanceSets\$unsupportedSet
static $unsupportedSet
Definition: Balancer.php:75
MediaWiki\Tidy\Balancer\inColumnGroupMode
inColumnGroupMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3167
MediaWiki\Tidy\BalanceStack\inScope
inScope( $tag)
Determine if the stack has $tag in scope.
Definition: Balancer.php:815
true
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition: hooks.txt:2006
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:37
MediaWiki\Tidy\BalanceStack\getIterator
getIterator()
Return an iterator over this stack which visits the current node first, and the root node last.
Definition: Balancer.php:924
MediaWiki\Tidy\BalanceActiveFormattingElements\$noahTableStack
$noahTableStack
An array of arrays representing the population of elements in each bucket according to the Noah's Ark...
Definition: Balancer.php:1421
MediaWiki\Tidy\Balancer\inTemplateMode
inTemplateMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3518
MediaWiki\Tidy\Balancer\inRowMode
inRowMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3295
MediaWiki\Tidy\BalanceSets\$addressDivPSet
static $addressDivPSet
Definition: Balancer.php:145
Sanitizer\validateTag
static validateTag( $params, $element)
Takes attribute names and values for a tag and the tag name and validates that the tag is allowed to ...
Definition: Sanitizer.php:736
MediaWiki\Tidy\BalanceStack\getOutput
getOutput()
Return a string representing the output of the tree builder: all the children of the root <html> node...
Definition: Balancer.php:705
MediaWiki\Tidy\Balancer\endCell
endCell()
Definition: Balancer.php:3352
MediaWiki\Tidy\Balancer\inBodyMode
inBodyMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:2443
MediaWiki\Tidy\BalanceActiveFormattingElements\__destruct
__destruct()
Definition: Balancer.php:1423
MediaWiki\Tidy\Balancer\$fragmentContext
$fragmentContext
Definition: Balancer.php:1820
ExplodeIterator
An iterator which works exactly like:
Definition: ExplodeIterator.php:30
MediaWiki\Tidy\BalanceSets\$tableSectionRowSet
static $tableSectionRowSet
Definition: Balancer.php:151
MediaWiki\Tidy\BalanceSets\$inButtonScopeSet
static $inButtonScopeSet
Definition: Balancer.php:236
MediaWiki\Tidy\BalanceSets\MATHML_NAMESPACE
const MATHML_NAMESPACE
Definition: Balancer.php:72
MediaWiki\Tidy\BalanceStack\inButtonScope
inButtonScope( $tag)
Determine if the stack has $tag in button scope.
Definition: Balancer.php:825
MediaWiki\Tidy\BalanceStack\$fosterParentMode
$fosterParentMode
Foster parent mode determines how nodes are inserted into the stack.
Definition: Balancer.php:673
MediaWiki\Tidy\Balancer\$config
$config
Definition: Balancer.php:1815
MediaWiki\Tidy\BalanceSets\$tidyPWrapSet
static $tidyPWrapSet
Definition: Balancer.php:273
MediaWiki\Tidy\BalanceStack
The "stack of open elements" as defined in the HTML5 tree builder spec.
Definition: Balancer.php:661
MediaWiki\Tidy\Balancer
An implementation of the tree building portion of the HTML5 parsing spec.
Definition: Balancer.php:1804
MediaWiki\Tidy\Balancer\VALID_COMMENT_REGEX
const VALID_COMMENT_REGEX
Valid HTML5 comments.
Definition: Balancer.php:1835
MediaWiki\Tidy\Balancer\inSelectInTableMode
inSelectInTableMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3493
MediaWiki\Tidy\Balancer\$stack
BalanceStack $stack
Definition: Balancer.php:1812
MediaWiki\Tidy\BalanceSets\$formAssociatedSet
static $formAssociatedSet
Definition: Balancer.php:202
MediaWiki\Tidy\BalanceStack\pop
pop()
Remove the current node from the BalanceStack, flattening it in the process.
Definition: Balancer.php:985
$matches
$matches
Definition: NoLocalSettings.php:24
MediaWiki\Tidy\BalanceElement\isHtml
isHtml()
Determine if $this represents an element in the HTML namespace.
Definition: Balancer.php:594
MediaWiki\Tidy\Balancer\insertToken
insertToken( $token, $value, $attribs=null, $selfClose=false)
Pass a token to the tree builder.
Definition: Balancer.php:1981
MediaWiki\Tidy\Balancer\inTextMode
inTextMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:2356
MediaWiki\Tidy\BalanceElement\$parent
$parent
Parent of this element, or the string "flat" if this element has already been flattened into its pare...
Definition: Balancer.php:338
Sanitizer\encodeAttribute
static encodeAttribute( $text)
Encode an attribute value for HTML output.
Definition: Sanitizer.php:1130
MediaWiki\Tidy\BalanceActiveFormattingElements\replace
replace(BalanceElement $a, BalanceElement $b)
Find element $a in the list and replace it with element $b.
Definition: Balancer.php:1625
contents
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their contents
Definition: database.txt:9
MediaWiki\Tidy\BalanceActiveFormattingElements\$tail
$tail
The last (most recent) element in the list.
Definition: Balancer.php:1398
MediaWiki\Tidy\BalanceStack\length
length()
Return the number of elements currently in the BalanceStack.
Definition: Balancer.php:977
MediaWiki\Tidy\Balancer\$inRCDATA
$inRCDATA
Definition: Balancer.php:1823
MediaWiki\Tidy\Balancer\$strict
$strict
Definition: Balancer.php:1813
MediaWiki\Tidy\BalanceElement\isHtmlIntegrationPoint
isHtmlIntegrationPoint()
Determine if $this represents an HTML integration point, as defined in the HTML5 specification.
Definition: Balancer.php:616
MediaWiki\Tidy\BalanceMarker\$nextAFE
$nextAFE
Definition: Balancer.php:1383
MediaWiki\Tidy\BalanceSets\$specialSet
static $specialSet
Definition: Balancer.php:109
MediaWiki\Tidy\Balancer\$textIntegrationMode
$textIntegrationMode
Definition: Balancer.php:1817
MediaWiki\Tidy\BalanceStack\insertText
insertText( $value, $isComment=false)
Insert text at the appropriate place for inserting a node.
Definition: Balancer.php:733
MediaWiki\Tidy\BalanceElement\$noahKey
$noahKey
A unique string identifier for Noah's Ark purposes, lazy initialized.
Definition: Balancer.php:352
MediaWiki\Tidy\BalanceStack\popTag
popTag( $tag)
Pop elements off the stack up to and including the first element with the specified HTML tagname (or ...
Definition: Balancer.php:1014
MediaWiki\Tidy\Balancer\inTableTextMode
inTableTextMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3086
MediaWiki\Tidy\BalanceStack\insertElement
insertElement(BalanceElement $elt)
Insert an element at the appropriate place and push it on to the open elements stack.
Definition: Balancer.php:786
MediaWiki\Tidy\BalanceActiveFormattingElements\findElementByTag
findElementByTag( $tag)
Find and return the last element with the specified tag between the end of the list and the last mark...
Definition: Balancer.php:1528
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
MediaWiki\Tidy\BalanceElement\__toString
__toString()
Serialize this node and all of its children to a string, as specified by the HTML serialization speci...
Definition: Balancer.php:533
MediaWiki\Tidy\BalanceElement\isMathmlTextIntegrationPoint
isMathmlTextIntegrationPoint()
Determine if $this represents a MathML text integration point, as defined in the HTML5 specification.
Definition: Balancer.php:605
MediaWiki\Tidy\BalanceStack\replaceAt
replaceAt( $idx, BalanceElement $elt)
Replace the element at position $idx in the BalanceStack with $elt.
Definition: Balancer.php:943
MediaWiki\Tidy\BalanceMarker\$prevAFE
$prevAFE
Definition: Balancer.php:1384
ReverseArrayIterator
Convenience class for iterating over an array in reverse order.
Definition: ReverseArrayIterator.php:29
$attribs
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:2014
MediaWiki\Tidy\BalanceElement\$nextAFE
$nextAFE
The next active formatting element in the list, or null if this is the end of the AFE list or if the ...
Definition: Balancer.php:358
MediaWiki\Tidy\Balancer\$bitsIterator
Iterator $bitsIterator
Definition: Balancer.php:1807
MediaWiki\Tidy\BalanceSets\$tableRowContextSet
static $tableRowContextSet
Definition: Balancer.php:195
MediaWiki\Tidy\BalanceStack\inSpecificScope
inSpecificScope( $tag, $set)
Determine if the stack has $tag in a specific scope, $set.
Definition: Balancer.php:876
$value
$value
Definition: styleTest.css.php:45
MediaWiki\Tidy\Balancer\$formElementPointer
$formElementPointer
Definition: Balancer.php:1821
Sanitizer\validateTagAttributes
static validateTagAttributes( $attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:772
MediaWiki\Tidy\Balancer\inTableMode
inTableMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:2976
MediaWiki\Tidy\Balancer\$allowedHtmlElements
$allowedHtmlElements
Definition: Balancer.php:1808
Sanitizer\ELEMENT_BITS_REGEX
const ELEMENT_BITS_REGEX
Acceptable tag name charset from HTML5 parsing spec https://www.w3.org/TR/html5/syntax....
Definition: Sanitizer.php:46
MediaWiki\Tidy\BalanceActiveFormattingElements\clearToMarker
clearToMarker()
Follow the steps required when the spec asks us to "clear the list of active formatting elements up t...
Definition: Balancer.php:1489
MediaWiki\Tidy\Balancer\parseRawText
parseRawText( $value, $attribs=null)
Definition: Balancer.php:2349
MediaWiki\Tidy\BalanceSets\$tidyInlineSet
static $tidyInlineSet
Definition: Balancer.php:283
MediaWiki\Tidy\BalanceElement\$prevAFE
$prevAFE
The previous active formatting element in the list, or null if this is the start of the list or if th...
Definition: Balancer.php:364
MediaWiki\Tidy\BalanceActiveFormattingElements
The list of active formatting elements, which is used to handle mis-nested formatting element tags in...
Definition: Balancer.php:1396
MediaWiki\Tidy\BalanceStack\insertComment
insertComment( $value)
Insert a comment at the appropriate place for inserting a node.
Definition: Balancer.php:721
MediaWiki\Tidy\Balancer\advance
advance()
Grab the next "token" from $bitsIterator.
Definition: Balancer.php:2159
MediaWiki\Tidy\BalanceElement
A BalanceElement is a simplified version of a DOM Node.
Definition: Balancer.php:316
MediaWiki\Tidy\BalanceSets\$headingSet
static $headingSet
Definition: Balancer.php:102
MediaWiki\Tidy\BalanceSets\$mathmlTextIntegrationPointSet
static $mathmlTextIntegrationPointSet
Definition: Balancer.php:257
MediaWiki\Tidy\BalanceStack\inSelectScope
inSelectScope( $tag)
Determine if the stack has $tag in select scope.
Definition: Balancer.php:855
MediaWiki\Tidy\BalanceSets\$impliedEndTagsSet
static $impliedEndTagsSet
Definition: Balancer.php:158
MediaWiki\Tidy\BalanceSets\$tableBodyContextSet
static $tableBodyContextSet
Definition: Balancer.php:188
MediaWiki\Tidy\Balancer\balance
balance( $text, $processingCallback=null, $processingArgs=[])
Return a balanced HTML string for the HTML fragment given by $text, subject to the caveats listed in ...
Definition: Balancer.php:1932
MediaWiki\Tidy\BalanceStack\inTableScope
inTableScope( $tag)
Determine if the stack has $tag in table scope.
Definition: Balancer.php:845
MediaWiki\Tidy\BalanceSets
Utility constants and sets for the HTML5 tree building algorithm.
Definition: Balancer.php:70
MediaWiki\Tidy\BalanceStack\insertAfter
insertAfter(BalanceElement $a, BalanceElement $b)
Find $a in the BalanceStack and insert $b after it.
Definition: Balancer.php:1080
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:302
MediaWiki\Tidy\BalanceActiveFormattingElements\insertAfter
insertAfter(BalanceElement $a, BalanceElement $b)
Find $a in the list and insert $b after it.
Definition: Balancer.php:1659
MediaWiki\Tidy\BalanceStack\indexOf
indexOf( $tag)
Return the position of the given BalanceElement, set, or HTML tag name string in the BalanceStack.
Definition: Balancer.php:964
MediaWiki\Tidy\BalanceElement\$localName
$localName
The lower-cased name of the element.
Definition: Balancer.php:326
MediaWiki\Tidy\BalanceActiveFormattingElements\removeFromNoahList
removeFromNoahList(BalanceElement $elt)
Definition: Balancer.php:1594
MediaWiki\Tidy\BalanceActiveFormattingElements\insertMarker
insertMarker()
Definition: Balancer.php:1432
MediaWiki\Tidy\BalanceSets\$emptyElementSet
static $emptyElementSet
Definition: Balancer.php:86
MediaWiki\Tidy\BalanceStack\$elements
$elements
Backing storage for the stack.
Definition: Balancer.php:666
MediaWiki\Tidy\Balancer\inCellMode
inCellMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3363
MediaWiki\$config
Config $config
Definition: MediaWiki.php:43
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:22
MediaWiki\Tidy\Balancer\stopParsing
stopParsing()
Definition: Balancer.php:2336
MediaWiki\Tidy\BalanceStack\adoptionAgency
adoptionAgency( $tag, $afe)
Run the "adoption agency algorithm" (AAA) for the given subject tag name.
Definition: Balancer.php:1162
MediaWiki\Tidy\BalanceElement\isA
isA( $set)
Determine if $this represents a specific HTML tag, is a member of a tag set, or is equal to another B...
Definition: Balancer.php:567
MediaWiki\Tidy\BalanceSets\$extraLinefeedSet
static $extraLinefeedSet
Definition: Balancer.php:96
MediaWiki\Tidy\Balancer\resetInsertionMode
resetInsertionMode()
Definition: Balancer.php:2266
MediaWiki\Tidy\BalanceSets\$tableCellSet
static $tableCellSet
Definition: Balancer.php:177
MediaWiki\Tidy\BalanceElement\$nextNoah
$nextNoah
The next element in the Noah's Ark species bucket.
Definition: Balancer.php:369
Sanitizer\decodeTagAttributes
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1434
MediaWiki\Tidy\BalanceElement\flatten
flatten(array $config)
Flatten this node and all of its children into a string, as specified by the HTML serialization speci...
Definition: Balancer.php:477
$t
$t
Definition: testCompression.php:69
MediaWiki\Tidy\Balancer\inCaptionMode
inCaptionMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3120
MediaWiki\Tidy\BalanceMarker
A pseudo-element used as a marker in the list of active formatting elements.
Definition: Balancer.php:1382
MediaWiki\Tidy\Balancer\$originalInsertionMode
$originalInsertionMode
Definition: Balancer.php:1819
MediaWiki\Tidy\BalanceActiveFormattingElements\reconstruct
reconstruct( $stack)
Reconstruct the active formatting elements.
Definition: Balancer.php:1681
MediaWiki\Tidy\BalanceSets\$inScopeSet
static $inScopeSet
Definition: Balancer.php:210
Sanitizer
HTML sanitizer for MediaWiki.
Definition: Sanitizer.php:31
MediaWiki\Tidy\BalanceElement\appendChild
appendChild( $elt)
Append $elt to the end of the list of children.
Definition: Balancer.php:432
MediaWiki\Tidy\Balancer\inSelectMode
inSelectMode( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3417
MediaWiki\Tidy\BalanceElement\removeChild
removeChild(BalanceElement $elt)
Remove the given child from this element.
Definition: Balancer.php:392
MediaWiki\Tidy\BalanceActiveFormattingElements\__toString
__toString()
Get a string representation of the AFE list, for debugging.
Definition: Balancer.php:1727
MediaWiki\Tidy\Balancer\endCaption
endCaption()
Definition: Balancer.php:3109
MediaWiki\Tidy\Balancer\$allowComments
$allowComments
Definition: Balancer.php:1814
MediaWiki\Tidy
Definition: Balancer.php:27
MediaWiki\Tidy\BalanceStack\generateImpliedEndTags
generateImpliedEndTags( $butnot=null, $thorough=false)
Generate implied end tags.
Definition: Balancer.php:894
MediaWiki\Tidy\Balancer\endSection
endSection()
Definition: Balancer.php:3219
MediaWiki\Tidy\Balancer\__construct
__construct(array $config=[])
Create a new Balancer.
Definition: Balancer.php:1888
MediaWiki\Tidy\Balancer\insertForeignToken
insertForeignToken( $token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:2050
MediaWiki\Tidy\Balancer\$processingCallback
callable null $processingCallback
Definition: Balancer.php:1827
MediaWiki\Tidy\BalanceSets\$inInvertedSelectScopeSet
static $inInvertedSelectScopeSet
Definition: Balancer.php:251
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2171
MediaWiki\Tidy\Balancer\$inRAWTEXT
$inRAWTEXT
Definition: Balancer.php:1824
MediaWiki\Tidy\BalanceSets\$inListItemScopeSet
static $inListItemScopeSet
Definition: Balancer.php:226
MediaWiki\Tidy\BalanceElement\adoptChildren
adoptChildren(BalanceElement $elt)
Transfer all of the children of $elt to $this.
Definition: Balancer.php:452