MediaWiki  1.28.1
Balancer.php
Go to the documentation of this file.
1 <?php
26 namespace MediaWiki\Tidy;
27 
30 use \ExplodeIterator;
31 use \IteratorAggregate;
32 use \ReverseArrayIterator;
33 use \Sanitizer;
34 
35 // A note for future librarization[1] -- this file is a good candidate
36 // for splitting into an independent library, except that it is currently
37 // highly optimized for MediaWiki use. It only implements the portions
38 // of the HTML5 tree builder used by tags supported by MediaWiki, and
39 // does not contain a true tokenizer pass, instead relying on
40 // comment stripping, attribute normalization, and escaping done by
41 // the MediaWiki Sanitizer. It also deliberately avoids building
42 // a true DOM in memory, instead serializing elements to an output string
43 // as soon as possible (usually as soon as the tag is closed) to reduce
44 // its memory footprint.
45 
46 // We've been gradually lifting some of these restrictions to handle
47 // non-sanitized output generated by extensions, but we shortcut the tokenizer
48 // for speed (primarily by splitting on `<`) and so rely on syntactic
49 // well-formedness.
50 
51 // On the other hand, I've been pretty careful to note with comments in the
52 // code the places where this implementation omits features of the spec or
53 // depends on the MediaWiki Sanitizer. Perhaps in the future we'll want to
54 // implement the missing pieces and make this a standalone PHP HTML5 parser.
55 // In order to do so, some sort of MediaWiki-specific API will need
56 // to be added to (a) allow the Balancer to bypass the tokenizer,
57 // and (b) support on-the-fly flattening instead of DOM node creation.
58 
59 // [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
60 
69 class BalanceSets {
70  const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
71  const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
72  const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';
73 
74  public static $unsupportedSet = [
75  self::HTML_NAMESPACE => [
76  'html' => true, 'head' => true, 'body' => true, 'frameset' => true,
77  'frame' => true,
78  'plaintext' => true, 'isindex' => true,
79  'xmp' => true, 'iframe' => true, 'noembed' => true,
80  'noscript' => true, 'script' => true,
81  'title' => true
82  ]
83  ];
84 
85  public static $emptyElementSet = [
86  self::HTML_NAMESPACE => [
87  'area' => true, 'base' => true, 'basefont' => true,
88  'bgsound' => true, 'br' => true, 'col' => true, 'command' => true,
89  'embed' => true, 'frame' => true, 'hr' => true, 'img' => true,
90  'input' => true, 'keygen' => true, 'link' => true, 'meta' => true,
91  'param' => true, 'source' => true, 'track' => true, 'wbr' => true
92  ]
93  ];
94 
95  public static $extraLinefeedSet = [
96  self::HTML_NAMESPACE => [
97  'pre' => true, 'textarea' => true, 'listing' => true,
98  ]
99  ];
100 
101  public static $headingSet = [
102  self::HTML_NAMESPACE => [
103  'h1' => true, 'h2' => true, 'h3' => true,
104  'h4' => true, 'h5' => true, 'h6' => true
105  ]
106  ];
107 
108  public static $specialSet = [
109  self::HTML_NAMESPACE => [
110  'address' => true, 'applet' => true, 'area' => true,
111  'article' => true, 'aside' => true, 'base' => true,
112  'basefont' => true, 'bgsound' => true, 'blockquote' => true,
113  'body' => true, 'br' => true, 'button' => true, 'caption' => true,
114  'center' => true, 'col' => true, 'colgroup' => true, 'dd' => true,
115  'details' => true, 'dir' => true, 'div' => true, 'dl' => true,
116  'dt' => true, 'embed' => true, 'fieldset' => true,
117  'figcaption' => true, 'figure' => true, 'footer' => true,
118  'form' => true, 'frame' => true, 'frameset' => true, 'h1' => true,
119  'h2' => true, 'h3' => true, 'h4' => true, 'h5' => true,
120  'h6' => true, 'head' => true, 'header' => true, 'hgroup' => true,
121  'hr' => true, 'html' => true, 'iframe' => true, 'img' => true,
122  'input' => true, 'isindex' => true, 'li' => true, 'link' => true,
123  'listing' => true, 'main' => true, 'marquee' => true,
124  'menu' => true, 'menuitem' => true, 'meta' => true, 'nav' => true,
125  'noembed' => true, 'noframes' => true, 'noscript' => true,
126  'object' => true, 'ol' => true, 'p' => true, 'param' => true,
127  'plaintext' => true, 'pre' => true, 'script' => true,
128  'section' => true, 'select' => true, 'source' => true,
129  'style' => true, 'summary' => true, 'table' => true,
130  'tbody' => true, 'td' => true, 'template' => true,
131  'textarea' => true, 'tfoot' => true, 'th' => true, 'thead' => true,
132  'title' => true, 'tr' => true, 'track' => true, 'ul' => true,
133  'wbr' => true, 'xmp' => true
134  ],
135  self::SVG_NAMESPACE => [
136  'foreignobject' => true, 'desc' => true, 'title' => true
137  ],
138  self::MATHML_NAMESPACE => [
139  'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
140  'mtext' => true, 'annotation-xml' => true
141  ]
142  ];
143 
144  public static $addressDivPSet = [
145  self::HTML_NAMESPACE => [
146  'address' => true, 'div' => true, 'p' => true
147  ]
148  ];
149 
150  public static $tableSectionRowSet = [
151  self::HTML_NAMESPACE => [
152  'table' => true, 'thead' => true, 'tbody' => true,
153  'tfoot' => true, 'tr' => true
154  ]
155  ];
156 
157  public static $impliedEndTagsSet = [
158  self::HTML_NAMESPACE => [
159  'dd' => true, 'dt' => true, 'li' => true, 'optgroup' => true,
160  'option' => true, 'p' => true, 'rb' => true, 'rp' => true,
161  'rt' => true, 'rtc' => true
162  ]
163  ];
164 
165  public static $thoroughImpliedEndTagsSet = [
166  self::HTML_NAMESPACE => [
167  'caption' => true, 'colgroup' => true, 'dd' => true, 'dt' => true,
168  'li' => true, 'optgroup' => true, 'option' => true, 'p' => true,
169  'rb' => true, 'rp' => true, 'rt' => true, 'rtc' => true,
170  'tbody' => true, 'td' => true, 'tfoot' => true, 'th' => true,
171  'thead' => true, 'tr' => true
172  ]
173  ];
174 
175  public static $tableCellSet = [
176  self::HTML_NAMESPACE => [
177  'td' => true, 'th' => true
178  ]
179  ];
180  public static $tableContextSet = [
181  self::HTML_NAMESPACE => [
182  'table' => true, 'template' => true, 'html' => true
183  ]
184  ];
185 
186  public static $tableBodyContextSet = [
187  self::HTML_NAMESPACE => [
188  'tbody' => true, 'tfoot' => true, 'thead' => true,
189  'template' => true, 'html' => true
190  ]
191  ];
192 
193  public static $tableRowContextSet = [
194  self::HTML_NAMESPACE => [
195  'tr' => true, 'template' => true, 'html' => true
196  ]
197  ];
198 
199  // See https://html.spec.whatwg.org/multipage/forms.html#form-associated-element
200  public static $formAssociatedSet = [
201  self::HTML_NAMESPACE => [
202  'button' => true, 'fieldset' => true, 'input' => true,
203  'keygen' => true, 'object' => true, 'output' => true,
204  'select' => true, 'textarea' => true, 'img' => true
205  ]
206  ];
207 
208  public static $inScopeSet = [
209  self::HTML_NAMESPACE => [
210  'applet' => true, 'caption' => true, 'html' => true,
211  'marquee' => true, 'object' => true,
212  'table' => true, 'td' => true, 'template' => true,
213  'th' => true
214  ],
215  self::SVG_NAMESPACE => [
216  'foreignobject' => true, 'desc' => true, 'title' => true
217  ],
218  self::MATHML_NAMESPACE => [
219  'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
220  'mtext' => true, 'annotation-xml' => true
221  ]
222  ];
223 
224  private static $inListItemScopeSet = null;
/**
 * Return the "list item scope" element set, building it on first use.
 *
 * The set is a copy of $inScopeSet with the HTML `ol` and `ul` elements
 * added; the result is cached in the private static property.
 *
 * @return array Map of namespace URI => [ local name => true ]
 */
public static function inListItemScopeSet() {
	if ( self::$inListItemScopeSet !== null ) {
		return self::$inListItemScopeSet;
	}
	$set = self::$inScopeSet;
	$set[self::HTML_NAMESPACE]['ol'] = true;
	$set[self::HTML_NAMESPACE]['ul'] = true;
	self::$inListItemScopeSet = $set;
	return $set;
}
233 
234  private static $inButtonScopeSet = null;
/**
 * Return the "button scope" element set, building it on first use.
 *
 * The set is a copy of $inScopeSet with the HTML `button` element added;
 * the result is cached in the private static property.
 *
 * @return array Map of namespace URI => [ local name => true ]
 */
public static function inButtonScopeSet() {
	if ( self::$inButtonScopeSet !== null ) {
		return self::$inButtonScopeSet;
	}
	$set = self::$inScopeSet;
	$set[self::HTML_NAMESPACE]['button'] = true;
	self::$inButtonScopeSet = $set;
	return $set;
}
242 
243  public static $inTableScopeSet = [
244  self::HTML_NAMESPACE => [
245  'html' => true, 'table' => true, 'template' => true
246  ]
247  ];
248 
249  public static $inInvertedSelectScopeSet = [
250  self::HTML_NAMESPACE => [
251  'option' => true, 'optgroup' => true
252  ]
253  ];
254 
// MathML elements treated as text integration points.
// NOTE(review): the declaration line for this array is absent from this
// listing (the embedded numbering skips 255). The property name below is
// reconstructed to parallel $htmlIntegrationPointSet and
// BalanceElement::isMathmlTextIntegrationPoint() -- confirm against the
// upstream file.
public static $mathmlTextIntegrationPointSet = [
	self::MATHML_NAMESPACE => [
		'mi' => true, 'mo' => true, 'mn' => true, 'ms' => true,
		'mtext' => true
	]
];
261 
262  public static $htmlIntegrationPointSet = [
263  self::SVG_NAMESPACE => [
264  'foreignobject' => true,
265  'desc' => true,
266  'title' => true
267  ]
268  ];
269 
270  // For tidy compatibility.
271  public static $tidyPWrapSet = [
272  self::HTML_NAMESPACE => [
273  'body' => true, 'blockquote' => true,
274  // We parse with <body> as the fragment context, but the top-level
275  // element on the stack is actually <html>. We could use the
276  // "adjusted current node" everywhere to work around this, but it's
277  // easier just to add <html> to the p-wrap set.
278  'html' => true,
279  ],
280  ];
281  public static $tidyInlineSet = [
282  self::HTML_NAMESPACE => [
283  'a' => true, 'abbr' => true, 'acronym' => true, 'applet' => true,
284  'b' => true, 'basefont' => true, 'bdo' => true, 'big' => true,
285  'br' => true, 'button' => true, 'cite' => true, 'code' => true,
286  'dfn' => true, 'em' => true, 'font' => true, 'i' => true,
287  'iframe' => true, 'img' => true, 'input' => true, 'kbd' => true,
288  'label' => true, 'legend' => true, 'map' => true, 'object' => true,
289  'param' => true, 'q' => true, 'rb' => true, 'rbc' => true,
290  'rp' => true, 'rt' => true, 'rtc' => true, 'ruby' => true,
291  's' => true, 'samp' => true, 'select' => true, 'small' => true,
292  'span' => true, 'strike' => true, 'strong' => true, 'sub' => true,
293  'sup' => true, 'textarea' => true, 'tt' => true, 'u' => true,
294  'var' => true,
295  ],
296  ];
297 }
298 
321  public $localName;
326  public $attribs;
327 
333  public $parent;
334 
342  public $children;
343 
347  private $noahKey;
348 
353  public $nextAFE;
354 
359  public $prevAFE;
360 
364  public $nextNoah;
365 
375  $this->localName = $localName;
376  $this->namespaceURI = $namespaceURI;
377  $this->attribs = $attribs;
378  $this->contents = '';
379  $this->parent = null;
380  $this->children = [];
381  }
382 
/**
 * Detach a direct child element from this element.
 *
 * The child's parent pointer is cleared and it is spliced out of the
 * children array.
 *
 * @param BalanceElement $elt Element to remove; must have $this as parent.
 */
private function removeChild( BalanceElement $elt ) {
	if ( $this->parent === 'flat' ) {
		// Build the message only on failure: interpolating $this invokes
		// __toString(), which serializes this element and all its children.
		Assert::precondition(
			false, "Can't removeChild after flattening $this"
		);
	}
	Assert::parameter(
		$elt->parent === $this, '$elt', 'must have $this as a parent'
	);
	$idx = array_search( $elt, $this->children, true );
	Assert::parameter( $idx !== false, '$elt', 'must be a child of $this' );
	$elt->parent = null;
	array_splice( $this->children, $idx, 1 );
}
399 
/**
 * Insert $b into this element's children immediately before child $a.
 *
 * If $b is an element that already has a parent, it is detached from that
 * parent first.
 *
 * @param BalanceElement $a Existing child of $this to insert before.
 * @param BalanceElement|string $b Element or text to insert.
 */
public function insertBefore( BalanceElement $a, $b ) {
	Assert::precondition(
		$this->parent !== 'flat', "Can't insertBefore after flattening."
	);
	$idx = array_search( $a, $this->children, true );
	Assert::parameter( $idx !== false, '$a', 'must be a child of $this' );
	if ( !is_string( $b ) ) {
		Assert::parameter( $b->parent !== 'flat', '$b', "Can't be flat" );
		if ( $b === $a ) {
			// Inserting a child before itself leaves the tree unchanged.
			return;
		}
		if ( $b->parent !== null ) {
			$wasSibling = ( $b->parent === $this );
			$b->parent->removeChild( $b );
			if ( $wasSibling ) {
				// Removing $b from our own children may have shifted $a;
				// look its position up again so we don't use a stale index.
				$idx = array_search( $a, $this->children, true );
			}
		}
		$b->parent = $this;
	}
	array_splice( $this->children, $idx, 0, [ $b ] );
}
422 
/**
 * Append a node to the end of this element's children.
 *
 * An element that already has a parent is detached from it first; text
 * (a string) is simply appended.
 *
 * @param BalanceElement|string $elt Element or text to append.
 */
public function appendChild( $elt ) {
	Assert::precondition(
		$this->parent !== 'flat', "Can't appendChild after flattening."
	);
	if ( !is_string( $elt ) ) {
		// Detach from the previous parent, if there was one.
		$oldParent = $elt->parent;
		if ( $oldParent !== null ) {
			$oldParent->removeChild( $elt );
		}
		$elt->parent = $this;
	}
	$this->children[] = $elt;
}
442 
/**
 * Move every child of $elt to the end of this element's children,
 * leaving $elt with no children.
 *
 * @param BalanceElement $elt Element whose children are taken over.
 */
public function adoptChildren( BalanceElement $elt ) {
	Assert::precondition(
		$elt->parent !== 'flat', "Can't adoptChildren after flattening."
	);
	foreach ( $elt->children as $node ) {
		if ( !is_string( $node ) ) {
			// Clearing the parent pointer up front lets appendChild() skip
			// removeChild(), avoiding an O(n^2) series of array_splice calls.
			$node->parent = null;
		}
		$this->appendChild( $node );
	}
	$elt->children = [];
}
461 
/**
 * Serialize this element to a string and store that string in place of
 * the element in its parent's children array.
 *
 * Afterwards $this->parent is set to the sentinel 'flat'. When
 * $config['tidyCompat'] is set, child elements are flattened first, a
 * blank `mw:p-wrap` is dropped (emitted as ''), a non-blank one is renamed
 * to `p`, and a blank attribute-less `tr`/`li` gets class "mw-empty-elt".
 *
 * @param array $config Balancer configuration; reads key 'tidyCompat'.
 * @return string The serialized form stored in the parent.
 */
public function flatten( array $config ) {
	$parent = $this->parent;
	Assert::parameter( $parent !== null, '$this', 'must be a child' );
	Assert::parameter( $parent !== 'flat', '$this', 'already flat' );
	$idx = array_search( $this, $parent->children, true );
	Assert::parameter(
		$idx !== false, '$this', 'must be a child of its parent'
	);

	$dropElement = false;
	if ( $config['tidyCompat'] ) {
		// Flatten the children, tracking whether any non-whitespace
		// content turns up.
		$blank = true;
		foreach ( $this->children as $child ) {
			$text = is_string( $child ) ? $child : $child->flatten( $config );
			if ( $blank && preg_match( '/[^\t\n\f\r ]/', $text ) ) {
				$blank = false;
			}
		}
		if ( $this->isHtmlNamed( 'mw:p-wrap' ) ) {
			$this->localName = 'p';
			// A blank p-wrapper produces no output at all.
			$dropElement = $blank;
		} elseif (
			$blank &&
			!count( $this->attribs ) &&
			in_array( $this->localName, [ 'tr', 'li' ], true )
		) {
			// Add 'mw-empty-elt' class so elements can be hidden via CSS
			// for compatibility with legacy tidy.
			$this->attribs = [ 'class' => "mw-empty-elt" ];
		}
	}

	$flat = $dropElement ? '' : "{$this}";
	$parent->children[$idx] = $flat;
	$this->parent = 'flat'; // for assertion checking
	return $flat;
}
510 
/**
 * Serialize this element (start tag, children, end tag) to an HTML string.
 *
 * Attribute values are encoded with Sanitizer::encodeAttribute(). Elements
 * in BalanceSets::$emptyElementSet are emitted as self-closing tags and
 * must have no children.
 *
 * @return string
 */
public function __toString() {
	$encAttribs = '';
	foreach ( $this->attribs as $name => $value ) {
		$encValue = Sanitizer::encodeAttribute( $value );
		$encAttribs .= " $name=\"$encValue\"";
	}
	if ( !$this->isA( BalanceSets::$emptyElementSet ) ) {
		$out = "<{$this->localName}{$encAttribs}>";
		// Offset of the first byte after the start tag.
		$len = strlen( $out );
		// flatten children
		foreach ( $this->children as $elt ) {
			$out .= "{$elt}";
		}
		$out .= "</{$this->localName}>";
		// NOTE(review): the first operand of this condition was missing
		// from the listing (embedded numbering skips 533). It is restored
		// from the comment below and BalanceSets::$extraLinefeedSet so the
		// linefeed is only doubled for pre/listing/textarea -- confirm.
		if (
			$this->isA( BalanceSets::$extraLinefeedSet ) &&
			$out[$len] === "\n"
		) {
			// Double the linefeed after pre/listing/textarea
			// according to the HTML5 fragment serialization algorithm.
			$out = substr( $out, 0, $len + 1 ) .
				substr( $out, $len );
		}
	} else {
		$out = "<{$this->localName}{$encAttribs} />";
		Assert::invariant(
			count( $this->children ) === 0,
			"Empty elements shouldn't have children."
		);
	}
	return $out;
}
550 
551  // Utility functions on BalanceElements.
552 
/**
 * Test whether this element matches $set.
 *
 * @param BalanceElement|array|string $set One of:
 *  - a BalanceElement: matches by identity;
 *  - an array keyed by namespace URI then local name: matches by membership;
 *  - anything else: treated as an HTML element name.
 * @return bool
 */
public function isA( $set ) {
	if ( $set instanceof BalanceElement ) {
		return $set === $this;
	}
	if ( is_array( $set ) ) {
		return isset( $set[$this->namespaceURI][$this->localName] );
	}
	// assume this is an HTML element name.
	return $this->isHtml() && $set === $this->localName;
}
572 
/**
 * Test whether this is an element in the HTML namespace with the given
 * local name.
 *
 * @param string $tagName
 * @return bool
 */
public function isHtmlNamed( $tagName ) {
	return $this->isHtml() && $tagName === $this->localName;
}
582 
/**
 * Test whether this element is in the HTML namespace.
 *
 * @return bool
 */
public function isHtml() {
	return BalanceSets::HTML_NAMESPACE === $this->namespaceURI;
}
591 
/**
 * NOTE(review): the body of this method is absent from this listing (the
 * embedded numbering jumps from 599 to 601), so as shown it returns null.
 * Presumably it tests this element against a BalanceSets element set of
 * MathML text integration points -- restore the missing statement from
 * the upstream file.
 */
599  public function isMathmlTextIntegrationPoint() {
601  }
602 
/**
 * Test whether this element is an HTML integration point.
 *
 * True for members of BalanceSets::$htmlIntegrationPointSet, and for a
 * MathML `annotation-xml` element whose `encoding` attribute is
 * (case-insensitively) "text/html" or "application/xhtml+xml".
 *
 * @return bool
 */
public function isHtmlIntegrationPoint() {
	if ( $this->isA( BalanceSets::$htmlIntegrationPointSet ) ) {
		return true;
	}
	$isAnnotationXml =
		$this->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
		$this->localName === 'annotation-xml';
	if ( !$isAnnotationXml || !isset( $this->attribs['encoding'] ) ) {
		return false;
	}
	$encoding = $this->attribs['encoding'];
	return strcasecmp( $encoding, 'text/html' ) == 0 ||
		strcasecmp( $encoding, 'application/xhtml+xml' ) == 0;
}
625 
/**
 * Return a string key identifying this element by namespace, local name
 * and attributes; the key is computed once and cached.
 *
 * Attributes are sorted by name first, so elements that differ only in
 * attribute order produce the same key.
 *
 * @return string
 */
public function getNoahKey() {
	if ( $this->noahKey === null ) {
		// Sort a copy so the element's own attribute order is untouched.
		// (This assignment was missing from the listing, leaving $attribs
		// undefined.)
		$attribs = $this->attribs;
		ksort( $attribs );
		$this->noahKey = serialize( [ $this->namespaceURI, $this->localName, $attribs ] );
	}
	return $this->noahKey;
}
637 }
638 
654 class BalanceStack implements IteratorAggregate {
659  private $elements = [];
666  public $fosterParentMode = false;
672  private $config;
676  public $currentNode;
677 
/**
 * Create a stack of open elements holding only a root <html> element.
 *
 * @param array $config Balancer configuration, stored for later use when
 *   flattening elements.
 */
public function __construct( array $config ) {
	// always a root <html> element on the stack
	// (The element argument was missing from the listing; without it
	// $this->elements[0] below would not exist.)
	array_push(
		$this->elements,
		new BalanceElement( BalanceSets::HTML_NAMESPACE, 'html', [] )
	);
	$this->currentNode = $this->elements[0];
	$this->config = $config;
}
692 
/**
 * Serialize the children of the root element, flattening any that are
 * still elements.
 *
 * @return string The balanced HTML, without the outer <html> wrapper.
 */
public function getOutput() {
	// Don't include the outer '<html>....</html>'
	$pieces = [];
	foreach ( $this->elements[0]->children as $child ) {
		$pieces[] = is_string( $child ) ?
			$child : $child->flatten( $this->config );
	}
	return implode( '', $pieces );
}
707 
/**
 * Insert a comment, wrapped in `<!--` ... `-->`, at the current
 * insertion point.
 *
 * @param string $value Comment contents.
 * @return null Whatever insertText() returns.
 */
public function insertComment( $value ) {
	// Just another type of text node, except for tidy p-wrapping.
	$asText = "<!--{$value}-->";
	return $this->insertText( $asText, true );
}
717 
/**
 * Insert text at the current insertion point.
 *
 * Text is foster-parented when foster parent mode is on and the current
 * node is a table/section/row element. In tidy-compat mode, non-comment
 * text whose current node is in the p-wrap set is first wrapped in a new
 * `mw:p-wrap` element.
 *
 * @param string $value Text to insert.
 * @param bool $isComment True if $value is a serialized comment.
 */
public function insertText( $value, $isComment = false ) {
	$node = $this->currentNode;
	if (
		$this->fosterParentMode &&
		$node->isA( BalanceSets::$tableSectionRowSet )
	) {
		$this->fosterParent( $value );
		return;
	}
	$needsWrapper = $this->config['tidyCompat'] && !$isComment &&
		$node->isA( BalanceSets::$tidyPWrapSet );
	if ( $needsWrapper ) {
		// Open a p-wrapper, then retry with it as the current node.
		$this->insertHTMLElement( 'mw:p-wrap', [] );
		return $this->insertText( $value );
	}
	$node->appendChild( $value );
}
740 
/**
 * Create an element in the given namespace and insert it at the current
 * insertion point.
 *
 * @param string $namespaceURI
 * @param string $tag Local name of the element.
 * @param array $attribs Attribute name => value map.
 * @return BalanceElement The element as returned by insertElement().
 */
public function insertForeignElement( $namespaceURI, $tag, $attribs ) {
	$elt = new BalanceElement( $namespaceURI, $tag, $attribs );
	return $this->insertElement( $elt );
}
755 
/**
 * Create an element in the HTML namespace and insert it at the current
 * insertion point.
 *
 * @param string $tag Local name of the element.
 * @param array $attribs Attribute name => value map.
 * @return BalanceElement The element as returned by insertElement().
 */
public function insertHTMLElement( $tag, $attribs ) {
	// The argument list was missing from the listing; it is restored to
	// match insertForeignElement( $namespaceURI, $tag, $attribs ).
	return $this->insertForeignElement(
		BalanceSets::HTML_NAMESPACE, $tag, $attribs
	);
}
769 
/**
 * Insert an element at the current insertion point and push it onto the
 * stack of open elements, making it the current node.
 *
 * @param BalanceElement $elt Element to insert.
 * @return BalanceElement The element now on top of the stack. This can
 *   differ from $elt when foster parenting substitutes another element.
 */
public function insertElement( BalanceElement $elt ) {
	// NOTE(review): the second operand of this condition was missing from
	// the listing (embedded numbering skips 780). It is reconstructed as
	// "element is not a tidy inline element", which closes an open
	// p-wrapper before a block-level element -- confirm against upstream.
	if (
		$this->currentNode->isHtmlNamed( 'mw:p-wrap' ) &&
		!$elt->isA( BalanceSets::$tidyInlineSet )
	) {
		// Tidy compatibility.
		$this->pop();
	}
	if (
		$this->fosterParentMode &&
		$this->currentNode->isA( BalanceSets::$tableSectionRowSet )
	) {
		$elt = $this->fosterParent( $elt );
	} else {
		$this->currentNode->appendChild( $elt );
	}
	// Build the failure messages lazily: interpolating $elt serializes the
	// element (and its children) via __toString().
	if ( $elt->parent === null ) {
		Assert::invariant( false, "$elt must be in tree" );
	}
	if ( $elt->parent === 'flat' ) {
		Assert::invariant( false, "$elt must not have been previous flattened" );
	}
	array_push( $this->elements, $elt );
	$this->currentNode = $elt;
	return $elt;
}
799 
/**
 * Determine whether the stack has an element matching $tag in scope.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return bool
 */
public function inScope( $tag ) {
	// The body was missing from the listing; an empty body returns null,
	// which callers such as adoptionAgency() would read as "not in scope".
	return $this->inSpecificScope( $tag, BalanceSets::$inScopeSet );
}
809 
/**
 * Determine whether the stack has an element matching $tag in button
 * scope.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return bool
 */
public function inButtonScope( $tag ) {
	// The body was missing from the listing; restored using the scope
	// boundary set built by BalanceSets::inButtonScopeSet().
	return $this->inSpecificScope( $tag, BalanceSets::inButtonScopeSet() );
}
819 
/**
 * Determine whether the stack has an element matching $tag in list item
 * scope.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return bool
 */
public function inListItemScope( $tag ) {
	// The body was missing from the listing; restored using the scope
	// boundary set built by BalanceSets::inListItemScopeSet().
	return $this->inSpecificScope( $tag, BalanceSets::inListItemScopeSet() );
}
829 
/**
 * Determine whether the stack has an element matching $tag in table
 * scope.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return bool
 */
public function inTableScope( $tag ) {
	// The body was missing from the listing; restored using
	// BalanceSets::$inTableScopeSet as the scope boundary.
	return $this->inSpecificScope( $tag, BalanceSets::$inTableScopeSet );
}
839 
/**
 * Determine whether the stack has an element matching $tag in select
 * scope.
 *
 * Walking down from the current node, any element that is not an
 * `option` or `optgroup` ends the scope.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return bool
 */
public function inSelectScope( $tag ) {
	// Can't use inSpecificScope to implement this, since it involves
	// *inverting* a set of tags. Implement manually.
	foreach ( $this as $node ) {
		if ( $node->isA( $tag ) ) {
			return true;
		}
		if ( !$node->isA( BalanceSets::$inInvertedSelectScopeSet ) ) {
			break;
		}
	}
	return false;
}
859 
/**
 * Determine whether the stack has an element matching $tag in the scope
 * bounded by the elements of $set.
 *
 * The stack is walked from the current node downwards; the target is
 * tested before the boundary set at each step.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @param BalanceElement|array|string $set Scope boundary, also tested
 *   with BalanceElement::isA().
 * @return bool
 */
public function inSpecificScope( $tag, $set ) {
	foreach ( $this as $node ) {
		if ( $node->isA( $tag ) ) {
			return true;
		}
		if ( $node->isA( $set ) ) {
			break;
		}
	}
	return false;
}
878 
/**
 * Pop elements off the stack for as long as the current node is in the
 * implied-end-tags set.
 *
 * @param string|null $butnot HTML tag name at which to stop popping, or
 *   null for no exclusion.
 * @param bool $thorough Use the larger "thorough" implied end tag set.
 */
public function generateImpliedEndTags( $butnot = null, $thorough = false ) {
	// Both branches of this ternary were missing from the listing; they
	// are restored from the two implied-end-tag sets in BalanceSets.
	$endTagSet = $thorough ?
		BalanceSets::$thoroughImpliedEndTagsSet :
		BalanceSets::$impliedEndTagsSet;
	while ( $this->currentNode ) {
		if ( $butnot !== null && $this->currentNode->isHtmlNamed( $butnot ) ) {
			break;
		}
		if ( !$this->currentNode->isA( $endTagSet ) ) {
			break;
		}
		$this->pop();
	}
}
899 
/**
 * Return the adjusted current node: the fragment context when one is
 * given and the stack holds exactly one element, otherwise the current
 * node.
 *
 * @param BalanceElement|null $fragmentContext
 * @return BalanceElement|null
 */
public function adjustedCurrentNode( $fragmentContext ) {
	if ( $fragmentContext && count( $this->elements ) === 1 ) {
		return $fragmentContext;
	}
	return $this->currentNode;
}
907 
/**
 * Return an iterator over the stack, wrapping the elements array in a
 * ReverseArrayIterator.
 *
 * @return ReverseArrayIterator
 */
public function getIterator() {
	$iterator = new ReverseArrayIterator( $this->elements );
	return $iterator;
}
916 
/**
 * Return the stack entry at the given index (0 is the root element).
 *
 * @param int $idx
 * @return BalanceElement
 */
public function node( $idx ) {
	$entry = $this->elements[$idx];
	return $entry;
}
926 
/**
 * Replace the stack entry at $idx with $elt, updating the current node
 * if the replaced entry was the top of the stack.
 *
 * Neither the old nor the new element may already be flattened.
 *
 * @param int $idx
 * @param BalanceElement $elt
 */
public function replaceAt( $idx, BalanceElement $elt ) {
	$old = $this->elements[$idx];
	Assert::precondition(
		$old->parent !== 'flat',
		'Replaced element should not have already been flattened.'
	);
	Assert::precondition(
		$elt->parent !== 'flat',
		'New element should not have already been flattened.'
	);
	$this->elements[$idx] = $elt;
	$topIndex = count( $this->elements ) - 1;
	if ( $idx === $topIndex ) {
		$this->currentNode = $elt;
	}
}
946 
/**
 * Find the stack index of the entry nearest the top that matches $tag.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 * @return int Index into the stack, or -1 if nothing matches.
 */
public function indexOf( $tag ) {
	$i = count( $this->elements );
	// Scan from the top of the stack down to the root.
	while ( $i-- > 0 ) {
		if ( $this->elements[$i]->isA( $tag ) ) {
			return $i;
		}
	}
	return -1;
}
961 
/**
 * Return the number of elements currently on the stack.
 *
 * @return int
 */
public function length() {
	$depth = count( $this->elements );
	return $depth;
}
969 
/**
 * Remove the current node from the stack and make the element beneath it
 * (or null, if the stack is now empty) the current node.
 *
 * The popped element is flattened unless it is an `mw:p-wrap` wrapper.
 */
public function pop() {
	$popped = array_pop( $this->elements );
	$remaining = count( $this->elements );
	$this->currentNode = $remaining > 0 ?
		$this->elements[$remaining - 1] : null;
	if ( $popped->isHtmlNamed( 'mw:p-wrap' ) ) {
		return;
	}
	$popped->flatten( $this->config );
}
985 
/**
 * Pop elements until the stack holds no more than $idx entries.
 *
 * @param int $idx Target stack length.
 */
public function popTo( $idx ) {
	while ( count( $this->elements ) > $idx ) {
		$this->pop();
	}
}
996 
/**
 * Pop elements off the stack up to and including the first one matching
 * $tag.
 *
 * @param BalanceElement|array|string $tag Anything accepted by
 *   BalanceElement::isA().
 */
public function popTag( $tag ) {
	while ( $this->currentNode ) {
		$matched = $this->currentNode->isA( $tag );
		$this->pop();
		if ( $matched ) {
			break;
		}
	}
}
1012 
/**
 * Pop elements until the current node matches $set, never removing the
 * bottom (root) entry.
 *
 * @param BalanceElement|array|string $set Anything accepted by
 *   BalanceElement::isA().
 */
public function clearToContext( $set ) {
	// Note that we don't loop to 0. Never pop the <html> elt off.
	while (
		count( $this->elements ) > 1 &&
		!$this->currentNode->isA( $set )
	) {
		$this->pop();
	}
}
1027 
/**
 * Remove the given element from the stack of open elements, wherever it
 * is, optionally flattening it into its parent.
 *
 * @param BalanceElement $elt Element to remove; neither it nor its parent
 *   may already be flattened.
 * @param bool $flatten Serialize $elt into its parent immediately.
 */
public function removeElement( BalanceElement $elt, $flatten = true ) {
	Assert::parameter(
		$elt->parent !== 'flat',
		'$elt',
		'$elt should not already have been flattened.'
	);
	Assert::parameter(
		$elt->parent->parent !== 'flat',
		'$elt',
		'The parent of $elt should not already have been flattened.'
	);
	$pos = array_search( $elt, $this->elements, true );
	Assert::parameter( $pos !== false, '$elt', 'must be in stack' );
	array_splice( $this->elements, $pos, 1 );
	if ( $pos === count( $this->elements ) ) {
		// $elt was the top of the stack; the entry beneath it is now
		// the current node.
		$this->currentNode = $this->elements[$pos - 1];
	}
	if ( $flatten ) {
		// serialize $elt into its parent
		// otherwise, it will eventually serialize when the parent
		// is serialized, we just hold onto the memory for its
		// tree of objects a little longer.
		$elt->flatten( $this->config );
	}
	Assert::postcondition(
		!in_array( $elt, $this->elements, true ),
		'$elt should no longer be in open elements stack'
	);
}
1063 
/**
 * Insert $b into the stack of open elements immediately after $a.
 *
 * If $a is the top of the stack, $b becomes the current node.
 *
 * @param BalanceElement $a Element already on the stack.
 * @param BalanceElement $b Element to insert.
 */
public function insertAfter( BalanceElement $a, BalanceElement $b ) {
	$idx = $this->indexOf( $a );
	// indexOf() reports "not found" as -1, never false; comparing against
	// false let a missing $a slip through and insert $b at index 0.
	Assert::parameter( $idx >= 0, '$a', 'must be in stack' );
	if ( $idx === count( $this->elements ) - 1 ) {
		array_push( $this->elements, $b );
		$this->currentNode = $b;
	} else {
		array_splice( $this->elements, $idx + 1, 0, [ $b ] );
	}
}
1079 
1080  // Fostering and adoption.
1081 
/**
 * Foster parent the given node: insert it relative to the nearest
 * `table` or `template` on the stack instead of under the current node.
 *
 * Placement: if a `template` is nearer the top of the stack than any
 * `table`, append to that template; otherwise, if a `table` is on the
 * stack, insert into the table's parent immediately before the table;
 * otherwise append to the root element.
 *
 * In tidy-compat mode, text destined for a parent in the p-wrap set is
 * wrapped in a new `mw:p-wrap` element, and a `mw:p-wrap` element that
 * would land right after an existing `mw:p-wrap` sibling is not inserted;
 * the existing wrapper is returned instead.
 *
 * @param BalanceElement|string $elt Element or text to foster parent.
 * @return BalanceElement|string $elt, or the existing p-wrapper that was
 *   re-used in its place.
 */
1089  private function fosterParent( $elt ) {
1090  $lastTable = $this->indexOf( 'table' );
1091  $lastTemplate = $this->indexOf( 'template' );
1092  $parent = null;
1093  $before = null;
1094 
1095  if ( $lastTemplate >= 0 && ( $lastTable < 0 || $lastTemplate > $lastTable ) ) {
1096  $parent = $this->elements[$lastTemplate];
1097  } elseif ( $lastTable >= 0 ) {
1098  $parent = $this->elements[$lastTable]->parent;
1099  // Assume all tables have parents, since we're not running scripts!
1100  Assert::invariant(
1101  $parent !== null, "All tables should have parents"
1102  );
1103  $before = $this->elements[$lastTable];
1104  } else {
1105  $parent = $this->elements[0]; // the `html` element.
1106  }
1107 
1108  if ( $this->config['tidyCompat'] ) {
1109  if ( is_string( $elt ) ) {
1110  // We're fostering text: do we need a p-wrapper?
1111  if ( $parent->isA( BalanceSets::$tidyPWrapSet ) ) {
// Opening the wrapper pushes it on the stack, so the insertText()
// call below appends to the wrapper.
1112  $this->insertHTMLElement( 'mw:p-wrap', [] );
1113  $this->insertText( $elt );
1114  return $elt;
1115  }
1116  } else {
1117  // We're fostering an element; do we need to merge p-wrappers?
1118  if ( $elt->isHtmlNamed( 'mw:p-wrap' ) ) {
// $idx is the position $elt would be inserted at; $after is the
// sibling that would end up immediately preceding it.
1119  $idx = $before ?
1120  array_search( $before, $parent->children, true ) :
1121  count( $parent->children );
1122  $after = $idx > 0 ? $parent->children[$idx - 1] : '';
1123  if (
1124  $after instanceof BalanceElement &&
1125  $after->isHtmlNamed( 'mw:p-wrap' )
1126  ) {
1127  return $after; // Re-use existing p-wrapper.
1128  }
1129  }
1130  }
1131  }
1132 
1133  if ( $before ) {
1134  $parent->insertBefore( $before, $elt );
1135  } else {
1136  $parent->appendChild( $elt );
1137  }
1138  return $elt;
1139  }
1140 
/**
 * Run the adoption agency algorithm for an end tag named $tag.
 *
 * The inline comments quote the steps being implemented. The outer loop
 * runs at most eight times; each pass locates the formatting element for
 * $tag, finds the "furthest block" below it on the stack, and re-parents
 * the nodes in between, cloning formatting elements as needed.
 *
 * @param string $tag Tag name of the end tag being processed.
 * @param object $afe List of active formatting elements; this method
 *   calls isInList(), findElementByTag(), remove(), insertAfter() and
 *   replace() on it.
 * @return bool True if no further handling is required; false if the
 *   caller should fall back to its default "any other end tag" handling.
 */
1151  public function adoptionAgency( $tag, $afe ) {
1152  // If the current node is an HTML element whose tag name is subject,
1153  // and the current node is not in the list of active formatting
1154  // elements, then pop the current node off the stack of open
1155  // elements and abort these steps.
1156  if (
1157  $this->currentNode->isHtmlNamed( $tag ) &&
1158  !$afe->isInList( $this->currentNode )
1159  ) {
1160  $this->pop();
1161  return true; // no more handling required
1162  }
1163 
1164  // Outer loop: If outer loop counter is greater than or
1165  // equal to eight, then abort these steps.
1166  for ( $outer = 0; $outer < 8; $outer++ ) {
1167  // Let the formatting element be the last element in the list
1168  // of active formatting elements that: is between the end of
1169  // the list and the last scope marker in the list, if any, or
1170  // the start of the list otherwise, and has the same tag name
1171  // as the token.
1172  $fmtElt = $afe->findElementByTag( $tag );
1173 
1174  // If there is no such node, then abort these steps and instead
1175  // act as described in the "any other end tag" entry below.
1176  if ( !$fmtElt ) {
1177  return false; // false means handle by the default case
1178  }
1179 
1180  // Otherwise, if there is such a node, but that node is not in
1181  // the stack of open elements, then this is a parse error;
1182  // remove the element from the list, and abort these steps.
1183  $index = $this->indexOf( $fmtElt );
1184  if ( $index < 0 ) {
1185  $afe->remove( $fmtElt );
1186  return true; // true means no more handling required
1187  }
1188 
1189  // Otherwise, if there is such a node, and that node is also in
1190  // the stack of open elements, but the element is not in scope,
1191  // then this is a parse error; ignore the token, and abort
1192  // these steps.
1193  if ( !$this->inScope( $fmtElt ) ) {
1194  return true;
1195  }
1196 
1197  // Let the furthest block be the topmost node in the stack of
1198  // open elements that is lower in the stack than the formatting
1199  // element, and is an element in the special category. There
1200  // might not be one.
1201  $furthestBlock = null;
1202  $furthestBlockIndex = -1;
1203  $stackLength = $this->length();
1204  for ( $i = $index+1; $i < $stackLength; $i++ ) {
1205  if ( $this->node( $i )->isA( BalanceSets::$specialSet ) ) {
1206  $furthestBlock = $this->node( $i );
1207  $furthestBlockIndex = $i;
1208  break;
1209  }
1210  }
1211 
1212  // If there is no furthest block, then the UA must skip the
1213  // subsequent steps and instead just pop all the nodes from the
1214  // bottom of the stack of open elements, from the current node
1215  // up to and including the formatting element, and remove the
1216  // formatting element from the list of active formatting
1217  // elements.
1218  if ( !$furthestBlock ) {
// popTag() is given the element itself here; isA() matches a
// BalanceElement argument by identity.
1219  $this->popTag( $fmtElt );
1220  $afe->remove( $fmtElt );
1221  return true;
1222  }
1223 
1224  // Let the common ancestor be the element immediately above
1225  // the formatting element in the stack of open elements.
1226  $ancestor = $this->node( $index-1 );
1227 
1228  // Let a bookmark note the position of the formatting
1229  // element in the list of active formatting elements
1230  // relative to the elements on either side of it in the
1231  // list.
1232  $BOOKMARK = new BalanceElement( '[bookmark]', '[bookmark]', [] );
1233  $afe->insertAfter( $fmtElt, $BOOKMARK );
1234 
1235  // Let node and last node be the furthest block.
1236  $node = $furthestBlock;
1237  $lastNode = $furthestBlock;
1238  $nodeIndex = $furthestBlockIndex;
1239  $isAFE = false;
1240 
1241  // Inner loop
1242  for ( $inner = 1; true; $inner++ ) {
1243  // Let node be the element immediately above node in
1244  // the stack of open elements, or if node is no longer
1245  // in the stack of open elements (e.g. because it got
1246  // removed by this algorithm), the element that was
1247  // immediately above node in the stack of open elements
1248  // before node was removed.
1249  $node = $this->node( --$nodeIndex );
1250 
1251  // If node is the formatting element, then go
1252  // to the next step in the overall algorithm.
1253  if ( $node === $fmtElt ) break;
1254 
1255  // If the inner loop counter is greater than three and node
1256  // is in the list of active formatting elements, then remove
1257  // node from the list of active formatting elements.
1258  $isAFE = $afe->isInList( $node );
1259  if ( $inner > 3 && $isAFE ) {
1260  $afe->remove( $node );
1261  $isAFE = false;
1262  }
1263 
1264  // If node is not in the list of active formatting
1265  // elements, then remove node from the stack of open
1266  // elements and then go back to the step labeled inner
1267  // loop.
1268  if ( !$isAFE ) {
1269  // Don't flatten here, since we're about to relocate
1270  // parts of this $node.
1271  $this->removeElement( $node, false );
1272  continue;
1273  }
1274 
1275  // Create an element for the token for which the
1276  // element node was created with common ancestor as
1277  // the intended parent, replace the entry for node
1278  // in the list of active formatting elements with an
1279  // entry for the new element, replace the entry for
1280  // node in the stack of open elements with an entry for
1281  // the new element, and let node be the new element.
1282  $newElt = new BalanceElement(
1283  $node->namespaceURI, $node->localName, $node->attribs );
1284  $afe->replace( $node, $newElt );
1285  $this->replaceAt( $nodeIndex, $newElt );
1286  $node = $newElt;
1287 
1288  // If last node is the furthest block, then move the
1289  // aforementioned bookmark to be immediately after the
1290  // new node in the list of active formatting elements.
1291  if ( $lastNode === $furthestBlock ) {
1292  $afe->remove( $BOOKMARK );
1293  $afe->insertAfter( $newElt, $BOOKMARK );
1294  }
1295 
1296  // Insert last node into node, first removing it from
1297  // its previous parent node if any.
1298  $node->appendChild( $lastNode );
1299 
1300  // Let last node be node.
1301  $lastNode = $node;
1302  }
1303 
1304  // If the common ancestor node is a table, tbody, tfoot,
1305  // thead, or tr element, then, foster parent whatever last
1306  // node ended up being in the previous step, first removing
1307  // it from its previous parent node if any.
1308  if (
1309  $this->fosterParentMode &&
1310  $ancestor->isA( BalanceSets::$tableSectionRowSet )
1311  ) {
1312  $this->fosterParent( $lastNode );
1313  } else {
1314  // Otherwise, append whatever last node ended up being in
1315  // the previous step to the common ancestor node, first
1316  // removing it from its previous parent node if any.
1317  $ancestor->appendChild( $lastNode );
1318  }
1319 
1320  // Create an element for the token for which the
1321  // formatting element was created, with furthest block
1322  // as the intended parent.
1323  $newElt2 = new BalanceElement(
1324  $fmtElt->namespaceURI, $fmtElt->localName, $fmtElt->attribs );
1325 
1326  // Take all of the child nodes of the furthest block and
1327  // append them to the element created in the last step.
1328  $newElt2->adoptChildren( $furthestBlock );
1329 
1330  // Append that new element to the furthest block.
1331  $furthestBlock->appendChild( $newElt2 );
1332 
1333  // Remove the formatting element from the list of active
1334  // formatting elements, and insert the new element into the
1335  // list of active formatting elements at the position of
1336  // the aforementioned bookmark.
1337  $afe->remove( $fmtElt );
1338  $afe->replace( $BOOKMARK, $newElt2 );
1339 
1340  // Remove the formatting element from the stack of open
1341  // elements, and insert the new element into the stack of
1342  // open elements immediately below the position of the
1343  // furthest block in that stack.
// removeElement() is called with its default $flatten = true, so the
// formatting element is serialized into its parent at this point.
1344  $this->removeElement( $fmtElt );
1345  $this->insertAfter( $furthestBlock, $newElt2 );
1346  }
1347 
1348  return true;
1349  }
1350 
/**
 * Render the stack as a space-separated list of element local names,
 * in the order the elements are stored in $this->elements.
 *
 * @return string
 */
public function __toString() {
	$names = [];
	foreach ( $this->elements as $elt ) {
		$names[] = $elt->localName;
	}
	// Separator goes first: the legacy implode( $pieces, $glue ) argument
	// order was deprecated in PHP 7.4 and removed in PHP 8.0.
	return implode( ' ', $names );
}
1363 }
1364 
1372  public $nextAFE;
1373  public $prevAFE;
1374 }
1375 
1387  private $tail;
1388 
1390  private $head;
1391 
1410  private $noahTableStack = [ [] ];
1411 
/**
 * Break every link between list entries (main list and Noah chains)
 * and drop the list's own pointers, so no entry keeps another alive.
 */
public function __destruct() {
	$current = $this->head;
	while ( $current ) {
		// Remember the successor before the links are cleared.
		$following = $current->nextAFE;
		$current->nextNoah = null;
		$current->nextAFE = null;
		$current->prevAFE = null;
		$current = $following;
	}
	$this->noahTableStack = null;
	$this->tail = null;
	$this->head = null;
}
1419 
/**
 * Append a new BalanceMarker to the end of the list and open a fresh,
 * empty Noah table for the entries that will follow it.
 */
public function insertMarker() {
	$marker = new BalanceMarker;
	$oldTail = $this->tail;
	if ( !$oldTail ) {
		// Empty list: the marker becomes the first entry.
		$this->head = $marker;
	} else {
		$oldTail->nextAFE = $marker;
		$marker->prevAFE = $oldTail;
	}
	$this->tail = $marker;
	array_push( $this->noahTableStack, [] );
}
1431 
/**
 * Append an element to the end of the list, applying the "Noah's Ark
 * clause": if three entries with the same Noah key already exist in the
 * current (top-most) Noah table, the oldest of them is removed first.
 *
 * @param BalanceElement $elt Element to add; must not already be listed.
 * @throws ParameterAssertionException If $elt is already in the list.
 */
public function push( BalanceElement $elt ) {
	if ( $this->head === $elt || $elt->prevAFE !== null ) {
		throw new ParameterAssertionException( '$elt',
			'Cannot insert a node into the AFE list twice' );
	}

	// Record the element in the Noah chain for its key.
	$noahKey = $elt->getNoahKey();
	$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
	if ( isset( $table[$noahKey] ) ) {
		$oldest = $table[$noahKey];
		$copies = 1;
		// Walk to the end of the chain, counting the entries on the way.
		for ( $newest = $oldest; $newest->nextNoah; $newest = $newest->nextNoah ) {
			$copies++;
		}
		if ( $copies >= 3 ) {
			$this->remove( $oldest );
		}
		$newest->nextNoah = $elt;
	} else {
		$table[$noahKey] = $elt;
	}

	// Link the element at the end of the main AFE list. The tail is read
	// only now because the removal above may have changed it.
	$last = $this->tail;
	if ( $last ) {
		$last->nextAFE = $elt;
		$elt->prevAFE = $last;
	} else {
		$this->head = $elt;
	}
	$this->tail = $elt;
}
1472 
/**
 * Remove entries from the end of the list up to and including the most
 * recent marker. If the list contains no marker, every entry is removed.
 */
public function clearToMarker() {
	// Walk backwards from the tail, unlinking each non-marker entry.
	$cursor = $this->tail;
	while ( $cursor && !( $cursor instanceof BalanceMarker ) ) {
		$before = $cursor->prevAFE;
		$cursor->prevAFE = null;
		if ( $before ) {
			$before->nextAFE = null;
		}
		$cursor->nextNoah = null;
		$cursor = $before;
	}

	if ( !$cursor ) {
		// Ran off the front without meeting a marker: only the
		// top-level Noah table exists, so just empty it.
		$this->noahTableStack[0] = [];
	} else {
		// Stopped on a marker: detach it and discard its Noah table.
		$before = $cursor->prevAFE;
		if ( $before ) {
			$before->nextAFE = null;
		}
		array_pop( $this->noahTableStack );
		$cursor = $before;
	}

	$this->tail = $cursor;
	if ( !$cursor ) {
		// Nothing is left in the list.
		$this->head = null;
	}
}
1508 
/**
 * Search backwards from the end of the list, stopping at the first
 * marker, for an entry whose local name equals $tag.
 *
 * @param string $tag Local name to look for.
 * @return BalanceElement|null The matching entry, or null if none is
 *   found before a marker or the start of the list.
 */
public function findElementByTag( $tag ) {
	for (
		$candidate = $this->tail;
		$candidate && !( $candidate instanceof BalanceMarker );
		$candidate = $candidate->prevAFE
	) {
		if ( $candidate->localName === $tag ) {
			return $candidate;
		}
	}
	return null;
}
1524 
/**
 * Report whether an element is currently linked into the list: it is
 * either the head or it has a predecessor.
 *
 * @param BalanceElement $elt
 * @return bool
 */
public function isInList( BalanceElement $elt ) {
	if ( $this->head === $elt ) {
		return true;
	}
	return (bool)$elt->prevAFE;
}
1533 
/**
 * Unlink an element from the list and from its Noah chain.
 *
 * @param BalanceElement $elt Element to remove; must be in the list.
 * @throws ParameterAssertionException If $elt is not in the list.
 */
public function remove( BalanceElement $elt ) {
	$before = $elt->prevAFE;
	$after = $elt->nextAFE;
	if ( !$before && $this->head !== $elt ) {
		throw new ParameterAssertionException( '$elt',
			"Attempted to remove an element which is not in the AFE list" );
	}

	// Repair the list's end pointers.
	if ( $this->head === $elt ) {
		$this->head = $after;
	}
	if ( $this->tail === $elt ) {
		$this->tail = $before;
	}

	// Join the neighbours to each other.
	if ( $before ) {
		$before->nextAFE = $after;
	}
	if ( $after ) {
		$after->prevAFE = $before;
	}

	// Reset the element's own links so that isInList() reports false.
	$elt->nextAFE = null;
	$elt->prevAFE = null;

	$this->removeFromNoahList( $elt );
}
1565 
/**
 * Append an element to the Noah chain for its key in the current
 * (top-most) Noah table, starting a new chain if the key is unused.
 *
 * @param BalanceElement $elt
 */
private function addToNoahList( BalanceElement $elt ) {
	$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
	$noahKey = $elt->getNoahKey();
	if ( isset( $table[$noahKey] ) ) {
		// Find the last entry of the existing chain and hang $elt off it.
		$last = $table[$noahKey];
		for ( ; $last->nextNoah; $last = $last->nextNoah ) {
			// just advancing
		}
		$last->nextNoah = $elt;
	} else {
		$table[$noahKey] = $elt;
	}
}
1579 
/**
 * Unlink an element from the Noah chain for its key in the current
 * (top-most) Noah table.
 *
 * If the table has no chain for the element's key, or the element is
 * not on that chain, nothing is changed.
 *
 * @param BalanceElement $elt
 */
private function removeFromNoahList( BalanceElement $elt ) {
	$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
	$key = $elt->getNoahKey();
	if ( !isset( $table[$key] ) ) {
		// No chain for this key in the current table. Previously this
		// read an undefined index and then dereferenced null, which
		// raised diagnostics without removing anything.
		return;
	}

	$noahElt = $table[$key];
	if ( $noahElt === $elt ) {
		// The element heads the chain: promote its successor, or drop
		// the key entirely if it was the only entry.
		if ( $elt->nextNoah ) {
			$table[$key] = $elt->nextNoah;
			$elt->nextNoah = null;
		} else {
			unset( $table[$key] );
		}
		return;
	}

	// Otherwise look for the entry whose successor is $elt and bypass it.
	while ( $noahElt->nextNoah ) {
		if ( $noahElt->nextNoah === $elt ) {
			$noahElt->nextNoah = $elt->nextNoah;
			$elt->nextNoah = null;
			return;
		}
		$noahElt = $noahElt->nextNoah;
	}
}
1604 
/**
 * Put element $b into the list position currently held by $a, and move
 * the Noah bookkeeping from $a to $b.
 *
 * @param BalanceElement $a Element currently in the list.
 * @param BalanceElement $b Element that takes its place.
 * @throws ParameterAssertionException If $a is not in the list.
 */
public function replace( BalanceElement $a, BalanceElement $b ) {
	$before = $a->prevAFE;
	$after = $a->nextAFE;
	if ( !$before && $this->head !== $a ) {
		throw new ParameterAssertionException( '$a',
			"Attempted to replace an element which is not in the AFE list" );
	}

	// Repoint the list ends if $a was one of them.
	if ( $this->head === $a ) {
		$this->head = $b;
	}
	if ( $this->tail === $a ) {
		$this->tail = $b;
	}

	// Repoint the neighbours at $b.
	if ( $before ) {
		$before->nextAFE = $b;
	}
	if ( $after ) {
		$after->prevAFE = $b;
	}

	// Give $b the links $a had, then detach $a.
	$b->prevAFE = $before;
	$b->nextAFE = $after;
	$a->nextAFE = $a->prevAFE = null;

	$this->removeFromNoahList( $a );
	$this->addToNoahList( $b );
}
1638 
/**
 * Insert element $b into the list immediately after element $a and
 * register $b in the current Noah table.
 *
 * @param BalanceElement $a Element already in the list.
 * @param BalanceElement $b Element to insert.
 * @throws ParameterAssertionException If $a is not in the list.
 */
public function insertAfter( BalanceElement $a, BalanceElement $b ) {
	if ( !$a->prevAFE && $this->head !== $a ) {
		throw new ParameterAssertionException( '$a',
			"Attempted to insert after an element which is not in the AFE list" );
	}

	$successor = $a->nextAFE;
	if ( $this->tail === $a ) {
		// $b becomes the new end of the list.
		$this->tail = $b;
	}
	if ( $successor ) {
		$successor->prevAFE = $b;
	}
	$b->nextAFE = $successor;
	$b->prevAFE = $a;
	$a->nextAFE = $b;

	$this->addToNoahList( $b );
}
1661 
1662  // @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
1668  // @codingStandardsIgnoreEnd
/**
 * Reconstruct the active formatting elements: for each trailing list
 * entry that is neither a marker nor on the given stack, insert a new
 * HTML element with the same local name and attributes via $stack and
 * substitute it for the old entry in this list.
 *
 * @param BalanceStack $stack Stack of open elements; must provide
 *   indexOf() and insertHTMLElement() as used below.
 */
public function reconstruct( $stack ) {
	$last = $this->tail;
	// Nothing to do for an empty list, or when the final entry is a
	// marker or is already an open element.
	if (
		!$last ||
		$last instanceof BalanceMarker ||
		$stack->indexOf( $last ) >= 0
	) {
		return;
	}

	// Rewind to the earliest entry of the trailing run that needs to be
	// rebuilt: stop just after the nearest marker or open element, or at
	// the start of the list if there is neither.
	$first = $last;
	for ( $candidate = $first->prevAFE; $candidate; $candidate = $candidate->prevAFE ) {
		if ( $candidate instanceof BalanceMarker || $stack->indexOf( $candidate ) >= 0 ) {
			break;
		}
		$first = $candidate;
	}

	// Walk forward again, recreating each entry on the stack of open
	// elements and swapping the new element into this list.
	$entry = $first;
	while ( $entry ) {
		$fresh = $stack->insertHTMLElement( $entry->localName, $entry->attribs );
		$this->replace( $entry, $fresh );
		$entry = $fresh->nextAFE;
	}
}
1711 
/**
 * Produce a multi-line dump of the list, one entry per line, flagging
 * inconsistent back-links and a mismatched tail pointer.
 *
 * @return string
 */
public function __toString() {
	$out = '';
	$previous = null;
	$node = $this->head;
	while ( $node ) {
		if ( $node instanceof BalanceMarker ) {
			$out .= "MARKER\n";
		} else {
			// Name plus a short hash that identifies the object.
			$out .= $node->localName . '#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
			$sibling = $node->nextNoah;
			if ( $sibling ) {
				$out .= " (noah sibling: {$sibling->localName}#" .
					substr( md5( spl_object_hash( $sibling ) ), 0, 8 ) .
					')';
			}
			if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
				$out .= " (reverse link is wrong!)";
			}
			$out .= "\n";
		}
		$previous = $node;
		$node = $node->nextAFE;
	}
	// The last entry visited should be the recorded tail.
	if ( $previous !== $this->tail ) {
		$out .= "(tail pointer is wrong!)\n";
	}
	return $out;
}
1739 }
1740 
1792 class Balancer {
1793  private $parseMode;
1795  private $bitsIterator;
1798  private $afe;
1800  private $stack;
1801  private $strict;
1803  private $config;
1804 
1811  private $inRCDATA;
1812  private $inRAWTEXT;
1813 
1818 
1823  const VALID_COMMENT_REGEX = "~ !--
1824  ( # 1. Comment match detector
1825  > | -> | # Invalid short close
1826  ( # 2. Comment contents
1827  (?:
1828  (?! --> )
1829  (?! --!> )
1830  (?! --! \z )
1831  (?! -- \z )
1832  (?! - \z )
1833  .
1834  )*+
1835  )
1836  ( # 3. Comment close
1837  --> | # Normal close
1838  --!> | # Comment end bang
1839  ( # 4. Indicate matches requiring EOF
1840  --! | # EOF in comment end bang state
1841  -- | # EOF in comment end state
1842  - | # EOF in comment end dash state
1843  # EOF in comment state
1844  )
1845  )
1846  )
1847  ([^<]*) \z # 5. Non-tag text after the comment
1848  ~xs";
1849 
1874  public function __construct( array $config = [] ) {
1875  $this->config = $config = $config + [
1876  'strict' => false,
1877  'allowedHtmlElements' => null,
1878  'tidyCompat' => false,
1879  'allowComments' => true,
1880  ];
1881  $this->allowedHtmlElements = $config['allowedHtmlElements'];
1882  $this->strict = $config['strict'];
1883  $this->allowComments = $config['allowComments'];
1884  if ( $this->allowedHtmlElements !== null ) {
1885  // Sanity check!
1886  $bad = array_uintersect_assoc(
1887  $this->allowedHtmlElements,
1889  function( $a, $b ) {
1890  // Ignore the values (just intersect the keys) by saying
1891  // all values are equal to each other.
1892  return 0;
1893  }
1894  );
1895  if ( count( $bad ) > 0 ) {
1896  $badstr = implode( array_keys( $bad ), ',' );
1897  throw new ParameterAssertionException(
1898  '$config',
1899  'Balance attempted with sanitization including ' .
1900  "unsupported elements: {$badstr}"
1901  );
1902  }
1903  }
1904  }
1905 
/**
 * Balance a string of HTML: split it on '<', feed the pieces through
 * the tree builder as a fragment with a <body> context element, and
 * return the serialized output of the stack.
 *
 * @param string $text The HTML to balance.
 * @param callable|null $processingCallback Stored for use while tags are
 *   processed.
 * @param array $processingArgs Stored alongside $processingCallback.
 * @return string The result of the stack's getOutput().
 */
public function balance( $text, $processingCallback = null, $processingArgs = [] ) {
	// Fresh parser state for this run.
	$this->parseMode = 'inBodyMode';
	$this->bitsIterator = new ExplodeIterator( '<', $text );
	$this->afe = new BalanceActiveFormattingElements();
	$this->stack = new BalanceStack( $this->config );
	$this->processingCallback = $processingCallback;
	$this->processingArgs = $processingArgs;

	$this->inRAWTEXT = false;
	$this->inRCDATA = false;
	$this->ignoreLinefeed = false;
	$this->textIntegrationMode = false;

	// The stack starts out holding an <html> element; treat the input as
	// a fragment whose context element is <body>.
	$this->fragmentContext =
		new BalanceElement( BalanceSets::HTML_NAMESPACE, 'body', [] );
	$this->resetInsertionMode();

	// Point the form element pointer at the nearest <form> found by
	// walking up from the context element, if any.
	$this->formElementPointer = null;
	$ancestor = $this->fragmentContext;
	while ( $ancestor != null ) {
		if ( $ancestor->isHtmlNamed( 'form' ) ) {
			$this->formElementPointer = $ancestor;
			break;
		}
		$ancestor = $ancestor->parent;
	}

	// Whatever precedes the first '<' is plain text, not a tag.
	$leading = $this->bitsIterator->current();
	$this->bitsIterator->next();
	$this->insertToken( 'text', str_replace( '>', '&gt;', $leading ) );

	// Every remaining piece begins where a '<' was found.
	while ( $this->bitsIterator->valid() ) {
		$this->advance();
	}
	$this->insertToken( 'eof', null );
	$output = $this->stack->getOutput();

	// Release the per-run objects before returning.
	$this->bitsIterator = null;
	$this->afe = null;
	$this->stack = null;
	$this->fragmentContext = null;
	$this->formElementPointer = null;
	return $output;
}
1962 
1967  private function insertToken( $token, $value, $attribs = null, $selfClose = false ) {
1968  // validate tags against $unsupportedSet
1969  if ( $token === 'tag' || $token === 'endtag' ) {
1971  // As described in "simplifications" above, these tags are
1972  // not supported in the balancer.
1973  Assert::invariant(
1974  !$this->strict,
1975  "Unsupported $token <$value> found."
1976  );
1977  return false;
1978  }
1979  } elseif ( $token === 'text' && $value === '' ) {
1980  // Don't actually inject the empty string as a text token.
1981  return true;
1982  }
1983  // Support pre/listing/textarea by suppressing initial linefeed
1984  if ( $this->ignoreLinefeed ) {
1985  $this->ignoreLinefeed = false;
1986  if ( $token === 'text' ) {
1987  if ( $value[0] === "\n" ) {
1988  if ( $value === "\n" ) {
1989  // Nothing would be left, don't inject the empty string.
1990  return true;
1991  }
1992  $value = substr( $value, 1 );
1993  }
1994  }
1995  }
1996  // Some hoops we have to jump through
1997  $adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );
1998 
1999  $isForeign = true;
2000  if (
2001  $this->stack->length() === 0 ||
2002  $adjusted->isHtml() ||
2003  $token === 'eof'
2004  ) {
2005  $isForeign = false;
2006  } elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
2007  if ( $token === 'text' ) {
2008  $isForeign = false;
2009  } elseif (
2010  $token === 'tag' &&
2011  $value !== 'mglyph' && $value !== 'malignmark'
2012  ) {
2013  $isForeign = false;
2014  }
2015  } elseif (
2016  $adjusted->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
2017  $adjusted->localName === 'annotation-xml' &&
2018  $token === 'tag' && $value === 'svg'
2019  ) {
2020  $isForeign = false;
2021  } elseif (
2022  $adjusted->isHtmlIntegrationPoint() &&
2023  ( $token === 'tag' || $token === 'text' )
2024  ) {
2025  $isForeign = false;
2026  }
2027  if ( $isForeign ) {
2028  return $this->insertForeignToken( $token, $value, $attribs, $selfClose );
2029  } else {
2030  $func = $this->parseMode;
2031  return $this->$func( $token, $value, $attribs, $selfClose );
2032  }
2033  }
2034 
/**
 * Handle a token while in foreign (non-HTML namespace) content.
 *
 * @param string $token Token type: 'text', 'comment', 'tag' or 'endtag'.
 * @param string $value Text/comment contents, or the tag name.
 * @param array|null $attribs Attributes, for 'tag' tokens.
 * @param bool $selfClose Whether a 'tag' token was self-closing.
 * @return bool|null True when the token was consumed here; otherwise the
 *   result of reprocessing it through insertToken() or the current
 *   parse mode. Null if an end tag walks off the stack without a match.
 */
private function insertForeignToken( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
		return true;
	} elseif ( $token === 'comment' ) {
		// Comments are kept in foreign content too; without this branch
		// the token matched nothing below and was silently dropped.
		$this->stack->insertComment( $value );
		return true;
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
			case 'font':
				// <font> only breaks out of foreign content when it has
				// one of these attributes.
				if ( isset( $attribs['color'] )
					|| isset( $attribs['face'] )
					|| isset( $attribs['size'] )
				) {
					break;
				}
				// otherwise, fall through
			case 'b':
			case 'big':
			case 'blockquote':
			case 'body':
			case 'br':
			case 'center':
			case 'code':
			case 'dd':
			case 'div':
			case 'dl':
			case 'dt':
			case 'em':
			case 'embed':
			case 'h1':
			case 'h2':
			case 'h3':
			case 'h4':
			case 'h5':
			case 'h6':
			case 'head':
			case 'hr':
			case 'i':
			case 'img':
			case 'li':
			case 'listing':
			case 'menu':
			case 'meta':
			case 'nobr':
			case 'ol':
			case 'p':
			case 'pre':
			case 'ruby':
			case 's':
			case 'small':
			case 'span':
			case 'strong':
			case 'strike':
			case 'sub':
			case 'sup':
			case 'table':
			case 'tt':
			case 'u':
			case 'ul':
			case 'var':
				if ( $this->fragmentContext ) {
					// With a fragment context, treat it as any other
					// start tag (below).
					break;
				}
				// Pop until the current node is an integration point or
				// an HTML element, then reprocess the tag.
				while ( true ) {
					$this->stack->pop();
					$node = $this->stack->currentNode;
					if (
						$node->isMathmlTextIntegrationPoint() ||
						$node->isHtmlIntegrationPoint() ||
						$node->isHtml()
					) {
						break;
					}
				}
				return $this->insertToken( $token, $value, $attribs, $selfClose );
		}
		// "Any other start tag": insert it in the namespace of the
		// adjusted current node.
		$adjusted = ( $this->fragmentContext && $this->stack->length() === 1 ) ?
			$this->fragmentContext : $this->stack->currentNode;
		$this->stack->insertForeignElement(
			$adjusted->namespaceURI, $value, $attribs
		);
		if ( $selfClose ) {
			$this->stack->pop();
		}
		return true;
	} elseif ( $token === 'endtag' ) {
		$first = true;
		foreach ( $this->stack as $i => $node ) {
			if ( $node->isHtml() && !$first ) {
				// Reached an HTML element: process the end tag as HTML.
				$func = $this->parseMode;
				return $this->$func( $token, $value, $attribs, $selfClose );
			} elseif ( $i === 0 ) {
				// Index 0 reached without a match: ignore the token.
				return true;
			} elseif ( $node->localName === $value ) {
				$this->stack->popTag( $node );
				return true;
			}
			$first = false;
		}
	}
}
2136 
2141  private function advance() {
2142  $x = $this->bitsIterator->current();
2143  $this->bitsIterator->next();
2144  $regs = [];
2145  // Handle comments. These won't be generated by mediawiki (they
2146  // are stripped in the Sanitizer) but may be generated by extensions.
2147  if (
2148  $this->allowComments &&
2149  !( $this->inRCDATA || $this->inRAWTEXT ) &&
2150  preg_match( Balancer::VALID_COMMENT_REGEX, $x, $regs, PREG_OFFSET_CAPTURE ) &&
2151  // verify EOF condition where necessary
2152  ( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
2153  ) {
2154  $contents = $regs[2][0];
2155  $rest = $regs[5][0];
2156  $this->insertToken( 'comment', $contents );
2157  $this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
2158  return;
2159  }
2160  // $slash: Does the current element start with a '/'?
2161  // $t: Current element name
2162  // $attribStr: String between element name and >
2163  // $brace: Ending '>' or '/>'
2164  // $rest: Everything until the next element from the $bitsIterator
2165  if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
2166  list( /* $qbar */, $slash, $t, $attribStr, $brace, $rest ) = $regs;
2167  $t = strtolower( $t );
2168  if ( $this->strict ) {
2169  // Verify that attributes are all properly double-quoted
2170  Assert::invariant(
2171  preg_match(
2172  '/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
2173  ),
2174  "Bad attribute string found"
2175  );
2176  }
2177  } else {
2178  Assert::invariant(
2179  !$this->strict, "< found which does not start a valid tag"
2180  );
2181  $slash = $t = $attribStr = $brace = $rest = null;
2182  }
2183  $goodTag = $t;
2184  if ( $this->inRCDATA ) {
2185  if ( $slash && $t === $this->inRCDATA ) {
2186  $this->inRCDATA = false;
2187  } else {
2188  // No tags allowed; this emulates the "rcdata" tokenizer mode.
2189  $goodTag = false;
2190  }
2191  }
2192  if ( $this->inRAWTEXT ) {
2193  if ( $slash && $t === $this->inRAWTEXT ) {
2194  $this->inRAWTEXT = false;
2195  } else {
2196  // No tags allowed, no entity-escaping done.
2197  $goodTag = false;
2198  }
2199  }
2200  $sanitize = $this->allowedHtmlElements !== null;
2201  if ( $sanitize ) {
2202  $goodTag = $t && isset( $this->allowedHtmlElements[$t] );
2203  }
2204  if ( $goodTag ) {
2205  if ( is_callable( $this->processingCallback ) ) {
2206  call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
2207  }
2208  if ( $sanitize ) {
2209  $goodTag = Sanitizer::validateTag( $attribStr, $t );
2210  }
2211  }
2212  if ( $goodTag ) {
2213  if ( $sanitize ) {
2214  $attribs = Sanitizer::decodeTagAttributes( $attribStr );
2216  } else {
2217  $attribs = Sanitizer::decodeTagAttributes( $attribStr );
2218  }
2219  $goodTag = $this->insertToken(
2220  $slash ? 'endtag' : 'tag', $t, $attribs, $brace === '/>'
2221  );
2222  }
2223  if ( $goodTag ) {
2224  $rest = str_replace( '>', '&gt;', $rest );
2225  $this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
2226  } elseif ( $this->inRAWTEXT ) {
2227  $this->insertToken( 'text', "<$x" );
2228  } else {
2229  // bad tag; serialize entire thing as text.
2230  $this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $x ) );
2231  }
2232  }
2233 
/**
 * Change the current parse mode.
 *
 * @param string $mode Name of the new mode; must end in "Mode".
 * @return string The mode that was in effect before the switch.
 */
private function switchMode( $mode ) {
	$hasModeSuffix = substr( $mode, -4 ) === 'Mode';
	Assert::parameter( $hasModeSuffix, '$mode', 'should end in Mode' );
	$previous = $this->parseMode;
	$this->parseMode = $mode;
	return $previous;
}
2242 
/**
 * Switch to a new parse mode, then feed the given token through
 * insertToken() again so that it is handled under that mode.
 *
 * @param string $mode Name of the mode to switch to.
 * @param string $token
 * @param string|null $value
 * @param array|null $attribs
 * @param bool $selfClose
 * @return mixed Whatever insertToken() returns for the token.
 */
private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfClose ) {
	$this->switchMode( $mode );
	$result = $this->insertToken( $token, $value, $attribs, $selfClose );
	return $result;
}
2247 
/**
 * Choose the parse mode that matches the current stack of open
 * elements. The stack is scanned in its iteration order; at index 0
 * the fragment context element (if set) stands in for the node.
 */
private function resetInsertionMode() {
	// HTML elements that map straight onto a mode.
	$directModes = [
		'tr' => 'inRowMode',
		'tbody' => 'inTableBodyMode',
		'tfoot' => 'inTableBodyMode',
		'thead' => 'inTableBodyMode',
		'caption' => 'inCaptionMode',
		'colgroup' => 'inColumnGroupMode',
		'table' => 'inTableMode',
		'body' => 'inBodyMode',
		// OMITTED: <frameset>
		// OMITTED: <html>
		// OMITTED: <head>
	];

	foreach ( $this->stack as $i => $node ) {
		$last = ( $i === 0 );
		if ( $last && $this->fragmentContext ) {
			$node = $this->fragmentContext;
		}

		if ( $node->isHtml() ) {
			$name = $node->localName;

			if ( $name === 'select' ) {
				// Look for an enclosing <table>, giving up at a <template>.
				$stackLength = $this->stack->length();
				for ( $j = $i + 1; $j < $stackLength - 1; $j++ ) {
					$ancestor = $this->stack->node( $stackLength - $j - 1 );
					if ( $ancestor->isHtmlNamed( 'template' ) ) {
						break;
					}
					if ( $ancestor->isHtmlNamed( 'table' ) ) {
						$this->switchMode( 'inSelectInTableMode' );
						return;
					}
				}
				$this->switchMode( 'inSelectMode' );
				return;
			}

			if ( $name === 'template' ) {
				// Use the most recently recorded template insertion mode.
				$this->switchMode(
					array_slice( $this->templateInsertionModes, -1 )[0]
				);
				return;
			}

			if ( isset( $directModes[$name] ) ) {
				$this->switchMode( $directModes[$name] );
				return;
			}

			// OMITTED: <head>
			if ( !$last && $node->isA( BalanceSets::$tableCellSet ) ) {
				$this->switchMode( 'inCellMode' );
				return;
			}
		}

		if ( $last ) {
			// Nothing more specific matched.
			$this->switchMode( 'inBodyMode' );
			return;
		}
	}
}
2317 
/**
 * Finish parsing: drop the active formatting element list and pop the
 * stack of open elements down to its top-most <html> element.
 */
private function stopParsing() {
	// Of the spec's "stop parsing" steps only step 2 ("pop all the nodes
	// off the stack of open elements") applies here, and even then the
	// top-most <html> element is left in place.

	// The AFE list must go first: otherwise its element objects stay
	// alive during serialization, potentially costing O(N^2) memory.
	// This is safe because popping the stack never triggers
	// reconstruction of the active formatting elements.
	$this->afe = null;

	$keep = 1;
	$this->stack->popTo( $keep );
}
2330 
/**
 * Start a raw-text element: insert it, switch to text mode (keeping the
 * previous mode so it can be restored), and record the tag name in
 * $this->inRAWTEXT.
 *
 * @param string $value Tag name of the raw-text element.
 * @param array|null $attribs Attributes of the element.
 * @return bool Always true.
 */
private function parseRawText( $value, $attribs = null ) {
	$this->stack->insertHTMLElement( $value, $attribs );
	$previousMode = $this->switchMode( 'inTextMode' );
	$this->originalInsertionMode = $previousMode;
	$this->inRAWTEXT = $value;
	return true;
}
2337 
/**
 * Handle a token in text mode.
 *
 * @param string $token Token type.
 * @param string|null $value
 * @param array|null $attribs
 * @param bool $selfClose
 * @return mixed True, except for 'eof' where the result of reprocessing
 *   the token in the original insertion mode is returned.
 */
private function inTextMode( $token, $value, $attribs = null, $selfClose = false ) {
	switch ( $token ) {
		case 'text':
			$this->stack->insertText( $value );
			return true;

		case 'eof':
			// Close the open element, then let the original mode see
			// the end of input.
			$this->stack->pop();
			return $this->switchModeAndReprocess(
				$this->originalInsertionMode, $token, $value, $attribs, $selfClose
			);

		case 'endtag':
			// Close the open element and go back to the original mode.
			$this->stack->pop();
			$this->switchMode( $this->originalInsertionMode );
			return true;

		default:
			// Any other token is ignored here.
			return true;
	}
}
2354 
/**
 * Handle a token using the "in head" rules. Tokens not dealt with here
 * are reprocessed through insertToken() after a synthetic </head>.
 *
 * @param string $token Token type.
 * @param string|null $value
 * @param array|null $attribs
 * @param bool $selfClose
 * @return mixed True when handled here; otherwise whatever the raw-text
 *   helper or the reprocessing call returns.
 */
private function inHeadMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}

	if ( $token === 'text' ) {
		// Leading whitespace is inserted directly; anything after it is
		// left for the generic handling at the end.
		if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
			$this->stack->insertText( $matches[0] );
			$value = substr( $value, strlen( $matches[0] ) );
		}
		if ( strlen( $value ) === 0 ) {
			// The text was whitespace only.
			return true;
		}
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
			// OMITTED: <html>
			// OMITTED: for <meta>, a full HTML parser might change the
			// encoding here.
			case 'meta':
			case 'base':
			case 'basefont':
			case 'bgsound':
			case 'link':
				// Insert the element and close it again immediately.
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->pop();
				return true;

			// OMITTED: <title>
			// OMITTED: <noscript>
			case 'noframes':
			case 'style':
				return $this->parseRawText( $value, $attribs );

			// OMITTED: <script>
			case 'template':
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->afe->insertMarker();
				// OMITTED: frameset_ok
				$this->switchMode( 'inTemplateMode' );
				$this->templateInsertionModes[] = $this->parseMode;
				return true;
			// OMITTED: <head>
		}
	} elseif ( $token === 'endtag' ) {
		// OMITTED: <head>
		// OMITTED: <body>
		// OMITTED: <html>
		if ( $value === 'template' ) {
			if ( $this->stack->indexOf( $value ) < 0 ) {
				// No <template> on the stack: ignore the token.
				return true;
			}
			$this->stack->generateImpliedEndTags( null, true /* thorough */ );
			$this->stack->popTag( $value );
			$this->afe->clearToMarker();
			array_pop( $this->templateInsertionModes );
			$this->resetInsertionMode();
			return true;
		}
		if ( $value !== 'br' ) {
			// Every other end tag is ignored; </br> is handled below.
			return true;
		}
	}

	// Not handled above: act as if </head> had been seen ...
	$this->inHeadMode( 'endtag', 'head' );
	// ... and then process the token again.
	return $this->insertToken( $token, $value, $attribs, $selfClose );
}
2424 
2425  private function inBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
2426  if ( $token === 'text' ) {
2427  $this->afe->reconstruct( $this->stack );
2428  $this->stack->insertText( $value );
2429  return true;
2430  } elseif ( $token === 'eof' ) {
2431  if ( !empty( $this->templateInsertionModes ) ) {
2432  return $this->inTemplateMode( $token, $value, $attribs, $selfClose );
2433  }
2434  $this->stopParsing();
2435  return true;
2436  } elseif ( $token === 'tag' ) {
2437  switch ( $value ) {
2438  // OMITTED: <html>
2439  case 'base':
2440  case 'basefont':
2441  case 'bgsound':
2442  case 'link':
2443  case 'meta':
2444  case 'noframes':
2445  // OMITTED: <script>
2446  case 'style':
2447  case 'template':
2448  // OMITTED: <title>
2449  return $this->inHeadMode( $token, $value, $attribs, $selfClose );
2450  // OMITTED: <body>
2451  // OMITTED: <frameset>
2452 
2453  case 'address':
2454  case 'article':
2455  case 'aside':
2456  case 'blockquote':
2457  case 'center':
2458  case 'details':
2459  case 'dialog':
2460  case 'dir':
2461  case 'div':
2462  case 'dl':
2463  case 'fieldset':
2464  case 'figcaption':
2465  case 'figure':
2466  case 'footer':
2467  case 'header':
2468  case 'hgroup':
2469  case 'main':
2470  case 'menu':
2471  case 'nav':
2472  case 'ol':
2473  case 'p':
2474  case 'section':
2475  case 'summary':
2476  case 'ul':
2477  if ( $this->stack->inButtonScope( 'p' ) ) {
2478  $this->inBodyMode( 'endtag', 'p' );
2479  }
2480  $this->stack->insertHTMLElement( $value, $attribs );
2481  return true;
2482 
2483  case 'h1':
2484  case 'h2':
2485  case 'h3':
2486  case 'h4':
2487  case 'h5':
2488  case 'h6':
2489  if ( $this->stack->inButtonScope( 'p' ) ) {
2490  $this->inBodyMode( 'endtag', 'p' );
2491  }
2492  if ( $this->stack->currentNode->isA( BalanceSets::$headingSet ) ) {
2493  $this->stack->pop();
2494  }
2495  $this->stack->insertHTMLElement( $value, $attribs );
2496  return true;
2497 
2498  case 'pre':
2499  case 'listing':
2500  if ( $this->stack->inButtonScope( 'p' ) ) {
2501  $this->inBodyMode( 'endtag', 'p' );
2502  }
2503  $this->stack->insertHTMLElement( $value, $attribs );
2504  $this->ignoreLinefeed = true;
2505  // OMITTED: frameset_ok
2506  return true;
2507 
2508  case 'form':
2509  if (
2510  $this->formElementPointer &&
2511  $this->stack->indexOf( 'template' ) < 0
2512  ) {
2513  return true; // in a form, not in a template.
2514  }
2515  if ( $this->stack->inButtonScope( "p" ) ) {
2516  $this->inBodyMode( 'endtag', 'p' );
2517  }
2518  $elt = $this->stack->insertHTMLElement( $value, $attribs );
2519  if ( $this->stack->indexOf( 'template' ) < 0 ) {
2520  $this->formElementPointer = $elt;
2521  }
2522  return true;
2523 
2524  case 'li':
2525  // OMITTED: frameset_ok
2526  foreach ( $this->stack as $node ) {
2527  if ( $node->isHtmlNamed( 'li' ) ) {
2528  $this->inBodyMode( 'endtag', 'li' );
2529  break;
2530  }
2531  if (
2532  $node->isA( BalanceSets::$specialSet ) &&
2533  !$node->isA( BalanceSets::$addressDivPSet )
2534  ) {
2535  break;
2536  }
2537  }
2538  if ( $this->stack->inButtonScope( 'p' ) ) {
2539  $this->inBodyMode( 'endtag', 'p' );
2540  }
2541  $this->stack->insertHTMLElement( $value, $attribs );
2542  return true;
2543 
2544  case 'dd':
2545  case 'dt':
2546  // OMITTED: frameset_ok
2547  foreach ( $this->stack as $node ) {
2548  if ( $node->isHtmlNamed( 'dd' ) ) {
2549  $this->inBodyMode( 'endtag', 'dd' );
2550  break;
2551  }
2552  if ( $node->isHtmlNamed( 'dt' ) ) {
2553  $this->inBodyMode( 'endtag', 'dt' );
2554  break;
2555  }
2556  if (
2557  $node->isA( BalanceSets::$specialSet ) &&
2558  !$node->isA( BalanceSets::$addressDivPSet )
2559  ) {
2560  break;
2561  }
2562  }
2563  if ( $this->stack->inButtonScope( 'p' ) ) {
2564  $this->inBodyMode( 'endtag', 'p' );
2565  }
2566  $this->stack->insertHTMLElement( $value, $attribs );
2567  return true;
2568 
2569  // OMITTED: <plaintext>
2570 
2571  case 'button':
2572  if ( $this->stack->inScope( 'button' ) ) {
2573  $this->inBodyMode( 'endtag', 'button' );
2574  return $this->insertToken( $token, $value, $attribs, $selfClose );
2575  }
2576  $this->afe->reconstruct( $this->stack );
2577  $this->stack->insertHTMLElement( $value, $attribs );
2578  return true;
2579 
2580  case 'a':
2581  $activeElement = $this->afe->findElementByTag( 'a' );
2582  if ( $activeElement ) {
2583  $this->inBodyMode( 'endtag', 'a' );
2584  if ( $this->afe->isInList( $activeElement ) ) {
2585  $this->afe->remove( $activeElement );
2586  // Don't flatten here, since when we fall
2587  // through below we might foster parent
2588  // the new <a> tag inside this one.
2589  $this->stack->removeElement( $activeElement, false );
2590  }
2591  }
2592  // Falls through
2593  case 'b':
2594  case 'big':
2595  case 'code':
2596  case 'em':
2597  case 'font':
2598  case 'i':
2599  case 's':
2600  case 'small':
2601  case 'strike':
2602  case 'strong':
2603  case 'tt':
2604  case 'u':
2605  $this->afe->reconstruct( $this->stack );
2606  $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ) );
2607  return true;
2608 
2609  case 'nobr':
2610  $this->afe->reconstruct( $this->stack );
2611  if ( $this->stack->inScope( 'nobr' ) ) {
2612  $this->inBodyMode( 'endtag', 'nobr' );
2613  $this->afe->reconstruct( $this->stack );
2614  }
2615  $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ) );
2616  return true;
2617 
2618  case 'applet':
2619  case 'marquee':
2620  case 'object':
2621  $this->afe->reconstruct( $this->stack );
2622  $this->stack->insertHTMLElement( $value, $attribs );
2623  $this->afe->insertMarker();
2624  // OMITTED: frameset_ok
2625  return true;
2626 
2627  case 'table':
2628  // The document is never in "quirks mode"; see simplifications
2629  // above.
2630  if ( $this->stack->inButtonScope( 'p' ) ) {
2631  $this->inBodyMode( 'endtag', 'p' );
2632  }
2633  $this->stack->insertHTMLElement( $value, $attribs );
2634  // OMITTED: frameset_ok
2635  $this->switchMode( 'inTableMode' );
2636  return true;
2637 
2638  case 'area':
2639  case 'br':
2640  case 'embed':
2641  case 'img':
2642  case 'keygen':
2643  case 'wbr':
2644  $this->afe->reconstruct( $this->stack );
2645  $this->stack->insertHTMLElement( $value, $attribs );
2646  $this->stack->pop();
2647  // OMITTED: frameset_ok
2648  return true;
2649 
2650  case 'input':
2651  $this->afe->reconstruct( $this->stack );
2652  $this->stack->insertHTMLElement( $value, $attribs );
2653  $this->stack->pop();
2654  // OMITTED: frameset_ok
2655  // (hence we don't need to examine the tag's "type" attribute)
2656  return true;
2657 
2658  case 'menuitem':
2659  case 'param':
2660  case 'source':
2661  case 'track':
2662  $this->stack->insertHTMLElement( $value, $attribs );
2663  $this->stack->pop();
2664  return true;
2665 
2666  case 'hr':
2667  if ( $this->stack->inButtonScope( 'p' ) ) {
2668  $this->inBodyMode( 'endtag', 'p' );
2669  }
2670  $this->stack->insertHTMLElement( $value, $attribs );
2671  $this->stack->pop();
2672  return true;
2673 
2674  case 'image':
2675  // warts!
2676  return $this->inBodyMode( $token, 'img', $attribs, $selfClose );
2677 
2678  // OMITTED: <isindex>
2679 
2680  case 'textarea':
2681  $this->stack->insertHTMLElement( $value, $attribs );
2682  $this->ignoreLinefeed = true;
2683  $this->inRCDATA = $value; // emulate rcdata tokenizer mode
2684  // OMITTED: frameset_ok
2685  return true;
2686 
2687  // OMITTED: <xmp>
2688  // OMITTED: <iframe>
2689  // OMITTED: <noembed>
2690  // OMITTED: <noscript>
2691 
2692  case 'select':
2693  $this->afe->reconstruct( $this->stack );
2694  $this->stack->insertHTMLElement( $value, $attribs );
2695  switch ( $this->parseMode ) {
2696  case 'inTableMode':
2697  case 'inCaptionMode':
2698  case 'inTableBodyMode':
2699  case 'inRowMode':
2700  case 'inCellMode':
2701  $this->switchMode( 'inSelectInTableMode' );
2702  return true;
2703  default:
2704  $this->switchMode( 'inSelectMode' );
2705  return true;
2706  }
2707 
2708  case 'optgroup':
2709  case 'option':
2710  if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
2711  $this->inBodyMode( 'endtag', 'option' );
2712  }
2713  $this->afe->reconstruct( $this->stack );
2714  $this->stack->insertHTMLElement( $value, $attribs );
2715  return true;
2716 
2717  case 'rb':
2718  case 'rtc':
2719  if ( $this->stack->inScope( 'ruby' ) ) {
2720  $this->stack->generateImpliedEndTags();
2721  }
2722  $this->stack->insertHTMLElement( $value, $attribs );
2723  return true;
2724 
2725  case 'rp':
2726  case 'rt':
2727  if ( $this->stack->inScope( 'ruby' ) ) {
2728  $this->stack->generateImpliedEndTags( 'rtc' );
2729  }
2730  $this->stack->insertHTMLElement( $value, $attribs );
2731  return true;
2732 
2733  case 'math':
2734  $this->afe->reconstruct( $this->stack );
2735  // We skip the spec's "adjust MathML attributes" and
2736  // "adjust foreign attributes" steps, since the browser will
2737  // do this later when it parses the output and it doesn't affect
2738  // balancing.
2739  $this->stack->insertForeignElement(
2741  );
2742  if ( $selfClose ) {
2743  // emit explicit </math> tag.
2744  $this->stack->pop();
2745  }
2746  return true;
2747 
2748  case 'svg':
2749  $this->afe->reconstruct( $this->stack );
2750  // We skip the spec's "adjust SVG attributes" and
2751  // "adjust foreign attributes" steps, since the browser will
2752  // do this later when it parses the output and it doesn't affect
2753  // balancing.
2754  $this->stack->insertForeignElement(
2756  );
2757  if ( $selfClose ) {
2758  // emit explicit </svg> tag.
2759  $this->stack->pop();
2760  }
2761  return true;
2762 
2763  case 'caption':
2764  case 'col':
2765  case 'colgroup':
2766  // OMITTED: <frame>
2767  case 'head':
2768  case 'tbody':
2769  case 'td':
2770  case 'tfoot':
2771  case 'th':
2772  case 'thead':
2773  case 'tr':
2774  // Ignore table tags if we're not inTableMode
2775  return true;
2776  }
2777 
2778  // Handle any other start tag here
2779  $this->afe->reconstruct( $this->stack );
2780  $this->stack->insertHTMLElement( $value, $attribs );
2781  return true;
2782  } elseif ( $token === 'endtag' ) {
2783  switch ( $value ) {
2784  // </body>,</html> are unsupported.
2785 
2786  case 'template':
2787  return $this->inHeadMode( $token, $value, $attribs, $selfClose );
2788 
2789  case 'address':
2790  case 'article':
2791  case 'aside':
2792  case 'blockquote':
2793  case 'button':
2794  case 'center':
2795  case 'details':
2796  case 'dialog':
2797  case 'dir':
2798  case 'div':
2799  case 'dl':
2800  case 'fieldset':
2801  case 'figcaption':
2802  case 'figure':
2803  case 'footer':
2804  case 'header':
2805  case 'hgroup':
2806  case 'listing':
2807  case 'main':
2808  case 'menu':
2809  case 'nav':
2810  case 'ol':
2811  case 'pre':
2812  case 'section':
2813  case 'summary':
2814  case 'ul':
2815  // Ignore if there is not a matching open tag
2816  if ( !$this->stack->inScope( $value ) ) {
2817  return true;
2818  }
2819  $this->stack->generateImpliedEndTags();
2820  $this->stack->popTag( $value );
2821  return true;
2822 
2823  case 'form':
2824  if ( $this->stack->indexOf( 'template' ) < 0 ) {
2825  $openform = $this->formElementPointer;
2826  $this->formElementPointer = null;
2827  if ( !$openform || !$this->stack->inScope( $openform ) ) {
2828  return true;
2829  }
2830  $this->stack->generateImpliedEndTags();
2831  // Don't flatten yet if we're removing a <form> element
2832  // out-of-order. (eg. `<form><div></form>`)
2833  $flatten = ( $this->stack->currentNode === $openform );
2834  $this->stack->removeElement( $openform, $flatten );
2835  } else {
2836  if ( !$this->stack->inScope( 'form' ) ) {
2837  return true;
2838  }
2839  $this->stack->generateImpliedEndTags();
2840  $this->stack->popTag( 'form' );
2841  }
2842  return true;
2843 
2844  case 'p':
2845  if ( !$this->stack->inButtonScope( 'p' ) ) {
2846  $this->inBodyMode( 'tag', 'p', [] );
2847  return $this->insertToken( $token, $value, $attribs, $selfClose );
2848  }
2849  $this->stack->generateImpliedEndTags( $value );
2850  $this->stack->popTag( $value );
2851  return true;
2852 
2853  case 'li':
2854  if ( !$this->stack->inListItemScope( $value ) ) {
2855  return true; // ignore
2856  }
2857  $this->stack->generateImpliedEndTags( $value );
2858  $this->stack->popTag( $value );
2859  return true;
2860 
2861  case 'dd':
2862  case 'dt':
2863  if ( !$this->stack->inScope( $value ) ) {
2864  return true; // ignore
2865  }
2866  $this->stack->generateImpliedEndTags( $value );
2867  $this->stack->popTag( $value );
2868  return true;
2869 
2870  case 'h1':
2871  case 'h2':
2872  case 'h3':
2873  case 'h4':
2874  case 'h5':
2875  case 'h6':
2876  if ( !$this->stack->inScope( BalanceSets::$headingSet ) ) {
2877  return true; // ignore
2878  }
2879  $this->stack->generateImpliedEndTags();
2880  $this->stack->popTag( BalanceSets::$headingSet );
2881  return true;
2882 
2883  case 'sarcasm':
2884  // Take a deep breath, then:
2885  break;
2886 
2887  case 'a':
2888  case 'b':
2889  case 'big':
2890  case 'code':
2891  case 'em':
2892  case 'font':
2893  case 'i':
2894  case 'nobr':
2895  case 's':
2896  case 'small':
2897  case 'strike':
2898  case 'strong':
2899  case 'tt':
2900  case 'u':
2901  if ( $this->stack->adoptionAgency( $value, $this->afe ) ) {
2902  return true; // If we did something, we're done.
2903  }
2904  break; // Go to the "any other end tag" case.
2905 
2906  case 'applet':
2907  case 'marquee':
2908  case 'object':
2909  if ( !$this->stack->inScope( $value ) ) {
2910  return true; // ignore
2911  }
2912  $this->stack->generateImpliedEndTags();
2913  $this->stack->popTag( $value );
2914  $this->afe->clearToMarker();
2915  return true;
2916 
2917  case 'br':
2918  // Turn </br> into <br>
2919  return $this->inBodyMode( 'tag', $value, [] );
2920  }
2921 
2922  // Any other end tag goes here
2923  foreach ( $this->stack as $i => $node ) {
2924  if ( $node->isHtmlNamed( $value ) ) {
2925  $this->stack->generateImpliedEndTags( $value );
2926  $this->stack->popTo( $i ); // including $i
2927  break;
2928  } elseif ( $node->isA( BalanceSets::$specialSet ) ) {
2929  return true; // ignore this close token.
2930  }
2931  }
2932  return true;
2933  } elseif ( $token === 'comment' ) {
2934  $this->stack->insertComment( $value );
2935  return true;
2936  } else {
2937  Assert::invariant( false, "Bad token type: $token" );
2938  }
2939  }
2940 
/**
 * Handle a token in the HTML5 "in table" insertion mode.
 *
 * Table-structure start tags open the matching element and switch to
 * the corresponding insertion mode. Any token that is not handled
 * explicitly is passed to inBodyMode() with foster parenting enabled
 * for the duration of that call.
 *
 * @param string $token Token type: 'text', 'eof', 'tag', 'endtag' or 'comment'
 * @param string|null $value Text or comment content, or the tag name
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfClose Whether a start tag was self-closing
 * @return bool True for every token handled directly here; otherwise
 *   the result of the handler the token was delegated to
 */
private function inTableMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'text' ) {
		if ( $this->textIntegrationMode ) {
			return $this->inBodyMode( $token, $value, $attribs, $selfClose );
		} elseif ( $this->stack->currentNode->isA( BalanceSets::$tableSectionRowSet ) ) {
			// Buffer the text; inTableTextMode decides what to do with
			// it once the next non-text token arrives.
			$this->pendingTableText = '';
			$this->originalInsertionMode = $this->parseMode;
			return $this->switchModeAndReprocess( 'inTableTextMode',
				$token, $value, $attribs, $selfClose );
		}
		// fall through to default case.
	} elseif ( $token === 'eof' ) {
		$this->stopParsing();
		return true;
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
		case 'caption':
			// Clear the stack back to a table context first, exactly as
			// the colgroup and table-section cases below do. Otherwise a
			// foster-parented element that is still open (e.g. the <b>
			// in `<table><b><caption>`) would become the parent of the
			// <caption>.
			$this->stack->clearToContext( BalanceSets::$tableContextSet );
			$this->afe->insertMarker();
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inCaptionMode' );
			return true;
		case 'colgroup':
			$this->stack->clearToContext( BalanceSets::$tableContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inColumnGroupMode' );
			return true;
		case 'col':
			// Synthesize the missing <colgroup>, then reprocess <col>.
			$this->inTableMode( 'tag', 'colgroup', [] );
			return $this->insertToken( $token, $value, $attribs, $selfClose );
		case 'tbody':
		case 'tfoot':
		case 'thead':
			$this->stack->clearToContext( BalanceSets::$tableContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inTableBodyMode' );
			return true;
		case 'td':
		case 'th':
		case 'tr':
			// Synthesize the missing <tbody>, then reprocess the token.
			$this->inTableMode( 'tag', 'tbody', [] );
			return $this->insertToken( $token, $value, $attribs, $selfClose );
		case 'table':
			if ( !$this->stack->inTableScope( $value ) ) {
				return true; // Ignore this tag.
			}
			// A nested <table> start tag closes the open table first.
			$this->inTableMode( 'endtag', $value );
			return $this->insertToken( $token, $value, $attribs, $selfClose );

		case 'style':
		// OMITTED: <script>
		case 'template':
			return $this->inHeadMode( $token, $value, $attribs, $selfClose );

		case 'input':
			if ( !isset( $attribs['type'] ) || strcasecmp( $attribs['type'], 'hidden' ) !== 0 ) {
				break; // Handle this as "everything else"
			}
			// <input type=hidden> is inserted in place and closed at once.
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;

		case 'form':
			if (
				$this->formElementPointer ||
				$this->stack->indexOf( 'template' ) >= 0
			) {
				return true; // ignore this token
			}
			$this->formElementPointer =
				$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->popTag( $this->formElementPointer );
			return true;
		}
		// Fall through for "anything else" clause.
	} elseif ( $token === 'endtag' ) {
		switch ( $value ) {
		case 'table':
			if ( !$this->stack->inTableScope( $value ) ) {
				return true; // Ignore.
			}
			$this->stack->popTag( $value );
			$this->resetInsertionMode();
			return true;
		// OMITTED: <body>
		case 'caption':
		case 'col':
		case 'colgroup':
		// OMITTED: <html>
		case 'tbody':
		case 'td':
		case 'tfoot':
		case 'th':
		case 'thead':
		case 'tr':
			return true; // Ignore the token.
		case 'template':
			return $this->inHeadMode( $token, $value, $attribs, $selfClose );
		}
		// Fall through for "anything else" clause.
	} elseif ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}
	// This is the "anything else" case: process the token with the
	// in-body rules while foster parenting is switched on.
	$this->stack->fosterParentMode = true;
	$this->inBodyMode( $token, $value, $attribs, $selfClose );
	$this->stack->fosterParentMode = false;
	return true;
}
3050 
/**
 * Handle a token in the HTML5 "in table text" insertion mode.
 *
 * Text tokens are accumulated in $this->pendingTableText. The first
 * non-text token flushes that buffer and is then reprocessed in the
 * insertion mode saved in $this->originalInsertionMode.
 *
 * @param string $token Token type
 * @param string|null $value Text content or tag name
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfClose Whether a start tag was self-closing
 * @return bool
 */
private function inTableTextMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token !== 'text' ) {
		// Take the buffered text and reset the buffer before flushing.
		$buffered = $this->pendingTableText;
		$this->pendingTableText = '';

		$whitespaceOnly = !preg_match( '/[^\x09\x0A\x0C\x0D\x20]/', $buffered );
		if ( $whitespaceOnly ) {
			// Pure whitespace may stay where it is.
			$this->stack->insertText( $buffered );
		} else {
			// Mirrors the "anything else" case of inTableMode.
			$this->stack->fosterParentMode = true;
			$this->inBodyMode( 'text', $buffered );
			$this->stack->fosterParentMode = false;
		}

		return $this->switchModeAndReprocess(
			$this->originalInsertionMode, $token, $value, $attribs, $selfClose
		);
	}

	// Still collecting text.
	$this->pendingTableText .= $value;
	return true;
}
3072 
/**
 * Helper for inCaptionMode: close the open <caption>, if there is one
 * in table scope, and switch back to inTableMode.
 *
 * @return bool False if no <caption> is in table scope (nothing is
 *   changed in that case), true once the caption has been closed
 */
private function endCaption() {
	if ( $this->stack->inTableScope( 'caption' ) ) {
		$this->stack->generateImpliedEndTags();
		$this->stack->popTag( 'caption' );
		$this->afe->clearToMarker();
		$this->switchMode( 'inTableMode' );
		return true;
	}
	return false;
}
3084 
/**
 * Handle a token in the HTML5 "in caption" insertion mode.
 *
 * @param string $token Token type
 * @param string|null $value Text content or tag name
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfClose Whether a start tag was self-closing
 * @return bool
 */
private function inCaptionMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'tag' ) {
		// Start tags that implicitly end the caption.
		$captionEnders = [
			'caption', 'col', 'colgroup', 'tbody', 'td',
			'tfoot', 'th', 'thead', 'tr',
		];
		if ( in_array( $value, $captionEnders, true ) ) {
			// Reprocess the tag only if a caption was actually closed.
			if ( $this->endCaption() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'caption' ) {
			$this->endCaption();
			return true;
		}
		if ( $value === 'table' ) {
			if ( $this->endCaption() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
		// OMITTED: <html>
		$ignoredEndTags = [
			'body', 'col', 'colgroup', 'tbody', 'td',
			'tfoot', 'th', 'thead', 'tr',
		];
		if ( in_array( $value, $ignoredEndTags, true ) ) {
			// Ignore the token
			return true;
		}
	}

	// The Anything Else case
	return $this->inBodyMode( $token, $value, $attribs, $selfClose );
}
3131 
/**
 * Handle a token in the HTML5 "in column group" insertion mode.
 *
 * @param string $token Token type
 * @param string|null $value Text content or tag name
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfClose Whether a start tag was self-closing
 * @return bool
 */
private function inColumnGroupMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}
	if ( $token === 'eof' ) {
		return $this->inBodyMode( $token, $value, $attribs, $selfClose );
	}

	if ( $token === 'text' ) {
		// Leading whitespace is inserted as-is; only the remainder (if
		// any) goes through the "anything else" handling below.
		if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $leading ) ) {
			$this->stack->insertText( $leading[0] );
			$value = substr( $value, strlen( $leading[0] ) );
		}
		if ( strlen( $value ) === 0 ) {
			return true; // All text handled.
		}
	} elseif ( $token === 'tag' ) {
		// OMITTED: <html>
		if ( $value === 'col' ) {
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;
		}
		if ( $value === 'template' ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfClose );
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'colgroup' ) {
			if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
				$this->stack->pop();
				$this->switchMode( 'inTableMode' );
			}
			// Otherwise the token is ignored.
			return true;
		}
		if ( $value === 'col' ) {
			return true; // Ignore the token.
		}
		if ( $value === 'template' ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfClose );
		}
	}

	// Anything else
	if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
		// Close the colgroup, then reprocess the token.
		$this->inColumnGroupMode( 'endtag', 'colgroup' );
		return $this->insertToken( $token, $value, $attribs, $selfClose );
	}
	return true; // Ignore the token.
}
3182 
/**
 * Helper for inTableBodyMode: close the open table section and switch
 * back to inTableMode.
 *
 * @return bool False if none of <tbody>, <thead>, <tfoot> is in table
 *   scope (nothing is changed in that case), true otherwise
 */
private function endSection() {
	$sectionOpen = false;
	foreach ( [ 'tbody', 'thead', 'tfoot' ] as $sectionTag ) {
		if ( $this->stack->inTableScope( $sectionTag ) ) {
			$sectionOpen = true;
			break;
		}
	}
	if ( !$sectionOpen ) {
		return false;
	}

	$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
	$this->stack->pop();
	$this->switchMode( 'inTableMode' );
	return true;
}
/**
 * Handle a token in the HTML5 "in table body" insertion mode.
 *
 * @param string $token Token type
 * @param string|null $value Text content or tag name
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfClose Whether a start tag was self-closing
 * @return bool
 */
private function inTableBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'tag' ) {
		if ( $value === 'tr' ) {
			$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inRowMode' );
			return true;
		}
		if ( $value === 'th' || $value === 'td' ) {
			// Synthesize the missing <tr>, then reprocess the cell.
			$this->inTableBodyMode( 'tag', 'tr', [] );
			$this->insertToken( $token, $value, $attribs, $selfClose );
			return true;
		}
		$sectionEnders = [ 'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead' ];
		if ( in_array( $value, $sectionEnders, true ) ) {
			// Reprocess only if a section was actually closed.
			if ( $this->endSection() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'table' ) {
			if ( $this->endSection() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
		if ( in_array( $value, [ 'tbody', 'tfoot', 'thead' ], true ) ) {
			if ( $this->stack->inTableScope( $value ) ) {
				$this->endSection();
			}
			return true;
		}
		// OMITTED: <body>
		// OMITTED: <html>
		if ( in_array( $value, [ 'caption', 'col', 'colgroup', 'td', 'th', 'tr' ], true ) ) {
			return true; // Ignore the token.
		}
	}

	// Anything else:
	return $this->inTableMode( $token, $value, $attribs, $selfClose );
}
3249 
/**
 * Helper for inRowMode: close the open <tr> and switch back to
 * inTableBodyMode.
 *
 * @return bool False if no <tr> is in table scope (nothing is changed
 *   in that case), true once the row has been closed
 */
private function endRow() {
	if ( $this->stack->inTableScope( 'tr' ) ) {
		$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
		$this->stack->pop();
		$this->switchMode( 'inTableBodyMode' );
		return true;
	}
	return false;
}
/**
 * Handle a token in the HTML5 "in row" insertion mode.
 *
 * @param string $token Token type
 * @param string|null $value Text content or tag name
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfClose Whether a start tag was self-closing
 * @return bool
 */
private function inRowMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'tag' ) {
		if ( $value === 'th' || $value === 'td' ) {
			$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inCellMode' );
			$this->afe->insertMarker();
			return true;
		}
		$rowEnders = [
			'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr',
		];
		if ( in_array( $value, $rowEnders, true ) ) {
			// Reprocess only if a row was actually closed.
			if ( $this->endRow() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'tr' ) {
			$this->endRow();
			return true;
		}
		if ( $value === 'table' ) {
			if ( $this->endRow() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
		if ( in_array( $value, [ 'tbody', 'tfoot', 'thead' ], true ) ) {
			// The section must be open, and the row must close, before
			// the end tag is reprocessed.
			if ( $this->stack->inTableScope( $value ) ) {
				if ( $this->endRow() ) {
					$this->insertToken( $token, $value, $attribs, $selfClose );
				}
			}
			return true;
		}
		// OMITTED: <body>
		// OMITTED: <html>
		if ( in_array( $value, [ 'caption', 'col', 'colgroup', 'td', 'th' ], true ) ) {
			return true; // Ignore the token.
		}
	}

	// Anything else:
	return $this->inTableMode( $token, $value, $attribs, $selfClose );
}
3315 
/**
 * Helper for inCellMode: close the open table cell by feeding the
 * matching end tag to inCellMode. <td> is tried before <th>.
 *
 * @return bool True if a <td> or <th> was in table scope, false if
 *   neither was (nothing is changed in that case)
 */
private function endCell() {
	foreach ( [ 'td', 'th' ] as $cellTag ) {
		if ( $this->stack->inTableScope( $cellTag ) ) {
			$this->inCellMode( 'endtag', $cellTag );
			return true;
		}
	}
	return false;
}
/**
 * Handle a token in the HTML5 "in cell" insertion mode.
 *
 * @param string $token Token type
 * @param string|null $value Text content or tag name
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfClose Whether a start tag was self-closing
 * @return bool
 */
private function inCellMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'tag' ) {
		$cellEnders = [
			'caption', 'col', 'colgroup', 'tbody', 'td',
			'tfoot', 'th', 'thead', 'tr',
		];
		if ( in_array( $value, $cellEnders, true ) ) {
			// Reprocess only if a cell was actually closed.
			if ( $this->endCell() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'td' || $value === 'th' ) {
			if ( $this->stack->inTableScope( $value ) ) {
				$this->stack->generateImpliedEndTags();
				$this->stack->popTag( $value );
				$this->afe->clearToMarker();
				$this->switchMode( 'inRowMode' );
			}
			return true;
		}
		// OMITTED: <body>
		// OMITTED: <html>
		if ( in_array( $value, [ 'caption', 'col', 'colgroup' ], true ) ) {
			return true;
		}
		if ( in_array( $value, [ 'table', 'tbody', 'tfoot', 'thead', 'tr' ], true ) ) {
			if ( $this->stack->inTableScope( $value ) ) {
				// Close whichever cell is open, then reprocess the end
				// tag in row mode.
				$this->stack->generateImpliedEndTags();
				$this->stack->popTag( BalanceSets::$tableCellSet );
				$this->afe->clearToMarker();
				$this->switchMode( 'inRowMode' );
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
	}

	// Anything else:
	return $this->inBodyMode( $token, $value, $attribs, $selfClose );
}
3381 
/**
 * Handle a token in the HTML5 "in select" insertion mode.
 *
 * Tokens that are not handled explicitly are ignored.
 *
 * @param string $token Token type
 * @param string|null $value Text content or tag name
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfClose Whether a start tag was self-closing
 * @return bool
 */
private function inSelectMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
		return true;
	}
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}
	if ( $token === 'eof' ) {
		return $this->inBodyMode( $token, $value, $attribs, $selfClose );
	}

	if ( $token === 'tag' ) {
		switch ( $value ) {
		// OMITTED: <html>
		case 'option':
		case 'optgroup':
			// Both tags close an open <option>; <optgroup> additionally
			// closes an open <optgroup>.
			if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
				$this->stack->pop();
			}
			if (
				$value === 'optgroup' &&
				$this->stack->currentNode->isHtmlNamed( 'optgroup' )
			) {
				$this->stack->pop();
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'select':
			// A nested <select> start tag is treated like </select>.
			$this->inSelectMode( 'endtag', $value );
			return true;

		case 'input':
		case 'keygen':
		case 'textarea':
			if ( $this->stack->inSelectScope( 'select' ) ) {
				// Close the select, then reprocess the token.
				$this->inSelectMode( 'endtag', 'select' );
				return $this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true; // ignore token (fragment case)

		case 'script':
		case 'template':
			return $this->inHeadMode( $token, $value, $attribs, $selfClose );
		}
	} elseif ( $token === 'endtag' ) {
		switch ( $value ) {
		case 'optgroup':
			// An <option> directly inside an <optgroup> is closed
			// together with that <optgroup>.
			if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
				if (
					$this->stack->length() >= 2 &&
					$this->stack->node( $this->stack->length() - 2 )->isHtmlNamed( 'optgroup' )
				) {
					$this->stack->pop();
				}
			}
			if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
				$this->stack->pop();
			}
			return true;

		case 'option':
			if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
				$this->stack->pop();
			}
			return true;

		case 'select':
			if ( $this->stack->inSelectScope( $value ) ) {
				$this->stack->popTag( $value );
				$this->resetInsertionMode();
			}
			// Otherwise: fragment case, nothing to close.
			return true;

		case 'template':
			return $this->inHeadMode( $token, $value, $attribs, $selfClose );
		}
	}

	// anything else: just ignore the token
	return true;
}
3457 
/**
 * Handle a token in the HTML5 "in select in table" insertion mode.
 *
 * Table-structure start tags, and table-structure end tags whose
 * element is in table scope, close the open <select> and are then
 * reprocessed. Everything else is handled by inSelectMode().
 *
 * @param string $token Token type
 * @param string|null $value Text content or tag name
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfClose Whether a start tag was self-closing
 * @return bool
 */
private function inSelectInTableMode( $token, $value, $attribs = null, $selfClose = false ) {
	$tableTags = [
		'caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th',
	];

	if ( in_array( $value, $tableTags, true ) ) {
		if ( $token === 'tag' ) {
			$this->inSelectInTableMode( 'endtag', 'select' );
			return $this->insertToken( $token, $value, $attribs, $selfClose );
		}
		if ( $token === 'endtag' ) {
			if ( !$this->stack->inTableScope( $value ) ) {
				// No matching open element: ignore the end tag.
				return true;
			}
			$this->inSelectInTableMode( 'endtag', 'select' );
			return $this->insertToken( $token, $value, $attribs, $selfClose );
		}
	}

	// anything else
	return $this->inSelectMode( $token, $value, $attribs, $selfClose );
}
3482 
/**
 * Handle a token in the HTML5 "in template" insertion mode.
 *
 * Start tags either go to inHeadMode() or select a new insertion mode
 * based on the tag name and are reprocessed there.
 *
 * @param string $token Token type: 'text', 'comment', 'eof', 'tag' or 'endtag'
 * @param string|null $value Text content or tag name
 * @param array|null $attribs Attributes of a start tag
 * @param bool $selfClose Whether a start tag was self-closing
 * @return bool
 */
private function inTemplateMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'text' || $token === 'comment' ) {
		return $this->inBodyMode( $token, $value, $attribs, $selfClose );
	}

	if ( $token === 'eof' ) {
		if ( $this->stack->indexOf( 'template' ) >= 0 ) {
			// Close the open template, then reprocess the EOF in
			// whatever mode is appropriate afterwards.
			$this->stack->popTag( 'template' );
			$this->afe->clearToMarker();
			array_pop( $this->templateInsertionModes );
			$this->resetInsertionMode();
			$this->insertToken( $token, $value, $attribs, $selfClose );
		} else {
			$this->stopParsing();
		}
		return true;
	}

	if ( $token === 'tag' ) {
		// OMITTED: <script>
		// OMITTED: <title>
		$headTags = [
			'base', 'basefont', 'bgsound', 'link',
			'meta', 'noframes', 'style', 'template',
		];
		if ( in_array( $value, $headTags, true ) ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfClose );
		}

		// Insertion mode selected by table-structure start tags; any
		// other start tag is reprocessed in inBodyMode.
		$modeForTag = [
			'caption' => 'inTableMode',
			'colgroup' => 'inTableMode',
			'tbody' => 'inTableMode',
			'tfoot' => 'inTableMode',
			'thead' => 'inTableMode',
			'col' => 'inColumnGroupMode',
			'tr' => 'inTableBodyMode',
			'td' => 'inRowMode',
			'th' => 'inRowMode',
		];
		$nextMode = isset( $modeForTag[$value] ) ? $modeForTag[$value] : 'inBodyMode';
		return $this->switchModeAndReprocess(
			$nextMode, $token, $value, $attribs, $selfClose
		);
	}

	if ( $token === 'endtag' ) {
		if ( $value === 'template' ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfClose );
		}
		// Every other end tag is ignored.
		return true;
	}

	Assert::invariant( false, "Bad token type: $token" );
}
3549 }
static $specialSet
Definition: Balancer.php:108
Config $config
Definition: MediaWiki.php:38
removeElement(BalanceElement $elt, $flatten=true)
Remove the given $elt from the BalanceStack, optionally flattening it in the process.
Definition: Balancer.php:1034
static $inInvertedSelectScopeSet
Definition: Balancer.php:249
inSpecificScope($tag, $set)
Determine if the stack has $tag in a specific scope, $set.
Definition: Balancer.php:867
node($idx)
Return the BalanceElement at the given position $idx, where position 0 represents the root element...
Definition: Balancer.php:923
static $mathmlTextIntegrationPointSet
Definition: Balancer.php:255
BalanceActiveFormattingElements $afe
Definition: Balancer.php:1798
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1287
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:802
the array() calling protocol came about after MediaWiki 1.4rc1.
static $impliedEndTagsSet
Definition: Balancer.php:157
$fosterParentMode
Foster parent mode determines how nodes are inserted into the stack.
Definition: Balancer.php:666
clearToContext($set)
Pop elements off the stack not including the first element in the specified set.
Definition: Balancer.php:1018
reconstruct($stack)
Reconstruct the active formatting elements.
Definition: Balancer.php:1669
insertAfter(BalanceElement $a, BalanceElement $b)
Find $a in the BalanceStack and insert $b after it.
Definition: Balancer.php:1069
removeChild(BalanceElement $elt)
Remove the given child from this element.
Definition: Balancer.php:387
__toString()
Get a string representation of the AFE list, for debugging.
Definition: Balancer.php:1715
getOutput()
Return a string representing the output of the tree builder: all the children of the root node...
Definition: Balancer.php:698
static static static static $htmlIntegrationPointSet
Definition: Balancer.php:262
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
pop()
Remove the current node from the BalanceStack, flattening it in the process.
Definition: Balancer.php:974
inRowMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3260
parseRawText($value, $attribs=null)
Definition: Balancer.php:2331
inCellMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3328
insertText($value, $isComment=false)
Insert text at the appropriate place for inserting a node.
Definition: Balancer.php:724
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:2102
indexOf($tag)
Return the position of the given BalanceElement, set, or HTML tag name string in the BalanceStack...
Definition: Balancer.php:953
flatten(array $config)
Flatten this node and all of its children into a string, as specified by the HTML serialization speci...
Definition: Balancer.php:472
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their contents
Definition: database.txt:2
inTableMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:2941
insertBefore(BalanceElement $a, $b)
Find $a in the list of children and insert $b before it.
Definition: Balancer.php:405
static static static static static static static static static static static $tableContextSet
Definition: Balancer.php:180
insertForeignToken($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:2035
$value
The list of active formatting elements, which is used to handle mis-nested formatting element tags in...
Definition: Balancer.php:1385
inTableBodyMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3197
inBodyMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:2425
static static static $extraLinefeedSet
Definition: Balancer.php:95
$parent
Parent of this element, or the string "flat" if this element has already been flattened into its pare...
Definition: Balancer.php:333
The "stack of open elements" as defined in the HTML5 tree builder spec.
Definition: Balancer.php:654
$tail
The last (most recent) element in the list.
Definition: Balancer.php:1387
length()
Return the number of elements currently in the BalanceStack.
Definition: Balancer.php:966
getNoahKey()
Get a string key for the Noah's Ark algorithm.
Definition: Balancer.php:629
$noahTableStack
An array of arrays representing the population of elements in each bucket according to the Noah's Ark...
Definition: Balancer.php:1410
$namespaceURI
The namespace of the element.
Definition: Balancer.php:316
isHtmlIntegrationPoint()
Determine if $this represents an HTML integration point, as defined in the HTML5 specification.
Definition: Balancer.php:610
balance($text, $processingCallback=null, $processingArgs=[])
Return a balanced HTML string for the HTML fragment given by $text, subject to the caveats listed in ...
Definition: Balancer.php:1918
isInList(BalanceElement $elt)
Determine whether an element is in the list of formatting elements.
Definition: Balancer.php:1530
static validateTagAttributes($attribs, $element)
Take an array of attribute names and values and normalize or discard illegal values for the given ele...
Definition: Sanitizer.php:748
isHtml()
Determine if $this represents an element in the HTML namespace.
Definition: Balancer.php:588
insertComment($value)
Insert a comment at the appropriate place for inserting a node.
Definition: Balancer.php:713
insertToken($token, $value, $attribs=null, $selfClose=false)
Pass a token to the tree builder.
Definition: Balancer.php:1967
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message.Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item.Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page.Return false to stop further processing of the tag $reader:XMLReader object &$pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision.Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag.Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload.Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports.&$fullInterwikiPrefix:Interwiki prefix, may contain colons.&$pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable.Can be used to lazy-load the import sources list.&$importSources:The value of $wgImportSources.Modify as necessary.See the comment in DefaultSettings.php for the detail of how to structure this array. 
'InfoAction':When building information to display on the action=info page.$context:IContextSource object &$pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect.&$title:Title object for the current page &$request:WebRequest &$ignoreRedirect:boolean to skip redirect check &$target:Title/string of redirect target &$article:Article object 'InternalParseBeforeLinks':during Parser's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InternalParseBeforeSanitize':during Parser's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings.Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments.&$parser:Parser object &$text:string containing partially parsed text &$stripState:Parser's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not.Return true without providing an interwiki to continue interwiki search.$prefix:interwiki prefix we are looking for.&$iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 
'InvalidateEmailComplete':Called after a user's email has been invalidated successfully.$user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification.Callee may modify $url and $query, URL will be constructed as $url.$query &$url:URL to index.php &$query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) &$article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() &$ip:IP being check &$result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from &$allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn't match your organization.$addr:The e-mail address entered by the user &$result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user &$result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we're looking for a messages file for &$file:The messages file path, you can override this to change the location. 
'LanguageGetMagic':DEPRECATED!Use $magicWords in a file listed in $wgExtensionMessagesFiles instead.Use this to define synonyms of magic words depending of the language &$magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces.Do not use this hook to add namespaces.Use CanonicalNamespaces for that.&$namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED!Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead.Use to define aliases of special pages names depending of the language &$specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names.&$names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page's language links.This is called in various places to allow extensions to define the effective language links for a page.$title:The page's Title.&$links:Associative array mapping language codes to prefixed links of the form"language:title".&$linkFlags:Associative array mapping prefixed links to arrays of flags.Currently unused, but planned to provide support for marking individual language links in the UI, e.g.for featured articles. 'LanguageSelector':Hook to change the language selector available on a page.$out:The output page.$cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED!Use HtmlPageLinkRendererBegin instead.Used when generating internal and interwiki links in Linker::link(), before processing starts.Return false to skip default processing and return $ret.See documentation for Linker::link() for details on the expected meanings of parameters.$skin:the Skin object $target:the Title that the link is pointing to &$html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1934
appendChild($elt)
Append $elt to the end of the list of children.
Definition: Balancer.php:427
$last
A BalanceElement is a simplified version of a DOM Node.
Definition: Balancer.php:311
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition: hooks.txt:1936
adjustedCurrentNode($fragmentContext)
Return the adjusted current node.
Definition: Balancer.php:903
inSelectMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3382
inSelectInTableMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3458
const ELEMENT_BITS_REGEX
Acceptable tag name charset from HTML5 parsing spec http://www.w3.org/TR/html5/syntax.html#tag-open-state.
Definition: Sanitizer.php:46
push(BalanceElement $elt)
Follow the steps required when the spec requires us to "push onto the list of active formatting eleme...
Definition: Balancer.php:1437
__toString()
Serialize this node and all of its children to a string, as specified by the HTML serialization speci...
Definition: Balancer.php:518
__construct($namespaceURI, $localName, array $attribs)
Make a new BalanceElement corresponding to the HTML DOM Element with the given localname, namespace, and attributes.
Definition: Balancer.php:374
static static static static static static static static static static static static static static static $inScopeSet
Definition: Balancer.php:208
inTextMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:2338
advance()
Grab the next "token" from $bitsIterator.
Definition: Balancer.php:2141
replaceAt($idx, BalanceElement $elt)
Replace the element at position $idx in the BalanceStack with $elt.
Definition: Balancer.php:932
generateImpliedEndTags($butnot=null, $thorough=false)
Generate implied end tags.
Definition: Balancer.php:885
replace(BalanceElement $a, BalanceElement $b)
Find element $a in the list and replace it with element $b.
Definition: Balancer.php:1611
static validateTag($params, $element)
Takes attribute names and values for a tag and the tag name and validates that the tag is allowed to ...
Definition: Sanitizer.php:712
fosterParent($elt)
Foster parent the given $elt in the stack of open elements.
Definition: Balancer.php:1089
inCaptionMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3085
static static static static static $tidyPWrapSet
Definition: Balancer.php:271
static static static static static static static static static static static static static static $formAssociatedSet
Definition: Balancer.php:200
inHeadMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:2355
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
Definition: hooks.txt:1936
static static $emptyElementSet
Definition: Balancer.php:85
$currentNode
Reference to the current element.
Definition: Balancer.php:676
inSelectScope($tag)
Determine if the stack has $tag in select scope.
Definition: Balancer.php:846
inListItemScope($tag)
Determine if the stack has $tag in list item scope.
Definition: Balancer.php:826
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books $tag
Definition: hooks.txt:1007
isHtmlNamed($tagName)
Determine if this element is an HTML element with the specified name.
Definition: Balancer.php:578
Utility constants and sets for the HTML5 tree building algorithm.
Definition: Balancer.php:69
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$localName
The lower-cased name of the element.
Definition: Balancer.php:321
static static static static static static $tidyInlineSet
Definition: Balancer.php:281
inScope($tag)
Determine if the stack has $tag in scope.
Definition: Balancer.php:806
const VALID_COMMENT_REGEX
Valid HTML5 comments.
Definition: Balancer.php:1823
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
$nextNoah
The next element in the Noah's Ark species bucket.
Definition: Balancer.php:364
isA($set)
Determine if $this represents a specific HTML tag, is a member of a tag set, or is equal to another B...
Definition: Balancer.php:561
static static static static static static $addressDivPSet
Definition: Balancer.php:144
insertHTMLElement($tag, $attribs)
Insert an HTML element at the appropriate place, pushing it on to the open elements stack...
Definition: Balancer.php:764
An implementation of the tree building portion of the HTML5 parsing spec.
Definition: Balancer.php:1792
popTo($idx)
Remove all nodes up to and including position $idx from the BalanceStack, flattening them in the proc...
Definition: Balancer.php:991
static static static static static static static static static $thoroughImpliedEndTagsSet
Definition: Balancer.php:165
insertAfter(BalanceElement $a, BalanceElement $b)
Find $a in the list and insert $b after it.
Definition: Balancer.php:1645
findElementByTag($tag)
Find and return the last element with the specified tag between the end of the list and the last mark...
Definition: Balancer.php:1514
$nextAFE
The next active formatting element in the list, or null if this is the end of the AFE list or if the ...
Definition: Balancer.php:353
switchModeAndReprocess($mode, $token, $value, $attribs, $selfClose)
Definition: Balancer.php:2243
getIterator()
Return an iterator over this stack which visits the current node first, and the root node last...
Definition: Balancer.php:913
static static static static static static static static static static static static static $tableRowContextSet
Definition: Balancer.php:193
popTag($tag)
Pop elements off the stack up to and including the first element with the specified HTML tagname (or ...
Definition: Balancer.php:1003
static static static static static static static $tableSectionRowSet
Definition: Balancer.php:150
inColumnGroupMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3132
inTemplateMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3483
$noahKey
A unique string identifier for Noah's Ark purposes, lazy initialized.
Definition: Balancer.php:347
insertForeignElement($namespaceURI, $tag, $attribs)
Insert a BalanceElement at the appropriate place, pushing it on to the open elements stack...
Definition: Balancer.php:750
__construct(array $config=[])
Create a new Balancer.
Definition: Balancer.php:1874
callable null $processingCallback
Definition: Balancer.php:1815
isMathmlTextIntegrationPoint()
Determine if $this represents a MathML text integration point, as defined in the HTML5 specification...
Definition: Balancer.php:599
clearToMarker()
Follow the steps required when the spec asks us to "clear the list of active formatting elements up t...
Definition: Balancer.php:1477
$count
$config
Configuration options governing flattening.
Definition: Balancer.php:672
adoptChildren(BalanceElement $elt)
Transfer all of the children of $elt to $this.
Definition: Balancer.php:447
A pseudo-element used as a marker in the list of active formatting elements.
Definition: Balancer.php:1371
__toString()
Return the contents of the open elements stack as a string for debugging.
Definition: Balancer.php:1356
inTableScope($tag)
Determine if the stack has $tag in table scope.
Definition: Balancer.php:836
static static static static $headingSet
Definition: Balancer.php:101
serialize()
Definition: ApiMessage.php:94
$attribs
Attributes for the element, in array form.
Definition: Balancer.php:326
static static static static static static static static static static static static static static static static $inListItemScopeSet
Definition: Balancer.php:224
insertElement(BalanceElement $elt)
Insert an element at the appropriate place and push it on to the open elements stack.
Definition: Balancer.php:777
inButtonScope($tag)
Determine if the stack has $tag in button scope.
Definition: Balancer.php:816
adoptionAgency($tag, $afe)
Run the "adoption agency algorithm" (AAA) for the given subject tag name.
Definition: Balancer.php:1151
$prevAFE
The previous active formatting element in the list, or null if this is the start of the list or if th...
Definition: Balancer.php:359
$head
The first (least recent) element in the list.
Definition: Balancer.php:1390
static static static static static static static static static static static static $tableBodyContextSet
Definition: Balancer.php:186
static static static static static static static static static static $tableCellSet
Definition: Balancer.php:175
inTableTextMode($token, $value, $attribs=null, $selfClose=false)
Definition: Balancer.php:3051
__construct(array $config)
Create a new BalanceStack with a single BalanceElement on it, representing the root node...
Definition: Balancer.php:683
static encodeAttribute($text)
Encode an attribute value for HTML output.
Definition: Sanitizer.php:1091
$matches
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:300