28 use Wikimedia\Assert\Assert;
29 use Wikimedia\Assert\ParameterAssertionException;
31 use \IteratorAggregate;
32 use \ReverseArrayIterator;
75 self::HTML_NAMESPACE => [
76 'html' =>
true,
'head' =>
true,
'body' =>
true,
'frameset' =>
true,
79 'xmp' =>
true,
'iframe' =>
true,
'noembed' =>
true,
80 'noscript' =>
true,
'script' =>
true,
86 self::HTML_NAMESPACE => [
87 'area' =>
true,
'base' =>
true,
'basefont' =>
true,
88 'bgsound' =>
true,
'br' =>
true,
'col' =>
true,
'command' =>
true,
89 'embed' =>
true,
'frame' =>
true,
'hr' =>
true,
'img' =>
true,
90 'input' =>
true,
'keygen' =>
true,
'link' =>
true,
'meta' =>
true,
91 'param' =>
true,
'source' =>
true,
'track' =>
true,
'wbr' =>
true
96 self::HTML_NAMESPACE => [
97 'pre' =>
true,
'textarea' =>
true,
'listing' =>
true,
102 self::HTML_NAMESPACE => [
103 'h1' =>
true,
'h2' =>
true,
'h3' =>
true,
104 'h4' =>
true,
'h5' =>
true,
'h6' =>
true
109 self::HTML_NAMESPACE => [
110 'address' =>
true,
'applet' =>
true,
'area' =>
true,
111 'article' =>
true,
'aside' =>
true,
'base' =>
true,
112 'basefont' =>
true,
'bgsound' =>
true,
'blockquote' =>
true,
113 'body' =>
true,
'br' =>
true,
'button' =>
true,
'caption' =>
true,
114 'center' =>
true,
'col' =>
true,
'colgroup' =>
true,
'dd' =>
true,
115 'details' =>
true,
'dir' =>
true,
'div' =>
true,
'dl' =>
true,
116 'dt' =>
true,
'embed' =>
true,
'fieldset' =>
true,
117 'figcaption' =>
true,
'figure' =>
true,
'footer' =>
true,
118 'form' =>
true,
'frame' =>
true,
'frameset' =>
true,
'h1' =>
true,
119 'h2' =>
true,
'h3' =>
true,
'h4' =>
true,
'h5' =>
true,
120 'h6' =>
true,
'head' =>
true,
'header' =>
true,
'hgroup' =>
true,
121 'hr' =>
true,
'html' =>
true,
'iframe' =>
true,
'img' =>
true,
122 'input' =>
true,
'li' =>
true,
'link' =>
true,
123 'listing' =>
true,
'main' =>
true,
'marquee' =>
true,
124 'menu' =>
true,
'meta' =>
true,
'nav' =>
true,
125 'noembed' =>
true,
'noframes' =>
true,
'noscript' =>
true,
126 'object' =>
true,
'ol' =>
true,
'p' =>
true,
'param' =>
true,
127 'plaintext' =>
true,
'pre' =>
true,
'script' =>
true,
128 'section' =>
true,
'select' =>
true,
'source' =>
true,
129 'style' =>
true,
'summary' =>
true,
'table' =>
true,
130 'tbody' =>
true,
'td' =>
true,
'template' =>
true,
131 'textarea' =>
true,
'tfoot' =>
true,
'th' =>
true,
'thead' =>
true,
132 'title' =>
true,
'tr' =>
true,
'track' =>
true,
'ul' =>
true,
133 'wbr' =>
true,
'xmp' =>
true
135 self::SVG_NAMESPACE => [
136 'foreignobject' =>
true,
'desc' =>
true,
'title' =>
true
138 self::MATHML_NAMESPACE => [
139 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
140 'mtext' =>
true,
'annotation-xml' =>
true
145 self::HTML_NAMESPACE => [
146 'address' =>
true,
'div' =>
true,
'p' =>
true
151 self::HTML_NAMESPACE => [
152 'table' =>
true,
'thead' =>
true,
'tbody' =>
true,
153 'tfoot' =>
true,
'tr' =>
true
158 self::HTML_NAMESPACE => [
159 'dd' =>
true,
'dt' =>
true,
'li' =>
true,
160 'menuitem' =>
true,
'optgroup' =>
true,
161 'option' =>
true,
'p' =>
true,
'rb' =>
true,
'rp' =>
true,
162 'rt' =>
true,
'rtc' =>
true
167 self::HTML_NAMESPACE => [
168 'caption' =>
true,
'colgroup' =>
true,
'dd' =>
true,
'dt' =>
true,
169 'li' =>
true,
'optgroup' =>
true,
'option' =>
true,
'p' =>
true,
170 'rb' =>
true,
'rp' =>
true,
'rt' =>
true,
'rtc' =>
true,
171 'tbody' =>
true,
'td' =>
true,
'tfoot' =>
true,
'th' =>
true,
172 'thead' =>
true,
'tr' =>
true
177 self::HTML_NAMESPACE => [
178 'td' =>
true,
'th' =>
true
182 self::HTML_NAMESPACE => [
183 'table' =>
true,
'template' =>
true,
'html' =>
true
188 self::HTML_NAMESPACE => [
189 'tbody' =>
true,
'tfoot' =>
true,
'thead' =>
true,
190 'template' =>
true,
'html' =>
true
195 self::HTML_NAMESPACE => [
196 'tr' =>
true,
'template' =>
true,
'html' =>
true
202 self::HTML_NAMESPACE => [
203 'button' =>
true,
'fieldset' =>
true,
'input' =>
true,
204 'keygen' =>
true,
'object' =>
true,
'output' =>
true,
205 'select' =>
true,
'textarea' =>
true,
'img' =>
true
210 self::HTML_NAMESPACE => [
211 'applet' =>
true,
'caption' =>
true,
'html' =>
true,
212 'marquee' =>
true,
'object' =>
true,
213 'table' =>
true,
'td' =>
true,
'template' =>
true,
216 self::SVG_NAMESPACE => [
217 'foreignobject' =>
true,
'desc' =>
true,
'title' =>
true
219 self::MATHML_NAMESPACE => [
220 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
221 'mtext' =>
true,
'annotation-xml' =>
true
227 if ( self::$inListItemScopeSet ===
null ) {
237 if ( self::$inButtonScopeSet ===
null ) {
245 self::HTML_NAMESPACE => [
246 'html' =>
true,
'table' =>
true,
'template' =>
true
251 self::HTML_NAMESPACE => [
252 'option' =>
true,
'optgroup' =>
true
257 self::MATHML_NAMESPACE => [
258 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
264 self::SVG_NAMESPACE => [
265 'foreignobject' =>
true,
273 self::HTML_NAMESPACE => [
274 'body' =>
true,
'blockquote' =>
true,
283 self::HTML_NAMESPACE => [
284 'a' =>
true,
'abbr' =>
true,
'acronym' =>
true,
'applet' =>
true,
285 'b' =>
true,
'basefont' =>
true,
'bdo' =>
true,
'big' =>
true,
286 'br' =>
true,
'button' =>
true,
'cite' =>
true,
'code' =>
true,
287 'dfn' =>
true,
'em' =>
true,
'font' =>
true,
'i' =>
true,
288 'iframe' =>
true,
'img' =>
true,
'input' =>
true,
'kbd' =>
true,
289 'label' =>
true,
'legend' =>
true,
'map' =>
true,
'object' =>
true,
290 'param' =>
true,
'q' =>
true,
'rb' =>
true,
'rbc' =>
true,
291 'rp' =>
true,
'rt' =>
true,
'rtc' =>
true,
'ruby' =>
true,
292 's' =>
true,
'samp' =>
true,
'select' =>
true,
'small' =>
true,
293 'span' =>
true,
'strike' =>
true,
'strong' =>
true,
'sub' =>
true,
294 'sup' =>
true,
'textarea' =>
true,
'tt' =>
true,
'u' =>
true,
380 $this->parent =
null;
381 $this->children = [];
389 Assert::precondition(
390 $this->parent !==
'flat',
"Can't removeChild after flattening $this"
393 $elt->parent === $this,
'elt',
'must have $this as a parent'
395 $idx = array_search( $elt, $this->children,
true );
396 Assert::parameter( $idx !==
false,
'$elt',
'must be a child of $this' );
398 array_splice( $this->children, $idx, 1 );
407 Assert::precondition(
408 $this->parent !==
'flat',
"Can't insertBefore after flattening."
410 $idx = array_search( $a, $this->children,
true );
411 Assert::parameter( $idx !==
false,
'$a',
'must be a child of $this' );
412 if ( is_string( $b ) ) {
413 array_splice( $this->children, $idx, 0, [ $b ] );
415 Assert::parameter( $b->parent !==
'flat',
'$b',
"Can't be flat" );
416 if ( $b->parent !==
null ) {
417 $b->parent->removeChild( $b );
419 array_splice( $this->children, $idx, 0, [ $b ] );
429 Assert::precondition(
430 $this->parent !==
'flat',
"Can't appendChild after flattening."
432 if ( is_string( $elt ) ) {
433 array_push( $this->children, $elt );
437 if ( $elt->parent !==
null ) {
438 $elt->parent->removeChild( $elt );
440 array_push( $this->children, $elt );
441 $elt->parent = $this;
449 Assert::precondition(
450 $elt->parent !==
'flat',
"Can't adoptChildren after flattening."
452 foreach ( $elt->children
as $child ) {
453 if ( !is_string( $child ) ) {
456 $child->parent =
null;
474 Assert::parameter( $this->parent !==
null,
'$this',
'must be a child' );
475 Assert::parameter( $this->parent !==
'flat',
'$this',
'already flat' );
476 $idx = array_search( $this, $this->parent->children,
true );
478 $idx !==
false,
'$this',
'must be a child of its parent'
480 $tidyCompat =
$config[
'tidyCompat'];
483 foreach ( $this->children
as $elt ) {
484 if ( !is_string( $elt ) ) {
485 $elt = $elt->flatten(
$config );
487 if ( $blank && preg_match(
'/[^\t\n\f\r ]/', $elt ) ) {
492 $this->localName =
'p';
493 } elseif ( $blank ) {
496 if ( !
count( $this->attribs ) &&
497 ( $this->localName ===
'tr' || $this->localName ===
'li' )
499 $this->attribs = [
'class' =>
"mw-empty-elt" ];
504 count( $this->children ) > 0 &&
505 substr( $this->children[0], 0, 1 ) ==
"\n"
511 array_unshift( $this->children,
"\n" );
513 $flat = $blank ?
'' :
"{$this}";
517 $this->parent->children[$idx] = $flat;
518 $this->parent =
'flat';
532 $encValue = Sanitizer::encodeAttribute(
$value );
533 $encAttribs .=
" $name=\"$encValue\"";
536 $out =
"<{$this->localName}{$encAttribs}>";
537 $len = strlen(
$out );
539 foreach ( $this->children
as $elt ) {
542 $out .=
"</{$this->localName}>";
544 $out =
"<{$this->localName}{$encAttribs} />";
546 count( $this->children ) === 0,
547 "Empty elements shouldn't have children."
563 public function isA( $set ) {
565 return $this === $set;
566 } elseif ( is_array( $set ) ) {
567 return isset( $set[$this->namespaceURI] ) &&
568 isset( $set[$this->namespaceURI][$this->localName] );
571 return $this->
isHtml() && $this->localName === $set;
582 && $this->localName === $tagName;
618 $this->localName ===
'annotation-xml' &&
619 isset( $this->attribs[
'encoding'] ) &&
620 ( strcasecmp( $this->attribs[
'encoding'],
'text/html' ) == 0 ||
621 strcasecmp( $this->attribs[
'encoding'],
'application/xhtml+xml' ) == 0 )
633 if ( $this->noahKey ===
null ) {
636 $this->noahKey =
serialize( [ $this->namespaceURI, $this->localName,
$attribs ] );
692 $this->currentNode = $this->elements[0];
704 foreach ( $this->elements[0]->children
as $elt ) {
705 $out .= is_string( $elt ) ? $elt :
706 $elt->flatten( $this->config );
731 $this->fosterParentMode &&
736 $this->config[
'tidyCompat'] && !$isComment &&
742 $this->currentNode->appendChild(
$value );
784 $this->currentNode->isHtmlNamed(
'mw:p-wrap' ) &&
791 $this->fosterParentMode &&
796 $this->currentNode->appendChild( $elt );
798 Assert::invariant( $elt->parent !==
null,
"$elt must be in tree" );
799 Assert::invariant( $elt->parent !==
'flat',
"$elt must not have been previous flattened" );
800 array_push( $this->elements, $elt );
801 $this->currentNode = $elt;
854 foreach ( $this
as $elt ) {
855 if ( $elt->isA( $tag ) ) {
873 foreach ( $this
as $elt ) {
874 if ( $elt->isA( $tag ) ) {
877 if ( $elt->isA( $set ) ) {
891 $endTagSet = $thorough ?
894 while ( $this->currentNode ) {
895 if ( $butnot !==
null && $this->currentNode->isHtmlNamed( $butnot ) ) {
898 if ( !$this->currentNode->isA( $endTagSet ) ) {
911 return ( $fragmentContext &&
count( $this->elements ) === 1 ) ?
931 return $this->elements[ $idx ];
940 Assert::precondition(
941 $this->elements[$idx]->parent !==
'flat',
942 'Replaced element should not have already been flattened.'
944 Assert::precondition(
945 $elt->parent !==
'flat',
946 'New element should not have already been flattened.'
948 $this->elements[$idx] = $elt;
949 if ( $idx ===
count( $this->elements ) - 1 ) {
950 $this->currentNode = $elt;
961 for ( $i =
count( $this->elements ) - 1; $i >= 0; $i-- ) {
962 if ( $this->elements[$i]->isA( $tag ) ) {
974 return count( $this->elements );
982 $elt = array_pop( $this->elements );
983 if (
count( $this->elements ) ) {
984 $this->currentNode = $this->elements[
count( $this->elements ) - 1 ];
986 $this->currentNode =
null;
988 if ( !$elt->isHtmlNamed(
'mw:p-wrap' ) ) {
989 $elt->flatten( $this->config );
999 for ( $length =
count( $this->elements ); $length > $idx; $length-- ) {
1011 while ( $this->currentNode ) {
1012 if ( $this->currentNode->isA( $tag ) ) {
1027 for ( $length =
count( $this->elements ); $length > 1; $length-- ) {
1028 if ( $this->currentNode->isA( $set ) ) {
1043 $elt->parent !==
'flat',
1045 '$elt should not already have been flattened.'
1048 $elt->parent->parent !==
'flat',
1050 'The parent of $elt should not already have been flattened.'
1052 $idx = array_search( $elt, $this->elements,
true );
1053 Assert::parameter( $idx !==
false,
'$elt',
'must be in stack' );
1054 array_splice( $this->elements, $idx, 1 );
1055 if ( $idx ===
count( $this->elements ) ) {
1056 $this->currentNode = $this->elements[$idx - 1];
1063 $elt->
flatten( $this->config );
1065 Assert::postcondition(
1066 array_search( $elt, $this->elements,
true ) ===
false,
1067 '$elt should no longer be in open elements stack'
1078 Assert::parameter( $idx !==
false,
'$a',
'must be in stack' );
1079 if ( $idx ===
count( $this->elements ) - 1 ) {
1080 array_push( $this->elements, $b );
1081 $this->currentNode = $b;
1083 array_splice( $this->elements, $idx + 1, 0, [ $b ] );
1097 $lastTable = $this->
indexOf(
'table' );
1098 $lastTemplate = $this->
indexOf(
'template' );
1102 if ( $lastTemplate >= 0 && ( $lastTable < 0 || $lastTemplate > $lastTable ) ) {
1103 $parent = $this->elements[$lastTemplate];
1104 } elseif ( $lastTable >= 0 ) {
1105 $parent = $this->elements[$lastTable]->parent;
1108 $parent !==
null,
"All tables should have parents"
1110 $before = $this->elements[$lastTable];
1112 $parent = $this->elements[0];
1115 if ( $this->config[
'tidyCompat'] ) {
1116 if ( is_string( $elt ) ) {
1125 if ( $elt->isHtmlNamed(
'mw:p-wrap' ) ) {
1127 array_search( $before, $parent->children,
true ) :
1128 count( $parent->children );
1129 $after = $idx > 0 ? $parent->children[$idx - 1] :
'';
1141 $parent->insertBefore( $before, $elt );
1143 $parent->appendChild( $elt );
1164 $this->currentNode->isHtmlNamed( $tag ) &&
1165 !$afe->isInList( $this->currentNode )
1173 for ( $outer = 0; $outer < 8; $outer++ ) {
1179 $fmtElt = $afe->findElementByTag( $tag );
1190 $index = $this->
indexOf( $fmtElt );
1192 $afe->remove( $fmtElt );
1200 if ( !$this->
inScope( $fmtElt ) ) {
1208 $furthestBlock =
null;
1209 $furthestBlockIndex = -1;
1210 $stackLength = $this->
length();
1211 for ( $i = $index + 1; $i < $stackLength; $i++ ) {
1213 $furthestBlock = $this->
node( $i );
1214 $furthestBlockIndex = $i;
1225 if ( !$furthestBlock ) {
1226 $this->
popTag( $fmtElt );
1227 $afe->remove( $fmtElt );
1233 $ancestor = $this->
node( $index - 1 );
1239 $BOOKMARK =
new BalanceElement(
'[bookmark]',
'[bookmark]', [] );
1240 $afe->insertAfter( $fmtElt, $BOOKMARK );
1243 $node = $furthestBlock;
1244 $lastNode = $furthestBlock;
1245 $nodeIndex = $furthestBlockIndex;
1249 for ( $inner = 1;
true; $inner++ ) {
1256 $node = $this->
node( --$nodeIndex );
1260 if ( $node === $fmtElt )
break;
1265 $isAFE = $afe->isInList( $node );
1266 if ( $inner > 3 && $isAFE ) {
1267 $afe->remove( $node );
1290 $node->namespaceURI, $node->localName, $node->attribs );
1291 $afe->replace( $node, $newElt );
1292 $this->
replaceAt( $nodeIndex, $newElt );
1298 if ( $lastNode === $furthestBlock ) {
1299 $afe->remove( $BOOKMARK );
1300 $afe->insertAfter( $newElt, $BOOKMARK );
1305 $node->appendChild( $lastNode );
1316 $this->fosterParentMode &&
1324 $ancestor->appendChild( $lastNode );
1331 $fmtElt->namespaceURI, $fmtElt->localName, $fmtElt->attribs );
1335 $newElt2->adoptChildren( $furthestBlock );
1338 $furthestBlock->appendChild( $newElt2 );
1344 $afe->remove( $fmtElt );
1345 $afe->replace( $BOOKMARK, $newElt2 );
1365 foreach ( $this->elements
as $elt ) {
1366 array_push( $r, $elt->localName );
1368 return implode( $r,
' ' );
1421 for ( $node = $this->head; $node; $node = $next ) {
1422 $next = $node->nextAFE;
1423 $node->prevAFE = $node->nextAFE = $node->nextNoah =
null;
1425 $this->head = $this->tail = $this->noahTableStack =
null;
1430 if ( $this->tail ) {
1431 $this->tail->nextAFE = $elt;
1437 $this->noahTableStack[] = [];
1447 if ( $elt->prevAFE !==
null || $this->head === $elt ) {
1448 throw new ParameterAssertionException(
'$elt',
1449 'Cannot insert a node into the AFE list twice' );
1456 $table =& $this->noahTableStack[
count( $this->noahTableStack ) - 1 ];
1457 if ( !isset( $table[$noahKey] ) ) {
1458 $table[$noahKey] = $elt;
1462 while (
$tail->nextNoah ) {
1466 if ( $count >= 3 ) {
1467 $this->
remove(
$head );
1469 $tail->nextNoah = $elt;
1472 if ( $this->tail ) {
1473 $this->tail->nextAFE = $elt;
1490 $prev =
$tail->prevAFE;
1491 $tail->prevAFE =
null;
1493 $prev->nextAFE =
null;
1495 $tail->nextNoah =
null;
1500 $prev =
$tail->prevAFE;
1502 $prev->nextAFE =
null;
1505 array_pop( $this->noahTableStack );
1508 $this->noahTableStack[0] = [];
1514 $this->tail =
$tail;
1527 if ( $elt->localName === $tag ) {
1530 $elt = $elt->prevAFE;
1541 return $this->head === $elt || $elt->prevAFE;
1551 if ( $this->head !== $elt && !$elt->prevAFE ) {
1552 throw new ParameterAssertionException(
'$elt',
1553 "Attempted to remove an element which is not in the AFE list" );
1556 if ( $this->head === $elt ) {
1557 $this->head = $elt->nextAFE;
1559 if ( $this->tail === $elt ) {
1560 $this->tail = $elt->prevAFE;
1563 if ( $elt->prevAFE ) {
1564 $elt->prevAFE->nextAFE = $elt->nextAFE;
1567 if ( $elt->nextAFE ) {
1568 $elt->nextAFE->prevAFE = $elt->prevAFE;
1571 $elt->prevAFE = $elt->nextAFE =
null;
1578 $table =& $this->noahTableStack[
count( $this->noahTableStack ) - 1 ];
1579 if ( !isset( $table[$noahKey] ) ) {
1580 $table[$noahKey] = $elt;
1582 $tail = $table[$noahKey];
1583 while (
$tail->nextNoah ) {
1586 $tail->nextNoah = $elt;
1591 $table =& $this->noahTableStack[
count( $this->noahTableStack ) - 1 ];
1593 $noahElt = $table[$key];
1594 if ( $noahElt === $elt ) {
1595 if ( $noahElt->nextNoah ) {
1596 $table[$key] = $noahElt->nextNoah;
1597 $noahElt->nextNoah =
null;
1599 unset( $table[$key] );
1603 $prevNoahElt = $noahElt;
1604 $noahElt = $prevNoahElt->nextNoah;
1605 if ( $noahElt === $elt ) {
1607 $prevNoahElt->nextNoah = $elt->nextNoah;
1608 $elt->nextNoah =
null;
1611 }
while ( $noahElt );
1622 if ( $this->head !== $a && !$a->prevAFE ) {
1623 throw new ParameterAssertionException(
'$a',
1624 "Attempted to replace an element which is not in the AFE list" );
1627 if ( $this->head === $a ) {
1630 if ( $this->tail === $a ) {
1634 if ( $a->prevAFE ) {
1635 $a->prevAFE->nextAFE = $b;
1638 if ( $a->nextAFE ) {
1639 $a->nextAFE->prevAFE = $b;
1641 $b->prevAFE = $a->prevAFE;
1642 $b->nextAFE = $a->nextAFE;
1643 $a->nextAFE = $a->prevAFE =
null;
1656 if ( $this->head !== $a && !$a->prevAFE ) {
1657 throw new ParameterAssertionException(
'$a',
1658 "Attempted to insert after an element which is not in the AFE list" );
1660 if ( $this->tail === $a ) {
1663 if ( $a->nextAFE ) {
1664 $a->nextAFE->prevAFE = $b;
1666 $b->nextAFE = $a->nextAFE;
1691 if ( $stack->indexOf( $entry ) >= 0 ) {
1698 while ( $entry->prevAFE ) {
1699 $entry = $entry->prevAFE;
1700 if ( $entry instanceof
BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
1711 $entry = $entry->nextAFE;
1714 $newElement = $stack->insertHTMLElement(
1717 $this->
replace( $entry, $newElement );
1718 $entry = $newElement->nextAFE;
1728 for ( $node = $this->head; $node; $prev = $node, $node = $node->nextAFE ) {
1733 $s .= $node->localName .
'#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
1734 if ( $node->nextNoah ) {
1735 $s .=
" (noah sibling: {$node->nextNoah->localName}#" .
1736 substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
1739 if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
1740 $s .=
" (reverse link is wrong!)";
1744 if ( $prev !== $this->tail ) {
1745 $s .=
"(tail pointer is wrong!)\n";
1834 ( # 1. Comment match detector
1835 > | -> | # Invalid short close
1836 ( # 2. Comment contents
1846 ( # 3. Comment close
1847 --> | # Normal close
1848 --!> | # Comment end bang
1849 ( # 4. Indicate matches requiring EOF
1850 --! | # EOF in comment end bang state
1851 -- | # EOF in comment end state
1852 - | # EOF in comment end dash state
1853 (?#nothing) # EOF in comment state
1857 ([^<]*) \z # 5. Non-tag text after the comment
1889 'allowedHtmlElements' =>
null,
1890 'tidyCompat' =>
false,
1891 'allowComments' =>
true,
1893 $this->allowedHtmlElements =
$config[
'allowedHtmlElements'];
1894 $this->strict =
$config[
'strict'];
1895 $this->allowComments =
$config[
'allowComments'];
1896 if ( $this->allowedHtmlElements !==
null ) {
1898 $bad = array_uintersect_assoc(
1899 $this->allowedHtmlElements,
1901 function ( $a, $b ) {
1907 if (
count( $bad ) > 0 ) {
1908 $badstr = implode( array_keys( $bad ),
',' );
1909 throw new ParameterAssertionException(
1911 'Balance attempted with sanitization including ' .
1912 "unsupported elements: {$badstr}"
1931 $this->parseMode =
'inBodyMode';
1938 $this->textIntegrationMode =
1939 $this->ignoreLinefeed =
1941 $this->inRAWTEXT =
false;
1945 $this->fragmentContext =
1948 $this->formElementPointer =
null;
1949 for (
$e = $this->fragmentContext;
$e !=
null;
$e =
$e->parent ) {
1950 if (
$e->isHtmlNamed(
'form' ) ) {
1951 $this->formElementPointer =
$e;
1957 $x = $this->bitsIterator->current();
1958 $this->bitsIterator->next();
1959 $this->
insertToken(
'text', str_replace(
'>',
'>', $x ) );
1961 while ( $this->bitsIterator->valid() ) {
1965 $result = $this->stack->getOutput();
1967 $this->bitsIterator =
null;
1969 $this->stack =
null;
1970 $this->fragmentContext =
null;
1971 $this->formElementPointer =
null;
1981 if ( $token ===
'tag' || $token ===
'endtag' ) {
1987 "Unsupported $token <$value> found."
1991 } elseif ( $token ===
'text' &&
$value ===
'' ) {
1996 if ( $this->ignoreLinefeed ) {
1997 $this->ignoreLinefeed =
false;
1998 if ( $token ===
'text' ) {
1999 if (
$value[0] ===
"\n" ) {
2009 $adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );
2014 $this->stack->length() === 0 ||
2015 $adjusted->isHtml() ||
2019 } elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
2020 if ( $token ===
'text' ) {
2030 $adjusted->localName ===
'annotation-xml' &&
2031 $token ===
'tag' &&
$value ===
'svg'
2035 $adjusted->isHtmlIntegrationPoint() &&
2036 ( $token ===
'tag' || $token ===
'text' )
2049 if ( $token ===
'text' ) {
2050 $this->stack->insertText(
$value );
2052 } elseif ( $token ===
'comment' ) {
2053 $this->stack->insertComment(
$value );
2055 } elseif ( $token ===
'tag' ) {
2109 if ( $this->fragmentContext ) {
2113 $this->stack->pop();
2114 $node = $this->stack->currentNode;
2116 $node->isMathmlTextIntegrationPoint() ||
2117 $node->isHtmlIntegrationPoint() ||
2126 $adjusted = ( $this->fragmentContext && $this->stack->length() === 1 ) ?
2127 $this->fragmentContext : $this->stack->currentNode;
2128 $this->stack->insertForeignElement(
2132 $this->stack->pop();
2135 } elseif ( $token ===
'endtag' ) {
2137 foreach ( $this->stack
as $i => $node ) {
2138 if ( $node->isHtml() && !$first ) {
2142 } elseif ( $i === 0 ) {
2144 } elseif ( $node->localName ===
$value ) {
2145 $this->stack->popTag( $node );
2158 $x = $this->bitsIterator->current();
2159 $this->bitsIterator->next();
2164 $this->allowComments &&
2165 !( $this->inRCDATA || $this->inRAWTEXT ) &&
2166 preg_match( self::VALID_COMMENT_REGEX, $x, $regs, PREG_OFFSET_CAPTURE ) &&
2168 ( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
2170 $contents = $regs[2][0];
2171 $rest = $regs[5][0];
2173 $this->
insertToken(
'text', str_replace(
'>',
'>', $rest ) );
2181 if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
2182 list( , $slash,
$t, $attribStr, $brace, $rest ) = $regs;
2183 $t = strtolower(
$t );
2184 if ( $this->strict ) {
2188 '/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
2190 "Bad attribute string found"
2195 !$this->strict,
"< found which does not start a valid tag"
2197 $slash =
$t = $attribStr = $brace = $rest =
null;
2200 if ( $this->inRCDATA ) {
2201 if ( $slash &&
$t === $this->inRCDATA ) {
2202 $this->inRCDATA =
false;
2208 if ( $this->inRAWTEXT ) {
2209 if ( $slash &&
$t === $this->inRAWTEXT ) {
2210 $this->inRAWTEXT =
false;
2216 $sanitize = $this->allowedHtmlElements !==
null;
2218 $goodTag =
$t && isset( $this->allowedHtmlElements[
$t] );
2221 if ( is_callable( $this->processingCallback ) ) {
2222 call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
2225 $goodTag = Sanitizer::validateTag( $attribStr,
$t );
2230 $attribs = Sanitizer::decodeTagAttributes( $attribStr );
2233 $attribs = Sanitizer::decodeTagAttributes( $attribStr );
2236 $slash ?
'endtag' :
'tag',
$t,
$attribs, $brace ===
'/>'
2240 $rest = str_replace(
'>',
'>', $rest );
2241 $this->
insertToken(
'text', str_replace(
'>',
'>', $rest ) );
2242 } elseif ( $this->inRAWTEXT ) {
2246 $this->
insertToken(
'text',
'<' . str_replace(
'>',
'>', $x ) );
2252 substr( $mode, -4 ) ===
'Mode',
'$mode',
'should end in Mode'
2255 $this->parseMode = $mode;
2266 foreach ( $this->stack
as $i => $node ) {
2269 if ( $this->fragmentContext ) {
2273 if ( $node->isHtml() ) {
2274 switch ( $node->localName ) {
2276 $stackLength = $this->stack->length();
2277 for ( $j = $i + 1; $j < $stackLength - 1; $j++ ) {
2278 $ancestor = $this->stack->node( $stackLength - $j - 1 );
2279 if ( $ancestor->isHtmlNamed(
'template' ) ) {
2282 if ( $ancestor->isHtmlNamed(
'table' ) ) {
2308 array_slice( $this->templateInsertionModes, -1 )[0]
2344 $this->stack->popTo( 1 );
2349 $this->inRAWTEXT =
$value;
2350 $this->originalInsertionMode = $this->
switchMode(
'inTextMode' );
2355 if ( $token ===
'text' ) {
2356 $this->stack->insertText(
$value );
2358 } elseif ( $token ===
'eof' ) {
2359 $this->stack->pop();
2361 $this->originalInsertionMode, $token,
$value,
$attribs, $selfClose
2363 } elseif ( $token ===
'endtag' ) {
2364 $this->stack->pop();
2365 $this->
switchMode( $this->originalInsertionMode );
2372 if ( $token ===
'text' ) {
2373 if ( preg_match(
'/^[\x09\x0A\x0C\x0D\x20]+/',
$value,
$matches ) ) {
2374 $this->stack->insertText(
$matches[0] );
2377 if ( strlen(
$value ) === 0 ) {
2381 } elseif ( $token ===
'tag' ) {
2392 $this->stack->pop();
2402 $this->afe->insertMarker();
2409 } elseif ( $token ===
'endtag' ) {
2417 if ( $this->stack->indexOf(
$value ) < 0 ) {
2420 $this->stack->generateImpliedEndTags(
null,
true );
2421 $this->stack->popTag(
$value );
2422 $this->afe->clearToMarker();
2423 array_pop( $this->templateInsertionModes );
2430 } elseif ( $token ===
'comment' ) {
2431 $this->stack->insertComment(
$value );
2442 if ( $token ===
'text' ) {
2443 $this->afe->reconstruct( $this->stack );
2444 $this->stack->insertText(
$value );
2446 } elseif ( $token ===
'eof' ) {
2447 if ( !empty( $this->templateInsertionModes ) ) {
2452 } elseif ( $token ===
'tag' ) {
2492 if ( $this->stack->inButtonScope(
'p' ) ) {
2499 if ( $this->stack->inButtonScope(
"p" ) ) {
2502 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2503 $this->stack->pop();
2514 if ( $this->stack->inButtonScope(
'p' ) ) {
2518 $this->stack->pop();
2525 if ( $this->stack->inButtonScope(
'p' ) ) {
2529 $this->ignoreLinefeed =
true;
2535 $this->formElementPointer &&
2536 $this->stack->indexOf(
'template' ) < 0
2540 if ( $this->stack->inButtonScope(
"p" ) ) {
2544 if ( $this->stack->indexOf(
'template' ) < 0 ) {
2545 $this->formElementPointer = $elt;
2551 foreach ( $this->stack
as $node ) {
2552 if ( $node->isHtmlNamed(
'li' ) ) {
2563 if ( $this->stack->inButtonScope(
'p' ) ) {
2572 foreach ( $this->stack
as $node ) {
2573 if ( $node->isHtmlNamed(
'dd' ) ) {
2577 if ( $node->isHtmlNamed(
'dt' ) ) {
2588 if ( $this->stack->inButtonScope(
'p' ) ) {
2597 if ( $this->stack->inScope(
'button' ) ) {
2601 $this->afe->reconstruct( $this->stack );
2606 $activeElement = $this->afe->findElementByTag(
'a' );
2607 if ( $activeElement ) {
2609 if ( $this->afe->isInList( $activeElement ) ) {
2610 $this->afe->remove( $activeElement );
2614 $this->stack->removeElement( $activeElement,
false );
2630 $this->afe->reconstruct( $this->stack );
2631 $this->afe->push( $this->stack->insertHTMLElement(
$value,
$attribs ) );
2635 $this->afe->reconstruct( $this->stack );
2636 if ( $this->stack->inScope(
'nobr' ) ) {
2638 $this->afe->reconstruct( $this->stack );
2640 $this->afe->push( $this->stack->insertHTMLElement(
$value,
$attribs ) );
2646 $this->afe->reconstruct( $this->stack );
2648 $this->afe->insertMarker();
2655 if ( $this->stack->inButtonScope(
'p' ) ) {
2669 $this->afe->reconstruct( $this->stack );
2671 $this->stack->pop();
2676 $this->afe->reconstruct( $this->stack );
2678 $this->stack->pop();
2687 $this->stack->pop();
2691 if ( $this->stack->inButtonScope(
'p' ) ) {
2694 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2695 $this->stack->pop();
2698 $this->stack->pop();
2707 $this->ignoreLinefeed =
true;
2708 $this->inRCDATA =
$value;
2718 $this->afe->reconstruct( $this->stack );
2720 switch ( $this->parseMode ) {
2722 case 'inCaptionMode':
2723 case 'inTableBodyMode':
2735 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
2738 $this->afe->reconstruct( $this->stack );
2743 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2744 $this->stack->pop();
2746 $this->afe->reconstruct( $this->stack );
2752 if ( $this->stack->inScope(
'ruby' ) ) {
2753 $this->stack->generateImpliedEndTags();
2760 if ( $this->stack->inScope(
'ruby' ) ) {
2761 $this->stack->generateImpliedEndTags(
'rtc' );
2767 $this->afe->reconstruct( $this->stack );
2772 $this->stack->insertForeignElement(
2777 $this->stack->pop();
2782 $this->afe->reconstruct( $this->stack );
2787 $this->stack->insertForeignElement(
2792 $this->stack->pop();
2812 $this->afe->reconstruct( $this->stack );
2815 } elseif ( $token ===
'endtag' ) {
2849 if ( !$this->stack->inScope(
$value ) ) {
2852 $this->stack->generateImpliedEndTags();
2853 $this->stack->popTag(
$value );
2857 if ( $this->stack->indexOf(
'template' ) < 0 ) {
2859 $this->formElementPointer =
null;
2860 if ( !$openform || !$this->stack->inScope( $openform ) ) {
2863 $this->stack->generateImpliedEndTags();
2866 $flatten = ( $this->stack->currentNode === $openform );
2867 $this->stack->removeElement( $openform, $flatten );
2869 if ( !$this->stack->inScope(
'form' ) ) {
2872 $this->stack->generateImpliedEndTags();
2873 $this->stack->popTag(
'form' );
2878 if ( !$this->stack->inButtonScope(
'p' ) ) {
2882 $this->stack->generateImpliedEndTags(
$value );
2883 $this->stack->popTag(
$value );
2887 if ( !$this->stack->inListItemScope(
$value ) ) {
2890 $this->stack->generateImpliedEndTags(
$value );
2891 $this->stack->popTag(
$value );
2896 if ( !$this->stack->inScope(
$value ) ) {
2899 $this->stack->generateImpliedEndTags(
$value );
2900 $this->stack->popTag(
$value );
2912 $this->stack->generateImpliedEndTags();
2934 if ( $this->stack->adoptionAgency(
$value, $this->afe ) ) {
2942 if ( !$this->stack->inScope(
$value ) ) {
2945 $this->stack->generateImpliedEndTags();
2946 $this->stack->popTag(
$value );
2947 $this->afe->clearToMarker();
2956 foreach ( $this->stack
as $i => $node ) {
2957 if ( $node->isHtmlNamed(
$value ) ) {
2958 $this->stack->generateImpliedEndTags(
$value );
2959 $this->stack->popTo( $i );
2966 } elseif ( $token ===
'comment' ) {
2967 $this->stack->insertComment(
$value );
2970 Assert::invariant(
false,
"Bad token type: $token" );
2975 if ( $token ===
'text' ) {
2976 if ( $this->textIntegrationMode ) {
2979 $this->pendingTableText =
'';
2985 } elseif ( $token ===
'eof' ) {
2988 } elseif ( $token ===
'tag' ) {
2991 $this->afe->insertMarker();
3016 if ( !$this->stack->inTableScope(
$value ) ) {
3028 if ( !isset(
$attribs[
'type'] ) || strcasecmp(
$attribs[
'type'],
'hidden' ) !== 0 ) {
3032 $this->stack->pop();
3037 $this->formElementPointer ||
3038 $this->stack->indexOf(
'template' ) >= 0
3042 $this->formElementPointer =
3044 $this->stack->popTag( $this->formElementPointer );
3048 } elseif ( $token ===
'endtag' ) {
3051 if ( !$this->stack->inTableScope(
$value ) ) {
3054 $this->stack->popTag(
$value );
3073 } elseif ( $token ===
'comment' ) {
3074 $this->stack->insertComment(
$value );
3078 $this->stack->fosterParentMode =
true;
3080 $this->stack->fosterParentMode =
false;
3085 if ( $token ===
'text' ) {
3086 $this->pendingTableText .=
$value;
3091 $this->pendingTableText =
'';
3092 if ( preg_match(
'/[^\x09\x0A\x0C\x0D\x20]/', $text ) ) {
3094 $this->stack->fosterParentMode =
true;
3096 $this->stack->fosterParentMode =
false;
3099 $this->stack->insertText( $text );
3102 $this->originalInsertionMode, $token,
$value,
$attribs, $selfClose
3108 if ( !$this->stack->inTableScope(
'caption' ) ) {
3111 $this->stack->generateImpliedEndTags();
3112 $this->stack->popTag(
'caption' );
3113 $this->afe->clearToMarker();
3119 if ( $token ===
'tag' ) {
3136 } elseif ( $token ===
'endtag' ) {
3166 if ( $token ===
'text' ) {
3167 if ( preg_match(
'/^[\x09\x0A\x0C\x0D\x20]+/',
$value,
$matches ) ) {
3168 $this->stack->insertText(
$matches[0] );
3171 if ( strlen(
$value ) === 0 ) {
3175 } elseif ( $token ===
'tag' ) {
3180 $this->stack->pop();
3186 } elseif ( $token ===
'endtag' ) {
3189 if ( !$this->stack->currentNode->isHtmlNamed(
'colgroup' ) ) {
3192 $this->stack->pop();
3201 } elseif ( $token ===
'eof' ) {
3203 } elseif ( $token ===
'comment' ) {
3204 $this->stack->insertComment(
$value );
3209 if ( !$this->stack->currentNode->isHtmlNamed(
'colgroup' ) ) {
3219 $this->stack->inTableScope(
'tbody' ) ||
3220 $this->stack->inTableScope(
'thead' ) ||
3221 $this->stack->inTableScope(
'tfoot' )
3226 $this->stack->pop();
3231 if ( $token ===
'tag' ) {
3254 } elseif ( $token ===
'endtag' ) {
3264 if ( $this->stack->inTableScope(
$value ) ) {
3285 if ( !$this->stack->inTableScope(
'tr' ) ) {
3289 $this->stack->pop();
3294 if ( $token ===
'tag' ) {
3301 $this->afe->insertMarker();
3315 } elseif ( $token ===
'endtag' ) {
3329 $this->stack->inTableScope(
$value ) &&
3351 if ( $this->stack->inTableScope(
'td' ) ) {
3354 } elseif ( $this->stack->inTableScope(
'th' ) ) {
3362 if ( $token ===
'tag' ) {
3378 } elseif ( $token ===
'endtag' ) {
3382 if ( $this->stack->inTableScope(
$value ) ) {
3383 $this->stack->generateImpliedEndTags();
3384 $this->stack->popTag(
$value );
3385 $this->afe->clearToMarker();
3401 if ( $this->stack->inTableScope(
$value ) ) {
3402 $this->stack->generateImpliedEndTags();
3404 $this->afe->clearToMarker();
3416 if ( $token ===
'text' ) {
3417 $this->stack->insertText(
$value );
3419 } elseif ( $token ===
'eof' ) {
3421 } elseif ( $token ===
'tag' ) {
3425 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3426 $this->stack->pop();
3431 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3432 $this->stack->pop();
3434 if ( $this->stack->currentNode->isHtmlNamed(
'optgroup' ) ) {
3435 $this->stack->pop();
3445 if ( !$this->stack->inSelectScope(
'select' ) ) {
3454 } elseif ( $token ===
'endtag' ) {
3458 $this->stack->currentNode->isHtmlNamed(
'option' ) &&
3459 $this->stack->length() >= 2 &&
3460 $this->stack->node( $this->stack->length() - 2 )->isHtmlNamed(
'optgroup' )
3462 $this->stack->pop();
3464 if ( $this->stack->currentNode->isHtmlNamed(
'optgroup' ) ) {
3465 $this->stack->pop();
3469 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3470 $this->stack->pop();
3474 if ( !$this->stack->inSelectScope(
$value ) ) {
3477 $this->stack->popTag(
$value );
3483 } elseif ( $token ===
'comment' ) {
3484 $this->stack->insertComment(
$value );
3501 if ( $token ===
'tag' ) {
3504 } elseif ( $token ===
'endtag' ) {
3505 if ( $this->stack->inTableScope(
$value ) ) {
3517 if ( $token ===
'text' || $token ===
'comment' ) {
3519 } elseif ( $token ===
'eof' ) {
3520 if ( $this->stack->indexOf(
'template' ) < 0 ) {
3523 $this->stack->popTag(
'template' );
3524 $this->afe->clearToMarker();
3525 array_pop( $this->templateInsertionModes );
3530 } elseif ( $token ===
'tag' ) {
3572 } elseif ( $token ===
'endtag' ) {
3579 Assert::invariant(
false,
"Bad token type: $token" );