28 use Wikimedia\Assert\Assert;
29 use Wikimedia\Assert\ParameterAssertionException;
31 use \IteratorAggregate;
32 use \ReverseArrayIterator;
75 self::HTML_NAMESPACE => [
76 'html' =>
true,
'head' =>
true,
'body' =>
true,
'frameset' =>
true,
79 'xmp' =>
true,
'iframe' =>
true,
'noembed' =>
true,
80 'noscript' =>
true,
'script' =>
true,
86 self::HTML_NAMESPACE => [
87 'area' =>
true,
'base' =>
true,
'basefont' =>
true,
88 'bgsound' =>
true,
'br' =>
true,
'col' =>
true,
'command' =>
true,
89 'embed' =>
true,
'frame' =>
true,
'hr' =>
true,
'img' =>
true,
90 'input' =>
true,
'keygen' =>
true,
'link' =>
true,
'meta' =>
true,
91 'param' =>
true,
'source' =>
true,
'track' =>
true,
'wbr' =>
true
96 self::HTML_NAMESPACE => [
97 'pre' =>
true,
'textarea' =>
true,
'listing' =>
true,
102 self::HTML_NAMESPACE => [
103 'h1' =>
true,
'h2' =>
true,
'h3' =>
true,
104 'h4' =>
true,
'h5' =>
true,
'h6' =>
true
109 self::HTML_NAMESPACE => [
110 'address' =>
true,
'applet' =>
true,
'area' =>
true,
111 'article' =>
true,
'aside' =>
true,
'base' =>
true,
112 'basefont' =>
true,
'bgsound' =>
true,
'blockquote' =>
true,
113 'body' =>
true,
'br' =>
true,
'button' =>
true,
'caption' =>
true,
114 'center' =>
true,
'col' =>
true,
'colgroup' =>
true,
'dd' =>
true,
115 'details' =>
true,
'dir' =>
true,
'div' =>
true,
'dl' =>
true,
116 'dt' =>
true,
'embed' =>
true,
'fieldset' =>
true,
117 'figcaption' =>
true,
'figure' =>
true,
'footer' =>
true,
118 'form' =>
true,
'frame' =>
true,
'frameset' =>
true,
'h1' =>
true,
119 'h2' =>
true,
'h3' =>
true,
'h4' =>
true,
'h5' =>
true,
120 'h6' =>
true,
'head' =>
true,
'header' =>
true,
'hgroup' =>
true,
121 'hr' =>
true,
'html' =>
true,
'iframe' =>
true,
'img' =>
true,
122 'input' =>
true,
'li' =>
true,
'link' =>
true,
123 'listing' =>
true,
'main' =>
true,
'marquee' =>
true,
124 'menu' =>
true,
'meta' =>
true,
'nav' =>
true,
125 'noembed' =>
true,
'noframes' =>
true,
'noscript' =>
true,
126 'object' =>
true,
'ol' =>
true,
'p' =>
true,
'param' =>
true,
127 'plaintext' =>
true,
'pre' =>
true,
'script' =>
true,
128 'section' =>
true,
'select' =>
true,
'source' =>
true,
129 'style' =>
true,
'summary' =>
true,
'table' =>
true,
130 'tbody' =>
true,
'td' =>
true,
'template' =>
true,
131 'textarea' =>
true,
'tfoot' =>
true,
'th' =>
true,
'thead' =>
true,
132 'title' =>
true,
'tr' =>
true,
'track' =>
true,
'ul' =>
true,
133 'wbr' =>
true,
'xmp' =>
true
135 self::SVG_NAMESPACE => [
136 'foreignobject' =>
true,
'desc' =>
true,
'title' =>
true
138 self::MATHML_NAMESPACE => [
139 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
140 'mtext' =>
true,
'annotation-xml' =>
true
145 self::HTML_NAMESPACE => [
146 'address' =>
true,
'div' =>
true,
'p' =>
true
151 self::HTML_NAMESPACE => [
152 'table' =>
true,
'thead' =>
true,
'tbody' =>
true,
153 'tfoot' =>
true,
'tr' =>
true
158 self::HTML_NAMESPACE => [
159 'dd' =>
true,
'dt' =>
true,
'li' =>
true,
160 'menuitem' =>
true,
'optgroup' =>
true,
161 'option' =>
true,
'p' =>
true,
'rb' =>
true,
'rp' =>
true,
162 'rt' =>
true,
'rtc' =>
true
167 self::HTML_NAMESPACE => [
168 'caption' =>
true,
'colgroup' =>
true,
'dd' =>
true,
'dt' =>
true,
169 'li' =>
true,
'optgroup' =>
true,
'option' =>
true,
'p' =>
true,
170 'rb' =>
true,
'rp' =>
true,
'rt' =>
true,
'rtc' =>
true,
171 'tbody' =>
true,
'td' =>
true,
'tfoot' =>
true,
'th' =>
true,
172 'thead' =>
true,
'tr' =>
true
177 self::HTML_NAMESPACE => [
178 'td' =>
true,
'th' =>
true
182 self::HTML_NAMESPACE => [
183 'table' =>
true,
'template' =>
true,
'html' =>
true
188 self::HTML_NAMESPACE => [
189 'tbody' =>
true,
'tfoot' =>
true,
'thead' =>
true,
190 'template' =>
true,
'html' =>
true
195 self::HTML_NAMESPACE => [
196 'tr' =>
true,
'template' =>
true,
'html' =>
true
202 self::HTML_NAMESPACE => [
203 'button' =>
true,
'fieldset' =>
true,
'input' =>
true,
204 'keygen' =>
true,
'object' =>
true,
'output' =>
true,
205 'select' =>
true,
'textarea' =>
true,
'img' =>
true
210 self::HTML_NAMESPACE => [
211 'applet' =>
true,
'caption' =>
true,
'html' =>
true,
212 'marquee' =>
true,
'object' =>
true,
213 'table' =>
true,
'td' =>
true,
'template' =>
true,
216 self::SVG_NAMESPACE => [
217 'foreignobject' =>
true,
'desc' =>
true,
'title' =>
true
219 self::MATHML_NAMESPACE => [
220 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
221 'mtext' =>
true,
'annotation-xml' =>
true
227 if ( self::$inListItemScopeSet ===
null ) {
237 if ( self::$inButtonScopeSet ===
null ) {
245 self::HTML_NAMESPACE => [
246 'html' =>
true,
'table' =>
true,
'template' =>
true
251 self::HTML_NAMESPACE => [
252 'option' =>
true,
'optgroup' =>
true
257 self::MATHML_NAMESPACE => [
258 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
264 self::SVG_NAMESPACE => [
265 'foreignobject' =>
true,
273 self::HTML_NAMESPACE => [
274 'body' =>
true,
'blockquote' =>
true,
283 self::HTML_NAMESPACE => [
284 'a' =>
true,
'abbr' =>
true,
'acronym' =>
true,
'applet' =>
true,
285 'b' =>
true,
'basefont' =>
true,
'bdo' =>
true,
'big' =>
true,
286 'br' =>
true,
'button' =>
true,
'cite' =>
true,
'code' =>
true,
287 'dfn' =>
true,
'em' =>
true,
'font' =>
true,
'i' =>
true,
288 'iframe' =>
true,
'img' =>
true,
'input' =>
true,
'kbd' =>
true,
289 'label' =>
true,
'legend' =>
true,
'map' =>
true,
'object' =>
true,
290 'param' =>
true,
'q' =>
true,
'rb' =>
true,
'rbc' =>
true,
291 'rp' =>
true,
'rt' =>
true,
'rtc' =>
true,
'ruby' =>
true,
292 's' =>
true,
'samp' =>
true,
'select' =>
true,
'small' =>
true,
293 'span' =>
true,
'strike' =>
true,
'strong' =>
true,
'sub' =>
true,
294 'sup' =>
true,
'textarea' =>
true,
'tt' =>
true,
'u' =>
true,
380 $this->parent =
null;
381 $this->children = [];
389 Assert::precondition(
390 $this->parent !==
'flat',
"Can't removeChild after flattening $this"
393 $elt->parent === $this,
'elt',
'must have $this as a parent'
395 $idx = array_search( $elt, $this->children,
true );
396 Assert::parameter( $idx !==
false,
'$elt',
'must be a child of $this' );
398 array_splice( $this->children, $idx, 1 );
407 Assert::precondition(
408 $this->parent !==
'flat',
"Can't insertBefore after flattening."
410 $idx = array_search( $a, $this->children,
true );
411 Assert::parameter( $idx !==
false,
'$a',
'must be a child of $this' );
412 if ( is_string( $b ) ) {
413 array_splice( $this->children, $idx, 0, [ $b ] );
415 Assert::parameter( $b->parent !==
'flat',
'$b',
"Can't be flat" );
416 if ( $b->parent !==
null ) {
417 $b->parent->removeChild( $b );
419 array_splice( $this->children, $idx, 0, [ $b ] );
429 Assert::precondition(
430 $this->parent !==
'flat',
"Can't appendChild after flattening."
432 if ( is_string( $elt ) ) {
433 array_push( $this->children, $elt );
437 if ( $elt->parent !==
null ) {
438 $elt->parent->removeChild( $elt );
440 array_push( $this->children, $elt );
441 $elt->parent = $this;
449 Assert::precondition(
450 $elt->parent !==
'flat',
"Can't adoptChildren after flattening."
452 foreach ( $elt->children
as $child ) {
453 if ( !is_string( $child ) ) {
456 $child->parent =
null;
474 Assert::parameter( $this->parent !==
null,
'$this',
'must be a child' );
475 Assert::parameter( $this->parent !==
'flat',
'$this',
'already flat' );
476 $idx = array_search( $this, $this->parent->children,
true );
478 $idx !==
false,
'$this',
'must be a child of its parent'
480 $tidyCompat =
$config[
'tidyCompat'];
483 foreach ( $this->children
as $elt ) {
484 if ( !is_string( $elt ) ) {
485 $elt = $elt->flatten(
$config );
487 if ( $blank && preg_match(
'/[^\t\n\f\r ]/', $elt ) ) {
492 $this->localName =
'p';
493 } elseif ( $blank ) {
496 if ( !
count( $this->attribs ) &&
497 ( $this->localName ===
'tr' || $this->localName ===
'li' )
499 $this->attribs = [
'class' =>
"mw-empty-elt" ];
504 count( $this->children ) > 0 &&
505 substr( $this->children[0], 0, 1 ) ==
"\n"
511 array_unshift( $this->children,
"\n" );
513 $flat = $blank ?
'' :
"{$this}";
517 $this->parent->children[$idx] = $flat;
518 $this->parent =
'flat';
532 $encValue = Sanitizer::encodeAttribute(
$value );
533 $encAttribs .=
" $name=\"$encValue\"";
536 $out =
"<{$this->localName}{$encAttribs}>";
537 $len = strlen(
$out );
539 foreach ( $this->children
as $elt ) {
542 $out .=
"</{$this->localName}>";
544 $out =
"<{$this->localName}{$encAttribs} />";
546 count( $this->children ) === 0,
547 "Empty elements shouldn't have children."
563 public function isA( $set ) {
565 return $this === $set;
566 } elseif ( is_array( $set ) ) {
567 return isset( $set[$this->namespaceURI] ) &&
568 isset( $set[$this->namespaceURI][$this->localName] );
571 return $this->
isHtml() && $this->localName === $set;
582 && $this->localName === $tagName;
618 $this->localName ===
'annotation-xml' &&
619 isset( $this->attribs[
'encoding'] ) &&
620 ( strcasecmp( $this->attribs[
'encoding'],
'text/html' ) == 0 ||
621 strcasecmp( $this->attribs[
'encoding'],
'application/xhtml+xml' ) == 0 )
632 if ( $this->noahKey ===
null ) {
635 $this->noahKey =
serialize( [ $this->namespaceURI, $this->localName,
$attribs ] );
691 $this->currentNode = $this->elements[0];
703 foreach ( $this->elements[0]->children
as $elt ) {
704 $out .= is_string( $elt ) ? $elt :
705 $elt->flatten( $this->config );
728 $this->fosterParentMode &&
733 $this->config[
'tidyCompat'] && !$isComment &&
739 $this->currentNode->appendChild(
$value );
781 $this->currentNode->isHtmlNamed(
'mw:p-wrap' ) &&
788 $this->fosterParentMode &&
793 $this->currentNode->appendChild( $elt );
795 Assert::invariant( $elt->parent !==
null,
"$elt must be in tree" );
796 Assert::invariant( $elt->parent !==
'flat',
"$elt must not have been previous flattened" );
797 array_push( $this->elements, $elt );
798 $this->currentNode = $elt;
851 foreach ( $this
as $elt ) {
852 if ( $elt->isA(
$tag ) ) {
870 foreach ( $this
as $elt ) {
871 if ( $elt->isA(
$tag ) ) {
874 if ( $elt->isA( $set ) ) {
888 $endTagSet = $thorough ?
891 while ( $this->currentNode ) {
892 if ( $butnot !==
null && $this->currentNode->isHtmlNamed( $butnot ) ) {
895 if ( !$this->currentNode->isA( $endTagSet ) ) {
906 return ( $fragmentContext &&
count( $this->elements ) === 1 ) ?
926 return $this->elements[ $idx ];
935 Assert::precondition(
936 $this->elements[$idx]->parent !==
'flat',
937 'Replaced element should not have already been flattened.'
939 Assert::precondition(
940 $elt->parent !==
'flat',
941 'New element should not have already been flattened.'
943 $this->elements[$idx] = $elt;
944 if ( $idx ===
count( $this->elements ) - 1 ) {
945 $this->currentNode = $elt;
956 for ( $i =
count( $this->elements ) - 1; $i >= 0; $i-- ) {
957 if ( $this->elements[$i]->isA(
$tag ) ) {
969 return count( $this->elements );
977 $elt = array_pop( $this->elements );
978 if (
count( $this->elements ) ) {
979 $this->currentNode = $this->elements[
count( $this->elements ) - 1 ];
981 $this->currentNode =
null;
983 if ( !$elt->isHtmlNamed(
'mw:p-wrap' ) ) {
984 $elt->flatten( $this->config );
994 for ( $length =
count( $this->elements ); $length > $idx; $length-- ) {
1006 while ( $this->currentNode ) {
1007 if ( $this->currentNode->isA(
$tag ) ) {
1022 for ( $length =
count( $this->elements ); $length > 1; $length-- ) {
1023 if ( $this->currentNode->isA( $set ) ) {
1038 $elt->parent !==
'flat',
1040 '$elt should not already have been flattened.'
1043 $elt->parent->parent !==
'flat',
1045 'The parent of $elt should not already have been flattened.'
1047 $idx = array_search( $elt, $this->elements,
true );
1048 Assert::parameter( $idx !==
false,
'$elt',
'must be in stack' );
1049 array_splice( $this->elements, $idx, 1 );
1050 if ( $idx ===
count( $this->elements ) ) {
1051 $this->currentNode = $this->elements[$idx - 1];
1058 $elt->
flatten( $this->config );
1060 Assert::postcondition(
1061 array_search( $elt, $this->elements,
true ) ===
false,
1062 '$elt should no longer be in open elements stack'
1073 Assert::parameter( $idx !==
false,
'$a',
'must be in stack' );
1074 if ( $idx ===
count( $this->elements ) - 1 ) {
1075 array_push( $this->elements, $b );
1076 $this->currentNode = $b;
1078 array_splice( $this->elements, $idx + 1, 0, [ $b ] );
1092 $lastTable = $this->
indexOf(
'table' );
1093 $lastTemplate = $this->
indexOf(
'template' );
1097 if ( $lastTemplate >= 0 && ( $lastTable < 0 || $lastTemplate > $lastTable ) ) {
1098 $parent = $this->elements[$lastTemplate];
1099 } elseif ( $lastTable >= 0 ) {
1100 $parent = $this->elements[$lastTable]->parent;
1103 $parent !==
null,
"All tables should have parents"
1105 $before = $this->elements[$lastTable];
1107 $parent = $this->elements[0];
1110 if ( $this->config[
'tidyCompat'] ) {
1111 if ( is_string( $elt ) ) {
1120 if ( $elt->isHtmlNamed(
'mw:p-wrap' ) ) {
1122 array_search( $before, $parent->children,
true ) :
1123 count( $parent->children );
1124 $after = $idx > 0 ? $parent->children[$idx - 1] :
'';
1136 $parent->insertBefore( $before, $elt );
1138 $parent->appendChild( $elt );
1159 $this->currentNode->isHtmlNamed(
$tag ) &&
1160 !$afe->isInList( $this->currentNode )
1168 for ( $outer = 0; $outer < 8; $outer++ ) {
1174 $fmtElt = $afe->findElementByTag(
$tag );
1185 $index = $this->
indexOf( $fmtElt );
1187 $afe->remove( $fmtElt );
1195 if ( !$this->
inScope( $fmtElt ) ) {
1203 $furthestBlock =
null;
1204 $furthestBlockIndex = -1;
1205 $stackLength = $this->
length();
1206 for ( $i = $index+1; $i < $stackLength; $i++ ) {
1208 $furthestBlock = $this->
node( $i );
1209 $furthestBlockIndex = $i;
1220 if ( !$furthestBlock ) {
1221 $this->
popTag( $fmtElt );
1222 $afe->remove( $fmtElt );
1228 $ancestor = $this->
node( $index-1 );
1234 $BOOKMARK =
new BalanceElement(
'[bookmark]',
'[bookmark]', [] );
1235 $afe->insertAfter( $fmtElt, $BOOKMARK );
1238 $node = $furthestBlock;
1239 $lastNode = $furthestBlock;
1240 $nodeIndex = $furthestBlockIndex;
1244 for ( $inner = 1;
true; $inner++ ) {
1251 $node = $this->
node( --$nodeIndex );
1255 if ( $node === $fmtElt )
break;
1260 $isAFE = $afe->isInList( $node );
1261 if ( $inner > 3 && $isAFE ) {
1262 $afe->remove( $node );
1285 $node->namespaceURI, $node->localName, $node->attribs );
1286 $afe->replace( $node, $newElt );
1287 $this->
replaceAt( $nodeIndex, $newElt );
1293 if ( $lastNode === $furthestBlock ) {
1294 $afe->remove( $BOOKMARK );
1295 $afe->insertAfter( $newElt, $BOOKMARK );
1300 $node->appendChild( $lastNode );
1311 $this->fosterParentMode &&
1319 $ancestor->appendChild( $lastNode );
1326 $fmtElt->namespaceURI, $fmtElt->localName, $fmtElt->attribs );
1330 $newElt2->adoptChildren( $furthestBlock );
1333 $furthestBlock->appendChild( $newElt2 );
1339 $afe->remove( $fmtElt );
1340 $afe->replace( $BOOKMARK, $newElt2 );
1360 foreach ( $this->elements
as $elt ) {
1361 array_push( $r, $elt->localName );
// Join the collected local names with single spaces.
// The separator is passed first: the legacy (array, separator) argument
// order was deprecated in PHP 7.4 and removed in PHP 8.0.
return implode( ' ', $r );
1416 for ( $node = $this->head; $node; $node = $next ) {
1417 $next = $node->nextAFE;
1418 $node->prevAFE = $node->nextAFE = $node->nextNoah =
null;
1420 $this->head = $this->tail = $this->noahTableStack =
null;
1425 if ( $this->tail ) {
1426 $this->tail->nextAFE = $elt;
1432 $this->noahTableStack[] = [];
1442 if ( $elt->prevAFE !==
null || $this->head === $elt ) {
1443 throw new ParameterAssertionException(
'$elt',
1444 'Cannot insert a node into the AFE list twice' );
1451 $table =& $this->noahTableStack[
count( $this->noahTableStack ) - 1 ];
1452 if ( !isset( $table[$noahKey] ) ) {
1453 $table[$noahKey] = $elt;
1457 while (
$tail->nextNoah ) {
1461 if ( $count >= 3 ) {
1462 $this->
remove(
$head );
1464 $tail->nextNoah = $elt;
1467 if ( $this->tail ) {
1468 $this->tail->nextAFE = $elt;
1485 $prev =
$tail->prevAFE;
1486 $tail->prevAFE =
null;
1488 $prev->nextAFE =
null;
1490 $tail->nextNoah =
null;
1495 $prev =
$tail->prevAFE;
1497 $prev->nextAFE =
null;
1500 array_pop( $this->noahTableStack );
1503 $this->noahTableStack[0] = [];
1509 $this->tail =
$tail;
1520 if ( $elt->localName ===
$tag ) {
1523 $elt = $elt->prevAFE;
1534 return $this->head === $elt || $elt->prevAFE;
1544 if ( $this->head !== $elt && !$elt->prevAFE ) {
1545 throw new ParameterAssertionException(
'$elt',
1546 "Attempted to remove an element which is not in the AFE list" );
1549 if ( $this->head === $elt ) {
1550 $this->head = $elt->nextAFE;
1552 if ( $this->tail === $elt ) {
1553 $this->tail = $elt->prevAFE;
1556 if ( $elt->prevAFE ) {
1557 $elt->prevAFE->nextAFE = $elt->nextAFE;
1560 if ( $elt->nextAFE ) {
1561 $elt->nextAFE->prevAFE = $elt->prevAFE;
1564 $elt->prevAFE = $elt->nextAFE =
null;
1571 $table =& $this->noahTableStack[
count( $this->noahTableStack ) - 1 ];
1572 if ( !isset( $table[$noahKey] ) ) {
1573 $table[$noahKey] = $elt;
1575 $tail = $table[$noahKey];
1576 while (
$tail->nextNoah ) {
1579 $tail->nextNoah = $elt;
1584 $table =& $this->noahTableStack[
count( $this->noahTableStack ) - 1 ];
1586 $noahElt = $table[$key];
1587 if ( $noahElt === $elt ) {
1588 if ( $noahElt->nextNoah ) {
1589 $table[$key] = $noahElt->nextNoah;
1590 $noahElt->nextNoah =
null;
1592 unset( $table[$key] );
1596 $prevNoahElt = $noahElt;
1597 $noahElt = $prevNoahElt->nextNoah;
1598 if ( $noahElt === $elt ) {
1600 $prevNoahElt->nextNoah = $elt->nextNoah;
1601 $elt->nextNoah =
null;
1604 }
while ( $noahElt );
1615 if ( $this->head !== $a && !$a->prevAFE ) {
1616 throw new ParameterAssertionException(
'$a',
1617 "Attempted to replace an element which is not in the AFE list" );
1620 if ( $this->head === $a ) {
1623 if ( $this->tail === $a ) {
1627 if ( $a->prevAFE ) {
1628 $a->prevAFE->nextAFE = $b;
1631 if ( $a->nextAFE ) {
1632 $a->nextAFE->prevAFE = $b;
1634 $b->prevAFE = $a->prevAFE;
1635 $b->nextAFE = $a->nextAFE;
1636 $a->nextAFE = $a->prevAFE =
null;
1649 if ( $this->head !== $a && !$a->prevAFE ) {
1650 throw new ParameterAssertionException(
'$a',
1651 "Attempted to insert after an element which is not in the AFE list" );
1653 if ( $this->tail === $a ) {
1656 if ( $a->nextAFE ) {
1657 $a->nextAFE->prevAFE = $b;
1659 $b->nextAFE = $a->nextAFE;
1684 if ( $stack->indexOf( $entry ) >= 0 ) {
1691 while ( $entry->prevAFE ) {
1692 $entry = $entry->prevAFE;
1693 if ( $entry instanceof
BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
1704 $entry = $entry->nextAFE;
1707 $newElement = $stack->insertHTMLElement(
1710 $this->
replace( $entry, $newElement );
1711 $entry = $newElement->nextAFE;
1721 for ( $node = $this->head; $node; $prev = $node, $node = $node->nextAFE ) {
1726 $s .= $node->localName .
'#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
1727 if ( $node->nextNoah ) {
1728 $s .=
" (noah sibling: {$node->nextNoah->localName}#" .
1729 substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
1732 if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
1733 $s .=
" (reverse link is wrong!)";
1737 if ( $prev !== $this->tail ) {
1738 $s .=
"(tail pointer is wrong!)\n";
1827 ( # 1. Comment match detector
1828 > | -> | # Invalid short close
1829 ( # 2. Comment contents
1839 ( # 3. Comment close
1840 --> | # Normal close
1841 --!> | # Comment end bang
1842 ( # 4. Indicate matches requiring EOF
1843 --! | # EOF in comment end bang state
1844 -- | # EOF in comment end state
1845 - | # EOF in comment end dash state
1846 (?#nothing) # EOF in comment state
1850 ([^<]*) \z # 5. Non-tag text after the comment
1882 'allowedHtmlElements' =>
null,
1883 'tidyCompat' =>
false,
1884 'allowComments' =>
true,
1886 $this->allowedHtmlElements =
$config[
'allowedHtmlElements'];
1887 $this->strict =
$config[
'strict'];
1888 $this->allowComments =
$config[
'allowComments'];
1889 if ( $this->allowedHtmlElements !==
null ) {
1891 $bad = array_uintersect_assoc(
1892 $this->allowedHtmlElements,
1894 function( $a, $b ) {
1900 if (
count( $bad ) > 0 ) {
// Comma-separated list of the offending keys.
// The separator is passed first: the legacy (array, separator) argument
// order was deprecated in PHP 7.4 and removed in PHP 8.0.
$badstr = implode( ',', array_keys( $bad ) );
1902 throw new ParameterAssertionException(
1904 'Balance attempted with sanitization including ' .
1905 "unsupported elements: {$badstr}"
1924 $this->parseMode =
'inBodyMode';
1931 $this->textIntegrationMode =
1932 $this->ignoreLinefeed =
1934 $this->inRAWTEXT =
false;
1938 $this->fragmentContext =
1941 $this->formElementPointer =
null;
1942 for (
$e = $this->fragmentContext;
$e !=
null;
$e =
$e->parent ) {
1943 if (
$e->isHtmlNamed(
'form' ) ) {
1944 $this->formElementPointer =
$e;
1950 $x = $this->bitsIterator->current();
1951 $this->bitsIterator->next();
// Emit the current bit as a text token with '>' escaped.
// NOTE(review): the search and replacement strings were identical here,
// which made the call a no-op; '&gt;' is the presumed intended
// replacement -- confirm against the upstream file.
$this->insertToken( 'text', str_replace( '>', '&gt;', $x ) );
1954 while ( $this->bitsIterator->valid() ) {
1958 $result = $this->stack->getOutput();
1960 $this->bitsIterator =
null;
1962 $this->stack =
null;
1963 $this->fragmentContext =
null;
1964 $this->formElementPointer =
null;
1974 if ( $token ===
'tag' || $token ===
'endtag' ) {
1980 "Unsupported $token <$value> found."
1984 } elseif ( $token ===
'text' &&
$value ===
'' ) {
1989 if ( $this->ignoreLinefeed ) {
1990 $this->ignoreLinefeed =
false;
1991 if ( $token ===
'text' ) {
1992 if (
$value[0] ===
"\n" ) {
2002 $adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );
2007 $this->stack->length() === 0 ||
2008 $adjusted->isHtml() ||
2012 } elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
2013 if ( $token ===
'text' ) {
2023 $adjusted->localName ===
'annotation-xml' &&
2024 $token ===
'tag' &&
$value ===
'svg'
2028 $adjusted->isHtmlIntegrationPoint() &&
2029 ( $token ===
'tag' || $token ===
'text' )
2042 if ( $token ===
'text' ) {
2043 $this->stack->insertText(
$value );
2045 } elseif ( $token ===
'comment' ) {
2046 $this->stack->insertComment(
$value );
2048 } elseif ( $token ===
'tag' ) {
2102 if ( $this->fragmentContext ) {
2106 $this->stack->pop();
2107 $node = $this->stack->currentNode;
2109 $node->isMathmlTextIntegrationPoint() ||
2110 $node->isHtmlIntegrationPoint() ||
2119 $adjusted = ( $this->fragmentContext && $this->stack->length()===1 ) ?
2120 $this->fragmentContext : $this->stack->currentNode;
2121 $this->stack->insertForeignElement(
2125 $this->stack->pop();
2128 } elseif ( $token ===
'endtag' ) {
2130 foreach ( $this->stack
as $i => $node ) {
2131 if ( $node->isHtml() && !$first ) {
2135 } elseif ( $i === 0 ) {
2137 } elseif ( $node->localName ===
$value ) {
2138 $this->stack->popTag( $node );
2151 $x = $this->bitsIterator->current();
2152 $this->bitsIterator->next();
2157 $this->allowComments &&
2158 !( $this->inRCDATA || $this->inRAWTEXT ) &&
2161 ( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
2163 $contents = $regs[2][0];
2164 $rest = $regs[5][0];
// Emit the text captured after the comment as a text token with '>'
// escaped.
// NOTE(review): the search and replacement strings were identical here,
// which made the call a no-op; '&gt;' is the presumed intended
// replacement -- confirm against the upstream file.
$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
2174 if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
2175 list( , $slash,
$t, $attribStr, $brace, $rest ) = $regs;
2176 $t = strtolower(
$t );
2177 if ( $this->strict ) {
2181 '/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
2183 "Bad attribute string found"
2188 !$this->strict,
"< found which does not start a valid tag"
2190 $slash =
$t = $attribStr = $brace = $rest =
null;
2193 if ( $this->inRCDATA ) {
2194 if ( $slash &&
$t === $this->inRCDATA ) {
2195 $this->inRCDATA =
false;
2201 if ( $this->inRAWTEXT ) {
2202 if ( $slash &&
$t === $this->inRAWTEXT ) {
2203 $this->inRAWTEXT =
false;
2209 $sanitize = $this->allowedHtmlElements !==
null;
2211 $goodTag =
$t && isset( $this->allowedHtmlElements[
$t] );
2214 if ( is_callable( $this->processingCallback ) ) {
2215 call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
2218 $goodTag = Sanitizer::validateTag( $attribStr,
$t );
2223 $attribs = Sanitizer::decodeTagAttributes( $attribStr );
2226 $attribs = Sanitizer::decodeTagAttributes( $attribStr );
2229 $slash ?
'endtag' :
'tag',
$t,
$attribs, $brace ===
'/>'
// Escape '>' in the trailing text once, then emit it as a text token.
// The escaped string contains no '>' characters, so a second pass over
// it (as the previous code did) cannot change anything.
// NOTE(review): the search and replacement strings were identical here,
// which made the calls no-ops; '&gt;' is the presumed intended
// replacement -- confirm against the upstream file.
$rest = str_replace( '>', '&gt;', $rest );
$this->insertToken( 'text', $rest );
2235 } elseif ( $this->inRAWTEXT ) {
// Emit the whole bit as a text token, with the leading '<' and any '>'
// written as character references so the output is not re-read as markup.
// NOTE(review): the '>' replacement was a no-op (identical search and
// replacement strings) and the prefix was a literal '<'; both look like
// decoded '&gt;' / '&lt;' entities -- confirm against the upstream file.
$this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $x ) );
2245 substr( $mode, -4 )===
'Mode',
'$mode',
'should end in Mode'
2248 $this->parseMode = $mode;
2259 foreach ( $this->stack
as $i => $node ) {
2262 if ( $this->fragmentContext ) {
2266 if ( $node->isHtml() ) {
2267 switch ( $node->localName ) {
2269 $stackLength = $this->stack->length();
2270 for ( $j = $i + 1; $j < $stackLength-1; $j++ ) {
2271 $ancestor = $this->stack->node( $stackLength-$j-1 );
2272 if ( $ancestor->isHtmlNamed(
'template' ) ) {
2275 if ( $ancestor->isHtmlNamed(
'table' ) ) {
2301 array_slice( $this->templateInsertionModes, -1 )[0]
2337 $this->stack->popTo( 1 );
2342 $this->inRAWTEXT =
$value;
2343 $this->originalInsertionMode = $this->
switchMode(
'inTextMode' );
2348 if ( $token ===
'text' ) {
2349 $this->stack->insertText(
$value );
2351 } elseif ( $token ===
'eof' ) {
2352 $this->stack->pop();
2354 $this->originalInsertionMode, $token,
$value,
$attribs, $selfClose
2356 } elseif ( $token ===
'endtag' ) {
2357 $this->stack->pop();
2358 $this->
switchMode( $this->originalInsertionMode );
2365 if ( $token ===
'text' ) {
2366 if ( preg_match(
'/^[\x09\x0A\x0C\x0D\x20]+/',
$value,
$matches ) ) {
2367 $this->stack->insertText(
$matches[0] );
2370 if ( strlen(
$value ) === 0 ) {
2374 } elseif ( $token ===
'tag' ) {
2385 $this->stack->pop();
2395 $this->afe->insertMarker();
2402 } elseif ( $token ===
'endtag' ) {
2410 if ( $this->stack->indexOf(
$value ) < 0 ) {
2413 $this->stack->generateImpliedEndTags(
null,
true );
2414 $this->stack->popTag(
$value );
2415 $this->afe->clearToMarker();
2416 array_pop( $this->templateInsertionModes );
2423 } elseif ( $token ===
'comment' ) {
2424 $this->stack->insertComment(
$value );
2435 if ( $token ===
'text' ) {
2436 $this->afe->reconstruct( $this->stack );
2437 $this->stack->insertText(
$value );
2439 } elseif ( $token ===
'eof' ) {
2440 if ( !empty( $this->templateInsertionModes ) ) {
2445 } elseif ( $token ===
'tag' ) {
2485 if ( $this->stack->inButtonScope(
'p' ) ) {
2492 if ( $this->stack->inButtonScope(
"p" ) ) {
2495 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2496 $this->stack->pop();
2507 if ( $this->stack->inButtonScope(
'p' ) ) {
2511 $this->stack->pop();
2518 if ( $this->stack->inButtonScope(
'p' ) ) {
2522 $this->ignoreLinefeed =
true;
2528 $this->formElementPointer &&
2529 $this->stack->indexOf(
'template' ) < 0
2533 if ( $this->stack->inButtonScope(
"p" ) ) {
2537 if ( $this->stack->indexOf(
'template' ) < 0 ) {
2538 $this->formElementPointer = $elt;
2544 foreach ( $this->stack
as $node ) {
2545 if ( $node->isHtmlNamed(
'li' ) ) {
2556 if ( $this->stack->inButtonScope(
'p' ) ) {
2565 foreach ( $this->stack
as $node ) {
2566 if ( $node->isHtmlNamed(
'dd' ) ) {
2570 if ( $node->isHtmlNamed(
'dt' ) ) {
2581 if ( $this->stack->inButtonScope(
'p' ) ) {
2590 if ( $this->stack->inScope(
'button' ) ) {
2594 $this->afe->reconstruct( $this->stack );
2599 $activeElement = $this->afe->findElementByTag(
'a' );
2600 if ( $activeElement ) {
2602 if ( $this->afe->isInList( $activeElement ) ) {
2603 $this->afe->remove( $activeElement );
2607 $this->stack->removeElement( $activeElement,
false );
2623 $this->afe->reconstruct( $this->stack );
2624 $this->afe->push( $this->stack->insertHTMLElement(
$value,
$attribs ) );
2628 $this->afe->reconstruct( $this->stack );
2629 if ( $this->stack->inScope(
'nobr' ) ) {
2631 $this->afe->reconstruct( $this->stack );
2633 $this->afe->push( $this->stack->insertHTMLElement(
$value,
$attribs ) );
2639 $this->afe->reconstruct( $this->stack );
2641 $this->afe->insertMarker();
2648 if ( $this->stack->inButtonScope(
'p' ) ) {
2662 $this->afe->reconstruct( $this->stack );
2664 $this->stack->pop();
2669 $this->afe->reconstruct( $this->stack );
2671 $this->stack->pop();
2680 $this->stack->pop();
2684 if ( $this->stack->inButtonScope(
'p' ) ) {
2687 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2688 $this->stack->pop();
2691 $this->stack->pop();
2700 $this->ignoreLinefeed =
true;
2701 $this->inRCDATA =
$value;
2711 $this->afe->reconstruct( $this->stack );
2713 switch ( $this->parseMode ) {
2715 case 'inCaptionMode':
2716 case 'inTableBodyMode':
2728 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
2731 $this->afe->reconstruct( $this->stack );
2736 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2737 $this->stack->pop();
2739 $this->afe->reconstruct( $this->stack );
2745 if ( $this->stack->inScope(
'ruby' ) ) {
2746 $this->stack->generateImpliedEndTags();
2753 if ( $this->stack->inScope(
'ruby' ) ) {
2754 $this->stack->generateImpliedEndTags(
'rtc' );
2760 $this->afe->reconstruct( $this->stack );
2765 $this->stack->insertForeignElement(
2770 $this->stack->pop();
2775 $this->afe->reconstruct( $this->stack );
2780 $this->stack->insertForeignElement(
2785 $this->stack->pop();
2805 $this->afe->reconstruct( $this->stack );
2808 } elseif ( $token ===
'endtag' ) {
2842 if ( !$this->stack->inScope(
$value ) ) {
2845 $this->stack->generateImpliedEndTags();
2846 $this->stack->popTag(
$value );
2850 if ( $this->stack->indexOf(
'template' ) < 0 ) {
2852 $this->formElementPointer =
null;
2853 if ( !$openform || !$this->stack->inScope( $openform ) ) {
2856 $this->stack->generateImpliedEndTags();
2859 $flatten = ( $this->stack->currentNode === $openform );
2860 $this->stack->removeElement( $openform, $flatten );
2862 if ( !$this->stack->inScope(
'form' ) ) {
2865 $this->stack->generateImpliedEndTags();
2866 $this->stack->popTag(
'form' );
2871 if ( !$this->stack->inButtonScope(
'p' ) ) {
2875 $this->stack->generateImpliedEndTags(
$value );
2876 $this->stack->popTag(
$value );
2880 if ( !$this->stack->inListItemScope(
$value ) ) {
2883 $this->stack->generateImpliedEndTags(
$value );
2884 $this->stack->popTag(
$value );
2889 if ( !$this->stack->inScope(
$value ) ) {
2892 $this->stack->generateImpliedEndTags(
$value );
2893 $this->stack->popTag(
$value );
2905 $this->stack->generateImpliedEndTags();
2927 if ( $this->stack->adoptionAgency(
$value, $this->afe ) ) {
2935 if ( !$this->stack->inScope(
$value ) ) {
2938 $this->stack->generateImpliedEndTags();
2939 $this->stack->popTag(
$value );
2940 $this->afe->clearToMarker();
2949 foreach ( $this->stack
as $i => $node ) {
2950 if ( $node->isHtmlNamed(
$value ) ) {
2951 $this->stack->generateImpliedEndTags(
$value );
2952 $this->stack->popTo( $i );
2959 } elseif ( $token ===
'comment' ) {
2960 $this->stack->insertComment(
$value );
2963 Assert::invariant(
false,
"Bad token type: $token" );
2968 if ( $token ===
'text' ) {
2969 if ( $this->textIntegrationMode ) {
2972 $this->pendingTableText =
'';
2978 } elseif ( $token ===
'eof' ) {
2981 } elseif ( $token ===
'tag' ) {
2984 $this->afe->insertMarker();
3009 if ( !$this->stack->inTableScope(
$value ) ) {
3021 if ( !isset(
$attribs[
'type'] ) || strcasecmp(
$attribs[
'type'],
'hidden' ) !== 0 ) {
3025 $this->stack->pop();
3030 $this->formElementPointer ||
3031 $this->stack->indexOf(
'template' ) >= 0
3035 $this->formElementPointer =
3037 $this->stack->popTag( $this->formElementPointer );
3041 } elseif ( $token ===
'endtag' ) {
3044 if ( !$this->stack->inTableScope(
$value ) ) {
3047 $this->stack->popTag(
$value );
3066 } elseif ( $token ===
'comment' ) {
3067 $this->stack->insertComment(
$value );
3071 $this->stack->fosterParentMode =
true;
3073 $this->stack->fosterParentMode =
false;
3078 if ( $token ===
'text' ) {
3079 $this->pendingTableText .=
$value;
3084 $this->pendingTableText =
'';
3085 if ( preg_match(
'/[^\x09\x0A\x0C\x0D\x20]/', $text ) ) {
3087 $this->stack->fosterParentMode =
true;
3089 $this->stack->fosterParentMode =
false;
3092 $this->stack->insertText( $text );
3095 $this->originalInsertionMode, $token,
$value,
$attribs, $selfClose
3101 if ( !$this->stack->inTableScope(
'caption' ) ) {
3104 $this->stack->generateImpliedEndTags();
3105 $this->stack->popTag(
'caption' );
3106 $this->afe->clearToMarker();
3112 if ( $token ===
'tag' ) {
3129 } elseif ( $token ===
'endtag' ) {
3159 if ( $token ===
'text' ) {
3160 if ( preg_match(
'/^[\x09\x0A\x0C\x0D\x20]+/',
$value,
$matches ) ) {
3161 $this->stack->insertText(
$matches[0] );
3164 if ( strlen(
$value ) === 0 ) {
3168 } elseif ( $token ===
'tag' ) {
3173 $this->stack->pop();
3179 } elseif ( $token ===
'endtag' ) {
3182 if ( !$this->stack->currentNode->isHtmlNamed(
'colgroup' ) ) {
3185 $this->stack->pop();
3194 } elseif ( $token ===
'eof' ) {
3196 } elseif ( $token ===
'comment' ) {
3197 $this->stack->insertComment(
$value );
3202 if ( !$this->stack->currentNode->isHtmlNamed(
'colgroup' ) ) {
3212 $this->stack->inTableScope(
'tbody' ) ||
3213 $this->stack->inTableScope(
'thead' ) ||
3214 $this->stack->inTableScope(
'tfoot' )
3219 $this->stack->pop();
3224 if ( $token ===
'tag' ) {
3247 } elseif ( $token ===
'endtag' ) {
3257 if ( $this->stack->inTableScope(
$value ) ) {
3278 if ( !$this->stack->inTableScope(
'tr' ) ) {
3282 $this->stack->pop();
3287 if ( $token ===
'tag' ) {
3294 $this->afe->insertMarker();
3308 } elseif ( $token ===
'endtag' ) {
3322 $this->stack->inTableScope(
$value ) &&
3344 if ( $this->stack->inTableScope(
'td' ) ) {
3347 } elseif ( $this->stack->inTableScope(
'th' ) ) {
3355 if ( $token ===
'tag' ) {
3371 } elseif ( $token ===
'endtag' ) {
3375 if ( $this->stack->inTableScope(
$value ) ) {
3376 $this->stack->generateImpliedEndTags();
3377 $this->stack->popTag(
$value );
3378 $this->afe->clearToMarker();
3394 if ( $this->stack->inTableScope(
$value ) ) {
3395 $this->stack->generateImpliedEndTags();
3397 $this->afe->clearToMarker();
3409 if ( $token ===
'text' ) {
3410 $this->stack->insertText(
$value );
3412 } elseif ( $token ===
'eof' ) {
3414 } elseif ( $token ===
'tag' ) {
3418 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3419 $this->stack->pop();
3424 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3425 $this->stack->pop();
3427 if ( $this->stack->currentNode->isHtmlNamed(
'optgroup' ) ) {
3428 $this->stack->pop();
3438 if ( !$this->stack->inSelectScope(
'select' ) ) {
3447 } elseif ( $token ===
'endtag' ) {
3451 $this->stack->currentNode->isHtmlNamed(
'option' ) &&
3452 $this->stack->length() >= 2 &&
3453 $this->stack->node( $this->stack->length() - 2 )->isHtmlNamed(
'optgroup' )
3455 $this->stack->pop();
3457 if ( $this->stack->currentNode->isHtmlNamed(
'optgroup' ) ) {
3458 $this->stack->pop();
3462 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3463 $this->stack->pop();
3467 if ( !$this->stack->inSelectScope(
$value ) ) {
3470 $this->stack->popTag(
$value );
3476 } elseif ( $token ===
'comment' ) {
3477 $this->stack->insertComment(
$value );
3494 if ( $token ===
'tag' ) {
3497 } elseif ( $token ===
'endtag' ) {
3498 if ( $this->stack->inTableScope(
$value ) ) {
3510 if ( $token ===
'text' || $token ===
'comment' ) {
3512 } elseif ( $token ===
'eof' ) {
3513 if ( $this->stack->indexOf(
'template' ) < 0 ) {
3516 $this->stack->popTag(
'template' );
3517 $this->afe->clearToMarker();
3518 array_pop( $this->templateInsertionModes );
3523 } elseif ( $token ===
'tag' ) {
3565 } elseif ( $token ===
'endtag' ) {
3572 Assert::invariant(
false,
"Bad token type: $token" );