30 use IteratorAggregate;
33 use Wikimedia\Assert\Assert;
34 use Wikimedia\Assert\ParameterAssertionException;
76 self::HTML_NAMESPACE => [
77 'html' =>
true,
'head' =>
true,
'body' =>
true,
'frameset' =>
true,
80 'xmp' =>
true,
'iframe' =>
true,
'noembed' =>
true,
81 'noscript' =>
true,
'script' =>
true,
87 self::HTML_NAMESPACE => [
88 'area' =>
true,
'base' =>
true,
'basefont' =>
true,
89 'bgsound' =>
true,
'br' =>
true,
'col' =>
true,
'command' =>
true,
90 'embed' =>
true,
'frame' =>
true,
'hr' =>
true,
'img' =>
true,
91 'input' =>
true,
'keygen' =>
true,
'link' =>
true,
'meta' =>
true,
92 'param' =>
true,
'source' =>
true,
'track' =>
true,
'wbr' =>
true
97 self::HTML_NAMESPACE => [
98 'pre' =>
true,
'textarea' =>
true,
'listing' =>
true,
103 self::HTML_NAMESPACE => [
104 'h1' =>
true,
'h2' =>
true,
'h3' =>
true,
105 'h4' =>
true,
'h5' =>
true,
'h6' =>
true
110 self::HTML_NAMESPACE => [
111 'address' =>
true,
'applet' =>
true,
'area' =>
true,
112 'article' =>
true,
'aside' =>
true,
'base' =>
true,
113 'basefont' =>
true,
'bgsound' =>
true,
'blockquote' =>
true,
114 'body' =>
true,
'br' =>
true,
'button' =>
true,
'caption' =>
true,
115 'center' =>
true,
'col' =>
true,
'colgroup' =>
true,
'dd' =>
true,
116 'details' =>
true,
'dir' =>
true,
'div' =>
true,
'dl' =>
true,
117 'dt' =>
true,
'embed' =>
true,
'fieldset' =>
true,
118 'figcaption' =>
true,
'figure' =>
true,
'footer' =>
true,
119 'form' =>
true,
'frame' =>
true,
'frameset' =>
true,
'h1' =>
true,
120 'h2' =>
true,
'h3' =>
true,
'h4' =>
true,
'h5' =>
true,
121 'h6' =>
true,
'head' =>
true,
'header' =>
true,
'hgroup' =>
true,
122 'hr' =>
true,
'html' =>
true,
'iframe' =>
true,
'img' =>
true,
123 'input' =>
true,
'li' =>
true,
'link' =>
true,
124 'listing' =>
true,
'main' =>
true,
'marquee' =>
true,
125 'menu' =>
true,
'meta' =>
true,
'nav' =>
true,
126 'noembed' =>
true,
'noframes' =>
true,
'noscript' =>
true,
127 'object' =>
true,
'ol' =>
true,
'p' =>
true,
'param' =>
true,
128 'plaintext' =>
true,
'pre' =>
true,
'script' =>
true,
129 'section' =>
true,
'select' =>
true,
'source' =>
true,
130 'style' =>
true,
'summary' =>
true,
'table' =>
true,
131 'tbody' =>
true,
'td' =>
true,
'template' =>
true,
132 'textarea' =>
true,
'tfoot' =>
true,
'th' =>
true,
'thead' =>
true,
133 'title' =>
true,
'tr' =>
true,
'track' =>
true,
'ul' =>
true,
134 'wbr' =>
true,
'xmp' =>
true
136 self::SVG_NAMESPACE => [
137 'foreignobject' =>
true,
'desc' =>
true,
'title' =>
true
139 self::MATHML_NAMESPACE => [
140 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
141 'mtext' =>
true,
'annotation-xml' =>
true
146 self::HTML_NAMESPACE => [
147 'address' =>
true,
'div' =>
true,
'p' =>
true
152 self::HTML_NAMESPACE => [
153 'table' =>
true,
'thead' =>
true,
'tbody' =>
true,
154 'tfoot' =>
true,
'tr' =>
true
159 self::HTML_NAMESPACE => [
160 'dd' =>
true,
'dt' =>
true,
'li' =>
true,
161 'menuitem' =>
true,
'optgroup' =>
true,
162 'option' =>
true,
'p' =>
true,
'rb' =>
true,
'rp' =>
true,
163 'rt' =>
true,
'rtc' =>
true
168 self::HTML_NAMESPACE => [
169 'caption' =>
true,
'colgroup' =>
true,
'dd' =>
true,
'dt' =>
true,
170 'li' =>
true,
'optgroup' =>
true,
'option' =>
true,
'p' =>
true,
171 'rb' =>
true,
'rp' =>
true,
'rt' =>
true,
'rtc' =>
true,
172 'tbody' =>
true,
'td' =>
true,
'tfoot' =>
true,
'th' =>
true,
173 'thead' =>
true,
'tr' =>
true
178 self::HTML_NAMESPACE => [
179 'td' =>
true,
'th' =>
true
183 self::HTML_NAMESPACE => [
184 'table' =>
true,
'template' =>
true,
'html' =>
true
189 self::HTML_NAMESPACE => [
190 'tbody' =>
true,
'tfoot' =>
true,
'thead' =>
true,
191 'template' =>
true,
'html' =>
true
196 self::HTML_NAMESPACE => [
197 'tr' =>
true,
'template' =>
true,
'html' =>
true
203 self::HTML_NAMESPACE => [
204 'button' =>
true,
'fieldset' =>
true,
'input' =>
true,
205 'keygen' =>
true,
'object' =>
true,
'output' =>
true,
206 'select' =>
true,
'textarea' =>
true,
'img' =>
true
211 self::HTML_NAMESPACE => [
212 'applet' =>
true,
'caption' =>
true,
'html' =>
true,
213 'marquee' =>
true,
'object' =>
true,
214 'table' =>
true,
'td' =>
true,
'template' =>
true,
217 self::SVG_NAMESPACE => [
218 'foreignobject' =>
true,
'desc' =>
true,
'title' =>
true
220 self::MATHML_NAMESPACE => [
221 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
222 'mtext' =>
true,
'annotation-xml' =>
true
228 if ( self::$inListItemScopeSet ===
null ) {
238 if ( self::$inButtonScopeSet ===
null ) {
246 self::HTML_NAMESPACE => [
247 'html' =>
true,
'table' =>
true,
'template' =>
true
252 self::HTML_NAMESPACE => [
253 'option' =>
true,
'optgroup' =>
true
258 self::MATHML_NAMESPACE => [
259 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
265 self::SVG_NAMESPACE => [
266 'foreignobject' =>
true,
274 self::HTML_NAMESPACE => [
275 'body' =>
true,
'blockquote' =>
true,
284 self::HTML_NAMESPACE => [
285 'a' =>
true,
'abbr' =>
true,
'acronym' =>
true,
'applet' =>
true,
286 'b' =>
true,
'basefont' =>
true,
'bdo' =>
true,
'big' =>
true,
287 'br' =>
true,
'button' =>
true,
'cite' =>
true,
'code' =>
true,
288 'dfn' =>
true,
'em' =>
true,
'font' =>
true,
'i' =>
true,
289 'iframe' =>
true,
'img' =>
true,
'input' =>
true,
'kbd' =>
true,
290 'label' =>
true,
'legend' =>
true,
'map' =>
true,
'object' =>
true,
291 'param' =>
true,
'q' =>
true,
'rb' =>
true,
'rbc' =>
true,
292 'rp' =>
true,
'rt' =>
true,
'rtc' =>
true,
'ruby' =>
true,
293 's' =>
true,
'samp' =>
true,
'select' =>
true,
'small' =>
true,
294 'span' =>
true,
'strike' =>
true,
'strong' =>
true,
'sub' =>
true,
295 'sup' =>
true,
'textarea' =>
true,
'tt' =>
true,
'u' =>
true,
298 'video' =>
true,
'audio' =>
true,
'bdi' =>
true,
'data' =>
true,
299 'time' =>
true,
'mark' =>
true,
384 $this->parent =
null;
385 $this->children = [];
// removeChild fragment (identified by its assertion message). NOTE(review): this
// is a partial extract -- the leading numbers are original line numbers, and the
// method signature plus lines 395-396, 398 and 401 are not shown.
// Precondition: this node's own parent must not be the 'flat' sentinel string.
393 Assert::precondition(
394 $this->parent !==
'flat',
"Can't removeChild after flattening $this"
// Arguments of a further check (its call line is not shown): $elt's parent
// must be this exact object.
397 $elt->parent === $this,
'elt',
'must have $this as a parent'
// Strict (third argument true) search, so $elt is matched by identity, not by
// loose equality.
399 $idx = array_search( $elt, $this->children,
true );
// array_search() returns false when nothing is found; that is rejected here.
400 Assert::parameter( $idx !==
false,
'$elt',
'must be a child of $this' );
// Remove exactly one entry at the found position; array_splice() renumbers the
// remaining children.
402 array_splice( $this->children, $idx, 1 );
// insertBefore fragment (identified by its assertion message). NOTE(review):
// partial extract -- the signature and lines 413, 418, 422 and 424+ are not
// shown, so the else/closing structure around the two splices is not visible.
// Precondition: this node's parent must not be the 'flat' sentinel string.
411 Assert::precondition(
412 $this->parent !==
'flat',
"Can't insertBefore after flattening."
// Locate the reference child $a by identity (strict search).
414 $idx = array_search( $a, $this->children,
true );
415 Assert::parameter( $idx !==
false,
'$a',
'must be a child of $this' );
// String children are spliced in directly at $a's position (length 0 means
// nothing is removed, so $b ends up immediately before $a).
416 if ( is_string( $b ) ) {
417 array_splice( $this->children, $idx, 0, [ $b ] );
// Non-string $b: it must not carry the 'flat' parent sentinel.
419 Assert::parameter( $b->parent !==
'flat',
'$b',
"Can't be flat" );
// If $b currently has a parent, detach it from that parent first.
420 if ( $b->parent !==
null ) {
421 $b->parent->removeChild( $b );
// Insert $b at the index computed above. NOTE(review): $idx was computed
// before the removeChild() call; whether the missing lines recompute it or
// set $b->parent is not visible here.
423 array_splice( $this->children, $idx, 0, [ $b ] );
// appendChild fragment (identified by its assertion message). NOTE(review):
// partial extract -- the signature and lines 435, 438-440, 443 and 446+ are not
// shown.
// Precondition: this node's parent must not be the 'flat' sentinel string.
433 Assert::precondition(
434 $this->parent !==
'flat',
"Can't appendChild after flattening."
// String children are pushed onto the end of the child list as-is.
436 if ( is_string( $elt ) ) {
437 array_push( $this->children, $elt );
// Otherwise, an $elt that already has a parent is detached from it via
// removeChild() before being appended here.
441 if ( $elt->parent !==
null ) {
442 $elt->parent->removeChild( $elt );
// Append $elt and record this node as its parent.
444 array_push( $this->children, $elt );
445 $elt->parent = $this;
// adoptChildren fragment (identified by its assertion message). NOTE(review):
// partial extract -- the signature and lines 455, 458-459 and 461+ are not
// shown, so what happens to each child besides the parent reset is not visible.
// Precondition: the donor $elt must not carry the 'flat' parent sentinel.
453 Assert::precondition(
454 $elt->parent !==
'flat',
"Can't adoptChildren after flattening."
// Walk every child of the donor element.
456 foreach ( $elt->children
as $child ) {
// Only non-string children have a parent property to update.
457 if ( !is_string( $child ) ) {
// Clear the child's parent link.
460 $child->parent =
null;
478 Assert::parameter( $this->parent !==
null,
'$this',
'must be a child' );
479 Assert::parameter( $this->parent !==
'flat',
'$this',
'already flat' );
480 $idx = array_search( $this, $this->parent->children,
true );
482 $idx !==
false,
'$this',
'must be a child of its parent'
484 $tidyCompat =
$config[
'tidyCompat'];
487 foreach ( $this->children
as $elt ) {
488 if ( !is_string( $elt ) ) {
489 $elt = $elt->flatten(
$config );
491 if ( $blank && preg_match(
'/[^\t\n\f\r ]/', $elt ) ) {
496 $this->localName =
'p';
497 } elseif ( $blank ) {
500 if ( !count( $this->attribs ) &&
501 ( $this->localName ===
'tr' || $this->localName ===
'li' )
503 $this->attribs = [
'class' =>
"mw-empty-elt" ];
508 count( $this->children ) > 0 &&
509 substr( $this->children[0], 0, 1 ) ==
"\n"
515 array_unshift( $this->children,
"\n" );
517 $flat = $blank ?
'' :
"{$this}";
521 $this->parent->children[$idx] = $flat;
522 $this->parent =
'flat';
537 $encAttribs .=
" $name=\"$encValue\"";
540 $out =
"<{$this->localName}{$encAttribs}>";
541 $len = strlen(
$out );
543 foreach ( $this->children
as $elt ) {
546 $out .=
"</{$this->localName}>";
548 $out =
"<{$this->localName}{$encAttribs} />";
550 count( $this->children ) === 0,
551 "Empty elements shouldn't have children."
// Tests whether this element matches $set. NOTE(review): partial extract --
// the leading numbers are original line numbers; lines 568, 573-574 and 576+
// are not shown, so the conditions guarding the first and last return
// statements are not visible here.
567 public function isA( $set ) {
// Identity comparison: true only when $set is this very object.
569 return $this === $set;
570 } elseif ( is_array( $set ) ) {
// Array form: $set is indexed first by namespace URI, then by local name;
// the element matches when both levels have an entry.
571 return isset( $set[$this->namespaceURI] ) &&
572 isset( $set[$this->namespaceURI][$this->localName] );
// Remaining case (its condition is not shown): $set is compared strictly
// against the local name, and only elements for which isHtml() holds match.
575 return $this->
isHtml() && $this->localName === $set;
586 && $this->localName === $tagName;
622 $this->localName ===
'annotation-xml' &&
623 isset( $this->attribs[
'encoding'] ) &&
624 ( strcasecmp( $this->attribs[
'encoding'],
'text/html' ) == 0 ||
625 strcasecmp( $this->attribs[
'encoding'],
'application/xhtml+xml' ) == 0 )
637 if ( $this->noahKey ===
null ) {
640 $this->noahKey =
serialize( [ $this->namespaceURI, $this->localName,
$attribs ] );
696 $this->currentNode = $this->elements[0];
708 foreach ( $this->elements[0]->children
as $elt ) {
709 $out .= is_string( $elt ) ? $elt :
710 $elt->flatten( $this->config );
735 $this->fosterParentMode &&
740 $this->config[
'tidyCompat'] && !$isComment &&
746 $this->currentNode->appendChild(
$value );
788 $this->currentNode->isHtmlNamed(
'mw:p-wrap' ) &&
795 $this->fosterParentMode &&
800 $this->currentNode->appendChild( $elt );
802 Assert::invariant( $elt->parent !==
null,
"$elt must be in tree" );
803 Assert::invariant( $elt->parent !==
'flat',
"$elt must not have been previous flattened" );
804 array_push( $this->elements, $elt );
805 $this->currentNode = $elt;
858 foreach ( $this
as $elt ) {
859 if ( $elt->isA( $tag ) ) {
877 foreach ( $this
as $elt ) {
878 if ( $elt->isA( $tag ) ) {
881 if ( $elt->isA( $set ) ) {
895 $endTagSet = $thorough ?
898 while ( $this->currentNode ) {
899 if ( $butnot !==
null && $this->currentNode->isHtmlNamed( $butnot ) ) {
902 if ( !$this->currentNode->isA( $endTagSet ) ) {
915 return ( $fragmentContext && count( $this->elements ) === 1 ) ?
935 return $this->elements[ $idx ];
944 Assert::precondition(
945 $this->elements[$idx]->parent !==
'flat',
946 'Replaced element should not have already been flattened.'
948 Assert::precondition(
949 $elt->parent !==
'flat',
950 'New element should not have already been flattened.'
952 $this->elements[$idx] = $elt;
953 if ( $idx === count( $this->elements ) - 1 ) {
954 $this->currentNode = $elt;
965 for ( $i = count( $this->elements ) - 1; $i >= 0; $i-- ) {
966 if ( $this->elements[$i]->isA( $tag ) ) {
978 return count( $this->elements );
986 $elt = array_pop( $this->elements );
987 if ( count( $this->elements ) ) {
988 $this->currentNode = $this->elements[ count( $this->elements ) - 1 ];
990 $this->currentNode =
null;
992 if ( !$elt->isHtmlNamed(
'mw:p-wrap' ) ) {
993 $elt->flatten( $this->config );
1003 for ( $length = count( $this->elements ); $length > $idx; $length-- ) {
1015 while ( $this->currentNode ) {
1016 if ( $this->currentNode->isA( $tag ) ) {
1031 for ( $length = count( $this->elements ); $length > 1; $length-- ) {
1032 if ( $this->currentNode->isA( $set ) ) {
1047 $elt->parent !==
'flat',
1049 '$elt should not already have been flattened.'
1052 $elt->parent->parent !==
'flat',
1054 'The parent of $elt should not already have been flattened.'
1056 $idx = array_search( $elt, $this->elements,
true );
1057 Assert::parameter( $idx !==
false,
'$elt',
'must be in stack' );
1058 array_splice( $this->elements, $idx, 1 );
1059 if ( $idx === count( $this->elements ) ) {
1060 $this->currentNode = $this->elements[$idx - 1];
1067 $elt->
flatten( $this->config );
1069 Assert::postcondition(
1070 array_search( $elt, $this->elements,
true ) ===
false,
1071 '$elt should no longer be in open elements stack'
1082 Assert::parameter( $idx !==
false,
'$a',
'must be in stack' );
1083 if ( $idx === count( $this->elements ) - 1 ) {
1084 array_push( $this->elements, $b );
1085 $this->currentNode = $b;
1087 array_splice( $this->elements, $idx + 1, 0, [ $b ] );
1101 $lastTable = $this->
indexOf(
'table' );
1102 $lastTemplate = $this->
indexOf(
'template' );
1106 if ( $lastTemplate >= 0 && ( $lastTable < 0 || $lastTemplate > $lastTable ) ) {
1107 $parent = $this->elements[$lastTemplate];
1108 } elseif ( $lastTable >= 0 ) {
1109 $parent = $this->elements[$lastTable]->parent;
1112 $parent !==
null,
"All tables should have parents"
1114 $before = $this->elements[$lastTable];
1116 $parent = $this->elements[0];
1119 if ( $this->config[
'tidyCompat'] ) {
1120 if ( is_string( $elt ) ) {
1129 if ( $elt->isHtmlNamed(
'mw:p-wrap' ) ) {
1131 array_search( $before, $parent->children,
true ) :
1132 count( $parent->children );
1133 $after = $idx > 0 ? $parent->children[$idx - 1] :
'';
1145 $parent->insertBefore( $before, $elt );
1147 $parent->appendChild( $elt );
1168 $this->currentNode->isHtmlNamed( $tag ) &&
1169 !$afe->isInList( $this->currentNode )
1177 for ( $outer = 0; $outer < 8; $outer++ ) {
1183 $fmtElt = $afe->findElementByTag( $tag );
1194 $index = $this->
indexOf( $fmtElt );
1196 $afe->remove( $fmtElt );
1204 if ( !$this->
inScope( $fmtElt ) ) {
1212 $furthestBlock =
null;
1213 $furthestBlockIndex = -1;
1214 $stackLength = $this->
length();
1215 for ( $i = $index + 1; $i < $stackLength; $i++ ) {
1217 $furthestBlock = $this->
node( $i );
1218 $furthestBlockIndex = $i;
1229 if ( !$furthestBlock ) {
1230 $this->
popTag( $fmtElt );
1231 $afe->remove( $fmtElt );
1237 $ancestor = $this->
node( $index - 1 );
1243 $BOOKMARK =
new BalanceElement(
'[bookmark]',
'[bookmark]', [] );
1244 $afe->insertAfter( $fmtElt, $BOOKMARK );
1247 $node = $furthestBlock;
1248 $lastNode = $furthestBlock;
1249 $nodeIndex = $furthestBlockIndex;
1253 for ( $inner = 1;
true; $inner++ ) {
1260 $node = $this->
node( --$nodeIndex );
1264 if ( $node === $fmtElt )
break;
1269 $isAFE = $afe->isInList( $node );
1270 if ( $inner > 3 && $isAFE ) {
1271 $afe->remove( $node );
1294 $node->namespaceURI, $node->localName, $node->attribs );
1295 $afe->replace( $node, $newElt );
1296 $this->
replaceAt( $nodeIndex, $newElt );
1302 if ( $lastNode === $furthestBlock ) {
1303 $afe->remove( $BOOKMARK );
1304 $afe->insertAfter( $newElt, $BOOKMARK );
1309 $node->appendChild( $lastNode );
1320 $this->fosterParentMode &&
1328 $ancestor->appendChild( $lastNode );
1335 $fmtElt->namespaceURI, $fmtElt->localName, $fmtElt->attribs );
1339 $newElt2->adoptChildren( $furthestBlock );
1342 $furthestBlock->appendChild( $newElt2 );
1348 $afe->remove( $fmtElt );
1349 $afe->replace( $BOOKMARK, $newElt2 );
1369 foreach ( $this->elements
as $elt ) {
1370 array_push( $r, $elt->localName );
1372 return implode(
' ', $r );
1425 for ( $node = $this->head; $node; $node = $next ) {
1426 $next = $node->nextAFE;
1427 $node->prevAFE = $node->nextAFE = $node->nextNoah =
null;
1429 $this->head = $this->tail = $this->noahTableStack =
null;
1434 if ( $this->tail ) {
1435 $this->tail->nextAFE = $elt;
1441 $this->noahTableStack[] = [];
1451 if ( $elt->prevAFE !==
null || $this->head === $elt ) {
1452 throw new ParameterAssertionException(
'$elt',
1453 'Cannot insert a node into the AFE list twice' );
1460 $table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
1461 if ( !isset( $table[$noahKey] ) ) {
1462 $table[$noahKey] = $elt;
1466 while (
$tail->nextNoah ) {
1470 if ( $count >= 3 ) {
1471 $this->
remove(
$head );
1473 $tail->nextNoah = $elt;
1476 if ( $this->tail ) {
1477 $this->tail->nextAFE = $elt;
1494 $prev =
$tail->prevAFE;
1495 $tail->prevAFE =
null;
1497 $prev->nextAFE =
null;
1499 $tail->nextNoah =
null;
1504 $prev =
$tail->prevAFE;
1506 $prev->nextAFE =
null;
1509 array_pop( $this->noahTableStack );
1512 $this->noahTableStack[0] = [];
1518 $this->tail =
$tail;
1531 if ( $elt->localName === $tag ) {
1534 $elt = $elt->prevAFE;
1545 return $this->head === $elt || $elt->prevAFE;
// Fragment that unlinks $elt from the list threaded through prevAFE/nextAFE
// (identified by its exception message). NOTE(review): partial extract -- the
// signature, the closing braces and everything after line 1575 are not shown.
// Membership test: an element is treated as absent when it is not the head and
// has no prevAFE link.
1555 if ( $this->head !== $elt && !$elt->prevAFE ) {
1556 throw new ParameterAssertionException(
'$elt',
1557 "Attempted to remove an element which is not in the AFE list" );
// Removing the head: the next entry becomes the new head.
1560 if ( $this->head === $elt ) {
1561 $this->head = $elt->nextAFE;
// Removing the tail: the previous entry becomes the new tail.
1563 if ( $this->tail === $elt ) {
1564 $this->tail = $elt->prevAFE;
// Bridge the neighbours over $elt in both directions.
1567 if ( $elt->prevAFE ) {
1568 $elt->prevAFE->nextAFE = $elt->nextAFE;
1571 if ( $elt->nextAFE ) {
1572 $elt->nextAFE->prevAFE = $elt->prevAFE;
// Clear $elt's own links so the membership test above reports it as absent.
1575 $elt->prevAFE = $elt->nextAFE =
null;
1582 $table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
1583 if ( !isset( $table[$noahKey] ) ) {
1584 $table[$noahKey] = $elt;
1586 $tail = $table[$noahKey];
1587 while (
$tail->nextNoah ) {
1590 $tail->nextNoah = $elt;
1595 $table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
1597 $noahElt = $table[$key];
1598 if ( $noahElt === $elt ) {
1599 if ( $noahElt->nextNoah ) {
1600 $table[$key] = $noahElt->nextNoah;
1601 $noahElt->nextNoah =
null;
1603 unset( $table[$key] );
1607 $prevNoahElt = $noahElt;
1608 $noahElt = $prevNoahElt->nextNoah;
1609 if ( $noahElt === $elt ) {
1611 $prevNoahElt->nextNoah = $elt->nextNoah;
1612 $elt->nextNoah =
null;
1615 }
while ( $noahElt );
// Fragment that puts $b in $a's position in the list threaded through
// prevAFE/nextAFE (identified by its exception message). NOTE(review): partial
// extract -- the signature and the bodies of the head/tail branches (lines
// 1632 and 1635) are not shown.
// $a must currently be in the list: either it is the head or it has a prevAFE.
1626 if ( $this->head !== $a && !$a->prevAFE ) {
1627 throw new ParameterAssertionException(
'$a',
1628 "Attempted to replace an element which is not in the AFE list" );
// Head and tail cases; the statements inside these branches are not visible.
1631 if ( $this->head === $a ) {
1634 if ( $this->tail === $a ) {
// Point both neighbours of $a at $b instead.
1638 if ( $a->prevAFE ) {
1639 $a->prevAFE->nextAFE = $b;
1642 if ( $a->nextAFE ) {
1643 $a->nextAFE->prevAFE = $b;
// $b takes over $a's links, then $a's links are cleared so it no longer
// appears to be in the list.
1645 $b->prevAFE = $a->prevAFE;
1646 $b->nextAFE = $a->nextAFE;
1647 $a->nextAFE = $a->prevAFE =
null;
// Fragment that links $b into the list directly after $a (identified by its
// exception message). NOTE(review): partial extract -- the signature, the body
// of the tail branch (line 1665) and everything after line 1670 are not shown,
// so the assignments to $a->nextAFE and $b->prevAFE are not visible here.
// $a must currently be in the list: either it is the head or it has a prevAFE.
1660 if ( $this->head !== $a && !$a->prevAFE ) {
1661 throw new ParameterAssertionException(
'$a',
1662 "Attempted to insert after an element which is not in the AFE list" );
// Tail case; the statement inside this branch is not visible.
1664 if ( $this->tail === $a ) {
// The element that used to follow $a now has $b as its predecessor.
1667 if ( $a->nextAFE ) {
1668 $a->nextAFE->prevAFE = $b;
// $b inherits $a's former successor.
1670 $b->nextAFE = $a->nextAFE;
1693 if ( $stack->indexOf( $entry ) >= 0 ) {
1700 while ( $entry->prevAFE ) {
1701 $entry = $entry->prevAFE;
1702 if ( $entry instanceof
BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
1713 $entry = $entry->nextAFE;
1716 $newElement = $stack->insertHTMLElement(
1719 $this->
replace( $entry, $newElement );
1720 $entry = $newElement->nextAFE;
1730 for ( $node = $this->head; $node; $prev = $node, $node = $node->nextAFE ) {
1735 $s .= $node->localName .
'#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
1736 if ( $node->nextNoah ) {
1737 $s .=
" (noah sibling: {$node->nextNoah->localName}#" .
1738 substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
1741 if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
1742 $s .=
" (reverse link is wrong!)";
1746 if ( $prev !== $this->tail ) {
1747 $s .=
"(tail pointer is wrong!)\n";
1836 ( # 1. Comment match detector
1837 > | -> | # Invalid short close
1838 ( # 2. Comment contents
1848 ( # 3. Comment close
1849 --> | # Normal close
1850 --!> | # Comment end bang
1851 ( # 4. Indicate matches requiring EOF
1852 --! | # EOF in comment end bang state
1853 -- | # EOF in comment end state
1854 - | # EOF in comment end dash state
1855 (?#nothing) # EOF in comment state
1859 ([^<]*) \z # 5. Non-tag text after the comment
1891 'allowedHtmlElements' =>
null,
1892 'tidyCompat' =>
false,
1893 'allowComments' =>
true,
1895 $this->allowedHtmlElements =
$config[
'allowedHtmlElements'];
1896 $this->strict =
$config[
'strict'];
1897 $this->allowComments =
$config[
'allowComments'];
1898 if ( $this->allowedHtmlElements !==
null ) {
1900 $bad = array_uintersect_assoc(
1901 $this->allowedHtmlElements,
1903 function ( $a, $b ) {
1909 if ( count( $bad ) > 0 ) {
1910 $badstr = implode(
',', array_keys( $bad ) );
1911 throw new ParameterAssertionException(
1913 'Balance attempted with sanitization including ' .
1914 "unsupported elements: {$badstr}"
1933 $this->parseMode =
'inBodyMode';
1940 $this->textIntegrationMode =
1941 $this->ignoreLinefeed =
1943 $this->inRAWTEXT =
false;
1947 $this->fragmentContext =
1950 $this->formElementPointer =
null;
1951 for (
$e = $this->fragmentContext;
$e !=
null;
$e =
$e->parent ) {
1952 if (
$e->isHtmlNamed(
'form' ) ) {
1953 $this->formElementPointer =
$e;
1959 $x = $this->bitsIterator->current();
1960 $this->bitsIterator->next();
1961 $this->
insertToken(
'text', str_replace(
'>',
'>', $x ) );
1963 while ( $this->bitsIterator->valid() ) {
1967 $result = $this->stack->getOutput();
1969 $this->bitsIterator =
null;
1971 $this->stack =
null;
1972 $this->fragmentContext =
null;
1973 $this->formElementPointer =
null;
1983 if ( $token ===
'tag' || $token ===
'endtag' ) {
1989 "Unsupported $token <$value> found."
1993 } elseif ( $token ===
'text' &&
$value ===
'' ) {
1998 if ( $this->ignoreLinefeed ) {
1999 $this->ignoreLinefeed =
false;
2000 if ( $token ===
'text' ) {
2001 if (
$value[0] ===
"\n" ) {
2011 $adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );
2016 $this->stack->length() === 0 ||
2017 $adjusted->isHtml() ||
2021 } elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
2022 if ( $token ===
'text' ) {
2032 $adjusted->localName ===
'annotation-xml' &&
2033 $token ===
'tag' &&
$value ===
'svg'
2037 $adjusted->isHtmlIntegrationPoint() &&
2038 ( $token ===
'tag' || $token ===
'text' )
2051 if ( $token ===
'text' ) {
2052 $this->stack->insertText(
$value );
2054 } elseif ( $token ===
'comment' ) {
2055 $this->stack->insertComment(
$value );
2057 } elseif ( $token ===
'tag' ) {
2111 if ( $this->fragmentContext ) {
2115 $this->stack->pop();
2116 $node = $this->stack->currentNode;
2118 $node->isMathmlTextIntegrationPoint() ||
2119 $node->isHtmlIntegrationPoint() ||
2128 $adjusted = ( $this->fragmentContext && $this->stack->length() === 1 ) ?
2129 $this->fragmentContext : $this->stack->currentNode;
2130 $this->stack->insertForeignElement(
2134 $this->stack->pop();
2137 } elseif ( $token ===
'endtag' ) {
2139 foreach ( $this->stack
as $i => $node ) {
2140 if ( $node->isHtml() && !$first ) {
2144 } elseif ( $i === 0 ) {
2146 } elseif ( $node->localName ===
$value ) {
2147 $this->stack->popTag( $node );
2160 $x = $this->bitsIterator->current();
2161 $this->bitsIterator->next();
2166 $this->allowComments &&
2167 !( $this->inRCDATA || $this->inRAWTEXT ) &&
2168 preg_match( self::VALID_COMMENT_REGEX, $x, $regs, PREG_OFFSET_CAPTURE ) &&
2170 ( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
2172 $contents = $regs[2][0];
2173 $rest = $regs[5][0];
2175 $this->
insertToken(
'text', str_replace(
'>',
'>', $rest ) );
2184 list( , $slash,
$t, $attribStr, $brace, $rest ) = $regs;
2185 $t = strtolower(
$t );
2186 if ( $this->strict ) {
2190 '/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
2192 "Bad attribute string found"
2197 !$this->strict,
"< found which does not start a valid tag"
2199 $slash =
$t = $attribStr = $brace = $rest =
null;
2202 if ( $this->inRCDATA ) {
2203 if ( $slash &&
$t === $this->inRCDATA ) {
2204 $this->inRCDATA =
false;
2210 if ( $this->inRAWTEXT ) {
2211 if ( $slash &&
$t === $this->inRAWTEXT ) {
2212 $this->inRAWTEXT =
false;
2218 $sanitize = $this->allowedHtmlElements !==
null;
2220 $goodTag =
$t && isset( $this->allowedHtmlElements[
$t] );
2223 if ( is_callable( $this->processingCallback ) ) {
2224 call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
2238 $slash ?
'endtag' :
'tag',
$t,
$attribs, $brace ===
'/>'
2242 $rest = str_replace(
'>',
'>', $rest );
2243 $this->
insertToken(
'text', str_replace(
'>',
'>', $rest ) );
2244 } elseif ( $this->inRAWTEXT ) {
2248 $this->
insertToken(
'text',
'<' . str_replace(
'>',
'>', $x ) );
2254 substr( $mode, -4 ) ===
'Mode',
'$mode',
'should end in Mode'
2257 $this->parseMode = $mode;
2268 foreach ( $this->stack
as $i => $node ) {
2271 if ( $this->fragmentContext ) {
2275 if ( $node->isHtml() ) {
2276 switch ( $node->localName ) {
2278 $stackLength = $this->stack->length();
2279 for ( $j = $i + 1; $j < $stackLength - 1; $j++ ) {
2280 $ancestor = $this->stack->node( $stackLength - $j - 1 );
2281 if ( $ancestor->isHtmlNamed(
'template' ) ) {
2284 if ( $ancestor->isHtmlNamed(
'table' ) ) {
2310 array_slice( $this->templateInsertionModes, -1 )[0]
2346 $this->stack->popTo( 1 );
2351 $this->inRAWTEXT =
$value;
2352 $this->originalInsertionMode = $this->
switchMode(
'inTextMode' );
2357 if ( $token ===
'text' ) {
2358 $this->stack->insertText(
$value );
2360 } elseif ( $token ===
'eof' ) {
2361 $this->stack->pop();
2363 $this->originalInsertionMode, $token,
$value,
$attribs, $selfClose
2365 } elseif ( $token ===
'endtag' ) {
2366 $this->stack->pop();
2367 $this->
switchMode( $this->originalInsertionMode );
2374 if ( $token ===
'text' ) {
2375 if ( preg_match(
'/^[\x09\x0A\x0C\x0D\x20]+/',
$value,
$matches ) ) {
2376 $this->stack->insertText(
$matches[0] );
2379 if ( strlen(
$value ) === 0 ) {
2383 } elseif ( $token ===
'tag' ) {
2394 $this->stack->pop();
2404 $this->afe->insertMarker();
2411 } elseif ( $token ===
'endtag' ) {
2419 if ( $this->stack->indexOf(
$value ) < 0 ) {
2422 $this->stack->generateImpliedEndTags(
null,
true );
2423 $this->stack->popTag(
$value );
2424 $this->afe->clearToMarker();
2425 array_pop( $this->templateInsertionModes );
2432 } elseif ( $token ===
'comment' ) {
2433 $this->stack->insertComment(
$value );
2444 if ( $token ===
'text' ) {
2445 $this->afe->reconstruct( $this->stack );
2446 $this->stack->insertText(
$value );
2448 } elseif ( $token ===
'eof' ) {
2449 if ( !empty( $this->templateInsertionModes ) ) {
2454 } elseif ( $token ===
'tag' ) {
2494 if ( $this->stack->inButtonScope(
'p' ) ) {
2501 if ( $this->stack->inButtonScope(
"p" ) ) {
2504 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2505 $this->stack->pop();
2516 if ( $this->stack->inButtonScope(
'p' ) ) {
2520 $this->stack->pop();
2527 if ( $this->stack->inButtonScope(
'p' ) ) {
2531 $this->ignoreLinefeed =
true;
2537 $this->formElementPointer &&
2538 $this->stack->indexOf(
'template' ) < 0
2542 if ( $this->stack->inButtonScope(
"p" ) ) {
2546 if ( $this->stack->indexOf(
'template' ) < 0 ) {
2547 $this->formElementPointer = $elt;
2553 foreach ( $this->stack
as $node ) {
2554 if ( $node->isHtmlNamed(
'li' ) ) {
2565 if ( $this->stack->inButtonScope(
'p' ) ) {
2574 foreach ( $this->stack
as $node ) {
2575 if ( $node->isHtmlNamed(
'dd' ) ) {
2579 if ( $node->isHtmlNamed(
'dt' ) ) {
2590 if ( $this->stack->inButtonScope(
'p' ) ) {
2599 if ( $this->stack->inScope(
'button' ) ) {
2603 $this->afe->reconstruct( $this->stack );
2608 $activeElement = $this->afe->findElementByTag(
'a' );
2609 if ( $activeElement ) {
2611 if ( $this->afe->isInList( $activeElement ) ) {
2612 $this->afe->remove( $activeElement );
2616 $this->stack->removeElement( $activeElement,
false );
2632 $this->afe->reconstruct( $this->stack );
2633 $this->afe->push( $this->stack->insertHTMLElement(
$value,
$attribs ) );
2637 $this->afe->reconstruct( $this->stack );
2638 if ( $this->stack->inScope(
'nobr' ) ) {
2640 $this->afe->reconstruct( $this->stack );
2642 $this->afe->push( $this->stack->insertHTMLElement(
$value,
$attribs ) );
2648 $this->afe->reconstruct( $this->stack );
2650 $this->afe->insertMarker();
2657 if ( $this->stack->inButtonScope(
'p' ) ) {
2671 $this->afe->reconstruct( $this->stack );
2673 $this->stack->pop();
2678 $this->afe->reconstruct( $this->stack );
2680 $this->stack->pop();
2689 $this->stack->pop();
2693 if ( $this->stack->inButtonScope(
'p' ) ) {
2696 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2697 $this->stack->pop();
2700 $this->stack->pop();
2709 $this->ignoreLinefeed =
true;
2710 $this->inRCDATA =
$value;
2720 $this->afe->reconstruct( $this->stack );
2722 switch ( $this->parseMode ) {
2724 case 'inCaptionMode':
2725 case 'inTableBodyMode':
2737 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
2740 $this->afe->reconstruct( $this->stack );
2745 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2746 $this->stack->pop();
2748 $this->afe->reconstruct( $this->stack );
2754 if ( $this->stack->inScope(
'ruby' ) ) {
2755 $this->stack->generateImpliedEndTags();
2762 if ( $this->stack->inScope(
'ruby' ) ) {
2763 $this->stack->generateImpliedEndTags(
'rtc' );
2769 $this->afe->reconstruct( $this->stack );
2774 $this->stack->insertForeignElement(
2779 $this->stack->pop();
2784 $this->afe->reconstruct( $this->stack );
2789 $this->stack->insertForeignElement(
2794 $this->stack->pop();
2814 $this->afe->reconstruct( $this->stack );
2817 } elseif ( $token ===
'endtag' ) {
2851 if ( !$this->stack->inScope(
$value ) ) {
2854 $this->stack->generateImpliedEndTags();
2855 $this->stack->popTag(
$value );
2859 if ( $this->stack->indexOf(
'template' ) < 0 ) {
2861 $this->formElementPointer =
null;
2862 if ( !$openform || !$this->stack->inScope( $openform ) ) {
2865 $this->stack->generateImpliedEndTags();
2868 $flatten = ( $this->stack->currentNode === $openform );
2869 $this->stack->removeElement( $openform, $flatten );
2871 if ( !$this->stack->inScope(
'form' ) ) {
2874 $this->stack->generateImpliedEndTags();
2875 $this->stack->popTag(
'form' );
2880 if ( !$this->stack->inButtonScope(
'p' ) ) {
2884 $this->stack->generateImpliedEndTags(
$value );
2885 $this->stack->popTag(
$value );
2889 if ( !$this->stack->inListItemScope(
$value ) ) {
2892 $this->stack->generateImpliedEndTags(
$value );
2893 $this->stack->popTag(
$value );
2898 if ( !$this->stack->inScope(
$value ) ) {
2901 $this->stack->generateImpliedEndTags(
$value );
2902 $this->stack->popTag(
$value );
2914 $this->stack->generateImpliedEndTags();
2936 if ( $this->stack->adoptionAgency(
$value, $this->afe ) ) {
2944 if ( !$this->stack->inScope(
$value ) ) {
2947 $this->stack->generateImpliedEndTags();
2948 $this->stack->popTag(
$value );
2949 $this->afe->clearToMarker();
2958 foreach ( $this->stack
as $i => $node ) {
2959 if ( $node->isHtmlNamed(
$value ) ) {
2960 $this->stack->generateImpliedEndTags(
$value );
2961 $this->stack->popTo( $i );
2968 } elseif ( $token ===
'comment' ) {
2969 $this->stack->insertComment(
$value );
2972 Assert::invariant(
false,
"Bad token type: $token" );
2977 if ( $token ===
'text' ) {
2978 if ( $this->textIntegrationMode ) {
2981 $this->pendingTableText =
'';
2987 } elseif ( $token ===
'eof' ) {
2990 } elseif ( $token ===
'tag' ) {
2993 $this->afe->insertMarker();
3018 if ( !$this->stack->inTableScope(
$value ) ) {
3030 if ( !isset(
$attribs[
'type'] ) || strcasecmp(
$attribs[
'type'],
'hidden' ) !== 0 ) {
3034 $this->stack->pop();
3039 $this->formElementPointer ||
3040 $this->stack->indexOf(
'template' ) >= 0
3044 $this->formElementPointer =
3046 $this->stack->popTag( $this->formElementPointer );
3050 } elseif ( $token ===
'endtag' ) {
3053 if ( !$this->stack->inTableScope(
$value ) ) {
3056 $this->stack->popTag(
$value );
3075 } elseif ( $token ===
'comment' ) {
3076 $this->stack->insertComment(
$value );
3080 $this->stack->fosterParentMode =
true;
3082 $this->stack->fosterParentMode =
false;
3087 if ( $token ===
'text' ) {
3088 $this->pendingTableText .=
$value;
3093 $this->pendingTableText =
'';
3094 if ( preg_match(
'/[^\x09\x0A\x0C\x0D\x20]/', $text ) ) {
3096 $this->stack->fosterParentMode =
true;
3098 $this->stack->fosterParentMode =
false;
3101 $this->stack->insertText( $text );
3104 $this->originalInsertionMode, $token,
$value,
$attribs, $selfClose
3110 if ( !$this->stack->inTableScope(
'caption' ) ) {
3113 $this->stack->generateImpliedEndTags();
3114 $this->stack->popTag(
'caption' );
3115 $this->afe->clearToMarker();
3121 if ( $token ===
'tag' ) {
3138 } elseif ( $token ===
'endtag' ) {
3168 if ( $token ===
'text' ) {
3169 if ( preg_match(
'/^[\x09\x0A\x0C\x0D\x20]+/',
$value,
$matches ) ) {
3170 $this->stack->insertText(
$matches[0] );
3173 if ( strlen(
$value ) === 0 ) {
3177 } elseif ( $token ===
'tag' ) {
3182 $this->stack->pop();
3188 } elseif ( $token ===
'endtag' ) {
3191 if ( !$this->stack->currentNode->isHtmlNamed(
'colgroup' ) ) {
3194 $this->stack->pop();
3203 } elseif ( $token ===
'eof' ) {
3205 } elseif ( $token ===
'comment' ) {
3206 $this->stack->insertComment(
$value );
3211 if ( !$this->stack->currentNode->isHtmlNamed(
'colgroup' ) ) {
3221 $this->stack->inTableScope(
'tbody' ) ||
3222 $this->stack->inTableScope(
'thead' ) ||
3223 $this->stack->inTableScope(
'tfoot' )
3228 $this->stack->pop();
3233 if ( $token ===
'tag' ) {
3256 } elseif ( $token ===
'endtag' ) {
3266 if ( $this->stack->inTableScope(
$value ) ) {
3287 if ( !$this->stack->inTableScope(
'tr' ) ) {
3291 $this->stack->pop();
3296 if ( $token ===
'tag' ) {
3303 $this->afe->insertMarker();
3317 } elseif ( $token ===
'endtag' ) {
3331 $this->stack->inTableScope(
$value ) &&
3353 if ( $this->stack->inTableScope(
'td' ) ) {
3356 } elseif ( $this->stack->inTableScope(
'th' ) ) {
3364 if ( $token ===
'tag' ) {
3380 } elseif ( $token ===
'endtag' ) {
3384 if ( $this->stack->inTableScope(
$value ) ) {
3385 $this->stack->generateImpliedEndTags();
3386 $this->stack->popTag(
$value );
3387 $this->afe->clearToMarker();
3403 if ( $this->stack->inTableScope(
$value ) ) {
3404 $this->stack->generateImpliedEndTags();
3406 $this->afe->clearToMarker();
3418 if ( $token ===
'text' ) {
3419 $this->stack->insertText(
$value );
3421 } elseif ( $token ===
'eof' ) {
3423 } elseif ( $token ===
'tag' ) {
3427 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3428 $this->stack->pop();
3433 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3434 $this->stack->pop();
3436 if ( $this->stack->currentNode->isHtmlNamed(
'optgroup' ) ) {
3437 $this->stack->pop();
3447 if ( !$this->stack->inSelectScope(
'select' ) ) {
3456 } elseif ( $token ===
'endtag' ) {
3460 $this->stack->currentNode->isHtmlNamed(
'option' ) &&
3461 $this->stack->length() >= 2 &&
3462 $this->stack->node( $this->stack->length() - 2 )->isHtmlNamed(
'optgroup' )
3464 $this->stack->pop();
3466 if ( $this->stack->currentNode->isHtmlNamed(
'optgroup' ) ) {
3467 $this->stack->pop();
3471 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3472 $this->stack->pop();
3476 if ( !$this->stack->inSelectScope(
$value ) ) {
3479 $this->stack->popTag(
$value );
3485 } elseif ( $token ===
'comment' ) {
3486 $this->stack->insertComment(
$value );
3503 if ( $token ===
'tag' ) {
3506 } elseif ( $token ===
'endtag' ) {
3507 if ( $this->stack->inTableScope(
$value ) ) {
3519 if ( $token ===
'text' || $token ===
'comment' ) {
3521 } elseif ( $token ===
'eof' ) {
3522 if ( $this->stack->indexOf(
'template' ) < 0 ) {
3525 $this->stack->popTag(
'template' );
3526 $this->afe->clearToMarker();
3527 array_pop( $this->templateInsertionModes );
3532 } elseif ( $token ===
'tag' ) {
3574 } elseif ( $token ===
'endtag' ) {
3581 Assert::invariant(
false,
"Bad token type: $token" );