33use Wikimedia\Assert\Assert;
34use Wikimedia\Assert\ParameterAssertionException;
76 self::HTML_NAMESPACE => [
77 'html' =>
true,
'head' =>
true,
'body' =>
true,
'frameset' =>
true,
80 'xmp' =>
true,
'iframe' =>
true,
'noembed' =>
true,
81 'noscript' =>
true,
'script' =>
true,
87 self::HTML_NAMESPACE => [
88 'area' =>
true,
'base' =>
true,
'basefont' =>
true,
89 'bgsound' =>
true,
'br' =>
true,
'col' =>
true,
'command' =>
true,
90 'embed' =>
true,
'frame' =>
true,
'hr' =>
true,
'img' =>
true,
91 'input' =>
true,
'keygen' =>
true,
'link' =>
true,
'meta' =>
true,
92 'param' =>
true,
'source' =>
true,
'track' =>
true,
'wbr' =>
true
97 self::HTML_NAMESPACE => [
98 'pre' =>
true,
'textarea' =>
true,
'listing' =>
true,
103 self::HTML_NAMESPACE => [
104 'h1' =>
true,
'h2' =>
true,
'h3' =>
true,
105 'h4' =>
true,
'h5' =>
true,
'h6' =>
true
110 self::HTML_NAMESPACE => [
111 'address' =>
true,
'applet' =>
true,
'area' =>
true,
112 'article' =>
true,
'aside' =>
true,
'base' =>
true,
113 'basefont' =>
true,
'bgsound' =>
true,
'blockquote' =>
true,
114 'body' =>
true,
'br' =>
true,
'button' =>
true,
'caption' =>
true,
115 'center' =>
true,
'col' =>
true,
'colgroup' =>
true,
'dd' =>
true,
116 'details' =>
true,
'dir' =>
true,
'div' =>
true,
'dl' =>
true,
117 'dt' =>
true,
'embed' =>
true,
'fieldset' =>
true,
118 'figcaption' =>
true,
'figure' =>
true,
'footer' =>
true,
119 'form' =>
true,
'frame' =>
true,
'frameset' =>
true,
'h1' =>
true,
120 'h2' =>
true,
'h3' =>
true,
'h4' =>
true,
'h5' =>
true,
121 'h6' =>
true,
'head' =>
true,
'header' =>
true,
'hgroup' =>
true,
122 'hr' =>
true,
'html' =>
true,
'iframe' =>
true,
'img' =>
true,
123 'input' =>
true,
'li' =>
true,
'link' =>
true,
124 'listing' =>
true,
'main' =>
true,
'marquee' =>
true,
125 'menu' =>
true,
'meta' =>
true,
'nav' =>
true,
126 'noembed' =>
true,
'noframes' =>
true,
'noscript' =>
true,
127 'object' =>
true,
'ol' =>
true,
'p' =>
true,
'param' =>
true,
128 'plaintext' =>
true,
'pre' =>
true,
'script' =>
true,
129 'section' =>
true,
'select' =>
true,
'source' =>
true,
130 'style' =>
true,
'summary' =>
true,
'table' =>
true,
131 'tbody' =>
true,
'td' =>
true,
'template' =>
true,
132 'textarea' =>
true,
'tfoot' =>
true,
'th' =>
true,
'thead' =>
true,
133 'title' =>
true,
'tr' =>
true,
'track' =>
true,
'ul' =>
true,
134 'wbr' =>
true,
'xmp' =>
true
136 self::SVG_NAMESPACE => [
137 'foreignobject' =>
true,
'desc' =>
true,
'title' =>
true
139 self::MATHML_NAMESPACE => [
140 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
141 'mtext' =>
true,
'annotation-xml' =>
true
146 self::HTML_NAMESPACE => [
147 'address' =>
true,
'div' =>
true,
'p' =>
true
152 self::HTML_NAMESPACE => [
153 'table' =>
true,
'thead' =>
true,
'tbody' =>
true,
154 'tfoot' =>
true,
'tr' =>
true
159 self::HTML_NAMESPACE => [
160 'dd' =>
true,
'dt' =>
true,
'li' =>
true,
161 'menuitem' =>
true,
'optgroup' =>
true,
162 'option' =>
true,
'p' =>
true,
'rb' =>
true,
'rp' =>
true,
163 'rt' =>
true,
'rtc' =>
true
168 self::HTML_NAMESPACE => [
169 'caption' =>
true,
'colgroup' =>
true,
'dd' =>
true,
'dt' =>
true,
170 'li' =>
true,
'optgroup' =>
true,
'option' =>
true,
'p' =>
true,
171 'rb' =>
true,
'rp' =>
true,
'rt' =>
true,
'rtc' =>
true,
172 'tbody' =>
true,
'td' =>
true,
'tfoot' =>
true,
'th' =>
true,
173 'thead' =>
true,
'tr' =>
true
178 self::HTML_NAMESPACE => [
179 'td' =>
true,
'th' =>
true
183 self::HTML_NAMESPACE => [
184 'table' =>
true,
'template' =>
true,
'html' =>
true
189 self::HTML_NAMESPACE => [
190 'tbody' =>
true,
'tfoot' =>
true,
'thead' =>
true,
191 'template' =>
true,
'html' =>
true
196 self::HTML_NAMESPACE => [
197 'tr' =>
true,
'template' =>
true,
'html' =>
true
203 self::HTML_NAMESPACE => [
204 'button' =>
true,
'fieldset' =>
true,
'input' =>
true,
205 'keygen' =>
true,
'object' =>
true,
'output' =>
true,
206 'select' =>
true,
'textarea' =>
true,
'img' =>
true
211 self::HTML_NAMESPACE => [
212 'applet' =>
true,
'caption' =>
true,
'html' =>
true,
213 'marquee' =>
true,
'object' =>
true,
214 'table' =>
true,
'td' =>
true,
'template' =>
true,
217 self::SVG_NAMESPACE => [
218 'foreignobject' =>
true,
'desc' =>
true,
'title' =>
true
220 self::MATHML_NAMESPACE => [
221 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
222 'mtext' =>
true,
'annotation-xml' =>
true
228 if ( self::$inListItemScopeSet ===
null ) {
238 if ( self::$inButtonScopeSet ===
null ) {
246 self::HTML_NAMESPACE => [
247 'html' =>
true,
'table' =>
true,
'template' =>
true
252 self::HTML_NAMESPACE => [
253 'option' =>
true,
'optgroup' =>
true
258 self::MATHML_NAMESPACE => [
259 'mi' =>
true,
'mo' =>
true,
'mn' =>
true,
'ms' =>
true,
265 self::SVG_NAMESPACE => [
266 'foreignobject' =>
true,
274 self::HTML_NAMESPACE => [
275 'body' =>
true,
'blockquote' =>
true,
284 self::HTML_NAMESPACE => [
285 'a' =>
true,
'abbr' =>
true,
'acronym' =>
true,
'applet' =>
true,
286 'b' =>
true,
'basefont' =>
true,
'bdo' =>
true,
'big' =>
true,
287 'br' =>
true,
'button' =>
true,
'cite' =>
true,
'code' =>
true,
288 'dfn' =>
true,
'em' =>
true,
'font' =>
true,
'i' =>
true,
289 'iframe' =>
true,
'img' =>
true,
'input' =>
true,
'kbd' =>
true,
290 'label' =>
true,
'legend' =>
true,
'map' =>
true,
'object' =>
true,
291 'param' =>
true,
'q' =>
true,
'rb' =>
true,
'rbc' =>
true,
292 'rp' =>
true,
'rt' =>
true,
'rtc' =>
true,
'ruby' =>
true,
293 's' =>
true,
'samp' =>
true,
'select' =>
true,
'small' =>
true,
294 'span' =>
true,
'strike' =>
true,
'strong' =>
true,
'sub' =>
true,
295 'sup' =>
true,
'textarea' =>
true,
'tt' =>
true,
'u' =>
true,
298 'video' =>
true,
'audio' =>
true,
'bdi' =>
true,
'data' =>
true,
299 'time' =>
true,
'mark' =>
true,
384 $this->parent =
null;
385 $this->children = [];
393 Assert::precondition(
394 $this->parent !==
'flat',
"Can't removeChild after flattening $this"
397 $elt->parent === $this,
'elt',
'must have $this as a parent'
399 $idx = array_search( $elt, $this->children,
true );
400 Assert::parameter( $idx !==
false,
'$elt',
'must be a child of $this' );
402 array_splice( $this->children, $idx, 1 );
411 Assert::precondition(
412 $this->parent !==
'flat',
"Can't insertBefore after flattening."
414 $idx = array_search( $a, $this->children,
true );
415 Assert::parameter( $idx !==
false,
'$a',
'must be a child of $this' );
416 if ( is_string( $b ) ) {
417 array_splice( $this->children, $idx, 0, [ $b ] );
419 Assert::parameter( $b->parent !==
'flat',
'$b',
"Can't be flat" );
420 if ( $b->parent !==
null ) {
421 $b->parent->removeChild( $b );
423 array_splice( $this->children, $idx, 0, [ $b ] );
433 Assert::precondition(
434 $this->parent !==
'flat',
"Can't appendChild after flattening."
436 if ( is_string( $elt ) ) {
437 array_push( $this->children, $elt );
441 if ( $elt->parent !==
null ) {
442 $elt->parent->removeChild( $elt );
444 array_push( $this->children, $elt );
445 $elt->parent = $this;
453 Assert::precondition(
454 $elt->parent !==
'flat',
"Can't adoptChildren after flattening."
456 foreach ( $elt->children as $child ) {
457 if ( !is_string( $child ) ) {
460 $child->parent =
null;
478 Assert::parameter( $this->parent !==
null,
'$this',
'must be a child' );
479 Assert::parameter( $this->parent !==
'flat',
'$this',
'already flat' );
480 $idx = array_search( $this, $this->parent->children,
true );
482 $idx !==
false,
'$this',
'must be a child of its parent'
484 $tidyCompat = $config[
'tidyCompat'];
487 foreach ( $this->children as $elt ) {
488 if ( !is_string( $elt ) ) {
489 $elt = $elt->flatten( $config );
491 if ( $blank && preg_match(
'/[^\t\n\f\r ]/', $elt ) ) {
496 $this->localName =
'p';
497 } elseif ( $blank ) {
500 if ( !count( $this->attribs ) &&
501 ( $this->localName ===
'tr' || $this->localName ===
'li' )
503 $this->attribs = [
'class' =>
"mw-empty-elt" ];
508 count( $this->children ) > 0 &&
509 substr( $this->children[0], 0, 1 ) ==
"\n"
515 array_unshift( $this->children,
"\n" );
517 $flat = $blank ?
'' :
"{$this}";
521 $this->parent->children[$idx] = $flat;
522 $this->parent =
'flat';
535 foreach ( $this->attribs as $name =>
$value ) {
536 $encValue = Sanitizer::encodeAttribute(
$value );
537 $encAttribs .=
" $name=\"$encValue\"";
540 $out =
"<{$this->localName}{$encAttribs}>";
541 $len = strlen(
$out );
543 foreach ( $this->children as $elt ) {
546 $out .=
"</{$this->localName}>";
548 $out =
"<{$this->localName}{$encAttribs} />";
550 count( $this->children ) === 0,
551 "Empty elements shouldn't have children."
567 public function isA( $set ) {
569 return $this === $set;
570 } elseif ( is_array( $set ) ) {
571 return isset( $set[$this->namespaceURI] ) &&
572 isset( $set[$this->namespaceURI][$this->localName] );
575 return $this->
isHtml() && $this->localName === $set;
586 && $this->localName === $tagName;
622 $this->localName ===
'annotation-xml' &&
623 isset( $this->attribs[
'encoding'] ) &&
624 ( strcasecmp( $this->attribs[
'encoding'],
'text/html' ) == 0 ||
625 strcasecmp( $this->attribs[
'encoding'],
'application/xhtml+xml' ) == 0 )
637 if ( $this->noahKey ===
null ) {
640 $this->noahKey =
serialize( [ $this->namespaceURI, $this->localName,
$attribs ] );
696 $this->currentNode = $this->elements[0];
708 foreach ( $this->elements[0]->children as $elt ) {
709 $out .= is_string( $elt ) ? $elt :
710 $elt->flatten( $this->config );
735 $this->fosterParentMode &&
740 $this->config[
'tidyCompat'] && !$isComment &&
746 $this->currentNode->appendChild(
$value );
788 $this->currentNode->isHtmlNamed(
'mw:p-wrap' ) &&
795 $this->fosterParentMode &&
800 $this->currentNode->appendChild( $elt );
802 Assert::invariant( $elt->parent !==
null,
"$elt must be in tree" );
803 Assert::invariant( $elt->parent !==
'flat',
"$elt must not have been previous flattened" );
804 array_push( $this->elements, $elt );
805 $this->currentNode = $elt;
858 foreach ( $this as $elt ) {
859 if ( $elt->isA( $tag ) ) {
877 foreach ( $this as $elt ) {
878 if ( $elt->isA( $tag ) ) {
881 if ( $elt->isA( $set ) ) {
895 $endTagSet = $thorough ?
898 while ( $this->currentNode ) {
899 if ( $butnot !==
null && $this->currentNode->isHtmlNamed( $butnot ) ) {
902 if ( !$this->currentNode->isA( $endTagSet ) ) {
915 return ( $fragmentContext && count( $this->elements ) === 1 ) ?
935 return $this->elements[ $idx ];
944 Assert::precondition(
945 $this->elements[$idx]->parent !==
'flat',
946 'Replaced element should not have already been flattened.'
948 Assert::precondition(
949 $elt->parent !==
'flat',
950 'New element should not have already been flattened.'
952 $this->elements[$idx] = $elt;
953 if ( $idx === count( $this->elements ) - 1 ) {
954 $this->currentNode = $elt;
965 for ( $i = count( $this->elements ) - 1; $i >= 0; $i-- ) {
966 if ( $this->elements[$i]->isA( $tag ) ) {
978 return count( $this->elements );
986 $elt = array_pop( $this->elements );
987 if ( count( $this->elements ) ) {
988 $this->currentNode = $this->elements[ count( $this->elements ) - 1 ];
990 $this->currentNode =
null;
992 if ( !$elt->isHtmlNamed(
'mw:p-wrap' ) ) {
993 $elt->flatten( $this->config );
1003 for ( $length = count( $this->elements ); $length > $idx; $length-- ) {
1015 while ( $this->currentNode ) {
1016 if ( $this->currentNode->isA( $tag ) ) {
1031 for ( $length = count( $this->elements ); $length > 1; $length-- ) {
1032 if ( $this->currentNode->isA( $set ) ) {
1047 $elt->parent !==
'flat',
1049 '$elt should not already have been flattened.'
1052 $elt->parent->parent !==
'flat',
1054 'The parent of $elt should not already have been flattened.'
1056 $idx = array_search( $elt, $this->elements,
true );
1057 Assert::parameter( $idx !==
false,
'$elt',
'must be in stack' );
1058 array_splice( $this->elements, $idx, 1 );
1059 if ( $idx === count( $this->elements ) ) {
1060 $this->currentNode = $this->elements[$idx - 1];
1067 $elt->flatten( $this->config );
1069 Assert::postcondition(
1070 array_search( $elt, $this->elements,
true ) ===
false,
1071 '$elt should no longer be in open elements stack'
1082 Assert::parameter( $idx !==
false,
'$a',
'must be in stack' );
1083 if ( $idx === count( $this->elements ) - 1 ) {
1084 array_push( $this->elements, $b );
1085 $this->currentNode = $b;
1087 array_splice( $this->elements, $idx + 1, 0, [ $b ] );
1101 $lastTable = $this->
indexOf(
'table' );
1102 $lastTemplate = $this->
indexOf(
'template' );
1106 if ( $lastTemplate >= 0 && ( $lastTable < 0 || $lastTemplate > $lastTable ) ) {
1107 $parent = $this->elements[$lastTemplate];
1108 } elseif ( $lastTable >= 0 ) {
1109 $parent = $this->elements[$lastTable]->parent;
1112 $parent !==
null,
"All tables should have parents"
1114 $before = $this->elements[$lastTable];
1116 $parent = $this->elements[0];
1119 if ( $this->config[
'tidyCompat'] ) {
1120 if ( is_string( $elt ) ) {
1129 if ( $elt->isHtmlNamed(
'mw:p-wrap' ) ) {
1131 array_search( $before, $parent->children,
true ) :
1132 count( $parent->children );
1133 $after = $idx > 0 ? $parent->children[$idx - 1] :
'';
1145 $parent->insertBefore( $before, $elt );
1147 $parent->appendChild( $elt );
1168 $this->currentNode->isHtmlNamed( $tag ) &&
1169 !$afe->isInList( $this->currentNode )
1177 for ( $outer = 0; $outer < 8; $outer++ ) {
1183 $fmtElt = $afe->findElementByTag( $tag );
1194 $index = $this->
indexOf( $fmtElt );
1196 $afe->remove( $fmtElt );
1204 if ( !$this->
inScope( $fmtElt ) ) {
1212 $furthestBlock =
null;
1213 $furthestBlockIndex = -1;
1214 $stackLength = $this->
length();
1215 for ( $i = $index + 1; $i < $stackLength; $i++ ) {
1217 $furthestBlock = $this->
node( $i );
1218 $furthestBlockIndex = $i;
1229 if ( !$furthestBlock ) {
1230 $this->
popTag( $fmtElt );
1231 $afe->remove( $fmtElt );
1237 $ancestor = $this->
node( $index - 1 );
1243 $BOOKMARK =
new BalanceElement(
'[bookmark]',
'[bookmark]', [] );
1244 $afe->insertAfter( $fmtElt, $BOOKMARK );
1247 $node = $furthestBlock;
1248 $lastNode = $furthestBlock;
1249 $nodeIndex = $furthestBlockIndex;
1253 for ( $inner = 1;
true; $inner++ ) {
1260 $node = $this->
node( --$nodeIndex );
1264 if ( $node === $fmtElt )
break;
1269 $isAFE = $afe->isInList( $node );
1270 if ( $inner > 3 && $isAFE ) {
1271 $afe->remove( $node );
1294 $node->namespaceURI, $node->localName, $node->attribs );
1295 $afe->replace( $node, $newElt );
1296 $this->
replaceAt( $nodeIndex, $newElt );
1302 if ( $lastNode === $furthestBlock ) {
1303 $afe->remove( $BOOKMARK );
1304 $afe->insertAfter( $newElt, $BOOKMARK );
1309 $node->appendChild( $lastNode );
1320 $this->fosterParentMode &&
1328 $ancestor->appendChild( $lastNode );
1335 $fmtElt->namespaceURI, $fmtElt->localName, $fmtElt->attribs );
1339 $newElt2->adoptChildren( $furthestBlock );
1342 $furthestBlock->appendChild( $newElt2 );
1348 $afe->remove( $fmtElt );
1349 $afe->replace( $BOOKMARK, $newElt2 );
1369 foreach ( $this->elements as $elt ) {
1370 array_push( $r, $elt->localName );
1372 return implode(
' ', $r );
1425 for ( $node = $this->head; $node; $node = $next ) {
1426 $next = $node->nextAFE;
1427 $node->prevAFE = $node->nextAFE = $node->nextNoah =
null;
1429 $this->head = $this->tail = $this->noahTableStack =
null;
1434 if ( $this->tail ) {
1435 $this->tail->nextAFE = $elt;
1441 $this->noahTableStack[] = [];
1451 if ( $elt->prevAFE !==
null || $this->head === $elt ) {
1452 throw new ParameterAssertionException(
'$elt',
1453 'Cannot insert a node into the AFE list twice' );
1459 $noahKey = $elt->getNoahKey();
1460 $table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
1461 if ( !isset( $table[$noahKey] ) ) {
1462 $table[$noahKey] = $elt;
1466 while (
$tail->nextNoah ) {
1470 if ( $count >= 3 ) {
1471 $this->
remove(
$head );
1473 $tail->nextNoah = $elt;
1476 if ( $this->tail ) {
1477 $this->tail->nextAFE = $elt;
1494 $prev =
$tail->prevAFE;
1495 $tail->prevAFE =
null;
1497 $prev->nextAFE =
null;
1499 $tail->nextNoah =
null;
1504 $prev =
$tail->prevAFE;
1506 $prev->nextAFE =
null;
1509 array_pop( $this->noahTableStack );
1512 $this->noahTableStack[0] = [];
1518 $this->tail =
$tail;
1531 if ( $elt->localName === $tag ) {
1534 $elt = $elt->prevAFE;
1545 return $this->head === $elt || $elt->prevAFE;
1555 if ( $this->head !== $elt && !$elt->prevAFE ) {
1556 throw new ParameterAssertionException(
'$elt',
1557 "Attempted to remove an element which is not in the AFE list" );
1560 if ( $this->head === $elt ) {
1561 $this->head = $elt->nextAFE;
1563 if ( $this->tail === $elt ) {
1564 $this->tail = $elt->prevAFE;
1567 if ( $elt->prevAFE ) {
1568 $elt->prevAFE->nextAFE = $elt->nextAFE;
1571 if ( $elt->nextAFE ) {
1572 $elt->nextAFE->prevAFE = $elt->prevAFE;
1575 $elt->prevAFE = $elt->nextAFE =
null;
1581 $noahKey = $elt->getNoahKey();
1582 $table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
1583 if ( !isset( $table[$noahKey] ) ) {
1584 $table[$noahKey] = $elt;
1586 $tail = $table[$noahKey];
1587 while (
$tail->nextNoah ) {
1590 $tail->nextNoah = $elt;
1595 $table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
1596 $key = $elt->getNoahKey();
1597 $noahElt = $table[$key];
1598 if ( $noahElt === $elt ) {
1599 if ( $noahElt->nextNoah ) {
1600 $table[$key] = $noahElt->nextNoah;
1601 $noahElt->nextNoah =
null;
1603 unset( $table[$key] );
1607 $prevNoahElt = $noahElt;
1608 $noahElt = $prevNoahElt->nextNoah;
1609 if ( $noahElt === $elt ) {
1611 $prevNoahElt->nextNoah = $elt->nextNoah;
1612 $elt->nextNoah =
null;
1615 }
while ( $noahElt );
1626 if ( $this->head !== $a && !$a->prevAFE ) {
1627 throw new ParameterAssertionException(
'$a',
1628 "Attempted to replace an element which is not in the AFE list" );
1631 if ( $this->head === $a ) {
1634 if ( $this->tail === $a ) {
1638 if ( $a->prevAFE ) {
1639 $a->prevAFE->nextAFE = $b;
1642 if ( $a->nextAFE ) {
1643 $a->nextAFE->prevAFE = $b;
1645 $b->prevAFE = $a->prevAFE;
1646 $b->nextAFE = $a->nextAFE;
1647 $a->nextAFE = $a->prevAFE =
null;
1660 if ( $this->head !== $a && !$a->prevAFE ) {
1661 throw new ParameterAssertionException(
'$a',
1662 "Attempted to insert after an element which is not in the AFE list" );
1664 if ( $this->tail === $a ) {
1667 if ( $a->nextAFE ) {
1668 $a->nextAFE->prevAFE = $b;
1670 $b->nextAFE = $a->nextAFE;
1693 if ( $stack->indexOf( $entry ) >= 0 ) {
1700 while ( $entry->prevAFE ) {
1701 $entry = $entry->prevAFE;
1702 if ( $entry instanceof
BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
1713 $entry = $entry->nextAFE;
1716 $newElement = $stack->insertHTMLElement(
1719 $this->
replace( $entry, $newElement );
1720 $entry = $newElement->nextAFE;
1730 for ( $node = $this->head; $node; $prev = $node, $node = $node->nextAFE ) {
1735 $s .= $node->localName .
'#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
1736 if ( $node->nextNoah ) {
1737 $s .=
" (noah sibling: {$node->nextNoah->localName}#" .
1738 substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
1741 if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
1742 $s .=
" (reverse link is wrong!)";
1746 if ( $prev !== $this->tail ) {
1747 $s .=
"(tail pointer is wrong!)\n";
1836 ( # 1. Comment match detector
1837 > | -> | # Invalid short close
1838 ( # 2. Comment contents
1848 ( # 3. Comment close
1849 --> | # Normal close
1850 --!> | # Comment end bang
1851 ( # 4. Indicate matches requiring EOF
1852 --! | # EOF in comment end bang state
1853 -- | # EOF in comment end state
1854 - | # EOF in comment end dash state
1855 (?#nothing) # EOF in comment state
1859 ([^<]*) \z # 5. Non-tag text after the comment
1891 'allowedHtmlElements' =>
null,
1892 'tidyCompat' =>
false,
1893 'allowComments' =>
true,
1895 $this->allowedHtmlElements =
$config[
'allowedHtmlElements'];
1896 $this->strict =
$config[
'strict'];
1897 $this->allowComments =
$config[
'allowComments'];
1898 if ( $this->allowedHtmlElements !==
null ) {
1900 $bad = array_uintersect_assoc(
1901 $this->allowedHtmlElements,
1903 function ( $a, $b ) {
1909 if ( count( $bad ) > 0 ) {
1910 $badstr = implode(
',', array_keys( $bad ) );
1911 throw new ParameterAssertionException(
1913 'Balance attempted with sanitization including ' .
1914 "unsupported elements: {$badstr}"
1933 $this->parseMode =
'inBodyMode';
1940 $this->textIntegrationMode =
1941 $this->ignoreLinefeed =
1943 $this->inRAWTEXT =
false;
1947 $this->fragmentContext =
1950 $this->formElementPointer =
null;
1951 for (
$e = $this->fragmentContext;
$e !=
null;
$e =
$e->parent ) {
1952 if (
$e->isHtmlNamed(
'form' ) ) {
1953 $this->formElementPointer =
$e;
1959 $x = $this->bitsIterator->current();
1960 $this->bitsIterator->next();
1961 $this->
insertToken(
'text', str_replace(
'>',
'>', $x ) );
1963 while ( $this->bitsIterator->valid() ) {
1967 $result = $this->stack->getOutput();
1969 $this->bitsIterator =
null;
1971 $this->stack =
null;
1972 $this->fragmentContext =
null;
1973 $this->formElementPointer =
null;
1983 if ( $token ===
'tag' || $token ===
'endtag' ) {
1989 "Unsupported $token <$value> found."
1993 } elseif ( $token ===
'text' &&
$value ===
'' ) {
1998 if ( $this->ignoreLinefeed ) {
1999 $this->ignoreLinefeed =
false;
2000 if ( $token ===
'text' ) {
2001 if (
$value[0] ===
"\n" ) {
2011 $adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );
2016 $this->stack->length() === 0 ||
2017 $adjusted->isHtml() ||
2021 } elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
2022 if ( $token ===
'text' ) {
2032 $adjusted->localName ===
'annotation-xml' &&
2033 $token ===
'tag' &&
$value ===
'svg'
2037 $adjusted->isHtmlIntegrationPoint() &&
2038 ( $token ===
'tag' || $token ===
'text' )
2051 if ( $token ===
'text' ) {
2052 $this->stack->insertText(
$value );
2054 } elseif ( $token ===
'comment' ) {
2055 $this->stack->insertComment(
$value );
2057 } elseif ( $token ===
'tag' ) {
2111 if ( $this->fragmentContext ) {
2115 $this->stack->pop();
2116 $node = $this->stack->currentNode;
2118 $node->isMathmlTextIntegrationPoint() ||
2119 $node->isHtmlIntegrationPoint() ||
2128 $adjusted = ( $this->fragmentContext && $this->stack->length() === 1 ) ?
2129 $this->fragmentContext : $this->stack->currentNode;
2130 $this->stack->insertForeignElement(
2134 $this->stack->pop();
2137 } elseif ( $token ===
'endtag' ) {
2139 foreach ( $this->stack as $i => $node ) {
2140 if ( $node->isHtml() && !$first ) {
2144 } elseif ( $i === 0 ) {
2146 } elseif ( $node->localName ===
$value ) {
2147 $this->stack->popTag( $node );
2160 $x = $this->bitsIterator->current();
2161 $this->bitsIterator->next();
2166 $this->allowComments &&
2167 !( $this->inRCDATA || $this->inRAWTEXT ) &&
2168 preg_match( self::VALID_COMMENT_REGEX, $x, $regs, PREG_OFFSET_CAPTURE ) &&
2170 ( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
2172 $contents = $regs[2][0];
2173 $rest = $regs[5][0];
2175 $this->
insertToken(
'text', str_replace(
'>',
'>', $rest ) );
2183 if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
2184 list( , $slash,
$t, $attribStr, $brace, $rest ) = $regs;
2185 $t = strtolower(
$t );
2186 if ( $this->strict ) {
2190 '/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
2192 "Bad attribute string found"
2197 !$this->strict,
"< found which does not start a valid tag"
2199 $slash =
$t = $attribStr = $brace = $rest =
null;
2202 if ( $this->inRCDATA ) {
2203 if ( $slash &&
$t === $this->inRCDATA ) {
2204 $this->inRCDATA =
false;
2210 if ( $this->inRAWTEXT ) {
2211 if ( $slash &&
$t === $this->inRAWTEXT ) {
2212 $this->inRAWTEXT =
false;
2218 $sanitize = $this->allowedHtmlElements !==
null;
2220 $goodTag =
$t && isset( $this->allowedHtmlElements[
$t] );
2223 if ( is_callable( $this->processingCallback ) ) {
2224 call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
2227 $goodTag = Sanitizer::validateTag( $attribStr,
$t );
2232 $attribs = Sanitizer::decodeTagAttributes( $attribStr );
2235 $attribs = Sanitizer::decodeTagAttributes( $attribStr );
2238 $slash ?
'endtag' :
'tag',
$t,
$attribs, $brace ===
'/>'
2242 $rest = str_replace(
'>',
'>', $rest );
2243 $this->
insertToken(
'text', str_replace(
'>',
'>', $rest ) );
2244 } elseif ( $this->inRAWTEXT ) {
2248 $this->
insertToken(
'text',
'<' . str_replace(
'>',
'>', $x ) );
2254 substr( $mode, -4 ) ===
'Mode',
'$mode',
'should end in Mode'
2257 $this->parseMode = $mode;
2268 foreach ( $this->stack as $i => $node ) {
2271 if ( $this->fragmentContext ) {
2275 if ( $node->isHtml() ) {
2276 switch ( $node->localName ) {
2278 $stackLength = $this->stack->length();
2279 for ( $j = $i + 1; $j < $stackLength - 1; $j++ ) {
2280 $ancestor = $this->stack->node( $stackLength - $j - 1 );
2281 if ( $ancestor->isHtmlNamed(
'template' ) ) {
2284 if ( $ancestor->isHtmlNamed(
'table' ) ) {
2310 array_slice( $this->templateInsertionModes, -1 )[0]
2346 $this->stack->popTo( 1 );
2351 $this->inRAWTEXT =
$value;
2352 $this->originalInsertionMode = $this->
switchMode(
'inTextMode' );
2357 if ( $token ===
'text' ) {
2358 $this->stack->insertText(
$value );
2360 } elseif ( $token ===
'eof' ) {
2361 $this->stack->pop();
2363 $this->originalInsertionMode, $token,
$value,
$attribs, $selfClose
2365 } elseif ( $token ===
'endtag' ) {
2366 $this->stack->pop();
2367 $this->
switchMode( $this->originalInsertionMode );
2374 if ( $token ===
'text' ) {
2375 if ( preg_match(
'/^[\x09\x0A\x0C\x0D\x20]+/',
$value,
$matches ) ) {
2376 $this->stack->insertText(
$matches[0] );
2379 if ( strlen(
$value ) === 0 ) {
2383 } elseif ( $token ===
'tag' ) {
2394 $this->stack->pop();
2404 $this->afe->insertMarker();
2411 } elseif ( $token ===
'endtag' ) {
2419 if ( $this->stack->indexOf(
$value ) < 0 ) {
2422 $this->stack->generateImpliedEndTags(
null,
true );
2423 $this->stack->popTag(
$value );
2424 $this->afe->clearToMarker();
2425 array_pop( $this->templateInsertionModes );
2432 } elseif ( $token ===
'comment' ) {
2433 $this->stack->insertComment(
$value );
2444 if ( $token ===
'text' ) {
2445 $this->afe->reconstruct( $this->stack );
2446 $this->stack->insertText(
$value );
2448 } elseif ( $token ===
'eof' ) {
2449 if ( !empty( $this->templateInsertionModes ) ) {
2454 } elseif ( $token ===
'tag' ) {
2494 if ( $this->stack->inButtonScope(
'p' ) ) {
2501 if ( $this->stack->inButtonScope(
"p" ) ) {
2504 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2505 $this->stack->pop();
2516 if ( $this->stack->inButtonScope(
'p' ) ) {
2520 $this->stack->pop();
2527 if ( $this->stack->inButtonScope(
'p' ) ) {
2531 $this->ignoreLinefeed =
true;
2537 $this->formElementPointer &&
2538 $this->stack->indexOf(
'template' ) < 0
2542 if ( $this->stack->inButtonScope(
"p" ) ) {
2546 if ( $this->stack->indexOf(
'template' ) < 0 ) {
2547 $this->formElementPointer = $elt;
2553 foreach ( $this->stack as $node ) {
2554 if ( $node->isHtmlNamed(
'li' ) ) {
2565 if ( $this->stack->inButtonScope(
'p' ) ) {
2574 foreach ( $this->stack as $node ) {
2575 if ( $node->isHtmlNamed(
'dd' ) ) {
2579 if ( $node->isHtmlNamed(
'dt' ) ) {
2590 if ( $this->stack->inButtonScope(
'p' ) ) {
2599 if ( $this->stack->inScope(
'button' ) ) {
2603 $this->afe->reconstruct( $this->stack );
2608 $activeElement = $this->afe->findElementByTag(
'a' );
2609 if ( $activeElement ) {
2611 if ( $this->afe->isInList( $activeElement ) ) {
2612 $this->afe->remove( $activeElement );
2616 $this->stack->removeElement( $activeElement,
false );
2632 $this->afe->reconstruct( $this->stack );
2633 $this->afe->push( $this->stack->insertHTMLElement(
$value,
$attribs ) );
2637 $this->afe->reconstruct( $this->stack );
2638 if ( $this->stack->inScope(
'nobr' ) ) {
2640 $this->afe->reconstruct( $this->stack );
2642 $this->afe->push( $this->stack->insertHTMLElement(
$value,
$attribs ) );
2648 $this->afe->reconstruct( $this->stack );
2650 $this->afe->insertMarker();
2657 if ( $this->stack->inButtonScope(
'p' ) ) {
2671 $this->afe->reconstruct( $this->stack );
2673 $this->stack->pop();
2678 $this->afe->reconstruct( $this->stack );
2680 $this->stack->pop();
2689 $this->stack->pop();
2693 if ( $this->stack->inButtonScope(
'p' ) ) {
2696 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2697 $this->stack->pop();
2700 $this->stack->pop();
2709 $this->ignoreLinefeed =
true;
2710 $this->inRCDATA =
$value;
2720 $this->afe->reconstruct( $this->stack );
2722 switch ( $this->parseMode ) {
2724 case 'inCaptionMode':
2725 case 'inTableBodyMode':
2737 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
2740 $this->afe->reconstruct( $this->stack );
2745 if ( $this->stack->currentNode->isHtmlNamed(
'menuitem' ) ) {
2746 $this->stack->pop();
2748 $this->afe->reconstruct( $this->stack );
2754 if ( $this->stack->inScope(
'ruby' ) ) {
2755 $this->stack->generateImpliedEndTags();
2762 if ( $this->stack->inScope(
'ruby' ) ) {
2763 $this->stack->generateImpliedEndTags(
'rtc' );
2769 $this->afe->reconstruct( $this->stack );
2774 $this->stack->insertForeignElement(
2779 $this->stack->pop();
2784 $this->afe->reconstruct( $this->stack );
2789 $this->stack->insertForeignElement(
2794 $this->stack->pop();
2814 $this->afe->reconstruct( $this->stack );
2817 } elseif ( $token ===
'endtag' ) {
2851 if ( !$this->stack->inScope(
$value ) ) {
2854 $this->stack->generateImpliedEndTags();
2855 $this->stack->popTag(
$value );
2859 if ( $this->stack->indexOf(
'template' ) < 0 ) {
2861 $this->formElementPointer =
null;
2862 if ( !$openform || !$this->stack->inScope( $openform ) ) {
2865 $this->stack->generateImpliedEndTags();
2868 $flatten = ( $this->stack->currentNode === $openform );
2869 $this->stack->removeElement( $openform, $flatten );
2871 if ( !$this->stack->inScope(
'form' ) ) {
2874 $this->stack->generateImpliedEndTags();
2875 $this->stack->popTag(
'form' );
2880 if ( !$this->stack->inButtonScope(
'p' ) ) {
2884 $this->stack->generateImpliedEndTags(
$value );
2885 $this->stack->popTag(
$value );
2889 if ( !$this->stack->inListItemScope(
$value ) ) {
2892 $this->stack->generateImpliedEndTags(
$value );
2893 $this->stack->popTag(
$value );
2898 if ( !$this->stack->inScope(
$value ) ) {
2901 $this->stack->generateImpliedEndTags(
$value );
2902 $this->stack->popTag(
$value );
2914 $this->stack->generateImpliedEndTags();
2936 if ( $this->stack->adoptionAgency(
$value, $this->afe ) ) {
2944 if ( !$this->stack->inScope(
$value ) ) {
2947 $this->stack->generateImpliedEndTags();
2948 $this->stack->popTag(
$value );
2949 $this->afe->clearToMarker();
2958 foreach ( $this->stack as $i => $node ) {
2959 if ( $node->isHtmlNamed(
$value ) ) {
2960 $this->stack->generateImpliedEndTags(
$value );
2961 $this->stack->popTo( $i );
2968 } elseif ( $token ===
'comment' ) {
2969 $this->stack->insertComment(
$value );
2972 Assert::invariant(
false,
"Bad token type: $token" );
2977 if ( $token ===
'text' ) {
2978 if ( $this->textIntegrationMode ) {
2981 $this->pendingTableText =
'';
2987 } elseif ( $token ===
'eof' ) {
2990 } elseif ( $token ===
'tag' ) {
2993 $this->afe->insertMarker();
3018 if ( !$this->stack->inTableScope(
$value ) ) {
3030 if ( !isset(
$attribs[
'type'] ) || strcasecmp(
$attribs[
'type'],
'hidden' ) !== 0 ) {
3034 $this->stack->pop();
3039 $this->formElementPointer ||
3040 $this->stack->indexOf(
'template' ) >= 0
3044 $this->formElementPointer =
3046 $this->stack->popTag( $this->formElementPointer );
3050 } elseif ( $token ===
'endtag' ) {
3053 if ( !$this->stack->inTableScope(
$value ) ) {
3056 $this->stack->popTag(
$value );
3075 } elseif ( $token ===
'comment' ) {
3076 $this->stack->insertComment(
$value );
3080 $this->stack->fosterParentMode =
true;
3082 $this->stack->fosterParentMode =
false;
3087 if ( $token ===
'text' ) {
3088 $this->pendingTableText .=
$value;
3093 $this->pendingTableText =
'';
3094 if ( preg_match(
'/[^\x09\x0A\x0C\x0D\x20]/', $text ) ) {
3096 $this->stack->fosterParentMode =
true;
3098 $this->stack->fosterParentMode =
false;
3101 $this->stack->insertText( $text );
3104 $this->originalInsertionMode, $token,
$value,
$attribs, $selfClose
3110 if ( !$this->stack->inTableScope(
'caption' ) ) {
3113 $this->stack->generateImpliedEndTags();
3114 $this->stack->popTag(
'caption' );
3115 $this->afe->clearToMarker();
3121 if ( $token ===
'tag' ) {
3138 } elseif ( $token ===
'endtag' ) {
3168 if ( $token ===
'text' ) {
3169 if ( preg_match(
'/^[\x09\x0A\x0C\x0D\x20]+/',
$value,
$matches ) ) {
3170 $this->stack->insertText(
$matches[0] );
3173 if ( strlen(
$value ) === 0 ) {
3177 } elseif ( $token ===
'tag' ) {
3182 $this->stack->pop();
3188 } elseif ( $token ===
'endtag' ) {
3191 if ( !$this->stack->currentNode->isHtmlNamed(
'colgroup' ) ) {
3194 $this->stack->pop();
3203 } elseif ( $token ===
'eof' ) {
3205 } elseif ( $token ===
'comment' ) {
3206 $this->stack->insertComment(
$value );
3211 if ( !$this->stack->currentNode->isHtmlNamed(
'colgroup' ) ) {
3221 $this->stack->inTableScope(
'tbody' ) ||
3222 $this->stack->inTableScope(
'thead' ) ||
3223 $this->stack->inTableScope(
'tfoot' )
3228 $this->stack->pop();
3233 if ( $token ===
'tag' ) {
3256 } elseif ( $token ===
'endtag' ) {
3266 if ( $this->stack->inTableScope(
$value ) ) {
3287 if ( !$this->stack->inTableScope(
'tr' ) ) {
3291 $this->stack->pop();
3296 if ( $token ===
'tag' ) {
3303 $this->afe->insertMarker();
3317 } elseif ( $token ===
'endtag' ) {
3331 $this->stack->inTableScope(
$value ) &&
3353 if ( $this->stack->inTableScope(
'td' ) ) {
3356 } elseif ( $this->stack->inTableScope(
'th' ) ) {
3364 if ( $token ===
'tag' ) {
3380 } elseif ( $token ===
'endtag' ) {
3384 if ( $this->stack->inTableScope(
$value ) ) {
3385 $this->stack->generateImpliedEndTags();
3386 $this->stack->popTag(
$value );
3387 $this->afe->clearToMarker();
3403 if ( $this->stack->inTableScope(
$value ) ) {
3404 $this->stack->generateImpliedEndTags();
3406 $this->afe->clearToMarker();
3418 if ( $token ===
'text' ) {
3419 $this->stack->insertText(
$value );
3421 } elseif ( $token ===
'eof' ) {
3423 } elseif ( $token ===
'tag' ) {
3427 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3428 $this->stack->pop();
3433 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3434 $this->stack->pop();
3436 if ( $this->stack->currentNode->isHtmlNamed(
'optgroup' ) ) {
3437 $this->stack->pop();
3447 if ( !$this->stack->inSelectScope(
'select' ) ) {
3456 } elseif ( $token ===
'endtag' ) {
3460 $this->stack->currentNode->isHtmlNamed(
'option' ) &&
3461 $this->stack->length() >= 2 &&
3462 $this->stack->node( $this->stack->length() - 2 )->isHtmlNamed(
'optgroup' )
3464 $this->stack->pop();
3466 if ( $this->stack->currentNode->isHtmlNamed(
'optgroup' ) ) {
3467 $this->stack->pop();
3471 if ( $this->stack->currentNode->isHtmlNamed(
'option' ) ) {
3472 $this->stack->pop();
3476 if ( !$this->stack->inSelectScope(
$value ) ) {
3479 $this->stack->popTag(
$value );
3485 } elseif ( $token ===
'comment' ) {
3486 $this->stack->insertComment(
$value );
3503 if ( $token ===
'tag' ) {
3506 } elseif ( $token ===
'endtag' ) {
3507 if ( $this->stack->inTableScope(
$value ) ) {
3519 if ( $token ===
'text' || $token ===
'comment' ) {
3521 } elseif ( $token ===
'eof' ) {
3522 if ( $this->stack->indexOf(
'template' ) < 0 ) {
3525 $this->stack->popTag(
'template' );
3526 $this->afe->clearToMarker();
3527 array_pop( $this->templateInsertionModes );
3532 } elseif ( $token ===
'tag' ) {
3574 } elseif ( $token ===
'endtag' ) {
3581 Assert::invariant(
false,
"Bad token type: $token" );
An iterator which works exactly like:
Convenience class for iterating over an array in reverse order.
HTML sanitizer for MediaWiki.
Some information about database access in MediaWiki By Tim January Database layout For information about the MediaWiki database such as a description of the tables and their contents
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user. For example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
namespace being checked & $result
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing & $attribs
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title after the basic globals have been set but before ordinary actions take place replace
returning false will NOT prevent logging $e