Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 47 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
| SanitizerHandler | |
0.00% |
0 / 47 |
|
0.00% |
0 / 3 |
506 | |
0.00% |
0 / 1 |
| sanitizeToken | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
272 | |||
| __construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| onAny | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | /** |
| 5 | * General token sanitizer. Strips out (or encapsulates) unsafe and disallowed |
| 6 | * tag types and attributes. Should run last in the third, synchronous |
| 7 | * expansion stage. |
| 8 | * |
| 9 | * FIXME: This code was originally ported from PHP to JS in 2012 |
| 10 | * and periodically updated before being ported back to PHP. This code should be
| 11 | * (a) resynced with core sanitizer changes (b) updated to use HTML5 spec |
| 12 | */ |
| 13 | |
| 14 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
| 15 | |
| 16 | use Wikimedia\Parsoid\Config\SiteConfig; |
| 17 | use Wikimedia\Parsoid\Core\Sanitizer; |
| 18 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
| 19 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
| 20 | use Wikimedia\Parsoid\Tokens\TagTk; |
| 21 | use Wikimedia\Parsoid\Tokens\Token; |
| 22 | use Wikimedia\Parsoid\Utils\TokenUtils; |
| 23 | use Wikimedia\Parsoid\Wikitext\Consts; |
| 24 | use Wikimedia\Parsoid\Wt2Html\Frame; |
| 25 | use Wikimedia\Parsoid\Wt2Html\TokenHandlerPipeline; |
| 26 | |
| 27 | class SanitizerHandler extends TokenHandler {
| 28 | /** @var bool Value of $options['inTemplate'] given to the constructor */
| 29 | private $inTemplate;
| 30 | 
| 31 | private const NO_END_TAG_SET = [ 'br' => true ]; // end tags with these names are turned into plain text
| 32 | 
| 33 | /**
| 34 | * Sanitize a token.
| 35 | *
| 36 | * Disallowed HTML tags come back as a plain-text string and attribute-bearing
| 37 | * tokens are sanitized in place; if the token is unmodified, return null.
| 38 | * XXX: Make attribute sanitation reversible by storing round-trip info in
| 39 | * $token->dataParsoid object (which is serialized as JSON in a data-parsoid
| 40 | * attribute in the DOM).
| 41 | *
| 42 | * @param SiteConfig $siteConfig passed through to Sanitizer::sanitizeTagAttrs()
| 43 | * @param Frame $frame used to fetch the original wikitext source of a rejected tag
| 44 | * @param Token|string $token token to inspect; its attribs may be rewritten in place
| 45 | * @param bool $inTemplate if true, a rejected tag is never replaced by its original source text
| 46 | * @return Token|string|null string for a rejected tag, the same token after attribute handling, else null
| 47 | */
| 48 | private function sanitizeToken(
| 49 | SiteConfig $siteConfig, Frame $frame, $token, bool $inTemplate
| 50 | ) {
| 51 | $i = null;
| 52 | $l = null;
| 53 | $kv = null;
| 54 | $attribs = $token->attribs ?? null;
| 55 | $allowedTags = Consts::$Sanitizer['AllowedLiteralTags'];
| 56 | 
| 57 | if ( TokenUtils::isHTMLTag( $token )
| 58 | && ( empty( $allowedTags[$token->getName()] )
| 59 | || ( $token instanceof EndTagTk && !empty( self::NO_END_TAG_SET[$token->getName()] ) )
| 60 | )
| 61 | ) { // unknown tag, or an end tag named in NO_END_TAG_SET -- convert to plain text
| 62 | if ( !$inTemplate && !empty( $token->dataParsoid->tsr ) ) {
| 63 | // Just get the original token source, so that we can avoid
| 64 | // whitespace differences.
| 65 | $token = $token->getWTSource( $frame );
| 66 | } elseif ( !( $token instanceof EndTagTk ) ) {
| 67 | // Handle things without a TSR: For example template or extension
| 68 | // content. Whitespace in these is not necessarily preserved.
| 69 | $buf = '<' . $token->getName();
| 70 | for ( $i = 0, $l = count( $attribs ); $i < $l; $i++ ) {
| 71 | $kv = $attribs[$i];
| 72 | $buf .= ' ' . TokenUtils::tokensToString( $kv->k ) .
| 73 | "='" . TokenUtils::tokensToString( $kv->v ) . "'";
| 74 | }
| 75 | if ( $token instanceof SelfclosingTagTk ) {
| 76 | $buf .= ' /';
| 77 | }
| 78 | $buf .= '>';
| 79 | $token = $buf;
| 80 | } else {
| 81 | $token = '</' . $token->getName() . '>'; // end tag: rebuilt from its name only
| 82 | }
| 83 | return $token;
| 84 | }
| 85 | 
| 86 | if ( $attribs && count( $attribs ) > 0 ) {
| 87 | // Sanitize attributes of start and self-closing tags
| 88 | if ( $token instanceof TagTk || $token instanceof SelfclosingTagTk ) {
| 89 | $newAttrs = Sanitizer::sanitizeTagAttrs( $siteConfig, null, $token, $attribs );
| 90 | 
| 91 | // Reset token attribs and rebuild
| 92 | $token->attribs = [];
| 93 | 
| 94 | // SSS FIXME: We are right now adding shadow information for all sanitized
| 95 | // attributes. This is being done to minimize dirty diffs for the first
| 96 | // cut. It can be reasonably argued that we can permanently delete dangerous
| 97 | // and unacceptable attributes in the interest of safety/security and the
| 98 | // resultant dirty diffs should be acceptable. But, this is something to do
| 99 | // in the future once we have passed the initial tests of parsoid acceptance.
| 100 | foreach ( $newAttrs as $k => $v ) {
| 101 | // explicit check against null to prevent discarding empty strings
| 102 | if ( $v[0] !== null ) {
| 103 | $token->addNormalizedAttribute( $k, $v[0], $v[1] );
| 104 | } else {
| 105 | $token->setShadowInfo( $v[2], $v[0], $v[1] );
| 106 | }
| 107 | }
| 108 | } else {
| 109 | // EndTagTk, drop attributes
| 110 | $token->attribs = [];
| 111 | }
| 112 | return $token; // same object, modified in place
| 113 | }
| 114 | 
| 115 | return null;
| 116 | }
| 117 | 
| 118 | /**
| 119 | * @param TokenHandlerPipeline $manager manager environment
| 120 | * @param array $options various configuration options; 'inTemplate' is read here
| 121 | */
| 122 | public function __construct( TokenHandlerPipeline $manager, array $options ) {
| 123 | parent::__construct( $manager, $options );
| 124 | $this->inTemplate = $options['inTemplate'];
| 125 | }
| 126 | 
| 127 | /**
| 128 | * @inheritDoc
| 129 | */
| 130 | public function onAny( $token ): ?array {
| 131 | if ( is_string( $token ) ) { // plain strings are not sanitized here
| 132 | return null;
| 133 | }
| 134 | $env = $this->env;
| 135 | $env->trace( 'sanitizer', $this->pipelineId, $token );
| 136 | 
| 137 | // Pass through a transparent line meta-token
| 138 | if ( TokenUtils::isEmptyLineMetaToken( $token ) ) {
| 139 | $env->trace( 'sanitizer', $this->pipelineId, '--unchanged--' );
| 140 | return null;
| 141 | }
| 142 | 
| 143 | $newToken = $this->sanitizeToken(
| 144 | $env->getSiteConfig(), $this->manager->getFrame(), $token, $this->inTemplate
| 145 | );
| 146 | 
| 147 | $env->trace( 'sanitizer', $this->pipelineId, $newToken );
| 148 | return ( $newToken === null || $newToken === $token ) ? null : [ $newToken ]; // null unless a different value replaces the token
| 149 | }
| 150 | }