Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 51 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
SanitizerHandler | |
0.00% |
0 / 51 |
|
0.00% |
0 / 3 |
462 | |
0.00% |
0 / 1 |
sanitizeToken | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
272 | |||
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
onAny | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | /** |
5 | * General token sanitizer. Strips out (or encapsulates) unsafe and disallowed |
6 | * tag types and attributes. Should run last in the third, synchronous |
7 | * expansion stage. |
8 | * |
9 | * FIXME: This code was originally ported from PHP to JS in 2012 |
10 | * and periodically updated before being ported back to PHP. This code should be
11 | * (a) resynced with core sanitizer changes (b) updated to use HTML5 spec |
12 | */ |
13 | |
14 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
15 | |
16 | use Wikimedia\Parsoid\Config\SiteConfig; |
17 | use Wikimedia\Parsoid\Core\Sanitizer; |
18 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
19 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
20 | use Wikimedia\Parsoid\Tokens\TagTk; |
21 | use Wikimedia\Parsoid\Tokens\Token; |
22 | use Wikimedia\Parsoid\Utils\PHPUtils; |
23 | use Wikimedia\Parsoid\Utils\TokenUtils; |
24 | use Wikimedia\Parsoid\Wikitext\Consts; |
25 | use Wikimedia\Parsoid\Wt2Html\Frame; |
26 | use Wikimedia\Parsoid\Wt2Html\TokenTransformManager; |
27 | |
class SanitizerHandler extends TokenHandler {
	/**
	 * Value of the 'inTemplate' handler option. When true, token source
	 * offsets (tsr) are not used to recover the original wikitext of a
	 * disallowed tag.
	 *
	 * @var bool
	 */
	private $inTemplate;

	/**
	 * Tags whose *end* tag is always converted to plain text, even though
	 * the tag name itself is an allowed literal tag.
	 */
	private const NO_END_TAG_SET = [ 'br' => true ];

	/**
	 * Sanitize a token.
	 *
	 * HTML-syntax tags that are not in the allowed literal tag list (or that
	 * are end tags listed in NO_END_TAG_SET) are converted to a plain string.
	 * For every other token that carries attributes, the attributes are run
	 * through Sanitizer::sanitizeTagAttrs() (start / self-closing tags) or
	 * dropped (any other token).
	 *
	 * If the token is unmodified, return null.
	 *
	 * XXX: Make attribute sanitization reversible by storing round-trip info in
	 * $token->dataParsoid object (which is serialized as JSON in a data-parsoid
	 * attribute in the DOM).
	 *
	 * @param SiteConfig $siteConfig
	 * @param Frame $frame
	 * @param Token|string $token
	 * @param bool $inTemplate
	 * @return Token|string|null The replacement string, the token with
	 *   rewritten attributes, or null when nothing was changed.
	 */
	private function sanitizeToken(
		SiteConfig $siteConfig, Frame $frame, $token, bool $inTemplate
	) {
		// Normalise a missing attribute list to an empty array so that the
		// code below never iterates over or counts a null value.
		$attribs = $token->attribs ?? [];
		$allowedTags = Consts::$Sanitizer['AllowedLiteralTags'];

		if ( TokenUtils::isHTMLTag( $token )
			&& ( empty( $allowedTags[$token->getName()] )
				|| ( $token instanceof EndTagTk && !empty( self::NO_END_TAG_SET[$token->getName()] ) )
			)
		) { // unknown tag -- convert to plain text
			if ( !$inTemplate && !empty( $token->dataParsoid->tsr ) ) {
				// Just get the original token source, so that we can avoid
				// whitespace differences.
				return $token->getWTSource( $frame );
			}

			if ( $token instanceof EndTagTk ) {
				return '</' . $token->getName() . '>';
			}

			// Handle things without a TSR: For example template or extension
			// content. Whitespace in these is not necessarily preserved.
			$buf = '<' . $token->getName();
			foreach ( $attribs as $kv ) {
				$buf .= ' ' . TokenUtils::tokensToString( $kv->k ) .
					"='" . TokenUtils::tokensToString( $kv->v ) . "'";
			}
			if ( $token instanceof SelfclosingTagTk ) {
				$buf .= ' /';
			}
			return $buf . '>';
		}

		if ( !$attribs ) {
			// Nothing to sanitize: report the token as unmodified.
			return null;
		}

		if ( $token instanceof TagTk || $token instanceof SelfclosingTagTk ) {
			// Sanitize attributes
			$newAttrs = Sanitizer::sanitizeTagAttrs( $siteConfig, null, $token, $attribs );

			// Reset token attribs and rebuild
			$token->attribs = [];

			// SSS FIXME: We are right now adding shadow information for all sanitized
			// attributes. This is being done to minimize dirty diffs for the first
			// cut. It can be reasonably argued that we can permanently delete dangerous
			// and unacceptable attributes in the interest of safety/security and the
			// resultant dirty diffs should be acceptable. But, this is something to do
			// in the future once we have passed the initial tests of parsoid acceptance.
			//
			// NOTE(review): the meaning of the three tuple entries in $v is
			// defined by Sanitizer::sanitizeTagAttrs(); confirm there.
			foreach ( $newAttrs as $k => $v ) {
				// explicit check against null to prevent discarding empty strings
				if ( $v[0] !== null ) {
					$token->addNormalizedAttribute( $k, $v[0], $v[1] );
				} else {
					$token->setShadowInfo( $v[2], $v[0], $v[1] );
				}
			}
		} else {
			// EndTagTk, drop attributes
			$token->attribs = [];
		}

		return $token;
	}

	/**
	 * @param TokenTransformManager $manager manager environment
	 * @param array $options various configuration options; the 'inTemplate'
	 *   entry is read here and treated as false when absent.
	 */
	public function __construct( TokenTransformManager $manager, array $options ) {
		parent::__construct( $manager, $options );
		// Tolerate a missing key: a null value here would later be rejected
		// by the strict bool parameter of sanitizeToken().
		$this->inTemplate = !empty( $options['inTemplate'] );
	}

	/**
	 * Sanitize any non-string token and trace the input and the result to
	 * the 'trace/sanitizer' log channel. Strings and empty-line meta tokens
	 * are passed through untouched (null result).
	 *
	 * @inheritDoc
	 */
	public function onAny( $token ): ?TokenHandlerResult {
		if ( is_string( $token ) ) {
			return null;
		}

		$env = $this->env;
		$env->log( 'trace/sanitizer', $this->pipelineId, static function () use ( $token ) {
			return PHPUtils::jsonEncode( $token );
		} );

		// Pass through a transparent line meta-token
		if ( TokenUtils::isEmptyLineMetaToken( $token ) ) {
			$env->log( 'trace/sanitizer', $this->pipelineId, '--unchanged--' );
			return null;
		}

		$token = $this->sanitizeToken(
			$env->getSiteConfig(), $this->manager->getFrame(), $token, $this->inTemplate
		);

		$env->log( 'trace/sanitizer', $this->pipelineId, static function () use ( $token ) {
			return ' ---> ' . PHPUtils::jsonEncode( $token );
		} );

		// A null result tells the caller the token was left unchanged.
		return $token === null ? null : new TokenHandlerResult( [ $token ] );
	}
}