Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 136 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
ExtensionHandler | |
0.00% |
0 / 136 |
|
0.00% |
0 / 6 |
930 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
normalizeExtOptions | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
20 | |||
onExtension | |
0.00% |
0 / 67 |
|
0.00% |
0 / 1 |
182 | |||
onDocumentFragment | |
0.00% |
0 / 51 |
|
0.00% |
0 / 1 |
72 | |||
onTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
stripAnnotations | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\Config\SiteConfig; |
8 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
9 | use Wikimedia\Parsoid\Ext\ExtensionError; |
10 | use Wikimedia\Parsoid\Ext\ExtensionTag; |
11 | use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; |
12 | use Wikimedia\Parsoid\NodeData\DataMw; |
13 | use Wikimedia\Parsoid\NodeData\DataMwError; |
14 | use Wikimedia\Parsoid\Tokens\Token; |
15 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
16 | use Wikimedia\Parsoid\Utils\DOMUtils; |
17 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
18 | use Wikimedia\Parsoid\Utils\TokenUtils; |
19 | use Wikimedia\Parsoid\Utils\Utils; |
20 | use Wikimedia\Parsoid\Utils\WTUtils; |
21 | use Wikimedia\Parsoid\Wt2Html\TokenHandlerPipeline; |
22 | |
23 | class ExtensionHandler extends TokenHandler { |
24 | |
/**
 * Create the handler; both arguments are handed to the parent
 * constructor unchanged.
 *
 * @param TokenHandlerPipeline $manager
 * @param array $options
 */
public function __construct( TokenHandlerPipeline $manager, array $options ) {
	parent::__construct( $manager, $options );
}
28 | |
/**
 * Rewrite the value of every extension option as decoded plain text.
 *
 * Mimics Sanitizer::decodeTagAttributes from the PHP parser.
 *
 * The tokenizer parses option values into tokens in case they belong to
 * an HTML tag, but extension options are always interpreted as plain
 * text, so the text source is used here instead.
 *
 * @param array $options Option entries; each entry's `v` property is
 *   overwritten in place
 * @param string $normalizeFlag 'keepspaces' leaves whitespace alone,
 *   'trim' only trims, anything else collapses whitespace runs to a single
 *   space and trims
 * @return array The same $options array that was passed in
 */
private static function normalizeExtOptions( array $options, string $normalizeFlag ): array {
	$total = count( $options );
	for ( $idx = 0; $idx < $total; $idx++ ) {
		$kv = $options[$idx];

		// Prefer the original source. Otherwise fall back to the value,
		// flattened to a string, since it can be a token stream when the
		// parser has recognized it as a directive.
		$raw = $kv->vsrc ??
			TokenUtils::tokensToString( $kv->v, false, [ 'includeEntities' => true ] );

		// Extensions pick the whitespace treatment they want; the default
		// is normalized spaces plus trimming.
		switch ( $normalizeFlag ) {
			case 'keepspaces':
				$text = $raw;
				break;
			case 'trim':
				$text = trim( $raw );
				break;
			default:
				$text = trim( preg_replace( '/[\r\n\t ]+/', ' ', $raw ) );
		}

		// Decode character references
		$kv->v = Utils::decodeWtEntities( $text );
	}
	return $options;
}
58 | |
/**
 * Handle a single extension tag token.
 *
 * Steps, in order:
 *  - bump usage metrics when the site config provides a metrics object;
 *  - normalize the tag's options (this must precede the data-mw
 *    computation, which depends on the normalized values);
 *  - if the extension has a native implementation, call its sourceToDom()
 *    and encapsulate the resulting fragment;
 *  - otherwise, or when sourceToDom() returns false, parse the tag source
 *    via PipelineUtils::parseToHTML() and encapsulate that.
 *
 * @param Token $token The extension token
 * @return TokenHandlerResult
 */
private function onExtension( Token $token ): TokenHandlerResult {
	$env = $this->env;
	$siteConfig = $env->getSiteConfig();
	$extensionName = $token->getAttributeV( 'name' );
	$extConfig = $siteConfig->getExtTagConfig( $extensionName );

	$metrics = $siteConfig->metrics();
	if ( $metrics ) {
		// Track uses of extensions
		$wiki = $siteConfig->iwp();
		$ns = $env->getContextTitle()->getNamespace();
		if ( $ns === 0 ) {
			// Article space
			$nsName = 'main';
		} elseif ( $siteConfig->namespaceIsTalk( $ns ) ) {
			// Any talk namespace
			$nsName = 'talk';
		} else {
			// Everything else
			$nsName = "ns-$ns";
		}
		$metrics->increment( "extension.{$wiki}.{$nsName}.{$extensionName}" );
		$siteConfig->incrementCounter( "extension_total", [
			"wiki" => $wiki,
			"namespace" => $nsName,
			"name" => $extensionName,
		] );
	}

	$nativeExt = $siteConfig->getExtTagImpl( $extensionName );
	$options = $token->getAttributeV( 'options' );
	$normalizeFlag = $extConfig['options']['wt2html']['attributeWSNormalizationPref'] ?? 'normalize';
	$token->setAttribute( 'options', self::normalizeExtOptions( $options, $normalizeFlag ) );

	// Call after normalizing extension options, since that can affect the result
	$dataMw = Utils::getExtArgInfo( $token );

	if ( $nativeExt !== null ) {
		$extArgs = $token->getAttributeV( 'options' );
		$extApi = new ParsoidExtensionAPI( $env, [
			'wt2html' => [
				'frame' => $this->manager->getFrame(),
				'parseOpts' => $this->options,
				'extTag' => new ExtensionTag( $token ),
			],
		] );
		try {
			$extSrc = $dataMw->body->extsrc ?? '';
			if ( !( $extConfig['options']['hasWikitextInput'] ?? true ) ) {
				$extSrc = $this->stripAnnotations( $extSrc, $siteConfig );
			}
			$domFragment = $nativeExt->sourceToDom( $extApi, $extSrc, $extArgs );
			$errors = $extApi->getErrors();
			// sourceToDom() may also return false (no custom handling) or
			// null (instance dropped); only a real fragment has a first
			// node whose data-mw can be picked up.
			if (
				$domFragment instanceof DocumentFragment &&
				( $extConfig['options']['wt2html']['customizesDataMw'] ?? false )
			) {
				$firstNode = $domFragment->firstChild;
				DOMUtils::assertElt( $firstNode );
				$dataMw = DOMDataUtils::getDataMw( $firstNode );
			}
		} catch ( ExtensionError $e ) {
			$domFragment = WTUtils::createInterfaceI18nFragment(
				$env->getTopLevelDoc(), $e->err->key, $e->err->params ?: null
			);
			$errors = [ $e->err ];
			// FIXME: Should we include any errors collected
			// from $extApi->getErrors() here? Also, what's the correct $dataMw
			// to apply in this case?
		}

		if ( $domFragment === null ) {
			// The extension dropped this instance completely (!!)
			// Should be a rarity and presumably the extension
			// knows what it is doing. Ex: nested refs are dropped
			// in some scenarios.
			return new TokenHandlerResult( [] );
		}

		if ( $domFragment !== false ) {
			// Turn this document fragment into a token
			$toks = $this->onDocumentFragment(
				$token, $domFragment, $dataMw, $errors
			);
			return new TokenHandlerResult( $toks );
		}

		// Fall through: this extension is electing not to use
		// a custom sourceToDom method (by returning false from
		// sourceToDom).
	}

	$start = microtime( true );
	$domFragment = PipelineUtils::parseToHTML( $env, $token->getAttributeV( 'source' ) );
	if ( $env->profiling() ) {
		$profile = $env->getCurrentProfile();
		$profile->bumpMWTime( "Extension", 1000 * ( microtime( true ) - $start ), "api" );
		$profile->bumpCount( "Extension" );
	}
	if ( !$domFragment ) {
		// Nothing came back; substitute an empty fragment so that the
		// encapsulation below still has something to work with.
		$domFragment = DOMUtils::parseHTMLToFragment( $env->getTopLevelDoc(), '' );
	}
	$toks = $this->onDocumentFragment( $token, $domFragment, $dataMw, [] );
	return new TokenHandlerResult( $toks );
}
162 | |
/**
 * DOMFragment-based encapsulation.
 *
 * Optionally dumps the extension input/output to the logger, then (for
 * every tag other than nowiki) marks up the fragment's first node with the
 * extension typeof, data-mw, an about id shared with its siblings, and
 * source info copied from the token, and finally tunnels the fragment
 * through the token stream.
 *
 * @param Token $extToken
 * @param DocumentFragment $domFragment
 * @param DataMw $dataMw
 * @param list<DataMwError> $errors
 * @return array
 */
private function onDocumentFragment(
	Token $extToken, DocumentFragment $domFragment, DataMw $dataMw,
	array $errors
): array {
	$env = $this->env;
	$tagName = $extToken->getAttributeV( 'name' );

	if ( $env->hasDumpFlag( 'extoutput' ) ) {
		$log = $env->getSiteConfig()->getLogger();
		$log->warning( str_repeat( '=', 80 ) );
		$log->warning( 'EXTENSION INPUT: ' . $extToken->getAttributeV( 'source' ) );
		$log->warning( str_repeat( '=', 80 ) );
		$log->warning( "EXTENSION OUTPUT:\n" );
		$log->warning( DOMUtils::getFragmentInnerHTML( $domFragment ) );
		$log->warning( str_repeat( '-', 80 ) );
	}

	$tunnelOpts = [
		'setDSR' => true,
		'wrapperName' => $tagName,
	];

	// A tag may ask for its DOM fragment not to be unpacked (unpacking
	// the content fragment automatically is the default). Its wt2html
	// options are merged in without overriding the keys set above.
	$tagConfig = $env->getSiteConfig()->getExtTagConfig( $tagName );
	$tunnelOpts += $tagConfig['options']['wt2html'] ?? [];

	// From the beginning, Parsoid has treated <nowiki>s as core
	// functionality with lean markup (no about, no data-mw, custom
	// typeof). This stays hardcoded to avoid exposing the functionality
	// to other native extensions until it's needed.
	if ( $tagName === 'nowiki' ) {
		return PipelineUtils::tunnelDOMThroughTokens(
			$env, $extToken, $domFragment, $tunnelOpts
		);
	}

	if ( !$domFragment->hasChildNodes() ) {
		// RT extensions expanding to nothing.
		$placeholder = $domFragment->ownerDocument->createElement( 'link' );
		$domFragment->appendChild( $placeholder );
	}

	// Wrap the top-level nodes so that there is a first element
	// to annotate with the typeof and to apply about ids to.
	PipelineUtils::addSpanWrappers( $domFragment->childNodes );

	$wrapper = $domFragment->firstChild;
	DOMUtils::assertElt( $wrapper );

	// Adds the wrapper attributes to the first element
	DOMUtils::addTypeOf( $wrapper, "mw:Extension/{$tagName}" );

	// FIXME: What happens if the first node is template generated, since
	// they have higher precedence? These questions and more in T214241
	Assert::invariant(
		!DOMUtils::hasTypeOf( $wrapper, 'mw:Transclusion' ),
		'First node of extension content is transcluded.'
	);

	if ( count( $errors ) > 0 ) {
		DOMUtils::addTypeOf( $wrapper, 'mw:Error' );
		$priorErrors = $dataMw->errors ?? null;
		if ( is_array( $priorErrors ) ) {
			$dataMw->errors = array_merge( $priorErrors, $errors );
		} else {
			$dataMw->errors = $errors;
		}
	}

	// Set data-mw
	// FIXME: Similar to T214241, we're clobbering the first node
	DOMDataUtils::setDataMw( $wrapper, $dataMw );

	// Add about to all wrapper tokens.
	$about = $env->newAboutId();
	for ( $node = $wrapper; $node; $node = $node->nextSibling ) {
		$node->setAttribute( 'about', $about );
	}

	// Update data-parsoid
	$tokenDp = $extToken->dataParsoid;
	$dp = DOMDataUtils::getDataParsoid( $wrapper );
	$dp->tsr = clone $tokenDp->tsr;
	$dp->src = $tokenDp->src;
	DOMDataUtils::setDataParsoid( $wrapper, $dp );

	return PipelineUtils::tunnelDOMThroughTokens(
		$env, $extToken, $domFragment, $tunnelOpts
	);
}
267 | |
/**
 * @inheritDoc
 */
public function onTag( Token $token ): ?TokenHandlerResult {
	// Only 'extension' tokens are handled here; null is returned for
	// every other token name.
	if ( $token->getName() !== 'extension' ) {
		return null;
	}
	return $this->onExtension( $token );
}
274 | |
/**
 * Run the source string through every annotation stripper registered on
 * the site config, feeding each stripper the output of the previous one.
 *
 * @param string $s Extension source text
 * @param SiteConfig $siteConfig Provides the list of annotation strippers
 * @return string $s after all strippers have been applied; $s itself when
 *   there are no strippers
 */
private function stripAnnotations( string $s, SiteConfig $siteConfig ): string {
	$res = $s;
	foreach ( $siteConfig->getAnnotationStrippers() as $annotationStripper ) {
		// Chain on $res, not $s: passing the untouched input every time
		// would throw away the work of all but the last stripper.
		$res = $annotationStripper->stripAnnotations( $res );
	}
	return $res;
}
284 | } |