Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 134 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
ExtensionHandler | |
0.00% |
0 / 134 |
|
0.00% |
0 / 6 |
870 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
normalizeExtOptions | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
onExtension | |
0.00% |
0 / 69 |
|
0.00% |
0 / 1 |
210 | |||
onDocumentFragment | |
0.00% |
0 / 51 |
|
0.00% |
0 / 1 |
72 | |||
onTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
stripAnnotations | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Assert\UnreachableException; |
8 | use Wikimedia\Parsoid\Config\SiteConfig; |
9 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
10 | use Wikimedia\Parsoid\Ext\ExtensionError; |
11 | use Wikimedia\Parsoid\Ext\ExtensionTag; |
12 | use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; |
13 | use Wikimedia\Parsoid\NodeData\DataMw; |
14 | use Wikimedia\Parsoid\NodeData\DataMwError; |
15 | use Wikimedia\Parsoid\Tokens\Token; |
16 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
17 | use Wikimedia\Parsoid\Utils\DOMUtils; |
18 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
19 | use Wikimedia\Parsoid\Utils\TokenUtils; |
20 | use Wikimedia\Parsoid\Utils\Utils; |
21 | use Wikimedia\Parsoid\Utils\WTUtils; |
22 | use Wikimedia\Parsoid\Wt2Html\TokenTransformManager; |
23 | |
24 | class ExtensionHandler extends TokenHandler { |
25 | |
/**
 * Forwards both arguments unchanged to the parent TokenHandler constructor.
 *
 * @param TokenTransformManager $manager
 * @param array $options
 */
public function __construct( TokenTransformManager $manager, array $options ) {
	parent::__construct( $manager, $options );
}
29 | |
/**
 * Reduce extension tag options to normalized plain-text values.
 *
 * Mimics Sanitizer::decodeTagAttributes from the PHP parser. Extension
 * options should always be interpreted as plain text; the tokenizer parses
 * them to tokens in case they are for an HTML tag, but here the text
 * source is used instead.
 *
 * The `v` property of every option object is overwritten in place.
 *
 * @param array $options Option objects exposing `v` and, optionally, `vsrc`
 * @return array The same option objects, each with a normalized `v`
 */
private static function normalizeExtOptions( array $options ): array {
	foreach ( $options as $option ) {
		// Prefer the original source text. Otherwise fall back to the value,
		// flattened to a string, since it can be a token stream if the parser
		// has recognized it as a directive.
		$raw = $option->vsrc
			?? TokenUtils::tokensToString( $option->v, false, [ 'includeEntities' => true ] );

		// Collapse whitespace runs in the attribute value.
		// FIXME: If the option is parsed as wikitext, this normalization
		// can mess with src offsets.
		$collapsed = trim( preg_replace( '/[\t\r\n ]+/', ' ', $raw ) );

		// Decode character references
		$option->v = Utils::decodeWtEntities( $collapsed );
	}

	return $options;
}
52 | |
/**
 * Expand an extension tag token.
 *
 * Steps, in order:
 *  - bump usage metrics when the site config provides a metrics object;
 *  - normalize the tag's options and compute its data-mw;
 *  - if a native implementation is registered, ask it for a DOM fragment
 *    via sourceToDom(). A fragment is encapsulated and returned, `null`
 *    yields an empty result, and `false` falls through to the next step;
 *  - otherwise fetch HTML for the tag source through
 *    PipelineUtils::fetchHTML() and encapsulate that.
 *
 * @param Token $token The extension token
 * @return TokenHandlerResult
 * @throws UnreachableException If a cached expansion exists for the token
 *   source and the native implementation did not produce a result
 */
private function onExtension( Token $token ): TokenHandlerResult {
	$env = $this->env;
	$siteConfig = $env->getSiteConfig();
	$extensionName = $token->getAttributeV( 'name' );
	$extConfig = $siteConfig->getExtTagConfig( $extensionName );

	$metrics = $siteConfig->metrics();
	if ( $metrics ) {
		// Track uses of extensions
		$wiki = $siteConfig->iwp();
		$ns = $env->getContextTitle()->getNamespace();
		if ( $ns === 0 ) {
			// Article space
			$nsName = 'main';
		} elseif ( $siteConfig->namespaceIsTalk( $ns ) ) {
			// Any talk namespace
			$nsName = 'talk';
		} else {
			// Everything else
			$nsName = "ns-$ns";
		}
		$metrics->increment( "extension.{$wiki}.{$nsName}.{$extensionName}" );
		$siteConfig->incrementCounter( "extension_total", [
			"wiki" => $wiki,
			"namespace" => $nsName,
			"name" => $extensionName,
		] );
	}

	$nativeExt = $siteConfig->getExtTagImpl( $extensionName );
	$cachedExpansion = $env->extensionCache[$token->dataParsoid->src] ?? null;

	$options = $token->getAttributeV( 'options' );
	$token->setAttribute( 'options', self::normalizeExtOptions( $options ) );

	// Call after normalizing extension options, since that can affect the result
	$dataMw = Utils::getExtArgInfo( $token );

	if ( $nativeExt !== null ) {
		$extArgs = $token->getAttributeV( 'options' );
		$extApi = new ParsoidExtensionAPI( $env, [
			'wt2html' => [
				'frame' => $this->manager->getFrame(),
				'parseOpts' => $this->options,
				'extTag' => new ExtensionTag( $token ),
			],
		] );
		try {
			$extSrc = $dataMw->body->extsrc ?? '';
			if ( !( $extConfig['options']['hasWikitextInput'] ?? true ) ) {
				$extSrc = $this->stripAnnotations( $extSrc, $siteConfig );
			}
			$domFragment = $nativeExt->sourceToDom( $extApi, $extSrc, $extArgs );
			$errors = $extApi->getErrors();
			// Only read a customized data-mw when there really is a fragment:
			// sourceToDom may also return null (instance dropped) or false
			// (no custom handling), both of which are handled below.
			if (
				$domFragment instanceof DocumentFragment &&
				( $extConfig['options']['wt2html']['customizesDataMw'] ?? false )
			) {
				$firstNode = $domFragment->firstChild;
				DOMUtils::assertElt( $firstNode );
				$dataMw = DOMDataUtils::getDataMw( $firstNode );
			}
		} catch ( ExtensionError $e ) {
			$domFragment = WTUtils::createInterfaceI18nFragment(
				$env->getTopLevelDoc(), $e->err->key, $e->err->params ?: null
			);
			$errors = [ $e->err ];
			// FIXME: Should we include any errors collected
			// from $extApi->getErrors() here? Also, what's the correct $dataMw
			// to apply in this case?
		}

		if ( $domFragment === null ) {
			// The extension dropped this instance completely (!!)
			// Should be a rarity and presumably the extension
			// knows what it is doing. Ex: nested refs are dropped
			// in some scenarios.
			return new TokenHandlerResult( [] );
		}

		if ( $domFragment !== false ) {
			// Turn this document fragment into a token
			$toks = $this->onDocumentFragment(
				$token, $domFragment, $dataMw, $errors
			);
			return new TokenHandlerResult( $toks );
		}

		// Fall through: this extension is electing not to use
		// a custom sourceToDom method (by returning false from
		// sourceToDom).
	}

	if ( $cachedExpansion ) {
		// WARNING: THIS HAS BEEN UNUSED SINCE 2015, SEE T98995.
		// THE CACHED-EXPANSION PATH WAS WRITTEN BUT APPARENTLY NEVER TESTED.
		// NO WARRANTY. MAY HALT AND CATCH ON FIRE.
		throw new UnreachableException( 'Should not be here!' );
	}

	$start = microtime( true );
	$domFragment = PipelineUtils::fetchHTML( $env, $token->getAttributeV( 'source' ) );
	if ( $env->profiling() ) {
		$profile = $env->getCurrentProfile();
		$profile->bumpMWTime( "Extension", 1000 * ( microtime( true ) - $start ), "api" );
		$profile->bumpCount( "Extension" );
	}
	if ( !$domFragment ) {
		// Nothing came back: encapsulate an empty fragment instead
		$domFragment = DOMUtils::parseHTMLToFragment( $env->getTopLevelDoc(), '' );
	}
	$toks = $this->onDocumentFragment( $token, $domFragment, $dataMw, [] );

	return new TokenHandlerResult( $toks );
}
169 | |
/**
 * DOMFragment-based encapsulation.
 *
 * For every extension except `nowiki`, the fragment's first top-level node
 * is annotated with the extension typeof, data-mw and data-parsoid, and
 * each top-level node receives a shared about id. The fragment is then
 * handed to PipelineUtils::tunnelDOMThroughTokens().
 *
 * @param Token $extToken
 * @param DocumentFragment $domFragment
 * @param DataMw $dataMw
 * @param list<DataMwError> $errors
 * @return array
 */
private function onDocumentFragment(
	Token $extToken, DocumentFragment $domFragment, DataMw $dataMw,
	array $errors
): array {
	$env = $this->env;
	$extensionName = $extToken->getAttributeV( 'name' );

	if ( $env->hasDumpFlag( 'extoutput' ) ) {
		$logger = $env->getSiteConfig()->getLogger();
		$heavyRule = str_repeat( '=', 80 );
		$logger->warning( $heavyRule );
		$logger->warning( 'EXTENSION INPUT: ' . $extToken->getAttributeV( 'source' ) );
		$logger->warning( $heavyRule );
		$logger->warning( "EXTENSION OUTPUT:\n" );
		$logger->warning( DOMUtils::getFragmentInnerHTML( $domFragment ) );
		$logger->warning( str_repeat( '-', 80 ) );
	}

	// Check if the tag wants its DOM fragment not to be unpacked.
	// The default setting is to unpack the content DOM fragment automatically.
	// The defaults listed first win over same-named tag-supplied options.
	$extConfig = $env->getSiteConfig()->getExtTagConfig( $extensionName );
	$tunnelOpts = [
		'setDSR' => true,
		'wrapperName' => $extensionName,
	] + ( $extConfig['options']['wt2html'] ?? [] );

	// This special case is only because, from the beginning, Parsoid has
	// treated <nowiki>s as core functionality with lean markup (no about,
	// no data-mw, custom typeof).
	//
	// We'll keep this hardcoded to avoid exposing the functionality to
	// other native extensions until it's needed.
	if ( $extensionName === 'nowiki' ) {
		return PipelineUtils::tunnelDOMThroughTokens(
			$env, $extToken, $domFragment, $tunnelOpts
		);
	}

	if ( !$domFragment->hasChildNodes() ) {
		// RT extensions expanding to nothing.
		$placeholder = $domFragment->ownerDocument->createElement( 'link' );
		$domFragment->appendChild( $placeholder );
	}

	// Wrap the top-level nodes so that we have a firstNode element
	// to annotate with the typeof and to apply about ids.
	PipelineUtils::addSpanWrappers( $domFragment->childNodes );

	$firstNode = $domFragment->firstChild;
	DOMUtils::assertElt( $firstNode );

	// Adds the wrapper attributes to the first element
	DOMUtils::addTypeOf( $firstNode, "mw:Extension/{$extensionName}" );

	// FIXME: What happens if $firstNode is template generated, since
	// they have higher precedence? These questions and more in T214241
	Assert::invariant(
		!DOMUtils::hasTypeOf( $firstNode, 'mw:Transclusion' ),
		'First node of extension content is transcluded.'
	);

	if ( $errors !== [] ) {
		DOMUtils::addTypeOf( $firstNode, 'mw:Error' );
		$existingErrors = $dataMw->errors ?? null;
		if ( is_array( $existingErrors ) ) {
			$dataMw->errors = array_merge( $existingErrors, $errors );
		} else {
			$dataMw->errors = $errors;
		}
	}

	// Set data-mw
	// FIXME: Similar to T214241, we're clobbering $firstNode
	DOMDataUtils::setDataMw( $firstNode, $dataMw );

	// Add about to all wrapper tokens.
	$about = $env->newAboutId();
	for ( $node = $firstNode; $node; $node = $node->nextSibling ) {
		$node->setAttribute( 'about', $about );
	}

	// Update data-parsoid
	$dp = DOMDataUtils::getDataParsoid( $firstNode );
	$dp->tsr = clone $extToken->dataParsoid->tsr;
	$dp->src = $extToken->dataParsoid->src;
	DOMDataUtils::setDataParsoid( $firstNode, $dp );

	return PipelineUtils::tunnelDOMThroughTokens(
		$env, $extToken, $domFragment, $tunnelOpts
	);
}
274 | |
/**
 * @inheritDoc
 */
public function onTag( Token $token ): ?TokenHandlerResult {
	// Anything that is not an `extension` token is not handled here.
	if ( $token->getName() !== 'extension' ) {
		return null;
	}

	return $this->onExtension( $token );
}
281 | |
/**
 * Run a string through every annotation stripper provided by the site
 * configuration, chaining them so each one receives the previous
 * stripper's output.
 *
 * @param string $s Source text to strip
 * @param SiteConfig $siteConfig Supplies the annotation strippers
 * @return string The text after all strippers have been applied; $s
 *   unchanged when there are no strippers
 */
private function stripAnnotations( string $s, SiteConfig $siteConfig ): string {
	$res = $s;
	foreach ( $siteConfig->getAnnotationStrippers() as $annotationStripper ) {
		// Pass the accumulated result, not the original input; otherwise
		// only the last stripper's work would survive.
		$res = $annotationStripper->stripAnnotations( $res );
	}
	return $res;
}
291 | } |