Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 144 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
TemplateEncapsulator | |
0.00% |
0 / 144 |
|
0.00% |
0 / 7 |
1806 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
encapTokens | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
72 | |||
getTemplateInfo | |
0.00% |
0 / 66 |
|
0.00% |
0 / 1 |
420 | |||
getEncapsulationInfo | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
getEncapsulationInfoEndTag | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
isSimpleParam | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
42 | |||
getParamHTML | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\Config\Env; |
8 | use Wikimedia\Parsoid\DOM\Element; |
9 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
10 | use Wikimedia\Parsoid\NodeData\ParamInfo; |
11 | use Wikimedia\Parsoid\NodeData\TemplateInfo; |
12 | use Wikimedia\Parsoid\Tokens\CommentTk; |
13 | use Wikimedia\Parsoid\Tokens\KV; |
14 | use Wikimedia\Parsoid\Tokens\NlTk; |
15 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
16 | use Wikimedia\Parsoid\Tokens\SourceRange; |
17 | use Wikimedia\Parsoid\Tokens\Token; |
18 | use Wikimedia\Parsoid\Utils\ContentUtils; |
19 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
20 | use Wikimedia\Parsoid\Utils\DOMUtils; |
21 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
22 | use Wikimedia\Parsoid\Utils\TokenUtils; |
23 | use Wikimedia\Parsoid\Wt2Html\Frame; |
24 | |
/**
 * A helper class for TemplateHandler that encapsulates template-like syntax
 * with the appropriate meta tags, adding argument info data.
 *
 * One instance wraps a single template-like token: it allocates an about id
 * in the constructor and then, via encapTokens(), surrounds the expanded
 * tokens with a start and an end <meta> marker that share that about id.
 */
class TemplateEncapsulator {
	/** Environment; source of the about id, the site config and the logger. */
	private Env $env;
	/** Frame whose source text the parameter source offsets index into. */
	private Frame $frame;
	/** Value emitted as the start marker's `typeof` ("/End" is appended for the end marker). */
	private string $wrapperType;
	/** About id shared by the start and end markers. */
	private string $aboutId;
	/** The template-like token being encapsulated; attribs[0] is the target. */
	public Token $token;
	/** Set by the caller when the token is a variable; exposed as TemplateInfo::$func. */
	public ?string $variableName = null;
	/** Set by the caller when the token is a parser function; exposed as TemplateInfo::$func. */
	public ?string $parserFunctionName = null;
	/** Set by the caller when the token is a template; exposed as TemplateInfo::$href. */
	public ?string $resolvedTemplateTarget = null;

	/**
	 * @param Env $env
	 * @param Frame $frame
	 * @param Token $token The template-like token to encapsulate
	 * @param string $wrapperType `typeof` value for the wrapper meta tags
	 */
	public function __construct( Env $env, Frame $frame, Token $token, string $wrapperType ) {
		$this->env = $env;
		$this->frame = $frame;
		$this->token = $token;
		$this->wrapperType = $wrapperType;
		$this->aboutId = $env->newAboutId();
	}

	/**
	 * Main entry point.
	 * Encapsulate the template element, including the arguments.
	 *
	 * The returned list is: start meta, the given tokens, end meta. The
	 * template/parameter info is attached to the start meta's temporary
	 * data-parsoid as `tplarginfo`.
	 *
	 * @param array $tokens Expanded tokens of the transclusion
	 * @return array
	 */
	public function encapTokens( array $tokens ): array {
		$toks = $this->getEncapsulationInfo( $tokens );
		$toks[] = $this->getEncapsulationInfoEndTag();
		$tplInfo = $this->getTemplateInfo();

		// When the site config does not ask for HTML template parameters,
		// nothing is added here and only the parameter wikitext is exposed.
		if ( $this->env->getSiteConfig()->addHTMLTemplateParameters() ) {
			// Parse the parameters that need parsing
			foreach ( $tplInfo->paramInfos as $paramInfo ) {
				if ( $paramInfo->named ) {
					$paramTokens = $this->token->getAttributeV( $paramInfo->k );
				} else {
					// NOTE(review): for positional parameters $paramInfo->k is the
					// positional counter assigned in getTemplateInfo(), whereas
					// attribs is indexed over *all* parameters. The two only
					// coincide when no named parameter precedes this one -- confirm.
					$paramTokens = $this->token->attribs[$paramInfo->k]->v;
				}

				// No need to pass through a whole sub-pipeline to get the
				// html if the param is either a single string, or if it's
				// just text, comments or newlines.
				if ( $paramTokens &&
					( is_string( $paramTokens ) || self::isSimpleParam( $paramTokens ) )
				) {
					$paramInfo->html = $paramInfo->valueWt;
				} elseif (
					// FIXME: this should not have its own regex parsing separate from the PEG
					preg_match( '#^https?://[^[\]{}\s]*$#D', $paramInfo->valueWt )
				) {
					// If the param is just a simple URL, we can process it to
					// HTML directly without going through a sub-pipeline.
					//
					// The href attribute is single-quoted, so a literal "'" in the
					// URL must be emitted as the character reference &#39; or it
					// would terminate the attribute early.
					//
					// NOTE(review): only "'" is escaped; the regex above still
					// admits characters such as '<', '&' and '"' in both the href
					// and the link text. Confirm whether consumers expect this
					// exact markup before adding further escaping.
					$paramInfo->html = "<a rel='mw:ExtLink' href='" .
						str_replace( "'", '&#39;', $paramInfo->valueWt ) . "'>" .
						$paramInfo->valueWt . '</a>';
				} else {
					$this->getParamHTML( $paramInfo );
				}
			}
		}

		$toks[0]->dataParsoid->getTemp()->tplarginfo = $tplInfo;

		$this->env->log( 'debug', 'TemplateEncapsulator.encapTokens', $toks );
		return $toks;
	}

	/**
	 * Get the public data-mw structure that exposes the template name and
	 * parameters.
	 *
	 * Parameter names and values are taken from the frame's source text via
	 * each parameter's source offsets when those are available; otherwise
	 * the parameter's own `k` / `v` are used.
	 *
	 * @return TemplateInfo
	 */
	private function getTemplateInfo(): TemplateInfo {
		$ret = new TemplateInfo;
		$src = $this->frame->getSrcText();
		$params = $this->token->attribs;
		$paramInfos = [];
		// Counter used to name positional parameters "1", "2", ...
		$argIndex = 1;

		// Use source offsets to extract arg-name and arg-value wikitext
		// since the 'k' and 'v' values in params will be expanded tokens
		//
		// Ignore params[0] -- that is the template name
		for ( $i = 1, $n = count( $params ); $i < $n; $i++ ) {
			$param = $params[$i];
			$srcOffsets = $param->srcOffsets;
			if ( $srcOffsets !== null ) {
				$kSrc = $srcOffsets->key->substr( $src );
				$vSrc = $srcOffsets->value->substr( $src );
			} else {
				// NOTE(review): the string functions below assume these are
				// strings when no source offsets are present -- confirm.
				$kSrc = $param->k;
				$vSrc = $param->v;
			}

			$kWt = trim( $kSrc );
			$k = TokenUtils::tokensToString( $param->k, true, [ 'stripEmptyLineMeta' => true ] );
			if ( is_array( $k ) ) {
				// The PHP parser only removes comments and whitespace to construct
				// the real parameter name, so if there were other tokens, use the
				// original text
				$k = $kWt;
			} else {
				$k = trim( $k );
			}
			$v = $vSrc;

			// Even if k is empty, we need to check v immediately follows. If not,
			// it's a blank parameter name (which is valid) and we shouldn't make it
			// positional.
			if ( $k === '' &&
				$srcOffsets &&
				$srcOffsets->key->end === $srcOffsets->value->start
			) {
				$isPositional = true;
				$k = (string)$argIndex;
				$argIndex++;
			} else {
				$isPositional = false;
				// strip ws from named parameter values
				$v = trim( $v );
			}

			if ( !isset( $paramInfos[$k] ) ) {
				$paramInfo = new ParamInfo( $k, $srcOffsets );

				// Captures leading (1) and trailing (2) whitespace of the key.
				Assert::invariant(
					preg_match( '/^(\s*)(?:.*\S)?(\s*)$/sD', $kSrc, $keySpaceMatch ),
					'Template argument whitespace match failed.'
				);

				if ( $isPositional ) {
					// PHP parser does not strip whitespace around
					// positional params and neither will we.
					$valueSpaceMatch = [ null, '', '' ];
				} else {
					$paramInfo->named = true;
					if ( $v !== '' ) {
						Assert::invariant(
							preg_match( '/^(\s*)(?:.*\S)?(\s*)$/sD', $vSrc, $valueSpaceMatch ),
							'Template argument whitespace match failed.'
						);
					} else {
						// The value is whitespace only: record all of it as
						// trailing space.
						$valueSpaceMatch = [ null, '', $vSrc ];
					}
				}

				// Preserve key and value space prefix / postfix, if any.
				// "=" is the default spacing used by the serializer,
				if ( $keySpaceMatch[1] || $keySpaceMatch[2] || $valueSpaceMatch[1] || $valueSpaceMatch[2] ) {
					// Remember non-standard spacing
					$paramInfo->spc = [
						$keySpaceMatch[1], $keySpaceMatch[2],
						$valueSpaceMatch[1], $valueSpaceMatch[2]
					];
				}
			} else {
				// Repeated parameter name: reuse the existing entry; its value
				// (and possibly keyWt) is overwritten below, so the last
				// occurrence provides the value.
				$paramInfo = $paramInfos[$k];
			}

			$paramInfo->valueWt = $v;
			// Only add the original parameter wikitext if named and different from
			// the actual parameter.
			if ( !$isPositional && $kWt !== $k ) {
				$paramInfo->keyWt = $kWt;
			}
			$paramInfos[$k] = $paramInfo;
		}

		$ret->paramInfos = $paramInfos;

		$tgtSrcOffsets = $params[0]->srcOffsets;
		if ( $tgtSrcOffsets ) {
			$tplTgtWT = $tgtSrcOffsets->key->substr( $src );
			$ret->targetWt = $tplTgtWT;
		}

		// Add in tpl-target/pf-name info
		// Only one of these will be set.
		if ( $this->variableName !== null ) {
			$ret->func = $this->variableName;
		} elseif ( $this->parserFunctionName !== null ) {
			$ret->func = $this->parserFunctionName;
		} elseif ( $this->resolvedTemplateTarget !== null ) {
			$ret->href = $this->resolvedTemplateTarget;
		}

		return $ret;
	}

	/**
	 * Build the start marker and prepend it to the given tokens.
	 *
	 * The marker is a self-closing <meta> carrying the wrapper `typeof` and
	 * the about id; its data-parsoid copies the tsr and src of the token
	 * being encapsulated.
	 *
	 * @param ?array $chunk Tokens to wrap; null or empty yields just the marker
	 * @return array The marker followed by $chunk's tokens
	 */
	private function getEncapsulationInfo( ?array $chunk = null ): array {
		// TODO
		// * only add this information for top-level includes, but track parameter
		// expansion in lower-level templates
		// * ref all tables to this (just add about)
		// * ref end token to this, add property="mw:Transclusion/End"

		$attrs = [
			new KV( 'typeof', $this->wrapperType ),
			new KV( 'about', $this->aboutId )
		];
		$dp = new DataParsoid;
		// Clone so the marker does not share the token's range object.
		$dp->tsr = clone $this->token->dataParsoid->tsr;
		$dp->src = $this->token->dataParsoid->src;

		$meta = [ new SelfclosingTagTk( 'meta', $attrs, $dp ) ];
		$chunk = $chunk ? array_merge( $meta, $chunk ) : $meta;
		return $chunk;
	}

	/**
	 * Build the end marker.
	 *
	 * The marker is a self-closing <meta> whose `typeof` is the wrapper type
	 * with "/End" appended. Its tsr has no start and ends where the
	 * encapsulated token's tsr ends (or is fully null if the token has no tsr).
	 *
	 * @return Token
	 */
	private function getEncapsulationInfoEndTag(): Token {
		$tsr = $this->token->dataParsoid->tsr ?? null;
		$dp = new DataParsoid;
		$dp->tsr = new SourceRange( null, $tsr ? $tsr->end : null );
		return new SelfclosingTagTk( 'meta',
			[
				new KV( 'typeof', $this->wrapperType . '/End' ),
				new KV( 'about', $this->aboutId )
			],
			$dp
		);
	}

	/**
	 * Parameter processing helpers.
	 *
	 * Tell whether a parameter value consists solely of strings, comment
	 * tokens and newline tokens. A non-array value is treated as a
	 * one-element list; an empty array is considered simple.
	 *
	 * @param mixed $tokens
	 * @return bool
	 */
	private static function isSimpleParam( $tokens ): bool {
		if ( !is_array( $tokens ) ) {
			$tokens = [ $tokens ];
		}
		foreach ( $tokens as $t ) {
			if ( !is_string( $t ) && !( $t instanceof CommentTk ) && !( $t instanceof NlTk ) ) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Add its HTML conversion to a parameter
	 *
	 * Runs the parameter's value wikitext through a full wikitext-to-DOM
	 * pipeline, clears the dsr on every element of the result, and stores
	 * the serialized inner XML in $paramInfo->html.
	 *
	 * @param ParamInfo $paramInfo Must carry srcOffsets; updated in place
	 */
	private function getParamHTML( ParamInfo $paramInfo ): void {
		$srcStart = $paramInfo->srcOffsets->value->start;
		$srcEnd = $paramInfo->srcOffsets->value->end;
		if ( !empty( $paramInfo->spc ) ) {
			// valueWt has had its surrounding whitespace trimmed; shrink the
			// source range by the recorded leading/trailing value spacing so
			// the offsets line up with the text actually being parsed.
			$srcStart += strlen( $paramInfo->spc[2] );
			$srcEnd -= strlen( $paramInfo->spc[3] );
		}

		$domFragment = PipelineUtils::processContentInPipeline(
			$this->env, $this->frame,
			$paramInfo->valueWt,
			[
				'pipelineType' => 'fullparse-wikitext-to-dom',
				'pipelineOpts' => [
					'expandTemplates' => true,
					// No need to do paragraph-wrapping here
					'inlineContext' => true
				],
				'srcOffsets' => new SourceRange( $srcStart, $srcEnd ),
				'sol' => true
			]
		);
		// FIXME: We're better off setting a pipeline option above
		// to skip dsr computation to begin with. Worth revisiting
		// if / when `addHTMLTemplateParameters` is enabled.
		// Remove DSR from children
		DOMUtils::visitDOM( $domFragment, static function ( $node ) {
			if ( !( $node instanceof Element ) ) {
				return;
			}
			$dp = DOMDataUtils::getDataParsoid( $node );
			$dp->dsr = null;
		} );
		$paramInfo->html = ContentUtils::ppToXML(
			$domFragment, [ 'innerXML' => true ]
		);
	}

}