Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 145 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
TemplateEncapsulator | |
0.00% |
0 / 145 |
|
0.00% |
0 / 7 |
1806 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
encapTokens | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
72 | |||
getTemplateInfo | |
0.00% |
0 / 66 |
|
0.00% |
0 / 1 |
420 | |||
getEncapsulationInfo | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
getEncapsulationInfoEndTag | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
isSimpleParam | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
42 | |||
getParamHTML | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\Config\Env; |
8 | use Wikimedia\Parsoid\DOM\Element; |
9 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
10 | use Wikimedia\Parsoid\NodeData\ParamInfo; |
11 | use Wikimedia\Parsoid\NodeData\TemplateInfo; |
12 | use Wikimedia\Parsoid\Tokens\CommentTk; |
13 | use Wikimedia\Parsoid\Tokens\KV; |
14 | use Wikimedia\Parsoid\Tokens\NlTk; |
15 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
16 | use Wikimedia\Parsoid\Tokens\SourceRange; |
17 | use Wikimedia\Parsoid\Tokens\Token; |
18 | use Wikimedia\Parsoid\Utils\ContentUtils; |
19 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
20 | use Wikimedia\Parsoid\Utils\DOMUtils; |
21 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
22 | use Wikimedia\Parsoid\Utils\TokenUtils; |
23 | use Wikimedia\Parsoid\Wt2Html\Frame; |
24 | |
25 | /** |
26 | * A helper class for TemplateHandler that encapsulates template-like syntax |
27 | * with the appropriate meta tags, adding argument info data. |
28 | */ |
29 | class TemplateEncapsulator { |
30 | /** @var Env */ |
31 | private $env; |
32 | /** @var Frame */ |
33 | private $frame; |
34 | /** @var string */ |
35 | private $wrapperType; |
36 | /** @var string */ |
37 | private $wrappedObjectId; |
38 | /** @var Token */ |
39 | public $token; |
40 | /** @var string|null */ |
41 | public $variableName; |
42 | /** @var string|null */ |
43 | public $parserFunctionName; |
44 | /** @var string|null */ |
45 | public $resolvedTemplateTarget; |
46 | |
/**
 * Set up an encapsulator for one template-like token.
 *
 * @param Env $env Environment; also supplies the object id used for the wrapper's "about" attribute
 * @param Frame $frame Frame whose source text the token's source offsets are applied to
 * @param Token $token The template-like token being wrapped
 * @param string $wrapperType Value used for the wrapper meta token's "typeof" attribute
 */
public function __construct( Env $env, Frame $frame, Token $token, string $wrapperType ) {
	// Both wrapper meta tokens share this id through their "about" attribute.
	$this->wrappedObjectId = $env->newObjectId();
	$this->wrapperType = $wrapperType;
	$this->token = $token;
	$this->frame = $frame;
	$this->env = $env;
}
56 | |
/**
 * Main entry point.
 * Encapsulate the template element, including the arguments.
 *
 * Places $tokens between an opening and a closing meta token, builds the
 * TemplateInfo for the wrapped token and stores it as `tplarginfo` on the
 * temporary data-parsoid of the opening meta token. When the site config
 * enables HTML template parameters, the `html` of every parameter is
 * filled in as well.
 *
 * @param array $tokens Tokens to place between the two meta tokens
 * @return array The opening meta token, the given tokens, then the closing meta token
 */
public function encapTokens( array $tokens ): array {
	$toks = $this->getEncapsulationInfo( $tokens );
	$toks[] = $this->getEncapsulationInfoEndTag();
	$tplInfo = $this->getTemplateInfo();

	// When HTML template parameters are not requested, only the
	// wikitext of the parameters is exposed.
	if ( $this->env->getSiteConfig()->addHTMLTemplateParameters() ) {
		// Parse the parameters that need parsing
		foreach ( $tplInfo->paramInfos as $paramInfo ) {
			// NOTE(review): for positional params, $paramInfo->k counts
			// positional arguments only, while attribs is indexed over all
			// attributes (index 0 being the target). These can disagree when
			// a named parameter precedes a positional one -- confirm.
			$paramTokens = $paramInfo->named
				? $this->token->getAttributeV( $paramInfo->k )
				: $this->token->attribs[$paramInfo->k]->v;

			// No need to pass through a whole sub-pipeline to get the
			// html if the param is either a single string, or if it's
			// just text, comments or newlines.
			if ( $paramTokens &&
				( is_string( $paramTokens ) || self::isSimpleParam( $paramTokens ) )
			) {
				$paramInfo->html = $paramInfo->valueWt;
			} elseif (
				// FIXME: this should not have its own regex parsing separate from the PEG
				preg_match( '#^https?://[^[\]{}\s]*$#D', $paramInfo->valueWt )
			) {
				// If the param is just a simple URL, we can process it to
				// HTML directly without going through a sub-pipeline.
				// The pattern above still admits &, <, >, " and ', so the
				// URL is escaped for both the single-quoted attribute and
				// the link text.
				$escapedUrl = htmlspecialchars(
					$paramInfo->valueWt, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'
				);
				$paramInfo->html = "<a rel='mw:ExtLink' href='" . $escapedUrl . "'>" .
					$escapedUrl . '</a>';
			} else {
				$this->getParamHTML( $paramInfo );
			}
		}
	}

	$toks[0]->dataParsoid->getTemp()->tplarginfo = $tplInfo;

	$this->env->log( 'debug', 'TemplateEncapsulator.encapTokens', $toks );
	return $toks;
}
108 | |
/**
 * Get the public data-mw structure that exposes the template name and
 * parameters.
 *
 * Every attribute of the wrapped token after the first one (the first is
 * the template name) is recorded under its parameter name together with
 * its wikitext value, its original key wikitext when that differs from the
 * name, and any whitespace found around the key and the value. When a name
 * occurs more than once, the first ParamInfo is kept and its value is
 * overwritten by the later occurrence.
 *
 * @return TemplateInfo
 */
private function getTemplateInfo(): TemplateInfo {
	$info = new TemplateInfo;
	$wikitext = $this->frame->getSrcText();
	$attribs = $this->token->attribs;
	$infosByName = [];
	$nextPositional = 1;
	// Captures the leading (1) and trailing (2) whitespace of a string.
	$spacePattern = '/^(\s*)(?:.*\S)?(\s*)$/sD';

	// The 'k' and 'v' values of the attributes are expanded tokens, so the
	// key / value wikitext is cut out of the source via the source offsets.
	$attribCount = count( $attribs );
	for ( $idx = 1; $idx < $attribCount; $idx++ ) {
		$attrib = $attribs[$idx];
		$offsets = $attrib->srcOffsets;
		if ( $offsets === null ) {
			$rawKey = $attrib->k;
			$rawValue = $attrib->v;
		} else {
			$rawKey = $offsets->key->substr( $wikitext );
			$rawValue = $offsets->value->substr( $wikitext );
		}

		$keyWikitext = trim( $rawKey );
		$name = TokenUtils::tokensToString( $attrib->k, true, [ 'stripEmptyLineMeta' => true ] );
		// The PHP parser only removes comments and whitespace to construct
		// the real parameter name; if other tokens are left over, fall back
		// to the original text.
		$name = is_array( $name ) ? $keyWikitext : trim( $name );

		// An empty name alone is not enough: a blank parameter name is
		// valid. Only when the value starts exactly where the key ends is
		// the parameter treated as positional.
		$positional = $name === '' &&
			$offsets &&
			$offsets->key->end === $offsets->value->start;

		if ( $positional ) {
			$name = (string)$nextPositional;
			$nextPositional++;
			$valueWikitext = $rawValue;
		} else {
			// Whitespace is stripped from named parameter values
			$valueWikitext = trim( $rawValue );
		}

		$entry = $infosByName[$name] ?? null;
		if ( $entry === null ) {
			$entry = new ParamInfo( $name, $offsets );

			Assert::invariant(
				preg_match( $spacePattern, $rawKey, $keySpace ),
				'Template argument whitespace match failed.'
			);

			if ( $positional ) {
				// The PHP parser does not strip whitespace around
				// positional params, so none is recorded here either.
				$valueSpace = [ null, '', '' ];
			} else {
				$entry->named = true;
				if ( $valueWikitext === '' ) {
					// Nothing but whitespace: all of it counts as trailing.
					$valueSpace = [ null, '', $rawValue ];
				} else {
					Assert::invariant(
						preg_match( $spacePattern, $rawValue, $valueSpace ),
						'Template argument whitespace match failed.'
					);
				}
			}

			// Remember key / value spacing only when there is any; "=" with
			// no surrounding whitespace is the serializer's default.
			$hasSpacing = $keySpace[1] || $keySpace[2] || $valueSpace[1] || $valueSpace[2];
			if ( $hasSpacing ) {
				$entry->spc = [
					$keySpace[1], $keySpace[2],
					$valueSpace[1], $valueSpace[2]
				];
			}
		}

		$entry->valueWt = $valueWikitext;
		// The original key wikitext is only kept for named parameters whose
		// wikitext differs from the resolved name.
		if ( !$positional && $keyWikitext !== $name ) {
			$entry->keyWt = $keyWikitext;
		}
		$infosByName[$name] = $entry;
	}

	$info->paramInfos = $infosByName;

	$targetOffsets = $attribs[0]->srcOffsets;
	if ( $targetOffsets ) {
		$info->targetWt = $targetOffsets->key->substr( $wikitext );
	}

	// Add in tpl-target/pf-name info; only one of these will be set.
	$functionName = $this->variableName ?? $this->parserFunctionName;
	if ( $functionName !== null ) {
		$info->func = $functionName;
	} elseif ( $this->resolvedTemplateTarget !== null ) {
		$info->href = $this->resolvedTemplateTarget;
	}

	return $info;
}
234 | |
/**
 * Build the opening wrapper: a self-closing meta token carrying the wrapper
 * type and the "about" id, followed by the supplied tokens (if any).
 *
 * @param array|null $chunk Tokens to place after the opening meta token
 * @return array The opening meta token followed by the contents of $chunk
 */
private function getEncapsulationInfo( ?array $chunk = null ): array {
	// TODO
	// * only add this information for top-level includes, but track parameter
	//   expansion in lower-level templates
	// * ref all tables to this (just add about)
	// * ref end token to this, add property="mw:Transclusion/End"

	$tokenDp = $this->token->dataParsoid;

	// The meta token gets its own copy of the wrapped token's source range
	// and the wrapped token's source.
	$metaDp = new DataParsoid;
	$metaDp->tsr = clone $tokenDp->tsr;
	$metaDp->src = $tokenDp->src;

	$openingMeta = new SelfclosingTagTk(
		'meta',
		[
			new KV( 'typeof', $this->wrapperType ),
			new KV( 'about', '#' . $this->wrappedObjectId )
		],
		$metaDp
	);

	if ( !$chunk ) {
		return [ $openingMeta ];
	}
	return array_merge( [ $openingMeta ], $chunk );
}
254 | |
/**
 * Build the closing wrapper: a self-closing meta token whose type is the
 * wrapper type suffixed with "/End" and which shares the "about" id of the
 * opening meta token.
 *
 * @return Token
 */
private function getEncapsulationInfoEndTag(): Token {
	$tokenTsr = $this->token->dataParsoid->tsr ?? null;

	// Only the end offset of the wrapped token is carried over, and only
	// when the token has a source range at all.
	$endOffset = null;
	if ( $tokenTsr ) {
		$endOffset = $tokenTsr->end;
	}

	$endDp = new DataParsoid;
	$endDp->tsr = new SourceRange( null, $endOffset );

	$attribs = [
		new KV( 'typeof', $this->wrapperType . '/End' ),
		new KV( 'about', '#' . $this->wrappedObjectId )
	];
	return new SelfclosingTagTk( 'meta', $attribs, $endDp );
}
267 | |
/**
 * Parameter processing helper: tell whether a parameter value consists
 * solely of strings, comment tokens and newline tokens.
 *
 * @param mixed $tokens A single value or an array of values to inspect
 * @return bool True when every inspected value is a string, a CommentTk or a NlTk
 */
private static function isSimpleParam( $tokens ): bool {
	// A lone value is treated like a one-element list.
	$candidates = is_array( $tokens ) ? $tokens : [ $tokens ];

	foreach ( $candidates as $candidate ) {
		if ( is_string( $candidate ) ||
			$candidate instanceof CommentTk ||
			$candidate instanceof NlTk
		) {
			continue;
		}
		return false;
	}
	return true;
}
285 | |
/**
 * Run a parameter's wikitext value through a full pipeline and store the
 * resulting inner XML as the parameter's html.
 *
 * @param ParamInfo $paramInfo Parameter to convert; its `html` property is set
 */
private function getParamHTML( ParamInfo $paramInfo ): void {
	$valueRange = $paramInfo->srcOffsets->value;
	$start = $valueRange->start;
	$end = $valueRange->end;

	// When spacing was recorded, narrow the range by the whitespace found
	// before and after the value.
	if ( !empty( $paramInfo->spc ) ) {
		[ , , $leadingSpace, $trailingSpace ] = $paramInfo->spc;
		$start += strlen( $leadingSpace );
		$end -= strlen( $trailingSpace );
	}

	$pipelineOpts = [
		'isInclude' => false,
		'expandTemplates' => true,
		// No need to do paragraph-wrapping here
		'inlineContext' => true
	];
	$processingOpts = [
		'pipelineType' => 'text/x-mediawiki/full',
		'pipelineOpts' => $pipelineOpts,
		'srcOffsets' => new SourceRange( $start, $end ),
		'sol' => true
	];
	$fragment = PipelineUtils::processContentInPipeline(
		$this->env, $this->frame, $paramInfo->valueWt, $processingOpts
	);

	// FIXME: We're better off setting a pipeline option above
	// to skip dsr computation to begin with. Worth revisiting
	// if / when `addHTMLTemplateParameters` is enabled.
	// Until then, drop the DSR from every element in the fragment.
	DOMUtils::visitDOM( $fragment, static function ( $node ) {
		if ( $node instanceof Element ) {
			DOMDataUtils::getDataParsoid( $node )->dsr = null;
		}
	} );

	$paramInfo->html = ContentUtils::ppToXML( $fragment, [ 'innerXML' => true ] );
}
329 | |
330 | } |