Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
33.19% |
150 / 452 |
|
20.00% |
4 / 20 |
CRAP | |
0.00% |
0 / 1 |
TemplateHandler | |
33.19% |
150 / 452 |
|
20.00% |
4 / 20 |
6070.85 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
parserFunctionsWrapper | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
20 | |||
stripIncludeTokens | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
156 | |||
processToString | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
420 | |||
isSafeSubst | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
resolveTemplateTarget | |
40.70% |
35 / 86 |
|
0.00% |
0 / 1 |
122.94 | |||
flattenAndAppendToks | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
132 | |||
convertToString | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
6 | |||
enforceTemplateConstraints | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
expandTemplateNatively | |
0.00% |
0 / 40 |
|
0.00% |
0 / 1 |
90 | |||
processTemplateSource | |
73.53% |
25 / 34 |
|
0.00% |
0 / 1 |
3.17 | |||
encapTokens | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
processTemplateTokens | |
62.50% |
10 / 16 |
|
0.00% |
0 / 1 |
15.27 | |||
fetchTemplateAndTitle | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
hasTemplateToken | |
40.00% |
2 / 5 |
|
0.00% |
0 / 1 |
7.46 | |||
processSpecialMagicWord | |
0.00% |
0 / 41 |
|
0.00% |
0 / 1 |
56 | |||
expandTemplate | |
55.22% |
37 / 67 |
|
0.00% |
0 / 1 |
42.94 | |||
onTemplate | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
onTemplateArg | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
onTag | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
4.07 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Assert\UnreachableException; |
8 | use Wikimedia\Parsoid\Tokens\CommentTk; |
9 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
10 | use Wikimedia\Parsoid\Tokens\KV; |
11 | use Wikimedia\Parsoid\Tokens\NlTk; |
12 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
13 | use Wikimedia\Parsoid\Tokens\SourceRange; |
14 | use Wikimedia\Parsoid\Tokens\TagTk; |
15 | use Wikimedia\Parsoid\Tokens\Token; |
16 | use Wikimedia\Parsoid\Utils\DOMCompat; |
17 | use Wikimedia\Parsoid\Utils\DOMUtils; |
18 | use Wikimedia\Parsoid\Utils\PHPUtils; |
19 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
20 | use Wikimedia\Parsoid\Utils\Title; |
21 | use Wikimedia\Parsoid\Utils\TitleException; |
22 | use Wikimedia\Parsoid\Utils\TokenUtils; |
23 | use Wikimedia\Parsoid\Utils\Utils; |
24 | use Wikimedia\Parsoid\Wikitext\Wikitext; |
25 | use Wikimedia\Parsoid\Wt2Html\Params; |
26 | use Wikimedia\Parsoid\Wt2Html\TokenTransformManager; |
27 | |
28 | /** |
29 | * Template and template argument handling. |
30 | */ |
31 | class TemplateHandler extends TokenHandler { |
32 | /** |
33 | * @var bool Should we wrap template tokens with template meta tags? |
34 | */ |
35 | private $wrapTemplates; |
36 | |
37 | /** |
38 | * @var AttributeExpander |
39 | * Local copy of the attribute expander to deal with template targets |
40 | * that are templated themselves |
41 | */ |
42 | private $ae; |
43 | |
44 | /** |
45 | * @var ParserFunctions |
46 | */ |
47 | private $parserFunctions; |
48 | |
49 | /** |
50 | * @var bool |
51 | */ |
52 | private $atMaxArticleSize; |
53 | |
54 | /** @var string|null */ |
55 | private $safeSubstRegex; |
56 | |
/**
 * Build the handler together with its parser-function implementations and
 * a private attribute expander.
 *
 * @param TokenTransformManager $manager
 * @param array $options
 *  - ?bool inTemplate Is this being invoked while processing a template?
 *  - ?bool expandTemplates Should we expand templates encountered here?
 *  - ?string extTag The name of the extension tag, if any, which is being expanded.
 */
public function __construct( TokenTransformManager $manager, array $options ) {
	parent::__construct( $manager, $options );

	// Created in the constructor so that it is also available in the
	// TokenStreamPatcher, which continues to inherit from TemplateHandler.
	$this->parserFunctions = new ParserFunctions( $this->env );

	// Standalone expander used for template targets that are templated themselves.
	$expanderOptions = [
		'expandTemplates' => $this->options['expandTemplates'],
		'inTemplate' => $this->options['inTemplate'],
		'standalone' => true,
	];
	$this->ae = new AttributeExpander( $this->manager, $expanderOptions );

	// Template meta-tag wrapping only happens outside of template context.
	$this->wrapTemplates = !$options['inTemplate'];

	// In the legacy parser, the call to replaceVariables from internalParse
	// returns early if the text is already greater than the $wgMaxArticleSize.
	// The comparison is done once here and remembered as a boolean so that
	// the equivalent "early return" can be made later on.
	$srcLength = strlen( $this->env->topFrame->getSrcText() );
	$withinLimit = $this->env->compareWt2HtmlLimit( 'wikitextSize', $srcLength );
	$this->atMaxArticleSize = !$withinLimit;
}
85 | |
/**
 * Parser functions also need template wrapping: drop empty-string tokens
 * from the parser function output and hand the rest to
 * processTemplateTokens().
 *
 * @param array $tokens Flat token chunk produced by a parser function
 * @return array Result of processTemplateTokens(), or [] when $tokens is empty
 */
private function parserFunctionsWrapper( array $tokens ): array {
	if ( !$tokens ) {
		return [];
	}

	// This is only for the Parsoid native expansion pipeline used in
	// parser tests. The "" token sometimes changes foster parenting
	// behavior and trips up some tests, so it is filtered out here.
	$kept = [];
	$sawNestedChunk = false;
	foreach ( $tokens as $tok ) {
		if ( $tok === '' ) {
			continue;
		}
		if ( is_array( $tok ) ) {
			$sawNestedChunk = true;
		}
		$kept[] = $tok;
	}

	// The token chunk is expected to be flat (no nested arrays).
	Assert::invariant( !$sawNestedChunk, "Expected token chunk to be flattened" );

	return $this->processTemplateTokens( $kept );
}
116 | |
/**
 * Strip include tags, and the contents of includeonly tags as well.
 *
 * noinclude / onlyinclude / includeonly tag tokens are always removed.
 * Every token (string or otherwise) seen between an includeonly start tag
 * and the next includeonly end / self-closing tag is removed too.
 *
 * @param (Token|string)[] $tokens
 * @return (Token|string)[]
 */
private function stripIncludeTokens( array $tokens ): array {
	// Only these exact token classes are inspected for include directives.
	$tagClasses = [ TagTk::class, EndTagTk::class, SelfclosingTagTk::class ];

	$kept = [];
	$insideIncludeOnly = false;
	foreach ( $tokens as $token ) {
		if ( !is_string( $token ) && in_array( get_class( $token ), $tagClasses, true ) ) {
			$name = $token->getName();
			if ( $name === 'noinclude' || $name === 'onlyinclude' ) {
				// Drop the tag itself; its content is left untouched.
				continue;
			}
			if ( $name === 'includeonly' ) {
				// A start tag opens the suppressed region; any other
				// includeonly tag token closes it.
				$insideIncludeOnly = $token instanceof TagTk;
				continue;
			}
		}

		if ( !$insideIncludeOnly ) {
			$kept[] = $token;
		}
	}
	return $kept;
}
153 | |
/**
 * Take output of tokensToString and further postprocess it.
 * - If it can be processed to a string which would be a valid template transclusion target,
 *   the return value will be [ $the_string_value, null ]
 * - If not, the return value will be [ $partial_string, $unprocessed_token_array ]
 *   The caller can then decide if this would be a valid parser function call
 *   where the unprocessed token array would be part of the first arg to the parser function.
 *   Ex: With "{{uc:foo [[foo]] {{1x|foo}} bar}}", we return
 *       [ "uc:foo ", [ wikilink-token, " ", template-token, " bar" ] ]
 *
 * Newline handling: leading newlines (while only whitespace has been
 * accumulated) are folded into the string. The first newline after
 * non-whitespace content splits off that content as the candidate target;
 * anything other than whitespace, comments, empty-line metas, quotes and
 * template / templatearg tokens after that point makes the remainder
 * "unprocessed".
 *
 * @param array $tokens
 * @return array first element is always a string
 */
private function processToString( array $tokens ): array {
	$maybeTarget = TokenUtils::tokensToString( $tokens, true, [ 'retainNLs' => true ] );
	if ( !is_array( $maybeTarget ) ) {
		return [ $maybeTarget, null ];
	}

	$buf = $maybeTarget[0]; // Will always be a string
	$tgtTokens = $maybeTarget[1];
	// Content seen before the first "significant" newline; null until then.
	$preNlContent = null;
	foreach ( $tgtTokens as $i => $ntt ) {
		if ( is_string( $ntt ) ) {
			$buf .= $ntt;
			if ( $preNlContent !== null && !preg_match( '/^\s*$/D', $buf ) ) {
				// intervening newline makes this an invalid template target
				//
				// $ntt has already been folded into $buf, so the remainder
				// starts at the *next* token. Slicing from $i would emit
				// this string a second time.
				return [ $preNlContent, array_merge( [ $buf ], array_slice( $tgtTokens, $i + 1 ) ) ];
			}
			continue;
		}

		switch ( get_class( $ntt ) ) {
			case SelfclosingTagTk::class:
				// Quotes are valid template targets
				if ( $ntt->getName() === 'mw-quote' ) {
					$buf .= $ntt->getAttribute( 'value' );
				} elseif ( !TokenUtils::isEmptyLineMetaToken( $ntt ) &&
					$ntt->getName() !== 'template' &&
					$ntt->getName() !== 'templatearg'
				) {
					// We are okay with empty (comment-only) lines,
					// {{..}} and {{{..}}} in template targets.
					// Anything else ends the string portion; the current
					// token has not been consumed, so it stays in the slice.
					if ( $preNlContent !== null ) {
						return [ $preNlContent, array_merge( [ $buf ], array_slice( $tgtTokens, $i ) ) ];
					} else {
						return [ $buf, array_slice( $tgtTokens, $i ) ];
					}
				}
				break;

			case TagTk::class:
			case EndTagTk::class:
				// Start / end tags can never be part of the string portion.
				if ( $preNlContent !== null ) {
					return [ $preNlContent, array_merge( [ $buf ], array_slice( $tgtTokens, $i ) ) ];
				} else {
					return [ $buf, array_slice( $tgtTokens, $i ) ];
				}

			case CommentTk::class:
				// Ignore comments as well
				break;

			case NlTk::class:
				// Ignore only the leading or trailing newlines
				// (modulo whitespace and comments)
				//
				// If we only have whitespace in $buf thus far,
				// the newline can be ignored. But, if we have
				// non-ws content in $buf, everything that follows
				// can only be ws.
				if ( preg_match( '/^\s*$/D', $buf ) ) {
					$buf .= "\n";
					break;
				} elseif ( $preNlContent === null ) {
					// Buffer accumulated content
					$preNlContent = $buf;
					$buf = "\n";
					break;
				} else {
					return [ $preNlContent, array_merge( [ $buf ], array_slice( $tgtTokens, $i ) ) ];
				}

			default:
				throw new UnreachableException( 'Unexpected token type: ' . get_class( $ntt ) );
		}
	}

	// All good! No newline / only whitespace/comments post newline.
	return [ $preNlContent . $buf, null ];
}
245 | |
/**
 * Does the given prefix match the "safesubst" magic word?
 *
 * The matcher regex is obtained from the site config on first use and
 * cached on the handler afterwards.
 *
 * @param string $prefix Candidate modifier, without its trailing colon
 * @return bool
 */
private function isSafeSubst( $prefix ): bool {
	$matcher = $this->safeSubstRegex;
	if ( $matcher === null ) {
		$matcher = $this->env->getSiteConfig()->getMagicWordMatcher( 'safesubst' );
		$this->safeSubstRegex = $matcher;
	}
	// The matcher is applied to the prefix with its colon restored.
	return preg_match( $matcher, $prefix . ':' ) === 1;
}
257 | |
/**
 * Work out what a transclusion target refers to: a magic-word variable,
 * a parser function, or a template title.
 *
 * Side effects on $state: sets `variableName`, `parserFunctionName` or
 * `resolvedTemplateTarget` depending on which kind of target was found.
 *
 * @param TemplateEncapsulator $state
 * @param string|Token|array $targetToks
 * @param SourceRange $srcOffsets
 * @return array|null Null when the target is invalid. Otherwise an array
 *  with at least 'name', 'title' and 'magicWordType'; variables add
 *  'isVariable', 'pfArg' and 'srcOffsets'; parser functions add 'isPF',
 *  'pfArg', 'srcOffsets' and 'targetToks'.
 */
private function resolveTemplateTarget(
	TemplateEncapsulator $state, $targetToks, $srcOffsets
): ?array {
	$additionalToks = null;
	if ( is_string( $targetToks ) ) {
		$target = $targetToks;
	} else {
		$toks = !is_array( $targetToks ) ? [ $targetToks ] : $targetToks;
		$toks = $this->processToString( $this->stripIncludeTokens( $toks ) );
		list( $target, $additionalToks ) = $toks;
	}

	$target = trim( $target );
	$pieces = explode( ':', $target );
	$untrimmedPrefix = $pieces[0];
	$prefix = trim( $pieces[0] );

	// String found after the colon will be the parser function arg
	$haveColon = count( $pieces ) > 1;

	// safesubst found in content should be treated as if no modifier were
	// present. See https://en.wikipedia.org/wiki/Help:Substitution#The_safesubst:_modifier
	if ( $haveColon && $this->isSafeSubst( $prefix ) ) {
		$target = substr( $target, strlen( $untrimmedPrefix ) + 1 );
		array_shift( $pieces );
		$untrimmedPrefix = $pieces[0];
		$prefix = trim( $pieces[0] );
		$haveColon = count( $pieces ) > 1;
	}

	// Parser function names usually (not always) start with a hash.
	// This is checked on the prefix *after* any safesubst modifier has
	// been removed, so that "{{safesubst:#foo:bar}}" is recognized the
	// same way as "{{#foo:bar}}". Without a modifier, the prefix and the
	// trimmed target start with the same character.
	$hasHash = substr( $prefix, 0, 1 ) === '#';

	$env = $this->env;
	$siteConfig = $env->getSiteConfig();

	// Additional tokens are only justifiable in parser functions scenario
	if ( !$haveColon && $additionalToks ) {
		return null;
	}

	$pfArg = '';
	if ( $haveColon ) {
		$pfArg = substr( $target, strlen( $untrimmedPrefix ) + 1 );
		if ( $additionalToks ) {
			// Mixed string + token argument
			$pfArg = [ $pfArg ];
			PHPUtils::pushArray( $pfArg, $additionalToks );
		}
	}

	// Check if we have a magic-word variable.
	// The lookup is tried with the prefix as written and then lowercased.
	$magicWordVar = $siteConfig->getMagicWordForVariable( $prefix ) ??
		$siteConfig->getMagicWordForVariable( mb_strtolower( $prefix ) );
	if ( $magicWordVar ) {
		$state->variableName = $magicWordVar;
		return [
			'isVariable' => true,
			'magicWordType' => $magicWordVar === '!' ? '!' : null,
			'name' => $magicWordVar,
			// FIXME: Some made up synthetic title
			'title' => $env->makeTitleFromURLDecodedStr( "Special:Variable/$magicWordVar" ),
			'pfArg' => $pfArg,
			'srcOffsets' => new SourceRange(
				$srcOffsets->start + strlen( $untrimmedPrefix ) + 1,
				$srcOffsets->end ),
		];
	}

	// FIXME: Checks for msgnw, msg, raw are missing at this point

	$canonicalFunctionName = null;
	if ( $haveColon ) {
		$canonicalFunctionName = $siteConfig->getMagicWordForFunctionHook( $prefix );
	}
	if ( $canonicalFunctionName === null && $hasHash ) {
		// If the target starts with a '#' it can't possibly be a template
		// so this must be a "broken" parser function invocation
		$canonicalFunctionName = substr( $prefix, 1 );
		// @todo: Flag this as an author error somehow (T314524)
	}
	if ( $canonicalFunctionName !== null ) {
		$state->parserFunctionName = $canonicalFunctionName;
		// XXX this is made up.
		$syntheticTitle = $env->makeTitleFromURLDecodedStr(
			"Special:ParserFunction/$canonicalFunctionName",
			$env->getSiteConfig()->canonicalNamespaceId( 'Special' ),
			true // No exceptions
		);
		// Note that parserFunctionName/$canonicalFunctionName is not
		// necessarily a valid title!  Parsing rules are pretty generous
		// w/r/t valid parser function names.
		if ( $syntheticTitle === null ) {
			$syntheticTitle = $env->makeTitleFromText(
				'Special:ParserFunction/unknown'
			);
		}
		return [
			'name' => $canonicalFunctionName,
			'pfArg' => $pfArg,
			'srcOffsets' => new SourceRange(
				$srcOffsets->start + strlen( $untrimmedPrefix ) + 1,
				$srcOffsets->end ),
			'isPF' => true,
			// FIXME: Some made up synthetic title
			'title' => $syntheticTitle,
			'magicWordType' => isset( Utils::magicMasqs()[$canonicalFunctionName] ) ? 'MASQ' : null,
			'targetToks' => !is_array( $targetToks ) ? [ $targetToks ] : $targetToks,
		];
	}

	// We've exhausted the parser-function scenarios, and we still have additional tokens.
	if ( $additionalToks ) {
		return null;
	}

	// `resolveTitle()` adds the namespace prefix when it resolves fragments
	// and relative titles, and a leading colon should resolve to a template
	// from the main namespace, hence we omit a default when making a title
	$namespaceId = strspn( $target, ':#/.' ) ?
		null : $siteConfig->canonicalNamespaceId( 'template' );

	// Resolve a possibly relative link and
	// normalize the target before template processing.
	$title = null;
	try {
		$title = $env->resolveTitle( $target );
	} catch ( TitleException $e ) {
		// Invalid template target!
		return null;
	}

	// Entities in transclusions aren't decoded in the PHP parser
	// So, treat the title as a url-decoded string!
	$title = $env->makeTitleFromURLDecodedStr( $title, $namespaceId, true );
	if ( !$title ) {
		// Invalid template target!
		return null;
	}

	// data-mw.target.href should be a url
	$state->resolvedTemplateTarget = $env->makeLink( $title );

	return [
		'magicWordType' => null,
		'name' => $title->getPrefixedDBKey(),
		'title' => $title,
	];
}
411 | |
/**
 * Append $t (preceded by $prefix, when that is non-empty) to $tokens,
 * flattening $t if it is an array. An empty array or empty string for $t
 * leaves $tokens untouched, prefix included.
 *
 * @param (Token|string)[] $tokens
 * @param ?string $prefix
 * @param Token|string|(Token|string)[] $t
 * @return array
 */
private function flattenAndAppendToks(
	array $tokens, ?string $prefix, $t
): array {
	$isArrayChunk = is_array( $t );

	// Nothing to add: neither the chunk nor its prefix is emitted.
	$nothingToAppend = ( $isArrayChunk && count( $t ) === 0 ) ||
		( is_string( $t ) && strlen( $t ) === 0 );
	if ( $nothingToAppend ) {
		return $tokens;
	}

	if ( $prefix !== null && $prefix !== '' ) {
		$tokens[] = $prefix;
	}

	if ( $isArrayChunk ) {
		PHPUtils::pushArray( $tokens, $t );
	} else {
		$tokens[] = $t;
	}

	return $tokens;
}
447 | |
/**
 * Reprocess the token's source, minus its first and last character, as
 * wikitext and return the resulting tokens wrapped in literal '{' / '}'.
 *
 * By default, don't attempt to expand any templates in the wikitext that will be reprocessed.
 *
 * @param Token $token
 * @param bool $expandTemplates Only honoured when this handler's own
 *  'expandTemplates' option is set as well
 * @return TemplateExpansionResult
 */
private function convertToString( Token $token, bool $expandTemplates = false ): TemplateExpansionResult {
	$frame = $this->manager->getFrame();
	$dp = $token->dataParsoid;

	// Drop one character from each end of the source; the offsets of
	// the remaining text shift by one accordingly.
	$innerSrc = substr( $dp->src, 1, -1 );
	$innerStart = $dp->tsr->start + 1;
	$innerRange = new SourceRange( $innerStart, $innerStart + strlen( $innerSrc ) );

	$pipelineOpts = [
		'inTemplate' => $this->options['inTemplate'],
		'expandTemplates' => $expandTemplates && $this->options['expandTemplates'],
	];
	$innerToks = PipelineUtils::processContentInPipeline(
		$this->env,
		$frame,
		$innerSrc,
		[
			'pipelineType' => 'text/x-mediawiki',
			'pipelineOpts' => $pipelineOpts,
			'sol' => false,
			'srcOffsets' => $innerRange,
		]
	);
	TokenUtils::stripEOFTkfromTokens( $innerToks );

	// Put the stripped characters back as plain '{' and '}' strings.
	$wrapped = array_merge( [ '{' ], $innerToks, [ '}' ] );
	return new TemplateExpansionResult( $wrapped, true );
}
476 | |
/**
 * Enforce template loops / loop depth limit constraints and emit
 * error message if constraints are violated.
 *
 * @param mixed $target Used as the href of the wikilink in the error output
 * @param Title $title Title checked against the current frame
 * @param bool $ignoreLoop Passed through to the frame's loopAndDepthCheck()
 * @return ?array Error tokens (span-wrapped message plus wikilink), or
 *  null when no constraint was violated
 */
private function enforceTemplateConstraints( $target, Title $title, bool $ignoreLoop ): ?array {
	$maxDepth = $this->env->getSiteConfig()->getMaxTemplateDepth();
	$error = $this->manager->getFrame()->loopAndDepthCheck( $title, $maxDepth, $ignoreLoop );
	if ( !$error ) {
		return null;
	}

	// Loop detected or depth limit exceeded, abort!
	$link = new SelfclosingTagTk( 'wikilink', [ new KV( 'href', $target, null, '', '' ) ] );
	return [
		new TagTk( 'span', [ new KV( 'class', 'error' ) ] ),
		$error,
		$link,
		new EndTagTk( 'span' ),
	];
}
499 | |
/**
 * Fetch, tokenize and token-transform a template after all arguments and
 * the target were expanded.
 *
 * Parser functions and variables are dispatched to the matching `pf_*`
 * method on the ParserFunctions instance. Templates are loop/depth
 * checked, fetched and run through processTemplateSource(). A template
 * whose source cannot be fetched becomes a wikilink token.
 *
 * @param TemplateEncapsulator $state
 * @param array $resolvedTgt Target description as returned by resolveTemplateTarget()
 * @param array $attribs Transclusion attributes; the first entry is the target
 * @return TemplateExpansionResult
 */
private function expandTemplateNatively(
	TemplateEncapsulator $state, array $resolvedTgt, array $attribs
): TemplateExpansionResult {
	$env = $this->env;
	$encap = $this->options['expandTemplates'] && $this->wrapTemplates;

	// XXX: wrap attribs in object with .dict() and .named() methods,
	// and each member (key/value) into object with .tokens(), .dom() and
	// .wikitext() methods (subclass of Array)

	$target = $resolvedTgt['name'];
	$isFunctionLike = isset( $resolvedTgt['isPF'] ) || isset( $resolvedTgt['isVariable'] );
	if ( $isFunctionLike ) {
		// FIXME: HARDCODED to core parser function implementations!
		// These should go through function hook registrations in the
		// ParserTests mock setup ideally. But, it is complicated because the
		// Parsoid core parser function versions have "token" versions
		// which are incompatible with implementation in FunctionHookHandler
		// and FunctionArgs. So, we continue down this hacky path for now.
		//
		// '=' is not a valid character in function names
		$target = 'pf_' . ( $target === '=' ? 'equal' : $target );
		$handler = [ $this->parserFunctions, $target ];

		// FIXME: Parsoid may not have implemented the parser function natively
		// Emit an error message, but encapsulate it so it roundtrips back.
		if ( !is_callable( $handler ) ) {
			// FIXME: Consolidate error response format with enforceTemplateConstraints
			$err = 'Parser function implementation for ' . $target . ' missing in Parsoid.';
			return new TemplateExpansionResult( [ $err ], false, $encap );
		}

		// The first argument is replaced by the stringified parser function argument.
		// FIXME: This is bogus, but preserves borked b/c
		$firstArg = new KV(
			TokenUtils::tokensToString( $resolvedTgt['pfArg'] ),
			[],
			$resolvedTgt['srcOffsets']->expandTsrK()
		);
		$pfAttribs = new Params( $attribs );
		$pfAttribs->args[0] = $firstArg;

		$env->log( 'debug', 'entering prefix', $target, $state->token );
		$res = call_user_func( $handler, $state->token, $this->manager->getFrame(), $pfAttribs );
		if ( $this->wrapTemplates ) {
			$res = $this->parserFunctionsWrapper( $res );
		}
		return new TemplateExpansionResult( $res, false, $encap );
	}

	// Loop detection needs to be enabled since we're doing our own template expansion
	$error = $this->enforceTemplateConstraints( $target, $resolvedTgt['title'], false );
	if ( $error ) {
		// FIXME: Should we be encapsulating here?
		// Inconsistent with the other place constraints are enforced.
		return new TemplateExpansionResult( $error, false, $encap );
	}

	// XXX: notes from brion's mediawiki.parser.environment
	// resolve template name
	// load template w/ canonical name
	// load template w/ variant names (language variants)

	// Fetch template source and expand it
	$src = $this->fetchTemplateAndTitle( $target, $attribs );
	if ( $src === null ) {
		// Convert to a wikilink (which will become a redlink after the redlinks pass).
		$hrefSrc = $resolvedTgt['name'];
		$link = new SelfclosingTagTk( 'wikilink' );
		$link->attribs[] = new KV( 'href', $hrefSrc, null, null, $hrefSrc );
		return new TemplateExpansionResult( [ $link ], false, $encap );
	}

	$tplArgs = [
		'name' => $target,
		'title' => $resolvedTgt['title'],
		'attribs' => array_slice( $attribs, 1 ), // strip template target
	];
	$toks = $this->processTemplateSource( $state->token, $tplArgs, $src );
	return new TemplateExpansionResult( $toks, true, $encap );
}
588 | |
/**
 * Process a fetched template source to a token stream.
 *
 * The source is run through a nested wikitext pipeline (in template /
 * include mode, always starting in start-of-line state) and the resulting
 * tokens are handed to processTemplateTokens(). Empty source yields [].
 *
 * @param Token $token The transclusion token; only used for the 'tplsrc' dump
 * @param array $tplArgs Needs 'name' and 'attribs'; passed to the pipeline as 'tplArgs'
 * @param string $src Template wikitext
 * @return array
 */
private function processTemplateSource( Token $token, array $tplArgs, string $src ): array {
	$env = $this->env;
	$frame = $this->manager->getFrame();

	if ( $env->hasDumpFlag( 'tplsrc' ) ) {
		$banner = str_repeat( '=', 28 );
		$rule = str_repeat( '-', 80 ) . "\n";
		$dump = $banner . " template source " . $banner . "\n";
		$dump .= 'TEMPLATE:' . $tplArgs['name'] . 'TRANSCLUSION:' .
			PHPUtils::jsonEncode( $token->dataParsoid->src ) . "\n";
		$dump .= $rule;
		$dump .= $src . "\n";
		$dump .= $rule;
		$env->writeDump( $dump );
	}

	if ( $src === '' ) {
		return [];
	}

	$env->log( 'debug', 'TemplateHandler.processTemplateSource',
		$tplArgs['name'], $tplArgs['attribs'] );

	$pipelineOpts = [
		'inTemplate' => true,
		'isInclude' => true,
		// FIXME: In reality, this is broken for parser tests where
		// we expand templates natively. We do want all nested templates
		// to be expanded. But, setting this to !usePHPPreProcessor seems
		// to break a number of tests. Not pursuing this line of enquiry
		// for now since this parserTests vs production distinction will
		// disappear with parser integration. We'll just bear the stench
		// till that time.
		//
		// NOTE: No expansion required for nested templates.
		'expandTemplates' => false,
		'extTag' => $this->options['extTag'] ?? null
	];

	// Get a nested transformation pipeline for the wikitext that takes
	// us through stages 1-2, with the appropriate pipeline options set.
	//
	// Simply returning the tokenized source here (which may be correct
	// when using the legacy preprocessor because we don't expect to
	// tokenize any templates or include directives so skipping those
	// handlers should be ok) won't work since the options for the pipeline
	// we're in probably aren't what we want.
	$toks = PipelineUtils::processContentInPipeline(
		$env,
		$frame,
		$src,
		[
			'pipelineType' => 'text/x-mediawiki',
			'pipelineOpts' => $pipelineOpts,
			'srcText' => $src,
			'srcOffsets' => new SourceRange( 0, strlen( $src ) ),
			'tplArgs' => $tplArgs,
			// HEADS UP: You might be wondering why we are forcing "sol" => true without
			// using information about whether the transclusion is used in a SOL context.
			//
			// Ex: "foo {{1x|*bar}}"  Here, "*bar" is not in SOL context relative to the
			// top-level page and so, should it actually be parsed as a list item?
			//
			// So, there is a use-case where one could argue that the sol value here
			// should be conditioned on the page-level context where "{{1x|*bar}}" showed
			// up. So, in this example "foo {{1x|*bar}}", sol would be false and in this
			// example "foo\n{{1x|*bar}}", sol would be true. That is effectively how
			// the legacy parser behaves. (Ignore T2529 for the moment.)
			//
			// But, Parsoid is a different beast. Since the Parsoid/JS days, templates
			// have been processed asynchronously. So, {{1x|*bar}} would be expanded and
			// tokenized before even its preceding context might have been processed.
			// From the start, Parsoid has aimed to decouple the processing of fragment
			// generators (be it templates, extensions, or something else) from the
			// processing of the page they are embedded in. This has been the
			// starting point of many a wikitext 2.0 proposal on mediawiki.org;
			// see also [[mw:Parsing/Notes/Wikitext_2.0#Implications_of_this_model]].
			//
			// The main performance implication is that you can process a transclusion
			// concurrently *and* cache the output of {{1x|*bar}} since its output is
			// the same no matter where on the page it appears. Without this decoupled
			// model, if you got "{{mystery-template-that-takes-30-secs}}{{1x|*bar}}"
			// you have to wait 30 secs before you get to expand {{1x|*bar}}
			// because you have to wait and see whether the mystery template will
			// leave you in SOL state or non-SOL state.
			//
			// In a stroke of good luck, wikitext editors seem to have agreed
			// that it is better for all templates to be expanded in a
			// consistent SOL state and not be dependent on their context;
			// turn now to phab task T2529 which (via a fragile hack) tried
			// to ensure that every template which started with
			// start-of-line-sensitive markup was evaluated in a
			// start-of-line context (by hackily inserting a newline). Not
			// everyone was satisfied with this hack (see T14974), but it's
			// been the way things work for over a decade now (as evidenced
			// by T14974 never having been "fixed").
			//
			// So, while we've established we would prefer *not* to use page
			// context to set the initial SOL value for tokenizing the
			// template, what *should* the initial SOL value be?
			//
			// * Treat every transclusion as a fresh document starting in SOL
			//   state, ie set "sol" => true always. This is supported by
			//   most current wiki use, and is the intent behind the original
			//   T2529 hack (although that hack left a number of edge cases,
			//   described below).
			//
			// * Use `"sol" => false` for templates -- this was the solution
			//   rejected by the original T2529 as being contrary to editor
			//   expectations.
			//
			// * In the future, one might allow the template itself to
			//   specify what its initial SOL state should be, using a
			//   mechanism similar to what might be necessary for typed
			//   templates. This could also address T14974. This is not
			//   excluded by Parsoid at this point; but it would probably be
			//   signaled by a template "return type" which is *not* DOM
			//   therefore the template wouldn't get parsed "as wikitext"
			//   (ie, T14974 wants an "attribute-value" return type which is
			//   a plain string, and some of the wikitext 2.0 proposals
			//   anticipate an "attribute name/value" dictionary as a possible
			//   return type).
			//
			// In support of using sol=>true as the default initial state,
			// let's examine the sol-sensitive wikitext constructs, and
			// implicitly the corner cases left open by the T2529 hack. (For
			// non-sol-sensitive constructs, the initial SOL state is
			// irrelevant.)
			//
			//   - SOL-sensitive constructs include lists, headings, indent-pre,
			//     and table syntax.
			//   - Of these, only lists, headings, and table syntax are actually handled in
			//     the PEG tokenizer and are impacted by SOL state.
			//   - Indent-Pre has its own handler that operates in a full page token context
			//     and isn't impacted.
			//   - T2529 effectively means for *#:; (lists) and {| (table start), newlines
			//     are added which means no matter what value we set here, they will get
			//     processed in sol state.
			//   - This leaves us with headings (=), table heading (!), table row (|), and
			//     table close (|}) syntax that would be impacted by what we set here.
			//   - Given that table row/heading/close templates are very very common on wikis
			//     and used for constructing complex tables, sol => true will let us handle
			//     those without hacks. We aren't fully off the hook there -- see the code
			//     in TokenStreamPatcher, AttributeExpander, TableFixups that all exist
			//     to work around the fact that decoupled processing isn't the wikitext
			//     default. But, without sol => true, we'll likely be in deeper trouble.
			//   - But, this can cause some occasional bad parses where "=|!" aren't meant
			//     to be processed as a sol-wikitext construct.
			//   - Note also that the workaround for T14974 (ie, the T2529 hack applying
			//     where sol=false is actually desired) has traditionally been to add an
			//     initial <nowiki/> which ensures that the "T2529 characters" are not
			//     initial. There are a number of alternative mechanisms to accomplish
			//     this (ie, HTML-encode the first character).
			//
			// To honor the spirit of T2529 it seems plausible to try to lint
			// away the remaining corner cases where T2529 does *not* result
			// in start-of-line state for template expansion, and to use the
			// various workarounds for compatibility in the meantime.
			//
			// We should also pick *one* of the workarounds for T14974
			// (probably `<nowiki/>` at the first position in the template),
			// support that (until a better mechanism exists), and (if
			// possible) lint away any others.
			'sol' => true
		]
	);

	return $this->processTemplateTokens( $toks );
}
762 | |
/**
 * Encapsulate the tokens of an expansion via the given encapsulator.
 *
 * Template encapsulation normally wouldn't happen in a nested context,
 * since nested templates should already have been expanded, and
 * processTemplateSource sets expandTemplates === false. However,
 * extension tags from templates can have content that requires wikitext
 * parsing and, due to precedence, contain unexpanded templates.
 *
 * For example, {{1x|hi<ref>{{1x|ho}}</ref>}}
 *
 * Since extensions can require template expansion unconditionally, we can
 * end up here inTemplate, in which case the substrings of env.page.src
 * used in getArgInfo are no longer accurate, and so tplarginfo should be
 * omitted. Presumably, template wrapping in the dom post processor won't
 * be happening anyways, so this is unnecessary work as it is.
 *
 * @param TemplateEncapsulator $state Encapsulator that performs the wrapping
 * @param array $tokens Tokens to encapsulate
 * @return array Whatever $state->encapTokens() returns for $tokens
 */
private function encapTokens( TemplateEncapsulator $state, array $tokens ): array {
	// Encapsulation is only valid while template wrapping is enabled.
	Assert::invariant( $this->wrapTemplates, 'Encapsulating tokens when not wrapping!' );

	$encapsulated = $state->encapTokens( $tokens );
	return $encapsulated;
}
789 | |
/**
 * Post-process a chunk emitted from the input pipeline after feeding it a
 * template.
 *
 * The EOF token is stripped, any `tsr` recorded on a token's dataParsoid
 * is removed, mw:Placeholder metas are replaced with empty strings and,
 * when the expandTemplates option is off, comment tokens are dropped.
 *
 * @param array $chunk
 * @return array
 */
private function processTemplateTokens( array $chunk ): array {
	TokenUtils::stripEOFTkfromTokens( $chunk );

	foreach ( $chunk as $idx => $tok ) {
		// Only objects can carry dataParsoid; plain strings are skipped.
		if ( is_object( $tok ) && isset( $tok->dataParsoid->tsr ) ) {
			unset( $tok->dataParsoid->tsr );
		}

		$isPlaceholderMeta = $tok instanceof SelfclosingTagTk
			&& strtolower( $tok->getName() ) === 'meta'
			&& TokenUtils::hasTypeOf( $tok, 'mw:Placeholder' );
		if ( $isPlaceholderMeta ) {
			// replace with empty string to avoid metas being foster-parented out
			$chunk[$idx] = '';
		}
	}

	// FIXME: What is this stuff here? Why do we care about stripping out comments
	// so much that we create a new token array for every expanded template?
	// Unlikely to help perf very much.
	if ( !$this->options['expandTemplates'] ) {
		// Ignore comments in template transclusion mode
		$chunk = array_values(
			array_filter(
				$chunk,
				static function ( $tok ) {
					return !( $tok instanceof CommentTk );
				}
			)
		);
	}

	$this->env->log( 'debug', 'TemplateHandler.processTemplateTokens', $chunk );
	return $chunk;
}
829 | |
/**
 * Fetch the source of a template.
 *
 * The env's page cache is consulted first; on a miss the source is
 * requested from the data access layer. When profiling is enabled, the
 * time spent on the fetch is recorded under "TemplateFetch".
 *
 * @param string $templateName
 * @param array $attribs Not read by this method
 * @return ?string The cached entry, or the 'main' role content of the
 *   fetched page, or null when the fetch yields nothing
 */
private function fetchTemplateAndTitle( string $templateName, array $attribs ): ?string {
	$env = $this->env;

	$cached = $env->pageCache[$templateName] ?? null;
	if ( $cached !== null ) {
		return $cached;
	}

	$fetchStart = microtime( true );
	$pageContent = $env->getDataAccess()->fetchTemplateSource(
		$env->getPageConfig(), $templateName
	);
	if ( $env->profiling() ) {
		$profile = $env->getCurrentProfile();
		// microtime( true ) is in seconds; the profile is bumped in milliseconds
		$elapsedMs = 1000 * ( microtime( true ) - $fetchStart );
		$profile->bumpMWTime( "TemplateFetch", $elapsedMs, "api" );
		$profile->bumpCount( "TemplateFetch" );
	}

	if ( !$pageContent ) {
		return null;
	}

	// FIXME:
	// 1. Hard-coded 'main' role
	return $pageContent->getContent( 'main' );
}
855 | |
/**
 * Check whether a token list contains at least one template token.
 *
 * @param mixed $tokens Anything; non-array values never match
 * @return bool True iff $tokens is an array with an element for which
 *   TokenUtils::isTemplateToken() returns true
 */
private static function hasTemplateToken( $tokens ): bool {
	if ( !is_array( $tokens ) ) {
		return false;
	}

	foreach ( $tokens as $candidate ) {
		if ( TokenUtils::isTemplateToken( $candidate ) ) {
			return true;
		}
	}

	return false;
}
870 | |
/**
 * Process the special magic word as specified by $resolvedTgt['magicWordType'].
 * ```
 * magicWordType === '!'    => {{!}} is the magic word
 * magicWordType === 'MASQ' => DEFAULTSORT, DISPLAYTITLE are the magic words
 *                             (See Util::magicMasqs())
 * ```
 * For {{!}} the result is either a `td` tag token (not at top level) or a
 * literal '|' string (at top level). For MASQ magic words the result is a
 * single `meta` token carrying a `mw:PageProp/...` property and a
 * `content` attribute.
 *
 * @param bool $atTopLevel
 * @param TemplateEncapsulator $state Its `token` is the template token being processed
 * @param array $resolvedTgt Resolved target; this method reads the keys
 *   'magicWordType', 'name', and either 'srcOffsets' or 'pfArg'
 * @return TemplateExpansionResult
 */
public function processSpecialMagicWord(
	bool $atTopLevel, TemplateEncapsulator $state, array $resolvedTgt
): TemplateExpansionResult {
	$env = $this->env;
	$tplToken = $state->token;

	// Special case for {{!}} magic word.
	//
	// If we tokenized as a magic word, we meant for it to expand to a
	// string. The tokenizer has handling for this syntax in table
	// positions. However, proceeding to go through template expansion
	// will reparse it as a table cell token. Hence this special case
	// handling to avoid that path.
	if ( $resolvedTgt['magicWordType'] === '!' || $tplToken->attribs[0]->k === '!' ) {
		// If we're not at the top level, return a table cell. This will always
		// be the case. Either {{!}} was tokenized as a td, or it was tokenized
		// as template but the recursive call to fetch its content returns a
		// single | in an ambiguous context which will again be tokenized as td.
		// In any case, this should only be relevant for parserTests.
		if ( !$atTopLevel ) {
			$toks = [ new TagTk( 'td' ) ];
		} else {
			$toks = [ '|' ];
		}
		return new TemplateExpansionResult( $toks, false, (bool)$this->wrapTemplates );
	}

	Assert::invariant(
		$resolvedTgt['magicWordType'] === 'MASQ',
		'Unexpected magicWordType type: ' . $resolvedTgt['magicWordType']
	);

	// Build the page property name, e.g. 'mw:PageProp/categorydefaultsort'
	$magicWord = mb_strtolower( $resolvedTgt['name'] );
	$pageProp = 'mw:PageProp/';
	if ( $magicWord === 'defaultsort' ) {
		$pageProp .= 'category';
	}
	$pageProp .= $magicWord;

	$metaToken = new SelfclosingTagTk( 'meta',
		[ new KV( 'property', $pageProp ) ],
		$tplToken->dataParsoid->clone()
	);

	if ( isset( $tplToken->dataParsoid->tmp->templatedAttribs ) ) {
		// See [[mw:Specs/HTML#Generated_attributes_of_HTML_tags]]
		//
		// For every attribute that has a templated name and/or value,
		// AttributeExpander creates a 2-item array for that attribute.
		// [ {txt: '..', html: '..'}, { html: '..'} ]
		// 'txt' is the plain-text name/value
		// 'html' is the HTML-version of the name/value
		//
		// Massage the templated magic-word info into a similar format.
		// In this case, the attribute name is 'content' (implicit) and
		// since it is implicit, the name itself cannot be attribute.
		// Hence 'html' property is empty.
		//
		// The attribute value has been templated and is encoded there.
		//
		// NOTE: If any part of the 'MAGIC_WORD:value' string is templated,
		// we consider the magic word as having expanded attributes, rather
		// than only when the 'value' part of it. This is because of the
		// limitation of our token representation for templates. This is
		// an edge case that it is not worth a refactoring right now to
		// handle this properly and choose mw:Transclusion or mw:ExpandedAttrs
		// depending on which part is templated.
		//
		// FIXME: Is there a simpler / better repn. for templated attrs?
		$ta = $tplToken->dataParsoid->tmp->templatedAttribs;
		$html = $ta[0][0]['html'];
		$ta[0] = [
			[ 'txt' => 'content' ], // Magic-word attribute name
			// FIXME: the content still contains the parser function prefix
			// (eg, the html is 'DISPLAYTITLE:Foo' even though the stripped
			// content attribute is 'Foo')
			[ 'html' => $html ], // HTML repn. of the attribute value
		];
		$metaToken->addAttribute( 'data-mw', PHPUtils::jsonEncode( [ 'attribs' => $ta ] ) );

		// Use the textContent of the expanded attribute, similar to how
		// Sanitizer::sanitizeTagAttr does it. However, here we have the
		// opportunity to strip the parser function prefix.
		$dom = DOMUtils::parseHTML( $html );
		$content = DOMCompat::getBody( $dom )->textContent;
		// The 'u' modifier makes \w Unicode-aware so that a localized,
		// non-ASCII magic word prefix is stripped too. preg_replace()
		// returns null on failure; keep the unstripped content then.
		$content = preg_replace( '#^\w+:#u', '', $content, 1 ) ?? $content;
		$metaToken->addAttribute( 'content', $content, $resolvedTgt['srcOffsets']->expandTsrV() );

		$metaToken->addAttribute( 'about', $env->newAboutId() );
		$metaToken->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' );
	} else {
		// Leading/trailing WS should be stripped
		//
		// This is bogus, but preserves existing functionality
		// Clearly we don't have an adequate representation for existing uses
		// of the DISPLAYTITLE: magic word.
		// phpcs:ignore Generic.Files.LineLength.TooLong
		// Ex: {{DISPLAYTITLE:User:<span style="text-transform: lowercase;">MC</span><span style="font-size: 80%;">10</span>/Welcome}}
		$key = trim( TokenUtils::tokensToString( $resolvedTgt['pfArg'] ) );

		$src = $tplToken->dataParsoid->src ?? '';
		if ( $src ) {
			// If the token has original wikitext, shadow the sort-key:
			// drop everything up to and including the first ':' and the
			// trailing '}}' to recover the original, untrimmed value.
			$origKey = PHPUtils::stripSuffix( preg_replace( '/[^:]+:?/', '', $src, 1 ), '}}' );
			$metaToken->addNormalizedAttribute( 'content', $key, $origKey );
		} else {
			// If not, this token came from an extension/template
			// in which case, dont bother with shadowing since the token
			// will never be edited directly.
			$metaToken->addAttribute( 'content', $key );
		}
	}

	return new TemplateExpansionResult( [ $metaToken ] );
}
998 | |
/**
 * Expand the template token held by $state.
 *
 * Depending on the handler state and options, the token is converted
 * back to a string (size limits hit, unresolvable target, or no expansion
 * requested), handled as a special magic word, expanded natively, or
 * expanded through MediaWiki's preprocessor.
 *
 * @param TemplateEncapsulator $state
 * @return TemplateExpansionResult
 */
private function expandTemplate( TemplateEncapsulator $state ): TemplateExpansionResult {
	$env = $this->env;
	$token = $state->token;
	$expandTemplates = $this->options['expandTemplates'];

	// Since AttributeExpander runs later in the pipeline than TemplateHandler,
	// if the template name is templated, use our copy of AttributeExpander
	// to process all attributes to tokens, and force reprocessing of this
	// template token since we will then know the actual template target.
	if ( $expandTemplates && self::hasTemplateToken( $token->attribs[0]->k ) ) {
		$aeResult = $this->ae->processComplexAttributes( $token );
		$aeToks = $aeResult->tokens ?? null;
		Assert::invariant(
			$aeToks && count( $aeToks ) === 1 && $aeToks[0] === $token,
			"Expected only the input token as the return value."
		);
	}

	// If we were already greater than $wgMaxArticleSize we return the
	// token without expanding it.
	// (This case is where the original article as fetched from the DB
	// or passed to the API exceeded max article size.)
	if ( $this->atMaxArticleSize ) {
		return $this->convertToString( $token );
	}

	// There's no point in proceeding if we've already hit the maximum inclusion size
	// FIXME: The legacy parser would try to make this a link and
	// elsewhere we'd return the $e->getMessage()
	// (This case is where the template post-expansion accumulation is
	// over the maximum wikitext size.)
	// XXX: It could be combined with the previous test, but we might
	// want to use different error messages in the future.
	if ( !$env->bumpWt2HtmlResourceUse( 'wikitextSize', 0 ) ) {
		return $this->convertToString( $token );
	}

	$text = $token->dataParsoid->src ?? '';

	$targetAttr = $token->attribs[0];
	$tgt = $this->resolveTemplateTarget(
		$state, $targetAttr->k, $targetAttr->srcOffsets->key
	);

	if ( $tgt === null && $expandTemplates ) {
		// Target contains tags, convert template braces and pipes back into text
		// Re-join attribute tokens with '=' and '|'
		return $this->convertToString( $token, true );
	}

	if ( isset( $tgt['magicWordType'] ) ) {
		return $this->processSpecialMagicWord( $this->atTopLevel, $state, $tgt );
	}

	$frame = $this->manager->getFrame();

	if ( $env->nativeTemplateExpansionEnabled() ) {
		// Expand argument keys
		$atm = new AttributeTransformManager(
			$frame,
			[ 'expandTemplates' => false, 'inTemplate' => true ]
		);
		$newAttribs = $atm->process( $token->attribs );
		$target = $newAttribs[0]->k;
		if ( !$target ) {
			$env->log( 'debug', 'No template target! ', $newAttribs );
		}

		// Resolve the template target again now that the template token's
		// attributes have been expanded by the AttributeTransformManager
		$resolvedTgt = $this->resolveTemplateTarget(
			$state, $target, $newAttribs[0]->srcOffsets->key
		);
		if ( $resolvedTgt === null ) {
			// Target contains tags, convert template braces and pipes back into text
			// Re-join attribute tokens with '=' and '|'
			return $this->convertToString( $token, true );
		}
		return $this->expandTemplateNatively( $state, $resolvedTgt, $newAttribs );
	}

	if ( !$expandTemplates ) {
		// We don't perform recursive template expansion- something
		// template-like that the PHP parser did not expand. This is
		// encapsulated already, so just return the plain text.
		Assert::invariant( TokenUtils::isTemplateToken( $token ), "Expected template token." );
		return $this->convertToString( $token );
	}

	// Use MediaWiki's preprocessor
	//
	// The tokenizer needs to use `text` as the cache key for caching
	// expanded tokens from the expanded transclusion text that we get
	// from the preprocessor, since parameter substitution will already
	// have taken place.
	//
	// It's sufficient to pass `[]` in place of attribs since they
	// won't be used. In `usePHPPreProcessor`, there is no parameter
	// substitution coming from the frame.

	/* $tgt cannot be null here ($expandTemplates is set), so target is present. */
	$templateName = $tgt['name'];
	$templateTitle = $tgt['title'];

	// We still need to check for limit violations because of the
	// higher precedence of extension tags, which can result in nested
	// templates even while using the php preprocessor for expansion.
	$error = $this->enforceTemplateConstraints( $templateName, $templateTitle, true );
	if ( $error ) {
		// FIXME: Should we be encapsulating here?
		// Inconsistent with the other place constraints are enforced.
		return new TemplateExpansionResult( $error );
	}

	// Check if we have an expansion for this template in the cache already
	$cachedTransclusion = $env->transclusionCache[$text] ?? null;
	if ( $cachedTransclusion ) {
		// cache hit: reuse the expansion DOM
		// FIXME(SSS): How does this work again for
		// templates like {{start table}} and {{end table}}??
		$encapsulated = PipelineUtils::encapsulateExpansionHTML(
			$env, $token, $cachedTransclusion, [ 'fromCache' => true ]
		);
		return new TemplateExpansionResult( $encapsulated );
	}

	// Fetch and process the template expansion
	$expansion = Wikitext::preprocess( $env, $text );
	if ( $expansion['error'] ) {
		return new TemplateExpansionResult(
			[ $expansion['src'] ], false, $this->wrapTemplates
		);
	}

	// FIXME: Passing the real template args (everything in $token->attribs
	// after the template name) is a source of a lot of issues since
	// templateargs get looked up from the Frame and yield these tokens
	// which then enter the token stream. See T301948 and others from
	// wmf.22. Hence the empty 'attribs'.
	$tplToks = $this->processTemplateSource(
		$token,
		[
			'name' => $templateName,
			'title' => $templateTitle,
			'attribs' => []
		],
		$expansion['src']
	);
	return new TemplateExpansionResult( $tplToks, true, $this->wrapTemplates );
}
1150 | |
/**
 * Main template token handler.
 *
 * Builds an 'mw:Transclusion' encapsulator for the token, runs
 * expandTemplate on it, and then post-processes the resulting tokens as
 * requested by the expansion result (encapsulation and/or shuttling to
 * the end of the stage).
 *
 * @param Token $token
 * @return TokenHandlerResult
 */
private function onTemplate( Token $token ): TokenHandlerResult {
	$encapsulator = new TemplateEncapsulator(
		$this->env,
		$this->manager->getFrame(),
		$token,
		'mw:Transclusion'
	);

	$expansion = $this->expandTemplate( $encapsulator );
	$outToks = $expansion->tokens;

	if ( $expansion->encap ) {
		$outToks = $this->encapTokens( $encapsulator, $outToks );
	}

	if ( $expansion->shuttle ) {
		// Shuttle tokens to the end of the stage since they've gone through the
		// rest of the handlers in the current pipeline in the pipeline above.
		$outToks = $this->manager->shuttleTokensToEndOfStage( $outToks );
	}

	return new TokenHandlerResult( $outToks );
}
1177 | |
/**
 * Expand template arguments with tokens from the containing frame.
 *
 * When both template wrapping and the expandTemplates option are on, the
 * expanded tokens are additionally encapsulated as 'mw:Param'. The
 * result is always shuttled to the end of the stage.
 *
 * @param Token $token
 * @return TokenHandlerResult
 */
private function onTemplateArg( Token $token ): TokenHandlerResult {
	$argToks = $this->manager->getFrame()->expandTemplateArg( $token );

	$shouldWrap = $this->wrapTemplates && $this->options['expandTemplates'];
	if ( $shouldWrap ) {
		// This is a bare use of template arg syntax at the top level
		// outside any template use context. Wrap this use with RDF attrs.
		// so that this chunk can be RT-ed en-masse.
		$encapsulator = new TemplateEncapsulator(
			$this->env,
			$this->manager->getFrame(),
			$token,
			'mw:Param'
		);
		$argToks = $this->encapTokens( $encapsulator, $argToks );
	}

	// Shuttle tokens to the end of the stage since they've gone through the
	// rest of the handlers in the current pipeline in the pipeline above.
	$shuttled = $this->manager->shuttleTokensToEndOfStage( $argToks );

	return new TokenHandlerResult( $shuttled );
}
1202 | |
/**
 * Dispatch a tag token to the matching template handler.
 *
 * @param Token $token
 * @return TokenHandlerResult|null The handler's result for "template" and
 *   "templatearg" tokens; null for any other token name
 */
public function onTag( Token $token ): ?TokenHandlerResult {
	$name = $token->getName();

	if ( $name === "template" ) {
		return $this->onTemplate( $token );
	}

	if ( $name === "templatearg" ) {
		return $this->onTemplateArg( $token );
	}

	return null;
}
1217 | } |