Code Coverage for /src/src/Wt2Html/TT/TemplateHandler.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	35.04% covered (danger)	35.04%	157 / 448	20.00% covered (danger)	20.00%	4 / 20	CRAP	0.00% covered (danger)	0.00%	0 / 1
TemplateHandler	35.04% covered (danger)	35.04%	157 / 448	20.00% covered (danger)	20.00%	4 / 20	5747.24	0.00% covered (danger)	0.00%	0 / 1
__construct	100.00% covered (success)	100.00%	12 / 12	100.00% covered (success)	100.00%	1 / 1	1
parserFunctionsWrapper	0.00% covered (danger)	0.00%	0 / 13	0.00% covered (danger)	0.00%	0 / 1	20
stripIncludeTokens	0.00% covered (danger)	0.00%	0 / 17	0.00% covered (danger)	0.00%	0 / 1	156
processToString	0.00% covered (danger)	0.00%	0 / 47	0.00% covered (danger)	0.00%	0 / 1	552
isSafeSubst	0.00% covered (danger)	0.00%	0 / 3	0.00% covered (danger)	0.00%	0 / 1	6
resolveTemplateTarget	41.18% covered (danger)	41.18%	35 / 85	0.00% covered (danger)	0.00%	0 / 1	101.42
flattenAndAppendToks	0.00% covered (danger)	0.00%	0 / 16	0.00% covered (danger)	0.00%	0 / 1	132
convertToString	0.00% covered (danger)	0.00%	0 / 18	0.00% covered (danger)	0.00%	0 / 1	6
enforceTemplateConstraints	100.00% covered (success)	100.00%	10 / 10	100.00% covered (success)	100.00%	1 / 1	2
expandTemplateNatively	0.00% covered (danger)	0.00%	0 / 40	0.00% covered (danger)	0.00%	0 / 1	90
processTemplateSource	73.53% covered (warning)	73.53%	25 / 34	0.00% covered (danger)	0.00%	0 / 1	3.17
encapTokens	100.00% covered (success)	100.00%	4 / 4	100.00% covered (success)	100.00%	1 / 1	1
processTemplateTokens	65.00% covered (warning)	65.00%	13 / 20	0.00% covered (danger)	0.00%	0 / 1	14.29
fetchTemplateAndTitle	0.00% covered (danger)	0.00%	0 / 13	0.00% covered (danger)	0.00%	0 / 1	20
hasTemplateToken	40.00% covered (danger)	40.00%	2 / 5	0.00% covered (danger)	0.00%	0 / 1	7.46
processSpecialMagicWord	0.00% covered (danger)	0.00%	0 / 10	0.00% covered (danger)	0.00%	0 / 1	12
expandTemplate	53.25% covered (warning)	53.25%	41 / 77	0.00% covered (danger)	0.00%	0 / 1	71.46
onTemplate	100.00% covered (success)	100.00%	10 / 10	100.00% covered (success)	100.00%	1 / 1	3
onTemplateArg	0.00% covered (danger)	0.00%	0 / 8	0.00% covered (danger)	0.00%	0 / 1	12
onTag	83.33% covered (warning)	83.33%	5 / 6	0.00% covered (danger)	0.00%	0 / 1	4.07

1	<?php
2	declare( strict_types = 1 );
3
4	namespace Wikimedia\Parsoid\Wt2Html\TT;
5
6	use Wikimedia\Assert\Assert;
7	use Wikimedia\Assert\UnreachableException;
8	use Wikimedia\Parsoid\Tokens\CommentTk;
9	use Wikimedia\Parsoid\Tokens\EndTagTk;
10	use Wikimedia\Parsoid\Tokens\KV;
11	use Wikimedia\Parsoid\Tokens\NlTk;
12	use Wikimedia\Parsoid\Tokens\SelfclosingTagTk;
13	use Wikimedia\Parsoid\Tokens\SourceRange;
14	use Wikimedia\Parsoid\Tokens\TagTk;
15	use Wikimedia\Parsoid\Tokens\Token;
16	use Wikimedia\Parsoid\Utils\PHPUtils;
17	use Wikimedia\Parsoid\Utils\PipelineUtils;
18	use Wikimedia\Parsoid\Utils\Title;
19	use Wikimedia\Parsoid\Utils\TitleException;
20	use Wikimedia\Parsoid\Utils\TokenUtils;
21	use Wikimedia\Parsoid\Utils\WTUtils;
22	use Wikimedia\Parsoid\Wikitext\Wikitext;
23	use Wikimedia\Parsoid\Wt2Html\Params;
24	use Wikimedia\Parsoid\Wt2Html\TokenTransformManager;
25
26	/**
27	* Template and template argument handling.
28	*/
29	class TemplateHandler extends TokenHandler {
30	/**
31	* @var bool Should we wrap template tokens with template meta tags?
32	*/
33	private $wrapTemplates;
34
35	/**
36	* @var AttributeExpander
37	* Local copy of the attribute expander to deal with template targets
38	* that are templated themselves
39	*/
40	private $ae;
41
42	/**
43	* @var ParserFunctions
44	*/
45	private $parserFunctions;
46
47	/**
48	* @var bool
49	*/
50	private $atMaxArticleSize;
51
52	/** @var string|null */
53	private $safeSubstRegex;
54
55	/**
56	* @param TokenTransformManager $manager
57	* @param array $options
58	* - ?bool inTemplate Is this being invoked while processing a template?
59	* - ?bool expandTemplates Should we expand templates encountered here?
60	* - ?string extTag The name of the extension tag, if any, which is being expanded.
61	*/
62	public function __construct( TokenTransformManager $manager, array $options ) {
63	parent::__construct( $manager, $options );
64	$this->parserFunctions = new ParserFunctions( $this->env );
65	$this->ae = new AttributeExpander( $this->manager, [
66	'expandTemplates' => $this->options['expandTemplates'],
67	'inTemplate' => $this->options['inTemplate'],
68	'standalone' => true,
69	] );
70	$this->wrapTemplates = !$options['inTemplate'];
71
72	// In the legacy parser, the call to replaceVariables from internalParse
73	// returns early if the text is already greater than the $wgMaxArticleSize
74	// We're going to compare and set a boolean here, then do the "early
75	// return" below.
76	$this->atMaxArticleSize = !$this->env->compareWt2HtmlLimit(
77	'wikitextSize',
78	strlen( $this->env->topFrame->getSrcText() )
79	);
80	}
81
82	/**
83	* Parser functions also need template wrapping.
84	*
85	* @param array $tokens
86	* @return array
87	*/
88	private function parserFunctionsWrapper( array $tokens ): array {
89	$chunkToks = [];
90	if ( $tokens ) {
91	// This is only for the Parsoid native expansion pipeline used in
92	// parser tests. The "" token sometimes changes foster parenting
93	// behavior and trips up some tests.
94	$tokens = array_values( array_filter( $tokens, static function ( $t ) {
95	return $t !== '';
96	} ) );
97
98	// token chunk should be flattened
99	$flat = true;
100	foreach ( $tokens as $t ) {
101	if ( is_array( $t ) ) {
102	$flat = false;
103	break;
104	}
105	}
106	Assert::invariant( $flat, "Expected token chunk to be flattened" );
107
108	$chunkToks = $this->processTemplateTokens( $tokens );
109	}
110	return $chunkToks;
111	}
112
113	/**
114	* Strip include tags, and the contents of includeonly tags as well.
115	* @param (Token\|string)[] $tokens
116	* @return (Token\|string)[]
117	*/
118	private function stripIncludeTokens( array $tokens ): array {
119	$toks = [];
120	$includeOnly = false;
121	foreach ( $tokens as $tok ) {
122	if ( is_string( $tok ) ) {
123	if ( !$includeOnly ) {
124	$toks[] = $tok;
125	}
126	continue;
127	}
128
129	switch ( get_class( $tok ) ) {
130	case TagTk::class:
131	case EndTagTk::class:
132	case SelfclosingTagTk::class:
133	$tokName = $tok->getName();
134	if ( $tokName === 'noinclude' \|\| $tokName === 'onlyinclude' ) {
135	break;
136	} elseif ( $tokName === 'includeonly' ) {
137	$includeOnly = $tok instanceof TagTk;
138	break;
139	}
140	// Fall through
141	default:
142	if ( !$includeOnly ) {
143	$toks[] = $tok;
144	}
145	}
146	}
147	return $toks;
148	}
149
150	/**
151	* Take output of tokensToString and further postprocess it.
152	* - If it can be processed to a string which would be a valid template transclusion target,
153	* the return value will be [ $the_string_value, null ]
154	* - If not, the return value will be [ $partial_string, $unprocessed_token_array ]
155	* The caller can then decide if this would be a valid parser function call
156	* where the unprocessed token array would be part of the first arg to the parser function.
157	* Ex: With "{{uc:foo [[foo]] {{1x\|foo}} bar}}", we return
158	* [ "uc:foo ", [ wikilink-token, " ", template-token, " bar" ] ]
159	*
160	* @param array $tokens
161	* @return array first element is always a string
162	*/
163	private function processToString( array $tokens ): array {
164	$maybeTarget = TokenUtils::tokensToString( $tokens, true, [ 'retainNLs' => true ] );
165	if ( !is_array( $maybeTarget ) ) {
166	return [ $maybeTarget, null ];
167	}
168
169	$buf = $maybeTarget[0]; // Will always be a string
170	$tgtTokens = $maybeTarget[1];
171	$preNlContent = null;
172	$i = 0;
173	$n = count( $tgtTokens );
174	while ( $i < $n ) {
175	$ntt = $tgtTokens[$i];
176	if ( is_string( $ntt ) ) {
177	$buf .= $ntt;
178	if ( $preNlContent !== null && !preg_match( '/^\s*$/D', $buf ) ) {
179	// intervening newline makes this an invalid template target
180	return [ $preNlContent, array_merge( [ $buf ], array_slice( $tgtTokens, $i ) ) ];
181	}
182	} else {
183	switch ( get_class( $ntt ) ) {
184	case SelfclosingTagTk::class:
185	// Quotes are valid template targets
186	if ( $ntt->getName() === 'mw-quote' ) {
187	$buf .= $ntt->getAttributeV( 'value' );
188	} elseif (
189	!TokenUtils::isEmptyLineMetaToken( $ntt ) &&
190	$ntt->getName() !== 'template' &&
191	$ntt->getName() !== 'templatearg' &&
192	// Ignore annotations in template targets
193	// NOTE(T295834): There's a large discussion about who's responsible
194	// for stripping these tags in I487baaafcf1ffd771cb6a9e7dd4fb76d6387e412
195	!(
196	$ntt->getName() === 'meta' &&
197	TokenUtils::matchTypeOf( $ntt, WTUtils::ANNOTATION_META_TYPE_REGEXP )
198	)
199	) {
200	// We are okay with empty (comment-only) lines,
201	// {{..}} and {{{..}}} in template targets.
202	if ( $preNlContent !== null ) {
203	return [ $preNlContent, array_merge( [ $buf ], array_slice( $tgtTokens, $i ) ) ];
204	} else {
205	return [ $buf, array_slice( $tgtTokens, $i ) ];
206	}
207	}
208	break;
209
210	case TagTk::class:
211	if ( TokenUtils::isEntitySpanToken( $ntt ) ) {
212	$buf .= $tgtTokens[$i + 1];
213	$i += 2;
214	break;
215	}
216	// Fall-through
217	case EndTagTk::class:
218	if ( $preNlContent !== null ) {
219	return [ $preNlContent, array_merge( [ $buf ], array_slice( $tgtTokens, $i ) ) ];
220	} else {
221	return [ $buf, array_slice( $tgtTokens, $i ) ];
222	}
223
224	case CommentTk::class:
225	// Ignore comments as well
226	break;
227
228	case NlTk::class:
229	// Ignore only the leading or trailing newlines
230	// (modulo whitespace and comments)
231	//
232	// If we only have whitespace in $buf thus far,
233	// the newline can be ignored. But, if we have
234	// non-ws content in $buf, everything that follows
235	// can only be ws.
236	if ( preg_match( '/^\s*$/D', $buf ) ) {
237	$buf .= "\n";
238	break;
239	} elseif ( $preNlContent === null ) {
240	// Buffer accumulated content
241	$preNlContent = $buf;
242	$buf = "\n";
243	break;
244	} else {
245	return [ $preNlContent, array_merge( [ $buf ], array_slice( $tgtTokens, $i ) ) ];
246	}
247
248	default:
249	throw new UnreachableException( 'Unexpected token type: ' . get_class( $ntt ) );
250	}
251	}
252	$i++;
253	}
254
255	// All good! No newline / only whitespace/comments post newline.
256	// (Well, annotation metas and template(arg) tokens too)
257	return [ $preNlContent . $buf, null ];
258	}
259
260	/**
261	* Is the prefix "safesubst"
262	* @param string $prefix
263	* @return bool
264	*/
265	private function isSafeSubst( $prefix ): bool {
266	if ( $this->safeSubstRegex === null ) {
267	$this->safeSubstRegex = $this->env->getSiteConfig()->getMagicWordMatcher( 'safesubst' );
268	}
269	return (bool)preg_match( $this->safeSubstRegex, $prefix . ':' );
270	}
271
272	/**
273	* @param TemplateEncapsulator $state
274	* @param string\|Token\|array $targetToks
275	* @param SourceRange $srcOffsets
276	* @return array\|null
277	*/
278	private function resolveTemplateTarget(
279	TemplateEncapsulator $state, $targetToks, $srcOffsets
280	): ?array {
281	$additionalToks = null;
282	if ( is_string( $targetToks ) ) {
283	$target = $targetToks;
284	} else {
285	$toks = !is_array( $targetToks ) ? [ $targetToks ] : $targetToks;
286	$toks = $this->processToString( $this->stripIncludeTokens( $toks ) );
287	[ $target, $additionalToks ] = $toks;
288	}
289
290	$target = trim( $target );
291	$pieces = explode( ':', $target );
292	$untrimmedPrefix = $pieces[0];
293	$prefix = trim( $pieces[0] );
294
295	// Parser function names usually (not always) start with a hash
296	$hasHash = substr( $target, 0, 1 ) === '#';
297	// String found after the colon will be the parser function arg
298	$haveColon = count( $pieces ) > 1;
299
300	// safesubst found in content should be treated as if no modifier were
301	// present. See https://en.wikipedia.org/wiki/Help:Substitution#The_safesubst:_modifier
302	if ( $haveColon && $this->isSafeSubst( $prefix ) ) {
303	$target = substr( $target, strlen( $untrimmedPrefix ) + 1 );
304	array_shift( $pieces );
305	$untrimmedPrefix = $pieces[0];
306	$prefix = trim( $pieces[0] );
307	$haveColon = count( $pieces ) > 1;
308	}
309
310	$env = $this->env;
311	$siteConfig = $env->getSiteConfig();
312
313	// Additional tokens are only justifiable in parser functions scenario
314	if ( !$haveColon && $additionalToks ) {
315	return null;
316	}
317
318	$pfArg = '';
319	if ( $haveColon ) {
320	$pfArg = substr( $target, strlen( $untrimmedPrefix ) + 1 );
321	if ( $additionalToks ) {
322	$pfArg = [ $pfArg ];
323	PHPUtils::pushArray( $pfArg, $additionalToks );
324	}
325	}
326
327	// Check if we have a magic-word variable.
328	$magicWordVar = $siteConfig->getMagicWordForVariable( $prefix ) ??
329	$siteConfig->getMagicWordForVariable( mb_strtolower( $prefix ) );
330	if ( $magicWordVar ) {
331	$state->variableName = $magicWordVar;
332	return [
333	'isVariable' => true,
334	'magicWordType' => $magicWordVar === '!' ? '!' : null,
335	'name' => $magicWordVar,
336	// FIXME: Some made up synthetic title
337	'title' => $env->makeTitleFromURLDecodedStr( "Special:Variable/$magicWordVar" ),
338	'pfArg' => $pfArg,
339	'srcOffsets' => new SourceRange(
340	$srcOffsets->start + strlen( $untrimmedPrefix ) + 1,
341	$srcOffsets->end ),
342	];
343	}
344
345	// FIXME: Checks for msgnw, msg, raw are missing at this point
346
347	$canonicalFunctionName = null;
348	if ( $haveColon ) {
349	$canonicalFunctionName = $siteConfig->getMagicWordForFunctionHook( $prefix );
350	}
351	if ( $canonicalFunctionName === null && $hasHash ) {
352	// If the target starts with a '#' it can't possibly be a template
353	// so this must be a "broken" parser function invocation
354	$canonicalFunctionName = substr( $prefix, 1 );
355	// @todo: Flag this as an author error somehow (T314524)
356	}
357	if ( $canonicalFunctionName !== null ) {
358	$state->parserFunctionName = $canonicalFunctionName;
359	// XXX this is made up.
360	$syntheticTitle = $env->makeTitleFromURLDecodedStr(
361	"Special:ParserFunction/$canonicalFunctionName",
362	$env->getSiteConfig()->canonicalNamespaceId( 'Special' ),
363	true // No exceptions
364	);
365	// Note that parserFunctionName/$canonicalFunctionName is not
366	// necessarily a valid title! Parsing rules are pretty generous
367	// w/r/t valid parser function names.
368	if ( $syntheticTitle === null ) {
369	$syntheticTitle = $env->makeTitleFromText(
370	'Special:ParserFunction/unknown'
371	);
372	}
373	return [
374	'isParserFunction' => true,
375	'magicWordType' => null,
376	'name' => $canonicalFunctionName,
377	'title' => $syntheticTitle, // FIXME: Some made up synthetic title
378	'pfArg' => $pfArg,
379	'srcOffsets' => new SourceRange(
380	$srcOffsets->start + strlen( $untrimmedPrefix ) + 1,
381	$srcOffsets->end ),
382	];
383	}
384
385	// We've exhausted the parser-function scenarios, and we still have additional tokens.
386	if ( $additionalToks ) {
387	return null;
388	}
389
390	// `resolveTitle()` adds the namespace prefix when it resolves fragments
391	// and relative titles, and a leading colon should resolve to a template
392	// from the main namespace, hence we omit a default when making a title
393	$namespaceId = strspn( $target, ':#/.' ) ?
394	null : $siteConfig->canonicalNamespaceId( 'template' );
395
396	// Resolve a possibly relative link and
397	// normalize the target before template processing.
398	$title = null;
399	try {
400	$title = $env->resolveTitle( $target );
401	} catch ( TitleException $e ) {
402	// Invalid template target!
403	return null;
404	}
405
406	// Entities in transclusions aren't decoded in the PHP parser
407	// So, treat the title as a url-decoded string!
408	$title = $env->makeTitleFromURLDecodedStr( $title, $namespaceId, true );
409	if ( !$title ) {
410	// Invalid template target!
411	return null;
412	}
413
414	// data-mw.target.href should be a url
415	$state->resolvedTemplateTarget = $env->makeLink( $title );
416
417	return [
418	'magicWordType' => null,
419	'name' => $title->getPrefixedDBKey(),
420	'title' => $title,
421	];
422	}
423
424	/**
425	* Flatten
426	* @param (Token\|string)[] $tokens
427	* @param ?string $prefix
428	* @param Token\|string\|(Token\|string)[] $t
429	* @return array
430	*/
431	private function flattenAndAppendToks(
432	array $tokens, ?string $prefix, $t
433	): array {
434	if ( is_array( $t ) ) {
435	$len = count( $t );
436	if ( $len > 0 ) {
437	if ( $prefix !== null && $prefix !== '' ) {
438	$tokens[] = $prefix;
439	}
440	PHPUtils::pushArray( $tokens, $t );
441	}
442	} elseif ( is_string( $t ) ) {
443	$len = strlen( $t );
444	if ( $len > 0 ) {
445	if ( $prefix !== null && $prefix !== '' ) {
446	$tokens[] = $prefix;
447	}
448	$tokens[] = $t;
449	}
450	} else {
451	if ( $prefix !== null && $prefix !== '' ) {
452	$tokens[] = $prefix;
453	}
454	$tokens[] = $t;
455	}
456
457	return $tokens;
458	}
459
460	/**
461	* By default, don't attempt to expand any templates in the wikitext that will be reprocessed.
462	*
463	* @param Token $token
464	* @param bool $expandTemplates
465	* @return TemplateExpansionResult
466	*/
467	private function convertToString( Token $token, bool $expandTemplates = false ): TemplateExpansionResult {
468	$frame = $this->manager->getFrame();
469	$tsr = $token->dataParsoid->tsr;
470	$src = substr( $token->dataParsoid->src, 1, -1 );
471	$startOffset = $tsr->start + 1;
472	$srcOffsets = new SourceRange( $startOffset, $startOffset + strlen( $src ) );
473
474	$toks = PipelineUtils::processContentInPipeline(
475	$this->env, $frame, $src, [
476	'pipelineType' => 'text/x-mediawiki',
477	'pipelineOpts' => [
478	'inTemplate' => $this->options['inTemplate'],
479	'expandTemplates' => $expandTemplates && $this->options['expandTemplates'],
480	],
481	'sol' => false,
482	'srcOffsets' => $srcOffsets,
483	]
484	);
485	TokenUtils::stripEOFTkfromTokens( $toks );
486	return new TemplateExpansionResult( array_merge( [ '{' ], $toks, [ '}' ] ), true );
487	}
488
489	/**
490	* Enforce template loops / loop depth limit constraints and emit
491	* error message if constraints are violated.
492	*
493	* @param mixed $target
494	* @param Title $title
495	* @param bool $ignoreLoop
496	* @return ?array
497	*/
498	private function enforceTemplateConstraints( $target, Title $title, bool $ignoreLoop ): ?array {
499	$error = $this->manager->getFrame()->loopAndDepthCheck(
500	$title, $this->env->getSiteConfig()->getMaxTemplateDepth(),
501	$ignoreLoop
502	);
503
504	return $error ? [ // Loop detected or depth limit exceeded, abort!
505	new TagTk( 'span', [ new KV( 'class', 'error' ) ] ),
506	$error,
507	new SelfclosingTagTk( 'wikilink', [ new KV( 'href', $target, null, '', '' ) ] ),
508	new EndTagTk( 'span' ),
509	] : null;
510	}
511
512	/**
513	* Fetch, tokenize and token-transform a template after all arguments and
514	* the target were expanded.
515	*
516	* @param TemplateEncapsulator $state
517	* @param array $resolvedTgt
518	* @param array $attribs
519	* @return TemplateExpansionResult
520	*/
521	private function expandTemplateNatively(
522	TemplateEncapsulator $state, array $resolvedTgt, array $attribs
523	): TemplateExpansionResult {
524	$env = $this->env;
525	$encap = $this->options['expandTemplates'] && $this->wrapTemplates;
526
527	// XXX: wrap attribs in object with .dict() and .named() methods,
528	// and each member (key/value) into object with .tokens(), .dom() and
529	// .wikitext() methods (subclass of Array)
530
531	$target = $resolvedTgt['name'];
532	if ( isset( $resolvedTgt['isParserFunction'] ) \|\| isset( $resolvedTgt['isVariable'] ) ) {
533	// FIXME: HARDCODED to core parser function implementations!
534	// These should go through function hook registrations in the
535	// ParserTests mock setup ideally. But, it is complicated because the
536	// Parsoid core parser function versions have "token" versions
537	// which are incompatible with implementation in FunctionHookHandler
538	// and FunctionArgs. So, we continue down this hacky path for now.
539	if ( $target === '=' ) {
540	$target = 'equal'; // '=' is not a valid character in function names
541	}
542	$target = 'pf_' . $target;
543	// FIXME: Parsoid may not have implemented the parser function natively
544	// Emit an error message, but encapsulate it so it roundtrips back.
545	if ( !is_callable( [ $this->parserFunctions, $target ] ) ) {
546	// FIXME: Consolidate error response format with enforceTemplateConstraints
547	$err = 'Parser function implementation for ' . $target . ' missing in Parsoid.';
548	return new TemplateExpansionResult( [ $err ], false, $encap );
549	}
550
551	$pfAttribs = new Params( $attribs );
552	$pfAttribs->args[0] = new KV(
553	// FIXME: This is bogus, but preserves borked b/c
554	TokenUtils::tokensToString( $resolvedTgt['pfArg'] ), [],
555	$resolvedTgt['srcOffsets']->expandTsrK()
556	);
557	$env->log( 'debug', 'entering prefix', $target, $state->token );
558	$res = call_user_func( [ $this->parserFunctions, $target ],
559	$state->token, $this->manager->getFrame(), $pfAttribs );
560	if ( $this->wrapTemplates ) {
561	$res = $this->parserFunctionsWrapper( $res );
562	}
563	return new TemplateExpansionResult( $res, false, $encap );
564	}
565
566	// Loop detection needs to be enabled since we're doing our own template expansion
567	$error = $this->enforceTemplateConstraints( $target, $resolvedTgt['title'], false );
568	if ( $error ) {
569	// FIXME: Should we be encapsulating here?
570	// Inconsistent with the other place constrainsts are enforced.
571	return new TemplateExpansionResult( $error, false, $encap );
572	}
573
574	// XXX: notes from brion's mediawiki.parser.environment
575	// resolve template name
576	// load template w/ canonical name
577	// load template w/ variant names (language variants)
578
579	// Fetch template source and expand it
580	$src = $this->fetchTemplateAndTitle( $target, $attribs );
581	if ( $src !== null ) {
582	$toks = $this->processTemplateSource(
583	$state->token,
584	[
585	'name' => $target,
586	'title' => $resolvedTgt['title'],
587	'attribs' => array_slice( $attribs, 1 ), // strip template target
588	],
589	$src
590	);
591	return new TemplateExpansionResult( $toks, true, $encap );
592	} else {
593	// Convert to a wikilink (which will become a redlink after the redlinks pass).
594	$toks = [ new SelfclosingTagTk( 'wikilink' ) ];
595	$hrefSrc = $resolvedTgt['name'];
596	$toks[0]->attribs[] = new KV( 'href', $hrefSrc, null, null, $hrefSrc );
597	return new TemplateExpansionResult( $toks, false, $encap );
598	}
599	}
600
601	/**
602	* Process a fetched template source to a token stream.
603	*
604	* @param Token $token
605	* @param array $tplArgs
606	* @param string $src
607	* @return array
608	*/
609	private function processTemplateSource( Token $token, array $tplArgs, string $src ): array {
610	$env = $this->env;
611	$frame = $this->manager->getFrame();
612	if ( $env->hasDumpFlag( 'tplsrc' ) ) {
613	$dump = str_repeat( '=', 28 ) . " template source " .
614	str_repeat( '=', 28 ) . "\n";
615	$dump .= 'TEMPLATE:' . $tplArgs['name'] . 'TRANSCLUSION:' .
616	PHPUtils::jsonEncode( $token->dataParsoid->src ) . "\n";
617	$dump .= str_repeat( '-', 80 ) . "\n";
618	$dump .= $src . "\n";
619	$dump .= str_repeat( '-', 80 ) . "\n";
620	$env->writeDump( $dump );
621	}
622
623	if ( $src === '' ) {
624	return [];
625	}
626
627	$env->log( 'debug', 'TemplateHandler.processTemplateSource',
628	$tplArgs['name'], $tplArgs['attribs'] );
629
630	// Get a nested transformation pipeline for the wikitext that takes
631	// us through stages 1-2, with the appropriate pipeline options set.
632	//
633	// Simply returning the tokenized source here (which may be correct
634	// when using the legacy preprocessor because we don't expect to
635	// tokenize any templates or include directives so skipping those
636	// handlers should be ok) won't work since the options for the pipeline
637	// we're in probably aren't what we want.
638	$toks = PipelineUtils::processContentInPipeline(
639	$env,
640	$frame,
641	$src,
642	[
643	'pipelineType' => 'text/x-mediawiki',
644	'pipelineOpts' => [
645	'inTemplate' => true,
646	'isInclude' => true,
647	// FIXME: In reality, this is broken for parser tests where
648	// we expand templates natively. We do want all nested templates
649	// to be expanded. But, setting this to !usePHPPreProcessor seems
650	// to break a number of tests. Not pursuing this line of enquiry
651	// for now since this parserTests vs production distinction will
652	// disappear with parser integration. We'll just bear the stench
653	// till that time.
654	//
655	// NOTE: No expansion required for nested templates.
656	'expandTemplates' => false,
657	'extTag' => $this->options['extTag'] ?? null
658	],
659	'srcText' => $src,
660	'srcOffsets' => new SourceRange( 0, strlen( $src ) ),
661	'tplArgs' => $tplArgs,
662	// HEADS UP: You might be wondering why we are forcing "sol" => true without
663	// using information about whether the transclusion is used in a SOL context.
664	//
665	// Ex: "foo {{1x\|bar}}" Here, "bar" is not in SOL context relative to the
666	// top-level page and so, should it be actually be parsed as a list item?
667	//
668	// So, there is a use-case where one could argue that the sol value here
669	// should be conditioned on the page-level context where "{{1x\|*bar}}" showed
670	// up. So, in this example "foo {{1x\|*bar}}, sol would be false and in this
671	// example "foo\n{{1x\|*bar}}", sol would be true. That is effectively how
672	// the legacy parser behaves. (Ignore T2529 for the moment.)
673	//
674	// But, Parsoid is a different beast. Since the Parsoid/JS days, templates
675	// have been processed asynchronously. So, {{1x\|*bar}} would be expanded and
676	// tokenized before even its preceding context might have been processed.
677	// From the start, Parsoid has aimed to decouple the processing of fragment
678	// generators (be it templates, extensions, or something else) from the
679	// processing of the page they are embedded in. This has been the
680	// starting point of many a wikitext 2.0 proposal on mediawiki.org;
681	// see also [[mw:Parsing/Notes/Wikitext_2.0#Implications_of_this_model]].
682	//
683	// The main performance implication is that you can process a transclusion
684	// concurrently and cache the output of {{1x\|*bar}} since its output is
685	// the same no matter where on the page it appears. Without this decoupled
686	// model, if you got "{{mystery-template-that-takes-30-secs}}{{1x\|*bar}}"
687	// you have to wait 30 secs before you get to expand {{1x\|*bar}}
688	// because you have to wait and see whether the mystery template will
689	// leave you in SOL state or non-SOL state.
690	//
691	// In a stroke of good luck, wikitext editors seem to have agreed
692	// that it is better for all templates to be expanded in a
693	// consistent SOL state and not be dependent on their context;
694	// turn now to phab task T2529 which (via a fragile hack) tried
695	// to ensure that every template which started with
696	// start-of-line-sensitive markup was evaluated in a
697	// start-of-line context (by hackily inserting a newline). Not
698	// everyone was satisfied with this hack (see T14974), but it's
699	// been the way things work for over a decade now (as evidenced
700	// by T14974 never having been "fixed").
701	//
702	// So, while we've established we would prefer not to use page
703	// context to set the initial SOL value for tokenizing the
704	// template, what should the initial SOL value be?
705	//
706	// * Treat every transclusion as a fresh document starting in SOL
707	// state, ie set "sol" => true always. This is supported by
708	// most current wiki use, and is the intent behind the original
709	// T2529 hack (although that hack left a number of edge cases,
710	// described below).
711	//
712	// * Use `"sol" => false` for templates -- this was the solution
713	// rejected by the original T2529 as being contrary to editor
714	// expectations.
715	//
716	// * In the future, one might allow the template itself to
717	// specify that its initial SOL state should be, using a
718	// mechanism similar to what might be necessary for typed
719	// templates. This could also address T14974. This is not
720	// excluded by Parsoid at this point; but it would probably be
721	// signaled by a template "return type" which is not DOM
722	// therefore the template wouldn't get parsed "as wikitext"
723	// (ie, T14974 wants an "attribute-value" return type which is
724	// a plain string, and some of the wikitext 2.0 proposals
725	// anticipate a "attribute name/value" dictionary as a possible
726	// return type).
727	//
728	// In support of using sol=>true as the default initial state,
729	// let's examine the sol-sensitive wikitext constructs, and
730	// implicitly the corner cases left open by the T2529 hack. (For
731	// non-sol-sensitive constructs, the initial SOL state is
732	// irrelevant.)
733	//
734	// - SOL-sensitive contructs include lists, headings, indent-pre,
735	// and table syntax.
736	// - Of these, only lists, headings, and table syntax are actually handled in
737	// the PEG tokenizer and are impacted by SOL state.
738	// - Indent-Pre has its own handler that operates in a full page token context
739	// and isn't impacted.
740	// - T2529 effectively means for *#:; (lists) and {\| (table start), newlines
741	// are added which means no matter what value we set here, they will get
742	// processed in sol state.
743	// - This leaves us with headings (=), table heading (!), table row (\|), and
744	// table close (\|}) syntax that would be impacted by what we set here.
745	// - Given that table row/heading/close templates are very very common on wikis
746	// and used for constructing complex tables, sol => true will let us handle
747	// those without hacks. We aren't fully off the hook there -- see the code
748	// in TokenStreamPatcher, AttributeExpander, TableFixups that all exist to
749	// to work around the fact that decoupled processing isn't the wikitext
750	// default. But, without sol => true, we'll likely be in deeper trouble.
751	// - But, this can cause some occasional bad parses where "=\|!" aren't meant
752	// to be processed as a sol-wikitext construct.
753	// - Note also that the workaround for T14974 (ie, the T2529 hack applying
754	// where sol=false is actually desired) has traditionally been to add an
755	// initial <nowiki/> which ensures that the "T2529 characters" are not
756	// initial. There are a number of alternative mechanisms to accomplish
757	// this (ie, HTML-encode the first character).
758	//
759	// To honor the spirit of T2529 it seems plausible to try to lint
760	// away the remaining corner cases where T2529 does not result
761	// in start-of-line state for template expansion, and to use the
762	// various workarounds for compatibility in the meantime.
763	//
764	// We should also pick one of the workarounds for T14974
765	// (probably `<nowiki/>` at the first position in the template),
766	// support that (until a better mechanism exists), and (if
767	// possible) lint away any others.
768	'sol' => true
769	]
770	);
771
772	return $this->processTemplateTokens( $toks );
773	}
774
/**
 * Encapsulate the tokens produced for the main template element
 * (arguments included) by delegating to the encapsulator state.
 *
 * @param TemplateEncapsulator $state
 * @param array $tokens
 * @return array
 */
private function encapTokens( TemplateEncapsulator $state, array $tokens ): array {
	// Nested contexts normally never reach template encapsulation: their
	// templates should already have been expanded, and processTemplateSource
	// sets expandTemplates === false. Extension tags coming from templates
	// are the exception, because their content may need wikitext parsing
	// and, due to precedence, may still contain unexpanded templates.
	//
	// For example, {{1x|hi<ref>{{1x|ho}}</ref>}}
	//
	// Extensions can require template expansion unconditionally, so we can
	// get here while inTemplate. In that case the substrings of env.page.src
	// used in getArgInfo are no longer accurate and tplarginfo should be
	// omitted. Presumably the DOM post processor won't be wrapping templates
	// then anyway, which makes this unnecessary work as it is.
	$wrapping = $this->wrapTemplates;
	Assert::invariant( $wrapping, 'Encapsulating tokens when not wrapping!' );

	$encapsulated = $state->encapTokens( $tokens );
	return $encapsulated;
}
801
/**
 * Post-process a chunk emitted by the input pipeline after it was fed a
 * template: drop the trailing EOF token, clear source offsets, blank out
 * placeholder metas and, when templates are not being expanded, remove
 * comment tokens.
 *
 * @param array $chunk
 * @return array
 */
private function processTemplateTokens( array $chunk ): array {
	TokenUtils::stripEOFTkfromTokens( $chunk );

	foreach ( $chunk as $idx => $tok ) {
		// Falsy entries (e.g. empty strings) need no processing.
		if ( $tok ) {
			// Source offsets are dropped from templated content.
			if ( isset( $tok->dataParsoid->tsr ) ) {
				unset( $tok->dataParsoid->tsr );
			}
			Assert::invariant( !isset( $tok->dataParsoid->tmp->endTSR ),
				"Expected endTSR to not be set on templated content." );

			$isPlaceholderMeta = $tok instanceof SelfclosingTagTk &&
				strtolower( $tok->getName() ) === 'meta' &&
				TokenUtils::hasTypeOf( $tok, 'mw:Placeholder' );
			if ( $isPlaceholderMeta ) {
				// replace with empty string to avoid metas being foster-parented out
				$chunk[$idx] = '';
			}
		}
	}

	// FIXME: What is this stuff here? Why do we care about stripping out comments
	// so much that we create a new token array for every expanded template?
	// Unlikely to help perf very much.
	if ( !$this->options['expandTemplates'] ) {
		// Ignore comments in template transclusion mode
		$withoutComments = [];
		foreach ( $chunk as $tok ) {
			if ( $tok instanceof CommentTk ) {
				continue;
			}
			$withoutComments[] = $tok;
		}
		$chunk = $withoutComments;
	}

	$this->env->log( 'debug', 'TemplateHandler.processTemplateTokens', $chunk );
	return $chunk;
}
847
/**
 * Fetch the source of a template.
 *
 * An entry in the environment's page cache is returned as-is; otherwise
 * the source is requested through the data access layer. This method only
 * reads the page cache, it never writes to it.
 *
 * @param string $templateName
 * @param array $attribs Not used by this method.
 * @return ?string Content of the 'main' role, or null if nothing was fetched
 */
private function fetchTemplateAndTitle( string $templateName, array $attribs ): ?string {
	$env = $this->env;

	$cached = $env->pageCache[$templateName] ?? null;
	if ( $cached !== null ) {
		return $cached;
	}

	// The title lookup is deliberately inside the timed section.
	$start = microtime( true );
	$dataAccess = $env->getDataAccess();
	$pageConfig = $env->getPageConfig();
	$title = Title::newFromText( $templateName, $env->getSiteConfig() );
	$pageContent = $dataAccess->fetchTemplateSource( $pageConfig, $title );

	if ( $env->profiling() ) {
		$elapsedMs = 1000 * ( microtime( true ) - $start );
		$profile = $env->getCurrentProfile();
		$profile->bumpMWTime( "TemplateFetch", $elapsedMs, "api" );
		$profile->bumpCount( "TemplateFetch" );
	}

	if ( !$pageContent ) {
		return null;
	}

	// FIXME:
	// 1. Hard-coded 'main' role
	return $pageContent->getContent( 'main' );
}
876
/**
 * Check whether an array of tokens contains at least one template token.
 *
 * @param mixed $tokens Anything that is not an array yields false
 * @return bool
 */
private static function hasTemplateToken( $tokens ): bool {
	if ( !is_array( $tokens ) ) {
		return false;
	}

	foreach ( $tokens as $candidate ) {
		if ( TokenUtils::isTemplateToken( $candidate ) ) {
			return true;
		}
	}

	return false;
}
891
/**
 * Process the special magic word as specified by $resolvedTgt['magicWordType'].
 * ```
 * magicWordType === '!' => {{!}} is the magic word
 * ```
 * @param bool $atTopLevel
 * @param TemplateEncapsulator $state Not used by this method.
 * @param array $resolvedTgt
 * @return TemplateExpansionResult
 * @throws UnreachableException if the magic word type is anything but '!'
 */
private function processSpecialMagicWord(
	bool $atTopLevel, TemplateEncapsulator $state, array $resolvedTgt
): TemplateExpansionResult {
	// Read the type once, guarded, so the check and the error message agree.
	$magicWordType = $resolvedTgt['magicWordType'] ?? null;

	// Special case for {{!}} magic word.
	//
	// If we tokenized as a magic word, we meant for it to expand to a
	// string. The tokenizer has handling for this syntax in table
	// positions. However, proceeding to go through template expansion
	// will reparse it as a table cell token. Hence this special case
	// handling to avoid that path.
	if ( $magicWordType === '!' ) {
		// If we're not at the top level, return a table cell. This will always
		// be the case. Either {{!}} was tokenized as a td, or it was tokenized
		// as template but the recursive call to fetch its content returns a
		// single | in an ambiguous context which will again be tokenized as td.
		// In any case, this should only be relevant for parserTests.
		//
		// At the top level the expansion is a bare pipe character. No escaping
		// applies inside single quotes here: '\|' would be a backslash
		// followed by a pipe.
		$toks = $atTopLevel ? [ '|' ] : [ new TagTk( 'td' ) ];
		return new TemplateExpansionResult( $toks, false, (bool)$this->wrapTemplates );
	}

	throw new UnreachableException(
		'Unsupported magic word type: ' . ( $magicWordType ?? 'null' )
	);
}
933
/**
 * Expand the template token held by $state.
 *
 * Depending on the handler options and the environment, the token is
 * either converted back to a string (size limits hit, unresolvable target,
 * or template expansion disabled), handled as a special magic word,
 * expanded natively, or expanded via the preprocessor.
 *
 * @param TemplateEncapsulator $state
 * @return TemplateExpansionResult
 */
private function expandTemplate( TemplateEncapsulator $state ): TemplateExpansionResult {
	$env = $this->env;
	$token = $state->token;
	$expandTemplates = $this->options['expandTemplates'];

	// Since AttributeExpander runs later in the pipeline than TemplateHandler,
	// if the template name is templated, use our copy of AttributeExpander
	// to process all attributes to tokens, and force reprocessing of this
	// template token since we will then know the actual template target.
	if ( $expandTemplates && self::hasTemplateToken( $token->attribs[0]->k ) ) {
		$ret = $this->ae->processComplexAttributes( $token );
		$expanded = $ret->tokens ?? null;
		Assert::invariant( $expanded && count( $expanded ) === 1 && $expanded[0] === $token,
			"Expected only the input token as the return value." );
	}

	if ( $this->atMaxArticleSize ) {
		// As described above, if we were already greater than $wgMaxArticleSize
		// we're going to return the tokens without expanding them.
		// (This case is where the original article as fetched from the DB
		// or passed to the API exceeded max article size.)
		return $this->convertToString( $token );
	}

	// There's no point in proceeding if we've already hit the maximum inclusion size
	// XXX should this be combined with the previous test?
	if ( !$env->bumpWt2HtmlResourceUse( 'wikitextSize', 0 ) ) {
		// FIXME: The legacy parser would try to make this a link and
		// elsewhere we'd return the $e->getMessage()
		// (This case is where the template post-expansion accumulation is
		// over the maximum wikitext size.)
		// XXX: It could be combined with the previous test, but we might
		// want to use different error messages in the future.
		return $this->convertToString( $token );
	}

	$text = $token->dataParsoid->src ?? '';

	$tgt = $this->resolveTemplateTarget(
		$state, $token->attribs[0]->k, $token->attribs[0]->srcOffsets->key
	);

	if ( $expandTemplates && $tgt === null ) {
		// Target contains tags, convert template braces and pipes back into text
		// Re-join attribute tokens with '=' and '|'
		return $this->convertToString( $token, true );
	}

	if ( isset( $tgt['magicWordType'] ) ) {
		return $this->processSpecialMagicWord( $this->atTopLevel, $state, $tgt );
	}

	if ( $env->nativeTemplateExpansionEnabled() ) {
		// Expand argument keys. The frame is only needed on this path.
		$frame = $this->manager->getFrame();
		$atm = new AttributeTransformManager( $frame,
			[ 'expandTemplates' => false, 'inTemplate' => true ]
		);
		$newAttribs = $atm->process( $token->attribs );
		$target = $newAttribs[0]->k;
		if ( !$target ) {
			$env->log( 'debug', 'No template target! ', $newAttribs );
		}
		// Resolve the template target again now that the template token's
		// attributes have been expanded by the AttributeTransformManager
		$resolvedTgt = $this->resolveTemplateTarget(
			$state, $target, $newAttribs[0]->srcOffsets->key
		);
		if ( $resolvedTgt === null ) {
			// Target contains tags, convert template braces and pipes back into text
			// Re-join attribute tokens with '=' and '|'
			return $this->convertToString( $token, true );
		}
		return $this->expandTemplateNatively( $state, $resolvedTgt, $newAttribs );
	}

	if ( !$expandTemplates ) {
		// We don't perform recursive template expansion- something
		// template-like that the PHP parser did not expand. This is
		// encapsulated already, so just return the plain text.
		Assert::invariant( TokenUtils::isTemplateToken( $token ), "Expected template token." );
		return $this->convertToString( $token );
	}

	// Use MediaWiki's preprocessor
	//
	// The tokenizer needs to use `text` as the cache key for caching
	// expanded tokens from the expanded transclusion text that we get
	// from the preprocessor, since parameter substitution will already
	// have taken place.
	//
	// It's sufficient to pass `[]` in place of attribs since they
	// won't be used. In `usePHPPreProcessor`, there is no parameter
	// substitution coming from the frame.

	/* If $tgt is not null, target will be present. */
	$templateName = $tgt['name'];
	$templateTitle = $tgt['title'];
	// FIXME: This is a source of a lot of issues since templateargs
	// get looked up from the Frame and yield these tokens which then enter
	// the token stream. See T301948 and others from wmf.22
	// $attribs = array_slice( $token->attribs, 1 ); // Strip template name
	$attribs = [];

	// We still need to check for limit violations because of the
	// higher precedence of extension tags, which can result in nested
	// templates even while using the php preprocessor for expansion.
	$error = $this->enforceTemplateConstraints( $templateName, $templateTitle, true );
	if ( $error ) {
		// FIXME: Should we be encapsulating here?
		// Inconsistent with the other place constraints are enforced.
		return new TemplateExpansionResult( $error );
	}

	// Check if we have an expansion for this template in the cache already
	$cachedTransclusion = $env->transclusionCache[$text] ?? null;
	if ( $cachedTransclusion ) {
		// cache hit: reuse the expansion DOM
		// FIXME(SSS): How does this work again for
		// templates like {{start table}} and {{end table}}??
		return new TemplateExpansionResult(
			PipelineUtils::encapsulateExpansionHTML(
				$env, $token, $cachedTransclusion, [ 'fromCache' => true ]
			)
		);
	}

	// Special pages (that are neither parser functions, variables nor
	// external titles) are fetched as HTML and tunneled through as tokens.
	if (
		!isset( $tgt['isParserFunction'] ) &&
		!isset( $tgt['isVariable'] ) &&
		!$templateTitle->isExternal() &&
		$templateTitle->isSpecialPage()
	) {
		$domFragment = PipelineUtils::fetchHTML( $env, $text );
		$toks = $domFragment
			? PipelineUtils::tunnelDOMThroughTokens( $env, $token, $domFragment, [] )
			: [];
		$toks = $this->processTemplateTokens( $toks );
		return new TemplateExpansionResult( $toks, true, $this->wrapTemplates );
	}

	// Fetch and process the template expansion
	$expansion = Wikitext::preprocess( $env, $text );
	if ( $expansion['error'] ) {
		return new TemplateExpansionResult(
			[ $expansion['src'] ], false, $this->wrapTemplates
		);
	}

	$tplToks = $this->processTemplateSource(
		$token,
		[
			'name' => $templateName,
			'title' => $templateTitle,
			'attribs' => $attribs
		],
		$expansion['src']
	);
	return new TemplateExpansionResult( $tplToks, true, $this->wrapTemplates );
}
1095
/**
 * Main template token handler.
 *
 * Builds the encapsulation state for the token, runs expandTemplate on it
 * and then, as directed by the expansion result, encapsulates the tokens
 * and/or shuttles them to the end of the current stage.
 *
 * @param Token $token
 * @return TokenHandlerResult
 */
private function onTemplate( Token $token ): TokenHandlerResult {
	$encapsulator = new TemplateEncapsulator(
		$this->env, $this->manager->getFrame(), $token, 'mw:Transclusion'
	);
	$expansion = $this->expandTemplate( $encapsulator );

	$output = $expansion->tokens;
	if ( $expansion->encap ) {
		$output = $this->encapTokens( $encapsulator, $output );
	}
	if ( $expansion->shuttle ) {
		// Shuttle tokens to the end of the stage since they've gone through the
		// rest of the handlers in the current pipeline in the pipeline above.
		$output = $this->manager->shuttleTokensToEndOfStage( $output );
	}

	return new TokenHandlerResult( $output );
}
1122
/**
 * Expand a template argument using tokens from the containing frame.
 *
 * @param Token $token
 * @return TokenHandlerResult
 */
private function onTemplateArg( Token $token ): TokenHandlerResult {
	$expanded = $this->manager->getFrame()->expandTemplateArg( $token );

	$needsWrapping = $this->wrapTemplates && $this->options['expandTemplates'];
	if ( $needsWrapping ) {
		// This is a bare use of template arg syntax at the top level
		// outside any template use context. Wrap this use with RDF attrs.
		// so that this chunk can be RT-ed en-masse.
		$encapsulator = new TemplateEncapsulator(
			$this->env, $this->manager->getFrame(), $token, 'mw:Param'
		);
		$expanded = $this->encapTokens( $encapsulator, $expanded );
	}

	// Shuttle tokens to the end of the stage since they've gone through the
	// rest of the handlers in the current pipeline in the pipeline above.
	return new TokenHandlerResult(
		$this->manager->shuttleTokensToEndOfStage( $expanded )
	);
}
1147
/**
 * Dispatch "template" and "templatearg" tokens to their handlers.
 *
 * @param Token $token
 * @return ?TokenHandlerResult null for any other tag name
 */
public function onTag( Token $token ): ?TokenHandlerResult {
	$tagName = $token->getName();

	if ( $tagName == "template" ) {
		return $this->onTemplate( $token );
	}
	if ( $tagName == "templatearg" ) {
		return $this->onTemplateArg( $token );
	}

	return null;
}
1158	}