Code Coverage for /src/src/Wt2Html/TT/AttributeExpander.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	0.00% covered (danger)	0.00%	0 / 242	0.00% covered (danger)	0.00%	0 / 8	CRAP	0.00% covered (danger)	0.00%	0 / 1
AttributeExpander	0.00% covered (danger)	0.00%	0 / 242	0.00% covered (danger)	0.00%	0 / 8	7832	0.00% covered (danger)	0.00%	0 / 1
__construct	0.00% covered (danger)	0.00%	0 / 2	0.00% covered (danger)	0.00%	0 / 1	2
nlTkIndex	0.00% covered (danger)	0.00%	0 / 13	0.00% covered (danger)	0.00%	0 / 1	72
splitTokens	0.00% covered (danger)	0.00%	0 / 32	0.00% covered (danger)	0.00%	0 / 1	132
stripMetaTags	0.00% covered (danger)	0.00%	0 / 26	0.00% covered (danger)	0.00%	0 / 1	132
tplToksToString	0.00% covered (danger)	0.00%	0 / 6	0.00% covered (danger)	0.00%	0 / 1	20
buildExpandedAttrs	0.00% covered (danger)	0.00%	0 / 130	0.00% covered (danger)	0.00%	0 / 1	1892
processComplexAttributes	0.00% covered (danger)	0.00%	0 / 5	0.00% covered (danger)	0.00%	0 / 1	2
onAny	0.00% covered (danger)	0.00%	0 / 28	0.00% covered (danger)	0.00%	0 / 1	90

1	<?php
2	declare( strict_types = 1 );
3
4	namespace Wikimedia\Parsoid\Wt2Html\TT;
5
6	use Wikimedia\Assert\Assert;
7	use Wikimedia\Assert\UnreachableException;
8	use Wikimedia\Parsoid\Config\Env;
9	use Wikimedia\Parsoid\Tokens\KV;
10	use Wikimedia\Parsoid\Tokens\NlTk;
11	use Wikimedia\Parsoid\Tokens\SelfclosingTagTk;
12	use Wikimedia\Parsoid\Tokens\TagTk;
13	use Wikimedia\Parsoid\Tokens\Token;
14	use Wikimedia\Parsoid\Utils\PHPUtils;
15	use Wikimedia\Parsoid\Utils\PipelineUtils;
16	use Wikimedia\Parsoid\Utils\TokenUtils;
17	use Wikimedia\Parsoid\Utils\Utils;
18	use Wikimedia\Parsoid\Utils\WTUtils;
19	use Wikimedia\Parsoid\Wt2Html\Frame;
20	use Wikimedia\Parsoid\Wt2Html\PegTokenizer;
21	use Wikimedia\Parsoid\Wt2Html\TokenTransformManager;
22
23	/**
24	* Generic attribute expansion handler.
25	*/
26	class AttributeExpander extends TokenHandler {
/**
 * Matches the part of a `typeof` value that names one of the meta marker
 * types handled by this class (language variant, transclusion, param,
 * includes, annotation).
 *
 * Capture group 1 spans from the "mw:" prefix to the end of the string;
 * callers in this class test it with str_ends_with( ..., '/End' ) to
 * distinguish start markers from end markers.
 */
private const META_TYPE_MATCHER = '#(mw:(LanguageVariant|Transclusion|Param|Includes|Annotation/)(.*)$)#D';
28
/**
 * Used for re-tokenizing attribute strings that need to be re-expanded
 * @var PegTokenizer
 */
private PegTokenizer $tokenizer;

/**
 * @param TokenTransformManager $manager
 * @param array $options
 *  - bool inTemplate Is this being invoked while processing a template?
 *  - bool expandTemplates Should we expand templates encountered here?
 *  - bool standalone Is this AttributeExpander used as part of a pipeline
 *    or is it being used standalone as an utility class?
 */
public function __construct( TokenTransformManager $manager, array $options ) {
	parent::__construct( $manager, $options );
	// A dedicated tokenizer instance, used by buildExpandedAttrs() to
	// re-parse attribute strings produced by template expansion.
	$this->tokenizer = new PegTokenizer( $manager->getEnv() );
}
47
/**
 * Locate the first newline token in an attribute key/value token stream
 * that is not enclosed by an include directive marker.
 *
 * @param bool $nlTkOkay If true, newlines are acceptable and no search is done
 * @param array $tokens Expanded attribute key/value tokens
 * @param bool $atTopLevel Currently unused (see the disabled regexp below)
 * @return int Array key of the first qualifying NlTk, or -1 if there is none
 */
private static function nlTkIndex(
	bool $nlTkOkay, array $tokens, bool $atTopLevel
): int {
	// Moving this check here since it makes the
	// callsite cleaner and simpler.
	if ( $nlTkOkay ) {
		return -1;
	}

	// Check if we have a newline token in the attribute key/value token stream.
	// However, newlines are acceptable inside a <include>..</include> directive
	// since they are stripped out.
	//
	// let includeRE = !atTopLevel ?
	//     /(?:^|\s)mw:Includes\/NoInclude(\/.*)?(?:\s|$)/ :
	//     /(?:^|\s)mw:Includes\/(?:Only)?Include(?:Only)?(\/.*)?(?:\s|$)/;
	//
	// SSS FIXME: We cannot support this usage for <include> directives currently
	// since they don't go through template encapsulation and don't have a data-mw
	// format with "wt" and "transclusion" parts that we can use to just track bits
	// of wikitext that don't have a DOM representation.
	//
	// So, for now, we just suppress all newlines contained within these directives.
	$includeRE = '#(?:^|\s)mw:Includes/(?:No|Only)?Include(?:Only)?(/.*)?(?:\s|$)#D';
	$inInclude = false;
	foreach ( $tokens as $i => $t ) {
		if ( $t instanceof SelfclosingTagTk ) {
			$type = $t->getAttributeV( 'typeof' );
			$typeMatch = [];
			if ( $type && preg_match( $includeRE, $type, $typeMatch, PREG_UNMATCHED_AS_NULL ) ) {
				// Group 1 is the optional "/..." suffix; an "/End" suffix
				// closes the directive, anything else (or nothing) opens it.
				$inInclude = !str_ends_with( $typeMatch[1] ?? '', '/End' );
			}
		} elseif ( !$inInclude && $t instanceof NlTk ) {
			// newline token outside <include>
			return $i;
		}
	}

	return -1;
}
88
/**
 * Split an expanded attribute token stream around a newline token and,
 * when template wrapping is enabled, hoist the last start-meta found
 * before the newline so that it can be emitted ahead of $token.
 *
 * Side effects: always clears $token's tsr->end; when a start-meta is
 * found, also rewrites that meta's dataParsoid (unwrappedWT,
 * firstWikitextNode, tsr->start).
 *
 * @param Frame $frame Provides the source text for computing unwrappedWT
 * @param Token $token The token whose attribute is being split
 * @param int $nlTkPos Array key in $tokens of the newline token to split at
 * @param array $tokens Expanded attribute key/value tokens
 * @param bool $wrapTemplates Whether to look for start-meta tokens
 * @return array With keys 'metaTokens', 'preNLBuf' and 'postNLBuf'.
 *  If no start-meta was found, the tokens are returned unsplit in
 *  'preNLBuf' and the other two entries are empty arrays.
 */
private static function splitTokens(
	Frame $frame, Token $token, int $nlTkPos, array $tokens, bool $wrapTemplates
): array {
	$preNLBuf = [];
	$postNLBuf = null;
	$startMeta = null;

	// Split the token array around the first newline token.
	$startMetaIndex = null;
	foreach ( $tokens as $i => $t ) {
		if ( $i === $nlTkPos ) {
			// split here!
			$postNLBuf = array_slice( $tokens, $i );
			break;
		}

		if ( $wrapTemplates && $t instanceof SelfclosingTagTk ) {
			$type = $t->getAttributeV( 'typeof' );
			// We are interested in the last start meta tag.
			// Everything before it is assumed to be closed.
			$typeMatch = [];
			if ( $type &&
				preg_match( self::META_TYPE_MATCHER, $type, $typeMatch ) &&
				!str_ends_with( $typeMatch[1], '/End' )
			) {
				$startMeta = $t;
				$startMetaIndex = $i;
			}
		}

		// Use $i to make code robust if $tokens were not contiguous
		$preNLBuf[$i] = $t;
	}

	// Clear $startMeta from $preNLBuf - setting to '' is sufficient.
	if ( $startMeta ) {
		$preNLBuf[$startMetaIndex] = '';
	}

	// We split the token into pieces.
	// Since we no longer know where this token now ends tsr-wise,
	// set tsr->end to null
	$token->dataParsoid->tsr->end = null;

	if ( !$startMeta ) {
		// Nothing to hoist: hand back the original, unsplit token stream.
		return [ 'metaTokens' => [], 'preNLBuf' => $tokens, 'postNLBuf' => [] ];
	}

	// Support template wrapping with the following steps:
	// - Hoist the transclusion start-meta from the first line
	//   to before the token.
	// - Update the start-meta tsr to that of the token.
	// - Record the wikitext between the token and the transclusion
	//   as an unwrappedWT data-parsoid attribute of the start-meta.
	$dp = $startMeta->dataParsoid;
	$dp->unwrappedWT = substr( $frame->getSrcText(), $token->dataParsoid->tsr->start,
		$dp->tsr->start - $token->dataParsoid->tsr->start );

	// unwrappedWT will be added to the data-mw.parts array which makes
	// this a multi-template-content-block.
	// Record the first wikitext node of this block (required by html->wt serialization)

	// FIXME spec-compliant values would be upper-case, this is just a workaround
	// for current PHP DOM implementation and could be removed in the future
	$tokenName = mb_strtoupper( $token->getName() );

	$dp->firstWikitextNode = isset( $token->dataParsoid->stx ) ?
		$tokenName . '_' . $token->dataParsoid->stx : $tokenName;

	// Update tsr->start only. Unless the end-meta token is moved as well,
	// updating tsr->end can introduce bugs in cases like:
	//
	//   {|
	//   |{{singlechart|Australia|93|artist=Madonna|album=Girls Gone Wild}}|x
	//   |}
	//
	// which can then cause dirty diffs (the "|" before the x gets dropped).
	$dp->tsr->start = $token->dataParsoid->tsr->start;

	return [ 'metaTokens' => [ $startMeta ], 'preNLBuf' => $preNLBuf, 'postNLBuf' => $postNLBuf ];
}
172
/**
 * This helper method strips all meta tags introduced by
 * transclusions, etc. and returns the content.
 *
 * @param Env $env Currently unused by this method
 * @param array $tokens
 * @param bool $wrapTemplates When false, no tokens are stripped and only
 *  DOM-fragment detection is performed
 * @return array With keys:
 *  - bool 'hasGeneratedContent': a DOM fragment token or a non-end
 *    meta-typed token was seen
 *  - array 'annotationType': annotation types collected from matching typeofs
 *  - array 'value': the remaining tokens
 */
private static function stripMetaTags(
	Env $env, array $tokens, bool $wrapTemplates
): array {
	$buf = [];
	$hasGeneratedContent = false;
	$annotationType = [];

	foreach ( $tokens as $t ) {
		if ( $t instanceof TagTk || $t instanceof SelfclosingTagTk ) {
			// Take advantage of this iteration of `tokens` to seek out
			// document fragments. They're an indication that an attribute
			// value wasn't present as literal text in the input and the
			// token should be annotated with "mw:ExpandedAttrs".
			if ( TokenUtils::hasDOMFragmentType( $t ) ) {
				$hasGeneratedContent = true;
			}

			if ( $wrapTemplates ) {
				// Strip all meta tags.
				$type = $t->getAttributeV( 'typeof' );
				$typeMatch = [];
				if ( $type && preg_match( self::META_TYPE_MATCHER, $type, $typeMatch ) ) {
					if ( !str_ends_with( $typeMatch[1], '/End' ) ) {
						$hasGeneratedContent = true;
					}
					$groups = [];
					if ( preg_match( WTUtils::ANNOTATION_META_TYPE_REGEXP, $type, $groups ) ) {
						$annotationType[] = $groups[1];
					}
				} else {
					// Not one of the meta types we strip: keep it untouched.
					$buf[] = $t;
					continue;
				}
			}

			if ( $t->getName() !== 'meta' ) {
				// Dont strip token if it is not a meta-tag
				$buf[] = $t;
			}
		} else {
			$buf[] = $t;
		}
	}

	return [
		'hasGeneratedContent' => $hasGeneratedContent,
		'annotationType' => $annotationType,
		'value' => $buf
	];
}
232
/**
 * Replace every template token in an array with its recorded source
 * (dataParsoid->src); all other entries are copied as-is. The result
 * is re-indexed sequentially.
 *
 * @param mixed $a
 * @return mixed $a itself if it is not an array, else the converted array
 */
private static function tplToksToString( $a ) {
	if ( !is_array( $a ) ) {
		return $a;
	}
	$ret = [];
	foreach ( $a as $t ) {
		$ret[] = TokenUtils::isTemplateToken( $t ) ? $t->dataParsoid->src : $t;
	}
	return $ret;
}
247
/**
 * Callback for attribute expansion in AttributeTransformManager.
 *
 * Reconciles the token's original attributes with their expanded
 * versions, replaces $token->attribs with the result and, where
 * attributes were generated by templates, records that on the token
 * (or in its temporary data for 'template' tokens).
 *
 * @param Token $token
 * @param KV[] $expandedAttrs Indexed in parallel with $token->attribs
 * @return TokenHandlerResult Hoisted meta tokens, then $token, then any
 *  tokens that followed a splitting newline
 */
private function buildExpandedAttrs( Token $token, array $expandedAttrs ): TokenHandlerResult {
	// If we're not in a template, we'll be doing template wrapping in dom
	// post-processing (same conditional there), so take care of meta markers
	// found while processing tokens.
	$wrapTemplates = !$this->options['inTemplate'];
	$env = $this->manager->getEnv();
	$metaTokens = [];
	$postNLToks = [];
	$tmpDataMW = null;
	$oldAttrs = $token->attribs;
	// Build newAttrs lazily (on-demand) to avoid creating
	// objects in the common case where nothing of significance
	// happens in this code.
	$newAttrs = null;
	$nlTkPos = -1;
	$nlTkOkay = TokenUtils::isHTMLTag( $token ) || !TokenUtils::isTableTag( $token );
	$annotationTypes = [];

	// Identify attributes that were generated in full or in part using templates
	foreach ( $oldAttrs as $i => $oldA ) {
		$expandedA = $expandedAttrs[$i];

		// Preserve the key and value source, if available.
		// But, if 'oldA' wasn't cloned, expandedA will be the same as 'oldA'.
		if ( $oldA !== $expandedA ) {
			$expandedA->ksrc = $oldA->ksrc;
			$expandedA->vsrc = $oldA->vsrc;
			$expandedA->srcOffsets = $oldA->srcOffsets;
		}

		// Deal with two template-expansion scenarios for the attribute key (not value)
		//
		// 1. We have a template that generates multiple attributes of this token
		//    as well as content after the token.
		//    Ex: infobox templates from aircraft, ship, and other pages
		//        See enwiki:Boeing_757
		//
		//    - Split the expanded tokens into multiple lines.
		//    - Expanded attributes associated with the token are retained in the
		//      first line before a NlTk.
		//    - Content tokens after the NlTk are moved to subsequent lines.
		//    - The meta tags are hoisted before the original token to make sure
		//      that the entire token and following content is encapsulated as a unit.
		//
		// 2. We have a template that only generates multiple attributes of this
		//    token. In that case, we strip all template meta tags from the expanded
		//    tokens and assign it a mw:ExpandedAttrs type with orig/expanded
		//    values in data-mw.
		//
		// Reparse-KV-string scenario with templated attributes:
		// -----------------------------------------------------
		// In either scenario above, we need additional special handling if the
		// template generates one or more k=v style strings:
		//    <div {{1x|1=style='color:red''}}></div>
		//    <div {{1x|1=style='color:red' title='boo'}}></div>
		//
		// Real use case: Template {{ligne grise}} on frwp.
		//
		// To support this, we utilize the following hack. If we got a string of the
		// form "k=v" and our orig-v was "", we convert the token array to a string
		// and retokenize it to extract one or more attributes.
		//
		// But, we won't support scenarios like this:
		//   {| title={{1x|1='name' style='color:red;'\n|-\n|foo}}\n|}
		// Here, part of one attribute and additional complete attribute strings
		// need reparsing, and that isn't a use case that is worth more complexity here.
		//
		// FIXME:
		// ------
		// 1. It is not possible for multiple instances of scenario 1 to be triggered
		//    for the same token. So, I am not bothering trying to test and deal with it.
		//
		// 2. We trigger the Reparse-KV-string scenario only for attribute keys,
		//    since it isn't possible for attribute values to require this reparsing.
		//    However, it is possible to come up with scenarios where a template
		//    returns the value for one attribute and additional k=v strings for newer
		//    attributes. We don't support that scenario, but don't even test for it.
		//
		// Reparse-KV-string scenario with non-string attributes:
		// ------------------------------------------------------
		// This is only going to be the case with table wikitext that has special syntax
		// for attribute strings.
		//
		// {| <div>a</div> style='border:1px solid black;'
		// |- <div>b</div> style='border:1px dotted blue;'
		// | <div>c</div> style='color:red;'
		// |}
		//
		// In wikitext like the above, the PEG tokenizer doesn't recognize these as
		// valid attributes (the templated attribute scenario is a special case) and
		// orig-v will be "". So, the same strategy as above is applied here as well.

		$expandedK = $origK = $expandedA->k;
		$expandedV = $origV = $expandedA->v;
		$updatedK = null;
		$updatedV = null;
		$reparsedKV = false;
		$keyUsesMixedAttrContentTpl = false;
		$valUsesMixedAttrContentTpl = false;

		if ( $expandedK ) {
			// FIXME: We should get rid of these array/string/non-string checks
			// and probably use appropriately-named flags to convey type information.
			if ( is_array( $oldA->k ) ) {
				if ( !is_array( $expandedK ) ) {
					throw new UnreachableException( "expandedK: expected array. Found: " .
						PHPUtils::jsonEncode( $expandedK ) );
				}

				$nlTkPos = self::nlTkIndex( $nlTkOkay, $expandedK, $wrapTemplates );
				if ( $nlTkPos !== -1 ) {
					// Scenario 1 from the documentation comment above.
					$keyUsesMixedAttrContentTpl = true;
					$updatedK = self::splitTokens(
						$this->manager->getFrame(), $token, $nlTkPos, $expandedK, $wrapTemplates
					);
					$expandedK = $updatedK['preNLBuf'];
					$postNLToks = $updatedK['postNLBuf'];
					$metaTokens = $updatedK['metaTokens'];
					// We split up this attribute's key into pieces.
					if ( $expandedA->srcOffsets->key ) {
						$expandedA->srcOffsets->key->end = null;
					}
				} else {
					// Maybe scenario 2 from the documentation comment above.
					$updatedK = self::stripMetaTags( $env, $expandedK, $wrapTemplates );
					PHPUtils::pushArray( $annotationTypes, $updatedK['annotationType'] );
					$expandedK = $updatedK['value'];
				}

				$expandedA->k = $expandedK;

				// Check if we need to deal with the Reparse-KV-string scenario.
				// (See documentation comment above.)
				//
				// Don't incorrectly reparse the kv string for parser functions.
				// Ex: "#ifexpr" parser function expects the "=" equality operator.
				// We encounter those in "standalone" mode (used to expand
				// templated template targets).
				if ( $expandedA->v === '' && empty( $this->options['standalone'] ) ) {
					// Extract a parsable string from the token array.
					// Trim whitespace to ensure tokenizer isn't tripped up
					// by the presence of unnecessary whitespace.
					$kStr = trim( TokenUtils::tokensToString( $expandedK, false, [
						// These tokens haven't been expanded to DOM yet
						// so unpacking them here is justifiable
						'unpackDOMFragments' => true,
						'env' => $env
					] ) );
					$rule = $nlTkOkay ? 'generic_newline_attributes' : 'table_attributes';
					$kvs = str_contains( $kStr, '=' ) ?
						$this->tokenizer->tokenizeAs( $kStr, $rule, /* sol */true ) : null;
					if ( $kvs ) {
						// At this point, templates should have been expanded.
						// Returning a template token here probably means that
						// when we just converted to string and reparsed, we failed
						// to expand the template. This can be particularly bad
						// when we make iterative calls to expand template names.
						// So, give up template expansion and convert them to strings.
						foreach ( $kvs as $kv ) {
							$kv->k = self::tplToksToString( $kv->k );
							$kv->v = self::tplToksToString( $kv->v );

							// $kStr is based on running tokensToString on $expandedK.
							// So, $kStr might have dropped HTML tags, etc. Given that,
							// we can no longer reliably compute offsets for these
							// new key/value pairs. We could try to be more smart here,
							// but it is not worth the complexity.
							$kv->srcOffsets = null;
						}
						// SSS FIXME: Collect all keys here, not just the first key
						// i.e. in a string like {{1x|1=id='v1' title='foo' style='..'}}
						// that string is setting attributes for [id, title, style], not just id.
						//
						// That requires the ability for the data-mw.attribs[i].txt to be an array.
						// However, the spec at [[mw:Specs/HTML#Generated_attributes_of_HTML_tags]]
						// says:
						//
						//    "This spec also assumes that a template can only
						//     generate one attribute rather than multiple attributes."
						//
						// So, revision of the spec is another FIXME at which point this code can
						// be updated to reflect the revised spec.
						$expandedK = $kvs[0]->k;
						$reparsedKV = true;
						if ( !$newAttrs ) {
							// First reparse for this token: seed newAttrs with
							// the expanded attributes that precede this one.
							$newAttrs = $i === 0 ? [] : array_slice( $expandedAttrs, 0, $i );
						}
						PHPUtils::pushArray( $newAttrs, $kvs );
					}
				}
			}

			// We have a potentially expanded value.
			// Check if the value came from a template/extension expansion.
			if ( is_string( $expandedK ) && !str_starts_with( $expandedK, 'mw:' )
				&& is_array( $oldA->v )
			) {
				$nlTkPos = self::nlTkIndex( $nlTkOkay, $expandedV, $wrapTemplates );
				if ( $nlTkPos !== -1 ) {
					// Scenario 1 from the documentation comment above.
					$valUsesMixedAttrContentTpl = true;
					$updatedV = self::splitTokens(
						$this->manager->getFrame(), $token, $nlTkPos,
						$expandedV, $wrapTemplates
					);
					$expandedV = $updatedV['preNLBuf'];
					$postNLToks = $updatedV['postNLBuf'];
					$metaTokens = $updatedV['metaTokens'];
					// We split up this attribute's value into pieces.
					if ( $expandedA->srcOffsets->value ) {
						$expandedA->srcOffsets->value->end = null;
					}
				} else {
					// Maybe scenario 2 from the documentation comment above.
					$updatedV = self::stripMetaTags( $env, $expandedV, $wrapTemplates );
					PHPUtils::pushArray( $annotationTypes, $updatedV['annotationType'] );
					$expandedV = $updatedV['value'];
				}
				$expandedA->v = $expandedV;
			}

			// Update data-mw to account for templated attributes.
			// For editability, set HTML property.
			if ( !empty( $updatedK['hasGeneratedContent'] ) ||
				!empty( $updatedV['hasGeneratedContent'] ) ||
				( $reparsedKV && count( $metaTokens ) > 0 )
			) {
				$key = TokenUtils::tokensToString( $expandedK );
				if ( !$tmpDataMW ) {
					$tmpDataMW = [];
				}

				// For the $(key|val)UsesMixedAttrContentTpl checks below,
				// it is incorrect to assign the HTML for the original wikitext
				// string since the content part will get duplicated in both
				// this data-mw and in the actual body of the table (for example)
				// and cause bugs like T249740.
				//
				// So, in this case, we assign just the key/value part of the HTML
				// ($expandedA->k or $expandedA->v), but we mark it uneditable
				// because we cannot really edit just the key/value of the attribute
				// on its own because it is only a part of the template's output.
				if ( $reparsedKV ) {
					// If we encountered a reparse-KV-string scenario,
					// we set the value's HTML to [] since we can edit
					// the transclusion either via the key's HTML or the
					// value's HTML, but not both.
					$keyHTML = $keyUsesMixedAttrContentTpl ? $expandedA->k : $origK;
					$valHTML = [];
				} else {
					Assert::invariant( !$keyUsesMixedAttrContentTpl,
						"If reparseKV was false, and we had a mixed attr-content template, " .
						"we should have landed in the valUsesMixedAttrContentTpl codepath." );
					$keyHTML = empty( $updatedK['hasGeneratedContent'] ) ? null : $origK;
					$valHTML = $valUsesMixedAttrContentTpl ? $expandedA->v : $origV;
				}

				// FIXME: Ideally we would have called them ktext, khtml, vhtml
				// since in the serialized data-mw, the "k" and "v" key strings are dropped.
				//    [{ "ktxt":..., "khtml":... }, { "vhtml":... }]
				//         is clearer and less confusing than
				//    [{ "txt":..., "html":... }, { "html":... }]
				$tmpDataMW[$key] = [
					'k' => [ 'txt' => $key, 'srcOffsets' => $expandedA->srcOffsets->key ?? null ],
					// FIXME: Why is 'txt' missing? Why are we not checking for [] ?
					'v' => [ 'html' => $valHTML, 'srcOffsets' => $expandedA->srcOffsets->value ?? null ]
				];

				if ( $keyHTML !== null ) {
					$tmpDataMW[$key]['k']['html'] = $keyHTML;
				}
				if ( $keyUsesMixedAttrContentTpl ) {
					$tmpDataMW[$key]['k']['uneditable'] = true;
				}
				if ( $valUsesMixedAttrContentTpl ) {
					$tmpDataMW[$key]['v']['uneditable'] = true;
				}
			}
		}

		// Update newAttrs
		if ( $newAttrs && !$reparsedKV ) {
			$newAttrs[] = $expandedA;
		}
	}

	$token->attribs = $newAttrs ?? $expandedAttrs;

	// If the token already has an about, it already has transclusion/extension
	// wrapping. No need to record information about templated attributes in addition.
	//
	// FIXME: If there is a real use case for extension attributes getting templated,
	// this check can be relaxed to allow that.
	// https://gerrit.wikimedia.org/r/#/c/65575 has some reference code that can be used then.

	if ( !$token->getAttributeV( 'about' ) && $tmpDataMW && count( $tmpDataMW ) > 0 ) {
		// Flatten k-v pairs.
		$vals = [];
		foreach ( $tmpDataMW as $obj ) {
			$vals[] = $obj['k'];
			$vals[] = $obj['v'];
		}

		// Clone the vals since they'll be passed to another pipeline
		// for expanding, which may destructively mutate them in the process.
		//
		// This is a problem since subsequent handlers to the
		// AttributeExpander may interact with the original tokens still
		// present as attributes of `token`.
		//
		// For example, while treebuilding, the object holding dataParsoid
		// of a token is reused as the data-parsoid attribute of the
		// corresponding node. Thus, when we get to the DOM cleanup pass,
		// unsetting properties changes the token as well. This was
		// the issue when an "href" was expanded and then the
		// ExternalLinkHandler tried to call tokensToString on it,
		// resulting in a transcluded entity missing its src (which, by the way,
		// had already been clobbered by WrapTemplates, similar to T214241).
		//
		// The general principle here being, don't share tokens between
		// pipelines.
		$vals = Utils::clone( $vals );

		// Expand all token arrays to DOM.
		$eVals = PipelineUtils::expandAttrValuesToDOM(
			$this->env, $this->manager->getFrame(), $vals,
			$this->options['expandTemplates'],
			$this->options['inTemplate']
		);

		// Rebuild flattened k-v pairs.
		$expAttrs = [];
		$numVals = count( $eVals );
		for ( $j = 0; $j < $numVals; $j += 2 ) {
			$expAttrs[] = [ $eVals[$j], $eVals[$j + 1] ];
		}

		if ( $token->getName() === 'template' ) {
			// Don't add Parsoid about, typeof, data-mw attributes here since
			// we won't be able to distinguish between Parsoid-added attributes
			// and actual template attributes in cases like:
			//   {{some-tpl|about=#mwt1|typeof=mw:Transclusion}}
			// In both cases, we will encounter a template token that looks like:
			//   { ... "attribs":[{"k":"about","v":"#mwt1"},{"k":"typeof","v":"mw:Transclusion"}] .. }
			// So, record these in the tmp attribute for the template handler
			// to retrieve and process.
			$token->dataParsoid->getTemp()->templatedAttribs = $expAttrs;
		} else {
			// Mark token as having expanded attrs.
			$token->addAttribute( 'about', $this->env->newAboutId() );
			$token->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' );
			foreach ( $annotationTypes as $annotationType ) {
				$token->addSpaceSeparatedAttribute( 'typeof', 'mw:Annotation/' . $annotationType );
			}
			$token->addAttribute( 'data-mw', PHPUtils::jsonEncode( [ 'attribs' => $expAttrs ] ) );
		}
	}

	return new TokenHandlerResult(
		array_merge( $metaTokens, [ $token ], $postNLToks )
	);
}
616
/**
 * Processes any attribute keys and values that are not simple strings.
 * (Ex: Templated styles)
 *
 * Runs the token's attributes through a new AttributeTransformManager
 * and hands the expanded attributes to buildExpandedAttrs().
 *
 * @param Token $token Token whose attrs being expanded.
 * @return TokenHandlerResult
 */
public function processComplexAttributes( Token $token ): TokenHandlerResult {
	$atm = new AttributeTransformManager( $this->manager->getFrame(), [
		'expandTemplates' => $this->options['expandTemplates'],
		'inTemplate' => $this->options['inTemplate']
	] );
	return $this->buildExpandedAttrs( $token, $atm->process( $token->attribs ) );
}
631
/**
 * Token handler.
 *
 * For tokens that might have complex attributes, this handler
 * processes / expands them.
 * (Ex: Templated styles)
 *
 * @param Token|string $token Token whose attrs being expanded.
 * @return TokenHandlerResult|null Null when the token is left untouched:
 *  it is not a start/self-closing tag, has no attributes, or is one of
 *  the token kinds explicitly skipped below.
 */
public function onAny( $token ): ?TokenHandlerResult {
	if (
		!( $token instanceof TagTk || $token instanceof SelfclosingTagTk ) ||
		!count( $token->attribs )
	) {
		return null;
	}

	$name = $token->getName();
	$property = $token->getAttributeV( 'property' ) ?? '';
	$typeOf = $token->getAttributeV( 'typeof' ) ?? '';

	if (
		// Do not process dom-fragment tokens: a separate handler deals with them.
		$name === 'mw:dom-fragment-token' ||
		(
			$name === 'meta' &&
			(
				// Parsoid generated metas don't need expansion
				preg_match( '/mw:(Placeholder|Transclusion|Param|Includes)/', $typeOf ) ||
				// The TemplateHandler runs before the AttributeExpander and
				// magic words masquerading as templates may themselves be
				// templated (as in templated template names).
				// See TemplateHandler::processSpecialMagicWord()
				// So, we may see page properties that have already been
				// expanded and annotated with mw:ExpandedAttrs. We return
				// early to avoid the assertion below, at the expense of
				// perhaps not catching other cases where tokens are passed
				// through here doubly by mistake.
				( preg_match( '/mw:(PageProp)/', $property ) &&
					str_contains( $typeOf, 'mw:ExpandedAttrs' ) )
			)
		)
	) {
		return null;
	}

	Assert::invariant(
		!str_contains( $typeOf, 'mw:ExpandedAttrs' ),
		"Expanding an already expanded token, that's a no-no."
	);

	return $this->processComplexAttributes( $token );
}
686	}