Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 242 |
|
0.00% |
0 / 8 |
CRAP | |
0.00% |
0 / 1 |
AttributeExpander | |
0.00% |
0 / 242 |
|
0.00% |
0 / 8 |
7832 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
nlTkIndex | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
72 | |||
splitTokens | |
0.00% |
0 / 32 |
|
0.00% |
0 / 1 |
132 | |||
stripMetaTags | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
132 | |||
tplToksToString | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 | |||
buildExpandedAttrs | |
0.00% |
0 / 130 |
|
0.00% |
0 / 1 |
1892 | |||
processComplexAttributes | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
onAny | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
90 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Assert\UnreachableException; |
8 | use Wikimedia\Parsoid\Config\Env; |
9 | use Wikimedia\Parsoid\Tokens\KV; |
10 | use Wikimedia\Parsoid\Tokens\NlTk; |
11 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
12 | use Wikimedia\Parsoid\Tokens\TagTk; |
13 | use Wikimedia\Parsoid\Tokens\Token; |
14 | use Wikimedia\Parsoid\Utils\PHPUtils; |
15 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
16 | use Wikimedia\Parsoid\Utils\TokenUtils; |
17 | use Wikimedia\Parsoid\Utils\Utils; |
18 | use Wikimedia\Parsoid\Utils\WTUtils; |
19 | use Wikimedia\Parsoid\Wt2Html\Frame; |
20 | use Wikimedia\Parsoid\Wt2Html\PegTokenizer; |
21 | use Wikimedia\Parsoid\Wt2Html\TokenTransformManager; |
22 | |
23 | /** |
24 | * Generic attribute expansion handler. |
25 | */ |
26 | class AttributeExpander extends TokenHandler { |
/**
 * PCRE pattern used to recognise typeof values of meta markers.
 *
 * Capture groups:
 *  - 1: everything from "mw:" up to the end of the subject string
 *       (callers in this class test it with str_ends_with( ..., '/End' ))
 *  - 2: the marker kind (LanguageVariant, Transclusion, Param, Includes
 *       or "Annotation/")
 *  - 3: whatever follows the marker kind
 *
 * The "D" modifier makes "$" match only at the very end of the subject.
 */
private const META_TYPE_MATCHER = '#(mw:(LanguageVariant|Transclusion|Param|Includes|Annotation/)(.*)$)#D';
28 | |
29 | /** |
30 | * Used for re-tokenizing attribute strings that need to be re-expanded |
31 | * @var PegTokenizer |
32 | */ |
33 | private $tokenizer; |
34 | |
/**
 * Initialise the base handler and create the tokenizer that is later
 * used to re-tokenize attribute strings.
 *
 * @param TokenTransformManager $manager
 * @param array $options
 *  - bool inTemplate Is this being invoked while processing a template?
 *  - bool expandTemplates Should we expand templates encountered here?
 *  - bool standalone Is this AttributeExpander used as part of a pipeline
 *                    or is it being used standalone as a utility class?
 */
public function __construct( TokenTransformManager $manager, array $options ) {
	parent::__construct( $manager, $options );

	// The tokenizer is bound to the same environment as the manager.
	$env = $manager->getEnv();
	$this->tokenizer = new PegTokenizer( $env );
}
47 | |
/**
 * Find the position of the first newline token in an attribute key/value
 * token stream that is not enclosed by an <*include*> directive.
 *
 * @param bool $nlTkOkay When true, newlines are acceptable and no search
 *   is performed (the check lives here to keep call sites simple).
 * @param array $tokens Token stream to scan.
 * @param bool $atTopLevel Not consulted by the current implementation.
 * @return int Array key of the offending NlTk, or -1 if there is none.
 */
private static function nlTkIndex(
	bool $nlTkOkay, array $tokens, bool $atTopLevel
): int {
	if ( $nlTkOkay ) {
		return -1;
	}

	// Newlines are acceptable inside a <*include*>..</*include*> directive
	// since they are stripped out.
	//
	// let includeRE = !atTopLevel ?
	//     /(?:^|\s)mw:Includes\/NoInclude(\/.*)?(?:\s|$)/ :
	//     /(?:^|\s)mw:Includes\/(?:Only)?Include(?:Only)?(\/.*)?(?:\s|$)/;
	//
	// SSS FIXME: We cannot support this usage for <*include*> directives currently
	// since they don't go through template encapsulation and don't have a data-mw
	// format with "wt" and "transclusion" parts that we can use to just track bits
	// of wikitext that don't have a DOM representation.
	//
	// So, for now, we just suppress all newlines contained within these directives.
	$includeRE = '#(?:^|\s)mw:Includes/(?:No|Only)?Include(?:Only)?(/.*)?(?:\s|$)#D';
	$insideInclude = false;

	foreach ( $tokens as $pos => $tok ) {
		if ( $tok instanceof SelfclosingTagTk ) {
			// An include marker toggles the "inside include" state:
			// a start marker switches it on, a ".../End" marker switches it off.
			$typeOf = $tok->getAttributeV( 'typeof' );
			$match = [];
			if ( $typeOf && preg_match( $includeRE, $typeOf, $match, PREG_UNMATCHED_AS_NULL ) ) {
				$insideInclude = !str_ends_with( $match[1] ?? '', '/End' );
			}
			continue;
		}

		if ( $tok instanceof NlTk && !$insideInclude ) {
			// newline token outside <*include*>
			return $pos;
		}
	}

	return -1;
}
88 | |
/**
 * Split an expanded attribute token stream around a newline token and,
 * when template wrapping is active, hoist the last start meta-tag found
 * before the newline.
 *
 * Side effect: $token->dataParsoid->tsr->end is always reset to null.
 * When a start meta is found, its data-parsoid is updated as well
 * (unwrappedWT, firstWikitextNode, tsr->start).
 *
 * @param Frame $frame Source of the wikitext used to compute unwrappedWT.
 * @param Token $token Token whose attribute is being split.
 * @param int $nlTkPos Key in $tokens at which to split.
 * @param array $tokens Expanded attribute tokens.
 * @param bool $wrapTemplates Whether to look for start meta-tags.
 * @return array With keys 'metaTokens', 'preNLBuf' and 'postNLBuf'.
 *   If no start meta was found, the stream is returned unsplit in
 *   'preNLBuf' and the other two entries are empty arrays.
 */
private static function splitTokens(
	Frame $frame, Token $token, int $nlTkPos, array $tokens, bool $wrapTemplates
): array {
	$beforeNL = [];
	$afterNL = null;
	$lastStartMeta = null;
	$lastStartMetaKey = null;

	// Walk the stream up to the split point.
	foreach ( $tokens as $key => $tok ) {
		if ( $key === $nlTkPos ) {
			// split here!
			$afterNL = array_slice( $tokens, $key );
			break;
		}

		if ( $wrapTemplates && $tok instanceof SelfclosingTagTk ) {
			// We are interested in the last start meta tag.
			// Everything before it is assumed to be closed.
			$typeOf = $tok->getAttributeV( 'typeof' );
			$match = [];
			if ( $typeOf &&
				preg_match( self::META_TYPE_MATCHER, $typeOf, $match ) &&
				!str_ends_with( $match[1], '/End' )
			) {
				$lastStartMeta = $tok;
				$lastStartMetaKey = $key;
			}
		}

		// Keep the original key so the code stays robust if $tokens
		// were not contiguous.
		$beforeNL[$key] = $tok;
	}

	// We split the token into pieces.
	// Since we no longer know where this token now ends tsr-wise,
	// set tsr->end to null
	$token->dataParsoid->tsr->end = null;

	if ( !$lastStartMeta ) {
		return [ 'metaTokens' => [], 'preNLBuf' => $tokens, 'postNLBuf' => [] ];
	}

	// Blank out the hoisted start meta in the pre-newline buffer;
	// setting it to '' is sufficient.
	$beforeNL[$lastStartMetaKey] = '';

	// Support template wrapping with the following steps:
	// - Hoist the transclusion start-meta from the first line
	//   to before the token.
	// - Update the start-meta tsr to that of the token.
	// - Record the wikitext between the token and the transclusion
	//   as an unwrappedWT data-parsoid attribute of the start-meta.
	$metaDp = $lastStartMeta->dataParsoid;
	$tokenStart = $token->dataParsoid->tsr->start;
	$metaDp->unwrappedWT = substr(
		$frame->getSrcText(), $tokenStart, $metaDp->tsr->start - $tokenStart
	);

	// unwrappedWT will be added to the data-mw.parts array which makes
	// this a multi-template-content-block.
	// Record the first wikitext node of this block (required by html->wt serialization)

	// FIXME spec-compliant values would be upper-case, this is just a workaround
	// for current PHP DOM implementation and could be removed in the future
	$upperName = mb_strtoupper( $token->getName() );
	if ( isset( $token->dataParsoid->stx ) ) {
		$metaDp->firstWikitextNode = $upperName . '_' . $token->dataParsoid->stx;
	} else {
		$metaDp->firstWikitextNode = $upperName;
	}

	// Update tsr->start only. Unless the end-meta token is moved as well,
	// updating tsr->end can introduce bugs in cases like:
	//
	//   {|
	//   |{{singlechart|Australia|93|artist=Madonna|album=Girls Gone Wild}}|x
	//   |}
	//
	// which can then cause dirty diffs (the "|" before the x gets dropped).
	$metaDp->tsr->start = $token->dataParsoid->tsr->start;

	return [
		'metaTokens' => [ $lastStartMeta ],
		'preNLBuf' => $beforeNL,
		'postNLBuf' => $afterNL
	];
}
172 | |
/**
 * Remove meta tags (as introduced by transclusions, etc.) from a token
 * stream and report what was seen along the way.
 *
 * @param Env $env Not consulted by the current implementation.
 * @param array $tokens
 * @param bool $wrapTemplates When true, tag tokens whose typeof does not
 *   match META_TYPE_MATCHER are always kept.
 * @return array With keys:
 *  - 'hasGeneratedContent' (bool): a DOM-fragment token or a non-end
 *    meta marker was encountered
 *  - 'annotationType' (array): captured annotation types, in stream order
 *  - 'value' (array): the remaining tokens
 */
private static function stripMetaTags(
	Env $env, array $tokens, bool $wrapTemplates
): array {
	$kept = [];
	$sawGeneratedContent = false;
	$annotations = [];

	foreach ( $tokens as $tok ) {
		// Anything that is not a start/self-closing tag passes through untouched.
		if ( !( $tok instanceof TagTk || $tok instanceof SelfclosingTagTk ) ) {
			$kept[] = $tok;
			continue;
		}

		// Take advantage of this iteration of `tokens` to seek out
		// document fragments. They're an indication that an attribute
		// value wasn't present as literal text in the input and the
		// token should be annotated with "mw:ExpandedAttrs".
		if ( TokenUtils::hasDOMFragmentType( $tok ) ) {
			$sawGeneratedContent = true;
		}

		if ( $wrapTemplates ) {
			$typeOf = $tok->getAttributeV( 'typeof' );
			$match = [];
			if ( !$typeOf || !preg_match( self::META_TYPE_MATCHER, $typeOf, $match ) ) {
				// Not one of the marker types we strip.
				$kept[] = $tok;
				continue;
			}

			// A start marker (anything not ending in "/End") means the
			// attribute has generated content.
			if ( !str_ends_with( $match[1], '/End' ) ) {
				$sawGeneratedContent = true;
			}

			$annMatch = [];
			if ( preg_match( WTUtils::ANNOTATION_META_TYPE_REGEXP, $typeOf, $annMatch ) ) {
				$annotations[] = $annMatch[1];
			}
		}

		// Don't strip the token if it is not a meta-tag
		if ( $tok->getName() !== 'meta' ) {
			$kept[] = $tok;
		}
	}

	return [
		'hasGeneratedContent' => $sawGeneratedContent,
		'annotationType' => $annotations,
		'value' => $kept
	];
}
232 | |
/**
 * Replace every template token in an array by its recorded source
 * ($t->dataParsoid->src); all other entries are kept as they are.
 * Non-array input is returned unchanged.
 *
 * @param mixed $a
 * @return mixed A re-indexed array when $a is an array, otherwise $a itself.
 */
private static function tplToksToString( $a ) {
	if ( is_array( $a ) ) {
		// array_values() first: the result is always a 0-based list,
		// regardless of the keys of the input.
		return array_map(
			static function ( $tok ) {
				if ( TokenUtils::isTemplateToken( $tok ) ) {
					return $tok->dataParsoid->src;
				}
				return $tok;
			},
			array_values( $a )
		);
	}

	return $a;
}
247 | |
/**
 * Callback for attribute expansion in AttributeTransformManager.
 *
 * Walks the token's original attributes next to their expanded
 * counterparts, strips/hoists template meta markers, optionally
 * re-tokenizes "k=v" strings produced by templates, and records
 * templated attributes in data-mw (or in the token's temp data for
 * 'template' tokens). $token is modified in place.
 *
 * @param Token $token
 * @param KV[] $expandedAttrs Expanded attributes, index-aligned with $token->attribs.
 * @return TokenHandlerResult Hoisted meta tokens, then $token, then any
 *   tokens that followed a newline in a mixed attribute/content template.
 */
private function buildExpandedAttrs( Token $token, array $expandedAttrs ): TokenHandlerResult {
	// If we're not in a template, we'll be doing template wrapping in dom
	// post-processing (same conditional there), so take care of meta markers
	// found while processing tokens.
	$wrapTemplates = !$this->options['inTemplate'];
	$env = $this->manager->getEnv();
	$metaTokens = [];
	$postNLToks = [];
	$tmpDataMW = null;
	$oldAttrs = $token->attribs;
	// Build newAttrs lazily (on-demand) to avoid creating
	// objects in the common case where nothing of significance
	// happens in this code.
	$newAttrs = null;
	$nlTkPos = -1;
	$nlTkOkay = TokenUtils::isHTMLTag( $token ) || !TokenUtils::isTableTag( $token );
	$annotationTypes = [];

	// Identify attributes that were generated in full or in part using templates
	foreach ( $oldAttrs as $i => $oldA ) {
		$expandedA = $expandedAttrs[$i];

		// Preserve the key and value source, if available.
		// But, if 'oldA' wasn't cloned, expandedA will be the same as 'oldA'.
		if ( $oldA !== $expandedA ) {
			$expandedA->ksrc = $oldA->ksrc;
			$expandedA->vsrc = $oldA->vsrc;
			$expandedA->srcOffsets = $oldA->srcOffsets;
		}

		// Deal with two template-expansion scenarios for the attribute key (not value)
		//
		// 1. We have a template that generates multiple attributes of this token
		//    as well as content after the token.
		//    Ex: infobox templates from aircraft, ship, and other pages
		//        See enwiki:Boeing_757
		//
		//    - Split the expanded tokens into multiple lines.
		//    - Expanded attributes associated with the token are retained in the
		//      first line before a NlTk.
		//    - Content tokens after the NlTk are moved to subsequent lines.
		//    - The meta tags are hoisted before the original token to make sure
		//      that the entire token and following content is encapsulated as a unit.
		//
		// 2. We have a template that only generates multiple attributes of this
		//    token. In that case, we strip all template meta tags from the expanded
		//    tokens and assign it a mw:ExpandedAttrs type with orig/expanded
		//    values in data-mw.
		//
		// Reparse-KV-string scenario with templated attributes:
		// -----------------------------------------------------
		// In either scenario above, we need additional special handling if the
		// template generates one or more k=v style strings:
		//    <div {{1x|1=style='color:red''}}></div>
		//    <div {{1x|1=style='color:red' title='boo'}}></div>
		//
		// Real use case: Template {{ligne grise}} on frwp.
		//
		// To support this, we utilize the following hack. If we got a string of the
		// form "k=v" and our orig-v was "", we convert the token array to a string
		// and retokenize it to extract one or more attributes.
		//
		// But, we won't support scenarios like this:
		//   {| title={{1x|1='name' style='color:red;'\n|-\n|foo}}\n|}
		// Here, part of one attribute and additional complete attribute strings
		// need reparsing, and that isn't a use case that is worth more complexity here.
		//
		// FIXME:
		// ------
		// 1. It is not possible for multiple instances of scenario 1 to be triggered
		//    for the same token. So, I am not bothering trying to test and deal with it.
		//
		// 2. We trigger the Reparse-KV-string scenario only for attribute keys,
		//    since it isn't possible for attribute values to require this reparsing.
		//    However, it is possible to come up with scenarios where a template
		//    returns the value for one attribute and additional k=v strings for newer
		//    attributes. We don't support that scenario, but don't even test for it.
		//
		// Reparse-KV-string scenario with non-string attributes:
		// ------------------------------------------------------
		// This is only going to be the case with table wikitext that has special syntax
		// for attribute strings.
		//
		// {| <div>a</div> style='border:1px solid black;'
		// |- <div>b</div> style='border:1px dotted blue;'
		// | <div>c</div> style='color:red;'
		// |}
		//
		// In wikitext like the above, the PEG tokenizer doesn't recognize these as
		// valid attributes (the templated attribute scenario is a special case) and
		// orig-v will be "". So, the same strategy as above is applied here as well.

		$expandedK = $origK = $expandedA->k;
		$expandedV = $origV = $expandedA->v;
		$updatedK = null;
		$updatedV = null;
		$reparsedKV = false;
		$keyUsesMixedAttrContentTpl = false;
		$valUsesMixedAttrContentTpl = false;

		if ( $expandedK ) {
			// FIXME: We should get rid of these array/string/non-string checks
			// and probably use appropriately-named flags to convey type information.
			if ( is_array( $oldA->k ) ) {
				if ( !is_array( $expandedK ) ) {
					throw new UnreachableException( "expandedK: expected array. Found: " .
						PHPUtils::jsonEncode( $expandedK ) );
				}

				$nlTkPos = self::nlTkIndex( $nlTkOkay, $expandedK, $wrapTemplates );
				if ( $nlTkPos !== -1 ) {
					// Scenario 1 from the documentation comment above.
					$keyUsesMixedAttrContentTpl = true;
					$updatedK = self::splitTokens(
						$this->manager->getFrame(), $token, $nlTkPos, $expandedK, $wrapTemplates
					);
					$expandedK = $updatedK['preNLBuf'];
					$postNLToks = $updatedK['postNLBuf'];
					$metaTokens = $updatedK['metaTokens'];
					// We split up this attribute's key into pieces.
					// isset() also covers a null srcOffsets, which is possible
					// (it is read with "?? null" further down).
					if ( isset( $expandedA->srcOffsets->key ) ) {
						$expandedA->srcOffsets->key->end = null;
					}
				} else {
					// Maybe scenario 2 from the documentation comment above.
					$updatedK = self::stripMetaTags( $env, $expandedK, $wrapTemplates );
					PHPUtils::pushArray( $annotationTypes, $updatedK['annotationType'] );
					$expandedK = $updatedK['value'];
				}

				$expandedA->k = $expandedK;

				// Check if we need to deal with the Reparse-KV-string scenario.
				// (See documentation comment above.)
				//
				// Don't incorrectly reparse the kv string for parser functions.
				// Ex: "#ifexpr" parser function expects the "=" equality operator.
				// We encounter those in "standalone" mode (used to expand
				// templated template targets).
				if ( $expandedA->v === '' && empty( $this->options['standalone'] ) ) {
					// Extract a parsable string from the token array.
					// Trim whitespace to ensure tokenizer isn't tripped up
					// by the presence of unnecessary whitespace.
					$kStr = trim( TokenUtils::tokensToString( $expandedK, false, [
						// These tokens haven't been expanded to DOM yet
						// so unpacking them here is justifiable
						'unpackDOMFragments' => true,
						'env' => $env
					] ) );
					$rule = $nlTkOkay ? 'generic_newline_attributes' : 'table_attributes';
					$kvs = str_contains( $kStr, '=' ) ?
						$this->tokenizer->tokenizeAs( $kStr, $rule, /* sol */true ) : null;
					if ( $kvs ) {
						// At this point, templates should have been expanded.
						// Returning a template token here probably means that
						// when we just converted to string and reparsed, we failed
						// to expand the template. This can be particularly bad
						// when we make iterative calls to expand template names.
						// So, give up template expansion and convert them to strings.
						foreach ( $kvs as $kv ) {
							$kv->k = self::tplToksToString( $kv->k );
							$kv->v = self::tplToksToString( $kv->v );

							// $kStr is based on running tokensToString on $expandedK.
							// So, $kStr might have dropped HTML tags, etc. Given that,
							// we can no longer reliably compute offsets for these
							// new key/value pairs. We could try to be more smart here,
							// but it is not worth the complexity.
							$kv->srcOffsets = null;
						}
						// SSS FIXME: Collect all keys here, not just the first key
						// i.e. in a string like {{1x|1=id='v1' title='foo' style='..'}}
						// that string is setting attributes for [id, title, style], not just id.
						//
						// That requires the ability for the data-mw.attribs[i].txt to be an array.
						// However, the spec at [[mw:Specs/HTML#Generated_attributes_of_HTML_tags]]
						// says:
						//
						//    "This spec also assumes that a template can only
						//     generate one attribute rather than multiple attributes."
						//
						// So, revision of the spec is another FIXME at which point this code can
						// be updated to reflect the revised spec.
						$expandedK = $kvs[0]->k;
						$reparsedKV = true;
						if ( !$newAttrs ) {
							$newAttrs = $i === 0 ? [] : array_slice( $expandedAttrs, 0, $i );
						}
						PHPUtils::pushArray( $newAttrs, $kvs );
					}
				}
			}

			// We have a potentially expanded value.
			// Check if the value came from a template/extension expansion.
			if ( is_string( $expandedK ) && !str_starts_with( $expandedK, 'mw:' )
				&& is_array( $oldA->v )
			) {
				$nlTkPos = self::nlTkIndex( $nlTkOkay, $expandedV, $wrapTemplates );
				if ( $nlTkPos !== -1 ) {
					// Scenario 1 from the documentation comment above.
					$valUsesMixedAttrContentTpl = true;
					$updatedV = self::splitTokens(
						$this->manager->getFrame(), $token, $nlTkPos,
						$expandedV, $wrapTemplates
					);
					$expandedV = $updatedV['preNLBuf'];
					$postNLToks = $updatedV['postNLBuf'];
					$metaTokens = $updatedV['metaTokens'];
					// We split up this attribute's value into pieces.
					// isset() also covers a null srcOffsets (see the key case above).
					if ( isset( $expandedA->srcOffsets->value ) ) {
						$expandedA->srcOffsets->value->end = null;
					}
				} else {
					// Maybe scenario 2 from the documentation comment above.
					$updatedV = self::stripMetaTags( $env, $expandedV, $wrapTemplates );
					PHPUtils::pushArray( $annotationTypes, $updatedV['annotationType'] );
					$expandedV = $updatedV['value'];
				}
				$expandedA->v = $expandedV;
			}

			// Update data-mw to account for templated attributes.
			// For editability, set HTML property.
			if ( !empty( $updatedK['hasGeneratedContent'] ) ||
				!empty( $updatedV['hasGeneratedContent'] ) ||
				( $reparsedKV && count( $metaTokens ) > 0 )
			) {
				$key = TokenUtils::tokensToString( $expandedK );
				if ( !$tmpDataMW ) {
					$tmpDataMW = [];
				}

				// For the $(key|val)UsesMixedAttrContentTpl checks below,
				// it is incorrect to assign the HTML for the original wikitext
				// string since the content part will get duplicated in both
				// this data-mw and in the actual body of the table (for example)
				// and cause bugs like T249740.
				//
				// So, in this case, we assign just the key/value part of the HTML
				// ($expandedA->k or $expandedA->v), but we mark it uneditable
				// because we cannot really edit just the key/value of the attribute
				// on its own because it is only a part of the template's output.
				if ( $reparsedKV ) {
					// If we encountered a reparse-KV-string scenario,
					// we set the value's HTML to [] since we can edit
					// the transclusion either via the key's HTML or the
					// value's HTML, but not both.
					$keyHTML = $keyUsesMixedAttrContentTpl ? $expandedA->k : $origK;
					$valHTML = [];
				} else {
					Assert::invariant( !$keyUsesMixedAttrContentTpl,
						"If reparseKV was false, and we had a mixed attr-content template, " .
						"we should have landed in the valUsesMixedAttrContentTpl codepath." );
					$keyHTML = empty( $updatedK['hasGeneratedContent'] ) ? null : $origK;
					$valHTML = $valUsesMixedAttrContentTpl ? $expandedA->v : $origV;
				}

				// FIXME: Ideally we would have called them ktext, khtml, vhtml
				// since in the serialized data-mw, the "k" and "v" key strings are dropped.
				//    [{ "ktxt":..., "khtml":... }, { "vhtml":... }]
				//         is clearer and less confusing than
				//    [{ "txt":..., "html":... }, { "html":... }]
				$tmpDataMW[$key] = [
					'k' => [ 'txt' => $key, 'srcOffsets' => $expandedA->srcOffsets->key ?? null ],
					// FIXME: Why is 'txt' missing? Why are we not checking for [] ?
					'v' => [ 'html' => $valHTML, 'srcOffsets' => $expandedA->srcOffsets->value ?? null ]
				];

				if ( $keyHTML !== null ) {
					$tmpDataMW[$key]['k']['html'] = $keyHTML;
				}
				if ( $keyUsesMixedAttrContentTpl ) {
					$tmpDataMW[$key]['k']['uneditable'] = true;
				}
				if ( $valUsesMixedAttrContentTpl ) {
					$tmpDataMW[$key]['v']['uneditable'] = true;
				}
			}
		}

		// Update newAttrs
		if ( $newAttrs && !$reparsedKV ) {
			$newAttrs[] = $expandedA;
		}
	}

	$token->attribs = $newAttrs ?? $expandedAttrs;

	// If the token already has an about, it already has transclusion/extension
	// wrapping. No need to record information about templated attributes in addition.
	//
	// FIXME: If there is a real use case for extension attributes getting templated,
	// this check can be relaxed to allow that.
	// https://gerrit.wikimedia.org/r/#/c/65575 has some reference code that can be used then.

	if ( !$token->getAttributeV( 'about' ) && $tmpDataMW && count( $tmpDataMW ) > 0 ) {
		// Flatten k-v pairs.
		$vals = [];
		foreach ( $tmpDataMW as $obj ) {
			$vals[] = $obj['k'];
			$vals[] = $obj['v'];
		}

		// Clone the vals since they'll be passed to another pipeline
		// for expanding, which may destructively mutate them in the process.
		//
		// This is a problem since subsequent handlers to the
		// AttributeExpander may interact with the original tokens still
		// present as attributes of `token`.
		//
		// For example, while treebuilding, the object holding dataParsoid
		// of a token is reused as the data-parsoid attribute of the
		// corresponding node. Thus, when we get to the DOM cleanup pass,
		// unsetting properties changes the token as well. This was
		// the issue when an "href" was expanded and then the
		// ExternalLinkHandler tried to call tokensToString on it,
		// resulting in a transcluded entity missing its src (which, by the way,
		// had already been clobbered by WrapTemplates, similar to T214241).
		//
		// The general principle here being, don't share tokens between
		// pipelines.
		$vals = Utils::clone( $vals );

		// Expand all token arrays to DOM.
		$eVals = PipelineUtils::expandAttrValuesToDOM(
			$this->env, $this->manager->getFrame(), $vals,
			$this->options['expandTemplates'],
			$this->options['inTemplate']
		);

		// Rebuild flattened k-v pairs.
		$expAttrs = [];
		$numVals = count( $eVals );
		for ( $j = 0; $j < $numVals; $j += 2 ) {
			$expAttrs[] = [ $eVals[$j], $eVals[$j + 1] ];
		}

		if ( $token->getName() === 'template' ) {
			// Don't add Parsoid about, typeof, data-mw attributes here since
			// we won't be able to distinguish between Parsoid-added attributes
			// and actual template attributes in cases like:
			//   {{some-tpl|about=#mwt1|typeof=mw:Transclusion}}
			// In both cases, we will encounter a template token that looks like:
			//   { ... "attribs":[{"k":"about","v":"#mwt1"},{"k":"typeof","v":"mw:Transclusion"}] .. }
			// So, record these in the tmp attribute for the template handler
			// to retrieve and process.
			$token->dataParsoid->getTemp()->templatedAttribs = $expAttrs;
		} else {
			// Mark token as having expanded attrs.
			$token->addAttribute( 'about', $this->env->newAboutId() );
			$token->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' );
			foreach ( $annotationTypes as $annotationType ) {
				$token->addSpaceSeparatedAttribute( 'typeof', 'mw:Annotation/' . $annotationType );
			}
			$token->addAttribute( 'data-mw', PHPUtils::jsonEncode( [ 'attribs' => $expAttrs ] ) );
		}
	}

	return new TokenHandlerResult(
		array_merge( $metaTokens, [ $token ], $postNLToks )
	);
}
616 | |
/**
 * Expand every attribute key and value of a token that is not a simple
 * string (Ex: Templated styles) and fold the result back into the token.
 *
 * @param Token $token Token whose attrs being expanded.
 * @return TokenHandlerResult
 */
public function processComplexAttributes( Token $token ): TokenHandlerResult {
	$frame = $this->manager->getFrame();
	// Only these two options are forwarded to the attribute transformer.
	$atmOptions = [
		'expandTemplates' => $this->options['expandTemplates'],
		'inTemplate' => $this->options['inTemplate']
	];
	$atm = new AttributeTransformManager( $frame, $atmOptions );

	$expandedAttrs = $atm->process( $token->attribs );
	return $this->buildExpandedAttrs( $token, $expandedAttrs );
}
631 | |
/**
 * Token handler.
 *
 * Start and self-closing tag tokens that carry attributes get their
 * attributes processed / expanded (Ex: Templated styles). Everything
 * else, and a few kinds of tokens listed below, is left alone.
 *
 * @param Token|string $token Token whose attrs being expanded.
 * @return TokenHandlerResult|null Null when the token is not handled here.
 */
public function onAny( $token ): ?TokenHandlerResult {
	// Only tag tokens can carry attributes worth expanding.
	if ( !( $token instanceof TagTk ) && !( $token instanceof SelfclosingTagTk ) ) {
		return null;
	}
	if ( count( $token->attribs ) === 0 ) {
		return null;
	}

	$name = $token->getName();
	$property = $token->getAttributeV( 'property' ) ?? '';
	$typeOf = $token->getAttributeV( 'typeof' ) ?? '';

	// Do not process dom-fragment tokens: a separate handler deals with them.
	if ( $name === 'mw:dom-fragment-token' ) {
		return null;
	}

	if ( $name === 'meta' ) {
		// Parsoid generated metas don't need expansion
		if ( preg_match( '/mw:(Placeholder|Transclusion|Param|Includes)/', $typeOf ) ) {
			return null;
		}

		// The TemplateHandler runs before the AttributeExpander and
		// magic words masquerading as templates may themselves be
		// templated (as in templated template names).
		// See TemplateHandler::processSpecialMagicWord()
		// So, we may see page properties that have already been
		// expanded and annotated with mw:ExpandedAttrs. We return
		// early to avoid the assertion below, at the expense of
		// perhaps not catching other cases where tokens are passed
		// through here doubly by mistake.
		if ( preg_match( '/mw:(PageProp)/', $property ) &&
			str_contains( $typeOf, 'mw:ExpandedAttrs' )
		) {
			return null;
		}
	}

	Assert::invariant(
		!str_contains( $typeOf, 'mw:ExpandedAttrs' ),
		"Expanding an already expanded token, that's a no-no."
	);

	return $this->processComplexAttributes( $token );
}
686 | } |