Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 250 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
AttributeExpander | |
0.00% |
0 / 250 |
|
0.00% |
0 / 9 |
8010 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
nlTkIndex | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
72 | |||
splitTokens | |
0.00% |
0 / 32 |
|
0.00% |
0 / 1 |
132 | |||
stripMetaTags | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
132 | |||
tplToksToString | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 | |||
buildExpandedAttrs | |
0.00% |
0 / 129 |
|
0.00% |
0 / 1 |
1892 | |||
processComplexAttributes | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
expandFirstAttribute | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
onAny | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
90 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Assert\UnreachableException; |
8 | use Wikimedia\Parsoid\Config\Env; |
9 | use Wikimedia\Parsoid\NodeData\DataMw; |
10 | use Wikimedia\Parsoid\NodeData\DataMwAttrib; |
11 | use Wikimedia\Parsoid\Tokens\KV; |
12 | use Wikimedia\Parsoid\Tokens\NlTk; |
13 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
14 | use Wikimedia\Parsoid\Tokens\TagTk; |
15 | use Wikimedia\Parsoid\Tokens\Token; |
16 | use Wikimedia\Parsoid\Utils\PHPUtils; |
17 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
18 | use Wikimedia\Parsoid\Utils\TokenUtils; |
19 | use Wikimedia\Parsoid\Utils\Utils; |
20 | use Wikimedia\Parsoid\Utils\WTUtils; |
21 | use Wikimedia\Parsoid\Wt2Html\Frame; |
22 | use Wikimedia\Parsoid\Wt2Html\PegTokenizer; |
23 | use Wikimedia\Parsoid\Wt2Html\TokenTransformManager; |
24 | |
25 | /** |
26 | * Generic attribute expansion handler. |
27 | */ |
28 | class AttributeExpander extends TokenHandler { |
29 | private const META_TYPE_MATCHER = '#(mw:(LanguageVariant|Transclusion|Param|Includes|Annotation/)(.*)$)#D'; |
30 | |
31 | /** |
32 | * Used for re-tokenizing attribute strings that need to be re-expanded |
33 | * @var PegTokenizer |
34 | */ |
35 | private $tokenizer; |
36 | |
37 |     /**
38 |      * @param TokenTransformManager $manager Also supplies the Env used to build the private re-tokenizer
39 |      * @param array $options
40 |      *  - bool inTemplate Is this being invoked while processing a template?
41 |      *  - bool expandTemplates Should we expand templates encountered here?
42 |      *  - bool standalone Is this AttributeExpander used as part of a pipeline
43 |      *      or is it being used standalone as a utility class?
44 |      */
45 |     public function __construct( TokenTransformManager $manager, array $options ) {
46 |         parent::__construct( $manager, $options );
47 |         $this->tokenizer = new PegTokenizer( $manager->getEnv() );
48 |     }
49 | |
50 |     private static function nlTkIndex(
51 |         bool $nlTkOkay, array $tokens, bool $atTopLevel
52 |     ): int {
53 |         // Returns the key of the first NlTk found outside an include directive, or -1
54 |         // if there is none. When newlines are okay for the caller, skip the scan entirely.
55 |         if ( $nlTkOkay ) {
56 |             return -1;
57 |         }
58 | 
59 |         // Check if we have a newline token in the attribute key/value token stream.
60 |         // However, newlines are acceptable inside a <*include*>..</*include*> directive
61 |         // since they are stripped out.
62 |         // Commented-out alternative that depended on atTopLevel ($atTopLevel is unused below):
63 |         // let includeRE = !atTopLevel ?
64 |         //     /(?:^|\s)mw:Includes\/NoInclude(\/.*)?(?:\s|$)/ :
65 |         //     /(?:^|\s)mw:Includes\/(?:Only)?Include(?:Only)?(\/.*)?(?:\s|$)/;
66 |         //
67 |         // SSS FIXME: We cannot support this usage for <*include*> directives currently
68 |         // since they don't go through template encapsulation and don't have a data-mw
69 |         // format with "wt" and "transclusion" parts that we can use to just track bits
70 |         // of wikitext that don't have a DOM representation.
71 |         //
72 |         // So, for now, we just suppress all newlines contained within these directives.
73 |         $includeRE = '#(?:^|\s)mw:Includes/(?:No|Only)?Include(?:Only)?(/.*)?(?:\s|$)#D';
74 |         $inInclude = false;
75 |         foreach ( $tokens as $i => $t ) {
76 |             if ( $t instanceof SelfclosingTagTk ) {
77 |                 $type = $t->getAttributeV( 'typeof' );
78 |                 $typeMatch = [];
79 |                 if ( $type && preg_match( $includeRE, $type, $typeMatch, PREG_UNMATCHED_AS_NULL ) ) {
80 |                     $inInclude = !str_ends_with( $typeMatch[1] ?? '', '/End' ); // a ".../End" type closes the directive
81 |                 }
82 |             } elseif ( !$inInclude && $t instanceof NlTk ) {
83 |                 // newline token outside <*include*>: report its position
84 |                 return $i;
85 |             }
86 |         }
87 | 
88 |         return -1;
89 |     }
90 | |
91 |     private static function splitTokens(
92 |         Frame $frame, Token $token, int $nlTkPos, array $tokens, bool $wrapTemplates
93 |     ): array { // result keys: 'metaTokens', 'preNLBuf', 'postNLBuf'
94 |         $preNLBuf = [];
95 |         $postNLBuf = null;
96 |         $startMeta = null;
97 |         $metaTokens = null;
98 | 
99 |         // Split the token array around the newline token at index $nlTkPos.
100 |         $startMetaIndex = null;
101 |         foreach ( $tokens as $i => $t ) {
102 |             if ( $i === $nlTkPos ) {
103 |                 // split here! The newline token itself goes into $postNLBuf.
104 |                 $postNLBuf = array_slice( $tokens, $i );
105 |                 break;
106 |             } else {
107 |                 if ( $wrapTemplates && $t instanceof SelfclosingTagTk ) {
108 |                     $type = $t->getAttributeV( 'typeof' );
109 |                     // We are interested in the last start meta tag.
110 |                     // Everything before it is assumed to be closed.
111 |                     $typeMatch = [];
112 |                     if ( $type &&
113 |                         preg_match( self::META_TYPE_MATCHER, $type, $typeMatch ) &&
114 |                         !str_ends_with( $typeMatch[1], '/End' )
115 |                     ) {
116 |                         $startMeta = $t;
117 |                         $startMetaIndex = $i;
118 |                     }
119 |                 }
120 | 
121 |                 // Use $i to make code robust if $tokens were not contiguous
122 |                 $preNLBuf[$i] = $t;
123 |             }
124 |         }
125 | 
126 |         // Clear $startMeta from $preNLBuf - setting to '' is sufficient.
127 |         if ( $startMeta ) {
128 |             $preNLBuf[$startMetaIndex] = '';
129 |         }
130 | 
131 |         // We split the token into pieces.
132 |         // Since we no longer know where this token now ends tsr-wise,
133 |         // set tsr->end to null
134 |         $token->dataParsoid->tsr->end = null;
135 | 
136 |         if ( $startMeta ) {
137 |             // Support template wrapping with the following steps:
138 |             // - Hoist the transclusion start-meta from the first line
139 |             //   to before the token.
140 |             // - Update the start-meta tsr to that of the token.
141 |             // - Record the wikitext between the token and the transclusion
142 |             //   as an unwrappedWT data-parsoid attribute of the start-meta.
143 |             $dp = $startMeta->dataParsoid;
144 |             $dp->unwrappedWT = substr( $frame->getSrcText(), $token->dataParsoid->tsr->start,
145 |                 $dp->tsr->start - $token->dataParsoid->tsr->start );
146 | 
147 |             // unwrappedWT will be added to the data-mw.parts array which makes
148 |             // this a multi-template-content-block.
149 |             // Record the first wikitext node of this block (required by html->wt serialization)
150 | 
151 |             // FIXME spec-compliant values would be upper-case, this is just a workaround
152 |             // for current PHP DOM implementation and could be removed in the future
153 |             $tokenName = mb_strtoupper( $token->getName() );
154 | 
155 |             $dp->firstWikitextNode = isset( $token->dataParsoid->stx ) ?
156 |                 $tokenName . '_' . $token->dataParsoid->stx : $tokenName;
157 | 
158 |             // Update tsr->start only. Unless the end-meta token is moved as well,
159 |             // updating tsr->end can introduce bugs in cases like:
160 |             //
161 |             //   {|
162 |             //   |{{singlechart|Australia|93|artist=Madonna|album=Girls Gone Wild}}|x
163 |             //   |}
164 |             //
165 |             // which can then cause dirty diffs (the "|" before the x gets dropped).
166 |             $dp->tsr->start = $token->dataParsoid->tsr->start;
167 |             $metaTokens = [ $startMeta ];
168 | 
169 |             return [ 'metaTokens' => $metaTokens, 'preNLBuf' => $preNLBuf, 'postNLBuf' => $postNLBuf ];
170 |         } else { // no start meta found: hand back $tokens unsplit, nothing hoisted
171 |             return [ 'metaTokens' => [], 'preNLBuf' => $tokens, 'postNLBuf' => [] ];
172 |         }
173 |     }
174 | |
175 |     /**
176 |      * This helper method strips meta tags introduced by
177 |      * transclusions, etc. and returns the remaining content plus what it noticed on the way.
178 |      *
179 |      * @param Env $env Not referenced by the method body
180 |      * @param array $tokens
181 |      * @param bool $wrapTemplates If true, tag tokens whose typeof does not match META_TYPE_MATCHER are always kept
182 |      * @return array With keys 'hasGeneratedContent' (bool), 'annotationType' (array) and 'value' (kept tokens)
183 |      */
184 |     private static function stripMetaTags(
185 |         Env $env, array $tokens, bool $wrapTemplates
186 |     ): array {
187 |         $buf = [];
188 |         $hasGeneratedContent = false;
189 |         $annotationType = [];
190 | 
191 |         foreach ( $tokens as $t ) {
192 |             if ( $t instanceof TagTk || $t instanceof SelfclosingTagTk ) {
193 |                 // Take advantage of this iteration of `tokens` to seek out
194 |                 // document fragments.  They're an indication that an attribute
195 |                 // value wasn't present as literal text in the input and the
196 |                 // token should be annotated with "mw:ExpandedAttrs".
197 |                 if ( TokenUtils::hasDOMFragmentType( $t ) ) {
198 |                     $hasGeneratedContent = true;
199 |                 }
200 | 
201 |                 if ( $wrapTemplates ) {
202 |                     // Strip all meta tags whose typeof matches META_TYPE_MATCHER.
203 |                     $type = $t->getAttributeV( 'typeof' );
204 |                     $typeMatch = [];
205 |                     if ( $type && preg_match( self::META_TYPE_MATCHER, $type, $typeMatch ) ) {
206 |                         if ( !str_ends_with( $typeMatch[1], '/End' ) ) {
207 |                             $hasGeneratedContent = true;
208 |                         }
209 |                         $groups = [];
210 |                         if ( preg_match( WTUtils::ANNOTATION_META_TYPE_REGEXP, $type, $groups ) ) {
211 |                             $annotationType[] = $groups[1];
212 |                         }
213 |                     } else {
214 |                         $buf[] = $t;
215 |                         continue;
216 |                     }
217 |                 }
218 | 
219 |                 if ( $t->getName() !== 'meta' ) {
220 |                     // Don't strip token if it is not a meta-tag
221 |                     $buf[] = $t;
222 |                 }
223 |             } else {
224 |                 $buf[] = $t;
225 |             }
226 |         }
227 | 
228 |         return [
229 |             'hasGeneratedContent' => $hasGeneratedContent,
230 |             'annotationType' => $annotationType,
231 |             'value' => $buf
232 |         ];
233 |     }
234 | |
235 |     /** Replace every template token in a token array with its dataParsoid->src.
236 |      * @param mixed $a Token array; any non-array value is returned unchanged
237 |      * @return mixed New array with template tokens swapped for their source, other entries as-is
238 |      */
239 |     private static function tplToksToString( $a ) {
240 |         if ( !is_array( $a ) ) {
241 |             return $a;
242 |         }
243 |         $ret = [];
244 |         foreach ( $a as $t ) {
245 |             $ret[] = TokenUtils::isTemplateToken( $t ) ? $t->dataParsoid->src : $t;
246 |         }
247 |         return $ret;
248 |     }
249 | |
250 |     /**
251 |      * Callback for attribute expansion in AttributeTransformManager
252 |      * @param Token $token Token being updated in place (its attribs and possibly about/typeof/dataMw)
253 |      * @param KV[] $expandedAttrs Expanded attributes, indexed in parallel with $token->attribs
254 |      * @return TokenHandlerResult Wraps [ hoisted meta tokens..., $token, post-newline tokens... ]
255 |      */
256 |     private function buildExpandedAttrs( Token $token, array $expandedAttrs ) {
257 |         // If we're not in a template, we'll be doing template wrapping in dom
258 |         // post-processing (same conditional there), so take care of meta markers
259 |         // found while processing tokens.
260 |         $wrapTemplates = !$this->options['inTemplate'];
261 |         $env = $this->manager->getEnv();
262 |         $metaTokens = [];
263 |         $postNLToks = [];
264 |         $tmpDataMW = null;
265 |         $oldAttrs = $token->attribs;
266 |         // Build newAttrs lazily (on-demand) to avoid creating
267 |         // objects in the common case where nothing of significance
268 |         // happens in this code.
269 |         $newAttrs = null;
270 |         $nlTkPos = -1;
271 |         $nlTkOkay = TokenUtils::isHTMLTag( $token ) || !TokenUtils::isTableTag( $token );
272 |         $annotationTypes = [];
273 | 
274 |         // Identify attributes that were generated in full or in part using templates
275 |         foreach ( $oldAttrs as $i => $oldA ) {
276 |             $expandedA = $expandedAttrs[$i];
277 | 
278 |             // Preserve the key and value source, if available.
279 |             // But, if 'oldA' wasn't cloned, expandedA will be the same as 'oldA'.
280 |             if ( $oldA !== $expandedA ) {
281 |                 $expandedA->ksrc = $oldA->ksrc;
282 |                 $expandedA->vsrc = $oldA->vsrc;
283 |                 $expandedA->srcOffsets = $oldA->srcOffsets;
284 |             }
285 | 
286 |             // Deal with two template-expansion scenarios for the attribute key (not value)
287 |             //
288 |             // 1. We have a template that generates multiple attributes of this token
289 |             //    as well as content after the token.
290 |             //    Ex: infobox templates from aircraft, ship, and other pages
291 |             //        See enwiki:Boeing_757
292 |             //
293 |             //    - Split the expanded tokens into multiple lines.
294 |             //    - Expanded attributes associated with the token are retained in the
295 |             //      first line before a NlTk.
296 |             //    - Content tokens after the NlTk are moved to subsequent lines.
297 |             //    - The meta tags are hoisted before the original token to make sure
298 |             //      that the entire token and following content is encapsulated as a unit.
299 |             //
300 |             // 2. We have a template that only generates multiple attributes of this
301 |             //    token. In that case, we strip all template meta tags from the expanded
302 |             //    tokens and assign it a mw:ExpandedAttrs type with orig/expanded
303 |             //    values in data-mw.
304 |             //
305 |             // Reparse-KV-string scenario with templated attributes:
306 |             // -----------------------------------------------------
307 |             // In either scenario above, we need additional special handling if the
308 |             // template generates one or more k=v style strings:
309 |             //    <div {{1x|1=style='color:red''}}></div>
310 |             //    <div {{1x|1=style='color:red' title='boo'}}></div>
311 |             //
312 |             // Real use case: Template {{ligne grise}} on frwp.
313 |             //
314 |             // To support this, we utilize the following hack. If we got a string of the
315 |             // form "k=v" and our orig-v was "", we convert the token array to a string
316 |             // and retokenize it to extract one or more attributes.
317 |             //
318 |             // But, we won't support scenarios like this:
319 |             //   {| title={{1x|1='name' style='color:red;'\n|-\n|foo}}\n|}
320 |             // Here, part of one attribute and additional complete attribute strings
321 |             // need reparsing, and that isn't a use case that is worth more complexity here.
322 |             //
323 |             // FIXME:
324 |             // ------
325 |             // 1. It is not possible for multiple instances of scenario 1 to be triggered
326 |             //    for the same token. So, I am not bothering trying to test and deal with it.
327 |             //
328 |             // 2. We trigger the Reparse-KV-string scenario only for attribute keys,
329 |             //    since it isn't possible for attribute values to require this reparsing.
330 |             //    However, it is possible to come up with scenarios where a template
331 |             //    returns the value for one attribute and additional k=v strings for newer
332 |             //    attributes. We don't support that scenario, but don't even test for it.
333 |             //
334 |             // Reparse-KV-string scenario with non-string attributes:
335 |             // ------------------------------------------------------
336 |             // This is only going to be the case with table wikitext that has special syntax
337 |             // for attribute strings.
338 |             //
339 |             // {| <div>a</div> style='border:1px solid black;'
340 |             // |- <div>b</div> style='border:1px dotted blue;'
341 |             // | <div>c</div> style='color:red;'
342 |             // |}
343 |             //
344 |             // In wikitext like the above, the PEG tokenizer doesn't recognize these as
345 |             // valid attributes (the templated attribute scenario is a special case) and
346 |             // orig-v will be "". So, the same strategy as above is applied here as well.
347 | 
348 |             $expandedK = $origK = $expandedA->k;
349 |             $expandedV = $origV = $expandedA->v;
350 |             $updatedK = null;
351 |             $updatedV = null;
352 |             $reparsedKV = false;
353 |             $keyUsesMixedAttrContentTpl = false;
354 |             $valUsesMixedAttrContentTpl = false;
355 | 
356 |             if ( $expandedK ) {
357 |                 // FIXME: We should get rid of these array/string/non-string checks
358 |                 // and probably use appropriately-named flags to convey type information.
359 |                 if ( is_array( $oldA->k ) ) {
360 |                     if ( !is_array( $expandedK ) ) {
361 |                         throw new UnreachableException( "expandedK: expected array. Found: " .
362 |                             PHPUtils::jsonEncode( $expandedK ) );
363 |                     }
364 | 
365 |                     $nlTkPos = self::nlTkIndex( $nlTkOkay, $expandedK, $wrapTemplates );
366 |                     if ( $nlTkPos !== -1 ) {
367 |                         // Scenario 1 from the documentation comment above.
368 |                         $keyUsesMixedAttrContentTpl = true;
369 |                         $updatedK = self::splitTokens(
370 |                             $this->manager->getFrame(), $token, $nlTkPos, $expandedK, $wrapTemplates
371 |                         );
372 |                         $expandedK = $updatedK['preNLBuf'];
373 |                         $postNLToks = $updatedK['postNLBuf'];
374 |                         $metaTokens = $updatedK['metaTokens'];
375 |                         // We split up this attribute's key into pieces.
376 |                         if ( $expandedA->srcOffsets->key ) {
377 |                             $expandedA->srcOffsets->key->end = null;
378 |                         }
379 |                     } else {
380 |                         // Maybe scenario 2 from the documentation comment above.
381 |                         $updatedK = self::stripMetaTags( $env, $expandedK, $wrapTemplates );
382 |                         PHPUtils::pushArray( $annotationTypes, $updatedK['annotationType'] );
383 |                         $expandedK = $updatedK['value'];
384 |                     }
385 | 
386 |                     $expandedA->k = $expandedK;
387 | 
388 |                     // Check if we need to deal with the Reparse-KV-string scenario.
389 |                     // (See documentation comment above.)
390 |                     //
391 |                     // Don't incorrectly reparse the kv string for parser functions.
392 |                     // Ex: "#ifexpr" parser function expects the "=" equality operator.
393 |                     // We encounter those in "standalone" mode (used to expand
394 |                     // templated template targets).
395 |                     if ( $expandedA->v === '' && empty( $this->options['standalone'] ) ) {
396 |                         // Extract a parsable string from the token array.
397 |                         // Trim whitespace to ensure tokenizer isn't tripped up
398 |                         // by the presence of unnecessary whitespace.
399 |                         $kStr = trim( TokenUtils::tokensToString( $expandedK, false, [
400 |                             // These tokens haven't been expanded to DOM yet
401 |                             // so unpacking them here is justifiable
402 |                             'unpackDOMFragments' => true,
403 |                             'env' => $env
404 |                         ] ) );
405 |                         $rule = $nlTkOkay ? 'generic_newline_attributes' : 'table_attributes';
406 |                         $kvs = str_contains( $kStr, '=' ) ?
407 |                             $this->tokenizer->tokenizeAs( $kStr, $rule, /* sol */true ) : null;
408 |                         if ( $kvs ) {
409 |                             // At this point, templates should have been expanded.
410 |                             // Returning a template token here probably means that
411 |                             // when we just converted to string and reparsed, we failed
412 |                             // to expand the template. This can be particularly bad
413 |                             // when we make iterative calls to expand template names.
414 |                             // So, give up template expansion and convert them to strings.
415 |                             foreach ( $kvs as $kv ) {
416 |                                 $kv->k = self::tplToksToString( $kv->k );
417 |                                 $kv->v = self::tplToksToString( $kv->v );
418 | 
419 |                                 // $kStr is based on running tokensToString on $expandedK.
420 |                                 // So, $kStr might have dropped HTML tags, etc. Given that,
421 |                                 // we can no longer reliably compute offsets for these
422 |                                 // new key/value pairs. We could try to be more smart here,
423 |                                 // but it is not worth the complexity.
424 |                                 $kv->srcOffsets = null;
425 |                             }
426 |                             // SSS FIXME: Collect all keys here, not just the first key
427 |                             // i.e. in a string like {{1x|1=id='v1' title='foo' style='..'}}
428 |                             // that string is setting attributes for [id, title, style], not just id.
429 |                             //
430 |                             // That requires the ability for the data-mw.attribs[i].txt to be an array.
431 |                             // However, the spec at [[mw:Specs/HTML#Generated_attributes_of_HTML_tags]]
432 |                             // says:
433 |                             //
434 |                             //    "This spec also assumes that a template can only
435 |                             //     generate one attribute rather than multiple attributes."
436 |                             //
437 |                             // So, revision of the spec is another FIXME at which point this code can
438 |                             // be updated to reflect the revised spec.
439 |                             $expandedK = $kvs[0]->k;
440 |                             $reparsedKV = true;
441 |                             if ( !$newAttrs ) {
442 |                                 $newAttrs = $i === 0 ? [] : array_slice( $expandedAttrs, 0, $i );
443 |                             }
444 |                             PHPUtils::pushArray( $newAttrs, $kvs );
445 |                         }
446 |                     }
447 |                 }
448 | 
449 |                 // We have a potentially expanded value.
450 |                 // Check if the value came from a template/extension expansion.
451 |                 if ( is_string( $expandedK ) && !str_starts_with( $expandedK, 'mw:' )
452 |                     && is_array( $oldA->v )
453 |                 ) {
454 |                     $nlTkPos = self::nlTkIndex( $nlTkOkay, $expandedV, $wrapTemplates );
455 |                     if ( $nlTkPos !== -1 ) {
456 |                         // Scenario 1 from the documentation comment above.
457 |                         $valUsesMixedAttrContentTpl = true;
458 |                         $updatedV = self::splitTokens(
459 |                             $this->manager->getFrame(), $token, $nlTkPos,
460 |                             $expandedV, $wrapTemplates
461 |                         );
462 |                         $expandedV = $updatedV['preNLBuf'];
463 |                         $postNLToks = $updatedV['postNLBuf'];
464 |                         $metaTokens = $updatedV['metaTokens'];
465 |                         // We split up this attribute's value into pieces.
466 |                         if ( $expandedA->srcOffsets->value ) {
467 |                             $expandedA->srcOffsets->value->end = null;
468 |                         }
469 |                     } else {
470 |                         // Maybe scenario 2 from the documentation comment above.
471 |                         $updatedV = self::stripMetaTags( $env, $expandedV, $wrapTemplates );
472 |                         PHPUtils::pushArray( $annotationTypes, $updatedV['annotationType'] );
473 |                         $expandedV = $updatedV['value'];
474 |                     }
475 |                     $expandedA->v = $expandedV;
476 |                 }
477 | 
478 |                 // Update data-mw to account for templated attributes.
479 |                 // For editability, set HTML property.
480 |                 if ( !empty( $updatedK['hasGeneratedContent'] ) ||
481 |                     !empty( $updatedV['hasGeneratedContent'] ) ||
482 |                     ( $reparsedKV && count( $metaTokens ) > 0 )
483 |                 ) {
484 |                     $key = TokenUtils::tokensToString( $expandedK );
485 |                     if ( !$tmpDataMW ) {
486 |                         $tmpDataMW = [];
487 |                     }
488 | 
489 |                     // For the $(key|val)UsesMixedAttrContentTpl checks below,
490 |                     // it is incorrect to assign the HTML for the original wikitext
491 |                     // string since the content part will get duplicated in both
492 |                     // this data-mw and in the actual body of the table (for example)
493 |                     // and cause bugs like T249740.
494 |                     //
495 |                     // So, in this case, we assign just the key/value part of the HTML
496 |                     // ($expandedA->k or $expandedA->v), but we mark it uneditable
497 |                     // because we cannot really edit just the key/value of the attribute
498 |                     // on its own because it is only a part of the template's output.
499 |                     if ( $reparsedKV ) {
500 |                         // If we encountered a reparse-KV-string scenario,
501 |                         // we set the value's HTML to [] since we can edit
502 |                         // the transclusion either via the key's HTML or the
503 |                         // value's HTML, but not both.
504 |                         $keyHTML = $keyUsesMixedAttrContentTpl ? $expandedA->k : $origK;
505 |                         $valHTML = [];
506 |                     } else {
507 |                         Assert::invariant( !$keyUsesMixedAttrContentTpl,
508 |                             "If reparseKV was false, and we had a mixed attr-content template, " .
509 |                             "we should have landed in the valUsesMixedAttrContentTpl codepath." );
510 |                         $keyHTML = empty( $updatedK['hasGeneratedContent'] ) ? null : $origK;
511 |                         $valHTML = $valUsesMixedAttrContentTpl ? $expandedA->v : $origV;
512 |                     }
513 | 
514 |                     // FIXME: Ideally we would have called them ktext, khtml, vhtml
515 |                     // since in the serialized data-mw, the "k" and "v" key strings are dropped.
516 |                     //    [{ "ktxt":..., "khtml":... }, { "vhtml":... }]
517 |                     //         is clearer and less confusing than
518 |                     //    [{ "txt":..., "html":... }, { "html":... }]
519 |                     $tmpDataMW[$key] = [
520 |                         // @phan-suppress-next-line PhanCoalescingNeverNullInLoop $expandedA is nullable
521 |                         'k' => [ 'txt' => $key, 'srcOffsets' => $expandedA->srcOffsets->key ?? null ],
522 |                         // FIXME: Why is 'txt' missing? Why are we not checking for [] ?
523 |                         // @phan-suppress-next-line PhanCoalescingNeverNullInLoop $expandedA is nullable
524 |                         'v' => [ 'html' => $valHTML, 'srcOffsets' => $expandedA->srcOffsets->value ?? null ]
525 |                     ];
526 | 
527 |                     if ( $keyHTML !== null ) {
528 |                         $tmpDataMW[$key]['k']['html'] = $keyHTML;
529 |                     }
530 |                     if ( $keyUsesMixedAttrContentTpl ) {
531 |                         $tmpDataMW[$key]['k']['uneditable'] = true;
532 |                     }
533 |                     if ( $valUsesMixedAttrContentTpl ) {
534 |                         $tmpDataMW[$key]['v']['uneditable'] = true;
535 |                     }
536 |                 }
537 |             }
538 | 
539 |             // Update newAttrs
540 |             if ( $newAttrs && !$reparsedKV ) {
541 |                 $newAttrs[] = $expandedA;
542 |             }
543 |         }
544 | 
545 |         $token->attribs = $newAttrs ?? $expandedAttrs;
546 | 
547 |         // If the token already has an about, it already has transclusion/extension
548 |         // wrapping. No need to record information about templated attributes in addition.
549 |         //
550 |         // FIXME: If there is a real use case for extension attributes getting templated,
551 |         // this check can be relaxed to allow that.
552 |         // https://gerrit.wikimedia.org/r/#/c/65575 has some reference code that can be used then.
553 | 
554 |         if ( !$token->getAttributeV( 'about' ) && $tmpDataMW && count( $tmpDataMW ) > 0 ) {
555 |             // Flatten k-v pairs.
556 |             $vals = [];
557 |             foreach ( $tmpDataMW as $obj ) {
558 |                 $vals[] = $obj['k'];
559 |                 $vals[] = $obj['v'];
560 |             }
561 | 
562 |             // Clone the vals since they'll be passed to another pipeline
563 |             // for expanding, which may destructively mutate them in the process.
564 |             //
565 |             // This is a problem since subsequent handlers to the
566 |             // AttributeExpander may interact with the original tokens still
567 |             // present as attributes of `token`.
568 |             //
569 |             // For example, while treebuilding, the object holding dataParsoid
570 |             // of a token is reused as the data-parsoid attribute of the
571 |             // corresponding node.  Thus, when we get to the DOM cleanup pass,
572 |             // unsetting properties changes the token as well.  This was
573 |             // the issue when an "href" was expanded and then the
574 |             // ExternalLinkHandler tried to call tokensToString on it,
575 |             // resulting in a transcluded entity missing its src (which, by the way,
576 |             // had already been clobbered by WrapTemplates, similar to T214241).
577 |             //
578 |             // The general principle here being, don't share tokens between
579 |             // pipelines.
580 |             $vals = Utils::clone( $vals );
581 | 
582 |             // Expand all token arrays to DOM.
583 |             $eVals = PipelineUtils::expandAttrValuesToDOM(
584 |                 $this->env, $this->manager->getFrame(), $vals,
585 |                 $this->options['expandTemplates'],
586 |                 $this->options['inTemplate']
587 |             );
588 | 
589 |             // Rebuild flattened k-v pairs.
590 |             $expAttrs = [];
591 |             for ( $j = 0;  $j < count( $eVals );  $j += 2 ) {
592 |                 $expAttrs[] = new DataMwAttrib( $eVals[$j], $eVals[$j + 1] );
593 |             }
594 | 
595 |             // Mark token as having expanded attrs.
596 |             //
597 |             // Template tokens are omitted because the attribute expander is
598 |             // just being used to resolve the template target.
599 |             if ( $token->getName() !== 'template' ) {
600 |                 $token->addAttribute( 'about', $this->env->newAboutId() );
601 |                 $token->addSpaceSeparatedAttribute( 'typeof', 'mw:ExpandedAttrs' );
602 |                 foreach ( $annotationTypes as $annotationType ) {
603 |                     $token->addSpaceSeparatedAttribute( 'typeof', 'mw:Annotation/' . $annotationType );
604 |                 }
605 |                 $token->dataMw = new DataMw( [ 'attribs' => $expAttrs ] );
606 |             }
607 |         }
608 | 
609 |         return new TokenHandlerResult(
610 |             array_merge( $metaTokens, [ $token ], $postNLToks )
611 |         );
612 |     }
613 | |
614 |     /**
615 |      * Processes any attribute keys and values that are not simple strings.
616 |      * (Ex: Templated styles)
617 |      *
618 |      * @param Token $token Token whose attrs being expanded.
619 |      * @return TokenHandlerResult Result of buildExpandedAttrs() on the transformed attributes
620 |      */
621 |     private function processComplexAttributes( Token $token ): TokenHandlerResult {
622 |         $atm = new AttributeTransformManager( $this->manager->getFrame(), [
623 |             'expandTemplates' => $this->options['expandTemplates'],
624 |             'inTemplate' => $this->options['inTemplate']
625 |         ] );
626 |         return $this->buildExpandedAttrs( $token, $atm->process( $token->attribs ) );
627 |     }
628 | |
629 |     /**
630 |      * Expand the first attribute of the token -- usually needed to support
631 |      * template tokens where the template target itself is a complex attribute.
632 |      *
633 |      * @param Token $token Token whose first attribute is being expanded.
634 |      * @return TokenHandlerResult Result of buildExpandedAttrs() with only attribs[0] replaced by its expansion
635 |      */
636 |     public function expandFirstAttribute( Token $token ): TokenHandlerResult {
637 |         $atm = new AttributeTransformManager( $this->manager->getFrame(), [
638 |             'expandTemplates' => $this->options['expandTemplates'],
639 |             'inTemplate' => $this->options['inTemplate']
640 |         ] );
641 |         $expandedAttrs = $atm->process( [ $token->attribs[0] ] );
642 |         return $this->buildExpandedAttrs(
643 |             $token,
644 |             array_replace( $token->attribs, [ 0 => $expandedAttrs[0] ] )
645 |         );
646 |     }
647 | |
648 |     /**
649 |      * Token handler.
650 |      *
651 |      * For tokens that might have complex attributes, this handler
652 |      * processes / expands them.
653 |      * (Ex: Templated styles)
654 |      *
655 |      * @param Token|string $token Token whose attrs being expanded.
656 |      * @return TokenHandlerResult|null Null if $token is skipped (not a tag token, no attribs, or an excluded meta / dom-fragment token)
657 |      */
658 |     public function onAny( $token ): ?TokenHandlerResult {
659 |         if (
660 |             !( $token instanceof TagTk || $token instanceof SelfclosingTagTk ) ||
661 |             !count( $token->attribs )
662 |         ) {
663 |             return null;
664 |         }
665 | 
666 |         $name = $token->getName();
667 |         $property = $token->getAttributeV( 'property' ) ?? '';
668 |         $typeOf = $token->getAttributeV( 'typeof' ) ?? '';
669 | 
670 |         if (
671 |             // Do not process dom-fragment tokens: a separate handler deals with them.
672 |             $name === 'mw:dom-fragment-token' ||
673 |             (
674 |                 $name === 'meta' &&
675 |                 (
676 |                     // Parsoid generated metas don't need expansion
677 |                     preg_match( '/mw:(Placeholder|Transclusion|Param|Includes)/', $typeOf ) ||
678 |                     // The TemplateHandler runs before the AttributeExpander and
679 |                     // magic words masquerading as templates may themselves be
680 |                     // templated (as in templated template names).
681 |                     // See TemplateHandler::processSpecialMagicWord()
682 |                     // So, we may see page properties that have already been
683 |                     // expanded and annotated with mw:ExpandedAttrs.  We return
684 |                     // early to avoid the assertion below, at the expense of
685 |                     // perhaps not catching other cases where tokens are passed
686 |                     // through here doubly by mistake.
687 |                     ( preg_match( '/mw:(PageProp)/', $property ) &&
688 |                         str_contains( $typeOf, 'mw:ExpandedAttrs' ) )
689 |                 )
690 |             )
691 |         ) {
692 |             return null;
693 |         }
694 | 
695 |         Assert::invariant(
696 |             !str_contains( $typeOf, 'mw:ExpandedAttrs' ),
697 |             "Expanding an already expanded token, that's a no-no."
698 |         );
699 | 
700 |         return $this->processComplexAttributes( $token );
701 |     }
702 | } |