Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
84.27% |
241 / 286 |
|
25.00% |
2 / 8 |
CRAP | |
0.00% |
0 / 1 |
ComputeDSR | |
84.27% |
241 / 286 |
|
25.00% |
2 / 8 |
244.18 | |
0.00% |
0 / 1 |
tsrSpansTagDOM | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
acceptableInconsistency | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
6 | |||
computeListEltWidth | |
78.57% |
11 / 14 |
|
0.00% |
0 / 1 |
10.98 | |||
computeATagWidth | |
80.00% |
12 / 15 |
|
0.00% |
0 / 1 |
11.97 | |||
computeTagWidths | |
92.59% |
25 / 27 |
|
0.00% |
0 / 1 |
15.09 | |||
trace | |
33.33% |
2 / 6 |
|
0.00% |
0 / 1 |
5.67 | |||
computeNodeDSR | |
83.67% |
164 / 196 |
|
0.00% |
0 / 1 |
147.28 | |||
run | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
3.01 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
5 | |
6 | use Wikimedia\Parsoid\Config\Env; |
7 | use Wikimedia\Parsoid\Core\DomSourceRange; |
8 | use Wikimedia\Parsoid\DOM\Comment; |
9 | use Wikimedia\Parsoid\DOM\Element; |
10 | use Wikimedia\Parsoid\DOM\Node; |
11 | use Wikimedia\Parsoid\DOM\Text; |
12 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
13 | use Wikimedia\Parsoid\Utils\DOMCompat; |
14 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
15 | use Wikimedia\Parsoid\Utils\DOMUtils; |
16 | use Wikimedia\Parsoid\Utils\PHPUtils; |
17 | use Wikimedia\Parsoid\Utils\Utils; |
18 | use Wikimedia\Parsoid\Utils\WTUtils; |
19 | use Wikimedia\Parsoid\Wikitext\Consts; |
20 | use Wikimedia\Parsoid\Wt2Html\Frame; |
21 | use Wikimedia\Parsoid\Wt2Html\TT\PreHandler; |
22 | use Wikimedia\Parsoid\Wt2Html\Wt2HtmlDOMProcessor; |
23 | |
24 | class ComputeDSR implements Wt2HtmlDOMProcessor { |
/**
 * Lookup set (tag name => true) of tags whose TSR info is only valid
 * for the opening tag. It is consulted by tsrSpansTagDOM().
 *
 * For an explanation of what TSR is, see ComputeDSR::computeNodeDSR()
 *
 * On tags that are not listed here (for example a and meta-marker
 * tags), the tsr spans the entire DOM subtree, not just the tag.
 *
 * hr and br are listed as well: they are void elements, so they have
 * no content that the tsr could span beyond the tag itself.
 *
 * This code is not in Wikitext\Consts.php because this
 * information is Parsoid-implementation-specific.
 */
private const WT_TAGS_WITH_LIMITED_TSR = [
	"b" => true,
	"i" => true,
	"h1" => true,
	"h2" => true,
	"h3" => true,
	"h4" => true,
	"h5" => true,
	"h6" => true,
	"ul" => true,
	"ol" => true,
	"dl" => true,
	"li" => true,
	"dt" => true,
	"dd" => true,
	"table" => true,
	"caption" => true,
	"tr" => true,
	"td" => true,
	"th" => true,
	"hr" => true, // void element
	"br" => true, // void element
	"pre" => true,
];
60 | |
/**
 * Decide whether the tsr recorded in $parsoidData covers the whole DOM
 * subtree rooted at $n (true) or only its opening tag (false).
 *
 * The tsr is tag-only for:
 * - tags listed in WT_TAGS_WITH_LIMITED_TSR
 * - nodes whose typeof is mw:Placeholder or mw:LanguageVariant
 *   (Parsoid-generated constructs)
 * - nodes carrying the literal-HTML marker
 *
 * @param Element $n
 * @param DataParsoid $parsoidData
 * @return bool
 */
private function tsrSpansTagDOM( Element $n, DataParsoid $parsoidData ): bool {
	$tagName = DOMCompat::nodeName( $n );

	// Tags known to have tag-specific tsr
	if ( isset( self::WT_TAGS_WITH_LIMITED_TSR[$tagName] ) ) {
		return false;
	}

	// Placeholders and language variants
	if ( DOMUtils::matchTypeOf( $n, '/^mw:(Placeholder|LanguageVariant)$/D' ) ) {
		return false;
	}

	// Literal HTML tags only carry tsr for the tag itself
	return !WTUtils::hasLiteralHTMLMarker( $parsoidData );
}
83 | |
/**
 * Tell whether a mismatch between the two independently computed start
 * offsets ($cs and $s) is expected for this node, so that the caller
 * can skip the warning.
 *
 * $cs and $s are accepted for context but are not consulted by the
 * current rules.
 *
 * @param array $opts
 * @param Node $node
 * @param int $cs
 * @param int $s
 * @return bool
 */
private function acceptableInconsistency( array $opts, Node $node, int $cs, int $s ): bool {
	// Rule 1: links produced by URL-link or magic-link syntax.
	// The canonicalized/decoded href becomes the link text, so its
	// width need not match the tsr width of the source wikitext
	// (e.g. urls with encoded chars, or non-canonical spacing in
	// magic links such as "RFC 123").
	$isAutoLink = $node instanceof Element && (
		WTUtils::isATagFromURLLinkSyntax( $node ) ||
		WTUtils::isATagFromMagicLinkSyntax( $node )
	);
	if ( $isAutoLink ) {
		return true;
	}

	// Rule 2: attribute expansion.
	// There are no source offsets for attributes, so the top-level
	// node of an attribute's DOM produces spurious cs/s mismatches.
	// $opts['attrExpansion'] flags that we are processing an attribute.
	//
	// Other scenarios may be added here later.
	return isset( $opts['attrExpansion'] ) && DOMUtils::atTheTop( $node );
}
124 | |
/**
 * Compute the width of the wikitext that produced this list item's
 * opening tag. (The closing tag width is always 0 for lists.)
 *
 * The width is the number of list items found while walking from $li
 * up through its ancestors, i.e. the list nesting depth.
 *
 * @param Element $li
 * @return int
 */
private function computeListEltWidth( Element $li ): int {
	// Special case: an item that is the first child of its parent and
	// itself starts with a nested list sits on a chain of nested lists
	// and doesn't get a width.
	if ( !$li->previousSibling ) {
		$first = $li->firstChild;
		if ( $first && DOMUtils::isList( $first ) ) {
			return 0;
		}
	}

	// This is the crux of the algorithm in DOMHandler::getListBullets()
	$nesting = 0;
	for ( $cur = $li; !DOMUtils::atTheTop( $cur ); $cur = $cur->parentNode ) {
		$dp = DOMDataUtils::getDataParsoid( $cur );

		if ( DOMUtils::isListOrListItem( $cur ) ) {
			// Only items add to the depth; list containers are skipped
			$nesting += DOMUtils::isListItem( $cur ) ? 1 : 0;
			continue;
		}

		// The only non-list ancestors we climb through are literal HTML
		// nodes with both their start and end tags auto-inserted.
		$isAutoInsertedHtml = WTUtils::isLiteralHTMLNode( $cur ) &&
			!empty( $dp->autoInsertedStart ) && !empty( $dp->autoInsertedEnd );
		if ( !$isAutoInsertedHtml ) {
			break;
		}
	}

	return $nesting;
}
164 | |
/**
 * Compute wikitext string lengths that contribute to this
 * anchor's opening (<a>) and closing (</a>) tags.
 *
 * Tag widths are computed as per this logic:
 *
 * 1. [[Foo|bar]] <-- piped mw:WikiLink
 *    -> start-tag: "[[Foo|"
 *    -> content  : "bar"
 *    -> end-tag  : "]]"
 *
 * 2. [[Foo]] <-- non-piped mw:WikiLink
 *    -> start-tag: "[["
 *    -> content  : "Foo"
 *    -> end-tag  : "]]"
 *
 * 3. [[{{1x|Foo}}|Foo]] <-- tpl-attr mw:WikiLink
 *    Don't bother setting tag widths since dp->sa['href'] will be
 *    the expanded target and won't correspond to original source.
 *
 * 4. [http://wp.org foo] <-- mw:ExtLink
 *    -> start-tag: "[http://wp.org "
 *    -> content  : "foo"
 *    -> end-tag  : "]"
 *
 * 5. URL links and magic links have zero-width tags.
 *
 * @param Element $node
 * @param ?DataParsoid $dp
 * @return int[]|null [ start tag width, end tag width ], or null when
 *   the widths cannot be determined
 */
private function computeATagWidth(
	Element $node, ?DataParsoid $dp
): ?array {
	if ( !$dp ) {
		return null;
	}

	if ( WTUtils::isATagFromWikiLinkSyntax( $node ) && !WTUtils::hasExpandedAttrsType( $node ) ) {
		if ( isset( $dp->stx ) && $dp->stx === "piped" ) {
			$href = $dp->sa['href'] ?? null;
			// Test for a non-empty string explicitly: a plain truthiness
			// check would reject the valid link target "0" ([[0|text]]),
			// because PHP treats the string "0" as false.
			if ( !is_string( $href ) || $href === '' ) {
				return null;
			}
			// "[[" + target + "|" for the start tag, "]]" for the end tag
			return [ strlen( $href ) + 3, 2 ];
		}
		return [ 2, 2 ];
	}

	if ( isset( $dp->tsr ) && WTUtils::isATagFromExtLinkSyntax( $node ) ) {
		// The start tag runs from the tag's tsr start up to where the
		// link content begins; the end tag is the closing "]".
		// NOTE(review): assumes $dp->tmp->extLinkContentOffsets is always
		// set for ext links -- confirm against the tokenizer.
		return [ $dp->tmp->extLinkContentOffsets->start - $dp->tsr->start, 1 ];
	}

	if ( WTUtils::isATagFromURLLinkSyntax( $node ) ||
		WTUtils::isATagFromMagicLinkSyntax( $node )
	) {
		return [ 0, 0 ];
	}

	return null;
}
224 | |
/**
 * Work out how much wikitext this node's opening and closing tags
 * account for.
 *
 * Widths passed in by the caller are kept unless one of the overriding
 * cases below applies; widths passed in as null are filled in where
 * possible. Either returned width may still be null when it is unknown.
 *
 * @param int|null $stWidth Start tag width
 * @param int|null $etWidth End tag width
 * @param Element $node
 * @param DataParsoid $dp
 * @return int[] Start and end tag widths
 */
private function computeTagWidths( $stWidth, $etWidth, Element $node, DataParsoid $dp ): array {
	// Extension tags carry their own widths, which take precedence.
	if ( isset( $dp->extTagOffsets ) ) {
		$offsets = $dp->extTagOffsets;
		return [ $offsets->openWidth, $offsets->closeWidth ];
	}

	// Literal HTML: keep the incoming widths, except that a
	// self-closed tag has no end tag.
	if ( WTUtils::hasLiteralHTMLMarker( $dp ) ) {
		return [ $stWidth, !empty( $dp->selfClose ) ? 0 : $etWidth ];
	}

	// Language variants are delimited by "-{" and "}-".
	if ( DOMUtils::hasTypeOf( $node, 'mw:LanguageVariant' ) ) {
		return [ 2, 2 ];
	}

	$tagName = DOMCompat::nodeName( $node );

	// 'tr' tags not in the original source have zero width
	if ( $tagName === 'tr' && !isset( $dp->startTagSrc ) ) {
		return [ 0, 0 ];
	}

	$widths = Consts::$WtTagWidths[$tagName] ?? null;

	if ( $stWidth === null ) {
		// We didn't have a tsr to tell us how wide this tag was.
		if ( $tagName === 'a' ) {
			// For links, the computed pair also replaces the table entry,
			// so it feeds the end tag width below as well.
			$widths = $this->computeATagWidth( $node, $dp );
			$stWidth = $widths ? $widths[0] : null;
		} elseif ( $tagName === 'li' || $tagName === 'dd' ) {
			$stWidth = $this->computeListEltWidth( $node );
		} elseif ( $widths ) {
			$stWidth = $widths[0];
		}
	}

	if ( $etWidth === null && $widths ) {
		$etWidth = $widths[1];
	}

	return [ $stWidth, $etWidth ];
}
278 | |
/**
 * Emit a message on the "trace/dsr" log channel.
 *
 * The message is the concatenation of all $args, in order: strings are
 * used verbatim and every other value is JSON-encoded. The message is
 * built inside a closure handed to the logger (presumably so that the
 * encoding work only happens when that channel is enabled -- confirm
 * against Env::log()).
 *
 * @param Env $env
 * @param mixed ...$args
 */
private function trace( Env $env, ...$args ): void {
	$render = static function () use ( $args ) {
		return implode( '', array_map(
			static fn ( $piece ) => is_string( $piece ) ? $piece : PHPUtils::jsonEncode( $piece ),
			$args
		) );
	};

	$env->log( "trace/dsr", $render );
}
292 | |
293 | /** |
294 | * TSR = "Tag Source Range". Start and end offsets giving the location |
295 | * where the tag showed up in the original source. |
296 | * |
297 | * DSR = "DOM Source Range". dsr->start and dsr->end are open and end, |
298 | * dsr->openWidth and dsr->closeWidth are widths of the container tag. |
299 | * |
300 | * TSR is set by the tokenizer. In most cases, it only applies to the |
301 | * specific tag (opening or closing). However, for self-closing |
302 | * tags that the tokenizer generates, the TSR values applies to the entire |
303 | * DOM subtree (opening tag + content + closing tag). |
304 | * |
305 | * Ex: So [[foo]] will get tokenized to a SelfClosingTagTk(...) with a TSR |
306 | * value of [0,7]. The DSR algorithm will then use that info and assign |
307 | * the a-tag rooted at the <a href='...'>foo</a> DOM subtree a DSR value of |
308 | * [0,7,2,2], where 2 and 2 refer to the opening and closing tag widths. |
309 | * |
310 | * [s,e) -- if defined, start/end position of wikitext source that generated |
311 | * node's subtree |
312 | * |
313 | * @param Frame $frame |
314 | * @param Node $node node to process |
315 | * @param ?int $s start position, inclusive |
316 | * @param ?int $e end position, exclusive |
317 | * @param int $dsrCorrection |
318 | * @param array $opts |
319 | * @return array |
320 | */ |
321 | private function computeNodeDSR( |
322 | Frame $frame, Node $node, ?int $s, ?int $e, int $dsrCorrection, |
323 | array $opts |
324 | ): array { |
325 | $env = $frame->getEnv(); |
326 | if ( $e === null && !$node->hasChildNodes() ) { |
327 | $e = $s; |
328 | } |
329 | |
330 | $this->trace( $env, "BEG: ", DOMCompat::nodeName( $node ), " with [s, e]=", [ $s, $e ] ); |
331 | |
332 | /** @var int|null $ce Child end */ |
333 | $ce = $e; |
334 | // Initialize $cs to $ce to handle the zero-children case properly |
335 | // if this $node has no child content, then the start and end for |
336 | // the child dom are indeed identical. Alternatively, we could |
337 | // explicitly code this check before everything and bypass this. |
338 | /** @var int|null $cs Child start */ |
339 | $cs = $ce; |
340 | |
341 | $child = $node->lastChild; |
342 | while ( $child !== null ) { |
343 | $prevChild = $child->previousSibling; |
344 | $origCE = $ce; |
345 | $cType = $child->nodeType; |
346 | $fosteredNode = false; |
347 | $cs = null; |
348 | |
349 | if ( $child instanceof Element ) { |
350 | $dp = DOMDataUtils::getDataParsoid( $child ); |
351 | $endTSR = $dp->tmp->endTSR ?? null; |
352 | if ( $endTSR ) { |
353 | $ce = $endTSR->end; |
354 | } |
355 | } else { |
356 | $endTSR = null; |
357 | } |
358 | |
359 | // StrippedTag marker tags will be removed and won't |
360 | // be around to fill in the missing gap. So, absorb its width into |
361 | // the DSR of its previous sibling. Currently, this fix is only for |
362 | // B and I tags where the fix is clear-cut and obvious. |
363 | $next = $child->nextSibling; |
364 | if ( $next instanceof Element ) { |
365 | $ndp = DOMDataUtils::getDataParsoid( $next ); |
366 | if ( |
367 | isset( $ndp->src ) && |
368 | DOMUtils::hasTypeOf( $next, 'mw:Placeholder/StrippedTag' ) && |
369 | // NOTE: This inlist check matches the case in CleanUp where |
370 | // the placeholders are not removed from the DOM. We don't want |
371 | // to move the width into the sibling here and then leave around a |
372 | // a zero width placeholder because serializeDOMNode only handles |
373 | // a few cases of zero width nodes, so we'll end up duplicating |
374 | // it from ->src. |
375 | !DOMUtils::isNestedInListItem( $next ) |
376 | ) { |
377 | if ( isset( Consts::$WTQuoteTags[$ndp->name] ) && |
378 | isset( Consts::$WTQuoteTags[DOMCompat::nodeName( $child )] ) ) { |
379 | $correction = strlen( $ndp->src ); |
380 | $ce += $correction; |
381 | $dsrCorrection = $correction; |
382 | if ( Utils::isValidDSR( $ndp->dsr ?? null ) ) { |
383 | // Record original DSR for the meta tag |
384 | // since it will now get corrected to zero width |
385 | // since child acquires its width-> |
386 | $ndp->getTemp()->origDSR = new DomSourceRange( |
387 | $ndp->dsr->start, $ndp->dsr->end, null, null ); |
388 | } |
389 | } |
390 | } |
391 | } |
392 | |
393 | $env->log( "trace/dsr", static function () use ( $child, $cs, $ce ) { |
394 | // slow, for debugging only |
395 | $i = 0; |
396 | foreach ( $child->parentNode->childNodes as $x ) { |
397 | if ( $x === $child ) { |
398 | break; |
399 | } |
400 | $i++; |
401 | } |
402 | return " CHILD: <" . DOMCompat::nodeName( $child->parentNode ) . ":" . $i . |
403 | ">=" . |
404 | ( $child instanceof Element ? '' : ( $child instanceof Text ? '#' : '!' ) ) . |
405 | ( ( $child instanceof Element ) ? |
406 | ( DOMCompat::nodeName( $child ) === 'meta' ? |
407 | DOMCompat::getOuterHTML( $child ) : DOMCompat::nodeName( $child ) ) : |
408 | PHPUtils::jsonEncode( $child->nodeValue ) ) . |
409 | " with " . PHPUtils::jsonEncode( [ $cs, $ce ] ); |
410 | } ); |
411 | |
412 | if ( $cType === XML_TEXT_NODE ) { |
413 | if ( $ce !== null ) { |
414 | $cs = $ce - strlen( $child->textContent ); |
415 | } |
416 | } elseif ( $cType === XML_COMMENT_NODE ) { |
417 | '@phan-var Comment $child'; // @var Comment $child |
418 | if ( $ce !== null ) { |
419 | // Decode HTML entities & re-encode as wikitext to find length |
420 | $cs = $ce - WTUtils::decodedCommentLength( $child ); |
421 | } |
422 | } elseif ( $cType === XML_ELEMENT_NODE ) { |
423 | DOMUtils::assertElt( $child ); |
424 | $dp = DOMDataUtils::getDataParsoid( $child ); |
425 | $tsr = $dp->tsr ?? null; |
426 | $oldCE = $tsr ? $tsr->end : null; |
427 | $propagateRight = false; |
428 | $stWidth = null; |
429 | $etWidth = null; |
430 | |
431 | $fosteredNode = $dp->fostered ?? false; |
432 | |
433 | // We are making dsr corrections to account for |
434 | // stripped tags (end tags usually). When stripping happens, |
435 | // in most common use cases, a corresponding end tag is added |
436 | // back elsewhere in the DOM. |
437 | // |
438 | // So, when an autoInsertedEnd tag is encountered and a matching |
439 | // dsr-correction is found, make a 1-time correction in the |
440 | // other direction. |
441 | // |
442 | // Currently, this fix is only for |
443 | // B and I tags where the fix is clear-cut and obvious. |
444 | if ( $ce !== null && !empty( $dp->autoInsertedEnd ) && |
445 | DOMUtils::isQuoteElt( $child ) |
446 | ) { |
447 | $correction = 3 + strlen( DOMCompat::nodeName( $child ) ); |
448 | if ( $correction === $dsrCorrection ) { |
449 | $ce -= $correction; |
450 | $dsrCorrection = 0; |
451 | } |
452 | } |
453 | |
454 | if ( DOMCompat::nodeName( $child ) === "meta" ) { |
455 | if ( $tsr ) { |
456 | if ( WTUtils::isTplMarkerMeta( $child ) ) { |
457 | // If this is a meta-marker tag (for templates, extensions), |
458 | // we have a new valid '$cs'. This marker also effectively resets tsr |
459 | // back to the top-level wikitext source range from nested template |
460 | // source range. |
461 | $cs = $tsr->start; |
462 | $ce = $tsr->end; |
463 | $propagateRight = true; |
464 | } else { |
465 | // All other meta-tags: <includeonly>, <noinclude>, etc. |
466 | $cs = $tsr->start; |
467 | $ce = $tsr->end; |
468 | } |
469 | } elseif ( PreHandler::isIndentPreWS( $child ) ) { |
470 | // Adjust start DSR; see PreHandler::newIndentPreWS() |
471 | $cs = $ce - 1; |
472 | } elseif ( DOMUtils::matchTypeOf( $child, '#^mw:Placeholder(/\w*)?$#D' ) && |
473 | $ce !== null && $dp->src |
474 | ) { |
475 | $cs = $ce - strlen( $dp->src ); |
476 | } |
477 | if ( isset( $dp->extTagOffsets ) ) { |
478 | $stWidth = $dp->extTagOffsets->openWidth; |
479 | $etWidth = $dp->extTagOffsets->closeWidth; |
480 | unset( $dp->extTagOffsets ); |
481 | } |
482 | } elseif ( DOMUtils::hasTypeOf( $child, "mw:Entity" ) && $ce !== null && $dp->src ) { |
483 | $cs = $ce - strlen( $dp->src ); |
484 | } else { |
485 | if ( DOMUtils::matchTypeOf( $child, '#^mw:Placeholder(/\w*)?$#D' ) && |
486 | $ce !== null && $dp->src |
487 | ) { |
488 | $cs = $ce - strlen( $dp->src ); |
489 | } else { |
490 | // Non-meta tags |
491 | if ( $endTSR ) { |
492 | $etWidth = $endTSR->length(); |
493 | } |
494 | if ( $tsr && empty( $dp->autoInsertedStart ) ) { |
495 | $cs = $tsr->start; |
496 | if ( $this->tsrSpansTagDOM( $child, $dp ) ) { |
497 | if ( $tsr->end !== null && $tsr->end > 0 ) { |
498 | $ce = $tsr->end; |
499 | $propagateRight = true; |
500 | } |
501 | } else { |
502 | $stWidth = $tsr->end - $tsr->start; |
503 | } |
504 | |
505 | $this->trace( $env, " TSR: ", $tsr, "; cs: ", $cs, "; ce: ", $ce ); |
506 | } elseif ( $s && $child->previousSibling === null ) { |
507 | $cs = $s; |
508 | } |
509 | } |
510 | |
511 | // Compute width of opening/closing tags for this dom $node |
512 | [ $stWidth, $etWidth ] = |
513 | $this->computeTagWidths( $stWidth, $etWidth, $child, $dp ); |
514 | |
515 | if ( !empty( $dp->autoInsertedStart ) ) { |
516 | $stWidth = 0; |
517 | } |
518 | if ( !empty( $dp->autoInsertedEnd ) ) { |
519 | $etWidth = 0; |
520 | } |
521 | |
522 | $ccs = $cs !== null && $stWidth !== null ? $cs + $stWidth : null; |
523 | $cce = $ce !== null && $etWidth !== null ? $ce - $etWidth : null; |
524 | |
525 | /* ----------------------------------------------------------------- |
526 | * Process DOM rooted at '$child'. |
527 | * |
528 | * NOTE: You might wonder why we are not checking for the zero-$children |
529 | * case. It is strictly not necessary and you can set newDsr directly. |
530 | * |
531 | * But, you have 2 options: [$ccs, $ccs] or [$cce, $cce]. Setting it to |
532 | * [$cce, $cce] would be consistent with the RTL approach. We should |
533 | * then compare $ccs and $cce and verify that they are identical. |
534 | * |
535 | * But, if we handled the zero-child case like the other scenarios, |
536 | * we don't have to worry about the above decisions and checks. |
537 | * ----------------------------------------------------------------- */ |
538 | |
539 | if ( WTUtils::isDOMFragmentWrapper( $child ) || |
540 | DOMUtils::hasTypeOf( $child, 'mw:LanguageVariant' ) |
541 | ) { |
542 | // Eliminate artificial $cs/s mismatch warnings since this is |
543 | // just a wrapper token with the right DSR but without any |
544 | // nested subtree that could account for the DSR span. |
545 | $newDsr = [ $ccs, $cce ]; |
546 | } elseif ( $child instanceof Element |
547 | && WTUtils::isATagFromWikiLinkSyntax( $child ) |
548 | && ( !isset( $dp->stx ) || $dp->stx !== "piped" ) ) { |
549 | /* ------------------------------------------------------------- |
550 | * This check here eliminates artificial DSR mismatches on content |
551 | * text of the A-node because of entity expansion, etc. |
552 | * |
553 | * Ex: [[7%25 solution]] will be rendered as: |
554 | * <a href=....>7% solution</a> |
555 | * If we descend into the text for the a-node, we'll have a 2-char |
556 | * DSR mismatch which will trigger artificial error warnings. |
557 | * |
558 | * In the non-piped link scenario, all dsr info is already present |
559 | * in the link target and so we get nothing new by processing |
560 | * content. |
561 | * ------------------------------------------------------------- */ |
562 | $newDsr = [ $ccs, $cce ]; |
563 | } else { |
564 | $env->log( "trace/dsr", static function () use ( |
565 | $env, $cs, $ce, $stWidth, $etWidth, $ccs, $cce |
566 | ) { |
567 | return " before-recursing:" . |
568 | "[cs,ce]=" . PHPUtils::jsonEncode( [ $cs, $ce ] ) . |
569 | "; [sw,ew]=" . PHPUtils::jsonEncode( [ $stWidth, $etWidth ] ) . |
570 | "; subtree-[cs,ce]=" . PHPUtils::jsonEncode( [ $ccs, $cce ] ); |
571 | } ); |
572 | |
573 | $this->trace( $env, "<recursion>" ); |
574 | $newDsr = $this->computeNodeDSR( $frame, $child, $ccs, $cce, $dsrCorrection, $opts ); |
575 | $this->trace( $env, "</recursion>" ); |
576 | } |
577 | |
578 | // $cs = min($child-dom-tree dsr->start - tag-width, current dsr->start) |
579 | if ( $stWidth !== null && $newDsr[0] !== null ) { |
580 | $newCs = $newDsr[0] - $stWidth; |
581 | if ( $cs === null || ( !$tsr && $newCs < $cs ) ) { |
582 | $cs = $newCs; |
583 | } |
584 | } |
585 | |
586 | // $ce = max($child-dom-tree dsr->end + tag-width, current dsr->end) |
587 | if ( $etWidth !== null && $newDsr[1] !== null ) { |
588 | $newCe = $newDsr[1] + $etWidth; |
589 | if ( $newCe > $ce ) { |
590 | $ce = $newCe; |
591 | } |
592 | } |
593 | } |
594 | |
595 | if ( $cs !== null || $ce !== null ) { |
596 | if ( $ce < 0 ) { |
597 | if ( !$fosteredNode ) { |
598 | $env->log( "info/dsr/negative", |
599 | "Negative DSR for node: " . DOMCompat::nodeName( $node ) . "; resetting to zero" ); |
600 | } |
601 | $ce = 0; |
602 | } |
603 | |
604 | // Fostered $nodes get a zero-dsr width range. |
605 | if ( $fosteredNode ) { |
606 | // Reset to 0, if necessary. |
607 | // This is critical to avoid duplication of fostered content in selser mode. |
608 | if ( $origCE < 0 ) { |
609 | $origCE = 0; |
610 | } |
611 | $dp->dsr = new DomSourceRange( $origCE, $origCE, null, null ); |
612 | } else { |
613 | $dp->dsr = new DomSourceRange( $cs, $ce, $stWidth, $etWidth ); |
614 | } |
615 | |
616 | $env->log( "trace/dsr", static function () use ( $frame, $child, $cs, $ce, $dp ) { |
617 | return " UPDATING " . DOMCompat::nodeName( $child ) . |
618 | " with " . PHPUtils::jsonEncode( [ $cs, $ce ] ) . |
619 | "; typeof: " . ( DOMCompat::getAttribute( $child, "typeof" ) ?? '' ); |
620 | } ); |
621 | } |
622 | |
623 | // Propagate any required changes to the right |
624 | // taking care not to cross-over into template content |
625 | if ( $ce !== null && |
626 | ( $propagateRight || $oldCE !== $ce || $e === null ) && |
627 | !WTUtils::isTplStartMarkerMeta( $child ) |
628 | ) { |
629 | $sibling = $child->nextSibling; |
630 | $newCE = $ce; |
631 | while ( $newCE !== null && $sibling && !WTUtils::isTplStartMarkerMeta( $sibling ) ) { |
632 | $nType = $sibling->nodeType; |
633 | if ( $nType === XML_TEXT_NODE ) { |
634 | $newCE += strlen( $sibling->textContent ); |
635 | } elseif ( $nType === XML_COMMENT_NODE ) { |
636 | '@phan-var Comment $sibling'; // @var Comment $sibling |
637 | $newCE += WTUtils::decodedCommentLength( $sibling ); |
638 | } elseif ( $nType === XML_ELEMENT_NODE ) { |
639 | DOMUtils::assertElt( $sibling ); |
640 | $siblingDP = DOMDataUtils::getDataParsoid( $sibling ); |
641 | $siblingDP->dsr ??= new DomSourceRange( null, null, null, null ); |
642 | $sdsrStart = $siblingDP->dsr->start; |
643 | if ( !empty( $siblingDP->fostered ) || |
644 | ( $sdsrStart !== null && $sdsrStart === $newCE ) || |
645 | ( $sdsrStart !== null && $sdsrStart < $newCE && isset( $siblingDP->tsr ) ) |
646 | ) { |
647 | // $sibling is fostered |
648 | // => nothing to propagate past it |
649 | // $sibling's dsr->start matches what we might propagate |
650 | // => nothing will change |
651 | // $sibling's dsr value came from tsr and it is not outside expected range |
652 | // => stop propagation so you don't overwrite it |
653 | break; |
654 | } |
655 | |
656 | // Update and move right |
657 | $env->log( "trace/dsr", static function () use ( $frame, $newCE, $sibling, $siblingDP ) { |
658 | return " CHANGING ce.start of " . DOMCompat::nodeName( $sibling ) . |
659 | " from " . $siblingDP->dsr->start . " to " . $newCE; |
660 | } ); |
661 | |
662 | $siblingDP->dsr->start = $newCE; |
663 | // If we have a dsr->end as well and since we updated |
664 | // dsr->start, we have to ensure that the two values don't |
665 | // introduce an inconsistency where dsr->start > dsr->end. |
666 | // Since we are in a LTR pass and are pushing updates |
667 | // forward, we are resolving it by updating dsr->end as |
668 | // well. There could be scenarios where this would be |
669 | // incorrect, but there is no universal fix here. |
670 | if ( $siblingDP->dsr->end !== null && $newCE > $siblingDP->dsr->end ) { |
671 | $siblingDP->dsr->end = $newCE; |
672 | } |
673 | $newCE = $siblingDP->dsr->end; |
674 | |
675 | } else { |
676 | break; |
677 | } |
678 | $sibling = $sibling->nextSibling; |
679 | } |
680 | |
681 | // Propagate new end information |
682 | if ( !$sibling ) { |
683 | $e = $newCE; |
684 | } |
685 | } |
686 | } |
687 | |
688 | // Don't change state if we processed a fostered $node |
689 | if ( $fosteredNode ) { |
690 | $ce = $origCE; |
691 | } else { |
692 | // $ce for next $child = $cs of current $child |
693 | $ce = $cs; |
694 | } |
695 | |
696 | $child = $prevChild; |
697 | } |
698 | |