Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 489 |
|
0.00% |
0 / 24 |
CRAP | |
0.00% |
0 / 1 |
DOMRangeBuilder | |
0.00% |
0 / 489 |
|
0.00% |
0 / 24 |
32220 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
updateDSRForFirstRangeNode | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
42 | |||
getRangeEndDSR | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
90 | |||
getRangeId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMRange | |
0.00% |
0 / 74 |
|
0.00% |
0 / 1 |
342 | |||
getStartConsideringFosteredContent | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
stripStartMeta | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
findToplevelEnclosingRange | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
recordTemplateInfo | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
30 | |||
introducesCycle | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
rangesOverlap | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
findTopLevelNonOverlappingRanges | |
0.00% |
0 / 109 |
|
0.00% |
0 / 1 |
1122 | |||
findFirstTemplatedNode | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
ensureElementsInRange | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
30 | |||
findEncapTarget | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
42 | |||
encapsulateTemplates | |
0.00% |
0 / 92 |
|
0.00% |
0 / 1 |
1332 | |||
addNodeRange | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getNodeRanges | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
findWrappableMetaRanges | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
findWrappableTemplateRangesRecursive | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
600 | |||
matchMetaType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
verifyTplInfoExpectation | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
execute | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
findEnclosingRange | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
5 | |
6 | use Error; |
7 | use SplObjectStorage; |
8 | use Wikimedia\Assert\Assert; |
9 | use Wikimedia\Assert\UnreachableException; |
10 | use Wikimedia\Parsoid\Config\Env; |
11 | use Wikimedia\Parsoid\Core\DomSourceRange; |
12 | use Wikimedia\Parsoid\Core\ElementRange; |
13 | use Wikimedia\Parsoid\DOM\Document; |
14 | use Wikimedia\Parsoid\DOM\Element; |
15 | use Wikimedia\Parsoid\DOM\Node; |
16 | use Wikimedia\Parsoid\DOM\Text; |
17 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
18 | use Wikimedia\Parsoid\NodeData\TempData; |
19 | use Wikimedia\Parsoid\NodeData\TemplateInfo; |
20 | use Wikimedia\Parsoid\Utils\DOMCompat; |
21 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
22 | use Wikimedia\Parsoid\Utils\DOMUtils; |
23 | use Wikimedia\Parsoid\Utils\PHPUtils; |
24 | use Wikimedia\Parsoid\Utils\Utils; |
25 | use Wikimedia\Parsoid\Utils\WTUtils; |
26 | use Wikimedia\Parsoid\Wt2Html\Frame; |
27 | |
28 | /** |
29 | * Template encapsulation happens in three steps. |
30 | * |
31 | * 1. findWrappableTemplateRanges |
32 | * |
33 | * Locate start and end metas. Walk upwards towards the root from both and |
34 | * find a common ancestor A. The subtree rooted at A is now effectively the |
35 | 	 * scope of the dom template output.
36 | * |
37 | * 2. findTopLevelNonOverlappingRanges |
38 | * |
39 | * Mark all nodes in a range and walk up to root from each range start to |
40 | * determine overlaps, nesting. Merge overlapping and nested ranges to find |
41 | * the subset of top-level non-overlapping ranges which will be wrapped as |
42 | * individual units. |
43 | * |
44 | * 3. encapsulateTemplates |
45 | * |
46 | * For each non-overlapping range, |
47 | * - compute a data-mw according to the DOM spec |
48 | * - replace the start / end meta markers with transclusion type and data-mw |
49 | * on the first DOM node |
50 | * - add about ids on all top-level nodes of the range |
51 | * |
52 | * This is a simple high-level overview of the 3 steps to help understand this |
53 | * code. |
54 | * |
55 | * FIXME: At some point, more of the details should be extracted and documented |
56 | * in pseudo-code as an algorithm. |
57 | * @module |
58 | */ |
59 | class DOMRangeBuilder { |
60 | |
/**
 * Lookup set (tested with isset()) of the table element names -- tbody
 * and tr -- into which getDOMRange() will migrate leading non-element
 * nodes even when those nodes include text.
 */
private const MAP_TBODY_TR = [
	'tbody' => true,
	'tr' => true
];

/** @var Document Document used to create wrapper <span> elements */
private $document;

/** @var Frame Frame that supplies the Env and the source text being processed */
private $frame;

/** @var Env Environment taken from $frame; used for trace logging and site-config lookups */
protected $env;

/**
 * @var SplObjectStorage Per-node range bookkeeping.
 * NOTE(review): presumably keyed by DOM node and filled by addNodeRange() -- confirm.
 */
protected $nodeRanges;

/**
 * For each compound template id, the ordered list of its parts: literal
 * wikitext strings interleaved with CompoundTemplateInfo objects
 * (appended by recordTemplateInfo()).
 * @var array<string|CompoundTemplateInfo>[]
 */
private $compoundTpls = [];

/** Are we generating spec 3.x HTML for parser functions */
private bool $v3PFOutput;

/** @var string Middle component of the trace channel name: "trace/{$traceType}/..." */
protected $traceType;
86 | |
/**
 * Set up a range builder for one document.
 *
 * @param Document $document Document in which wrapper elements get created
 * @param Frame $frame Frame providing the Env and source text
 */
public function __construct( Document $document, Frame $frame ) {
	$env = $frame->getEnv();
	// @phan-suppress-next-line PhanDeprecatedFunction
	$experimentalPFOutput = $env->getSiteConfig()->getMWConfigValue(
		'ParsoidExperimentalParserFunctionOutput'
	);

	$this->document = $document;
	$this->frame = $frame;
	$this->env = $env;
	$this->nodeRanges = new SplObjectStorage();
	// Channel component used for all "trace/.../..." log calls of this class
	$this->traceType = 'tplwrap';
	$this->v3PFOutput = (bool)$experimentalPFOutput;
}
100 | |
/**
 * Make $target carry the source-range info of $source, the original
 * first node of a range.
 *
 * If both nodes have an end offset and the target's end is already larger
 * than the source's, only the start offset is taken over; otherwise the
 * target receives a copy of the source DSR together with the source's
 * `src` value.
 *
 * @param Element $target Node that becomes the first node of the range
 * @param Element $source Node whose DSR describes the original range start
 */
protected function updateDSRForFirstRangeNode( Element $target, Element $source ): void {
	$srcDP = DOMDataUtils::getDataParsoid( $source );
	$tgtDP = DOMDataUtils::getDataParsoid( $target );

	// Since TSRs on template content tokens are cleared by the
	// template handler, all computed dsr values for template content
	// is always inferred from top-level content values and is safe.
	// So, do not overwrite a bigger end-dsr value.
	if ( isset( $srcDP->dsr ) && $srcDP->dsr->end !== null &&
		isset( $tgtDP->dsr ) && $tgtDP->dsr->end !== null &&
		$tgtDP->dsr->end > $srcDP->dsr->end
	) {
		$tgtDP->dsr->start = $srcDP->dsr->start ?? null;
	} else {
		// This branch is also reached when the source has no DSR at all;
		// `clone null` would be a fatal Error, so propagate "unknown" instead.
		$tgtDP->dsr = isset( $srcDP->dsr ) ? clone $srcDP->dsr : null;
		$tgtDP->src = $srcDP->src ?? null;
	}
}
119 | |
/**
 * Compute the DSR for the last node of a DOM range.
 *
 * An Element end node simply reports its own recorded DSR. For any other
 * end node the DSR is extrapolated from the nearest preceding sibling that
 * is an Element: the widths of the text/comment siblings in between are
 * added to that element's end offset. No further fallbacks are attempted.
 *
 * @param DOMRangeInfo $range
 * @return DomSourceRange|null Null when there is no preceding Element
 *   sibling or it has no DSR. If that sibling's DSR has a non-integer end
 *   offset, the sibling's DSR is returned unchanged.
 */
private static function getRangeEndDSR( DOMRangeInfo $range ): ?DomSourceRange {
	$last = $range->end;
	if ( $last instanceof Element ) {
		return DOMDataUtils::getDataParsoid( $last )->dsr ?? null;
	}

	// Width of the non-element siblings between $last and the closest
	// Element on its left.
	$gap = 0;
	for ( $sib = $last->previousSibling;
		$sib && !( $sib instanceof Element );
		$sib = $sib->previousSibling
	) {
		$gap += $sib instanceof Text
			? strlen( $sib->nodeValue )
			// Anything that is not text is handled as a comment
			// @phan-suppress-next-line PhanTypeMismatchArgumentSuperType
			: WTUtils::decodedCommentLength( $sib );
	}

	if ( !$sib ) {
		return null;
	}

	/**
	 * The loop above only stops on an Element or on null, and null was
	 * handled just now.
	 *
	 * @var Element $sib
	 */
	'@phan-var Element $sib';
	$anchorDsr = DOMDataUtils::getDataParsoid( $sib )->dsr ?? null;
	if ( !$anchorDsr || !is_int( $anchorDsr->end ?? null ) ) {
		return $anchorDsr;
	}

	$width = $last instanceof Text
		? strlen( $last->nodeValue )
		// A comment
		// @phan-suppress-next-line PhanTypeMismatchArgumentSuperType
		: WTUtils::decodedCommentLength( $last );
	$from = $anchorDsr->end + $gap;

	return new DomSourceRange( $from, $from + $width, null, null );
}
176 | |
/**
 * Look up the identifier of the range a node belongs to. For templates
 * this is the value of the node's "about" attribute.
 *
 * @param Element $node
 * @return string
 */
protected function getRangeId( Element $node ): string {
	$aboutId = DOMCompat::getAttribute( $node, 'about' );
	return $aboutId;
}
185 | |
/**
 * Build the DOM range delimited by a start / end marker pair.
 *
 * The range computed by findEnclosingRange() is post-processed here:
 * - empty content gets either an expanded range (fosterable position) or
 *   an empty <span> inserted before the end element,
 * - non-element content at a fosterable range start is migrated into the
 *   first following element, or the range is expanded to the parent,
 * - a non-element range start is wrapped in a <span>,
 * - fostered content preceding a table start is pulled into the range,
 * - the range is marked as flipped if its start is not before its end in
 *   sibling order.
 *
 * @param Element $startMeta
 * @param Element $endMeta
 * @param Element $endElem
 * @return DOMRangeInfo
 */
private function getDOMRange(
	Element $startMeta, Element $endMeta, Element $endElem
) {
	$range = $this->findEnclosingRange( $startMeta, $endMeta, $endElem );
	$startsInFosterablePosn = DOMUtils::isFosterablePosition( $range->start );
	$next = $range->start->nextSibling;

	// Detect empty content and handle them!
	if ( WTUtils::isTplMarkerMeta( $range->start ) && $next === $endElem ) {
		Assert::invariant(
			$range->start === $range->startElem,
			"Expected startElem to be same as range.start"
		);
		if ( $startsInFosterablePosn ) {
			// Expand range!
			$range->start = $range->end = $range->start->parentNode;
			$startsInFosterablePosn = false;
		} else {
			$emptySpan = $this->document->createElement( 'span' );
			$range->start->parentNode->insertBefore( $emptySpan, $endElem );
		}

		// Handle unwrappable content in fosterable positions
		// and expand template range, if required.
		// NOTE: Template marker meta tags are translated from comments
		// *after* the DOM has been built which is why they can show up in
		// fosterable positions in the DOM.
	} elseif ( $startsInFosterablePosn &&
		( !( $range->start instanceof Element ) ||
			( WTUtils::isTplMarkerMeta( $range->start ) &&
			( !( $next instanceof Element ) || WTUtils::isTplMarkerMeta( $next ) ) )
		)
	) {
		$rangeStartParent = $range->start->parentNode;

		// If we are in a table in a foster-element position, then all non-element
		// nodes will be white-space and comments. Skip over all of them and find
		// the first table content node.
		$noWS = true;
		$nodesToMigrate = [];
		$newStart = $range->start;
		$n = $range->start instanceof Element ? $next : $range->start;
		// Stop when the siblings run out: without the null check this loop
		// would keep dereferencing null and never terminate.
		while ( $n && !( $n instanceof Element ) ) {
			if ( $n instanceof Text ) {
				$noWS = false;
			}
			$nodesToMigrate[] = $n;
			$n = $n->nextSibling;
			$newStart = $n;
		}

		// As long as $newStart is a tr/tbody or we don't have whitespace
		// migrate $nodesToMigrate into $newStart. Pushing whitespace into
		// th/td/caption can change display semantics.
		if ( $newStart && ( $noWS || isset( self::MAP_TBODY_TR[DOMCompat::nodeName( $newStart )] ) ) ) {
			/**
			 * The point of the above loop is to ensure we're working
			 * with a Element if there is a $newStart.
			 *
			 * @var Element $newStart
			 */
			'@phan-var Element $newStart';
			$insertPosition = $newStart->firstChild;
			foreach ( $nodesToMigrate as $n ) {
				$newStart->insertBefore( $n, $insertPosition );
			}
			$range->start = $newStart;
			// Update dsr to point to original start
			$this->updateDSRForFirstRangeNode( $range->start, $range->startElem );
		} else {
			// If not, we are forced to expand the template range.
			$range->start = $range->end = $rangeStartParent;
		}
	}

	// Ensure range->start is an element node since we want to
	// add/update the data-parsoid attribute to it.
	if ( !( $range->start instanceof Element ) ) {
		$span = $this->document->createElement( 'span' );
		$range->start->parentNode->insertBefore( $span, $range->start );
		$span->appendChild( $range->start );
		$range->start = $span;
		$this->updateDSRForFirstRangeNode( $range->start, $range->startElem );
	}

	$range->start = $this->getStartConsideringFosteredContent( $range->start );

	// Use the negative test since it doesn't mark the range as flipped
	// if range.start === range.end
	if ( !DOMUtils::inSiblingOrder( $range->start, $range->end ) ) {
		// In foster-parenting situations, the end-meta tag (and hence range.end)
		// can show up before the range.start which would be the table itself.
		// So, we record this info for later analysis.
		$range->flipped = true;
	}

	$this->env->log(
		"trace/{$this->traceType}/findranges",
		static function () use ( &$range ) {
			$msg = '';
			$dp1 = DOMDataUtils::getDataParsoid( $range->start );
			$dp2 = DOMDataUtils::getDataParsoid( $range->end );
			// Temporarily detach the tmp data so that it is left out of
			// the JSON dumps below; it is restored before returning.
			$tmp1 = $dp1->tmp;
			$tmp2 = $dp2->tmp;
			$dp1->tmp = null;
			$dp2->tmp = null;
			$msg .= "\n----------------------------------------------";
			$msg .= "\nFound range : " . $range->id . '; flipped? ' . ( (string)$range->flipped ) .
				'; offset: ' . $range->startOffset;
			$msg .= "\nstart-elem : " . DOMCompat::getOuterHTML( $range->startElem ) . '; DP: ' .
				PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $range->startElem ) );
			$msg .= "\nend-elem : " . DOMCompat::getOuterHTML( $range->endElem ) . '; DP: ' .
				PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $range->endElem ) );
			$msg .= "\nstart : [TAG_ID " . ( $tmp1->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $range->start ) .
				'; DP: ' . PHPUtils::jsonEncode( $dp1 );
			$msg .= "\nend : [TAG_ID " . ( $tmp2->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $range->end ) .
				'; DP: ' . PHPUtils::jsonEncode( $dp2 );
			$msg .= "\n----------------------------------------------";
			$dp1->tmp = $tmp1;
			$dp2->tmp = $tmp2;
			return $msg;
		}
	);

	return $range;
}
322 | |
/**
 * Extend a range start backwards over fostered content.
 *
 * Only tables are affected: starting at a table, step back over every
 * immediately preceding Element sibling whose data-parsoid is flagged as
 * fostered and return the earliest one. Any other node is returned as-is.
 *
 * @param Node $node
 * @return Node
 */
protected function getStartConsideringFosteredContent( Node $node ): Node {
	if ( DOMCompat::nodeName( $node ) !== 'table' ) {
		return $node;
	}

	// If we have any fostered content, include it as well.
	while ( true ) {
		$candidate = $node->previousSibling;
		if ( !( $candidate instanceof Element ) ||
			empty( DOMDataUtils::getDataParsoid( $candidate )->fostered )
		) {
			break;
		}
		$node = $candidate;
	}

	return $node;
}
342 | |
/**
 * Neutralise a range start marker.
 *
 * A <meta> marker is removed from the DOM. Any other element keeps its
 * place and only loses the mw:* entries of its typeof attribute.
 *
 * @param Element $meta
 */
private static function stripStartMeta( Element $meta ): void {
	if ( DOMCompat::nodeName( $meta ) === 'meta' ) {
		$meta->parentNode->removeChild( $meta );
		return;
	}

	$typeOf = DOMCompat::getAttribute( $meta, 'typeof' );
	if ( $typeOf === null ) {
		// No typeof attribute: nothing to strip
		return;
	}

	// Remove mw:* from the typeof.
	$stripped = preg_replace( '/(?:^|\s)mw:[^\/]*(\/\S+|(?=$|\s))/D', '', $typeOf );
	$meta->setAttribute( 'typeof', $stripped );
}
355 | |
/**
 * Follow the "is nested in" links of $nestingInfo, starting at $startId,
 * until an id without a recorded parent is reached.
 *
 * @param array $nestingInfo Map from a range id to the id of the range it is nested in
 * @param ?string $startId Id to start from
 * @return ?string The last id reached ($startId itself if it has no entry)
 * @throws Error if the links form a cycle; cycles are expected to have
 *   been suppressed before this is called
 */
private static function findToplevelEnclosingRange(
	array $nestingInfo, ?string $startId
): ?string {
	$seen = [];
	for ( $rId = $startId; isset( $nestingInfo[$rId] ); $rId = $nestingInfo[$rId] ) {
		if ( isset( $seen[$rId] ) ) {
			throw new Error( "Found a cycle in tpl-range nesting where there shouldn't have been one." );
		}
		$seen[$rId] = true;
	}
	return $rId;
}
373 | |
/**
 * Append the template described by $range / $templateInfo to the part
 * list of the compound template $compoundTplId in $this->compoundTpls.
 *
 * In order, the list receives: the source text lying between the previous
 * part and this one (if there is a gap), the start element's unwrapped
 * wikitext (if any), and finally a CompoundTemplateInfo for the template.
 *
 * @param string $compoundTplId
 * @param DOMRangeInfo $range
 * @param TemplateInfo $templateInfo
 */
private function recordTemplateInfo(
	string $compoundTplId, DOMRangeInfo $range, TemplateInfo $templateInfo
): void {
	$this->compoundTpls[$compoundTplId] ??= [];

	$startDP = DOMDataUtils::getDataParsoid( $range->startElem );
	$startDsr = $startDP->dsr;

	// Record any intervening wikitext between templates that are part of
	// the same compound structure.
	if ( count( $this->compoundTpls[$compoundTplId] ) > 0 ) {
		$previous = PHPUtils::lastItem( $this->compoundTpls[$compoundTplId] );
		if ( $previous->dsr->end < $startDsr->start ) {
			$gapWidth = $startDsr->start - $previous->dsr->end;
			$this->compoundTpls[$compoundTplId][] = PHPUtils::safeSubstr(
				$this->frame->getSrcText(), $previous->dsr->end, $gapWidth );
		}
	}

	if ( !empty( $startDP->unwrappedWT ) ) {
		$this->compoundTpls[$compoundTplId][] = (string)$startDP->unwrappedWT;
	}

	// Get rid of src-offsets since they aren't needed anymore.
	foreach ( $templateInfo->paramInfos as $paramInfo ) {
		$paramInfo->srcOffsets = null;
	}

	$isParam = DOMUtils::hasTypeOf( $range->startElem, 'mw:Param' );
	$this->compoundTpls[$compoundTplId][] = new CompoundTemplateInfo(
		$startDsr, $templateInfo, $isParam
	);
}
414 | |
/**
 * Determine whether adding the given range would introduce a cycle in the
 * subsumedRanges graph.
 *
 * Nesting cycles with multiple ranges can show up because of foster
 * parenting scenarios if they are not detected and suppressed.
 *
 * The graph is walked from $end along its outgoing links. Reaching $start
 * means the new link would close a cycle. Every node walked is recorded
 * as visited, so a cycle already present in the graph is reported as well
 * instead of being walked forever.
 *
 * @param string $start The ID of the new range
 * @param string $end The ID of the other range
 * @param array<string,string> $subsumedRanges The subsumed ranges graph,
 *   encoded as an array in which each element maps one string range ID to
 *   another range ID
 * @return bool
 */
private static function introducesCycle( string $start, string $end, array $subsumedRanges ): bool {
	$visited = [ $start => true ];
	$elt = $subsumedRanges[$end] ?? null;
	while ( $elt ) {
		if ( !empty( $visited[$elt] ) ) {
			return true;
		}
		// Remember the node so that a loop not passing through $start
		// still terminates the walk.
		$visited[$elt] = true;
		$elt = $subsumedRanges[$elt] ?? null;
	}
	return false;
}
439 | |
/**
 * Determine whether two DOM ranges overlap.
 *
 * A single `inSiblingOrder` test is enough: the algorithm in
 * `findWrappableTemplateRanges` puts the start/end elements of
 * intersecting ranges on the same plane, and $prev / $curr are given in
 * textual order (which translates to DOM order). For a flipped range the
 * roles of its start and end nodes are swapped.
 *
 * @param DOMRangeInfo $prev
 * @param DOMRangeInfo $curr
 * @return bool
 */
private static function rangesOverlap( DOMRangeInfo $prev, DOMRangeInfo $curr ): bool {
	$prevLast = $prev->flipped ? $prev->start : $prev->end;
	$currFirst = $curr->flipped ? $curr->end : $curr->start;

	return DOMUtils::inSiblingOrder( $currFirst, $prevLast );
}
457 | |
/**
 * Identify the elements of $tplRanges that are non-overlapping.
 * Record template info in $this->compoundTpls as we go.
 *
 * Works in three passes:
 * 1. tag every top-level element of every range with that range,
 * 2. walk up from each range start to detect ranges nested in others,
 * 3. visit the ranges sorted by start offset, dropping nested ranges and
 *    merging each overlapping range into its predecessor.
 *
 * Side effects: the marker elements of nested / merged ranges are removed
 * from the DOM or stripped of their mw:* typeof, and the end of a range
 * that absorbs an overlapping one is moved.
 *
 * @param Node $docRoot
 * @param DOMRangeInfo[] $tplRanges The potentially overlapping ranges
 * @return DOMRangeInfo[] The non-overlapping ranges
 */
public function findTopLevelNonOverlappingRanges( Node $docRoot, array $tplRanges ): array {
	// For each node, assign an attribute that is a record of all
	// tpl ranges it belongs to at the top-level.
	foreach ( $tplRanges as $r ) {
		$n = !$r->flipped ? $r->start : $r->end;
		$e = !$r->flipped ? $r->end : $r->start;

		while ( $n ) {
			if ( $n instanceof Element ) {
				$this->addNodeRange( $n, $r );
				// Done
				if ( $n === $e ) {
					break;
				}
			}

			$n = $n->nextSibling;
		}
	}

	// In the first pass over `numRanges` below, `subsumedRanges` is used to
	// record purely the nested ranges. However, in the second pass, we also
	// add the relationships between overlapping ranges so that
	// `findToplevelEnclosingRange` can use that information to add `argInfo`
	// to the right `compoundTpls`. This scenario can come up when you have
	// three ranges, 1 intersecting with 2 but not 3, and 3 nested in 2.
	$subsumedRanges = [];

	// For each range r:(s, e), walk up from s --> docRoot and if any of
	// these nodes have tpl-ranges (besides r itself) assigned to them,
	// then r is nested in those other templates and can be ignored.
	foreach ( $tplRanges as $r ) {
		$n = $r->start;

		while ( $n !== $docRoot ) {
			$ranges = $this->getNodeRanges( $n );
			if ( $ranges ) {
				if ( $n !== $r->start ) {
					// 'r' is nested for sure
					// Record the outermost range in which 'r' is nested.
					$outermostId = null;
					$outermostOffset = null;
					foreach ( $ranges as $rangeId => $range ) {
						if ( $outermostId === null
							|| $range->startOffset < $outermostOffset
						) {
							$outermostId = $rangeId;
							$outermostOffset = $range->startOffset;
						}
					}
					$subsumedRanges[$r->id] = (string)$outermostId;
					break;
				} else {
					// n === r.start
					//
					// We have to make sure this is not an overlap scenario.
					// Find the ranges that r.start and r.end belong to and
					// compute their intersection. If this intersection has
					// another tpl range besides r itself, we have a winner!
					//
					// The code below does the above check efficiently.
					$eTpls = $this->getNodeRanges( $r->end );
					$foundNesting = false;

					foreach ( $ranges as $otherId => $other ) {
						// - Don't record nesting cycles.
						// - Record the outermost range in which 'r' is nested in.
						if ( $otherId !== $r->id &&
							!empty( $eTpls[$otherId] ) &&
							// When we have identical ranges, pick the range with
							// the larger offset to be subsumed.
							( $r->start !== $other->start ||
								$r->end !== $other->end ||
								$other->startOffset < $r->startOffset
							) &&
							!self::introducesCycle( $r->id, (string)$otherId, $subsumedRanges )
						) {
							$foundNesting = true;
							if ( !isset( $subsumedRanges[$r->id] ) ||
								$other->startOffset < $ranges[$subsumedRanges[$r->id]]->startOffset
							) {
								$subsumedRanges[$r->id] = (string)$otherId;
							}
						}
					}

					if ( $foundNesting ) {
						// 'r' is nested
						break;
					}
				}
			}

			// Move up
			$n = $n->parentNode;
		}
	}

	// Sort by start offset in source wikitext
	usort( $tplRanges, static function ( $r1, $r2 ) {
		return $r1->startOffset <=> $r2->startOffset;
	} );

	// Since the tpl ranges are sorted in textual order (by start offset),
	// it is sufficient to only look at the most recent template to see
	// if the current one overlaps with the previous one.
	//
	// This works because we've already identified nested ranges and can ignore them.

	$newRanges = [];
	$prev = null;

	foreach ( $tplRanges as $r ) {
		$endTagToRemove = null;
		$startTagToStrip = null;

		// Extract tplargInfo
		$tmp = DOMDataUtils::getDataParsoid( $r->startElem )->getTemp();
		$templateInfo = $tmp->tplarginfo ?? null;

		$this->verifyTplInfoExpectation( $templateInfo, $tmp );

		$this->env->log( "trace/{$this->traceType}/merge", static function () use ( &$r ) {
			$msg = '';
			$dp1 = DOMDataUtils::getDataParsoid( $r->start );
			$dp2 = DOMDataUtils::getDataParsoid( $r->end );
			// Temporarily detach the tmp data so that it is left out of
			// the JSON dumps below; it is restored before returning.
			$tmp1 = $dp1->tmp;
			$tmp2 = $dp2->tmp;
			$dp1->tmp = null;
			$dp2->tmp = null;
			$msg .= "\n##############################################";
			$msg .= "\nrange " . $r->id . '; r-start-elem: ' . DOMCompat::getOuterHTML( $r->startElem ) .
				'; DP: ' . PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $r->startElem ) );
			$msg .= "\nrange " . $r->id . '; r-end-elem: ' . DOMCompat::getOuterHTML( $r->endElem ) .
				'; DP: ' . PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $r->endElem ) );
			$msg .= "\nrange " . $r->id . '; r-start: [TAG_ID ' . ( $tmp1->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $r->start ) . '; DP: ' . PHPUtils::jsonEncode( $dp1 );
			$msg .= "\nrange " . $r->id . '; r-end: [TAG_ID ' . ( $tmp2->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $r->end ) . '; DP: ' . PHPUtils::jsonEncode( $dp2 );
			$msg .= "\n----------------------------------------------";
			$dp1->tmp = $tmp1;
			$dp2->tmp = $tmp2;
			return $msg;
		} );

		$enclosingRangeId = self::findToplevelEnclosingRange(
			$subsumedRanges,
			$subsumedRanges[$r->id] ?? null
		);
		if ( $enclosingRangeId ) {
			$this->env->log( "trace/{$this->traceType}/merge", '--nested in ', $enclosingRangeId, '--' );

			// Nested -- ignore r
			$startTagToStrip = $r->startElem;
			$endTagToRemove = $r->endElem;
			if ( $templateInfo ) {
				// 'r' is nested in 'enclosingRange' at the top-level
				// So, enclosingRange gets r's argInfo
				$this->recordTemplateInfo( $enclosingRangeId, $r, $templateInfo );
			}
		} elseif ( $prev && self::rangesOverlap( $prev, $r ) ) {
			// In the common case, in overlapping scenarios, r.start is
			// identical to prev.end. However, in fostered content scenarios,
			// there can be true overlap of the ranges.
			$this->env->log( "trace/{$this->traceType}/merge", '--overlapped--' );

			// See comment above, where `subsumedRanges` is defined.
			$subsumedRanges[$r->id] = $prev->id;

			// Overlapping ranges.
			// r is the regular kind
			// Merge r with prev

			// Note that if a table comes from a template, a foster box isn't
			// emitted so the enclosure isn't guaranteed. In pathological
			// cases, like where the table end tag isn't emitted, we can still
			// end up with flipped ranges if the template end marker gets into
			// a fosterable position (which can still happen despite being
			// emitted as a comment).
			Assert::invariant( !$r->flipped,
				'Flipped range should have been enclosed.'
			);

			$startTagToStrip = $r->startElem;
			$endTagToRemove = $prev->endElem;

			$prev->end = $r->end;
			$prev->endElem = $r->endElem;
			if ( WTUtils::isMarkerAnnotation( $r->endElem ) ) {
				$endDataMw = DOMDataUtils::getDataMw( $r->endElem );
				$endDataMw->rangeId = $r->id;
				$prev->extendedByOverlapMerge = true;
			}

			// Update compoundTplInfo
			if ( $templateInfo ) {
				$this->recordTemplateInfo( $prev->id, $r, $templateInfo );
			}
		} else {
			$this->env->log( "trace/{$this->traceType}/merge", '--normal--' );

			// Default -- no overlap
			// Emit the merged range
			$newRanges[] = $r;
			$prev = $r;

			// Update compoundTpls
			if ( $templateInfo ) {
				$this->recordTemplateInfo( $r->id, $r, $templateInfo );
			}
		}

		if ( $endTagToRemove ) {
			// Remove start and end meta-tags
			// Not necessary to remove the start tag, but good to cleanup
			$endTagToRemove->parentNode->removeChild( $endTagToRemove );
			self::stripStartMeta( $startTagToStrip );
		}
	}

	return $newRanges;
}
688 | |
/**
 * Name the first node of a range that is actual template output.
 *
 * Note that the case of nodeName varies with DOM implementation. This
 * method currently forces the nodeName to uppercase. In the future
 * we can/should switch to using the "native" case of the DOM
 * implementation; we do a case-insensitive match (by converting the result
 * to the "native" case of the DOM implementation) in
 * EncapsulatedContentHandler when this value is used.
 *
 * @param DOMRangeInfo $range
 * @return string|null nodeName with an optional "_$stx" suffix, or null
 *   if the first templated node is a <meta>.
 */
private static function findFirstTemplatedNode( DOMRangeInfo $range ): ?string {
	$node = $range->start;

	// Skip tpl marker meta
	if ( WTUtils::isTplMarkerMeta( $node ) ) {
		$node = $node->nextSibling;
	}

	// Walk past fostered nodes since they came from within a table
	// Note that this is not foolproof because in some scenarios,
	// fostered content is not marked up. Ex: when a table is templated,
	// and content from the table is fostered.
	for ( $dp = DOMDataUtils::getDataParsoid( $node );
		!empty( $dp->fostered );
		$dp = DOMDataUtils::getDataParsoid( $node )
	) {
		$node = $node->nextSibling;
		/** @var Element $node */
		DOMUtils::assertElt( $node );
	}

	$rawName = DOMCompat::nodeName( $node );

	// FIXME: It is harder to use META as a node name since this is a generic
	// placeholder for a whole bunch of things each of which has its own
	// newline constraint requirements. So, for now, I am skipping that
	// can of worms to prevent confusing the serializer with an overloaded
	// tag name.
	if ( $rawName === 'meta' ) {
		return null;
	}

	// FIXME spec-compliant values would be upper-case, this is just a workaround
	// for current PHP DOM implementation and could be removed in the future
	// See discussion in the method comment above.
	$upperName = mb_strtoupper( $rawName, "UTF-8" );

	if ( empty( $dp->stx ) ) {
		return $upperName;
	}
	return $upperName . '_' . $dp->stx;
}
735 | |
/**
 * Make every top-level node of the range an Element carrying the range's
 * about id, which is what encapsulation needs.
 *
 * Walking from range start to range end:
 * - Elements get the about attribute,
 * - non-elements in a fosterable position are removed,
 * - all other non-elements are wrapped in a <span> flagged as a wrapper.
 *
 * @param DOMRangeInfo $range
 */
private function ensureElementsInRange( DOMRangeInfo $range ): void {
	$aboutId = DOMCompat::getAttribute( $range->startElem, 'about' );
	$last = $range->end;

	for ( $cur = $range->start; $cur; $cur = $following ) {
		// Grab the sibling first: $cur may get removed or re-parented below
		$following = $cur->nextSibling;

		if ( $cur instanceof Element ) {
			$cur->setAttribute( 'about', $aboutId );
		} elseif ( DOMUtils::isFosterablePosition( $cur ) ) {
			// NOTE: There cannot be any non-IEW text in fosterable position
			// since the HTML tree builder would already have fostered it out.
			// So, any non-element node found here is safe to delete since:
			// (a) this has no rendering output impact, and
			// (b) data-mw captures template output => we don't need
			//     to preserve this for html2wt either. Removing this
			//     lets us preserve DOM range continuity.
			$cur->parentNode->removeChild( $cur );
		} else {
			// Add a span wrapper to let us add about-ids to represent
			// the DOM range as a contiguous chain of DOM nodes.
			$wrapper = $this->document->createElement( 'span' );
			$wrapper->setAttribute( 'about', $aboutId );
			$wrapperDP = new DataParsoid;
			$wrapperDP->setTempFlag( TempData::WRAPPER );
			DOMDataUtils::setDataParsoid( $wrapper, $wrapperDP );
			$cur->parentNode->replaceChild( $wrapper, $cur );
			$wrapper->appendChild( $cur );
			$cur = $wrapper;
		}

		if ( $cur === $last ) {
			break;
		}
	}
}
779 | |
/**
 * Locate the element that will carry the encapsulation information.
 *
 * Starting at $range->start, walk forward over siblings, stepping past
 * template marker metas and non-element nodes, and return the first
 * Element that is not a marker meta.
 *
 * @param DOMRangeInfo $range
 * @return Element
 * @throws Error if the range end is reached while still skipping, or if a
 *   skipped non-element node is not in a fosterable position.
 */
private static function findEncapTarget( DOMRangeInfo $range ): Element {
	$encapTgt = $range->start;
	'@phan-var Node $encapTgt';

	while ( true ) {
		$isMarker = WTUtils::isTplMarkerMeta( $encapTgt );
		$isElement = $encapTgt instanceof Element;
		if ( !$isMarker && $isElement ) {
			// Found a real element: this is the encapsulation target.
			break;
		}

		// Detect unwrappable template and bail out early.
		$unwrappable = $encapTgt === $range->end ||
			!( $isElement || DOMUtils::isFosterablePosition( $encapTgt ) );
		if ( $unwrappable ) {
			throw new Error( 'Cannot encapsulate transclusion. Start=' .
				DOMCompat::getOuterHTML( $range->startElem ) );
		}

		$encapTgt = $encapTgt->nextSibling;
	}

	'@phan-var Element $encapTgt';
	return $encapTgt;
}
811 | |
/**
 * Add markers to the DOM around the non-overlapping ranges.
 *
 * For every range this:
 *  1. makes all top-level range nodes Elements sharing the about-id,
 *  2. copies the start marker's typeof values onto the encapsulation
 *     target,
 *  3. computes the DSR covering the whole range and, when that DSR is
 *     valid, stores the transclusion parts in data-mw and the parameter
 *     infos / DSR / source text in data-parsoid of the target,
 *  4. logs an error when the range cannot be encapsulated,
 *  5. removes the start marker meta (if it is one) and the end marker.
 *
 * @param DOMRangeInfo[] $nonOverlappingRanges
 */
private function encapsulateTemplates( array $nonOverlappingRanges ): void {
	foreach ( $nonOverlappingRanges as $i => $range ) {

		// We should never have flipped overlapping ranges, and indeed that's
		// asserted in `findTopLevelNonOverlappingRanges`. Flipping results
		// in either completely nested ranges, or non-intersecting ranges.
		//
		// If the table causing the fostering is not transcluded, we emit a
		// foster box and wrap the whole table+fb in metas, producing nested
		// ranges. For ex,
		//
		//   <table>
		//   {{1x|<div>}}
		//
		// The tricky part is when the table *is* transcluded, and we omit the
		// foster box. The common case (for some definition of common) might
		// be like,
		//
		//   {{1x|<table>}}
		//   {{1x|<div>}}
		//
		// Here, #mwt1 leaves a table open and the end meta from #mwt2 is
		// fostered, since it gets closed into the div. The range for #mwt1
		// is the entire table, which thankfully contains #mwt2, so we still
		// have the expected entire nesting. Any tricks to extend the range
		// of #mwt2 beyond the table (so that we have an overlapping range) will
		// inevitably result in the end meta not being fostered, and we avoid
		// this situation altogether.
		//
		// The very edgy case is as follows,
		//
		//   {{1x|<table><div>}}</div>
		//   {{1x|<div>}}
		//
		// where both end metas are fostered. Ignoring that we don't even
		// roundtrip the first transclusion properly on its own, here we have
		// a flipped range where, since the end meta for the first range was
		// also fostered, the ranges still don't overlap.

		// FIXME: The code below needs to be aware of flipped ranges.

		$this->ensureElementsInRange( $range );

		$tplArray = $this->compoundTpls[$range->id] ?? null;
		Assert::invariant( (bool)$tplArray, 'No parts for template range!' );

		$encapTgt = self::findEncapTarget( $range );
		$encapValid = false;
		$encapDP = DOMDataUtils::getDataParsoid( $encapTgt );

		// Update type-of (always even if tpl-encap below will fail).
		// This ensures that VE will still "edit-protect" this template
		// and not allow its content to be edited directly.
		$startElem = $range->startElem;
		if ( $startElem !== $encapTgt ) {
			$t1 = DOMCompat::getAttribute( $startElem, 'typeof' );
			if ( $t1 !== null ) {
				// Each type is added with the third argument set to true;
				// iterating in reverse keeps the marker's original order
				// if that flag means "prepend" (not visible from here).
				foreach ( array_reverse( explode( ' ', $t1 ) ) as $t ) {
					DOMUtils::addTypeOf( $encapTgt, $t, true );
				}
			}
		}

		/* ----------------------------------------------------------------
		 * We'll attempt to update dp1.dsr to reflect the entire range of
		 * the template. This relies on a couple observations:
		 *
		 * 1. In the common case, dp2.dsr->end will be > dp1.dsr->end
		 *    If so, new range = dp1.dsr->start, dp2.dsr->end
		 *
		 * 2. But, foster parenting can complicate this when range.end is a table
		 *    and range.start has been fostered out of the table (range.end).
		 *    But, we need to verify this assumption.
		 *
		 *    2a. If dp2.dsr->start is smaller than dp1.dsr->start, this is a
		 *        confirmed case of range.start being fostered out of range.end.
		 *
		 *    2b. If dp2.dsr->start is unknown, we rely on fostered flag on
		 *        range.start, if any.
		 * ---------------------------------------------------------------- */
		$dp1 = DOMDataUtils::getDataParsoid( $range->start );
		// Work on a copy so the start node's own dsr is not mutated here.
		$dp1DSR = isset( $dp1->dsr ) ? clone $dp1->dsr : null;
		$dp2DSR = self::getRangeEndDSR( $range );

		if ( $dp1DSR ) {
			if ( $dp2DSR ) {
				// Case 1. above
				if ( $dp2DSR->end > $dp1DSR->end ) {
					$dp1DSR->end = $dp2DSR->end;
				}

				// Case 2. above
				$endDsr = $dp2DSR->start;
				if ( DOMCompat::nodeName( $range->end ) === 'table' &&
					$endDsr !== null &&
					( $endDsr < $dp1DSR->start || !empty( $dp1->fostered ) )
				) {
					$dp1DSR->start = $endDsr;
				}
			}

			// encapsulation possible only if dp1.dsr is valid
			$encapValid = Utils::isValidDSR( $dp1DSR ) &&
				$dp1DSR->end >= $dp1DSR->start;
		}

		if ( $encapValid ) {
			// Find transclusion info from the array (skip past a wikitext element)
			/** @var CompoundTemplateInfo $firstTplInfo */
			$firstTplInfo = is_string( $tplArray[0] ) ? $tplArray[1] : $tplArray[0];

			// Add any leading wikitext
			if ( $firstTplInfo->dsr->start > $dp1DSR->start ) {
				// This gap in dsr (between the final encapsulated content, and the
				// content that actually came from a template) is indicative of this
				// being a mixed-template-content-block and/or multi-template-content-block
				// scenario.
				//
				// In this case, record the name of the first node in the encapsulated
				// content. During html -> wt serialization, newline constraints for
				// this entire block has to be determined relative to this node.
				$ftn = self::findFirstTemplatedNode( $range );
				if ( $ftn !== null ) {
					$encapDP->firstWikitextNode = $ftn;
				}
				$width = $firstTplInfo->dsr->start - $dp1DSR->start;
				array_unshift(
					$tplArray,
					PHPUtils::safeSubstr( $this->frame->getSrcText(), $dp1DSR->start, $width )
				);
			}

			// Add any trailing wikitext
			/** @var CompoundTemplateInfo $lastTplInfo */
			$lastTplInfo = PHPUtils::lastItem( $tplArray );
			if ( $lastTplInfo->dsr->end < $dp1DSR->end ) {
				$width = $dp1DSR->end - $lastTplInfo->dsr->end;
				$tplArray[] = PHPUtils::safeSubstr( $this->frame->getSrcText(), $lastTplInfo->dsr->end, $width );
			}

			// Map the array of { dsr: .. , args: .. } objects to just the args property
			$infoIndex = 0;
			$parts = [];
			$pi = [];
			foreach ( $tplArray as $a ) {
				if ( is_string( $a ) ) {
					$parts[] = $a;
				} elseif ( $a instanceof CompoundTemplateInfo ) {
					// Remember the position of the transclusion relative
					// to other transclusions. Should match the index of
					// the corresponding private metadata in $templateInfos.
					$a->info->i = $infoIndex++;
					$a->info->type = 'template';
					if ( $a->isParam ) {
						$a->info->type = 'templatearg';
					} elseif ( $a->info->func ) {
						$a->info->type = $this->v3PFOutput ?
							'v3parserfunction' : 'parserfunction';
					}
					$parts[] = $a->info;
					// FIXME: we throw away the array keys and rebuild them
					// again in WikitextSerializer
					$pi[] = array_values( $a->info->paramInfos );
				}
			}

			if ( !is_string( $parts[0] ) && $parts[0]->type === 'v3parserfunction' ) {
				DOMUtils::addTypeOf( $encapTgt, 'mw:ParserFunction', false );
			}

			// Set up dsr->start, dsr->end, and data-mw on the target node
			// Avoid clobbering existing (ex: extension) data-mw information (T214241)
			$encapDataMw = DOMDataUtils::getDataMw( $encapTgt );
			$encapDataMw->parts = $parts;
			DOMDataUtils::setDataMw( $encapTgt, $encapDataMw );
			$encapDP->pi = $pi;

			// Special case when mixed-attribute-and-content templates are
			// involved. This information is reliable and comes from the
			// AttributeExpander and gets around the problem of unmarked
			// fostered content that findFirstTemplatedNode runs into.
			$firstWikitextNode = DOMDataUtils::getDataParsoid(
				$range->startElem
			)->firstWikitextNode ?? null;
			if ( empty( $encapDP->firstWikitextNode ) && $firstWikitextNode ) {
				$encapDP->firstWikitextNode = $firstWikitextNode;
			}
		} else {
			$errors = [ 'Do not have necessary info. to encapsulate Tpl: ' . $i ];
			$errors[] = 'Start Elt : ' . DOMCompat::getOuterHTML( $startElem );
			$errors[] = 'End Elt   : ' . DOMCompat::getOuterHTML( $range->endElem );
			$errors[] = 'Start DSR : ' . PHPUtils::jsonEncode( $dp1DSR ?? 'no-start-dsr' );
			$errors[] = 'End DSR   : ' . PHPUtils::jsonEncode( $dp2DSR ?? [] );
			$this->env->log( 'error', implode( "\n", $errors ) );
		}

		// Make DSR range zero-width for fostered templates after
		// setting up data-mw. However, since template encapsulation
		// sometimes captures both fostered content as well as the table
		// from which it was fostered from, in those scenarios, we should
		// leave DSR info untouched.
		//
		// SSS FIXME:
		// 1. Should we remove the fostered flag from the entire
		//    encapsulated block if we dont set dsr width range to zero
		//    since only part of the block is fostered, not the entire
		//    encapsulated block?
		//
		// 2. In both cases, should we mark these uneditable by adding
		//    mw:Placeholder to the typeof?
		if ( !empty( $dp1->fostered ) ) {
			$encapDataMw = DOMDataUtils::getDataMw( $encapTgt );
			// $dp1DSR is null when the start node carried no dsr at all.
			// Writing a property on null throws in PHP 8, and the adjusted
			// DSR is only consumed below when $encapValid (which requires a
			// non-null DSR), so skipping the fixup here is safe.
			if ( $dp1DSR !== null && (
				!$encapDataMw ||
				!$encapDataMw->parts ||
				count( $encapDataMw->parts ) === 1
			) ) {
				$dp1DSR->end = $dp1DSR->start;
			}
		}

		// Update DSR after fostering-related fixes are done.
		if ( $encapValid ) {
			// $encapDP is the data-parsoid object of $encapTgt, so all
			// updates below land directly on the encapsulation target.
			if ( empty( $encapDP->dsr ) ) {
				$encapDP->dsr = $dp1DSR;
			} else {
				$encapDP->dsr->start = $dp1DSR->start;
				$encapDP->dsr->end = $dp1DSR->end;
			}
			$encapDP->src = $encapDP->dsr->substr(
				$this->frame->getSrcText()
			);
		}

		// Remove startElem (=range.startElem) if a meta. If a meta,
		// it is guaranteed to be a marker meta added to mark the start
		// of the template.
		if ( WTUtils::isTplMarkerMeta( $startElem ) ) {
			$startElem->parentNode->removeChild( $startElem );
		}

		$range->endElem->parentNode->removeChild( $range->endElem );
	}
}
1062 | |
/**
 * Register $range under $node in the per-node range storage, keyed by
 * the range's id.
 *
 * @param Element $node
 * @param DOMRangeInfo $range
 */
private function addNodeRange( Element $node, DOMRangeInfo $range ): void {
	// With the native DOM extension, normally you assume that DOMNode
	// objects are temporary -- you get a new DOMNode every time you
	// traverse the DOM. But by retaining a reference in the
	// SplObjectStorage, we ensure that the DOMNode object stays live while
	// the pass is active. Then its address can be used as an index.
	if ( isset( $this->nodeRanges[$node] ) ) {
		$holder = $this->nodeRanges[$node];
	} else {
		// The stored value has to be an object because
		// SplObjectStorage::offsetGet() does not provide an lval.
		$holder = new DOMRangeInfoArray;
		$this->nodeRanges[$node] = $holder;
	}
	$holder->ranges[$range->id] = $range;
}
1082 | |
/**
 * Look up the ranges previously attached to $node.
 *
 * @param Element $node
 * @return DOMRangeInfo[]|null Ranges indexed by range ID, or null when
 *   nothing has been attached to this node.
 */
private function getNodeRanges( Element $node ): ?array {
	if ( !isset( $this->nodeRanges[$node] ) ) {
		return null;
	}
	$holder = $this->nodeRanges[$node];
	return $holder->ranges ?? null;
}
1092 | |
/**
 * Collect all wrappable template ranges found below $rootNode.
 *
 * Entry point for the recursive DOM walk: it provides the two
 * accumulators the recursive helper fills in and hands back the
 * collected ranges.
 *
 * @param Node $rootNode
 * @return DOMRangeInfo[]
 */
protected function findWrappableMetaRanges( Node $rootNode ): array {
	// Start/end marker bookkeeping, keyed by range id; only needed
	// while the walk is in progress.
	$markersById = [];
	$foundRanges = [];
	$this->findWrappableTemplateRangesRecursive( $rootNode, $markersById, $foundRanges );
	return $foundRanges;
}
1105 | |
/**
 * Recursive helper for findWrappableMetaRanges().
 *
 * Walks the children of $rootNode in document order. Marker metas
 * (as recognised by matchMetaType()) are paired up by range id in $tpls;
 * once both the start and end marker of an id have been seen, the
 * resulting DOM range is appended to $tplRanges. Elements that are not
 * considered markers are descended into.
 *
 * @param Node $rootNode
 * @param ElementRange[] &$tpls Template start and end elements by ID
 * @param DOMRangeInfo[] &$tplRanges Template range info
 * @throws UnreachableException if a start marker shows up for an id that
 *   is already tracked but has no end marker recorded.
 * @throws RangeBuilderException if an end marker is found for a tracked
 *   id whose start element is null.
 */
private function findWrappableTemplateRangesRecursive(
	Node $rootNode, array &$tpls, array &$tplRanges
): void {
	$elem = $rootNode->firstChild;

	while ( $elem ) {
		// get the next sibling before doing anything since
		// we may delete elem as part of encapsulation
		$nextSibling = $elem->nextSibling;

		if ( $elem instanceof Element ) {
			$metaType = $this->matchMetaType( $elem );

			// Ignore templates without tsr.
			//
			// These are definitely nested in other templates / extensions
			// and need not be wrapped themselves since they
			// can never be edited directly.
			//
			// NOTE: We are only testing for tsr presence on the start-elem
			// because wikitext errors can lead to parse failures and no tsr
			// on end-meta-tags.
			//
			// Ex: "<ref>{{1x|bar}}<!--bad-></ref>"
			if ( $metaType !== null &&
				( !empty( DOMDataUtils::getDataParsoid( $elem )->tsr ) ||
					str_ends_with( $metaType, '/End' )
				)
			) {
				$about = $this->getRangeId( $elem );
				$tpl = $tpls[$about] ?? null;
				// Is this a start marker?
				if ( !str_ends_with( $metaType, '/End' ) ) {
					if ( $tpl ) {
						$tpl->startElem = $elem;
						// content or end marker existed already
						if ( !empty( $tpl->endElem ) ) {
							// End marker was foster-parented.
							// Found actual start tag.
							$tplRanges[] = $this->getDOMRange(
								$elem, $tpl->endElem, $tpl->endElem );
						} else {
							// should not happen!
							throw new UnreachableException( "start found after content for $about." );
						}
					} else {
						$tpl = new ElementRange;
						$tpl->startElem = $elem;
						$tpls[$about] = $tpl;
					}
				} else {
					// elem is the end-meta tag
					if ( $tpl ) {
						/* ------------------------------------------------------------
						 * Special case: In some cases, the entire template content can
						 * get fostered out of a table, not just the start/end marker.
						 *
						 * Simplest example:
						 *
						 *   {|
						 *   {{1x|foo}}
						 *   |}
						 *
						 * More complex example:
						 *
						 *   {|
						 *   {{1x|
						 *   a
						 *    b
						 *
						 *     c
						 *   }}
						 *   |}
						 *
						 * Since meta-tags don't normally get fostered out, this scenario
						 * only arises when the entire content including meta-tags was
						 * wrapped in p-tags. So, we look to see if:
						 * 1. the end-meta-tag's parent has a table sibling,
						 * 2. the start meta's parent is marked as fostered.
						 * If so, we recognize this as an adoption scenario and fix up
						 * DSR of start-meta-tag's parent to include the table's DSR.
						 * ------------------------------------------------------------*/
						$sm = $tpl->startElem;

						// TODO: this should only happen in fairly specific cases of the
						// annotation processing and should eventually be handled properly.
						// In the meantime, we create and log an exception to have an idea
						// of the amplitude of the problem.
						if ( $sm === null ) {
							throw new RangeBuilderException( 'No start tag found for the range' );
						}
						$em = $elem;
						$ee = $em;
						$tbl = $em->parentNode->nextSibling;

						// Dont get distracted by a newline node -- skip over it
						// Unsure why it shows up occasionally
						if ( $tbl && $tbl instanceof Text && $tbl->nodeValue === "\n" ) {
							$tbl = $tbl->nextSibling;
						}

						$dp = !DOMUtils::atTheTop( $sm->parentNode ) ?
							DOMDataUtils::getDataParsoid( $sm->parentNode ) : null;
						if ( $tbl && DOMCompat::nodeName( $tbl ) === 'table' && !empty( $dp->fostered ) ) {
							'@phan-var Element $tbl'; /** @var Element $tbl */
							$tblDP = DOMDataUtils::getDataParsoid( $tbl );
							// Only fill in the table's DSR start when it is
							// unknown and the fostered parent has a usable
							// tsr start. The previous condition required
							// dsr->start to be both non-null and null, so
							// this fixup could never run.
							if ( isset( $dp->tsr ) && $dp->tsr->start !== null &&
								isset( $tblDP->dsr ) && $tblDP->dsr->start === null
							) {
								$tblDP->dsr->start = $dp->tsr->start;
							}
							$tbl->setAttribute( 'about', $about ); // set about on elem
							$ee = $tbl;
						}
						$tplRanges[] = $this->getDOMRange( $sm, $em, $ee );
					} else {
						// The end tag can appear before the start tag if it is fostered out
						// of the table and the start tag is not.
						// It can even technically happen that both tags are fostered out of
						// a table and that the range is flipped: while the fostered content of
						// single table is fostered in-order, the ordering might change
						// across tables if the tags are not initially fostered by the same
						// table.
						$tpl = new ElementRange;
						$tpl->endElem = $elem;
						$tpls[$about] = $tpl;
					}
				}
			} else {
				$this->findWrappableTemplateRangesRecursive( $elem, $tpls, $tplRanges );
			}
		}

		$elem = $nextSibling;
	}
}
1249 | |
/**
 * Report the marker type of $elem if it is a marker this builder handles.
 *
 * This class handles template markers, so the lookup is delegated to
 * WTUtils::matchTplType(). The method is protected, so subclasses can
 * substitute a different marker family.
 *
 * @param Element $elem the element to check
 * @return string|null the matched meta type, or null if there is none
 */
protected function matchMetaType( Element $elem ): ?string {
	$tplType = WTUtils::matchTplType( $elem );
	return $tplType;
}
1260 | |
/**
 * Sanity-check a range that may lack template info.
 *
 * When $templateInfo is absent, the FROM_FOSTER flag must be set on
 * $tmp; otherwise the invariant fails.
 *
 * @param ?TemplateInfo $templateInfo
 * @param TempData $tmp
 */
protected function verifyTplInfoExpectation( ?TemplateInfo $templateInfo, TempData $tmp ): void {
	if ( $templateInfo ) {
		// Template info is present: nothing to verify.
		return;
	}

	// An assertion here is probably an indication that we're
	// mistakenly doing template wrapping in a nested context.
	Assert::invariant( $tmp->getFlag( TempData::FROM_FOSTER ), 'Template range without arginfo.' );
}
1268 | |
/**
 * Run the range-building pass on the subtree rooted at $root:
 * find the wrappable marker ranges, reduce them to top-level
 * non-overlapping ranges, and encapsulate those.
 *
 * @param Node $root
 */
public function execute( Node $root ): void {
	$tplRanges = $this->findWrappableMetaRanges( $root );
	if ( count( $tplRanges ) <= 0 ) {
		// No ranges found: nothing to encapsulate.
		return;
	}

	$this->encapsulateTemplates(
		$this->findTopLevelNonOverlappingRanges( $root, $tplRanges )
	);
}
1276 | |
/**
 * Creates a range that encloses $startMeta and $endMeta.
 *
 * The range's start and end are set to the two sibling nodes, under the
 * nearest common ancestor, that contain $startMeta and the end node
 * ($endElem if given, otherwise $endMeta). If the upward walk from the
 * end node stops (at a document node or a missing parent) before
 * meeting one of $startMeta's ancestors, start and end are not
 * assigned here.
 *
 * @param Element $startMeta
 * @param Element $endMeta
 * @param ?Element $endElem
 * @return DOMRangeInfo
 * @throws UnreachableException if $startMeta itself turns out to be the
 *   common ancestor.
 */
protected function findEnclosingRange(
	Element $startMeta, Element $endMeta, ?Element $endElem = null
): DOMRangeInfo {
	$rangeId = Utils::stripParsoidIdPrefix( $this->getRangeId( $startMeta ) );
	$startOffset = DOMDataUtils::getDataParsoid( $startMeta )->tsr->start;
	$range = new DOMRangeInfo( $rangeId, $startOffset, $startMeta, $endMeta );

	// Climb from the end node until we reach a node that is also on
	// startMeta's path to the root: that node is the common ancestor.
	$startAncestors = DOMUtils::pathToRoot( $startMeta );
	$elem = $endElem ?? $endMeta;
	for (
		$parentNode = $elem->parentNode;
		$parentNode && $parentNode->nodeType !== XML_DOCUMENT_NODE;
		$parentNode = $elem->parentNode
	) {
		$idx = array_search( $parentNode, $startAncestors, true );
		if ( $idx === 0 ) {
			throw new UnreachableException(
				'The startMeta cannot be the common ancestor.'
			);
		}
		if ( $idx > 0 ) {
			// The path entry just before the common ancestor is the
			// child of it that contains startMeta.
			$range->start = $startAncestors[$idx - 1];
			$range->end = $elem;
			break;
		}
		$elem = $parentNode;
	}

	return $range;
}
1316 | } |