Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 482 |
|
0.00% |
0 / 24 |
CRAP | |
0.00% |
0 / 1 |
DOMRangeBuilder | |
0.00% |
0 / 482 |
|
0.00% |
0 / 24 |
29756 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
updateDSRForFirstRangeNode | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
getRangeEndDSR | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
90 | |||
getRangeId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMRange | |
0.00% |
0 / 74 |
|
0.00% |
0 / 1 |
342 | |||
getStartConsideringFosteredContent | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
stripStartMeta | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
findToplevelEnclosingRange | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
recordTemplateInfo | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
30 | |||
introducesCycle | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
rangesOverlap | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
findTopLevelNonOverlappingRanges | |
0.00% |
0 / 109 |
|
0.00% |
0 / 1 |
1122 | |||
findFirstTemplatedNode | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
ensureElementsInRange | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
30 | |||
findEncapTarget | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
42 | |||
encapsulateTemplates | |
0.00% |
0 / 89 |
|
0.00% |
0 / 1 |
1122 | |||
addNodeRange | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getNodeRanges | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
findWrappableMetaRanges | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
findWrappableTemplateRangesRecursive | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
506 | |||
matchMetaType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
verifyTplInfoExpectation | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
execute | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
findEnclosingRange | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
5 | |
6 | use Error; |
7 | use SplObjectStorage; |
8 | use Wikimedia\Assert\Assert; |
9 | use Wikimedia\Assert\UnreachableException; |
10 | use Wikimedia\Parsoid\Config\Env; |
11 | use Wikimedia\Parsoid\Core\DomSourceRange; |
12 | use Wikimedia\Parsoid\Core\ElementRange; |
13 | use Wikimedia\Parsoid\DOM\Document; |
14 | use Wikimedia\Parsoid\DOM\Element; |
15 | use Wikimedia\Parsoid\DOM\Node; |
16 | use Wikimedia\Parsoid\DOM\Text; |
17 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
18 | use Wikimedia\Parsoid\NodeData\TempData; |
19 | use Wikimedia\Parsoid\NodeData\TemplateInfo; |
20 | use Wikimedia\Parsoid\Utils\DOMCompat; |
21 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
22 | use Wikimedia\Parsoid\Utils\DOMUtils; |
23 | use Wikimedia\Parsoid\Utils\PHPUtils; |
24 | use Wikimedia\Parsoid\Utils\Utils; |
25 | use Wikimedia\Parsoid\Utils\WTUtils; |
26 | use Wikimedia\Parsoid\Wt2Html\Frame; |
27 | |
28 | /** |
29 | * Template encapsulation happens in three steps. |
30 | * |
31 | * 1. findWrappableTemplateRanges |
32 | * |
33 | * Locate start and end metas. Walk upwards towards the root from both and |
34 | * find a common ancestor A. The subtree rooted at A is now effectively the |
35 | * scope of the dom template output.
36 | * |
37 | * 2. findTopLevelNonOverlappingRanges |
38 | * |
39 | * Mark all nodes in a range and walk up to root from each range start to |
40 | * determine overlaps, nesting. Merge overlapping and nested ranges to find |
41 | * the subset of top-level non-overlapping ranges which will be wrapped as |
42 | * individual units. |
43 | * |
44 | * 3. encapsulateTemplates |
45 | * |
46 | * For each non-overlapping range, |
47 | * - compute a data-mw according to the DOM spec |
48 | * - replace the start / end meta markers with transclusion type and data-mw |
49 | * on the first DOM node |
50 | * - add about ids on all top-level nodes of the range |
51 | * |
52 | * This is a simple high-level overview of the 3 steps to help understand this |
53 | * code. |
54 | * |
55 | * FIXME: At some point, more of the details should be extracted and documented |
56 | * in pseudo-code as an algorithm. |
57 | * @module |
58 | */ |
59 | class DOMRangeBuilder { |
60 | |
61 | private const MAP_TBODY_TR = [ |
62 | 'tbody' => true, |
63 | 'tr' => true |
64 | ]; |
65 | |
66 | /** @var Document */ |
67 | private $document; |
68 | |
69 | /** @var Frame */ |
70 | private $frame; |
71 | |
72 | /** @var Env */ |
73 | protected $env; |
74 | |
75 | /** @var SplObjectStorage */ |
76 | protected $nodeRanges; |
77 | |
78 | /** @var array<string|CompoundTemplateInfo>[] */ |
79 | private $compoundTpls = []; |
80 | |
81 | /** @var string */ |
82 | protected $traceType; |
83 | |
/**
 * Set up a range builder for a single document.
 *
 * Stores the document and frame, takes the Env from the frame, starts with
 * an empty node-to-ranges store, and selects the "tplwrap" trace channel.
 *
 * @param Document $document
 * @param Frame $frame
 */
public function __construct( Document $document, Frame $frame ) {
	$this->traceType = "tplwrap";
	$this->nodeRanges = new SplObjectStorage();
	$this->frame = $frame;
	$this->env = $frame->getEnv();
	$this->document = $document;
}
93 | |
/**
 * Transfer source-range information from $source's data-parsoid to $target's.
 *
 * If both nodes have an end offset and the target's end is larger, only the
 * start offset is copied so the bigger end offset survives. Otherwise the
 * target receives a copy of the source DSR (or null when the source has
 * none) together with the source's `src` value.
 *
 * @param Element $target Node whose data-parsoid is updated
 * @param Element $source Node whose data-parsoid is read
 */
protected function updateDSRForFirstRangeNode( Element $target, Element $source ): void {
	$srcDP = DOMDataUtils::getDataParsoid( $source );
	$tgtDP = DOMDataUtils::getDataParsoid( $target );

	// Since TSRs on template content tokens are cleared by the
	// template handler, all computed dsr values for template content
	// is always inferred from top-level content values and is safe.
	// So, do not overwrite a bigger end-dsr value.
	if ( isset( $srcDP->dsr->end ) && isset( $tgtDP->dsr->end ) &&
		$tgtDP->dsr->end > $srcDP->dsr->end
	) {
		$tgtDP->dsr->start = $srcDP->dsr->start ?? null;
	} else {
		// The condition above treats a missing source DSR as possible, and
		// `clone` on null throws an Error, so only clone when there is one.
		$tgtDP->dsr = isset( $srcDP->dsr ) ? clone $srcDP->dsr : null;
		$tgtDP->src = $srcDP->src ?? null;
	}
}
111 | |
/**
 * Get the DSR of the end of a DOMRange.
 *
 * When the range ends on an Element, that element's DSR is returned as is.
 * Otherwise the DSR is extrapolated from the nearest preceding Element
 * sibling: the byte lengths of the text/comment nodes in between are added
 * to that sibling's end offset. If there is no such sibling the result is
 * null; if the sibling's DSR has no integer end offset, the sibling's DSR
 * is returned unchanged.
 *
 * @param DOMRangeInfo $range
 * @return DomSourceRange|null
 */
private static function getRangeEndDSR( DOMRangeInfo $range ): ?DomSourceRange {
	$endNode = $range->end;
	if ( $endNode instanceof Element ) {
		return DOMDataUtils::getDataParsoid( $endNode )->dsr ?? null;
	}

	// Rare case: the range ends on a non-element node. Walk left to the
	// first Element sibling, adding up the width of everything skipped.
	// We don't try any harder than this for now.
	$offset = 0;
	for ( $n = $endNode->previousSibling;
		$n && !( $n instanceof Element );
		$n = $n->previousSibling
	) {
		if ( $n instanceof Text ) {
			$offset += strlen( $n->nodeValue );
		} else {
			// A comment
			// @phan-suppress-next-line PhanTypeMismatchArgumentSuperType
			$offset += WTUtils::decodedCommentLength( $n );
		}
	}

	if ( !$n ) {
		// No element sibling to extrapolate from
		return null;
	}

	/**
	 * The loop above only stops on an Element (or on null, handled above).
	 *
	 * @var Element $n
	 */
	'@phan-var Element $n';
	$dsr = DOMDataUtils::getDataParsoid( $n )->dsr ?? null;
	if ( !$dsr || !is_int( $dsr->end ?? null ) ) {
		return $dsr;
	}

	if ( $endNode instanceof Text ) {
		$len = strlen( $endNode->nodeValue );
	} else {
		// A comment
		// @phan-suppress-next-line PhanTypeMismatchArgumentSuperType
		$len = WTUtils::decodedCommentLength( $endNode );
	}

	$endNodeStart = $dsr->end + $offset;
	return new DomSourceRange( $endNodeStart, $endNodeStart + $len, null, null );
}
168 | |
/**
 * Returns the range ID of a node - in the case of templates, the value of
 * its "about" attribute.
 *
 * NOTE(review): other code in this class checks the result of
 * DOMCompat::getAttribute() against null, so a node without "about" would
 * presumably violate the declared string return type - confirm that
 * callers only pass nodes that carry the attribute.
 *
 * @param Element $node
 * @return string
 */
protected function getRangeId( Element $node ): string {
	$about = DOMCompat::getAttribute( $node, "about" );
	return $about;
}
177 | |
/**
 * Build the DOM range for a start/end marker pair and normalize its start.
 *
 * The enclosing range is obtained from findEnclosingRange() and then
 * adjusted:
 *  - empty content (start marker immediately followed by $endElem) is
 *    either expanded to the parent (fosterable position) or given an
 *    empty span placeholder;
 *  - a start in a fosterable position that cannot be wrapped is moved
 *    onto the next element sibling, or the range is expanded to the parent;
 *  - a non-element start is wrapped in a span;
 *  - fostered content preceding a table start is pulled into the range;
 *  - the range is flagged as flipped when start does not precede end.
 *
 * @param Element $startMeta
 * @param Element $endMeta
 * @param Element $endElem
 * @return DOMRangeInfo
 */
private function getDOMRange(
	Element $startMeta, Element $endMeta, Element $endElem
) {
	$range = $this->findEnclosingRange( $startMeta, $endMeta, $endElem );
	$startsInFosterablePosn = DOMUtils::isFosterablePosition( $range->start );
	$next = $range->start->nextSibling;

	// Detect empty content and handle them!
	if ( WTUtils::isTplMarkerMeta( $range->start ) && $next === $endElem ) {
		Assert::invariant(
			$range->start === $range->startElem,
			"Expected startElem to be same as range.start"
		);
		if ( $startsInFosterablePosn ) {
			// Expand range!
			$range->start = $range->end = $range->start->parentNode;
			$startsInFosterablePosn = false;
		} else {
			$emptySpan = $this->document->createElement( 'span' );
			$range->start->parentNode->insertBefore( $emptySpan, $endElem );
		}

		// Handle unwrappable content in fosterable positions
		// and expand template range, if required.
		// NOTE: Template marker meta tags are translated from comments
		// *after* the DOM has been built which is why they can show up in
		// fosterable positions in the DOM.
	} elseif ( $startsInFosterablePosn &&
		( !( $range->start instanceof Element ) ||
			( WTUtils::isTplMarkerMeta( $range->start ) &&
				( !( $next instanceof Element ) || WTUtils::isTplMarkerMeta( $next ) ) )
		)
	) {
		$rangeStartParent = $range->start->parentNode;

		// If we are in a table in a foster-element position, then all non-element
		// nodes will be white-space and comments. Skip over all of them and find
		// the first table content node.
		$noWS = true;
		$nodesToMigrate = [];
		$newStart = $range->start;
		$n = $range->start instanceof Element ? $next : $range->start;
		// Stop when the siblings run out: without the null check, a missing
		// element sibling would make this loop dereference null forever.
		// The check on $newStart below already expects that outcome.
		while ( $n && !( $n instanceof Element ) ) {
			if ( $n instanceof Text ) {
				$noWS = false;
			}
			$nodesToMigrate[] = $n;
			$n = $n->nextSibling;
			$newStart = $n;
		}

		// As long as $newStart is a tr/tbody or we don't have whitespace
		// migrate $nodesToMigrate into $newStart. Pushing whitespace into
		// th/td/caption can change display semantics.
		if ( $newStart && ( $noWS || isset( self::MAP_TBODY_TR[DOMCompat::nodeName( $newStart )] ) ) ) {
			/**
			 * The point of the above loop is to ensure we're working
			 * with a Element if there is a $newStart.
			 *
			 * @var Element $newStart
			 */
			'@phan-var Element $newStart';
			$insertPosition = $newStart->firstChild;
			foreach ( $nodesToMigrate as $n ) {
				$newStart->insertBefore( $n, $insertPosition );
			}
			$range->start = $newStart;
			// Update dsr to point to original start
			$this->updateDSRForFirstRangeNode( $range->start, $range->startElem );
		} else {
			// If not, we are forced to expand the template range.
			$range->start = $range->end = $rangeStartParent;
		}
	}

	// Ensure range->start is an element node since we want to
	// add/update the data-parsoid attribute to it.
	if ( !( $range->start instanceof Element ) ) {
		$span = $this->document->createElement( 'span' );
		$range->start->parentNode->insertBefore( $span, $range->start );
		$span->appendChild( $range->start );
		$range->start = $span;
		$this->updateDSRForFirstRangeNode( $range->start, $range->startElem );
	}

	$range->start = $this->getStartConsideringFosteredContent( $range->start );

	// Use the negative test since it doesn't mark the range as flipped
	// if range.start === range.end
	if ( !DOMUtils::inSiblingOrder( $range->start, $range->end ) ) {
		// In foster-parenting situations, the end-meta tag (and hence range.end)
		// can show up before the range.start which would be the table itself.
		// So, we record this info for later analysis.
		$range->flipped = true;
	}

	$this->env->log(
		"trace/{$this->traceType}/findranges",
		static function () use ( &$range ) {
			$msg = '';
			$dp1 = DOMDataUtils::getDataParsoid( $range->start );
			$dp2 = DOMDataUtils::getDataParsoid( $range->end );
			// Temporarily detach tmp data so it is left out of the JSON dump
			$tmp1 = $dp1->tmp;
			$tmp2 = $dp2->tmp;
			$dp1->tmp = null;
			$dp2->tmp = null;
			$msg .= "\n----------------------------------------------";
			$msg .= "\nFound range : " . $range->id . '; flipped? ' . ( (string)$range->flipped ) .
				'; offset: ' . $range->startOffset;
			$msg .= "\nstart-elem : " . DOMCompat::getOuterHTML( $range->startElem ) . '; DP: ' .
				PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $range->startElem ) );
			$msg .= "\nend-elem : " . DOMCompat::getOuterHTML( $range->endElem ) . '; DP: ' .
				PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $range->endElem ) );
			$msg .= "\nstart : [TAG_ID " . ( $tmp1->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $range->start ) .
				'; DP: ' . PHPUtils::jsonEncode( $dp1 );
			$msg .= "\nend : [TAG_ID " . ( $tmp2->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $range->end ) .
				'; DP: ' . PHPUtils::jsonEncode( $dp2 );
			$msg .= "\n----------------------------------------------";
			$dp1->tmp = $tmp1;
			$dp2->tmp = $tmp2;
			return $msg;
		}
	);

	return $range;
}
314 | |
/**
 * Returns the given node unless it is a table directly preceded by fostered
 * content; in that case returns the first node of that run of fostered
 * element siblings.
 *
 * @param Node $node
 * @return Node
 */
protected function getStartConsideringFosteredContent( Node $node ): Node {
	if ( DOMCompat::nodeName( $node ) !== 'table' ) {
		return $node;
	}

	// Keep stepping left while the preceding sibling is an element that is
	// flagged as fostered in its data-parsoid.
	while ( true ) {
		$candidate = $node->previousSibling;
		if ( !( $candidate instanceof Element ) ||
			empty( DOMDataUtils::getDataParsoid( $candidate )->fostered )
		) {
			break;
		}
		$node = $candidate;
	}

	return $node;
}
334 | |
/**
 * Neutralize a range start marker.
 *
 * A <meta> element is removed from its parent. Any other element is kept,
 * but the mw:* entries are stripped from its typeof attribute (if it has one).
 *
 * @param Element $meta
 */
private static function stripStartMeta( Element $meta ): void {
	if ( DOMCompat::nodeName( $meta ) === 'meta' ) {
		$meta->parentNode->removeChild( $meta );
		return;
	}

	$typeOf = DOMCompat::getAttribute( $meta, 'typeof' );
	if ( $typeOf === null ) {
		// Nothing to strip
		return;
	}

	// Remove mw:* from the typeof.
	$stripped = preg_replace( '/(?:^|\s)mw:[^\/]*(\/\S+|(?=$|\s))/D', '', $typeOf );
	$meta->setAttribute( 'typeof', $stripped );
}
347 | |
/**
 * Follow the nesting map upwards from $startId and return the last range ID
 * reached, i.e. the first ID that has no entry in $nestingInfo.
 *
 * @param array $nestingInfo Maps a range ID to the ID of the range it is nested in
 * @param ?string $startId ID to start from; returned as is when it has no entry
 * @return ?string
 * @throws Error if the walk revisits an ID (the map contains a cycle)
 */
private static function findToplevelEnclosingRange(
	array $nestingInfo, ?string $startId
): ?string {
	// No cycles are expected here since they are suppressed when the map
	// is built; the bookkeeping below turns a violation into an Error
	// instead of an endless loop.
	$seen = [];
	for ( $current = $startId; isset( $nestingInfo[$current] ); $current = $nestingInfo[$current] ) {
		if ( isset( $seen[$current] ) ) {
			throw new Error( "Found a cycle in tpl-range nesting where there shouldn't have been one." );
		}
		$seen[$current] = true;
	}
	return $current;
}
365 | |
/**
 * Append a template's info to the compound-template list stored under
 * $compoundTplId in $this->compoundTpls.
 *
 * Entries are appended in this order: any source text lying between the
 * end of the previously recorded entry and the start of this range, the
 * start element's unwrapped wikitext (if any), and finally a
 * CompoundTemplateInfo for this template.
 *
 * @param string $compoundTplId
 * @param DOMRangeInfo $range
 * @param TemplateInfo $templateInfo
 */
private function recordTemplateInfo(
	string $compoundTplId, DOMRangeInfo $range, TemplateInfo $templateInfo
): void {
	$this->compoundTpls[$compoundTplId] ??= [];

	$startDP = DOMDataUtils::getDataParsoid( $range->startElem );
	$startDsr = $startDP->dsr;

	// Record any intervening wikitext between templates that are part of
	// the same compound structure.
	$recorded = $this->compoundTpls[$compoundTplId];
	if ( $recorded !== [] ) {
		$previous = PHPUtils::lastItem( $recorded );
		$gapStart = $previous->dsr->end;
		if ( $gapStart < $startDsr->start ) {
			$gapWidth = $startDsr->start - $gapStart;
			$this->compoundTpls[$compoundTplId][] = PHPUtils::safeSubstr(
				$this->frame->getSrcText(), $gapStart, $gapWidth
			);
		}
	}

	if ( !empty( $startDP->unwrappedWT ) ) {
		$this->compoundTpls[$compoundTplId][] = (string)$startDP->unwrappedWT;
	}

	// Get rid of src-offsets since they aren't needed anymore.
	foreach ( $templateInfo->paramInfos as $paramInfo ) {
		$paramInfo->srcOffsets = null;
	}

	$isParam = DOMUtils::hasTypeOf( $range->startElem, 'mw:Param' );
	$this->compoundTpls[$compoundTplId][] = new CompoundTemplateInfo(
		$startDsr, $templateInfo, $isParam
	);
}
406 | |
/**
 * Determine whether adding the given range would introduce a cycle in the
 * subsumedRanges graph.
 *
 * Nesting cycles with multiple ranges can show up because of foster
 * parenting scenarios if they are not detected and suppressed.
 *
 * The walk starts at the range that $end is subsumed by and follows the
 * graph upwards. It reports a cycle when it reaches $start, and also when
 * it revisits any node, so a graph that already contains a loop cannot
 * keep this method spinning forever.
 *
 * @param string $start The ID of the new range
 * @param string $end The ID of the other range
 * @param string[] $subsumedRanges The subsumed ranges graph, encoded as an
 *  array in which each element maps one string range ID to another range ID
 * @return bool
 */
private static function introducesCycle( string $start, string $end, array $subsumedRanges ): bool {
	$visited = [ $start => true ];
	$elt = $subsumedRanges[$end] ?? null;
	while ( $elt ) {
		if ( !empty( $visited[$elt] ) ) {
			return true;
		}
		// Remember every node on the path, not just $start, so that the
		// walk is guaranteed to terminate.
		$visited[$elt] = true;
		$elt = $subsumedRanges[$elt] ?? null;
	}
	return false;
}
431 | |
/**
 * Determine whether DOM ranges overlap.
 *
 * The `inSiblingOrder` check here is sufficient to determine overlaps
 * because the algorithm in `findWrappableTemplateRanges` will put the
 * start/end elements for intersecting ranges on the same plane and prev/
 * curr are in textual order (which translates to dom order).
 *
 * For a flipped range, the roles of its start and end nodes are swapped
 * before comparing.
 *
 * @param DOMRangeInfo $prev
 * @param DOMRangeInfo $curr
 * @return bool
 */
private static function rangesOverlap( DOMRangeInfo $prev, DOMRangeInfo $curr ): bool {
	$prevEnd = $prev->flipped ? $prev->start : $prev->end;
	$currStart = $curr->flipped ? $curr->end : $curr->start;
	return DOMUtils::inSiblingOrder( $currStart, $prevEnd );
}
449 | |
/**
 * Identify the elements of $tplRanges that are non-overlapping.
 * Record template info in $this->compoundTpls as we go.
 *
 * Works in three passes:
 *  1. tag every element between each range's boundaries with that range;
 *  2. walk up from each range start to detect ranges nested in others;
 *  3. process ranges in source order, dropping nested ranges, merging
 *     overlapping ones into their predecessor and emitting the rest.
 * Marker elements of dropped/merged ranges are removed or stripped from
 * the DOM along the way.
 *
 * @param Node $docRoot
 * @param DOMRangeInfo[] $tplRanges The potentially overlapping ranges
 * @return DOMRangeInfo[] The non-overlapping ranges
 */
public function findTopLevelNonOverlappingRanges( Node $docRoot, array $tplRanges ): array {
	// For each node, assign an attribute that is a record of all
	// tpl ranges it belongs to at the top-level.
	foreach ( $tplRanges as $r ) {
		$n = !$r->flipped ? $r->start : $r->end;
		$e = !$r->flipped ? $r->end : $r->start;

		while ( $n ) {
			if ( $n instanceof Element ) {
				$this->addNodeRange( $n, $r );
				// Done
				if ( $n === $e ) {
					break;
				}
			}

			$n = $n->nextSibling;
		}
	}

	// In the first pass over `numRanges` below, `subsumedRanges` is used to
	// record purely the nested ranges. However, in the second pass, we also
	// add the relationships between overlapping ranges so that
	// `findToplevelEnclosingRange` can use that information to add `argInfo`
	// to the right `compoundTpls`. This scenario can come up when you have
	// three ranges, 1 intersecting with 2 but not 3, and 3 nested in 2.
	$subsumedRanges = [];

	// For each range r:(s, e), walk up from s --> docRoot and if any of
	// these nodes have tpl-ranges (besides r itself) assigned to them,
	// then r is nested in those other templates and can be ignored.
	foreach ( $tplRanges as $r ) {
		$n = $r->start;

		while ( $n !== $docRoot ) {
			$ranges = $this->getNodeRanges( $n );
			if ( $ranges ) {
				if ( $n !== $r->start ) {
					// 'r' is nested for sure
					// Record the outermost range in which 'r' is nested.
					$outermostId = null;
					$outermostOffset = null;
					foreach ( $ranges as $rangeId => $range ) {
						if ( $outermostId === null
							|| $range->startOffset < $outermostOffset
						) {
							$outermostId = $rangeId;
							$outermostOffset = $range->startOffset;
						}
					}
					$subsumedRanges[$r->id] = (string)$outermostId;
					break;
				} else {
					// n === r.start
					//
					// We have to make sure this is not an overlap scenario.
					// Find the ranges that r.start and r.end belong to and
					// compute their intersection. If this intersection has
					// another tpl range besides r itself, we have a winner!
					//
					// The code below does the above check efficiently.
					$eTpls = $this->getNodeRanges( $r->end );
					$foundNesting = false;

					foreach ( $ranges as $otherId => $other ) {
						// - Don't record nesting cycles.
						// - Record the outermost range in which 'r' is nested in.
						if ( $otherId !== $r->id &&
							!empty( $eTpls[$otherId] ) &&
							// When we have identical ranges, pick the range with
							// the larger offset to be subsumed.
							( $r->start !== $other->start ||
								$r->end !== $other->end ||
								$other->startOffset < $r->startOffset
							) &&
							!self::introducesCycle( $r->id, (string)$otherId, $subsumedRanges )
						) {
							$foundNesting = true;
							if ( !isset( $subsumedRanges[$r->id] ) ||
								$other->startOffset < $ranges[$subsumedRanges[$r->id]]->startOffset
							) {
								$subsumedRanges[$r->id] = (string)$otherId;
							}
						}
					}

					if ( $foundNesting ) {
						// 'r' is nested
						break;
					}
				}
			}

			// Move up
			$n = $n->parentNode;
		}
	}

	// Sort by start offset in source wikitext
	usort( $tplRanges, static function ( $r1, $r2 ) {
		return $r1->startOffset <=> $r2->startOffset;
	} );

	// Since the tpl ranges are sorted in textual order (by start offset),
	// it is sufficient to only look at the most recent template to see
	// if the current one overlaps with the previous one.
	//
	// This works because we've already identified nested ranges and can ignore them.

	$newRanges = [];
	$prev = null;

	foreach ( $tplRanges as $r ) {
		$endTagToRemove = null;
		$startTagToStrip = null;

		// Extract tplargInfo
		$tmp = DOMDataUtils::getDataParsoid( $r->startElem )->getTemp();
		$templateInfo = $tmp->tplarginfo ?? null;

		$this->verifyTplInfoExpectation( $templateInfo, $tmp );

		$this->env->log( "trace/{$this->traceType}/merge", static function () use ( &$r ) {
			$msg = '';
			$dp1 = DOMDataUtils::getDataParsoid( $r->start );
			$dp2 = DOMDataUtils::getDataParsoid( $r->end );
			// Temporarily detach tmp data so it is left out of the JSON dump
			$tmp1 = $dp1->tmp;
			$tmp2 = $dp2->tmp;
			$dp1->tmp = null;
			$dp2->tmp = null;
			$msg .= "\n##############################################";
			$msg .= "\nrange " . $r->id . '; r-start-elem: ' . DOMCompat::getOuterHTML( $r->startElem ) .
				'; DP: ' . PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $r->startElem ) );
			$msg .= "\nrange " . $r->id . '; r-end-elem: ' . DOMCompat::getOuterHTML( $r->endElem ) .
				'; DP: ' . PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $r->endElem ) );
			$msg .= "\nrange " . $r->id . '; r-start: [TAG_ID ' . ( $tmp1->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $r->start ) . '; DP: ' . PHPUtils::jsonEncode( $dp1 );
			$msg .= "\nrange " . $r->id . '; r-end: [TAG_ID ' . ( $tmp2->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $r->end ) . '; DP: ' . PHPUtils::jsonEncode( $dp2 );
			$msg .= "\n----------------------------------------------";
			$dp1->tmp = $tmp1;
			$dp2->tmp = $tmp2;
			return $msg;
		} );

		$enclosingRangeId = self::findToplevelEnclosingRange(
			$subsumedRanges,
			$subsumedRanges[$r->id] ?? null
		);
		if ( $enclosingRangeId ) {
			$this->env->log( "trace/{$this->traceType}/merge", '--nested in ', $enclosingRangeId, '--' );

			// Nested -- ignore r
			$startTagToStrip = $r->startElem;
			$endTagToRemove = $r->endElem;
			if ( $templateInfo ) {
				// 'r' is nested in 'enclosingRange' at the top-level
				// So, enclosingRange gets r's argInfo
				$this->recordTemplateInfo( $enclosingRangeId, $r, $templateInfo );
			}
		} elseif ( $prev && self::rangesOverlap( $prev, $r ) ) {
			// In the common case, in overlapping scenarios, r.start is
			// identical to prev.end. However, in fostered content scenarios,
			// there can be true overlap of the ranges.
			$this->env->log( "trace/{$this->traceType}/merge", '--overlapped--' );

			// See comment above, where `subsumedRanges` is defined.
			$subsumedRanges[$r->id] = $prev->id;

			// Overlapping ranges.
			// r is the regular kind
			// Merge r with prev

			// Note that if a table comes from a template, a foster box isn't
			// emitted so the enclosure isn't guaranteed. In pathological
			// cases, like where the table end tag isn't emitted, we can still
			// end up with flipped ranges if the template end marker gets into
			// a fosterable position (which can still happen despite being
			// emitted as a comment).
			Assert::invariant( !$r->flipped,
				'Flipped range should have been enclosed.'
			);

			$startTagToStrip = $r->startElem;
			$endTagToRemove = $prev->endElem;

			$prev->end = $r->end;
			$prev->endElem = $r->endElem;
			if ( WTUtils::isMarkerAnnotation( $r->endElem ) ) {
				$endDataMw = DOMDataUtils::getDataMw( $r->endElem );
				$endDataMw->rangeId = $r->id;
				$prev->extendedByOverlapMerge = true;
			}

			// Update compoundTplInfo
			if ( $templateInfo ) {
				$this->recordTemplateInfo( $prev->id, $r, $templateInfo );
			}
		} else {
			$this->env->log( "trace/{$this->traceType}/merge", '--normal--' );

			// Default -- no overlap
			// Emit the merged range
			$newRanges[] = $r;
			$prev = $r;

			// Update compoundTpls
			if ( $templateInfo ) {
				$this->recordTemplateInfo( $r->id, $r, $templateInfo );
			}
		}

		if ( $endTagToRemove ) {
			// Remove start and end meta-tags
			// Not necessary to remove the start tag, but good to cleanup
			$endTagToRemove->parentNode->removeChild( $endTagToRemove );
			self::stripStartMeta( $startTagToStrip );
		}
	}

	return $newRanges;
}
680 | |
/**
 * Name of the first node in the range that is neither the template marker
 * meta nor fostered content.
 *
 * Note that the case of nodeName varies with DOM implementation. This
 * method currently forces the name to uppercase. In the future we
 * can/should switch to using the "native" case of the DOM implementation;
 * we do a case-insensitive match (by converting the result to the "native"
 * case of the DOM implementation) in EncapsulatedContentHandler when this
 * value is used.
 *
 * @param DOMRangeInfo $range
 * @return string|null nodeName with an optional "_$stx" suffix, or null
 *  when the node found is a meta.
 */
private static function findFirstTemplatedNode( DOMRangeInfo $range ): ?string {
	$candidate = $range->start;

	// Skip tpl marker meta
	if ( WTUtils::isTplMarkerMeta( $candidate ) ) {
		$candidate = $candidate->nextSibling;
	}

	// Walk past fostered nodes since they came from within a table
	// Note that this is not foolproof because in some scenarios,
	// fostered content is not marked up. Ex: when a table is templated,
	// and content from the table is fostered.
	$candidateDP = DOMDataUtils::getDataParsoid( $candidate );
	while ( !empty( $candidateDP->fostered ) ) {
		$candidate = $candidate->nextSibling;
		/** @var Element $candidate */
		DOMUtils::assertElt( $candidate );
		$candidateDP = DOMDataUtils::getDataParsoid( $candidate );
	}

	$rawName = DOMCompat::nodeName( $candidate );

	// FIXME: It is harder to use META as a node name since this is a generic
	// placeholder for a whole bunch of things each of which has its own
	// newline constraint requirements. So, for now, I am skipping that
	// can of worms to prevent confusing the serializer with an overloaded
	// tag name.
	if ( $rawName === 'meta' ) {
		return null;
	}

	// FIXME spec-compliant values would be upper-case, this is just a workaround
	// for current PHP DOM implementation and could be removed in the future
	// See discussion in the method comment above.
	$upperName = mb_strtoupper( $rawName, "UTF-8" );

	if ( empty( $candidateDP->stx ) ) {
		return $upperName;
	}
	return $upperName . '_' . $candidateDP->stx;
}
727 | |
/**
 * Encapsulation requires adding about attributes on the top-level
 * nodes of the range. This requires them to all be Elements.
 *
 * Walks the siblings from range start to range end: elements get the
 * about id of the range's start element, non-elements in fosterable
 * positions are removed, and all other non-elements are wrapped in a
 * span that carries the about id.
 *
 * @param DOMRangeInfo $range
 */
private function ensureElementsInRange( DOMRangeInfo $range ): void {
	$lastNode = $range->end;
	$about = DOMCompat::getAttribute( $range->startElem, 'about' );

	$following = null;
	for ( $current = $range->start; $current; $current = $following ) {
		// Grab the sibling up front; $current may be removed or re-parented below
		$following = $current->nextSibling;

		if ( $current instanceof Element ) {
			$current->setAttribute( 'about', $about );
		} elseif ( DOMUtils::isFosterablePosition( $current ) ) {
			// NOTE: There cannot be any non-IEW text in fosterable position
			// since the HTML tree builder would already have fostered it out.
			// So, any non-element node found here is safe to delete since:
			// (a) this has no rendering output impact, and
			// (b) data-mw captures template output => we don't need
			//     to preserve this for html2wt either. Removing this
			//     lets us preserve DOM range continuity.
			$current->parentNode->removeChild( $current );
		} else {
			// Add a span wrapper to let us add about-ids to represent
			// the DOM range as a contiguous chain of DOM nodes.
			$wrapper = $this->document->createElement( 'span' );
			$wrapper->setAttribute( 'about', $about );
			$wrapperDP = new DataParsoid;
			$wrapperDP->setTempFlag( TempData::WRAPPER );
			DOMDataUtils::setDataParsoid( $wrapper, $wrapperDP );
			$current->parentNode->replaceChild( $wrapper, $current );
			$wrapper->appendChild( $current );
			$current = $wrapper;
		}

		if ( $current === $lastNode ) {
			break;
		}
	}
}
771 | |
/**
 * Locate the element that will carry the encapsulation information.
 *
 * Starting at the range's start node, step over template marker metas
 * and over non-element nodes (which are expected to be IEW sitting in
 * fosterable positions of a table) until a regular element is found.
 *
 * @param DOMRangeInfo $range
 * @return Element
 */
private static function findEncapTarget( DOMRangeInfo $range ): Element {
	$encapTgt = $range->start;
	'@phan-var Node $encapTgt';

	for ( ; ; $encapTgt = $encapTgt->nextSibling ) {
		// A non-marker element is what we are looking for.
		if ( !WTUtils::isTplMarkerMeta( $encapTgt ) &&
			$encapTgt instanceof Element
		) {
			break;
		}

		// Unwrappable template: either we ran out of range, or we hit a
		// non-element node outside a fosterable position. Bail out early.
		$exhausted = ( $encapTgt === $range->end );
		if ( $exhausted ||
			( !( $encapTgt instanceof Element ) &&
				!DOMUtils::isFosterablePosition( $encapTgt )
			)
		) {
			throw new Error( 'Cannot encapsulate transclusion. Start=' .
				DOMCompat::getOuterHTML( $range->startElem ) );
		}
	}

	'@phan-var Element $encapTgt';
	return $encapTgt;
}
803 | |
/**
 * Add markers to the DOM around the non-overlapping ranges.
 *
 * For every range this:
 *  - makes all top-level nodes of the range elements tagged with the about id,
 *  - picks the encapsulation target element and copies the typeof values
 *    of the start marker onto it,
 *  - computes the DSR of the whole range and, when that DSR is valid,
 *    records the template parts in data-mw and the param infos / DSR / src
 *    in data-parsoid of the target,
 *  - logs an error when the DSR information is insufficient,
 *  - removes the start marker (if it is a marker meta) and the end marker.
 *
 * @param DOMRangeInfo[] $nonOverlappingRanges
 */
private function encapsulateTemplates( array $nonOverlappingRanges ): void {
	foreach ( $nonOverlappingRanges as $i => $range ) {

		// We should never have flipped overlapping ranges, and indeed that's
		// asserted in `findTopLevelNonOverlappingRanges`. Flipping results
		// in either completely nested ranges, or non-intersecting ranges.
		//
		// If the table causing the fostering is not transcluded, we emit a
		// foster box and wrap the whole table+fb in metas, producing nested
		// ranges. For ex,
		//
		// <table>
		// {{1x|<div>}}
		//
		// The tricky part is when the table *is* transcluded, and we omit the
		// foster box. The common case (for some definition of common) might
		// be like,
		//
		// {{1x|<table>}}
		// {{1x|<div>}}
		//
		// Here, #mwt1 leaves a table open and the end meta from #mwt2 is
		// fostered, since it gets closed into the div. The range for #mwt1
		// is the entire table, which thankfully contains #mwt2, so we still
		// have the expected entire nesting. Any tricks to extend the range
		// of #mwt2 beyond the table (so that we have an overlapping range) will
		// inevitably result in the end meta not being fostered, and we avoid
		// this situation altogether.
		//
		// The very edgy case is as follows,
		//
		// {{1x|<table><div>}}</div>
		// {{1x|<div>}}
		//
		// where both end metas are fostered. Ignoring that we don't even
		// roundtrip the first transclusion properly on its own, here we have
		// a flipped range where, since the end meta for the first range was
		// also fostered, the ranges still don't overlap.

		// FIXME: The code below needs to be aware of flipped ranges.

		$this->ensureElementsInRange( $range );

		$tplArray = $this->compoundTpls[$range->id] ?? null;
		Assert::invariant( (bool)$tplArray, 'No parts for template range!' );

		$encapTgt = self::findEncapTarget( $range );
		$encapValid = false;
		$encapDP = DOMDataUtils::getDataParsoid( $encapTgt );

		// Update type-of (always even if tpl-encap below will fail).
		// This ensures that VE will still "edit-protect" this template
		// and not allow its content to be edited directly.
		$startElem = $range->startElem;
		if ( $startElem !== $encapTgt ) {
			$t1 = DOMCompat::getAttribute( $startElem, 'typeof' );
			if ( $t1 !== null ) {
				foreach ( array_reverse( explode( ' ', $t1 ) ) as $t ) {
					DOMUtils::addTypeOf( $encapTgt, $t, true );
				}
			}
		}

		/* ----------------------------------------------------------------
		 * We'll attempt to update dp1.dsr to reflect the entire range of
		 * the template. This relies on a couple observations:
		 *
		 * 1. In the common case, dp2.dsr->end will be > dp1.dsr->end
		 *    If so, new range = dp1.dsr->start, dp2.dsr->end
		 *
		 * 2. But, foster parenting can complicate this when range.end is a table
		 *    and range.start has been fostered out of the table (range.end).
		 *    But, we need to verify this assumption.
		 *
		 *    2a. If dp2.dsr->start is smaller than dp1.dsr->start, this is a
		 *        confirmed case of range.start being fostered out of range.end.
		 *
		 *    2b. If dp2.dsr->start is unknown, we rely on fostered flag on
		 *        range.start, if any.
		 * ---------------------------------------------------------------- */
		$dp1 = DOMDataUtils::getDataParsoid( $range->start );
		// Work on a copy so that the node's own dsr object is not mutated
		// while the range DSR is being computed. Null when there is no dsr.
		$dp1DSR = isset( $dp1->dsr ) ? clone $dp1->dsr : null;
		$dp2DSR = self::getRangeEndDSR( $range );

		if ( $dp1DSR ) {
			if ( $dp2DSR ) {
				// Case 1. above
				if ( $dp2DSR->end > $dp1DSR->end ) {
					$dp1DSR->end = $dp2DSR->end;
				}

				// Case 2. above
				$endDsr = $dp2DSR->start;
				if ( DOMCompat::nodeName( $range->end ) === 'table' &&
					$endDsr !== null &&
					( $endDsr < $dp1DSR->start || !empty( $dp1->fostered ) )
				) {
					$dp1DSR->start = $endDsr;
				}
			}

			// encapsulation possible only if dp1.dsr is valid
			$encapValid = Utils::isValidDSR( $dp1DSR ) &&
				$dp1DSR->end >= $dp1DSR->start;
		}

		if ( $encapValid ) {
			// Find transclusion info from the array (skip past a wikitext element)
			/** @var CompoundTemplateInfo $firstTplInfo */
			$firstTplInfo = is_string( $tplArray[0] ) ? $tplArray[1] : $tplArray[0];

			// Add any leading wikitext
			if ( $firstTplInfo->dsr->start > $dp1DSR->start ) {
				// This gap in dsr (between the final encapsulated content, and the
				// content that actually came from a template) is indicative of this
				// being a mixed-template-content-block and/or multi-template-content-block
				// scenario.
				//
				// In this case, record the name of the first node in the encapsulated
				// content. During html -> wt serialization, newline constraints for
				// this entire block has to be determined relative to this node.
				$ftn = self::findFirstTemplatedNode( $range );
				if ( $ftn !== null ) {
					$encapDP->firstWikitextNode = $ftn;
				}
				$width = $firstTplInfo->dsr->start - $dp1DSR->start;
				array_unshift(
					$tplArray,
					PHPUtils::safeSubstr( $this->frame->getSrcText(), $dp1DSR->start, $width )
				);
			}

			// Add any trailing wikitext
			/** @var CompoundTemplateInfo $lastTplInfo */
			$lastTplInfo = PHPUtils::lastItem( $tplArray );
			if ( $lastTplInfo->dsr->end < $dp1DSR->end ) {
				$width = $dp1DSR->end - $lastTplInfo->dsr->end;
				$tplArray[] = PHPUtils::safeSubstr( $this->frame->getSrcText(), $lastTplInfo->dsr->end, $width );
			}

			// Map the array of { dsr: .. , args: .. } objects to just the args property
			$infoIndex = 0;
			$parts = [];
			$pi = [];
			foreach ( $tplArray as $a ) {
				if ( is_string( $a ) ) {
					$parts[] = $a;
				} elseif ( $a instanceof CompoundTemplateInfo ) {
					// Remember the position of the transclusion relative
					// to other transclusions. Should match the index of
					// the corresponding private metadata in $templateInfos.
					$a->info->i = $infoIndex++;
					$a->info->type = 'template';
					if ( $a->isParam ) {
						$a->info->type = 'templatearg';
					} elseif ( $a->info->func ) {
						$a->info->type = 'parserfunction';
					}
					$parts[] = $a->info;
					// FIXME: we throw away the array keys and rebuild them
					// again in WikitextSerializer
					$pi[] = array_values( $a->info->paramInfos );
				}
			}

			// Set up dsr->start, dsr->end, and data-mw on the target node
			// Avoid clobbering existing (ex: extension) data-mw information (T214241)
			$encapDataMw = DOMDataUtils::getDataMw( $encapTgt );
			$encapDataMw->parts = $parts;
			DOMDataUtils::setDataMw( $encapTgt, $encapDataMw );
			$encapDP->pi = $pi;

			// Special case when mixed-attribute-and-content templates are
			// involved. This information is reliable and comes from the
			// AttributeExpander and gets around the problem of unmarked
			// fostered content that findFirstTemplatedNode runs into.
			$firstWikitextNode = DOMDataUtils::getDataParsoid(
				$range->startElem
			)->firstWikitextNode ?? null;
			if ( empty( $encapDP->firstWikitextNode ) && $firstWikitextNode ) {
				$encapDP->firstWikitextNode = $firstWikitextNode;
			}
		} else {
			$errors = [ 'Do not have necessary info. to encapsulate Tpl: ' . $i ];
			$errors[] = 'Start Elt : ' . DOMCompat::getOuterHTML( $startElem );
			$errors[] = 'End Elt   : ' . DOMCompat::getOuterHTML( $range->endElem );
			$errors[] = 'Start DSR : ' . PHPUtils::jsonEncode( $dp1DSR ?? 'no-start-dsr' );
			$errors[] = 'End   DSR : ' . PHPUtils::jsonEncode( $dp2DSR ?? [] );
			$this->env->log( 'error', implode( "\n", $errors ) );
		}

		// Make DSR range zero-width for fostered templates after
		// setting up data-mw. However, since template encapsulation
		// sometimes captures both fostered content as well as the table
		// from which it was fostered, in those scenarios, we should
		// leave DSR info untouched.
		//
		// SSS FIXME:
		// 1. Should we remove the fostered flag from the entire
		// encapsulated block if we don't set dsr width range to zero
		// since only part of the block is fostered, not the entire
		// encapsulated block?
		//
		// 2. In both cases, should we mark these uneditable by adding
		// mw:Placeholder to the typeof?
		if ( !empty( $dp1->fostered ) ) {
			$encapDataMw = DOMDataUtils::getDataMw( $encapTgt );
			// $dp1DSR is null when the range start carries no dsr at all
			// (the error path above); there is nothing to collapse then,
			// and dereferencing it would throw.
			if ( $dp1DSR !== null &&
				( !$encapDataMw ||
					!$encapDataMw->parts ||
					count( $encapDataMw->parts ) === 1
				)
			) {
				$dp1DSR->end = $dp1DSR->start;
			}
		}

		// Update DSR after fostering-related fixes are done.
		if ( $encapValid ) {
			// encapInfo.dp points to DOMDataUtils.getDataParsoid(encapInfo.target)
			// and all updates below update properties in that object tree.
			if ( empty( $encapDP->dsr ) ) {
				$encapDP->dsr = $dp1DSR;
			} else {
				$encapDP->dsr->start = $dp1DSR->start;
				$encapDP->dsr->end = $dp1DSR->end;
			}
			$encapDP->src = $encapDP->dsr->substr(
				$this->frame->getSrcText()
			);
		}

		// Remove startElem (=range.startElem) if a meta. If a meta,
		// it is guaranteed to be a marker meta added to mark the start
		// of the template.
		if ( WTUtils::isTplMarkerMeta( $startElem ) ) {
			$startElem->parentNode->removeChild( $startElem );
		}

		$range->endElem->parentNode->removeChild( $range->endElem );
	}
}
1049 | |
/**
 * Register a range against a node, keyed by the range's id.
 *
 * @param Element $node
 * @param DOMRangeInfo $range
 */
private function addNodeRange( Element $node, DOMRangeInfo $range ): void {
	// With the native DOM extension, DOMNode objects are normally treated
	// as temporary: each DOM traversal may hand out a fresh DOMNode.
	// Holding a reference in the SplObjectStorage keeps the DOMNode object
	// alive while the pass is active, so its address can serve as an index.
	$holder = $this->nodeRanges[$node] ?? null;
	if ( $holder === null ) {
		// The stored data has to be an object because
		// SplObjectStorage::offsetGet() does not provide an lval.
		$holder = new DOMRangeInfoArray;
		$this->nodeRanges[$node] = $holder;
	}
	$holder->ranges[$range->id] = $range;
}
1069 | |
/**
 * Look up the ranges registered against a node.
 *
 * @param Element $node
 * @return DOMRangeInfo[]|null Ranges indexed by range ID, or null when
 *   nothing is stored for the node
 */
private function getNodeRanges( Element $node ): ?array {
	if ( !isset( $this->nodeRanges[$node] ) ) {
		return null;
	}
	return $this->nodeRanges[$node]->ranges ?? null;
}
1079 | |
/**
 * Collect the wrappable template ranges found in the subtree below
 * $rootNode by delegating to the recursive walker.
 *
 * @param Node $rootNode
 * @return DOMRangeInfo[]
 */
protected function findWrappableMetaRanges( Node $rootNode ): array {
	// Scratch map of start/end elements by range id; only the walker uses it.
	$markersById = [];
	$foundRanges = [];
	$this->findWrappableTemplateRangesRecursive( $rootNode, $markersById, $foundRanges );
	return $foundRanges;
}
1092 | |
/**
 * Recursive helper for findWrappableMetaRanges().
 *
 * Walks the children of $rootNode. For every element that matches the
 * meta type this class handles (see matchMetaType()), the start/end
 * marker is recorded in $tpls under its range id; once both markers of a
 * range are known, the DOM range is computed and appended to $tplRanges.
 * Elements that are not handled as markers are descended into.
 *
 * @param Node $rootNode
 * @param ElementRange[] &$tpls Template start and end elements by ID
 * @param DOMRangeInfo[] &$tplRanges Template range info
 * @throws UnreachableException if a start marker is seen for a range that
 *   is already known but has no end marker
 * @throws RangeBuilderException if an end marker is seen for a range that
 *   is already known but has no start marker
 */
private function findWrappableTemplateRangesRecursive(
	Node $rootNode, array &$tpls, array &$tplRanges
): void {
	$elem = $rootNode->firstChild;

	while ( $elem ) {
		// get the next sibling before doing anything since
		// we may delete elem as part of encapsulation
		$nextSibling = $elem->nextSibling;

		if ( $elem instanceof Element ) {
			$metaType = $this->matchMetaType( $elem );

			// Ignore templates without tsr.
			//
			// These are definitely nested in other templates / extensions
			// and need not be wrapped themselves since they
			// can never be edited directly.
			//
			// NOTE: We are only testing for tsr presence on the start-elem
			// because wikitext errors can lead to parse failures and no tsr
			// on end-meta-tags.
			//
			// Ex: "<ref>{{1x|bar}}<!--bad-></ref>"
			if ( $metaType !== null &&
				( !empty( DOMDataUtils::getDataParsoid( $elem )->tsr ) ||
					str_ends_with( $metaType, '/End' )
				)
			) {
				$about = $this->getRangeId( $elem );
				$tpl = $tpls[$about] ?? null;
				// Is this a start marker?
				if ( !str_ends_with( $metaType, '/End' ) ) {
					if ( $tpl ) {
						$tpl->startElem = $elem;
						// content or end marker existed already
						if ( !empty( $tpl->endElem ) ) {
							// End marker was foster-parented.
							// Found actual start tag.
							$tplRanges[] = $this->getDOMRange(
								$elem, $tpl->endElem, $tpl->endElem );
						} else {
							// should not happen!
							throw new UnreachableException( "start found after content for $about." );
						}
					} else {
						$tpl = new ElementRange;
						$tpl->startElem = $elem;
						$tpls[$about] = $tpl;
					}
				} else {
					// elem is the end-meta tag
					if ( $tpl ) {
						/* ------------------------------------------------------------
						 * Special case: In some cases, the entire template content can
						 * get fostered out of a table, not just the start/end marker.
						 *
						 * Simplest example:
						 *
						 *   {|
						 *   {{1x|foo}}
						 *   |}
						 *
						 * More complex example:
						 *
						 *   {|
						 *   {{1x|
						 *   a
						 *    b
						 *
						 *     c
						 *   }}
						 *   |}
						 *
						 * Since meta-tags don't normally get fostered out, this scenario
						 * only arises when the entire content including meta-tags was
						 * wrapped in p-tags. So, we look to see if:
						 * 1. the end-meta-tag's parent has a table sibling,
						 * 2. the start meta's parent is marked as fostered.
						 * If so, we recognize this as an adoption scenario: the table
						 * becomes the end of the range and, when the table has no
						 * known DSR start, it is set from the tsr start of the
						 * start-meta-tag's parent.
						 * ------------------------------------------------------------*/
						$sm = $tpl->startElem;

						// TODO: this should only happen in fairly specific cases of the
						// annotation processing and should eventually be handled properly.
						// In the meantime, we create and log an exception to have an idea
						// of the amplitude of the problem.
						if ( $sm === null ) {
							throw new RangeBuilderException( 'No start tag found for the range' );
						}
						$em = $elem;
						$ee = $em;
						$tbl = $em->parentNode->nextSibling;

						// Don't get distracted by a newline node -- skip over it
						// Unsure why it shows up occasionally
						if ( $tbl && $tbl instanceof Text && $tbl->nodeValue === "\n" ) {
							$tbl = $tbl->nextSibling;
						}

						$dp = !DOMUtils::atTheTop( $sm->parentNode ) ?
							DOMDataUtils::getDataParsoid( $sm->parentNode ) : null;
						if ( $tbl && DOMCompat::nodeName( $tbl ) === 'table' && !empty( $dp->fostered ) ) {
							'@phan-var Element $tbl'; /** @var Element $tbl */
							$tblDP = DOMDataUtils::getDataParsoid( $tbl );
							// isset() is false for a null value, so the table's
							// "unknown start" state must be tested on the dsr
							// object itself and then compared against null;
							// isset( ...->dsr->start ) could never see it.
							if ( isset( $dp->tsr->start ) &&
								isset( $tblDP->dsr ) && $tblDP->dsr->start === null
							) {
								$tblDP->dsr->start = $dp->tsr->start;
							}
							$tbl->setAttribute( 'about', $about ); // set about on elem
							$ee = $tbl;
						}
						$tplRanges[] = $this->getDOMRange( $sm, $em, $ee );
					} else {
						// The end tag can appear before the start tag if it is fostered out
						// of the table and the start tag is not.
						// It can even technically happen that both tags are fostered out of
						// a table and that the range is flipped: while the fostered content of
						// single table is fostered in-order, the ordering might change
						// across tables if the tags are not initially fostered by the same
						// table.
						$tpl = new ElementRange;
						$tpl->endElem = $elem;
						$tpls[$about] = $tpl;
					}
				}
			} else {
				$this->findWrappableTemplateRangesRecursive( $elem, $tpls, $tplRanges );
			}
		}

		$elem = $nextSibling;
	}
}
1236 | |
/**
 * Determine the meta type of an element, restricted to the kind of meta
 * this class processes. This class looks for the template type.
 *
 * @param Element $elem the element to check
 * @return string|null the matched meta type, or null when there is no match
 */
protected function matchMetaType( Element $elem ): ?string {
	$tplType = WTUtils::matchTplType( $elem );
	return $tplType;
}
1247 | |
/**
 * Check the expectation that a range without template info must be
 * flagged as coming from fostered content.
 *
 * @param ?TemplateInfo $templateInfo
 * @param TempData $tmp
 */
protected function verifyTplInfoExpectation( ?TemplateInfo $templateInfo, TempData $tmp ): void {
	if ( $templateInfo ) {
		// Template info is available; nothing to verify.
		return;
	}

	// A failure here probably indicates that template wrapping is
	// mistakenly being done in a nested context.
	Assert::invariant( $tmp->getFlag( TempData::FROM_FOSTER ), 'Template range without arginfo.' );
}
1255 | |
/**
 * Run the pass on a subtree: find the wrappable ranges below $root,
 * reduce them to top-level non-overlapping ranges and encapsulate those.
 *
 * @param Node $root
 */
public function execute( Node $root ): void {
	$candidateRanges = $this->findWrappableMetaRanges( $root );
	if ( count( $candidateRanges ) === 0 ) {
		// No ranges found, so there is nothing to encapsulate.
		return;
	}

	$topLevelRanges = $this->findTopLevelNonOverlappingRanges( $root, $candidateRanges );
	$this->encapsulateTemplates( $topLevelRanges );
}
1263 | |
/**
 * Build a range that encloses $startMeta and $endMeta.
 *
 * The range's start and end are set to the two sibling nodes, below the
 * closest common ancestor, that contain the start marker and the end
 * element respectively. They are left unset when no common ancestor is
 * found before reaching the document node.
 *
 * @param Element $startMeta
 * @param Element $endMeta
 * @param ?Element $endElem Element to use as the end of the range
 *   instead of $endMeta, when given
 * @return DOMRangeInfo
 */
protected function findEnclosingRange(
	Element $startMeta, Element $endMeta, ?Element $endElem = null
): DOMRangeInfo {
	$rangeId = Utils::stripParsoidIdPrefix( $this->getRangeId( $startMeta ) );
	$startOffset = DOMDataUtils::getDataParsoid( $startMeta )->tsr->start;
	$range = new DOMRangeInfo( $rangeId, $startOffset, $startMeta, $endMeta );

	// Climb from the end element until we hit a node that is also on the
	// start marker's path to the root: that is the common ancestor.
	$startAncestors = DOMUtils::pathToRoot( $startMeta );
	$child = $endElem ?? $endMeta;
	for (
		$ancestor = $child->parentNode;
		$ancestor && $ancestor->nodeType !== XML_DOCUMENT_NODE;
		$ancestor = $child->parentNode
	) {
		$pos = array_search( $ancestor, $startAncestors, true );
		if ( $pos === 0 ) {
			throw new UnreachableException(
				'The startMeta cannot be the common ancestor.'
			);
		}
		if ( $pos > 0 ) {
			// The entry just before the common ancestor on the start path
			// is the child of that ancestor which contains the start marker.
			$range->start = $startAncestors[$pos - 1];
			$range->end = $child;
			break;
		}
		$child = $ancestor;
	}

	return $range;
}
1303 | } |