Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 471 |
|
0.00% |
0 / 24 |
CRAP | |
0.00% |
0 / 1 |
DOMRangeBuilder | |
0.00% |
0 / 471 |
|
0.00% |
0 / 24 |
30102 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
updateDSRForFirstRangeNode | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
getRangeEndDSR | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
90 | |||
getRangeId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDOMRange | |
0.00% |
0 / 77 |
|
0.00% |
0 / 1 |
420 | |||
getStartConsideringFosteredContent | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
stripStartMeta | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
findToplevelEnclosingRange | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
recordTemplateInfo | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
30 | |||
introducesCycle | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
rangesOverlap | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
findTopLevelNonOverlappingRanges | |
0.00% |
0 / 109 |
|
0.00% |
0 / 1 |
1122 | |||
findFirstTemplatedNode | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
ensureElementsInRange | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
30 | |||
findEncapTarget | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
42 | |||
encapsulateTemplates | |
0.00% |
0 / 86 |
|
0.00% |
0 / 1 |
1056 | |||
addNodeRange | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getNodeRanges | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
findWrappableMetaRanges | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
findWrappableTemplateRangesRecursive | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
506 | |||
matchMetaType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
verifyTplInfoExpectation | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
execute | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
findEnclosingRange | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\PP\Processors; |
5 | |
6 | use Error; |
7 | use SplObjectStorage; |
8 | use Wikimedia\Assert\Assert; |
9 | use Wikimedia\Assert\UnreachableException; |
10 | use Wikimedia\Parsoid\Config\Env; |
11 | use Wikimedia\Parsoid\Core\DomSourceRange; |
12 | use Wikimedia\Parsoid\Core\ElementRange; |
13 | use Wikimedia\Parsoid\DOM\Document; |
14 | use Wikimedia\Parsoid\DOM\Element; |
15 | use Wikimedia\Parsoid\DOM\Node; |
16 | use Wikimedia\Parsoid\DOM\Text; |
17 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
18 | use Wikimedia\Parsoid\NodeData\TempData; |
19 | use Wikimedia\Parsoid\NodeData\TemplateInfo; |
20 | use Wikimedia\Parsoid\Utils\DOMCompat; |
21 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
22 | use Wikimedia\Parsoid\Utils\DOMUtils; |
23 | use Wikimedia\Parsoid\Utils\PHPUtils; |
24 | use Wikimedia\Parsoid\Utils\Utils; |
25 | use Wikimedia\Parsoid\Utils\WTUtils; |
26 | use Wikimedia\Parsoid\Wt2Html\Frame; |
27 | |
28 | /** |
29 | * Template encapsulation happens in three steps. |
30 | * |
31 | * 1. findWrappableTemplateRanges |
32 | * |
33 | * Locate start and end metas. Walk upwards towards the root from both and |
34 | * find a common ancestor A. The subtree rooted at A is now effectively the |
35 | * scope of the dom template output.
36 | * |
37 | * 2. findTopLevelNonOverlappingRanges |
38 | * |
39 | * Mark all nodes in a range and walk up to root from each range start to |
40 | * determine overlaps, nesting. Merge overlapping and nested ranges to find |
41 | * the subset of top-level non-overlapping ranges which will be wrapped as |
42 | * individual units. |
43 | * |
44 | * range.startElem, range.endElem are the start/end meta tags for a transclusion |
45 | * range.start, range.end are the start/end DOM nodes after the range is |
46 | * expanded, merged with other ranges, etc. In the simple cases, they will |
47 | * be identical to startElem, endElem. |
48 | * |
49 | * 3. encapsulateTemplates |
50 | * |
51 | * For each non-overlapping range, |
52 | * - compute a data-mw according to the DOM spec |
53 | * - replace the start / end meta markers with transclusion type and data-mw |
54 | * on the first DOM node |
55 | * - add about ids on all top-level nodes of the range |
56 | * |
57 | * This is a simple high-level overview of the 3 steps to help understand this |
58 | * code. |
59 | * |
60 | * FIXME: At some point, more of the details should be extracted and documented |
61 | * in pseudo-code as an algorithm. |
62 | * @module |
63 | */ |
64 | class DOMRangeBuilder { |
65 | |
66 | private const MAP_TBODY_TR = [ |
67 | 'tbody' => true, |
68 | 'tr' => true |
69 | ]; |
70 | |
71 | /** @var Document */ |
72 | private $document; |
73 | |
74 | /** @var Frame */ |
75 | private $frame; |
76 | |
77 | /** @var Env */ |
78 | protected $env; |
79 | |
80 | /** @var SplObjectStorage */ |
81 | protected $nodeRanges; |
82 | |
83 | /** @var array<string|CompoundTemplateInfo>[] */ |
84 | private $compoundTpls = []; |
85 | |
86 | /** @var string */ |
87 | protected $traceType; |
88 | |
/**
 * Set up a range builder for one document.
 *
 * @param Document $document Document stored for later use by this builder
 * @param Frame $frame Frame stored on the builder; also supplies the Env
 */
public function __construct( Document $document, Frame $frame ) {
	// Fixed defaults first, then the values derived from the arguments.
	$this->traceType = "tplwrap";
	$this->nodeRanges = new SplObjectStorage;
	$this->env = $frame->getEnv();
	$this->frame = $frame;
	$this->document = $document;
}
98 | |
/**
 * Copy DSR information from $source onto $target.
 *
 * When both nodes have an end offset and the target's is the larger one,
 * only the start offset is copied, so the bigger end-dsr is not overwritten.
 * In every other case the whole dsr object is cloned and `src` is copied too.
 *
 * @param Element $target Node whose data-parsoid is updated
 * @param Element $source Node whose data-parsoid is read
 */
protected function updateDSRForFirstRangeNode( Element $target, Element $source ): void {
	$from = DOMDataUtils::getDataParsoid( $source );
	$to = DOMDataUtils::getDataParsoid( $target );

	// Since TSRs on template content tokens are cleared by the
	// template handler, all computed dsr values for template content
	// are inferred from top-level content values and are safe.
	$targetEndIsBigger = isset( $from->dsr->end ) && isset( $to->dsr->end ) &&
		$to->dsr->end > $from->dsr->end;

	if ( !$targetEndIsBigger ) {
		$to->dsr = clone $from->dsr;
		$to->src = $from->src ?? null;
		return;
	}

	// Keep the target's (bigger) end offset; only move the start.
	$to->dsr->start = $from->dsr->start ?? null;
}
116 | |
/**
 * Get the DSR of the end of a DOMRange.
 *
 * If the range ends on an Element, that element's dsr is returned as is.
 * Otherwise the DSR is extrapolated from the closest preceding sibling that
 * is an Element: the lengths of the text/comment nodes in between, plus the
 * length of the end node itself, are added to that sibling's end offset.
 *
 * @param DOMRangeInfo $range
 * @return DomSourceRange|null
 */
private static function getRangeEndDSR( DOMRangeInfo $range ): ?DomSourceRange {
	$last = $range->end;
	if ( $last instanceof Element ) {
		return DOMDataUtils::getDataParsoid( $last )->dsr ?? null;
	}

	// In the rare scenario where the last node of a range is not an ELEMENT,
	// extrapolate based on the DSR of the first leftmost sibling that is one.
	// We don't try any harder than this for now.
	$skipped = 0;
	for ( $sib = $last->previousSibling;
		$sib && !( $sib instanceof Element );
		$sib = $sib->previousSibling
	) {
		if ( !( $sib instanceof Text ) ) {
			// A comment
			// @phan-suppress-next-line PhanTypeMismatchArgumentSuperType
			$skipped += WTUtils::decodedCommentLength( $sib );
		} else {
			$skipped += strlen( $sib->nodeValue );
		}
	}

	if ( !$sib ) {
		// No element sibling to anchor the computation on.
		return null;
	}

	/**
	 * The walk above only stops on an Element (or on null, handled above).
	 *
	 * @var Element $sib
	 */
	'@phan-var Element $sib';
	$anchor = DOMDataUtils::getDataParsoid( $sib )->dsr ?? null;
	if ( !$anchor || !is_int( $anchor->end ?? null ) ) {
		// Nothing usable to extrapolate from; hand back what we found.
		return $anchor;
	}

	$ownLength = $last instanceof Text
		? strlen( $last->nodeValue )
		: WTUtils::decodedCommentLength( $last );
	$from = $anchor->end + $skipped;

	return new DomSourceRange( $from, $from + $ownLength, null, null );
}
166 | |
/**
 * Returns the range ID of a node, which is the value of its "about" attribute.
 *
 * @param Element $node
 * @return string
 */
protected function getRangeId( Element $node ): string {
	$about = DOMCompat::getAttribute( $node, "about" );
	return $about;
}
175 | |
/**
 * Build the DOM range for one transclusion from its marker elements.
 *
 * Starts from the range returned by findEnclosingRange() for $startElem and
 * $endElem, then adjusts range->start (and sometimes range->end) so that:
 *  - empty content is handled (range expanded to the parent in fosterable
 *    position, or an empty span inserted otherwise),
 *  - unwrappable content in fosterable positions is migrated into the first
 *    following element, or the range is expanded to the parent,
 *  - range->start is an Element (a span wrapper is added if needed),
 *  - fostered content preceding a table is included.
 * Finally the range is marked `flipped` if start and end are not in sibling
 * order.
 *
 * @param Element $startElem Start marker; stored as range->startElem
 * @param Element $endMeta End marker; stored as range->endElem
 * @param Element $endElem Element passed to findEnclosingRange() as the end
 *   and used to detect empty content
 * @return DOMRangeInfo
 */
private function getDOMRange(
	Element $startElem, Element $endMeta, Element $endElem
) {
	$range = $this->findEnclosingRange( $startElem, $endElem );
	$range->startElem = $startElem;
	$range->endElem = $endMeta;

	$startsInFosterablePosn = DOMUtils::isFosterablePosition( $range->start );
	$next = $range->start->nextSibling;

	// Detect empty content and handle them!
	if ( WTUtils::isTplMarkerMeta( $range->start ) && $next === $endElem ) {
		Assert::invariant( $range->start === $startElem,
			"Expected startElem to be same as range.start" );
		if ( $startsInFosterablePosn ) {
			// Expand range!
			$range->start = $range->end = $range->start->parentNode;
			$startsInFosterablePosn = false;
		} else {
			$emptySpan = $this->document->createElement( 'span' );
			$range->start->parentNode->insertBefore( $emptySpan, $endElem );
		}

		// Handle unwrappable content in fosterable positions
		// and expand template range, if required.
		// NOTE: Template marker meta tags are translated from comments
		// *after* the DOM has been built which is why they can show up in
		// fosterable positions in the DOM.
	} elseif ( $startsInFosterablePosn &&
		( !( $range->start instanceof Element ) ||
			WTUtils::isTplMarkerMeta( $range->start ) &&
			( !( $next instanceof Element ) || WTUtils::isTplMarkerMeta( $next ) )
		)
	) {
		$rangeStartParent = $range->start->parentNode;

		// If we are in a table in a foster-element position, then all non-element
		// nodes will be white-space and comments. Skip over all of them and find
		// the first table content node.
		$noWS = true;
		$nodesToMigrate = [];
		$newStart = $range->start;
		$n = $range->start instanceof Element ? $next : $range->start;
		// Stop when the sibling list is exhausted: without the `$n &&` guard
		// a missing element sibling would make this loop spin forever on null.
		while ( $n && !( $n instanceof Element ) ) {
			if ( $n instanceof Text ) {
				$noWS = false;
			}
			$nodesToMigrate[] = $n;
			$n = $n->nextSibling;
			$newStart = $n;
		}
		if ( !$n ) {
			// No element to migrate into; force the range expansion below.
			$newStart = null;
		}

		// As long as $newStart is a tr/tbody or we don't have whitespace
		// migrate $nodesToMigrate into $newStart. Pushing whitespace into
		// th/td/caption can change display semantics.
		if ( $newStart && ( $noWS || isset( self::MAP_TBODY_TR[DOMCompat::nodeName( $newStart )] ) ) ) {
			/**
			 * The point of the above loop is to ensure we're working
			 * with a Element if there is a $newStart.
			 *
			 * @var Element $newStart
			 */
			'@phan-var Element $newStart';
			$insertPosition = $newStart->firstChild;
			foreach ( $nodesToMigrate as $n ) {
				$newStart->insertBefore( $n, $insertPosition );
			}
			$range->start = $newStart;
			// Update dsr to point to original start
			$this->updateDSRForFirstRangeNode( $range->start, $startElem );
		} else {
			// If not, we are forced to expand the template range.
			$range->start = $range->end = $rangeStartParent;
		}
	}

	// Ensure range->start is an element node since we want to
	// add/update the data-parsoid attribute to it.
	if ( !( $range->start instanceof Element ) ) {
		$span = $this->document->createElement( 'span' );
		$range->start->parentNode->insertBefore( $span, $range->start );
		$span->appendChild( $range->start );
		$this->updateDSRForFirstRangeNode( $span, $startElem );
		$range->start = $span;
	}

	$range->start = $this->getStartConsideringFosteredContent( $range->start );

	$rangeStartNextSibling = $range->start->nextSibling;
	if ( $range->start === $startElem && $rangeStartNextSibling instanceof Element ) {
		// HACK!
		// The strip-double-tds pass has a HACK that requires DSR and src
		// information being set on this element node. So, this HACK here
		// is supporting that HACK there.
		//
		// (The parser test for T52603 will fail without this fix)
		$this->updateDSRForFirstRangeNode( $rangeStartNextSibling, $startElem );
	}

	// Use the negative test since it doesn't mark the range as flipped
	// if range.start === range.end
	if ( !DOMUtils::inSiblingOrder( $range->start, $range->end ) ) {
		// In foster-parenting situations, the end-meta tag (and hence range.end)
		// can show up before the range.start which would be the table itself.
		// So, we record this info for later analysis.
		$range->flipped = true;
	}

	$this->env->log(
		"trace/{$this->traceType}/findranges",
		static function () use ( &$range ) {
			$msg = '';
			$dp1 = DOMDataUtils::getDataParsoid( $range->start );
			$dp2 = DOMDataUtils::getDataParsoid( $range->end );
			// Temporarily detach tmp data so it is not part of the JSON dump.
			$tmp1 = $dp1->tmp;
			$tmp2 = $dp2->tmp;
			$dp1->tmp = null;
			$dp2->tmp = null;
			$msg .= "\n----------------------------------------------";
			$msg .= "\nFound range : " . $range->id . '; flipped? ' . ( (string)$range->flipped ) .
				'; offset: ' . $range->startOffset;
			$msg .= "\nstart-elem : " . DOMCompat::getOuterHTML( $range->startElem ) . '; DP: ' .
				PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $range->startElem ) );
			$msg .= "\nend-elem : " . DOMCompat::getOuterHTML( $range->endElem ) . '; DP: ' .
				PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $range->endElem ) );
			$msg .= "\nstart : [TAG_ID " . ( $tmp1->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $range->start ) .
				'; DP: ' . PHPUtils::jsonEncode( $dp1 );
			$msg .= "\nend : [TAG_ID " . ( $tmp2->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $range->end ) .
				'; DP: ' . PHPUtils::jsonEncode( $dp2 );
			$msg .= "\n----------------------------------------------";
			$dp1->tmp = $tmp1;
			$dp2->tmp = $tmp2;
			return $msg;
		}
	);

	return $range;
}
324 | |
/**
 * Returns the given node unless it is a table directly preceded by fostered
 * content, in which case the first node of that fostered run is returned.
 *
 * @param Node $node
 * @return Node
 */
protected function getStartConsideringFosteredContent( Node $node ): Node {
	if ( DOMCompat::nodeName( $node ) !== 'table' ) {
		return $node;
	}

	// Walk backwards over consecutive fostered element siblings so that
	// any fostered content is included as well.
	while ( true ) {
		$before = $node->previousSibling;
		if ( !( $before instanceof Element ) ||
			empty( DOMDataUtils::getDataParsoid( $before )->fostered )
		) {
			break;
		}
		$node = $before;
	}

	return $node;
}
344 | |
/**
 * Remove a start marker: a <meta> element is removed from the DOM outright;
 * on any other element only the mw:* entries are removed from its typeof
 * attribute (when it has one).
 *
 * @param Element $meta
 */
private static function stripStartMeta( Element $meta ): void {
	if ( DOMCompat::nodeName( $meta ) === 'meta' ) {
		$meta->parentNode->removeChild( $meta );
		return;
	}

	$typeOf = DOMCompat::getAttribute( $meta, 'typeof' );
	if ( $typeOf === null ) {
		// No typeof attribute, nothing to clean up.
		return;
	}

	// Remove mw:* from the typeof.
	$meta->setAttribute(
		'typeof',
		preg_replace( '/(?:^|\s)mw:[^\/]*(\/\S+|(?=$|\s))/D', '', $typeOf )
	);
}
357 | |
/**
 * Follow the nesting map upwards from $startId and return the last ID
 * reached, i.e. the one that has no entry of its own in $nestingInfo.
 *
 * @param array $nestingInfo Map from a range ID to the ID of the range it is nested in
 * @param ?string $startId ID to start from; returned unchanged when it has no entry
 * @return ?string
 * @throws Error if the walk revisits an ID (cycles are expected to have been suppressed)
 */
private static function findToplevelEnclosingRange(
	array $nestingInfo, ?string $startId
): ?string {
	$seen = [];
	for ( $current = $startId; isset( $nestingInfo[$current] ); $current = $nestingInfo[$current] ) {
		if ( isset( $seen[$current] ) ) {
			throw new Error( "Found a cycle in tpl-range nesting where there shouldn't have been one." );
		}
		$seen[$current] = true;
	}
	return $current;
}
375 | |
/**
 * Append a template's info to $this->compoundTpls[$compoundTplId].
 *
 * Before the new entry, this also appends (a) the source text found between
 * the previous entry's dsr end and this template's dsr start, when there is
 * a gap, and (b) the start element's unwrappedWT, when non-empty.
 *
 * @param string $compoundTplId
 * @param DOMRangeInfo $range
 * @param TemplateInfo $templateInfo
 */
private function recordTemplateInfo(
	string $compoundTplId, DOMRangeInfo $range, TemplateInfo $templateInfo
): void {
	if ( !isset( $this->compoundTpls[$compoundTplId] ) ) {
		$this->compoundTpls[$compoundTplId] = [];
	}

	// Work on the stored list directly so appends land in $this->compoundTpls.
	/** @var array $parts */
	$parts = &$this->compoundTpls[$compoundTplId];
	$startDp = DOMDataUtils::getDataParsoid( $range->startElem );
	$startDsr = $startDp->dsr;

	// Record any intervening wikitext between templates that are part of
	// the same compound structure.
	if ( $parts !== [] ) {
		$previous = PHPUtils::lastItem( $parts );
		$previousEnd = $previous->dsr->end;
		if ( $previousEnd < $startDsr->start ) {
			$parts[] = PHPUtils::safeSubstr(
				$this->frame->getSrcText(), $previousEnd, $startDsr->start - $previousEnd );
		}
	}

	if ( !empty( $startDp->unwrappedWT ) ) {
		$parts[] = (string)$startDp->unwrappedWT;
	}

	// Get rid of src-offsets since they aren't needed anymore.
	foreach ( $templateInfo->paramInfos as $paramInfo ) {
		$paramInfo->srcOffsets = null;
	}

	$isParam = DOMUtils::hasTypeOf( $range->startElem, 'mw:Param' );
	$parts[] = new CompoundTemplateInfo( $startDsr, $templateInfo, $isParam );
}
416 | |
/**
 * Determine whether adding the given range would introduce a cycle in the
 * subsumedRanges graph.
 *
 * Nesting cycles with multiple ranges can show up because of foster
 * parenting scenarios if they are not detected and suppressed.
 *
 * The walk starts at the range that $end is subsumed by and follows the
 * graph upwards. Every ID seen is recorded, so the walk always terminates:
 * reaching $start, or revisiting any ID (a cycle that already exists in
 * $subsumedRanges), both report a cycle.
 *
 * @param string $start The ID of the new range
 * @param string $end The ID of the other range
 * @param string[] $subsumedRanges The subsumed ranges graph, encoded as an
 *  array in which each element maps one string range ID to another range ID
 * @return bool
 */
private static function introducesCycle( string $start, string $end, array $subsumedRanges ): bool {
	$visited = [ $start => true ];
	$elt = $subsumedRanges[$end] ?? null;
	while ( $elt ) {
		if ( !empty( $visited[$elt] ) ) {
			return true;
		}
		// Remember this ID so a cycle that does not pass through $start
		// cannot make the walk loop forever.
		$visited[$elt] = true;
		$elt = $subsumedRanges[$elt] ?? null;
	}
	return false;
}
441 | |
/**
 * Determine whether DOM ranges overlap.
 *
 * The `inSiblingOrder` check here is sufficient to determine overlaps
 * because the algorithm in `findWrappableTemplateRanges` will put the
 * start/end elements for intersecting ranges on the same plane and prev/
 * curr are in textual order (which translates to dom order).
 *
 * For a flipped range, start and end swap roles.
 *
 * @param DOMRangeInfo $prev
 * @param DOMRangeInfo $curr
 * @return bool
 */
private static function rangesOverlap( DOMRangeInfo $prev, DOMRangeInfo $curr ): bool {
	$tailOfPrev = $prev->flipped ? $prev->start : $prev->end;
	$headOfCurr = $curr->flipped ? $curr->end : $curr->start;
	return DOMUtils::inSiblingOrder( $headOfCurr, $tailOfPrev );
}
459 | |
/**
 * Identify the elements of $tplRanges that are non-overlapping.
 * Record template info in $this->compoundTpls as we go.
 *
 * Works in three passes:
 *  1. register every range on the element siblings it spans,
 *  2. walk up from each range start to record which ranges are nested in
 *     others ($subsumedRanges),
 *  3. visit ranges in source order, dropping nested ranges, merging a range
 *     that overlaps the previously emitted one into it, and emitting the
 *     rest. Marker elements of dropped/merged ranges are removed or stripped.
 *
 * @param Node $docRoot
 * @param DOMRangeInfo[] $tplRanges The potentially overlapping ranges
 * @return DOMRangeInfo[] The non-overlapping ranges
 */
public function findTopLevelNonOverlappingRanges( Node $docRoot, array $tplRanges ): array {
	// For each node, assign an attribute that is a record of all
	// tpl ranges it belongs to at the top-level.
	foreach ( $tplRanges as $r ) {
		$n = !$r->flipped ? $r->start : $r->end;
		$e = !$r->flipped ? $r->end : $r->start;

		while ( $n ) {
			if ( $n instanceof Element ) {
				$this->addNodeRange( $n, $r );
				// Done
				if ( $n === $e ) {
					break;
				}
			}

			$n = $n->nextSibling;
		}
	}

	// In the first pass over `numRanges` below, `subsumedRanges` is used to
	// record purely the nested ranges. However, in the second pass, we also
	// add the relationships between overlapping ranges so that
	// `findToplevelEnclosingRange` can use that information to add `argInfo`
	// to the right `compoundTpls`. This scenario can come up when you have
	// three ranges, 1 intersecting with 2 but not 3, and 3 nested in 2.
	$subsumedRanges = [];

	// For each range r:(s, e), walk up from s --> docRoot and if any of
	// these nodes have tpl-ranges (besides r itself) assigned to them,
	// then r is nested in those other templates and can be ignored.
	foreach ( $tplRanges as $r ) {
		$n = $r->start;

		while ( $n !== $docRoot ) {
			$ranges = $this->getNodeRanges( $n );
			if ( $ranges ) {
				if ( $n !== $r->start ) {
					// 'r' is nested for sure
					// Record the outermost range in which 'r' is nested.
					$outermostId = null;
					$outermostOffset = null;
					foreach ( $ranges as $rangeId => $range ) {
						if ( $outermostId === null
							|| $range->startOffset < $outermostOffset
						) {
							$outermostId = $rangeId;
							$outermostOffset = $range->startOffset;
						}
					}
					$subsumedRanges[$r->id] = (string)$outermostId;
					break;
				} else {
					// n === r.start
					//
					// We have to make sure this is not an overlap scenario.
					// Find the ranges that r.start and r.end belong to and
					// compute their intersection. If this intersection has
					// another tpl range besides r itself, we have a winner!
					//
					// The code below does the above check efficiently.
					$eTpls = $this->getNodeRanges( $r->end );
					$foundNesting = false;

					foreach ( $ranges as $otherId => $other ) {
						// - Don't record nesting cycles.
						// - Record the outermost range in which 'r' is nested in.
						if ( $otherId !== $r->id &&
							!empty( $eTpls[$otherId] ) &&
							// When we have identical ranges, pick the range with
							// the larger offset to be subsumed.
							( $r->start !== $other->start ||
								$r->end !== $other->end ||
								$other->startOffset < $r->startOffset
							) &&
							!self::introducesCycle( $r->id, (string)$otherId, $subsumedRanges )
						) {
							$foundNesting = true;
							if ( !isset( $subsumedRanges[$r->id] ) ||
								$other->startOffset < $ranges[$subsumedRanges[$r->id]]->startOffset
							) {
								$subsumedRanges[$r->id] = (string)$otherId;
							}
						}
					}

					if ( $foundNesting ) {
						// 'r' is nested
						break;
					}
				}
			}

			// Move up
			$n = $n->parentNode;
		}
	}

	// Sort by start offset in source wikitext
	usort( $tplRanges, static function ( $r1, $r2 ) {
		return $r1->startOffset <=> $r2->startOffset;
	} );

	// Since the tpl ranges are sorted in textual order (by start offset),
	// it is sufficient to only look at the most recent template to see
	// if the current one overlaps with the previous one.
	//
	// This works because we've already identified nested ranges and can ignore them.

	$newRanges = [];
	$prev = null;

	foreach ( $tplRanges as $r ) {
		$endTagToRemove = null;
		$startTagToStrip = null;

		// Extract tplargInfo
		$tmp = DOMDataUtils::getDataParsoid( $r->startElem )->getTemp();
		$templateInfo = $tmp->tplarginfo ?? null;

		$this->verifyTplInfoExpectation( $templateInfo, $tmp );

		// Capture the current range by value: the closure only reads it, and
		// a by-reference capture would tie it to the loop variable.
		$this->env->log( "trace/{$this->traceType}/merge", static function () use ( $r ) {
			$msg = '';
			$dp1 = DOMDataUtils::getDataParsoid( $r->start );
			$dp2 = DOMDataUtils::getDataParsoid( $r->end );
			// Temporarily detach tmp data so it is not part of the JSON dump.
			$tmp1 = $dp1->tmp;
			$tmp2 = $dp2->tmp;
			$dp1->tmp = null;
			$dp2->tmp = null;
			$msg .= "\n##############################################";
			$msg .= "\nrange " . $r->id . '; r-start-elem: ' . DOMCompat::getOuterHTML( $r->startElem ) .
				'; DP: ' . PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $r->startElem ) );
			$msg .= "\nrange " . $r->id . '; r-end-elem: ' . DOMCompat::getOuterHTML( $r->endElem ) .
				'; DP: ' . PHPUtils::jsonEncode( DOMDataUtils::getDataParsoid( $r->endElem ) );
			$msg .= "\nrange " . $r->id . '; r-start: [TAG_ID ' . ( $tmp1->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $r->start ) . '; DP: ' . PHPUtils::jsonEncode( $dp1 );
			$msg .= "\nrange " . $r->id . '; r-end: [TAG_ID ' . ( $tmp2->tagId ?? '?' ) . ']: ' .
				DOMCompat::getOuterHTML( $r->end ) . '; DP: ' . PHPUtils::jsonEncode( $dp2 );
			$msg .= "\n----------------------------------------------";
			$dp1->tmp = $tmp1;
			$dp2->tmp = $tmp2;
			return $msg;
		} );

		$enclosingRangeId = self::findToplevelEnclosingRange(
			$subsumedRanges,
			$subsumedRanges[$r->id] ?? null
		);
		if ( $enclosingRangeId ) {
			$this->env->log( "trace/{$this->traceType}/merge", '--nested in ', $enclosingRangeId, '--' );

			// Nested -- ignore r
			$startTagToStrip = $r->startElem;
			$endTagToRemove = $r->endElem;
			if ( $templateInfo ) {
				// 'r' is nested in 'enclosingRange' at the top-level
				// So, enclosingRange gets r's argInfo
				$this->recordTemplateInfo( $enclosingRangeId, $r, $templateInfo );
			}
		} elseif ( $prev && self::rangesOverlap( $prev, $r ) ) {
			// In the common case, in overlapping scenarios, r.start is
			// identical to prev.end. However, in fostered content scenarios,
			// there can be true overlap of the ranges.
			$this->env->log( "trace/{$this->traceType}/merge", '--overlapped--' );

			// See comment above, where `subsumedRanges` is defined.
			$subsumedRanges[$r->id] = $prev->id;

			// Overlapping ranges.
			// r is the regular kind
			// Merge r with prev

			// Note that if a table comes from a template, a foster box isn't
			// emitted so the enclosure isn't guaranteed. In pathological
			// cases, like where the table end tag isn't emitted, we can still
			// end up with flipped ranges if the template end marker gets into
			// a fosterable position (which can still happen despite being
			// emitted as a comment).
			Assert::invariant( !$r->flipped,
				'Flipped range should have been enclosed.'
			);

			$startTagToStrip = $r->startElem;
			$endTagToRemove = $prev->endElem;

			$prev->end = $r->end;
			$prev->endElem = $r->endElem;
			if ( WTUtils::isMarkerAnnotation( $r->endElem ) ) {
				$endDataMw = DOMDataUtils::getDataMw( $r->endElem );
				$endDataMw->rangeId = $r->id;
				$prev->extendedByOverlapMerge = true;
			}

			// Update compoundTplInfo
			if ( $templateInfo ) {
				$this->recordTemplateInfo( $prev->id, $r, $templateInfo );
			}
		} else {
			$this->env->log( "trace/{$this->traceType}/merge", '--normal--' );

			// Default -- no overlap
			// Emit the merged range
			$newRanges[] = $r;
			$prev = $r;

			// Update compoundTpls
			if ( $templateInfo ) {
				$this->recordTemplateInfo( $r->id, $r, $templateInfo );
			}
		}

		if ( $endTagToRemove ) {
			// Remove start and end meta-tags
			// Not necessary to remove the start tag, but good to cleanup
			$endTagToRemove->parentNode->removeChild( $endTagToRemove );
			self::stripStartMeta( $startTagToStrip );
		}
	}

	return $newRanges;
}
690 | |
/**
 * Name the first node of the range that is neither a template marker meta
 * nor fostered content.
 *
 * Note that the case of nodeName varies with DOM implementation. This
 * method currently forces the nodeName to uppercase. In the future
 * we can/should switch to using the "native" case of the DOM
 * implementation; we do a case-insensitive match (by converting the result
 * to the "native" case of the DOM implementation) in
 * EncapsulatedContentHandler when this value is used.
 *
 * @param DOMRangeInfo $range
 * @return string|null nodeName with an optional "_$stx" suffix, or null
 *  when the node found is a <meta>.
 */
private static function findFirstTemplatedNode( DOMRangeInfo $range ): ?string {
	$candidate = $range->start;

	// Skip tpl marker meta
	if ( WTUtils::isTplMarkerMeta( $candidate ) ) {
		$candidate = $candidate->nextSibling;
	}

	// Walk past fostered nodes since they came from within a table.
	// Note that this is not foolproof because in some scenarios,
	// fostered content is not marked up. Ex: when a table is templated,
	// and content from the table is fostered.
	$candidateDp = DOMDataUtils::getDataParsoid( $candidate );
	while ( !empty( $candidateDp->fostered ) ) {
		$candidate = $candidate->nextSibling;
		/** @var Element $candidate */
		DOMUtils::assertElt( $candidate );
		$candidateDp = DOMDataUtils::getDataParsoid( $candidate );
	}

	// FIXME: It is harder to use META as a node name since this is a generic
	// placeholder for a whole bunch of things each of which has its own
	// newline constraint requirements. So, for now, I am skipping that
	// can of worms to prevent confusing the serializer with an overloaded
	// tag name.
	$rawName = DOMCompat::nodeName( $candidate );
	if ( $rawName === 'meta' ) {
		return null;
	}

	// FIXME spec-compliant values would be upper-case, this is just a workaround
	// for current PHP DOM implementation and could be removed in the future
	// See discussion in the method comment above.
	$upperName = mb_strtoupper( $rawName, "UTF-8" );

	if ( empty( $candidateDp->stx ) ) {
		return $upperName;
	}
	return $upperName . '_' . $candidateDp->stx;
}
737 | |
/**
 * Encapsulation requires adding about attributes on the top-level
 * nodes of the range. This requires them to all be Elements.
 *
 * Elements get the about id of the range's start element. Non-element
 * nodes outside fosterable positions are wrapped in a new span carrying
 * that about id and a WRAPPER temp flag.
 *
 * @param DOMRangeInfo $range
 */
private function ensureElementsInRange( DOMRangeInfo $range ): void {
	$lastNode = $range->end;
	$aboutId = DOMCompat::getAttribute( $range->startElem, 'about' );

	for ( $current = $range->start; $current; $current = $following ) {
		// Grab the sibling first: wrapping below moves $current in the tree.
		$following = $current->nextSibling;

		if ( $current instanceof Element ) {
			$current->setAttribute( 'about', $aboutId );
		} elseif ( !DOMUtils::isFosterablePosition( $current ) ) {
			// Don't add span-wrappers in fosterable positions
			//
			// NOTE: there cannot be any non-IEW text in fosterable position
			// since the HTML tree builder would already have fostered it out.
			$wrapper = $this->document->createElement( 'span' );
			$wrapper->setAttribute( 'about', $aboutId );
			$wrapperDp = new DataParsoid;
			$wrapperDp->setTempFlag( TempData::WRAPPER );
			DOMDataUtils::setDataParsoid( $wrapper, $wrapperDp );
			$current->parentNode->replaceChild( $wrapper, $current );
			$wrapper->appendChild( $current );
			$current = $wrapper;
		}

		if ( $current === $lastNode ) {
			break;
		}
	}
}
776 | |
/**
 * Locate the element that will receive the encapsulation data.
 *
 * Walks forward from the range start over template-marker metas and
 * non-element nodes (which will all be IEW in fosterable positions in a
 * table) and returns the first element that is not a marker meta.
 *
 * @param DOMRangeInfo $range
 * @return Element
 * @throws Error if the walk reaches the range end, or meets a non-element
 *   node outside a fosterable position, before finding a target
 */
private static function findEncapTarget( DOMRangeInfo $range ): Element {
	$candidate = $range->start;
	'@phan-var Node $candidate';

	while ( true ) {
		$isMarker = WTUtils::isTplMarkerMeta( $candidate );
		if ( !$isMarker && $candidate instanceof Element ) {
			// Found a real element to encapsulate on.
			break;
		}

		// Detect unwrappable template and bail out early.
		$nonElement = !( $candidate instanceof Element );
		if ( $candidate === $range->end ||
			( $nonElement && !DOMUtils::isFosterablePosition( $candidate ) )
		) {
			throw new Error( 'Cannot encapsulate transclusion. Start=' .
				DOMCompat::getOuterHTML( $range->startElem ) );
		}

		$candidate = $candidate->nextSibling;
	}

	'@phan-var Element $candidate';
	return $candidate;
}
808 | |
/**
 * Add markers to the DOM around the non-overlapping ranges.
 *
 * For every range this method:
 *  - makes sure all top-level nodes of the range are elements carrying the
 *    range's about id (see ensureElementsInRange()),
 *  - copies the typeof values of the start element onto the encapsulation
 *    target,
 *  - computes a DSR spanning the whole range and, when that DSR is valid,
 *    records data-mw parts, param infos, dsr and src on the target,
 *  - logs an error when the DSR needed for encapsulation is unavailable,
 *  - removes the start marker meta (if the start element is one) and the
 *    end element from the DOM.
 *
 * @param DOMRangeInfo[] $nonOverlappingRanges
 */
private function encapsulateTemplates( array $nonOverlappingRanges ): void {
	foreach ( $nonOverlappingRanges as $i => $range ) {

		// We should never have flipped overlapping ranges, and indeed that's
		// asserted in `findTopLevelNonOverlappingRanges`. Flipping results
		// in either completely nested ranges, or non-intersecting ranges.
		//
		// If the table causing the fostering is not transcluded, we emit a
		// foster box and wrap the whole table+fb in metas, producing nested
		// ranges. For ex,
		//
		// <table>
		// {{1x|<div>}}
		//
		// The tricky part is when the table *is* transcluded, and we omit the
		// foster box. The common case (for some definition of common) might
		// be like,
		//
		// {{1x|<table>}}
		// {{1x|<div>}}
		//
		// Here, #mwt1 leaves a table open and the end meta from #mwt2 is
		// fostered, since it gets closed into the div. The range for #mwt1
		// is the entire table, which thankfully contains #mwt2, so we still
		// have the expected entire nesting. Any tricks to extend the range
		// of #mwt2 beyond the table (so that we have an overlapping range) will
		// inevitably result in the end meta not being fostered, and we avoid
		// this situation altogether.
		//
		// The very edgy case is as follows,
		//
		// {{1x|<table><div>}}</div>
		// {{1x|<div>}}
		//
		// where both end metas are fostered. Ignoring that we don't even
		// roundtrip the first transclusion properly on its own, here we have
		// a flipped range where, since the end meta for the first range was
		// also fostered, the ranges still don't overlap.

		// FIXME: The code below needs to be aware of flipped ranges.

		$this->ensureElementsInRange( $range );

		$tplArray = $this->compoundTpls[$range->id] ?? null;
		Assert::invariant( (bool)$tplArray, 'No parts for template range!' );

		$encapTgt = self::findEncapTarget( $range );
		$encapValid = false;
		$encapDP = DOMDataUtils::getDataParsoid( $encapTgt );

		// Update type-of (always even if tpl-encap below will fail).
		// This ensures that VE will still "edit-protect" this template
		// and not allow its content to be edited directly.
		$startElem = $range->startElem;
		if ( $startElem !== $encapTgt ) {
			$t1 = DOMCompat::getAttribute( $startElem, 'typeof' );
			if ( $t1 !== null ) {
				foreach ( array_reverse( explode( ' ', $t1 ) ) as $t ) {
					DOMUtils::addTypeOf( $encapTgt, $t, true );
				}
			}
		}

		/* ----------------------------------------------------------------
		 * We'll attempt to update dp1.dsr to reflect the entire range of
		 * the template. This relies on a couple observations:
		 *
		 * 1. In the common case, dp2.dsr->end will be > dp1.dsr->end
		 *    If so, new range = dp1.dsr->start, dp2.dsr->end
		 *
		 * 2. But, foster parenting can complicate this when range.end is a table
		 *    and range.start has been fostered out of the table (range.end).
		 *    But, we need to verify this assumption.
		 *
		 *    2a. If dp2.dsr->start is smaller than dp1.dsr->start, this is a
		 *        confirmed case of range.start being fostered out of range.end.
		 *
		 *    2b. If dp2.dsr->start is unknown, we rely on fostered flag on
		 *        range.start, if any.
		 * ---------------------------------------------------------------- */
		$dp1 = DOMDataUtils::getDataParsoid( $range->start );
		// Work on a copy so the adjustments below don't touch $dp1->dsr.
		$dp1DSR = isset( $dp1->dsr ) ? clone $dp1->dsr : null;
		$dp2DSR = self::getRangeEndDSR( $range );

		if ( $dp1DSR ) {
			if ( $dp2DSR ) {
				// Case 1. above
				if ( $dp2DSR->end > $dp1DSR->end ) {
					$dp1DSR->end = $dp2DSR->end;
				}

				// Case 2. above
				$endDsr = $dp2DSR->start;
				if ( DOMCompat::nodeName( $range->end ) === 'table' &&
					$endDsr !== null &&
					( $endDsr < $dp1DSR->start || !empty( $dp1->fostered ) )
				) {
					$dp1DSR->start = $endDsr;
				}
			}

			// encapsulation possible only if dp1.dsr is valid
			$encapValid = Utils::isValidDSR( $dp1DSR ) &&
				$dp1DSR->end >= $dp1DSR->start;
		}

		if ( $encapValid ) {
			// Find transclusion info from the array (skip past a wikitext element)
			/** @var CompoundTemplateInfo $firstTplInfo */
			$firstTplInfo = is_string( $tplArray[0] ) ? $tplArray[1] : $tplArray[0];

			// Add any leading wikitext
			if ( $firstTplInfo->dsr->start > $dp1DSR->start ) {
				// This gap in dsr (between the final encapsulated content, and the
				// content that actually came from a template) is indicative of this
				// being a mixed-template-content-block and/or multi-template-content-block
				// scenario.
				//
				// In this case, record the name of the first node in the encapsulated
				// content. During html -> wt serialization, newline constraints for
				// this entire block has to be determined relative to this node.
				$ftn = self::findFirstTemplatedNode( $range );
				if ( $ftn !== null ) {
					$encapDP->firstWikitextNode = $ftn;
				}
				$width = $firstTplInfo->dsr->start - $dp1DSR->start;
				array_unshift(
					$tplArray,
					PHPUtils::safeSubstr( $this->frame->getSrcText(), $dp1DSR->start, $width )
				);
			}

			// Add any trailing wikitext
			/** @var CompoundTemplateInfo $lastTplInfo */
			$lastTplInfo = PHPUtils::lastItem( $tplArray );
			if ( $lastTplInfo->dsr->end < $dp1DSR->end ) {
				$width = $dp1DSR->end - $lastTplInfo->dsr->end;
				$tplArray[] = PHPUtils::safeSubstr( $this->frame->getSrcText(), $lastTplInfo->dsr->end, $width );
			}

			// Map the array of { dsr: .. , args: .. } objects to just the args property
			$infoIndex = 0;
			$parts = [];
			$pi = [];
			foreach ( $tplArray as $a ) {
				if ( is_string( $a ) ) {
					$parts[] = $a;
				} elseif ( $a instanceof CompoundTemplateInfo ) {
					// Remember the position of the transclusion relative
					// to other transclusions. Should match the index of
					// the corresponding private metadata in $templateInfos.
					$args = $a->info->getDataMw( $infoIndex++ );
					$parts[] = $a->isParam
						? (object)[ 'templatearg' => $args ]
						: (object)[ 'template' => $args ];
					// FIXME: we throw away the array keys and rebuild them
					// again in WikitextSerializer
					$pi[] = array_values( $a->info->paramInfos );
				}
			}

			// Set up dsr->start, dsr->end, and data-mw on the target node
			// Avoid clobbering existing (ex: extension) data-mw information (T214241)
			$encapDataMw = DOMDataUtils::getDataMw( $encapTgt );
			$encapDataMw->parts = $parts;
			DOMDataUtils::setDataMw( $encapTgt, $encapDataMw );
			$encapDP->pi = $pi;

			// Special case when mixed-attribute-and-content templates are
			// involved. This information is reliable and comes from the
			// AttributeExpander and gets around the problem of unmarked
			// fostered content that findFirstTemplatedNode runs into.
			$firstWikitextNode = DOMDataUtils::getDataParsoid(
				$range->startElem
			)->firstWikitextNode ?? null;
			if ( empty( $encapDP->firstWikitextNode ) && $firstWikitextNode ) {
				$encapDP->firstWikitextNode = $firstWikitextNode;
			}
		} else {
			$errors = [ 'Do not have necessary info. to encapsulate Tpl: ' . $i ];
			$errors[] = 'Start Elt : ' . DOMCompat::getOuterHTML( $startElem );
			$errors[] = 'End Elt   : ' . DOMCompat::getOuterHTML( $range->endElem );
			$errors[] = 'Start DSR : ' . PHPUtils::jsonEncode( $dp1DSR ?? 'no-start-dsr' );
			$errors[] = 'End   DSR : ' . PHPUtils::jsonEncode( $dp2DSR ?? [] );
			$this->env->log( 'error', implode( "\n", $errors ) );
		}

		// Make DSR range zero-width for fostered templates after
		// setting up data-mw. However, since template encapsulation
		// sometimes captures both fostered content as well as the table
		// from which it was fostered from, in those scenarios, we should
		// leave DSR info untouched.
		//
		// SSS FIXME:
		// 1. Should we remove the fostered flag from the entire
		// encapsulated block if we dont set dsr width range to zero
		// since only part of the block is fostered, not the entire
		// encapsulated block?
		//
		// 2. In both cases, should we mark these uneditable by adding
		// mw:Placeholder to the typeof?
		//
		// $dp1DSR is null when the range start carries no dsr. There is then
		// nothing to collapse, and writing a property on null would throw
		// instead of letting the marker cleanup below run.
		if ( $dp1DSR !== null && !empty( $dp1->fostered ) ) {
			$encapDataMw = DOMDataUtils::getDataMw( $encapTgt );
			if ( !$encapDataMw ||
				!$encapDataMw->parts ||
				count( $encapDataMw->parts ) === 1
			) {
				$dp1DSR->end = $dp1DSR->start;
			}
		}

		// Update DSR after fostering-related fixes are done.
		if ( $encapValid ) {
			// encapInfo.dp points to DOMDataUtils.getDataParsoid(encapInfo.target)
			// and all updates below update properties in that object tree.
			if ( empty( $encapDP->dsr ) ) {
				$encapDP->dsr = $dp1DSR;
			} else {
				$encapDP->dsr->start = $dp1DSR->start;
				$encapDP->dsr->end = $dp1DSR->end;
			}
			$encapDP->src = $encapDP->dsr->substr(
				$this->frame->getSrcText()
			);
		}

		// Remove startElem (=range.startElem) if a meta. If a meta,
		// it is guaranteed to be a marker meta added to mark the start
		// of the template.
		if ( WTUtils::isTplMarkerMeta( $startElem ) ) {
			$startElem->parentNode->removeChild( $startElem );
		}

		$range->endElem->parentNode->removeChild( $range->endElem );
	}
}
1050 | |
/**
 * Register a range against a node, keyed by the range's id.
 *
 * @param Element $node
 * @param DOMRangeInfo $range
 */
private function addNodeRange( Element $node, DOMRangeInfo $range ): void {
	// With the native DOM extension, normally you assume that DOMNode
	// objects are temporary -- you get a new DOMNode every time you
	// traverse the DOM. But by retaining a reference in the
	// SplObjectStorage, we ensure that the DOMNode object stays live while
	// the pass is active. Then its address can be used as an index.
	$holder = $this->nodeRanges[$node] ?? null;
	if ( $holder === null ) {
		// The stored data has to be an object because
		// SplObjectStorage::offsetGet() does not provide an lval.
		$holder = new DOMRangeInfoArray;
		$this->nodeRanges[$node] = $holder;
	}
	$holder->ranges[$range->id] = $range;
}
1070 | |
/**
 * Look up the ranges previously attached to a node.
 *
 * @param Element $node
 * @return DOMRangeInfo[]|null Ranges indexed by range ID, or null when
 *   nothing is recorded for the node
 */
private function getNodeRanges( Element $node ): ?array {
	$holder = $this->nodeRanges[$node] ?? null;
	return $holder->ranges ?? null;
}
1080 | |
/**
 * Collect the wrappable template ranges found in the subtree below
 * $rootNode by delegating to the recursive walker.
 *
 * @param Node $rootNode
 * @return DOMRangeInfo[]
 */
protected function findWrappableMetaRanges( Node $rootNode ): array {
	// Both arrays are filled in by reference during the walk; only the
	// completed ranges are of interest to the caller.
	$markersById = [];
	$foundRanges = [];
	$this->findWrappableTemplateRangesRecursive( $rootNode, $markersById, $foundRanges );
	return $foundRanges;
}
1093 | |
/**
 * Recursive helper for findWrappableMetaRanges().
 *
 * Walks the children of $rootNode. Elements matching the meta type handled
 * by this class (see matchMetaType()) are paired up by range id: once both
 * the start and the end marker of an id have been seen, a DOM range is
 * built and appended to $tplRanges. Non-matching elements are recursed into.
 *
 * @param Node $rootNode
 * @param ElementRange[] &$tpls Template start and end elements by ID
 * @param DOMRangeInfo[] &$tplRanges Template range info
 * @throws UnreachableException if a second start marker shows up for an id
 *   that has no end marker yet
 * @throws RangeBuilderException if an end marker is met for a recorded
 *   range whose start element is null
 */
private function findWrappableTemplateRangesRecursive(
	Node $rootNode, array &$tpls, array &$tplRanges
): void {
	$elem = $rootNode->firstChild;

	while ( $elem ) {
		// get the next sibling before doing anything since
		// we may delete elem as part of encapsulation
		$nextSibling = $elem->nextSibling;

		if ( $elem instanceof Element ) {
			$metaType = $this->matchMetaType( $elem );

			// Ignore templates without tsr.
			//
			// These are definitely nested in other templates / extensions
			// and need not be wrapped themselves since they
			// can never be edited directly.
			//
			// NOTE: We are only testing for tsr presence on the start-elem
			// because wikitext errors can lead to parse failures and no tsr
			// on end-meta-tags.
			//
			// Ex: "<ref>{{1x|bar}}<!--bad-></ref>"
			if ( $metaType !== null &&
				( !empty( DOMDataUtils::getDataParsoid( $elem )->tsr ) ||
					str_ends_with( $metaType, '/End' )
				)
			) {
				$about = $this->getRangeId( $elem );
				$tpl = $tpls[$about] ?? null;
				// Is this a start marker?
				if ( !str_ends_with( $metaType, '/End' ) ) {
					if ( $tpl ) {
						$tpl->startElem = $elem;
						// content or end marker existed already
						if ( !empty( $tpl->endElem ) ) {
							// End marker was foster-parented.
							// Found actual start tag.
							$tplRanges[] = $this->getDOMRange(
								$elem, $tpl->endElem, $tpl->endElem );
						} else {
							// should not happen!
							throw new UnreachableException( "start found after content for $about." );
						}
					} else {
						$tpl = new ElementRange;
						$tpl->startElem = $elem;
						$tpls[$about] = $tpl;
					}
				} else {
					// elem is the end-meta tag
					if ( $tpl ) {
						/* ------------------------------------------------------------
						 * Special case: In some cases, the entire template content can
						 * get fostered out of a table, not just the start/end marker.
						 *
						 * Simplest example:
						 *
						 *   {|
						 *   {{1x|foo}}
						 *   |}
						 *
						 * More complex example:
						 *
						 *   {|
						 *   {{1x|
						 *   a
						 *    b
						 *
						 *     c
						 *   }}
						 *   |}
						 *
						 * Since meta-tags don't normally get fostered out, this scenario
						 * only arises when the entire content including meta-tags was
						 * wrapped in p-tags. So, we look to see if:
						 * 1. the end-meta-tag's parent has a table sibling,
						 * 2. the start meta's parent is marked as fostered.
						 * If so, we recognize this as an adoption scenario and give
						 * the table an unset DSR start taken from the tsr start of
						 * the start-meta-tag's parent.
						 * ------------------------------------------------------------*/
						$sm = $tpl->startElem;

						// TODO: this should only happen in fairly specific cases of the
						// annotation processing and should eventually be handled properly.
						// In the meantime, we create and log an exception to have an idea
						// of the amplitude of the problem.
						if ( $sm === null ) {
							throw new RangeBuilderException( 'No start tag found for the range' );
						}
						$em = $elem;
						$ee = $em;
						$tbl = $em->parentNode->nextSibling;

						// Dont get distracted by a newline node -- skip over it
						// Unsure why it shows up occasionally
						if ( $tbl && $tbl instanceof Text && $tbl->nodeValue === "\n" ) {
							$tbl = $tbl->nextSibling;
						}

						$dp = !DOMUtils::atTheTop( $sm->parentNode ) ?
							DOMDataUtils::getDataParsoid( $sm->parentNode ) : null;
						if ( $tbl && DOMCompat::nodeName( $tbl ) === 'table' && !empty( $dp->fostered ) ) {
							'@phan-var Element $tbl'; /** @var Element $tbl */
							$tblDP = DOMDataUtils::getDataParsoid( $tbl );
							// isset() is false for a null value, so the table
							// side must test for the dsr object itself and then
							// compare its start with null; isset() on the start
							// would make this branch unreachable.
							if ( isset( $dp->tsr->start ) &&
								isset( $tblDP->dsr ) && $tblDP->dsr->start === null
							) {
								$tblDP->dsr->start = $dp->tsr->start;
							}
							$tbl->setAttribute( 'about', $about ); // set about on elem
							$ee = $tbl;
						}
						$tplRanges[] = $this->getDOMRange( $sm, $em, $ee );
					} else {
						// The end tag can appear before the start tag if it is fostered out
						// of the table and the start tag is not.
						// It can even technically happen that both tags are fostered out of
						// a table and that the range is flipped: while the fostered content of
						// single table is fostered in-order, the ordering might change
						// across tables if the tags are not initially fostered by the same
						// table.
						$tpl = new ElementRange;
						$tpl->endElem = $elem;
						$tpls[$about] = $tpl;
					}
				}
			} else {
				$this->findWrappableTemplateRangesRecursive( $elem, $tpls, $tplRanges );
			}
		}

		$elem = $nextSibling;
	}
}
1237 | |
/**
 * Report the meta type of an element when it is one this class handles.
 *
 * @param Element $elem the element to check
 * @return string|null the matched type, or null when the element does not match
 */
protected function matchMetaType( Element $elem ): ?string {
	// This class only deals with template metas.
	$tplType = WTUtils::matchTplType( $elem );
	return $tplType;
}
1248 | |
/**
 * Check that a range lacking template info is one that came from fostering.
 *
 * @param ?TemplateInfo $templateInfo info found for the range, if any
 * @param TempData $tmp temporary data whose FROM_FOSTER flag is consulted
 */
protected function verifyTplInfoExpectation( ?TemplateInfo $templateInfo, TempData $tmp ): void {
	if ( $templateInfo ) {
		// Template info is present: nothing to verify.
		return;
	}

	// An assertion here is probably an indication that we're
	// mistakenly doing template wrapping in a nested context.
	Assert::invariant( $tmp->getFlag( TempData::FROM_FOSTER ), 'Template range without arginfo.' );
}
1256 | |
/**
 * Run the pass on the tree rooted at $root: find the wrappable ranges,
 * reduce them to top-level non-overlapping ranges and encapsulate those.
 * Does nothing when no wrappable range is found.
 *
 * @param Node $root
 */
public function execute( Node $root ): void {
	$candidateRanges = $this->findWrappableMetaRanges( $root );
	if ( $candidateRanges === [] ) {
		return;
	}

	$topLevelRanges = $this->findTopLevelNonOverlappingRanges( $root, $candidateRanges );
	$this->encapsulateTemplates( $topLevelRanges );
}
1264 | |
/**
 * Build a range that encloses both $startElem and $endElem.
 *
 * The range id and start offset are taken from $startElem. The start and
 * end nodes are derived from the first ancestor of $endElem that also
 * appears on $startElem's path to the root; they stay unset when no such
 * ancestor is found below the document node.
 *
 * @param Element $startElem
 * @param Element $endElem
 * @return DOMRangeInfo
 */
protected function findEnclosingRange( Element $startElem, Element $endElem ): DOMRangeInfo {
	$enclosing = new DOMRangeInfo;
	$enclosing->id = Utils::stripParsoidIdPrefix( $this->getRangeId( $startElem ) );
	$enclosing->startOffset = DOMDataUtils::getDataParsoid( $startElem )->tsr->start;

	// Climb from endElem until we meet a node on startElem's path to root
	$pathFromStart = DOMUtils::pathToRoot( $startElem );
	for (
		$child = $endElem, $ancestor = $endElem->parentNode;
		$ancestor && $ancestor->nodeType !== XML_DOCUMENT_NODE;
		$child = $ancestor, $ancestor = $child->parentNode
	) {
		$pos = array_search( $ancestor, $pathFromStart, true );

		if ( $pos === 0 ) {
			// the common ancestor is startElem
			// widen the scope to include the full subtree
			$enclosing->start = $startElem->firstChild;
			$enclosing->end = $startElem->lastChild;
			break;
		}

		if ( $pos > 0 ) {
			// The siblings under the common ancestor: the entry just before
			// it on the start path, and the node we climbed up from.
			$enclosing->start = $pathFromStart[$pos - 1];
			$enclosing->end = $child;
			break;
		}
	}

	return $enclosing;
}
1299 | } |