Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
1.34% |
6 / 447 |
|
7.14% |
1 / 14 |
CRAP | |
0.00% |
0 / 1 |
Separators | |
1.34% |
6 / 447 |
|
7.14% |
1 / 14 |
44190.51 | |
0.00% |
0 / 1 |
loggableConstraints | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
6 | |||
precedingSeparatorTextLen | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
getSepNlConstraints | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
72 | |||
makeSeparator | |
0.00% |
0 / 64 |
|
0.00% |
0 / 1 |
702 | |||
mergeConstraints | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
6 | |||
debugOut | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
updateSeparatorConstraints | |
0.00% |
0 / 37 |
|
0.00% |
0 / 1 |
72 | |||
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
makeSepIndentPreSafe | |
0.00% |
0 / 60 |
|
0.00% |
0 / 1 |
1260 | |||
handleAutoInserted | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
fetchLeadingTrimmedSpace | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
380 | |||
fetchTrailingTrimmedSpace | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
272 | |||
recoverTrimmedWhitespace | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
5 | |||
buildSep | |
0.00% |
0 / 134 |
|
0.00% |
0 / 1 |
6480 |
1 | <?php |
2 | |
3 | declare( strict_types = 1 ); |
4 | |
5 | namespace Wikimedia\Parsoid\Html2Wt; |
6 | |
7 | use Wikimedia\Assert\Assert; |
8 | use Wikimedia\Parsoid\Config\Env; |
9 | use Wikimedia\Parsoid\Core\DomSourceRange; |
10 | use Wikimedia\Parsoid\DOM\Comment; |
11 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
12 | use Wikimedia\Parsoid\DOM\Element; |
13 | use Wikimedia\Parsoid\DOM\Node; |
14 | use Wikimedia\Parsoid\Html2Wt\DOMHandlers\DOMHandler; |
15 | use Wikimedia\Parsoid\Utils\DOMCompat; |
16 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
17 | use Wikimedia\Parsoid\Utils\DOMUtils; |
18 | use Wikimedia\Parsoid\Utils\PHPUtils; |
19 | use Wikimedia\Parsoid\Utils\TokenUtils; |
20 | use Wikimedia\Parsoid\Utils\Utils; |
21 | use Wikimedia\Parsoid\Utils\WTUtils; |
22 | use Wikimedia\Parsoid\Wikitext\Consts; |
23 | |
24 | class Separators { |
25 | /* |
26 | * This regexp looks for leading whitespace on the last line of a separator string. |
27 | * So, only comments (single or multi-line) or other newlines can precede that |
28 | * whitespace-of-interest. But, also account for any whitespace preceding newlines |
29 | * since that needs to be skipped over (Ex: " \n "). |
30 | */ |
31 | private const INDENT_PRE_WS_IN_SEP_REGEXP = |
32 | '/^((?: *\n|(?:' . Utils::COMMENT_REGEXP_FRAGMENT . '))*)( +)([^\n]*)$/D'; |
33 | |
34 | /** |
35 | * @var SerializerState |
36 | */ |
37 | private $state; |
38 | |
39 | /** |
40 | * @var Env |
41 | */ |
42 | private $env; |
43 | |
44 | /** |
45 | * Clean up the constraints object so that excessively verbose output
46 | * does not clog up log files / test runs.
47 | * |
48 | * @param array $constraints |
49 | * @return array |
50 | */ |
51 | private static function loggableConstraints( array $constraints ): array { |
52 | $c = [ |
53 | 'a' => $constraints['a'] ?? null, |
54 | 'b' => $constraints['b'] ?? null, |
55 | 'min' => $constraints['min'] ?? null, |
56 | 'max' => $constraints['max'] ?? null, |
57 | ]; |
58 | if ( !empty( $constraints['constraintInfo'] ) ) { |
59 | $constraintInfo = $constraints['constraintInfo']; |
60 | $c['constraintInfo'] = [ |
61 | 'onSOL' => $constraintInfo['onSOL'] ?? false, |
62 | 'sepType' => $constraintInfo['sepType'] ?? null, |
63 | 'nodeA' => DOMCompat::nodeName( $constraintInfo['nodeA'] ), |
64 | 'nodeB' => DOMCompat::nodeName( $constraintInfo['nodeB'] ), |
65 | ]; |
66 | } |
67 | return $c; |
68 | } |
69 | |
70 | /** |
71 | * @param Node $n |
72 | * @return int|null |
73 | */ |
74 | private static function precedingSeparatorTextLen( Node $n ): ?int { |
75 | // Given the CSS white-space property and specifically, |
76 | // "pre" and "pre-line" values for this property, it seems that any |
77 | // sensible HTML editor would have to preserve IEW in HTML documents |
78 | // to preserve rendering. One use-case where an editor might change |
79 | // IEW drastically would be when the user explicitly requests it |
80 | // (Ex: pretty-printing of raw source code). |
81 | // |
82 | // For now, we are going to exploit this. This information is |
83 | // only used to extrapolate DSR values and extract a separator |
84 | // string from source, and is only used locally. In addition, |
85 | // the extracted text is verified for being a valid separator. |
86 | // |
87 | // So, at worst, this can create a local dirty diff around separators |
88 | // and at best, it gets us a clean diff. |
89 | |
90 | $len = 0; |
91 | $orig = $n; |
92 | while ( $n ) { |
93 | if ( DOMUtils::isIEW( $n ) ) { |
94 | $len += strlen( $n->nodeValue ); |
95 | } elseif ( $n instanceof Comment ) { |
96 | $len += WTUtils::decodedCommentLength( $n ); |
97 | } elseif ( $n !== $orig ) { // don't return if this is the input node!
98 | return null; |
99 | } |
100 | |
101 | $n = $n->previousSibling; |
102 | } |
103 | |
104 | return $len; |
105 | } |
106 | |
107 | /** |
108 | * Helper for updateSeparatorConstraints. |
109 | * |
110 | * Collects, checks and integrates separator newline requirements to a simple |
111 | * min, max structure. |
112 | * |
113 | * @param Node $nodeA |
114 | * @param array $aCons |
115 | * @param Node $nodeB |
116 | * @param array $bCons |
117 | * @return array |
118 | */ |
119 | private function getSepNlConstraints( |
120 | Node $nodeA, array $aCons, Node $nodeB, array $bCons |
121 | ): array { |
122 | $env = $this->state->getEnv(); |
123 | |
124 | $nlConstraints = [ |
125 | 'min' => $aCons['min'] ?? null, |
126 | 'max' => $aCons['max'] ?? null, |
127 | 'constraintInfo' => [], |
128 | ]; |
129 | |
130 | if ( isset( $bCons['min'] ) ) { |
131 | if ( $nlConstraints['max'] !== null && $nlConstraints['max'] < $bCons['min'] ) { |
132 | // Conflict, warn and let nodeB win. |
133 | $env->log( |
134 | 'info/html2wt', |
135 | 'Incompatible constraints 1:', |
136 | DOMCompat::nodeName( $nodeA ), |
137 | DOMCompat::nodeName( $nodeB ), |
138 | self::loggableConstraints( $nlConstraints ) |
139 | ); |
140 | $nlConstraints['min'] = $bCons['min']; |
141 | $nlConstraints['max'] = $bCons['min']; |
142 | } else { |
143 | $nlConstraints['min'] = max( $nlConstraints['min'] ?? 0, $bCons['min'] ); |
144 | } |
145 | } |
146 | |
147 | if ( isset( $bCons['max'] ) ) { |
148 | if ( ( $nlConstraints['min'] ?? 0 ) > $bCons['max'] ) { |
149 | // Conflict, warn and let nodeB win. |
150 | $env->log( |
151 | 'info/html2wt', |
152 | 'Incompatible constraints 2:', |
153 | DOMCompat::nodeName( $nodeA ), |
154 | DOMCompat::nodeName( $nodeB ), |
155 | self::loggableConstraints( $nlConstraints ) |
156 | ); |
157 | $nlConstraints['min'] = $bCons['max']; |
158 | $nlConstraints['max'] = $bCons['max']; |
159 | } else { |
160 | $nlConstraints['max'] = min( $nlConstraints['max'] ?? $bCons['max'], $bCons['max'] ); |
161 | } |
162 | } |
163 | |
164 | if ( $nlConstraints['max'] === null ) { |
165 | // Anything more than two lines will trigger paragraphs, so default to |
166 | // two if nothing is specified. (FIXME: This is a conservative strategy |
167 | // since strictly speaking, this is not always true. This is more a |
168 | // cautious fallback to handle cases where some DOM handler is missing |
169 | // a necessary max constraint.) |
170 | $nlConstraints['max'] = 2; |
171 | } |
172 | |
173 | if ( ( $nlConstraints['min'] ?? 0 ) > $nlConstraints['max'] ) { |
174 | $nlConstraints['max'] = $nlConstraints['min']; |
175 | } |
176 | |
177 | return $nlConstraints; |
178 | } |
179 | |
180 | /** |
181 | * Create a separator given a (potentially empty) separator text and newline constraints. |
182 | * |
183 | * @param Node $node |
184 | * @param string $sep |
185 | * @param array $nlConstraints |
186 | * @return string |
187 | */ |
188 | private function makeSeparator( Node $node, string $sep, array $nlConstraints ): string { |
189 | $origSep = $sep; |
190 | $sepType = $nlConstraints['constraintInfo']['sepType'] ?? null; |
191 | |
192 | // Split on comment/ws-only lines, consuming subsequent newlines since |
193 | // those lines are ignored by the PHP parser |
194 | // Ignore lines with ws and a single comment in them |
195 | $splitRe = implode( [ "#(?:\n(?:[ \t]*?", |
196 | Utils::COMMENT_REGEXP_FRAGMENT, |
197 | "[ \t]*?)+(?=\n))+|", |
198 | Utils::COMMENT_REGEXP_FRAGMENT, |
199 | "#" |
200 | ] ); |
201 | $sepNlCount = substr_count( implode( preg_split( $splitRe, $sep ) ), "\n" ); |
202 | $minNls = $nlConstraints['min'] ?? 0; |
203 | |
204 | if ( $this->state->atStartOfOutput && $minNls > 0 ) { |
205 | // Skip first newline as we are in start-of-line context |
206 | $minNls--; |
207 | } |
208 | |
209 | if ( $minNls > 0 && $sepNlCount < $minNls ) { |
210 | // Append newlines |
211 | $nlBuf = []; |
212 | for ( $i = 0; $i < ( $minNls - $sepNlCount ); $i++ ) { |
213 | $nlBuf[] = "\n"; |
214 | } |
215 | |
216 | /* ------------------------------------------------------------------ |
217 | * The following two heuristics try to do a best-guess on where to |
218 | * add the newlines relative to nodeA and nodeB that best matches |
219 | * wikitext output expectations. |
220 | * |
221 | * 1. In a parent-child separator scenario, where the first child of |
222 | * nodeA is not an element, it could have contributed to the separator. |
223 | * In that case, the newlines should be prepended because they |
224 | * usually correspond to the parent's constraints, |
225 | * and the separator was plucked from the child. |
226 | * |
227 | * Try html2wt on this snippet: |
228 | * |
229 | * a<p><!--cmt-->b</p> |
230 | * |
231 | * 2. In a sibling scenario, if nodeB is a literal-HTML element, nodeA is |
232 | * forcing the newline and hence the newline should be emitted right |
233 | * after it. |
234 | * |
235 | * Try html2wt on this snippet: |
236 | * |
237 | * <p>foo</p> <p data-parsoid='{"stx":"html"}'>bar</p> |
238 | * -------------------------------------------------------------------- */ |
239 | $constraintInfo = $nlConstraints['constraintInfo'] ?? []; |
240 | $sepType = $constraintInfo['sepType'] ?? null; |
241 | $nodeA = $constraintInfo['nodeA'] ?? null; |
242 | $nodeB = $constraintInfo['nodeB'] ?? null; |
243 | if ( |
244 | $sepType === 'parent-child' && |
245 | !DOMUtils::isContentNode( DOMUtils::firstNonDeletedChild( $nodeA ) ) && |
246 | !( |
247 | isset( Consts::$HTML['ChildTableTags'][DOMCompat::nodeName( $nodeB )] ) && |
248 | !WTUtils::isLiteralHTMLNode( $nodeB ) |
249 | ) |
250 | ) { |
251 | $sep = implode( $nlBuf ) . $sep; |
252 | } elseif ( $sepType === 'sibling' && WTUtils::isLiteralHTMLNode( $nodeB ) ) { |
253 | $sep = implode( $nlBuf ) . $sep; |
254 | } else { |
255 | $sep .= implode( $nlBuf ); |
256 | } |
257 | } elseif ( isset( $nlConstraints['max'] ) && $sepNlCount > $nlConstraints['max'] && ( |
258 | // In selser mode, if the current node is an unmodified rendering-transparent node |
259 | // of a sibling pair, leave the separator alone since the excess newlines aren't |
260 | // going to change the semantics of how this node will be parsed in wt->html direction. |
261 | // This will instead eliminate a dirty diff on the page. |
262 | !$this->state->selserMode || |
263 | $sepType !== 'sibling' || |
264 | !$this->state->currNodeUnmodified || |
265 | !WTUtils::isRenderingTransparentNode( $node ) |
266 | ) ) { |
267 | // Strip some newlines outside of comments. |
268 | // |
269 | // Capture separators in a single array with a capturing version of |
270 | // the split regexp, so that we can work on the non-separator bits |
271 | // when stripping newlines. |
272 | // |
273 | // Dirty-diff minimizing heuristic: Strip newlines away from an unmodified node. |
274 | // If both nodes are unmodified, this dirties the separator before the current node. |
275 | // If both nodes are modified, this dirties the separator after the previous node. |
276 | $allBits = preg_split( '#(' . PHPUtils::reStrip( $splitRe, '#' ) . ')#', |
277 | $sep, -1, PREG_SPLIT_DELIM_CAPTURE ); |
278 | $newBits = []; |
279 | $n = $sepNlCount - $nlConstraints['max']; |
280 | |
281 | $stripAtEnd = $this->state->prevNodeUnmodified; |
282 | while ( $n > 0 ) { |
283 | $bit = $stripAtEnd ? array_pop( $allBits ) : array_shift( $allBits ); |
284 | while ( $bit && preg_match( $splitRe, $bit ) ) { |
285 | // Retain comment-only lines as is |
286 | $newBits[] = $bit; |
287 | $bit = $stripAtEnd ? array_pop( $allBits ) : array_shift( $allBits ); |
288 | } |
289 | // @phan-suppress-next-line PhanPluginLoopVariableReuse |
290 | while ( $n > 0 && str_contains( $bit, "\n" ) ) { |
291 | $bit = preg_replace( '/\n([^\n]*)/', '$1', $bit, 1 ); |
292 | $n--; |
293 | } |
294 | $newBits[] = $bit; |
295 | } |
296 | if ( $stripAtEnd ) { |
297 | $newBits = array_merge( $allBits, array_reverse( $newBits ) ); |
298 | } else { |
299 | PHPUtils::pushArray( $newBits, $allBits ); |
300 | } |
301 | $sep = implode( $newBits ); |
302 | } |
303 | |
304 | $this->state->getEnv()->log( |
305 | 'debug/wts/sep', |
306 | 'make-new |', |
307 | static function () use ( $nlConstraints, $sepNlCount, $minNls, $sep, $origSep ) { |
308 | $constraints = Utils::clone( $nlConstraints, true, true ); |
309 | unset( $constraints['constraintInfo'] ); |
310 | return PHPUtils::jsonEncode( $sep ) . ', ' . PHPUtils::jsonEncode( $origSep ) . ', ' . |
311 | $minNls . ', ' . $sepNlCount . ', ' . PHPUtils::jsonEncode( $constraints ); |
312 | } |
313 | ); |
314 | |
315 | return $sep; |
316 | } |
317 | |
318 | /** |
319 | * Merge two constraints. |
320 | * @param Env $env |
321 | * @param array $oldConstraints |
322 | * @param array $newConstraints |
323 | * @return array |
324 | */ |
325 | private static function mergeConstraints( |
326 | Env $env, array $oldConstraints, array $newConstraints |
327 | ): array { |
328 | $res = [ |
329 | 'min' => max( $oldConstraints['min'] ?? 0, $newConstraints['min'] ?? 0 ), |
330 | 'max' => min( $oldConstraints['max'] ?? 2, $newConstraints['max'] ?? 2 ), |
331 | 'constraintInfo' => [], |
332 | ]; |
333 | |
334 | if ( $res['min'] > $res['max'] ) { |
335 | $res['max'] = $res['min']; |
336 | $env->log( |
337 | 'info/html2wt', |
338 | 'Incompatible constraints (merge):', |
339 | $res, |
340 | self::loggableConstraints( $oldConstraints ), |
341 | self::loggableConstraints( $newConstraints ) |
342 | ); |
343 | } |
344 | |
345 | return $res; |
346 | } |
347 | |
348 | /** |
349 | * @param Node $node |
350 | * @return string |
351 | */ |
352 | public static function debugOut( Node $node ): string { |
353 | $value = ''; |
354 | if ( $node instanceof Element ) { |
355 | $value = DOMCompat::getOuterHTML( $node ); |
356 | } |
357 | if ( !$value ) { |
358 | $value = $node->nodeValue; |
359 | } |
360 | return mb_substr( PHPUtils::jsonEncode( $value ), 0, 40 ); |
361 | } |
362 | |
363 | /** |
364 | * Figure out separator constraints and merge them with existing constraints |
365 | * in state so that they can be emitted when the next content emits source. |
366 | * |
367 | * @param Node $nodeA |
368 | * @param DOMHandler $sepHandlerA |
369 | * @param Node $nodeB |
370 | * @param DOMHandler $sepHandlerB |
371 | */ |
372 | public function updateSeparatorConstraints( |
373 | Node $nodeA, DOMHandler $sepHandlerA, Node $nodeB, DOMHandler $sepHandlerB |
374 | ): void { |
375 | $state = $this->state; |
376 | |
377 | if ( $nodeB->parentNode === $nodeA ) { |
378 | // parent-child separator, nodeA parent of nodeB |
379 | '@phan-var Element|DocumentFragment $nodeA'; // @var Element|DocumentFragment $nodeA |
380 | $sepType = 'parent-child'; |
381 | $aCons = $sepHandlerA->firstChild( $nodeA, $nodeB, $state ); |
382 | $bCons = $nodeB instanceof Element ? $sepHandlerB->before( $nodeB, $nodeA, $state ) : []; |
383 | } elseif ( $nodeA->parentNode === $nodeB ) { |
384 | // parent-child separator, nodeB parent of nodeA |
385 | '@phan-var Element|DocumentFragment $nodeB'; // @var Element|DocumentFragment $nodeB
386 | $sepType = 'child-parent'; |
387 | $aCons = $nodeA instanceof Element ? $sepHandlerA->after( $nodeA, $nodeB, $state ) : []; |
388 | $bCons = $sepHandlerB->lastChild( $nodeB, $nodeA, $state ); |
389 | } else { |
390 | // sibling separator |
391 | $sepType = 'sibling'; |
392 | $aCons = $nodeA instanceof Element ? $sepHandlerA->after( $nodeA, $nodeB, $state ) : []; |
393 | $bCons = $nodeB instanceof Element ? $sepHandlerB->before( $nodeB, $nodeA, $state ) : []; |
394 | } |
395 | $nlConstraints = $this->getSepNlConstraints( $nodeA, $aCons, $nodeB, $bCons ); |
396 | |
397 | if ( !empty( $state->sep->constraints ) ) { |
398 | // Merge the constraints |
399 | $state->sep->constraints = self::mergeConstraints( |
400 | $this->env, |
401 | $state->sep->constraints, |
402 | $nlConstraints |
403 | ); |
404 | } else { |
405 | $state->sep->constraints = $nlConstraints; |
406 | } |
407 | |
408 | $this->env->log( |
409 | 'debug/wts/sep', |
410 | function () use ( $sepType, $nodeA, $nodeB, $state ) { |
411 | return 'constraint' . ' | ' . |
412 | $sepType . ' | ' . |
413 | '<' . DOMCompat::nodeName( $nodeA ) . ',' . DOMCompat::nodeName( $nodeB ) . |
414 | '>' . ' | ' . PHPUtils::jsonEncode( $state->sep->constraints ) . ' | ' . |
415 | self::debugOut( $nodeA ) . ' | ' . self::debugOut( $nodeB ); |
416 | } |
417 | ); |
418 | |
419 | $state->sep->constraints['constraintInfo'] = [ |
420 | 'onSOL' => $state->onSOL, |
421 | // force SOL state when separator is built/emitted |
422 | 'forceSOL' => $sepHandlerB->forceSOL(), |
423 | 'sepType' => $sepType, |
424 | 'nodeA' => $nodeA, |
425 | 'nodeB' => $nodeB, |
426 | ]; |
427 | } |
428 | |
429 | /** |
430 | * @param Env $env |
431 | * @param SerializerState $state |
432 | */ |
433 | public function __construct( Env $env, SerializerState $state ) { |
434 | $this->env = $env; |
435 | $this->state = $state; |
436 | } |
437 | |
438 | /** |
439 | * @param string $sep |
440 | * @param array $nlConstraints |
441 | * @return string |
442 | */ |
443 | private function makeSepIndentPreSafe( |
444 | string $sep, array $nlConstraints |
445 | ): string { |
446 | $state = $this->state; |
447 | $constraintInfo = $nlConstraints['constraintInfo'] ?? []; |
448 | $sepType = $constraintInfo['sepType'] ?? null; |
449 | $nodeA = $constraintInfo['nodeA'] ?? null; |
450 | $nodeB = $constraintInfo['nodeB'] ?? null; |
451 | $forceSOL = ( $constraintInfo['forceSOL'] ?? false ) && $sepType !== 'child-parent'; |
452 | $origNodeB = $nodeB; |
453 | |
454 | // Ex: "<div>foo</div>\n <span>bar</span>" |
455 | // |
456 | // We also should test for onSOL state to deal with HTML like |
457 | // <ul> <li>foo</li></ul> |
458 | // and strip the leading space before non-indent-pre-safe tags |
459 | if ( |
460 | !$state->inPHPBlock && |
461 | !$state->inIndentPre && |
462 | preg_match( self::INDENT_PRE_WS_IN_SEP_REGEXP, $sep ) && ( |
463 | str_contains( $sep, "\n" ) || !empty( $constraintInfo['onSOL'] ) || $forceSOL |
464 | ) |
465 | ) { |
466 | // 'sep' is the separator before 'nodeB' and it has leading spaces on a newline. |
467 | // We have to decide whether that leading space will trigger indent-pres in wikitext. |
468 | // The decision depends on where this separator will be emitted relative |
469 | // to 'nodeA' and 'nodeB'. |
470 | |
471 | $isIndentPreSafe = false; |
472 | |
473 | // Example sepType scenarios: |
474 | // |
475 | // 1. sibling |
476 | // <div>foo</div> |
477 | // <span>bar</span> |
478 | // The span will be wrapped in an indent-pre if the leading space |
479 | // is not stripped since span is not a block tag |
480 | // |
481 | // 2. child-parent |
482 | // <span>foo |
483 | // </span>bar |
484 | // The " </span>bar" will be wrapped in an indent-pre if the |
485 | // leading space is not stripped since span is not a block tag |
486 | // |
487 | // 3. parent-child |
488 | // <div>foo |
489 | // <span>bar</span> |
490 | // </div> |
491 | // |
492 | // In all cases, only block-tags prevent indent-pres. |
493 | // (except for a special case for <br> nodes) |
494 | if ( $nodeB && WTSUtils::precedingSpaceSuppressesIndentPre( $nodeB, $origNodeB ) ) { |
495 | $isIndentPreSafe = true; |
496 | } elseif ( $sepType === 'sibling' || $nodeA && DOMUtils::atTheTop( $nodeA ) ) { |
497 | Assert::invariant( !DOMUtils::atTheTop( $nodeA ) || $sepType === 'parent-child', __METHOD__ ); |
498 | |
499 | // 'nodeB' is the first non-separator child of 'nodeA'. |
500 | // |
501 | // Walk past sol-transparent nodes in the right-sibling chain |
502 | // of 'nodeB' till we establish indent-pre safety. |
503 | while ( $nodeB && |
504 | ( DOMUtils::isDiffMarker( $nodeB ) || WTUtils::emitsSolTransparentSingleLineWT( $nodeB ) ) |
505 | ) { |
506 | $nodeB = $nodeB->nextSibling; |
507 | } |
508 | |
509 | $isIndentPreSafe = !$nodeB || WTSUtils::precedingSpaceSuppressesIndentPre( $nodeB, $origNodeB ); |
510 | } |
511 | |
512 | // Check whether nodeB is nested inside an element that suppresses |
513 | // indent-pres. |
514 | if ( $nodeB && !$isIndentPreSafe && !DOMUtils::atTheTop( $nodeB ) ) { |
515 | $parentB = $nodeB->parentNode; // could be nodeA |
516 | while ( WTUtils::isZeroWidthWikitextElt( $parentB ) ) { |
517 | $parentB = $parentB->parentNode; |
518 | } |
519 | |
520 | // The token stream paragraph wrapper (and legacy doBlockLevels) |
521 | // tracks this separately with $inBlockquote |
522 | $isIndentPreSafe = DOMUtils::hasNameOrHasAncestorOfName( |
523 | $parentB, 'blockquote' |
524 | ); |
525 | |
526 | // First scope wins |
527 | while ( !$isIndentPreSafe && !DOMUtils::atTheTop( $parentB ) ) { |
528 | if ( |
529 | TokenUtils::tagOpensBlockScope( DOMCompat::nodeName( $parentB ) ) && |
530 | // Only html p-tag is indent pre suppressing |
531 | ( DOMCompat::nodeName( $parentB ) !== 'p' || WTUtils::isLiteralHTMLNode( $parentB ) ) |
532 | ) { |
533 | $isIndentPreSafe = true; |
534 | break; |
535 | } elseif ( TokenUtils::tagClosesBlockScope( DOMCompat::nodeName( $parentB ) ) ) { |
536 | break; |
537 | } |
538 | $parentB = $parentB->parentNode; |
539 | } |
540 | } |
541 | |
542 | $stripLeadingSpace = ( !empty( $constraintInfo['onSOL'] ) || $forceSOL ) && |
543 | $nodeB && !WTUtils::isLiteralHTMLNode( $nodeB ) && |
544 | isset( Consts::$HTMLTagsRequiringSOLContext[DOMCompat::nodeName( $nodeB )] ); |
545 | if ( !$isIndentPreSafe || $stripLeadingSpace ) { |
546 | // Wrap non-nl ws from last line, but preserve comments. |
547 | // This avoids triggering indent-pres. |
548 | $sep = preg_replace_callback( |
549 | self::INDENT_PRE_WS_IN_SEP_REGEXP, |
550 | static function ( $matches ) use ( $stripLeadingSpace, $state ) { |
551 | if ( !$stripLeadingSpace ) { |
552 | // Since we nowiki-ed, we are no longer in sol state |
553 | $state->onSOL = false; |
554 | $state->hasIndentPreNowikis = true; |
555 | $space = '<nowiki>' . $matches[2] . '</nowiki>'; |
556 | } |
557 | return ( $matches[1] ?? '' ) . ( $space ?? '' ) . ( $matches[3] ?? '' ); |
558 | }, |
559 | $sep |
560 | ); |
561 | } |
562 | } |
563 | |
564 | $state->getEnv()->log( |
565 | 'debug/wts/sep', |
566 | 'ipre-safe |', |
567 | static function () use ( $sep, $nlConstraints ) { |
568 | $constraints = Utils::clone( $nlConstraints, true, true ); |
569 | unset( $constraints['constraintInfo'] ); |
570 | return PHPUtils::jsonEncode( $sep ) . ', ' . PHPUtils::jsonEncode( $constraints ); |
571 | } |
572 | ); |
573 | |
574 | return $sep; |
575 | } |
576 | |
577 | /** |
578 | * Serializing auto inserted content should invalidate the original separator |
579 | * @param Element $node |
580 | * @return DomSourceRange|null |
581 | */ |
582 | private static function handleAutoInserted( Element $node ): ?DomSourceRange { |
583 | $dp = DOMDataUtils::getDataParsoid( $node ); |
584 | if ( !isset( $dp->dsr ) ) { |
585 | return null; |
586 | } |
587 | |
588 | $dsr = clone $dp->dsr; |
589 | if ( !empty( $dp->autoInsertedStart ) ) { |
590 | $dsr->openWidth = null; |
591 | } |
592 | if ( !empty( $dp->autoInsertedEnd ) ) { |
593 | $dsr->closeWidth = null; |
594 | } |
595 | return $dsr; |
596 | } |
597 | |
598 | /** |
599 | * $node is embedded inside a parent node that has its leading/trailing whitespace trimmed |
600 | * in the wt->html direction. In this method, we attempt to recover leading trimmed whitespace |
601 | * using DSR information on $node's parent.
602 | * |
603 | * In some cases, $node might have an additional "data-mw-selser-wrapper" span |
604 | * that is added by SelSer - look past those wrappers. |
605 | * |
606 | * The recovery is attempted in two different ways: |
607 | * 1. If we have additional DSR fields about leading/trailing WS |
608 | * (represented by $state->haveTrimmedWsDSR), that info is used. |
609 | * 2. If not, we simply inspect source at $dsr->innerStart and if it |
610 | * happens to be whitespace, we use that. |
611 | * |
612 | * @param Node $node |
613 | * @return ?string |
614 | */ |
615 | private function fetchLeadingTrimmedSpace( Node $node ): ?string { |
616 | $origNode = $node; |
617 | $parentNode = $node->parentNode; |
618 | |
619 | // Skip past the artificial span wrapper |
620 | if ( $parentNode instanceof Element && $parentNode->hasAttribute( 'data-mw-selser-wrapper' ) ) { |
621 | $node = $parentNode; |
622 | $parentNode = $parentNode->parentNode; |
623 | } |
624 | |
625 | // Leading trimmed whitespace only makes sense for first child. |
626 | // Ignore comments (which are part of separators) + deletion markers. |
627 | if ( DOMUtils::previousNonSepSibling( $node ) ) { |
628 | return null; |
629 | } |
630 | |
631 | '@phan-var Element|DocumentFragment $parentNode'; // @var Element|DocumentFragment $parentNode |
632 | if ( isset( Consts::$WikitextTagsWithTrimmableWS[DOMCompat::nodeName( $parentNode )] ) && |
633 | ( $origNode instanceof Element || !preg_match( '/^[ \t]/', $origNode->nodeValue ) ) |
634 | ) { |
635 | // Don't reintroduce whitespace that's already been captured as a DisplaySpace |
636 | if ( DOMUtils::hasTypeOf( $origNode, 'mw:DisplaySpace' ) ) { |
637 | return null; |
638 | } |
639 | |
640 | // FIXME: Is this complexity worth some minor dirty diff on this test? |
641 | // ParserTest: "3. List embedded in a formatting tag in a misnested way" |
642 | // I've not added an equivalent check in the trailing whitespace case. |
643 | if ( $origNode instanceof Element && |
644 | isset( DOMDataUtils::getDataParsoid( $origNode )->autoInsertedStart ) && |
645 | strspn( $origNode->firstChild->textContent ?? '', " \t" ) >= 1 |
646 | ) { |
647 | return null; |
648 | } |
649 | |
650 | $state = $this->state; |
651 | $dsr = DOMDataUtils::getDataParsoid( $parentNode )->dsr ?? null; |
652 | if ( Utils::isValidDSR( $dsr, true ) ) { |
653 | if ( $state->haveTrimmedWsDSR && ( |
654 | $dsr->leadingWS > 0 || ( $dsr->leadingWS === 0 && $dsr->trailingWS > 0 ) |
655 | ) ) { |
656 | $sep = $state->getOrigSrc( $dsr->innerStart(), $dsr->innerStart() + $dsr->leadingWS ) ?? ''; |
657 | return strspn( $sep, " \t" ) === strlen( $sep ) ? $sep : null; |
658 | } else { |
659 | $offset = $dsr->innerStart(); |
660 | if ( $offset < $dsr->innerEnd() ) { |
661 | $sep = $state->getOrigSrc( $offset, $offset + 1 ) ?? ''; |
662 | return preg_match( '/[ \t]/', $sep ) ? $sep : null; |
663 | } |
664 | } |
665 | } |
666 | } |
667 | |
668 | return null; |
669 | } |
670 | |
671 | /** |
672 | * $node is embedded inside a parent node that has its leading/trailing whitespace trimmed |
673 | * in the wt->html direction. In this method, we attempt to recover trailing trimmed whitespace |
674 | * using DSR information on $node's parent.
675 | * |
676 | * In some cases, $node might have an additional "data-mw-selser-wrapper" span |
677 | * that is added by SelSer - look past those wrappers. |
678 | * |
679 | * The recovery is attempted in two different ways: |
680 | * 1. If we have additional DSR fields about leading/trailing WS |
681 | * (represented by $state->haveTrimmedWsDSR), that info is used. |
682 | * 2. If not, we simply inspect source at $dsr->innerEnd and if it |
683 | * happens to be whitespace, we use that. |
684 | * |
685 | * @param Node $node |
686 | * @return ?string |
687 | */ |
688 | private function fetchTrailingTrimmedSpace( Node $node ): ?string { |
689 | $origNode = $node; |
690 | $parentNode = $node->parentNode; |
691 | |
692 | // Skip past the artificial span wrapper |
693 | if ( $parentNode instanceof Element && $parentNode->hasAttribute( 'data-mw-selser-wrapper' ) ) { |
694 | $node = $parentNode; |
695 | $parentNode = $parentNode->parentNode; |
696 | } |
697 | |
698 | // Trailing trimmed whitespace only makes sense for last child. |
699 | // Ignore comments (which are part of separators) + deletion markers. |
700 | if ( DOMUtils::nextNonSepSibling( $node ) ) { |
701 | return null; |
702 | } |
703 | |
704 | $sep = null; |
705 | '@phan-var Element|DocumentFragment $parentNode'; // @var Element|DocumentFragment $parentNode |
706 | if ( isset( Consts::$WikitextTagsWithTrimmableWS[DOMCompat::nodeName( $parentNode )] ) && |
707 | ( $origNode instanceof Element || !preg_match( '/[ \t]$/', $origNode->nodeValue ) ) |
708 | ) { |
709 | // Don't reintroduce whitespace that's already been captured as a DisplaySpace |
710 | if ( DOMUtils::hasTypeOf( $origNode, 'mw:DisplaySpace' ) ) { |
711 | return null; |
712 | } |
713 | |
714 | $state = $this->state; |
715 | $dsr = DOMDataUtils::getDataParsoid( $parentNode )->dsr ?? null; |
716 | if ( Utils::isValidDSR( $dsr, true ) ) { |
717 | if ( $state->haveTrimmedWsDSR && ( |
718 | $dsr->trailingWS > 0 || ( $dsr->trailingWS === 0 && $dsr->leadingWS > 0 ) |
719 | ) ) { |
720 | $sep = $state->getOrigSrc( $dsr->innerEnd() - $dsr->trailingWS, $dsr->innerEnd() ) ?? ''; |
721 | if ( !preg_match( '/^[ \t]*$/', $sep ) ) { |
722 | $sep = null; |
723 | } |
724 | } else { |
725 | $offset = $dsr->innerEnd() - 1; |
726 | // The > instead of >= is to deal with an edge case |
727 | // = = where that single space is captured by the |
728 | // fetchLeadingTrimmedSpace case above
729 | if ( $offset > $dsr->innerStart() ) { |
730 | $sep = $state->getOrigSrc( $offset, $offset + 1 ) ?? ''; |
731 | if ( !preg_match( '/[ \t]/', $sep ) ) { |
732 | $sep = null; |
733 | } |
734 | } |
735 | } |
736 | } |
737 | } |
738 | |
739 | return $sep; |
740 | } |
741 | |
742 | /** |
743 | * Recover leading / trailing whitespace that was trimmed in the wt->html direction.
744 | * Only attempted when whitespace heuristics are enabled and we are in selser mode.
745 | * @param Node $node |
746 | * @param bool $leading |
747 | * if true, trimmed leading whitespace is emitted |
748 | * if false, trimmed trailing whitespace is emitted
749 | * @return string|null |
750 | */ |
751 | public function recoverTrimmedWhitespace( Node $node, bool $leading ): ?string { |
752 | // Deal with scenarios where leading / trailing whitespace were trimmed. |
753 | // We now need to figure out if we need to add any leading / trailing WS back. |
754 | if ( $this->state->useWhitespaceHeuristics && $this->state->selserMode ) { |
755 | if ( $leading ) { |
756 | return $this->fetchLeadingTrimmedSpace( $node ); |
757 | } else { |
758 | $lastChild = DOMUtils::lastNonDeletedChild( $node ); |
759 | return $lastChild ? $this->fetchTrailingTrimmedSpace( $lastChild ) : null; |
760 | } |
761 | } |
762 | |
763 | return null; |
764 | } |
765 | |
766 | /** |
767 | * Emit a separator based on the collected (and merged) constraints |
768 | * and existing separator text. Called when new output is triggered. |
    | * |
    | * The separator sits between the last source node |
    | * ($state->sep->lastSourceNode) and $node. It is determined in stages: |
    | * |
    | * 1. In selser mode, when $node differs from the last source node, the |
    | *    content is not marked modified, neither node is next to a deleted |
    | *    block node and the original source is valid for both nodes, DSR |
    | *    ranges are derived for both nodes (dsrA / dsrB) and the separator is |
    | *    sliced out of the original source according to how the two ranges |
    | *    nest in or follow each other. A non-empty result that fails |
    | *    WTSUtils::isValidSep() is discarded. |
    | * 2. Otherwise, in selser mode, when the last source node has no diff |
    | *    markers and its next non-separator sibling was inserted, the text |
    | *    that originally followed the last source node is recovered and |
    | *    stored in $state->sep->src (it is not returned directly). |
    | * 3. If there is still no separator and the constraint sepType is |
    | *    'parent-child' or 'child-parent', trimmed leading / trailing |
    | *    whitespace is recovered through recoverTrimmedWhitespace() and |
    | *    merged into $state->sep->src. |
    | * 4. If there is still no separator, or $state->sep->src is non-empty and |
    | *    differs from it, makeSeparator() builds one from $state->sep->src |
    | *    and the constraints; when both constraints and $state->sep->src are |
    | *    empty the result is null. Any non-null result is finally passed |
    | *    through makeSepIndentPreSafe(). |
    | * |
769 | * @param Node $node Node for which output is being emitted |
770 | * @return string|null Separator text, or null if none could be determined |
771 | */ |
772 | public function buildSep( Node $node ): ?string { |
773 | $state = $this->state; |
774 | $sepType = $state->sep->constraints['constraintInfo']['sepType'] ?? null; |
775 | $sep = null; |
776 | $origNode = $node; |
777 | $prevNode = $state->sep->lastSourceNode; |
778 | $dsrA = null; |
779 | $dsrB = null; |
780 | |
781 | /* ---------------------------------------------------------------------- |
782 | * Assuming we have access to the original source, we can use DSR offsets |
783 | * to extract separators from source only if: |
784 | * - we are in selser mode AND |
785 | * - this node is not part of a newly inserted subtree (marked 'modified') |
786 | * for which DSR isn't available |
787 | * - neither node is adjacent to a deleted block node |
788 | * (see the long comment in SerializerState::emitChunk in the middle) |
789 | * |
790 | * In other scenarios, DSR values on "adjacent" nodes in the edited DOM |
791 | * may not reflect deleted content between them. |
792 | * ---------------------------------------------------------------------- */ |
793 | $origSepNeeded = $node !== $prevNode && $state->selserMode; |
794 | $origSepNeededAndUsable = |
795 | $origSepNeeded && !$state->inModifiedContent && |
796 | !WTSUtils::nextToDeletedBlockNodeInWT( $prevNode, true ) && |
797 | !WTSUtils::nextToDeletedBlockNodeInWT( $node, false ) && |
798 | WTSUtils::origSrcValidInEditedContext( $state, $prevNode ) && |
799 | WTSUtils::origSrcValidInEditedContext( $state, $node ); |
800 | |
801 | if ( $origSepNeededAndUsable ) { |
802 | if ( $prevNode instanceof Element ) { |
803 | $dsrA = self::handleAutoInserted( $prevNode ); |
804 | } elseif ( !( $prevNode instanceof DocumentFragment ) ) { |
805 | // Check if $prevNode is the last child of a zero-width element, |
806 | // and use that for dsr purposes instead. Typical case: text in p. |
807 | if ( |
808 | !$prevNode->nextSibling && |
809 | $prevNode->parentNode !== $node && |
810 | $prevNode->parentNode instanceof Element && |
811 | ( DOMDataUtils::getDataParsoid( $prevNode->parentNode )->dsr->closeWidth ?? null ) === 0 |
812 | ) { |
813 | $dsrA = self::handleAutoInserted( $prevNode->parentNode ); |
814 | } elseif ( |
815 | // Can we extrapolate DSR from $prevNode->previousSibling? |
816 | // Yes, if $prevNode->parentNode didn't have its children edited. |
817 | $prevNode->previousSibling instanceof Element && |
818 | !DiffUtils::directChildrenChanged( $prevNode->parentNode, $this->env ) |
819 | ) { |
820 | $endDsr = DOMDataUtils::getDataParsoid( $prevNode->previousSibling )->dsr->end ?? null; |
821 | $correction = null; |
822 | if ( is_int( $endDsr ) ) { |
823 | if ( $prevNode instanceof Comment ) { |
824 | '@phan-var Comment $prevNode'; // @var Comment $prevNode |
825 | $correction = WTUtils::decodedCommentLength( $prevNode ); |
826 | } else { |
827 | $correction = strlen( $prevNode->nodeValue ); |
828 | } |
829 | $dsrA = new DomSourceRange( |
830 | $endDsr, |
831 | $endDsr + $correction + WTUtils::indentPreDSRCorrection( $prevNode ), |
832 | 0, |
833 | 0 |
834 | ); |
835 | } |
836 | } |
837 | } |
838 | |
839 | if ( !$dsrA ) { |
840 | // nothing to do -- no reason to compute dsrB if dsrA is null |
841 | } elseif ( $node instanceof Element ) { |
842 | // $node is parent of $prevNode |
843 | if ( $prevNode->parentNode === $node ) { |
844 | '@phan-var Element|DocumentFragment $node'; // @var Element|DocumentFragment $node |
845 | // FIXME: Maybe we shouldn't set dsr in the dsr pass if both aren't valid? |
846 | // |
847 | // When we are in the lastChild sep scenario and the parent doesn't have |
848 | // usable dsr, if possible, walk up the ancestor nodes till we find |
849 | // a dsr-bearing node |
850 | // |
851 | // This fix is needed to handle trailing newlines in this wikitext: |
852 | // [[File:foo.jpg|thumb|300px|foo\n{{1x|A}}\n{{1x|B}}\n{{1x|C}}\n\n]] |
853 | while ( |
854 | !$node->nextSibling && |
855 | !DOMUtils::atTheTop( $node ) && |
856 | ( |
857 | empty( DOMDataUtils::getDataParsoid( $node )->dsr ) || |
858 | DOMDataUtils::getDataParsoid( $node )->dsr->start === null || |
859 | DOMDataUtils::getDataParsoid( $node )->dsr->end === null |
860 | ) |
861 | ) { |
862 | $node = $node->parentNode; |
863 | } |
864 | } |
865 | |
866 | // The top node could be a document fragment |
867 | $dsrB = $node instanceof Element ? self::handleAutoInserted( $node ) : null; |
868 | } elseif ( !( $node instanceof DocumentFragment ) ) { |
869 | // $node is text/comment. Can we extrapolate DSR from $node->parentNode? |
870 | // Yes, if this is the child of a zero-width element and |
871 | // is only preceded by separator elements. |
872 | // |
873 | // 1. text in p. |
874 | // 2. ws-only child of a node with auto-inserted start tag |
875 | // Ex: "<span> <s>x</span> </s>" --> <span> <s>x</s*></span><s*> </s> |
876 | // 3. ws-only children of a node with auto-inserted start tag |
877 | // Ex: "{|\n|-\n <!--foo--> \n|}" |
878 | $nodeParent = $node->parentNode; |
879 | // phpcs:ignore Generic.Files.LineLength.TooLong |
880 | '@phan-var Element|DocumentFragment $nodeParent'; // @var Element|DocumentFragment $nodeParent |
881 | |
882 | if ( |
883 | $nodeParent !== $prevNode && |
884 | $nodeParent instanceof Element && |
885 | ( DOMDataUtils::getDataParsoid( $nodeParent )->dsr->openWidth ?? null ) === 0 |
886 | ) { |
887 | $sepLen = self::precedingSeparatorTextLen( $node ); |
888 | if ( $sepLen !== null ) { |
889 | $dsrB = DOMDataUtils::getDataParsoid( $nodeParent )->dsr; |
890 | if ( is_int( $dsrB->start ) && $sepLen > 0 ) { |
891 | $dsrB = clone $dsrB; |
892 | $dsrB->start += $sepLen; |
893 | } |
894 | } |
895 | } |
896 | } |
897 | |
898 | // FIXME: Maybe we shouldn't set dsr in the dsr pass if both aren't valid? |
899 | if ( Utils::isValidDSR( $dsrA ) && Utils::isValidDSR( $dsrB ) ) { |
900 | // Figure out containment relationship |
901 | if ( $dsrA->start <= $dsrB->start ) { |
902 | if ( $dsrB->end <= $dsrA->end ) { |
903 | if ( $dsrA->start === $dsrB->start && $dsrA->end === $dsrB->end ) { |
904 | // Both have the same dsr range, so there can't be any |
905 | // separators between them |
906 | $sep = ''; |
907 | } elseif ( ( $dsrA->openWidth ?? null ) !== null ) { |
908 | // B in A, from parent to child |
909 | $sep = $state->getOrigSrc( $dsrA->innerStart(), $dsrB->start ); |
910 | } |
911 | } elseif ( $dsrA->end <= $dsrB->start ) { |
912 | // B following A (siblingish) |
913 | $sep = $state->getOrigSrc( $dsrA->end, $dsrB->start ); |
914 | } elseif ( ( $dsrB->closeWidth ?? null ) !== null ) { |
915 | // A in B, from child to parent |
916 | $sep = $state->getOrigSrc( $dsrA->end, $dsrB->innerEnd() ); |
917 | } |
918 | } elseif ( $dsrA->end <= $dsrB->end ) { |
919 | if ( ( $dsrB->closeWidth ?? null ) !== null ) { |
920 | // A in B, from child to parent |
921 | $sep = $state->getOrigSrc( $dsrA->end, $dsrB->innerEnd() ); |
922 | } |
923 | } else { |
924 | $this->env->log( 'info/html2wt', 'dsr backwards: should not happen!' ); |
925 | } |
926 | |
927 | // Reset if $sep is invalid |
928 | if ( $sep && !WTSUtils::isValidSep( $sep ) ) { |
929 | $sep = null; |
930 | } |
931 | } |
932 | } elseif ( $origSepNeeded && !DiffUtils::hasDiffMarkers( $prevNode, $this->env ) ) { |
933 | // Given the following conditions: |
934 | // - $prevNode has no diff markers. (checked above) |
935 | // - $prevNode's next non-sep sibling ($next) was inserted. |
936 | // - $next is an ancestor of $node. |
937 | // - all of those ancestor nodes from $node->$next have zero-width |
938 | // wikitext (otherwise, the separator isn't usable) |
939 | // Try to extract a separator from original source that existed |
940 | // between $prevNode and its original next sibling or its parent |
941 | // (if $prevNode was the last non-sep child). |
942 | // |
943 | // This minimizes dirty-diffs to that separator text from |
944 | // the insertion of $next after $prevNode. |
945 | $next = DOMUtils::nextNonSepSibling( $prevNode ); |
946 | $origSepUsable = $next && DiffUtils::hasInsertedDiffMark( $next, $this->env ); |
947 | |
948 | // Check that $next is an ancestor of $node and all nodes |
949 | // on that path have zero-width wikitext |
950 | if ( $origSepUsable && $node !== $next ) { |
951 | $n = $node->parentNode; |
952 | while ( $n && $next !== $n ) { |
953 | if ( !WTUtils::isZeroWidthWikitextElt( $n ) ) { |
954 | $origSepUsable = false; |
955 | break; |
956 | } |
957 | $n = $n->parentNode; |
958 | } |
959 | $origSepUsable = $origSepUsable && $n !== null; |
960 | } |
961 | |
962 | // Extract separator from original source if possible |
963 | if ( $origSepUsable ) { |
964 | $origNext = DOMUtils::nextNonSepSibling( $next ); |
965 | if ( !$origNext ) { // $prevNode was last non-sep child of its parent |
966 | // We could work harder for text/comments and extrapolate, but skipping that here |
967 | // FIXME: If we had a generic DSR extrapolation utility, that would be useful |
968 | $o1 = $prevNode instanceof Element ? |
969 | DOMDataUtils::getDataParsoid( $prevNode )->dsr->end ?? null : null; |
970 | if ( $o1 !== null ) { |
971 | $dsr2 = DOMDataUtils::getDataParsoid( $prevNode->parentNode )->dsr ?? null; |
972 | $o2 = $dsr2 ? $dsr2->innerEnd() : null; |
973 | $sep = $o2 !== null ? $state->getOrigSrc( $o1, $o2 ) : null; |
974 | } |
975 | } elseif ( !DiffUtils::hasDiffMarkers( $origNext, $this->env ) ) { |
976 | // We could work harder for text/comments and extrapolate, but skipping that here |
977 | // FIXME: If we had a generic DSR extrapolation utility, that would be useful |
978 | $o1 = $prevNode instanceof Element ? |
979 | DOMDataUtils::getDataParsoid( $prevNode )->dsr->end ?? null : null; |
980 | if ( $o1 !== null ) { |
981 | $o2 = $origNext instanceof Element ? |
982 | DOMDataUtils::getDataParsoid( $origNext )->dsr->start ?? null : null; |
983 | $sep = $o2 !== null ? $state->getOrigSrc( $o1, $o2 ) : null; |
984 | } |
985 | } |
986 | |
987 | if ( $sep !== null ) { |
988 | // Since this is an inserted node, we might have to augment this |
989 | // with newline constraints and so, we just set this recovered sep |
990 | // to the buffered sep in state->sep->src |
991 | $state->sep->src = $sep; |
992 | $sep = null; |
993 | } |
994 | } |
995 | } |
996 | |
997 | // If all efforts failed, use special-purpose heuristics to recover |
998 | // trimmed leading / trailing whitespace from lists, headings, table-cells |
999 | if ( $sep === null ) { |
1000 | if ( $sepType === 'parent-child' ) { |
1001 | $sep = $this->recoverTrimmedWhitespace( $node, true ); |
1002 | if ( $sep !== null ) { |
1003 | $state->sep->src = $sep . $state->sep->src; |
1004 | } |
1005 | } elseif ( $sepType === 'child-parent' ) { |
1006 | $sep = $this->recoverTrimmedWhitespace( $node, false ); |
1007 | if ( $sep !== null ) { |
1008 | $state->sep->src .= $sep; |
1009 | } |
1010 | } |
1011 | } |
1012 | |
1013 | $this->env->log( |
1014 | 'debug/wts/sep', |
1015 | static function () use ( $prevNode, $origNode, $sep, $state ) { |
1016 | return 'maybe-sep | ' . |
1017 | 'prev:' . ( $prevNode ? DOMCompat::nodeName( $prevNode ) : '--none--' ) . |
1018 | ', node:' . DOMCompat::nodeName( $origNode ) . |
1019 | ', sep: ' . PHPUtils::jsonEncode( $sep ) . |
1020 | ', state.sep.src: ' . PHPUtils::jsonEncode( $state->sep->src ?? null ); |
1021 | } |
1022 | ); |
1023 | |
1024 | // If the separator is being emitted before a node that emits sol-transparent WT, |
1025 | // go through makeSeparator to verify indent-pre constraints are met. |
1026 | $sepConstraints = $state->sep->constraints ?? [ 'max' => 0 ]; |
1027 | if ( $sep === null || ( $state->sep->src && $state->sep->src !== $sep ) ) { |
1028 | if ( !empty( $state->sep->constraints ) || !empty( $state->sep->src ) ) { |
1029 | // TODO: set modified flag if start or end node (but not both) are |
1030 | // modified / new so that the selser can use the separator |
1031 | $sep = $this->makeSeparator( $node, $state->sep->src ?? '', $sepConstraints ); |
1032 | } else { |
1033 | $sep = null; |
1034 | } |
1035 | } |
1036 | |
1037 | if ( $sep !== null ) { |
1038 | $sep = self::makeSepIndentPreSafe( $sep, $sepConstraints ); |
1039 | } |
1040 | return $sep; |
1041 | } |
1042 | } |