Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
1.33% |
6 / 450 |
|
7.14% |
1 / 14 |
CRAP | |
0.00% |
0 / 1 |
Separators | |
1.33% |
6 / 450 |
|
7.14% |
1 / 14 |
43382.10 | |
0.00% |
0 / 1 |
loggableConstraints | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
6 | |||
precedingSeparatorTextLen | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
getSepNlConstraints | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
72 | |||
makeSeparator | |
0.00% |
0 / 64 |
|
0.00% |
0 / 1 |
702 | |||
mergeConstraints | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
6 | |||
debugOut | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
updateSeparatorConstraints | |
0.00% |
0 / 37 |
|
0.00% |
0 / 1 |
72 | |||
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
makeSepIndentPreSafe | |
0.00% |
0 / 60 |
|
0.00% |
0 / 1 |
1260 | |||
handleAutoInserted | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
fetchLeadingTrimmedSpace | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
342 | |||
fetchTrailingTrimmedSpace | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
240 | |||
recoverTrimmedWhitespace | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
5 | |||
buildSep | |
0.00% |
0 / 132 |
|
0.00% |
0 / 1 |
6480 |
1 | <?php |
2 | |
3 | declare( strict_types = 1 ); |
4 | |
5 | namespace Wikimedia\Parsoid\Html2Wt; |
6 | |
7 | use Wikimedia\Assert\Assert; |
8 | use Wikimedia\Parsoid\Config\Env; |
9 | use Wikimedia\Parsoid\Core\DomSourceRange; |
10 | use Wikimedia\Parsoid\DOM\Comment; |
11 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
12 | use Wikimedia\Parsoid\DOM\Element; |
13 | use Wikimedia\Parsoid\DOM\Node; |
14 | use Wikimedia\Parsoid\Html2Wt\DOMHandlers\DOMHandler; |
15 | use Wikimedia\Parsoid\Utils\DiffDOMUtils; |
16 | use Wikimedia\Parsoid\Utils\DOMCompat; |
17 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
18 | use Wikimedia\Parsoid\Utils\DOMUtils; |
19 | use Wikimedia\Parsoid\Utils\PHPUtils; |
20 | use Wikimedia\Parsoid\Utils\TokenUtils; |
21 | use Wikimedia\Parsoid\Utils\Utils; |
22 | use Wikimedia\Parsoid\Utils\WTUtils; |
23 | use Wikimedia\Parsoid\Wikitext\Consts; |
24 | |
25 | class Separators { |
26 | /* |
27 | * This regexp looks for leading whitespace on the last line of a separator string. |
28 | * So, only comments (single or multi-line) or other newlines can precede that |
29 | * whitespace-of-interest. But, also account for any whitespace preceding newlines |
30 | * since that needs to be skipped over (Ex: " \n "). |
31 | */ |
32 | private const INDENT_PRE_WS_IN_SEP_REGEXP = |
33 | '/^((?: *\n|(?:' . Utils::COMMENT_REGEXP_FRAGMENT . '))*)( +)([^\n]*)$/D'; |
34 | |
35 | /** |
36 | * @var SerializerState |
37 | */ |
38 | private $state; |
39 | |
40 | /** |
41 | * @var Env |
42 | */ |
43 | private $env; |
44 | |
/**
 * Reduce a constraints array to the few fields worth logging, so that
 * log files and test runs are not flooded with excessively verbose output.
 *
 * Only 'a', 'b', 'min' and 'max' are copied over (null when absent). If
 * 'constraintInfo' is non-empty, its 'onSOL' and 'sepType' values are kept
 * and the two nodes are replaced by their node names.
 *
 * @param array $constraints
 * @return array
 */
private static function loggableConstraints( array $constraints ): array {
	$summary = [];
	foreach ( [ 'a', 'b', 'min', 'max' ] as $key ) {
		$summary[$key] = $constraints[$key] ?? null;
	}

	if ( empty( $constraints['constraintInfo'] ) ) {
		return $summary;
	}

	$info = $constraints['constraintInfo'];
	$summary['constraintInfo'] = [
		'onSOL' => $info['onSOL'] ?? false,
		'sepType' => $info['sepType'] ?? null,
		'nodeA' => DOMCompat::nodeName( $info['nodeA'] ),
		'nodeB' => DOMCompat::nodeName( $info['nodeB'] ),
	];

	return $summary;
}
70 | |
/**
 * Add up the lengths of $n and all of its previous siblings, provided they
 * are inter-element whitespace (IEW) or comments.
 *
 * Given the CSS white-space property and specifically, the "pre" and
 * "pre-line" values for this property, it seems that any sensible HTML
 * editor would have to preserve IEW in HTML documents to preserve
 * rendering. One use-case where an editor might change IEW drastically
 * would be when the user explicitly requests it (Ex: pretty-printing of
 * raw source code).
 *
 * For now, we exploit this. This information is only used to extrapolate
 * DSR values and extract a separator string from source, and is only used
 * locally. In addition, the extracted text is verified for being a valid
 * separator. So, at worst, this can create a local dirty diff around
 * separators and at best, it gets us a clean diff.
 *
 * @param Node $n
 * @return ?int null as soon as a previous sibling is neither IEW nor a
 *   comment; $n itself contributes nothing if it is neither.
 */
private static function precedingSeparatorTextLen( Node $n ): ?int {
	$total = 0;

	for ( $cur = $n; $cur; $cur = $cur->previousSibling ) {
		if ( DOMUtils::isIEW( $cur ) ) {
			$total += strlen( $cur->nodeValue );
		} elseif ( $cur instanceof Comment ) {
			$total += WTUtils::decodedCommentLength( $cur );
		} elseif ( $cur !== $n ) {
			// A non-separator sibling ends the walk; the input node
			// itself is exempt from this check.
			return null;
		}
	}

	return $total;
}
103 | |
/**
 * Helper for updateSeparatorConstraints.
 *
 * Folds the separator newline requirements of two nodes into a single
 * min / max pair. Where the two sides contradict each other, the conflict
 * is logged and nodeB's requirement is used.
 *
 * @param Node $nodeA
 * @param array $aCons constraints from nodeA; 'min' and 'max' are read
 * @param Node $nodeB
 * @param array $bCons constraints from nodeB; 'min' and 'max' are read
 * @return array with keys 'min', 'max' and an (empty) 'constraintInfo'
 */
private function getSepNlConstraints(
	Node $nodeA, array $aCons, Node $nodeB, array $bCons
): array {
	$env = $this->state->getEnv();

	// Start out with whatever nodeA asks for
	$min = $aCons['min'] ?? null;
	$max = $aCons['max'] ?? null;

	if ( isset( $bCons['min'] ) ) {
		$bMin = $bCons['min'];
		if ( $max !== null && $max < $bMin ) {
			// Conflict, warn and let nodeB win.
			$env->log(
				'info/html2wt',
				'Incompatible constraints 1:',
				DOMCompat::nodeName( $nodeA ),
				DOMCompat::nodeName( $nodeB ),
				self::loggableConstraints(
					[ 'min' => $min, 'max' => $max, 'constraintInfo' => [] ]
				)
			);
			$min = $bMin;
			$max = $bMin;
		} else {
			$min = max( $min ?? 0, $bMin );
		}
	}

	if ( isset( $bCons['max'] ) ) {
		$bMax = $bCons['max'];
		if ( ( $min ?? 0 ) > $bMax ) {
			// Conflict, warn and let nodeB win.
			$env->log(
				'info/html2wt',
				'Incompatible constraints 2:',
				DOMCompat::nodeName( $nodeA ),
				DOMCompat::nodeName( $nodeB ),
				self::loggableConstraints(
					[ 'min' => $min, 'max' => $max, 'constraintInfo' => [] ]
				)
			);
			$min = $bMax;
			$max = $bMax;
		} else {
			$max = min( $max ?? $bMax, $bMax );
		}
	}

	// Anything more than two lines will trigger paragraphs, so default to
	// two if nothing is specified. (FIXME: This is a conservative strategy
	// since strictly speaking, this is not always true. This is more a
	// cautious fallback to handle cases where some DOM handler is missing
	// a necessary max constraint.)
	$max ??= 2;

	// The minimum always wins over a smaller maximum
	if ( ( $min ?? 0 ) > $max ) {
		$max = $min;
	}

	return [
		'min' => $min,
		'max' => $max,
		'constraintInfo' => [],
	];
}
176 | |
/**
 * Create a separator given a (potentially empty) separator text and newline constraints.
 *
 * Newlines are added when the text has fewer than the required minimum and
 * stripped (outside of comments) when it has more than the allowed maximum.
 * Newlines on comment/whitespace-only lines are not counted.
 *
 * @param Node $node the node the separator is being built for
 * @param string $sep existing separator text
 * @param array $nlConstraints 'min', 'max' and 'constraintInfo' are read
 * @return string
 */
private function makeSeparator( Node $node, string $sep, array $nlConstraints ): string {
	$origSep = $sep;
	$constraintInfo = $nlConstraints['constraintInfo'] ?? [];
	$sepType = $constraintInfo['sepType'] ?? null;

	// Split on comment/ws-only lines, consuming subsequent newlines since
	// those lines are ignored by the PHP parser
	// Ignore lines with ws and a single comment in them
	$splitRe = implode( [ "#(?:\n(?:[ \t]*?",
		Utils::COMMENT_REGEXP_FRAGMENT,
		"[ \t]*?)+(?=\n))+|",
		Utils::COMMENT_REGEXP_FRAGMENT,
		"#"
	] );
	$sepNlCount = substr_count( implode( preg_split( $splitRe, $sep ) ), "\n" );
	$minNls = $nlConstraints['min'] ?? 0;

	if ( $this->state->atStartOfOutput && $minNls > 0 ) {
		// Skip first newline as we are in start-of-line context
		$minNls--;
	}

	if ( $minNls > 0 && $sepNlCount < $minNls ) {
		// Newlines that are missing and need to be added
		$missingNls = str_repeat( "\n", $minNls - $sepNlCount );

		/* ------------------------------------------------------------------
		 * The following two heuristics try to do a best-guess on where to
		 * add the newlines relative to nodeA and nodeB that best matches
		 * wikitext output expectations.
		 *
		 * 1. In a parent-child separator scenario, where the first child of
		 *    nodeA is not an element, it could have contributed to the separator.
		 *    In that case, the newlines should be prepended because they
		 *    usually correspond to the parent's constraints,
		 *    and the separator was plucked from the child.
		 *
		 *    Try html2wt on this snippet:
		 *
		 *    a<p><!--cmt-->b</p>
		 *
		 * 2. In a sibling scenario, if nodeB is a literal-HTML element, nodeA is
		 *    forcing the newline and hence the newline should be emitted right
		 *    after it.
		 *
		 *    Try html2wt on this snippet:
		 *
		 *    <p>foo</p> <p data-parsoid='{"stx":"html"}'>bar</p>
		 * -------------------------------------------------------------------- */
		$nodeA = $constraintInfo['nodeA'] ?? null;
		$nodeB = $constraintInfo['nodeB'] ?? null;
		if (
			$sepType === 'parent-child' &&
			!DiffDOMUtils::isContentNode( DiffDOMUtils::firstNonDeletedChild( $nodeA ) ) &&
			!(
				isset( Consts::$HTML['ChildTableTags'][DOMCompat::nodeName( $nodeB )] ) &&
				!WTUtils::isLiteralHTMLNode( $nodeB )
			)
		) {
			$sep = $missingNls . $sep;
		} elseif ( $sepType === 'sibling' && WTUtils::isLiteralHTMLNode( $nodeB ) ) {
			$sep = $missingNls . $sep;
		} else {
			$sep .= $missingNls;
		}
	} elseif ( isset( $nlConstraints['max'] ) && $sepNlCount > $nlConstraints['max'] && (
		// In selser mode, if the current node is an unmodified rendering-transparent node
		// of a sibling pair, leave the separator alone since the excess newlines aren't
		// going to change the semantics of how this node will be parsed in wt->html direction.
		// This will instead eliminate a dirty diff on the page.
		!$this->state->selserMode ||
		$sepType !== 'sibling' ||
		!$this->state->currNodeUnmodified ||
		!WTUtils::isRenderingTransparentNode( $node )
	) ) {
		// Strip some newlines outside of comments.
		//
		// Capture separators in a single array with a capturing version of
		// the split regexp, so that we can work on the non-separator bits
		// when stripping newlines.
		//
		// Dirty-diff minimizing heuristic: Strip newlines away from an unmodified node.
		// If both nodes are unmodified, this dirties the separator before the current node.
		// If both nodes are modified, this dirties the separator after the previous node.
		$allBits = preg_split( '#(' . PHPUtils::reStrip( $splitRe, '#' ) . ')#',
			$sep, -1, PREG_SPLIT_DELIM_CAPTURE );
		$newBits = [];
		$n = $sepNlCount - $nlConstraints['max'];

		$stripAtEnd = $this->state->prevNodeUnmodified;
		while ( $n > 0 ) {
			$bit = $stripAtEnd ? array_pop( $allBits ) : array_shift( $allBits );
			while ( $bit && preg_match( $splitRe, $bit ) ) {
				// Retain comment-only lines as is
				$newBits[] = $bit;
				$bit = $stripAtEnd ? array_pop( $allBits ) : array_shift( $allBits );
			}
			if ( $bit === null ) {
				// No bits left to strip from. Stop here rather than
				// handing null to str_contains(), which is a TypeError
				// under strict_types.
				break;
			}
			// @phan-suppress-next-line PhanPluginLoopVariableReuse
			while ( $n > 0 && str_contains( $bit, "\n" ) ) {
				// Drop one newline from this bit per iteration
				$bit = preg_replace( '/\n([^\n]*)/', '$1', $bit, 1 );
				$n--;
			}
			$newBits[] = $bit;
		}
		if ( $stripAtEnd ) {
			// Bits were popped off the end, so they were collected in reverse
			$newBits = array_merge( $allBits, array_reverse( $newBits ) );
		} else {
			PHPUtils::pushArray( $newBits, $allBits );
		}
		$sep = implode( $newBits );
	}

	$this->state->getEnv()->log(
		'debug/wts/sep',
		'make-new |',
		static function () use ( $nlConstraints, $sepNlCount, $minNls, $sep, $origSep ) {
			$constraints = Utils::clone( $nlConstraints, true, true );
			unset( $constraints['constraintInfo'] );
			return PHPUtils::jsonEncode( $sep ) . ', ' . PHPUtils::jsonEncode( $origSep ) . ', ' .
				$minNls . ', ' . $sepNlCount . ', ' . PHPUtils::jsonEncode( $constraints );
		}
	);

	return $sep;
}
314 | |
/**
 * Merge two constraints.
 *
 * The result takes the larger of the two minimums (a missing one counts
 * as 0) and the smaller of the two maximums (a missing one counts as 2).
 * If that leaves the minimum above the maximum, the maximum is raised to
 * the minimum and the incompatibility is logged.
 *
 * @param Env $env
 * @param array $oldConstraints
 * @param array $newConstraints
 * @return array with keys 'min', 'max' and an (empty) 'constraintInfo'
 */
private static function mergeConstraints(
	Env $env, array $oldConstraints, array $newConstraints
): array {
	$min = max( $oldConstraints['min'] ?? 0, $newConstraints['min'] ?? 0 );
	$max = min( $oldConstraints['max'] ?? 2, $newConstraints['max'] ?? 2 );

	$merged = [
		'min' => $min,
		'max' => $max,
		'constraintInfo' => [],
	];

	if ( $min > $max ) {
		$merged['max'] = $min;
		$env->log(
			'info/html2wt',
			'Incompatible constraints (merge):',
			$merged,
			self::loggableConstraints( $oldConstraints ),
			self::loggableConstraints( $newConstraints )
		);
	}

	return $merged;
}
344 | |
/**
 * Short, JSON-encoded rendering of a node for debug logs.
 *
 * Uses the outer HTML for elements and falls back to the node value when
 * that is empty or the node is not an element. The encoded string is cut
 * off at 40 characters.
 *
 * @param Node $node
 * @return string
 */
public static function debugOut( Node $node ): string {
	$value = $node instanceof Element ? DOMCompat::getOuterHTML( $node ) : '';
	if ( !$value ) {
		$value = $node->nodeValue;
	}
	$encoded = PHPUtils::jsonEncode( $value );
	return mb_substr( $encoded, 0, 40 );
}
355 | |
/**
 * Figure out separator constraints and merge them with existing constraints
 * in state so that they can be emitted when the next content emits source.
 *
 * The relationship between the two nodes (parent-child, child-parent or
 * sibling) decides which handler callbacks are asked for constraints.
 *
 * @param Node $nodeA
 * @param DOMHandler $sepHandlerA
 * @param Node $nodeB
 * @param DOMHandler $sepHandlerB
 */
public function updateSeparatorConstraints(
	Node $nodeA, DOMHandler $sepHandlerA, Node $nodeB, DOMHandler $sepHandlerB
): void {
	$state = $this->state;
	$aCons = [];
	$bCons = [];

	if ( $nodeB->parentNode === $nodeA ) {
		// parent-child separator, nodeA parent of nodeB
		'@phan-var Element|DocumentFragment $nodeA'; // @var Element|DocumentFragment $nodeA
		$sepType = 'parent-child';
		$aCons = $sepHandlerA->firstChild( $nodeA, $nodeB, $state );
		if ( $nodeB instanceof Element ) {
			$bCons = $sepHandlerB->before( $nodeB, $nodeA, $state );
		}
	} elseif ( $nodeA->parentNode === $nodeB ) {
		// parent-child separator, nodeB parent of nodeA
		'@phan-var Element|DocumentFragment $nodeB'; // @var Element|DocumentFragment $nodeB
		$sepType = 'child-parent';
		if ( $nodeA instanceof Element ) {
			$aCons = $sepHandlerA->after( $nodeA, $nodeB, $state );
		}
		$bCons = $sepHandlerB->lastChild( $nodeB, $nodeA, $state );
	} else {
		// sibling separator
		$sepType = 'sibling';
		if ( $nodeA instanceof Element ) {
			$aCons = $sepHandlerA->after( $nodeA, $nodeB, $state );
		}
		if ( $nodeB instanceof Element ) {
			$bCons = $sepHandlerB->before( $nodeB, $nodeA, $state );
		}
	}

	$nlConstraints = $this->getSepNlConstraints( $nodeA, $aCons, $nodeB, $bCons );

	// Merge with constraints already pending in state, if there are any
	$state->sep->constraints = empty( $state->sep->constraints )
		? $nlConstraints
		: self::mergeConstraints( $this->env, $state->sep->constraints, $nlConstraints );

	$this->env->log(
		'debug/wts/sep',
		function () use ( $sepType, $nodeA, $nodeB, $state ) {
			return implode( ' | ', [
				'constraint',
				$sepType,
				'<' . DOMCompat::nodeName( $nodeA ) . ',' . DOMCompat::nodeName( $nodeB ) . '>',
				PHPUtils::jsonEncode( $state->sep->constraints ),
				self::debugOut( $nodeA ),
				self::debugOut( $nodeB ),
			] );
		}
	);

	// Record the context these constraints were computed in
	$state->sep->constraints['constraintInfo'] = [
		'onSOL' => $state->onSOL,
		// force SOL state when separator is built/emitted
		'forceSOL' => $sepHandlerB->forceSOL(),
		'sepType' => $sepType,
		'nodeA' => $nodeA,
		'nodeB' => $nodeB,
	];
}
421 | |
/**
 * @param Env $env
 * @param SerializerState $state
 */
public function __construct( Env $env, SerializerState $state ) {
	$this->state = $state;
	$this->env = $env;
}
426 | |
/**
 * Make sure that spaces on the last line of a separator do not
 * accidentally trigger an indent-pre in the emitted wikitext.
 *
 * Where those spaces are not safe, they are either dropped or wrapped in
 * nowiki tags. In the nowiki case, the serializer state is updated too
 * (onSOL is cleared and hasIndentPreNowikis is set).
 *
 * @param string $sep
 * @param array $nlConstraints only 'constraintInfo' is inspected
 * @return string
 */
private function makeSepIndentPreSafe(
	string $sep, array $nlConstraints
): string {
	$state = $this->state;
	$constraintInfo = $nlConstraints['constraintInfo'] ?? [];
	$sepType = $constraintInfo['sepType'] ?? null;
	$nodeA = $constraintInfo['nodeA'] ?? null;
	$nodeB = $constraintInfo['nodeB'] ?? null;
	$origNodeB = $nodeB;
	$forceSOL = ( $constraintInfo['forceSOL'] ?? false ) && $sepType !== 'child-parent';
	// True if the separator is (or will be forced to be) in start-of-line position
	$inSOL = !empty( $constraintInfo['onSOL'] ) || $forceSOL;

	// Ex: "<div>foo</div>\n <span>bar</span>"
	//
	// We also should test for onSOL state to deal with HTML like
	// <ul> <li>foo</li></ul>
	// and strip the leading space before non-indent-pre-safe tags
	if (
		!$state->inPHPBlock &&
		!$state->inIndentPre &&
		preg_match( self::INDENT_PRE_WS_IN_SEP_REGEXP, $sep ) &&
		( str_contains( $sep, "\n" ) || $inSOL )
	) {
		// 'sep' is the separator before 'nodeB' and it has leading spaces on a newline.
		// We have to decide whether that leading space will trigger indent-pres in wikitext.
		// The decision depends on where this separator will be emitted relative
		// to 'nodeA' and 'nodeB'.

		$isIndentPreSafe = false;

		// Example sepType scenarios:
		//
		// 1. sibling
		//    <div>foo</div>
		//     <span>bar</span>
		//    The span will be wrapped in an indent-pre if the leading space
		//    is not stripped since span is not a block tag
		//
		// 2. child-parent
		//    <span>foo
		//     </span>bar
		//    The " </span>bar" will be wrapped in an indent-pre if the
		//    leading space is not stripped since span is not a block tag
		//
		// 3. parent-child
		//    <div>foo
		//     <span>bar</span>
		//    </div>
		//
		// In all cases, only block-tags prevent indent-pres.
		// (except for a special case for <br> nodes)
		if ( $nodeB && WTSUtils::precedingSpaceSuppressesIndentPre( $nodeB, $origNodeB ) ) {
			$isIndentPreSafe = true;
		} elseif ( $sepType === 'sibling' || ( $nodeA && DOMUtils::atTheTop( $nodeA ) ) ) {
			Assert::invariant( !DOMUtils::atTheTop( $nodeA ) || $sepType === 'parent-child', __METHOD__ );

			// 'nodeB' is the first non-separator child of 'nodeA'.
			//
			// Walk past sol-transparent nodes in the right-sibling chain
			// of 'nodeB' till we establish indent-pre safety.
			while (
				$nodeB &&
				( DiffUtils::isDiffMarker( $nodeB ) || WTUtils::emitsSolTransparentSingleLineWT( $nodeB ) )
			) {
				$nodeB = $nodeB->nextSibling;
			}

			$isIndentPreSafe = !$nodeB || WTSUtils::precedingSpaceSuppressesIndentPre( $nodeB, $origNodeB );
		}

		// Check whether nodeB is nested inside an element that suppresses
		// indent-pres.
		if ( $nodeB && !$isIndentPreSafe && !DOMUtils::atTheTop( $nodeB ) ) {
			$parentB = $nodeB->parentNode; // could be nodeA
			while ( WTUtils::isZeroWidthWikitextElt( $parentB ) ) {
				$parentB = $parentB->parentNode;
			}

			// The token stream paragraph wrapper (and legacy doBlockLevels)
			// tracks this separately with $inBlockquote
			$isIndentPreSafe = DOMUtils::hasNameOrHasAncestorOfName(
				$parentB, 'blockquote'
			);

			// First scope wins
			while ( !$isIndentPreSafe && !DOMUtils::atTheTop( $parentB ) ) {
				$parentName = DOMCompat::nodeName( $parentB );
				if (
					TokenUtils::tagOpensBlockScope( $parentName ) &&
					// Only html p-tag is indent pre suppressing
					( $parentName !== 'p' || WTUtils::isLiteralHTMLNode( $parentB ) )
				) {
					$isIndentPreSafe = true;
					break;
				}
				if ( TokenUtils::tagClosesBlockScope( $parentName ) ) {
					break;
				}
				$parentB = $parentB->parentNode;
			}
		}

		$stripLeadingSpace = $inSOL &&
			$nodeB && !WTUtils::isLiteralHTMLNode( $nodeB ) &&
			isset( Consts::$HTMLTagsRequiringSOLContext[DOMCompat::nodeName( $nodeB )] );
		if ( !$isIndentPreSafe || $stripLeadingSpace ) {
			// Wrap non-nl ws from last line, but preserve comments.
			// This avoids triggering indent-pres.
			$sep = preg_replace_callback(
				self::INDENT_PRE_WS_IN_SEP_REGEXP,
				static function ( $matches ) use ( $stripLeadingSpace, $state ) {
					// When stripping, the spaces are simply left out
					$space = '';
					if ( !$stripLeadingSpace ) {
						// Since we nowiki-ed, we are no longer in sol state
						$state->onSOL = false;
						$state->hasIndentPreNowikis = true;
						$space = '<nowiki>' . $matches[2] . '</nowiki>';
					}
					return ( $matches[1] ?? '' ) . $space . ( $matches[3] ?? '' );
				},
				$sep
			);
		}
	}

	$state->getEnv()->log(
		'debug/wts/sep',
		'ipre-safe |',
		static function () use ( $sep, $nlConstraints ) {
			$constraints = Utils::clone( $nlConstraints, true, true );
			unset( $constraints['constraintInfo'] );
			return PHPUtils::jsonEncode( $sep ) . ', ' . PHPUtils::jsonEncode( $constraints );
		}
	);

	return $sep;
}
560 | |
/**
 * Serializing auto inserted content should invalidate the original separator.
 *
 * Returns a copy of the node's DSR in which the open width is nulled if
 * the start tag was auto-inserted, and the close width is nulled if the
 * end tag was auto-inserted. The DSR stored on the node is left untouched.
 *
 * @param Element $node
 * @return DomSourceRange|null null if the node has no DSR
 */
private static function handleAutoInserted( Element $node ): ?DomSourceRange {
	$dp = DOMDataUtils::getDataParsoid( $node );
	$origDsr = $dp->dsr ?? null;
	if ( $origDsr === null ) {
		return null;
	}

	$copy = clone $origDsr;
	if ( !empty( $dp->autoInsertedStart ) ) {
		$copy->openWidth = null;
	}
	if ( !empty( $dp->autoInsertedEnd ) ) {
		$copy->closeWidth = null;
	}

	return $copy;
}
581 | |
/**
 * $node is embedded inside a parent node that has its leading/trailing whitespace trimmed
 * in the wt->html direction. In this method, we attempt to recover leading trimmed whitespace
 * using DSR information on the parent of $node.
 *
 * In some cases, $node might have an additional "data-mw-selser-wrapper" span
 * that is added by SelSer - look past those wrappers.
 *
 * The recovery is attempted in two different ways:
 * 1. If we have additional DSR fields about leading/trailing WS
 *    (represented by $state->haveTrimmedWsDSR), that info is used.
 * 2. If not, we simply inspect source at the start of the parent's inner
 *    range and if it happens to be a space or tab, we use that.
 *
 * @param Node $node
 * @return ?string
 */
private function fetchLeadingTrimmedSpace( Node $node ): ?string {
	$origNode = $node;
	$parentNode = $node->parentNode;

	// Skip past the artificial span wrapper
	if ( $parentNode instanceof Element && $parentNode->hasAttribute( 'data-mw-selser-wrapper' ) ) {
		$node = $parentNode;
		$parentNode = $parentNode->parentNode;
	}

	// Leading trimmed whitespace only makes sense for first child.
	// Ignore comments (which are part of separators) + deletion markers.
	if ( DiffDOMUtils::previousNonSepSibling( $node ) ) {
		return null;
	}

	'@phan-var Element|DocumentFragment $parentNode'; // @var Element|DocumentFragment $parentNode
	if ( !isset( Consts::$WikitextTagsWithTrimmableWS[DOMCompat::nodeName( $parentNode )] ) ) {
		return null;
	}

	// Nothing to recover if the text itself already starts with whitespace
	if ( !( $origNode instanceof Element ) && preg_match( '/^[ \t]/', $origNode->nodeValue ) ) {
		return null;
	}

	// Don't reintroduce whitespace that's already been captured as a DisplaySpace
	if ( DOMUtils::hasTypeOf( $origNode, 'mw:DisplaySpace' ) ) {
		return null;
	}

	// FIXME: Is this complexity worth some minor dirty diff on this test?
	// ParserTest: "3. List embedded in a formatting tag in a misnested way"
	// I've not added an equivalent check in the trailing whitespace case.
	if ( $origNode instanceof Element &&
		isset( DOMDataUtils::getDataParsoid( $origNode )->autoInsertedStart ) &&
		strspn( $origNode->firstChild->textContent ?? '', " \t" ) >= 1
	) {
		return null;
	}

	$state = $this->state;
	$dsr = DOMDataUtils::getDataParsoid( $parentNode )->dsr ?? null;
	if ( !Utils::isValidDSR( $dsr, true ) ) {
		return null;
	}

	if (
		$state->haveTrimmedWsDSR &&
		$dsr->hasTrimmedWS() &&
		$dsr->hasValidLeadingWS()
	) {
		$innerSrc = $state->getOrigSrc( $dsr->innerRange() ) ?? '';
		if ( preg_match( '/^([ \t]*)/', $innerSrc, $matches ) ) {
			// $matches[1] is just spaces and tabs
			return substr( $matches[1], 0, $dsr->leadingWS );
		}
		return null;
	}

	if ( $dsr->innerStart() < $dsr->innerEnd() ) {
		$innerSrc = $state->getOrigSrc( $dsr->innerRange() ) ?? '';
		// return first character of inner range iff it is
		// tab or space
		if ( preg_match( '/^[ \t]/', $innerSrc ) ) {
			return $innerSrc[0];
		}
	}

	return null;
}
661 | |
/**
 * $node is embedded inside a parent node that has its leading/trailing whitespace trimmed
 * in the wt->html direction. In this method, we attempt to recover trailing trimmed whitespace
 * using DSR information on the parent of $node.
 *
 * In some cases, $node might have an additional "data-mw-selser-wrapper" span
 * that is added by SelSer - look past those wrappers.
 *
 * The recovery is attempted in two different ways:
 * 1. If we have additional DSR fields about leading/trailing WS
 *    (represented by $state->haveTrimmedWsDSR), that info is used.
 * 2. If not, we simply inspect source at the end of the parent's inner
 *    range and if it happens to be a space or tab, we use that.
 *
 * @param Node $node
 * @return ?string
 */
private function fetchTrailingTrimmedSpace( Node $node ): ?string {
	$origNode = $node;
	$parentNode = $node->parentNode;

	// Skip past the artificial span wrapper
	if ( $parentNode instanceof Element && $parentNode->hasAttribute( 'data-mw-selser-wrapper' ) ) {
		$node = $parentNode;
		$parentNode = $parentNode->parentNode;
	}

	// Trailing trimmed whitespace only makes sense for last child.
	// Ignore comments (which are part of separators) + deletion markers.
	if ( DiffDOMUtils::nextNonSepSibling( $node ) ) {
		return null;
	}

	'@phan-var Element|DocumentFragment $parentNode'; // @var Element|DocumentFragment $parentNode
	if ( !isset( Consts::$WikitextTagsWithTrimmableWS[DOMCompat::nodeName( $parentNode )] ) ) {
		return null;
	}

	// Nothing to recover if the text itself already ends with whitespace
	if ( !( $origNode instanceof Element ) && preg_match( '/[ \t]$/', $origNode->nodeValue ) ) {
		return null;
	}

	// Don't reintroduce whitespace that's already been captured as a DisplaySpace
	if ( DOMUtils::hasTypeOf( $origNode, 'mw:DisplaySpace' ) ) {
		return null;
	}

	$state = $this->state;
	$dsr = DOMDataUtils::getDataParsoid( $parentNode )->dsr ?? null;
	if ( !Utils::isValidDSR( $dsr, true ) ) {
		return null;
	}

	if (
		$state->haveTrimmedWsDSR &&
		$dsr->hasTrimmedWS() &&
		$dsr->hasValidTrailingWS()
	) {
		$innerSrc = $state->getOrigSrc( $dsr->innerRange() ) ?? '';
		if ( preg_match( '/([ \t]*)$/', $innerSrc, $matches ) ) {
			// $matches[1] is just spaces and tabs
			// note that trailingWS can be zero
			return substr( $matches[1], strlen( $matches[1] ) - $dsr->trailingWS );
		}
		return null;
	}

	// The > instead of >= in the test below is to deal with an edge case
	// where that single space is captured by the leading whitespace
	// recovery in fetchLeadingTrimmedSpace
	if ( ( $dsr->innerEnd() - 1 ) > $dsr->innerStart() ) {
		$innerSrc = $state->getOrigSrc( $dsr->innerRange() ) ?? '';
		// Return last character of the inner source iff it is space or tab
		if ( preg_match( '/[ \t]$/', $innerSrc ) ) {
			return substr( $innerSrc, -1 );
		}
	}

	return null;
}
734 | |
/**
 * Recover whitespace around $node that was trimmed in the wt->html
 * direction, so that it can be added back.
 *
 * Recovery is only attempted when whitespace heuristics are enabled on the
 * serializer state and we are in selser mode.
 *
 * @param Node $node
 * @param bool $leading
 *   if true, trimmed leading whitespace before $node is looked up
 *   if false, trimmed trailing whitespace after the last non-deleted
 *   child of $node is looked up
 * @return string|null
 */
public function recoverTrimmedWhitespace( Node $node, bool $leading ): ?string {
	$state = $this->state;
	if ( !$state->useWhitespaceHeuristics || !$state->selserMode ) {
		return null;
	}

	if ( $leading ) {
		return $this->fetchLeadingTrimmedSpace( $node );
	}

	$lastChild = DiffDOMUtils::lastNonDeletedChild( $node );
	if ( !$lastChild ) {
		return null;
	}
	return $this->fetchTrailingTrimmedSpace( $lastChild );
}
758 | |
/**
 * Emit a separator based on the collected (and merged) constraints
 * and existing separator text. Called when new output is triggered.
 *
 * The separator is computed in these steps:
 *  1. If original source is usable for both $node and the last source
 *     node, compute a DSR range for each and extract the text between
 *     them from the original source.
 *  2. Otherwise, if the last source node carries no diff markers and is
 *     followed by inserted content, recover the separator that originally
 *     followed it and buffer it in $state->sep->src.
 *  3. If no separator was found, fall back to the trimmed-whitespace
 *     heuristics for 'parent-child' / 'child-parent' separators.
 *  4. Run the result through makeSeparator() when required, and always
 *     through makeSepIndentPreSafe() when a separator is returned.
 *
 * @param Node $node Node in front of which the separator is being emitted
 * @return string|null The separator text, or null if there is none
 */
public function buildSep( Node $node ): ?string {
	$state = $this->state;
	$sepType = $state->sep->constraints['constraintInfo']['sepType'] ?? null;
	$sep = null;
	// $node may be re-pointed to an ancestor below; keep the original for logging.
	$origNode = $node;
	$prevNode = $state->sep->lastSourceNode;
	$dsrA = null;
	$dsrB = null;

	/* ----------------------------------------------------------------------
	 * Assuming we have access to the original source, we can use DSR offsets
	 * to extract separators from source only if:
	 * - we are in selser mode AND
	 * - this node is not part of a newly inserted subtree (marked 'modified')
	 *   for which DSR isn't available
	 * - neither node is adjacent to a deleted block node
	 *   (see the long comment in SerializerState::emitChunk in the middle)
	 *
	 * In other scenarios, DSR values on "adjacent" nodes in the edited DOM
	 * may not reflect deleted content between them.
	 * ---------------------------------------------------------------------- */
	$origSepNeeded = $node !== $prevNode && $state->selserMode;
	$origSepNeededAndUsable =
		$origSepNeeded && !$state->inInsertedContent &&
		!WTSUtils::nextToDeletedBlockNodeInWT( $prevNode, true ) &&
		!WTSUtils::nextToDeletedBlockNodeInWT( $node, false ) &&
		WTSUtils::origSrcValidInEditedContext( $state, $prevNode ) &&
		WTSUtils::origSrcValidInEditedContext( $state, $node );

	if ( $origSepNeededAndUsable ) {
		// ---- Step 1a: DSR for the previous node ($dsrA) ----
		if ( $prevNode instanceof Element ) {
			$dsrA = self::handleAutoInserted( $prevNode );
		} elseif ( !( $prevNode instanceof DocumentFragment ) ) {
			// Check if $prevNode is the last child of a zero-width element,
			// and use that for dsr purposes instead. Typical case: text in p.
			if (
				!$prevNode->nextSibling &&
				$prevNode->parentNode !== $node &&
				$prevNode->parentNode instanceof Element &&
				( DOMDataUtils::getDataParsoid( $prevNode->parentNode )->dsr->closeWidth ?? null ) === 0
			) {
				$dsrA = self::handleAutoInserted( $prevNode->parentNode );
			} elseif (
				// Can we extrapolate DSR from $prevNode->previousSibling?
				// Yes, if $prevNode->parentNode didn't have its children edited.
				$prevNode->previousSibling instanceof Element &&
				!DiffUtils::directChildrenChanged( $prevNode->parentNode )
			) {
				$endDsr = DOMDataUtils::getDataParsoid( $prevNode->previousSibling )->dsr->end ?? null;
				if ( is_int( $endDsr ) ) {
					// Width of $prevNode in source: comments use their decoded
					// length, everything else the byte length of its value.
					if ( $prevNode instanceof Comment ) {
						'@phan-var Comment $prevNode'; // @var Comment $prevNode
						$correction = WTUtils::decodedCommentLength( $prevNode );
					} else {
						$correction = strlen( $prevNode->nodeValue );
					}
					$dsrA = new DomSourceRange(
						$endDsr,
						$endDsr + $correction + WTUtils::indentPreDSRCorrection( $prevNode ),
						0,
						0
					);
				}
			}
		}

		// ---- Step 1b: DSR for the current node ($dsrB) ----
		if ( !$dsrA ) {
			// nothing to do -- no reason to compute dsrB if dsrA is null
		} elseif ( $node instanceof Element ) {
			// $node is parent of $prevNode
			if ( $prevNode->parentNode === $node ) {
				'@phan-var Element|DocumentFragment $node'; // @var Element|DocumentFragment $node
				// FIXME: Maybe we shouldn't set dsr in the dsr pass if both aren't valid?
				//
				// When we are in the lastChild sep scenario and the parent doesn't have
				// useable dsr, if possible, walk up the ancestor nodes till we find
				// a dsr-bearing node
				//
				// This fix is needed to handle trailing newlines in this wikitext:
				// [[File:foo.jpg|thumb|300px|foo\n{{1x|A}}\n{{1x|B}}\n{{1x|C}}\n\n]]
				while (
					!$node->nextSibling &&
					!DOMUtils::atTheTop( $node ) &&
					(
						empty( DOMDataUtils::getDataParsoid( $node )->dsr ) ||
						DOMDataUtils::getDataParsoid( $node )->dsr->start === null ||
						DOMDataUtils::getDataParsoid( $node )->dsr->end === null
					)
				) {
					$node = $node->parentNode;
				}
			}

			// The top node could be a document fragment
			$dsrB = $node instanceof Element ? self::handleAutoInserted( $node ) : null;
		} elseif ( !( $node instanceof DocumentFragment ) ) {
			// $node is text/comment. Can we extrapolate DSR from $node->parentNode?
			// Yes, if this is the child of a zero-width element and
			// is only preceded by separator elements.
			//
			// 1. text in p.
			// 2. ws-only child of a node with auto-inserted start tag
			//    Ex: "<span> <s>x</span> </s>" --> <span> <s>x</s*></span><s*> </s>
			// 3. ws-only children of a node with auto-inserted start tag
			//    Ex: "{|\n|-\n <!--foo--> \n|}"
			$nodeParent = $node->parentNode;
			// phpcs:ignore Generic.Files.LineLength.TooLong
			'@phan-var Element|DocumentFragment $nodeParent'; // @var Element|DocumentFragment $nodeParent

			if (
				$nodeParent !== $prevNode &&
				$nodeParent instanceof Element &&
				( DOMDataUtils::getDataParsoid( $nodeParent )->dsr->openWidth ?? null ) === 0
			) {
				$sepLen = self::precedingSeparatorTextLen( $node );
				if ( $sepLen !== null ) {
					$dsrB = DOMDataUtils::getDataParsoid( $nodeParent )->dsr;
					if ( is_int( $dsrB->start ) && $sepLen > 0 ) {
						// Clone so the DSR stored on the parent is not modified.
						$dsrB = clone $dsrB;
						$dsrB->start += $sepLen;
					}
				}
			}
		}

		// ---- Step 1c: extract the separator between $dsrA and $dsrB ----
		// FIXME: Maybe we shouldn't set dsr in the dsr pass if both aren't valid?
		// NOTE: Synthetic DSR ranges
		// may not necessarily have offsets that correspond to valid
		// UTF-8 characters. So use $state->isValidDSR() to ensure that
		// all offsets land on valid UTF-8 characters before trying to
		// construct substrings based on relations between them.
		if (
			$state->isValidDSR( $dsrA ) &&
			$state->isValidDSR( $dsrB )
		) {
			// Figure out containment relationship
			if ( $dsrA->start <= $dsrB->start ) {
				if ( $dsrB->end <= $dsrA->end ) {
					if ( $dsrA->start === $dsrB->start && $dsrA->end === $dsrB->end ) {
						// Both have the same dsr range, so there can't be any
						// separators between them
						$sep = '';
					} elseif ( isset( $dsrA->openWidth ) && $state->isValidDSR( $dsrA, true ) ) {
						// B in A, from parent to child
						$sep = $state->getOrigSrc( $dsrA->openRange()->to( $dsrB ) );
					}
				} elseif ( $dsrA->end <= $dsrB->start ) {
					// B following A (siblingish)
					$sep = $state->getOrigSrc( $dsrA->to( $dsrB ) );
				} elseif ( isset( $dsrB->closeWidth ) && $state->isValidDSR( $dsrB, true ) ) {
					// A in B, from child to parent
					$sep = $state->getOrigSrc( $dsrA->to( $dsrB->closeRange() ) );
				}
			} elseif ( $dsrA->end <= $dsrB->end ) {
				if ( isset( $dsrB->closeWidth ) && $state->isValidDSR( $dsrB, true ) ) {
					// A in B, from child to parent
					$sep = $state->getOrigSrc( $dsrA->to( $dsrB->closeRange() ) );
				}
			} else {
				$this->env->log( 'info/html2wt', 'dsr backwards: should not happen!' );
			}

			// Reset if $sep is invalid.
			// Compare explicitly against null / '' instead of relying on string
			// truthiness: the string "0" is falsy in PHP and would otherwise
			// skip this validity check.
			if ( $sep !== null && $sep !== '' && !WTSUtils::isValidSep( $sep ) ) {
				$sep = null;
			}
		}
	} elseif ( $origSepNeeded && !DiffUtils::hasDiffMarkers( $prevNode ) ) {
		// Given the following conditions:
		// - $prevNode has no diff markers. (checked above)
		// - $prevNode's next non-sep sibling ($next) was inserted.
		// - $next is an ancestor of $node.
		// - all of those ancestor nodes from $node->$next have zero-width
		//   wikitext (otherwise, the separator isn't usable)
		// Try to extract a separator from original source that existed
		// between $prevNode and its original next sibling or its parent
		// (if $prevNode was the last non-sep child).
		//
		// This minimizes dirty-diffs to that separator text from
		// the insertion of $next after $prevNode.
		$next = DiffDOMUtils::nextNonSepSibling( $prevNode );
		$origSepUsable = $next && DiffUtils::hasInsertedDiffMark( $next );

		// Check that $next is an ancestor of $node and all nodes
		// on that path have zero-width wikitext
		if ( $origSepUsable && $node !== $next ) {
			$n = $node->parentNode;
			while ( $n && $next !== $n ) {
				if ( !WTUtils::isZeroWidthWikitextElt( $n ) ) {
					$origSepUsable = false;
					break;
				}
				$n = $n->parentNode;
			}
			// If the walk ran off the top of the tree, $next was not an ancestor.
			$origSepUsable = $origSepUsable && $n !== null;
		}

		// Extract separator from original source if possible
		if ( $origSepUsable ) {
			$origNext = DiffDOMUtils::nextNonSepSibling( $next );
			if ( !$origNext ) { // $prevNode was last non-sep child of its parent
				// We could work harder for text/comments and extrapolate, but skipping that here
				// FIXME: If we had a generic DSR extrapolation utility, that would be useful
				$o1 = $prevNode instanceof Element ?
					( DOMDataUtils::getDataParsoid( $prevNode )->dsr ?? null ) : null;
				$prevParent = $prevNode->parentNode;
				// Only look up parsoid data when the parent is an Element, as the
				// other parent lookups in this method do; the parent could be a
				// document fragment, in which case no separator is recovered.
				if ( $o1 !== null && $prevParent instanceof Element ) {
					$dsr2 = DOMDataUtils::getDataParsoid( $prevParent )->dsr ?? null;
					$sep = $dsr2 !== null ? $state->getOrigSrc( $o1->to( $dsr2->closeRange() ) ) : null;
				}
			} elseif ( !DiffUtils::hasDiffMarkers( $origNext ) ) {
				// We could work harder for text/comments and extrapolate, but skipping that here
				// FIXME: If we had a generic DSR extrapolation utility, that would be useful
				$o1 = $prevNode instanceof Element ?
					( DOMDataUtils::getDataParsoid( $prevNode )->dsr ?? null ) : null;
				if ( $o1 !== null ) {
					$o2 = $origNext instanceof Element ?
						( DOMDataUtils::getDataParsoid( $origNext )->dsr ?? null ) : null;
					$sep = $o2 !== null ? $state->getOrigSrc( $o1->to( $o2 ) ) : null;
				}
			}

			if ( $sep !== null ) {
				// Since this is an inserted node, we might have to augment this
				// with newline constraints and so, we just set this recovered sep
				// to the buffered sep in state->sep->src
				$state->sep->src = $sep;
				$sep = null;
			}
		}
	}

	// If all efforts failed, use special-purpose heuristics to recover
	// trimmed leading / trailing whitespace from lists, headings, table-cells
	if ( $sep === null ) {
		if ( $sepType === 'parent-child' ) {
			// Recovered leading whitespace goes in front of the buffered separator.
			$sep = $this->recoverTrimmedWhitespace( $node, true );
			$state->sep->src = ( $sep ?? '' ) . $state->sep->src;
		} elseif ( $sepType === 'child-parent' ) {
			// Recovered trailing whitespace goes after the buffered separator.
			$sep = $this->recoverTrimmedWhitespace( $node, false );
			$state->sep->src .= $sep ?? '';
		}
	}

	$this->env->log(
		'debug/wts/sep',
		static function () use ( $prevNode, $origNode, $sep, $state ) {
			return 'maybe-sep | ' .
				'prev:' . ( $prevNode ? DOMCompat::nodeName( $prevNode ) : '--none--' ) .
				', node:' . DOMCompat::nodeName( $origNode ) .
				', sep: ' . PHPUtils::jsonEncode( $sep ) .
				', state.sep.src: ' . PHPUtils::jsonEncode( $state->sep->src ?? null );
		}
	);

	// If the separator is being emitted before a node that emits sol-transparent WT,
	// go through makeSeparator to verify indent-pre constraints are met.
	$sepConstraints = $state->sep->constraints ?? [ 'max' => 0 ];
	if ( $sep === null || ( $state->sep->src && $state->sep->src !== $sep ) ) {
		if ( !empty( $state->sep->constraints ) || !empty( $state->sep->src ) ) {
			// TODO: set modified flag if start or end node (but not both) are
			// modified / new so that the selser can use the separator
			$sep = $this->makeSeparator( $node, $state->sep->src ?? '', $sepConstraints );
		} else {
			$sep = null;
		}
	}

	if ( $sep !== null ) {
		$sep = self::makeSepIndentPreSafe( $sep, $sepConstraints );
	}
	return $sep;
}
1038 | } |