Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 98 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
MigrateTrailingNLs | |
0.00% |
0 / 98 |
|
0.00% |
0 / 6 |
2162 | |
0.00% |
0 / 1 |
nodeEndsLineInWT | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
getTableParent | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
canMigrateNLOutOfNode | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
156 | |||
hasZeroWidthWT | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
42 | |||
doMigrateTrailingNLs | |
0.00% |
0 / 58 |
|
0.00% |
0 / 1 |
420 | |||
run | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
5 | |
6 | use Wikimedia\Parsoid\Config\Env; |
7 | use Wikimedia\Parsoid\DOM\Comment; |
8 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
9 | use Wikimedia\Parsoid\DOM\Element; |
10 | use Wikimedia\Parsoid\DOM\Node; |
11 | use Wikimedia\Parsoid\DOM\Text; |
12 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
13 | use Wikimedia\Parsoid\Utils\DOMCompat; |
14 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
15 | use Wikimedia\Parsoid\Utils\DOMUtils; |
16 | use Wikimedia\Parsoid\Utils\PHPUtils; |
17 | use Wikimedia\Parsoid\Utils\WTUtils; |
18 | use Wikimedia\Parsoid\Wt2Html\Wt2HtmlDOMProcessor; |
19 | |
/**
 * Wt2Html DOM pass that moves trailing newline text (and the comments
 * interleaved with it) from the end of an element to just after that element
 * in its parent, whenever canMigrateNLOutOfNode() allows it.
 */
class MigrateTrailingNLs implements Wt2HtmlDOMProcessor {
	/**
	 * Lazily-built lookup (keyed by node name) of the elements that
	 * nodeEndsLineInWT() treats as line-ending.
	 * @var array|null
	 */
	private static ?array $nodesToMigrateFrom = null;

	/**
	 * Is $node one of the line-ending elements, and not a literal HTML tag?
	 *
	 * @param Node $node
	 * @param DataParsoid $dp data-parsoid of $node
	 * @return bool
	 */
	private function nodeEndsLineInWT( Node $node, DataParsoid $dp ): bool {
		// These nodes either end a line in wikitext (tr, li, dd, ol, ul, dl, caption,
		// p) or have implicit closing tags that can leak newlines to those that end a
		// line (th, td)
		//
		// SSS FIXME: Given condition 2, we may not need to check th/td anymore
		// (if we can rely on auto inserted start/end tags being present always).
		self::$nodesToMigrateFrom ??= PHPUtils::makeSet( [
			'pre', 'th', 'td', 'tr', 'li', 'dd', 'ol', 'ul', 'dl', 'caption', 'p'
		] );

		if ( !isset( self::$nodesToMigrateFrom[DOMCompat::nodeName( $node )] ) ) {
			return false;
		}
		return !WTUtils::hasLiteralHTMLMarker( $dp );
	}

	/**
	 * Find the <table> that $node belongs to, where $node is a table, a
	 * section/caption, a row or a cell.
	 *
	 * The walk takes at most one step per level, in this order:
	 * cell (td/th) -> row (tr) -> section (tbody/thead/tfoot/caption) -> table.
	 * Any level that does not match is simply skipped.
	 *
	 * @param Node $node
	 * @return Node|null the table element, or null if the walk does not end on
	 *   a table (including when a parent is missing along the way)
	 */
	private function getTableParent( Node $node ): ?Node {
		$optionalLevels = [
			[ 'td', 'th' ],
			[ 'tr' ],
			[ 'tbody', 'thead', 'tfoot', 'caption' ],
		];

		foreach ( $optionalLevels as $levelNames ) {
			if ( in_array( DOMCompat::nodeName( $node ), $levelNames, true ) ) {
				$node = $node->parentNode;
				if ( $node === null ) {
					// Detached subtree: there is no table to find.
					return null;
				}
			}
		}

		return DOMCompat::nodeName( $node ) === 'table' ? $node : null;
	}

	/**
	 * We can migrate a newline out of a node if one of the following is true:
	 * (1) The node ends a line in wikitext (=> not a literal html tag)
	 * (2) The node has an auto-closed end-tag (wikitext-generated or literal html tag)
	 *     and hasn't been fostered out of a table.
	 * (3) It is the rightmost node in the DOM subtree rooted at a node
	 *     that ends a line in wikitext
	 *
	 * Tables themselves and nodes for which DOMUtils::atTheTop() holds never
	 * qualify.
	 *
	 * @param Node $node
	 * @return bool
	 */
	private function canMigrateNLOutOfNode( Node $node ): bool {
		if ( DOMCompat::nodeName( $node ) === 'table' || DOMUtils::atTheTop( $node ) ) {
			return false;
		}

		// Don't allow migration out of a table if the table has had
		// content fostered out of it.
		$tableParent = $this->getTableParent( $node );
		$beforeTable = $tableParent ? $tableParent->previousSibling : null;
		if (
			$beforeTable instanceof Element &&
			!empty( DOMDataUtils::getDataParsoid( $beforeTable )->fostered )
		) {
			return false;
		}

		DOMUtils::assertElt( $node );
		$dp = DOMDataUtils::getDataParsoid( $node );

		if ( !empty( $dp->fostered ) ) {
			return false;
		}
		if ( $this->nodeEndsLineInWT( $node, $dp ) || !empty( $dp->autoInsertedEnd ) ) {
			return true;
		}

		// Rightmost-descendant case: defer the decision to the parent.
		return !$node->nextSibling &&
			// FIXME: bug compatibility, previously the end meta caused
			// $node->nextSibling to be true for elements with end tags
			empty( $dp->tmp->endTSR ) &&
			$node->parentNode &&
			$this->canMigrateNLOutOfNode( $node->parentNode );
	}

	/**
	 * A node has zero wt width if:
	 * - tsr->start == tsr->end
	 * - only has children with zero wt width
	 *
	 * @param Element $node
	 * @return bool
	 */
	private function hasZeroWidthWT( Element $node ): bool {
		$tsr = DOMDataUtils::getDataParsoid( $node )->tsr ?? null;
		if ( !$tsr || $tsr->start === null || $tsr->start !== $tsr->end ) {
			return false;
		}

		// Any non-element child, or any child with non-zero width, disqualifies.
		for ( $c = $node->firstChild; $c !== null; $c = $c->nextSibling ) {
			if ( !( $c instanceof Element ) || !$this->hasZeroWidthWT( $c ) ) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Scan the tail of $elt's children (right to left) for the run of
	 * newline-bearing text and comment nodes that should be moved out.
	 *
	 * @param Node $elt node that canMigrateNLOutOfNode() accepted
	 * @return array|null null when there is nothing to migrate, otherwise
	 *   [ Node $firstEltToMigrate, ?Node $migrationBarrier,
	 *     bool $partialContent, int $tsrCorrection ] where
	 *   - $firstEltToMigrate is the leftmost node of the run,
	 *   - $migrationBarrier is the leftmost of the trailing zero-width
	 *     elements that the run stops in front of (null if there are none),
	 *   - $partialContent says that only the trailing newlines of
	 *     $firstEltToMigrate move, not the whole text node,
	 *   - $tsrCorrection is the summed length of everything that moves.
	 */
	private function findTrailingNLRun( Node $elt ): ?array {
		$firstEltToMigrate = null;
		$migrationBarrier = null;
		$partialContent = false;
		$n = $elt->lastChild;

		// We can migrate trailing newlines across nodes that have zero-wikitext-width.
		while ( $n instanceof Element && $this->hasZeroWidthWT( $n ) ) {
			$migrationBarrier = $n;
			$n = $n->previousSibling;
		}

		$isTdTh = in_array( DOMCompat::nodeName( $elt ), [ 'td', 'th' ], true );

		// Find nodes that need to be migrated out:
		// - a sequence of comment and newline nodes that is preceded by
		//   a non-migratable node (text node with non-white-space content
		//   or an element node).
		$foundNL = false;
		$tsrCorrection = 0;
		while ( $n instanceof Text || $n instanceof Comment ) {
			if ( $n instanceof Comment ) {
				if ( $isTdTh ) {
					// Comments are never moved out of table cells.
					break;
				}
				$firstEltToMigrate = $n;
				$tsrCorrection += WTUtils::decodedCommentLength( $n );
			} elseif (
				!$isTdTh &&
				preg_match( '/^[ \t\r\n]*\n[ \t\r\n]*$/D', $n->nodeValue )
			) {
				// Pure whitespace containing a newline: all whitespace is moved
				$foundNL = true;
				$firstEltToMigrate = $n;
				$partialContent = false;
				$tsrCorrection += strlen( $n->nodeValue );
			} elseif ( str_ends_with( $n->nodeValue, "\n" ) ) {
				// Other content ending in newline(s): only newlines moved,
				// and the run cannot extend past this node.
				$foundNL = true;
				$firstEltToMigrate = $n;
				$partialContent = true;
				preg_match( '/\n+$/D', $n->nodeValue, $matches );
				$tsrCorrection += strlen( $matches[0] ?? '' );
				break;
			} else {
				break;
			}

			$n = $n->previousSibling;
		}

		if ( !$firstEltToMigrate || !$foundNL ) {
			return null;
		}
		return [ $firstEltToMigrate, $migrationBarrier, $partialContent, $tsrCorrection ];
	}

	/**
	 * Move the run found by findTrailingNLRun() so that it directly follows
	 * $elt in $elt's parent, then shift the tsr of the zero-width elements
	 * left behind at the end of $elt.
	 *
	 * @param Node $elt node the run is being moved out of
	 * @param Node $firstEltToMigrate leftmost node of the run
	 * @param ?Node $migrationBarrier first trailing zero-width element, if any
	 * @param bool $partialContent whether only the trailing newlines of
	 *   $firstEltToMigrate are moved (as a newly created text node)
	 * @param int $tsrCorrection amount subtracted from the tsr offsets of
	 *   $migrationBarrier and its following siblings
	 */
	private function moveRunAfterElement(
		Node $elt, Node $firstEltToMigrate, ?Node $migrationBarrier,
		bool $partialContent, int $tsrCorrection
	): void {
		$eltParent = $elt->parentNode;
		$insertPosition = $elt->nextSibling;

		$n = $firstEltToMigrate;
		while ( $n !== $migrationBarrier ) {
			// Grab the sibling first: $n is about to leave this child list.
			$next = $n->nextSibling;
			if ( $partialContent ) {
				// Split off the trailing newlines into a fresh text node and
				// migrate that instead of the original node.
				$fullText = $n->nodeValue;
				$n->nodeValue = preg_replace( '/\n+$/D', '', $fullText, 1 );
				$nls = substr( $fullText, strlen( $n->nodeValue ) );
				$n = $n->ownerDocument->createTextNode( $nls );
				// Only the first node of the run can be partial.
				$partialContent = false;
			}
			$eltParent->insertBefore( $n, $insertPosition );
			$n = $next;
		}

		// Adjust tsr of any nodes after migrationBarrier.
		// Ex: zero-width nodes that have valid tsr on them
		// By definition (zero-width), these are synthetic nodes added by Parsoid
		// that aren't present in the original wikitext.
		for ( $n = $migrationBarrier; $n; $n = $n->nextSibling ) {
			// TSR is guaranteed to exist and be valid
			// (checked by hasZeroWidthWT when the barrier was computed)
			DOMUtils::assertElt( $n );
			$dp = DOMDataUtils::getDataParsoid( $n );
			$dp->tsr = $dp->tsr->offset( -$tsrCorrection );
		}
	}

	/**
	 * Recursively migrate trailing newlines out of every eligible node in
	 * the subtree rooted at $elt. Nodes that are neither an Element nor a
	 * DocumentFragment are ignored.
	 *
	 * @param Node $elt
	 * @param Env $env
	 */
	public function doMigrateTrailingNLs( Node $elt, Env $env ): void {
		if (
			!( $elt instanceof Element ) &&
			!( $elt instanceof DocumentFragment )
		) {
			return;
		}

		// 1. Process DOM rooted at 'elt' first
		//
		// Process children backward so that a table
		// is processed before its fostered content.
		// See subtle changes in newline migration with this wikitext:
		// "<table>\n<tr> || ||\n<td> a\n</table>"
		// when walking backward vs. forward.
		//
		// Separately, walking backward also lets us ignore
		// newly added children after child (because of
		// migrated newline nodes from child's DOM tree).
		for ( $child = $elt->lastChild; $child !== null; $child = $child->previousSibling ) {
			$this->doMigrateTrailingNLs( $child, $env );
		}

		// 2. Process 'elt' itself after -- skip literal-HTML nodes
		if ( !$this->canMigrateNLOutOfNode( $elt ) ) {
			return;
		}

		$run = $this->findTrailingNLRun( $elt );
		if ( $run === null ) {
			return;
		}

		[ $firstEltToMigrate, $migrationBarrier, $partialContent, $tsrCorrection ] = $run;
		$this->moveRunAfterElement(
			$elt, $firstEltToMigrate, $migrationBarrier, $partialContent, $tsrCorrection
		);
	}

	/**
	 * @inheritDoc
	 */
	public function run(
		Env $env, Node $root, array $options = [], bool $atTopLevel = false
	): void {
		$this->doMigrateTrailingNLs( $root, $env );
	}
}