Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 83 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
LiFixups | |
0.00% |
0 / 83 |
|
0.00% |
0 / 3 |
1260 | |
0.00% |
0 / 1 |
getMigrationInfo | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
42 | |||
findLastMigratableNode | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
90 | |||
migrateTrailingSolTransparentLinks | |
0.00% |
0 / 54 |
|
0.00% |
0 / 1 |
420 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Handlers; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\DOM\Comment; |
8 | use Wikimedia\Parsoid\DOM\Element; |
9 | use Wikimedia\Parsoid\DOM\Node; |
10 | use Wikimedia\Parsoid\DOM\Text; |
11 | use Wikimedia\Parsoid\Utils\DiffDOMUtils; |
12 | use Wikimedia\Parsoid\Utils\DOMCompat; |
13 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
14 | use Wikimedia\Parsoid\Utils\DOMUtils; |
15 | use Wikimedia\Parsoid\Utils\DTState; |
16 | use Wikimedia\Parsoid\Utils\WTUtils; |
17 | |
/**
 * DOM handlers that move trailing sol-transparent links (ex: categories),
 * together with the whitespace and comments around them, out of the last
 * list item of a list and fix up the DSR of the affected list nodes.
 */
class LiFixups {

	/**
	 * Decide whether the encapsulated content that $c belongs to (if any)
	 * can be migrated as a unit.
	 *
	 * When WTUtils::findFirstEncapsulationWrapperNode() reports a wrapper for
	 * $c, every node from $c back to that wrapper (inclusive) must be either
	 * a sol-transparent link or a `span` whose text content is only
	 * whitespace. When there is no wrapper, $c is reported as migratable.
	 *
	 * @param Node $c Node to start checking from (walks backwards over siblings).
	 * @return array{tplRoot: ?Node, migratable: bool}
	 *   'tplRoot' is the wrapper node (or null), 'migratable' is the verdict.
	 */
	private static function getMigrationInfo( Node $c ): array {
		$tplRoot = WTUtils::findFirstEncapsulationWrapperNode( $c );
		if ( $tplRoot !== null ) {
			// Check if everything between tplRoot and c is migratable.
			// NOTE(review): this walk assumes tplRoot is $c itself or one of
			// its preceding siblings -- confirm against the WTUtils helper.
			$prev = $tplRoot->previousSibling;
			while ( $c !== $prev ) {
				$isBlankSpan = DOMCompat::nodeName( $c ) === 'span' &&
					preg_match( '/^\s*$/D', $c->textContent );
				if ( !WTUtils::isSolTransparentLink( $c ) && !$isBlankSpan ) {
					return [ 'tplRoot' => $tplRoot, 'migratable' => false ];
				}

				$c = $c->previousSibling;
			}
		}

		return [ 'tplRoot' => $tplRoot, 'migratable' => true ];
	}

	/**
	 * Walk backwards from the last non-separator child of $li and find the
	 * earliest node that should be moved out of the list item.
	 *
	 * The walk skips over sol-transparent links, whitespace-only text and
	 * migratable template units. The returned sentinel is the earliest
	 * comment, or text node ending in a newline (plus optional whitespace),
	 * seen during that walk.
	 *
	 * @param Node $li List item whose trailing content is inspected.
	 * @return ?Node The sentinel node, or null if nothing should be migrated.
	 */
	private static function findLastMigratableNode( Node $li ): ?Node {
		$sentinel = null;
		$c = DiffDOMUtils::lastNonSepChild( $li );
		// c is known to be a sol-transparent link (ex: category)
		// fail fast in parser tests if something changes.
		// The explicit null check makes an empty item fail with this
		// invariant message rather than with an unrelated error.
		Assert::invariant(
			$c !== null && WTUtils::isSolTransparentLink( $c ),
			'c is known to be a sol-transparent link'
		);
		while ( $c ) {
			// Handle template units first
			$info = self::getMigrationInfo( $c );
			if ( !$info['migratable'] ) {
				break;
			} elseif ( $info['tplRoot'] !== null ) {
				// Jump to the start of the template unit; everything between
				// was already vetted by getMigrationInfo().
				$c = $info['tplRoot'];
			}

			if ( $c instanceof Text ) {
				// Update sentinel if we hit a newline.
				// We want to migrate these newlines and
				// everything following them out of 'li'.
				if ( preg_match( '/\n\s*$/D', $c->nodeValue ) ) {
					$sentinel = $c;
				}

				// If we didn't hit pure whitespace, we are done!
				if ( !preg_match( '/^\s*$/D', $c->nodeValue ) ) {
					break;
				}
			} elseif ( $c instanceof Comment ) {
				$sentinel = $c;
			} elseif ( !WTUtils::isSolTransparentLink( $c ) ) {
				// We are done if we hit anything but text
				// or sol-transparent links.
				break;
			}

			$c = $c->previousSibling;
		}

		return $sentinel;
	}

	/**
	 * Earlier in the parsing pipeline, we suppress all newlines and
	 * other whitespace before sol-transparent links which causes them
	 * to be swallowed into preceding paragraphs and list items.
	 *
	 * However, with wikitext like this: `*a\n\n[[Category:Foo]]`, this
	 * could prevent proper roundtripping (because we suppress newlines
	 * when serializing list items). This needs addressing because
	 * this pattern is extremely common (some list at the end of the page
	 * followed by a list of categories for the page).
	 *
	 * @param Element $li List item to inspect; only the last item of a list
	 *   on the rightmost path of its outermost list is modified.
	 * @param DTState $state Traversal state; nothing is done while
	 *   `$state->tplInfo` is set.
	 * @return bool Always true.
	 */
	public static function migrateTrailingSolTransparentLinks( Element $li, DTState $state ): bool {
		// * Don't bother fixing up template content when processing the full page
		if ( $state->tplInfo ?? null ) {
			return true;
		}

		// If there is migratable content inside a list item
		// (such as categories preceded by newlines),
		// * migrate it out of the outermost list
		// * and fix up the DSR of list items and list along the rightmost path.
		if ( $li->nextSibling !== null || !DOMUtils::isList( $li->parentNode ) ) {
			return true;
		}
		// Only looked up once the cheap checks above have passed.
		$lastChild = DiffDOMUtils::lastNonSepChild( $li );
		if ( $lastChild === null || !WTUtils::isSolTransparentLink( $lastChild ) ) {
			return true;
		}

		// Find the outermost list -- content will be moved after it
		$outerList = $li->parentNode;
		while ( DOMUtils::isListItem( $outerList->parentNode ) ) {
			$p = $outerList->parentNode;
			// Bail if we find ourself on a path that is not the rightmost path.
			if ( $p->nextSibling !== null ) {
				return true;
			}
			$outerList = $p->parentNode;
		}

		// Find last migratable node
		$sentinel = self::findLastMigratableNode( $li );
		if ( !$sentinel ) {
			return true;
		}

		// Migrate (and update DSR)
		$liDsr = DOMDataUtils::getDataParsoid( $li )->dsr ?? null;
		$newEndDsr = -1; // dummy to eliminate useless null checks
		$dest = $outerList->parentNode;
		// Always take the current last child of $li and insert it right after
		// $outerList: nodes are processed back-to-front, so inserting each one
		// before $outerList->nextSibling preserves their original order.
		// The loop is bounded by $c !== null so that it cannot spin forever
		// if $li runs out of children before the sentinel is seen.
		for ( $c = $li->lastChild; $c !== null; $c = $li->lastChild ) {
			if ( $c instanceof Element ) {
				$dsr = DOMDataUtils::getDataParsoid( $c )->dsr ?? null;
				// An element without a usable DSR start invalidates the
				// running offset (it goes negative and is rejected below).
				$newEndDsr = $dsr->start ?? -1;
				$dest->insertBefore( $c, $outerList->nextSibling );
			} elseif ( $c instanceof Text ) {
				if ( preg_match( '/^\s*$/D', $c->nodeValue ) ) {
					$newEndDsr -= strlen( $c->nodeValue );
					$dest->insertBefore( $c, $outerList->nextSibling );
				} else {
					// Split off the newlines into its own node and migrate it
					$original = $c->nodeValue;
					$c->nodeValue = preg_replace( '/\s+$/D', '', $original, 1 );
					$nls = substr( $original, strlen( $c->nodeValue ) );
					$nlNode = $c->ownerDocument->createTextNode( $nls );
					$dest->insertBefore( $nlNode, $outerList->nextSibling );
					$newEndDsr -= strlen( $nls );
				}
			} elseif ( $c instanceof Comment ) {
				$newEndDsr -= WTUtils::decodedCommentLength( $c );
				$dest->insertBefore( $c, $outerList->nextSibling );
			}

			if ( $c === $sentinel ) {
				break;
			}
		}

		// Update DSR of all listitem & list nodes till
		// we hit the outermost list we started with.
		// Without a known end offset on $li or a valid new end offset there
		// is nothing meaningful to adjust by. (Subtracting from a null end
		// would otherwise silently produce a bogus delta.)
		if ( $liDsr === null || $liDsr->end === null || $newEndDsr < 0 ) {
			return true;
		}

		// If there is no delta to adjust dsr by, we are done
		$delta = $liDsr->end - $newEndDsr;
		if ( $delta === 0 ) {
			return true;
		}

		// Fix DSR along the rightmost path to outerList
		$list = null;
		while ( $outerList !== $list ) {
			$list = $li->parentNode;
			DOMUtils::assertElt( $list );

			// Only shift ends that are actually known; an unknown (null)
			// end must stay unknown rather than become a made-up number.
			$liDp = DOMDataUtils::getDataParsoid( $li );
			if ( isset( $liDp->dsr->end ) ) {
				$liDp->dsr->end -= $delta;
			}

			$listDp = DOMDataUtils::getDataParsoid( $list );
			if ( isset( $listDp->dsr->end ) ) {
				$listDp->dsr->end -= $delta;
			}
			$li = $list->parentNode;
		}

		return true;
	}
}