Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 83 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
LiFixups | |
0.00% |
0 / 83 |
|
0.00% |
0 / 3 |
1260 | |
0.00% |
0 / 1 |
getMigrationInfo | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
42 | |||
findLastMigratableNode | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
90 | |||
migrateTrailingCategories | |
0.00% |
0 / 54 |
|
0.00% |
0 / 1 |
420 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\PP\Handlers; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\Config\Env; |
8 | use Wikimedia\Parsoid\DOM\Comment; |
9 | use Wikimedia\Parsoid\DOM\Element; |
10 | use Wikimedia\Parsoid\DOM\Node; |
11 | use Wikimedia\Parsoid\DOM\Text; |
12 | use Wikimedia\Parsoid\Utils\DOMCompat; |
13 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
14 | use Wikimedia\Parsoid\Utils\DOMUtils; |
15 | use Wikimedia\Parsoid\Utils\DTState; |
16 | use Wikimedia\Parsoid\Utils\WTUtils; |
17 | |
18 | class LiFixups { |
19 | /** |
20 | * @param Node $c |
21 | * @return array |
22 | */ |
23 | private static function getMigrationInfo( Node $c ): array { |
24 | $tplRoot = WTUtils::findFirstEncapsulationWrapperNode( $c ); |
25 | if ( $tplRoot !== null ) { |
26 | // Check if everything between tplRoot and c is migratable. |
27 | $prev = $tplRoot->previousSibling; |
28 | while ( $c !== $prev ) { |
29 | if ( !WTUtils::isCategoryLink( $c ) && |
30 | !( DOMCompat::nodeName( $c ) === 'span' && preg_match( '/^\s*$/D', $c->textContent ) ) |
31 | ) { |
32 | return [ 'tplRoot' => $tplRoot, 'migratable' => false ]; |
33 | } |
34 | |
35 | $c = $c->previousSibling; |
36 | } |
37 | } |
38 | |
39 | return [ 'tplRoot' => $tplRoot, 'migratable' => true ]; |
40 | } |
41 | |
42 | /** |
43 | * @param Node $li |
44 | * @return Node|null |
45 | */ |
46 | private static function findLastMigratableNode( Node $li ): ?Node { |
47 | $sentinel = null; |
48 | $c = DOMUtils::lastNonSepChild( $li ); |
49 | // c is known to be a category link. |
50 | // fail fast in parser tests if something changes. |
51 | Assert::invariant( WTUtils::isCategoryLink( $c ), 'c is known to be a category link' ); |
52 | while ( $c ) { |
53 | // Handle template units first |
54 | $info = self::getMigrationInfo( $c ); |
55 | if ( !$info['migratable'] ) { |
56 | break; |
57 | } elseif ( $info['tplRoot'] !== null ) { |
58 | $c = $info['tplRoot']; |
59 | } |
60 | |
61 | if ( $c instanceof Text ) { |
62 | // Update sentinel if we hit a newline. |
63 | // We want to migrate these newlines and |
64 | // everything following them out of 'li'. |
65 | if ( preg_match( '/\n\s*$/D', $c->nodeValue ) ) { |
66 | $sentinel = $c; |
67 | } |
68 | |
69 | // If we didn't hit pure whitespace, we are done! |
70 | if ( !preg_match( '/^\s*$/D', $c->nodeValue ) ) { |
71 | break; |
72 | } |
73 | } elseif ( $c instanceof Comment ) { |
74 | $sentinel = $c; |
75 | } elseif ( !WTUtils::isCategoryLink( $c ) ) { |
76 | // We are done if we hit anything but text |
77 | // or category links. |
78 | break; |
79 | } |
80 | |
81 | $c = $c->previousSibling; |
82 | } |
83 | |
84 | return $sentinel; |
85 | } |
86 | |
87 | /**
88 | * Move trailing category links (starting at the node returned by
89 | * findLastMigratableNode()) out of a list item to just after the outermost
90 | * enclosing list, then shrink the dsr end of the lists/items on that path.
91 | *
92 | * Earlier in the parsing pipeline, we suppress all newlines and other
93 | * whitespace before categories which causes category links to be swallowed
94 | * into preceding paragraphs and list items. However, with wikitext like this:
95 | * `*a\n\n[[Category:Foo]]`, this could prevent proper roundtripping (because we suppress newlines when serializing list items).
96 | * This needs addressing because this pattern is extremely common (some list at the end of the page followed by a list of categories for the page).
97 | * @param Element $li List item to inspect; only acted on when it has no next sibling, its parent is a list and its last non-separator child is a category link
98 | * @param Env $env Not referenced by this handler
99 | * @param DTState $state Traversal state; when its tplInfo is set the DOM is left untouched
100 | * @return bool Always true
101 | */
102 | public static function migrateTrailingCategories(
103 | Element $li, Env $env, DTState $state
104 | ): bool {
105 | // Don't bother fixing up template content when processing the full page
106 | if ( $state->tplInfo ?? null ) {
107 | return true;
108 | }
109 | 
110 | // If there is migratable content inside a list item
111 | // (categories preceded by newlines),
112 | // * migrate it out of the outermost list
113 | // * and fix up the DSR of list items and list along the rightmost path.
114 | if ( $li->nextSibling === null && DOMUtils::isList( $li->parentNode ) &&
115 | WTUtils::isCategoryLink( DOMUtils::lastNonSepChild( $li ) )
116 | ) {
117 | 
118 | // Find the outermost list -- content will be moved right after it
119 | $outerList = $li->parentNode;
120 | while ( DOMUtils::isListItem( $outerList->parentNode ) ) {
121 | $p = $outerList->parentNode;
122 | // Bail if we find ourselves on a path that is not the rightmost path.
123 | if ( $p->nextSibling !== null ) {
124 | return true;
125 | }
126 | $outerList = $p->parentNode;
127 | }
128 | 
129 | // Find the earliest child to migrate; without one there is nothing to do
130 | $sentinel = self::findLastMigratableNode( $li );
131 | if ( !$sentinel ) {
132 | return true;
133 | }
134 | 
135 | // Migrate (and update DSR): walk from the last child back to the sentinel, inserting each node directly after $outerList so the original order is kept
136 | $c = $li->lastChild;
137 | $liDsr = DOMDataUtils::getDataParsoid( $li )->dsr ?? null;
138 | $newEndDsr = -1; // dummy (negative == unknown) to eliminate useless null checks
139 | while ( true ) {
140 | if ( $c instanceof Element ) {
141 | $dsr = DOMDataUtils::getDataParsoid( $c )->dsr ?? null;
142 | $newEndDsr = $dsr->start ?? -1;
143 | $outerList->parentNode->insertBefore( $c, $outerList->nextSibling );
144 | } elseif ( $c instanceof Text ) {
145 | if ( preg_match( '/^\s*$/D', $c->nodeValue ) ) {
146 | $newEndDsr -= strlen( $c->nodeValue );
147 | $outerList->parentNode->insertBefore( $c, $outerList->nextSibling );
148 | } else {
149 | // Split off the trailing whitespace into its own node and migrate it; the rest of the text stays in $li
150 | $nls = $c->nodeValue;
151 | $c->nodeValue = preg_replace( '/\s+$/D', '', $c->nodeValue, 1 );
152 | $nls = substr( $nls, strlen( $c->nodeValue ) );
153 | $nlNode = $c->ownerDocument->createTextNode( $nls );
154 | $outerList->parentNode->insertBefore( $nlNode, $outerList->nextSibling );
155 | $newEndDsr -= strlen( $nls );
156 | }
157 | } elseif ( $c instanceof Comment ) {
158 | $newEndDsr -= WTUtils::decodedCommentLength( $c );
159 | $outerList->parentNode->insertBefore( $c, $outerList->nextSibling );
160 | }
161 | 
162 | if ( $c === $sentinel ) {
163 | break;
164 | }
165 | 
166 | $c = $li->lastChild;
167 | }
168 | 
169 | // Update DSR of all listitem & list nodes till
170 | // we hit the outermost list we started with.
171 | $delta = null;
172 | if ( $liDsr && $newEndDsr >= 0 ) {
173 | $delta = $liDsr->end - $newEndDsr;
174 | }
175 | 
176 | // If there is no delta to adjust dsr by (unknown or zero), we are done
177 | if ( !$delta ) {
178 | return true;
179 | }
180 | 
181 | // Fix DSR along the rightmost path to outerList: each pass handles one list item and its parent list, then climbs one level
182 | $list = null;
183 | while ( $outerList !== $list ) {
184 | $list = $li->parentNode;
185 | DOMUtils::assertElt( $list );
186 | 
187 | $liDp = DOMDataUtils::getDataParsoid( $li );
188 | if ( !empty( $liDp->dsr ) ) {
189 | $liDp->dsr->end -= $delta;
190 | }
191 | 
192 | $listDp = DOMDataUtils::getDataParsoid( $list );
193 | if ( !empty( $listDp->dsr ) ) {
194 | $listDp->dsr->end -= $delta;
195 | }
196 | $li = $list->parentNode;
197 | }
198 | }
199 | 
200 | return true;
201 | }
202 | } |