Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 83 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
LiFixups | |
0.00% |
0 / 83 |
|
0.00% |
0 / 3 |
1260 | |
0.00% |
0 / 1 |
getMigrationInfo | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
42 | |||
findLastMigratableNode | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
90 | |||
migrateTrailingCategories | |
0.00% |
0 / 54 |
|
0.00% |
0 / 1 |
420 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Handlers; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\DOM\Comment; |
8 | use Wikimedia\Parsoid\DOM\Element; |
9 | use Wikimedia\Parsoid\DOM\Node; |
10 | use Wikimedia\Parsoid\DOM\Text; |
11 | use Wikimedia\Parsoid\Utils\DiffDOMUtils; |
12 | use Wikimedia\Parsoid\Utils\DOMCompat; |
13 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
14 | use Wikimedia\Parsoid\Utils\DOMUtils; |
15 | use Wikimedia\Parsoid\Utils\DTState; |
16 | use Wikimedia\Parsoid\Utils\WTUtils; |
17 | |
/**
 * DOM handlers that move trailing category links (and the separators in
 * front of them) out of the last item of a list, so that they end up
 * after the outermost list instead of inside it.
 */
class LiFixups {

	/**
	 * Decide whether the content around $c may be migrated, taking the
	 * encapsulation wrapper (if any) reported by
	 * WTUtils::findFirstEncapsulationWrapperNode() into account.
	 *
	 * When a wrapper root is reported, every sibling from $c back to (and
	 * including) that root has to be either a category link or a `span`
	 * whose text content is empty / whitespace-only. When no root is
	 * reported, the node is considered migratable.
	 *
	 * @param Node $c Node to start the backwards sibling walk from
	 * @return array Two keys: 'tplRoot' (the reported wrapper root or null)
	 *   and 'migratable' (bool)
	 */
	private static function getMigrationInfo( Node $c ): array {
		$tplRoot = WTUtils::findFirstEncapsulationWrapperNode( $c );
		$migratable = true;

		if ( $tplRoot !== null ) {
			// The walk ends once we step past tplRoot itself.
			$stop = $tplRoot->previousSibling;
			for ( $node = $c; $node !== $stop; $node = $node->previousSibling ) {
				if ( WTUtils::isCategoryLink( $node ) ) {
					continue;
				}

				$isBlankSpan = DOMCompat::nodeName( $node ) === 'span' &&
					preg_match( '/^\s*$/D', $node->textContent );
				if ( !$isBlankSpan ) {
					$migratable = false;
					break;
				}
			}
		}

		return [ 'tplRoot' => $tplRoot, 'migratable' => $migratable ];
	}

	/**
	 * Walk backwards from the last non-separator child of $li and find the
	 * earliest node that should be moved out of the list item.
	 *
	 * Only Text nodes ending in a newline (optionally followed by more
	 * whitespace) and Comment nodes can become the sentinel. The walk ends
	 * at the first non-migratable unit, at the first Text node that is not
	 * pure whitespace, or at any other node that is not a category link.
	 *
	 * @param Node $li List item whose last non-separator child is a category link
	 * @return ?Node The sentinel, or null if nothing needs to be migrated
	 */
	private static function findLastMigratableNode( Node $li ): ?Node {
		$start = DiffDOMUtils::lastNonSepChild( $li );
		// The caller has already checked this; fail fast in parser tests
		// if that ever stops being true.
		Assert::invariant( WTUtils::isCategoryLink( $start ), 'c is known to be a category link' );

		$sentinel = null;
		for ( $node = $start; $node; $node = $node->previousSibling ) {
			// Deal with encapsulated units first: either skip the whole
			// unit by jumping to its root, or give up.
			$info = self::getMigrationInfo( $node );
			if ( !$info['migratable'] ) {
				break;
			}
			if ( $info['tplRoot'] !== null ) {
				$node = $info['tplRoot'];
			}

			if ( $node instanceof Comment ) {
				$sentinel = $node;
				continue;
			}

			if ( $node instanceof Text ) {
				$text = $node->nodeValue;
				// A trailing newline means this node, and everything
				// after it, should leave the list item.
				if ( preg_match( '/\n\s*$/D', $text ) ) {
					$sentinel = $node;
				}
				// Anything other than pure whitespace ends the search.
				if ( !preg_match( '/^\s*$/D', $text ) ) {
					break;
				}
				continue;
			}

			// Neither text nor comment: only category links keep us going.
			if ( !WTUtils::isCategoryLink( $node ) ) {
				break;
			}
		}

		return $sentinel;
	}

	/**
	 * Move trailing categories out of the last list item of a list.
	 *
	 * Earlier pipeline stages suppress newlines and other whitespace in
	 * front of categories, so category links get swallowed into preceding
	 * paragraphs and list items. For wikitext such as
	 * `*a\n\n[[Category:Foo]]` this can break roundtripping, because
	 * newlines are suppressed when list items are serialized. A list at the
	 * end of a page followed by the page's categories is an extremely
	 * common pattern, hence this fixup.
	 *
	 * The migrated nodes are re-inserted right after the outermost list,
	 * and the DSR end offsets of the list items and lists on the path from
	 * $li up to that outermost list are reduced accordingly.
	 *
	 * @param Element $li
	 * @param DTState $state
	 * @return bool Always true
	 */
	public static function migrateTrailingCategories( Element $li, DTState $state ): bool {
		// Don't bother fixing up template content when processing the full page
		if ( $state->tplInfo ?? null ) {
			return true;
		}

		// Only the last item of a list that ends in a category link
		// can hold migratable content.
		$isCandidate = $li->nextSibling === null &&
			DOMUtils::isList( $li->parentNode ) &&
			WTUtils::isCategoryLink( DiffDOMUtils::lastNonSepChild( $li ) );
		if ( !$isCandidate ) {
			return true;
		}

		// Climb to the outermost list; content will be placed after it.
		$outerList = $li->parentNode;
		while ( DOMUtils::isListItem( $outerList->parentNode ) ) {
			$enclosingItem = $outerList->parentNode;
			// Bail as soon as we leave the rightmost path.
			if ( $enclosingItem->nextSibling !== null ) {
				return true;
			}
			$outerList = $enclosingItem->parentNode;
		}

		$sentinel = self::findLastMigratableNode( $li );
		if ( !$sentinel ) {
			return true;
		}

		// Migrate children from the end of $li up to and including the
		// sentinel, tracking where the list item's source range now ends.
		$liDsr = DOMDataUtils::getDataParsoid( $li )->dsr ?? null;
		$newEndDsr = -1; // dummy to eliminate useless null checks
		do {
			$node = $li->lastChild;
			$moved = null;

			if ( $node instanceof Element ) {
				$nodeDsr = DOMDataUtils::getDataParsoid( $node )->dsr ?? null;
				$newEndDsr = $nodeDsr->start ?? -1;
				$moved = $node;
			} elseif ( $node instanceof Comment ) {
				$newEndDsr -= WTUtils::decodedCommentLength( $node );
				$moved = $node;
			} elseif ( $node instanceof Text ) {
				$original = $node->nodeValue;
				if ( preg_match( '/^\s*$/D', $original ) ) {
					// Pure whitespace: the node moves as a whole.
					$newEndDsr -= strlen( $original );
					$moved = $node;
				} else {
					// Keep the text in place, but split its trailing
					// whitespace off into a new node and move that.
					$node->nodeValue = preg_replace( '/\s+$/D', '', $original, 1 );
					$trailing = substr( $original, strlen( $node->nodeValue ) );
					$moved = $node->ownerDocument->createTextNode( $trailing );
					$newEndDsr -= strlen( $trailing );
				}
			}

			if ( $moved !== null ) {
				// Always insert directly after the outer list: since we
				// work back to front, document order is preserved.
				$outerList->parentNode->insertBefore( $moved, $outerList->nextSibling );
			}
		} while ( $node !== $sentinel );

		// Amount by which the DSR end offsets have to shrink, if known.
		$delta = ( $liDsr && $newEndDsr >= 0 ) ? $liDsr->end - $newEndDsr : null;

		// If there is no delta to adjust dsr by, we are done
		if ( !$delta ) {
			return true;
		}

		// Fix DSR of every list item & list along the rightmost path,
		// stopping once the outermost list has been updated.
		$item = $li;
		do {
			$list = $item->parentNode;
			DOMUtils::assertElt( $list );

			foreach ( [ $item, $list ] as $elt ) {
				$dp = DOMDataUtils::getDataParsoid( $elt );
				if ( !empty( $dp->dsr ) ) {
					$dp->dsr->end -= $delta;
				}
			}

			$item = $list->parentNode;
		} while ( $list !== $outerList );

		return true;
	}
}