Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
3.16% |
3 / 95 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
PWrap | |
3.16% |
3 / 95 |
|
0.00% |
0 / 9 |
1722.30 | |
0.00% |
0 / 1 |
flatten | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
pWrapOptionalChildren | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
pWrapOptional | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
56 | |||
isSplittableTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
mergeRuns | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
90 | |||
split | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
pWrapDOM | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
90 | |||
pWrapInsideTag | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
run | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
5 | |
6 | use Wikimedia\Assert\UnreachableException; |
7 | use Wikimedia\Parsoid\Config\Env; |
8 | use Wikimedia\Parsoid\DOM\Comment; |
9 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
10 | use Wikimedia\Parsoid\DOM\Element; |
11 | use Wikimedia\Parsoid\DOM\Node; |
12 | use Wikimedia\Parsoid\DOM\Text; |
13 | use Wikimedia\Parsoid\NodeData\TempData; |
14 | use Wikimedia\Parsoid\Utils\DOMCompat; |
15 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
16 | use Wikimedia\Parsoid\Utils\DOMUtils; |
17 | use Wikimedia\Parsoid\Wt2Html\Wt2HtmlDOMProcessor; |
18 | |
19 | class PWrap implements Wt2HtmlDOMProcessor { |
20 | |
/**
 * Concatenate a list of arrays into one array, removing a single
 * level of nesting.
 *
 * @param array[] $a List whose elements are themselves arrays
 * @return array All elements of the inner arrays, in order
 */
private function flatten( array $a ): array {
	// Seeding the call with an empty array makes the empty-list case
	// fall out naturally instead of needing a separate check.
	return array_merge( [], ...$a );
}
31 | |
/**
 * Check whether a p-wrapper is optional for every child of $elt.
 *
 * @param Element $elt
 * @return bool True when $elt has no child for which pWrapOptional() fails
 */
private static function pWrapOptionalChildren( Element $elt ): bool {
	for ( $child = $elt->firstChild; $child !== null; $child = $child->nextSibling ) {
		if ( !self::pWrapOptional( $child ) ) {
			// One child that needs wrapping is enough to decide
			return false;
		}
	}
	return true;
}
40 | |
/**
 * Is a P-wrapper optional for this node?
 *
 * Nodes that do not need a p-wrapper of their own:
 * - comment nodes
 * - whitespace-only text nodes
 * - HTML metadata tags generated by wikitext. These are not always
 *   rendering-transparent, but both Remex and Parsoid have identical
 *   p-wrapping behavior on them. This is a superset of
 *   \MediaWiki\Tidy\RemexCompatMunger::$metadataElements.
 * - parsoid-added span wrappers whose children are all pwrap-optional
 *
 * @param Node $n
 * @return bool
 */
public static function pWrapOptional( Node $n ): bool {
	if ( $n instanceof Comment ) {
		return true;
	}

	if ( $n instanceof Text ) {
		// Only empty / all-whitespace text is optional
		return preg_match( '/^\s*$/D', $n->nodeValue ) === 1;
	}

	if ( !( $n instanceof Element ) ) {
		return false;
	}

	if ( DOMUtils::isMetaDataTag( $n ) ) {
		return true;
	}

	// A wrapper element is optional only if everything inside it is
	return DOMDataUtils::getDataParsoid( $n )->getTempFlag( TempData::WRAPPER ) &&
		self::pWrapOptionalChildren( $n );
}
67 | |
/**
 * Can the subtree rooted at $n be split into several adjacent subtrees,
 * each rooted in a clone of $n and each receiving a contiguous subset
 * of $n's children?
 *
 * This is probably equivalent to asking whether the node supports the
 * adoption agency algorithm of the HTML5 spec.
 *
 * @param Node $n
 * @return bool
 */
private function isSplittableTag( Node $n ): bool {
	// Splitting span, sub, sup and cite tags seems safe as well, but to
	// mimic Parsoid and the HTML5 spec precisely we restrict ourselves
	// to what isFormattingElt() accepts.
	$isFormatting = DOMUtils::isFormattingElt( $n );
	return $isFormatting;
}
86 | |
/**
 * Merge a contiguous run of split subtrees that have identical pwrap
 * properties.
 *
 * The first run reuses $n itself as its container; every further run is
 * collected in a shallow clone of $n. Entries whose 'pwrap' is null never
 * start a new run: they are appended to the run that is currently open.
 *
 * @param Element $n Element whose children were split
 * @param array $a Flat list of [ 'pwrap' => ?bool, 'node' => Node ] entries
 * @return array List of [ 'pwrap' => ?bool, 'node' => $n or a clone of $n ]
 */
private function mergeRuns( Element $n, array $a ): array {
	// The loop below can overwrite these on $n's own data, so remember
	// the original values: they belong on the rightmost piece.
	$nodeDp = DOMDataUtils::getDataParsoid( $n );
	$hadAutoInsertedEnd = $nodeDp->autoInsertedEnd ?? null;
	$savedEndTSR = $nodeDp->tmp->endTSR ?? null;

	$runs = [];
	$last = -1;
	foreach ( $a as $piece ) {
		$pwrap = $piece['pwrap'];
		if ( $last < 0 ) {
			// Very first piece: open the first run inside $n itself
			$runs[] = [ 'pwrap' => $pwrap, 'node' => $n ];
			$last++;
		} elseif ( $runs[$last]['pwrap'] === null ) {
			// @phan-suppress-previous-line PhanTypeInvalidDimOffset
			// The open run was agnostic so far; it adopts this piece's value
			$runs[$last]['pwrap'] = $pwrap;
		} elseif ( $pwrap !== null && $runs[$last]['pwrap'] !== $pwrap ) {
			// @phan-suppress-previous-line PhanTypeInvalidDimOffset
			// @phan-suppress-next-line PhanTypeInvalidDimOffset
			$closingDp = DOMDataUtils::getDataParsoid( $runs[$last]['node'] );
			// The run being closed no longer has a real end tag
			$closingDp->autoInsertedEnd = true;
			unset( $closingDp->tmp->endTSR );

			// ... and the new run lives in a clone without a real start tag
			$clone = DOMDataUtils::cloneNode( $n, false );
			$runs[] = [ 'pwrap' => $pwrap, 'node' => $clone ];
			$last++;
			DOMDataUtils::getDataParsoid( $runs[$last]['node'] )->autoInsertedStart = true;
		}
		$runs[$last]['node']->appendChild( $piece['node'] );
	}

	if ( $last < 0 ) {
		// Nothing to merge
		return $runs;
	}

	// Restore the original end-tag information on the rightmost piece
	$lastDp = DOMDataUtils::getDataParsoid( $runs[$last]['node'] );
	if ( $hadAutoInsertedEnd ) {
		$lastDp->autoInsertedEnd = true;
		unset( $lastDp->tmp->endTSR );
	} else {
		unset( $lastDp->autoInsertedEnd );
		if ( $savedEndTSR ) {
			$lastDp->getTemp()->endTSR = $savedEndTSR;
		}
	}

	return $runs;
}
137 | |
/**
 * Implements the split operation described in the algorithm below.
 *
 * The values of 'pwrap' here bear out in pWrapDOM below.
 *
 *  true: opens a paragraph or continues adding to a paragraph
 *  false: closes a paragraph
 *  null: agnostic, doesn't open or close a paragraph
 *
 * @param Node $n
 * @return array List of [ 'pwrap' => ?bool, 'node' => Node ] entries
 */
private function split( Node $n ): array {
	// pWrapOptional() is static, so call it statically
	if ( self::pWrapOptional( $n ) ) {
		// Set 'pwrap' to null so p-wrapping doesn't break
		// a run of wrappable nodes because of these.
		return [ [ 'pwrap' => null, 'node' => $n ] ];
	}

	if ( $n instanceof Text ) {
		return [ [ 'pwrap' => true, 'node' => $n ] ];
	}

	if ( !$this->isSplittableTag( $n ) || !$n->hasChildNodes() ) {
		// block tag OR non-splittable inline tag OR childless node:
		// kept whole, wrappable only if it has no block tag
		return [
			[ 'pwrap' => !DOMUtils::hasBlockTag( $n ), 'node' => $n ]
		];
	}

	DOMUtils::assertElt( $n );
	// splittable inline tag:
	// split for each child and merge runs
	$splits = [];
	foreach ( $n->childNodes as $child ) {
		$splits[] = $this->split( $child );
	}
	return $this->mergeRuns( $n, $this->flatten( $splits ) );
}
174 | |
/**
 * Wrap children of '$root' with paragraph tags
 * so that the final output has the following properties:
 *
 * 1. A paragraph will have at least one non-whitespace text
 *    node or a non-block element node in its subtree.
 *
 * 2. Two paragraph nodes aren't siblings of each other.
 *
 * 3. If a child of $root is not a paragraph node, it is one of:
 *    - a white-space only text node
 *    - a comment node
 *    - a block element
 *    - a splittable inline element which has some block node
 *      on *all* paths from it to all leaves in its subtree.
 *    - a non-splittable inline element which has some block node
 *      on *some* path from it to a leaf in its subtree.
 *
 * This output is generated with the following algorithm
 *
 * 1. Block nodes are skipped over
 * 2. Non-splittable inline nodes that have a block tag
 *    in its subtree are skipped over.
 * 3. A splittable inline node, I, that has at least one block tag
 *    in its subtree is split into multiple trees such that
 *    - each new tree is rooted in I
 *    - the trees alternate between two kinds
 *      (a) it has no block node inside
 *          => pwrap is true
 *      (b) all paths from I to its leaves have some block node inside
 *          => pwrap is false
 * 4. A paragraph tag is wrapped around adjacent runs of comment nodes,
 *    text nodes, and an inline node that has no block node embedded inside.
 *    This paragraph tag does not start with nodes for which p-wrapping is
 *    optional (as determined by the pWrapOptional helper). The current
 *    algorithm also ensures that it doesn't end with one of those either
 *    (if it impacts template / param / annotation range building).
 *
 * @param Element|DocumentFragment $root
 */
private function pWrapDOM( Node $root ) {
	$state = new PWrapState();
	for ( $c = $root->firstChild; $c; $c = $next ) {
		// Fetch the sibling up front: $c may be moved into a <p> below
		$next = $c->nextSibling;

		if ( DOMUtils::isRemexBlockNode( $c ) ) {
			$state->reset();
			continue;
		}

		foreach ( $this->split( $c ) as $v ) {
			$n = $v['node'];
			$pwrap = $v['pwrap'];

			if ( $pwrap === true ) {
				// Open a paragraph if there isn't one, then add to it
				if ( !$state->p ) {
					$state->p = $root->ownerDocument->createElement( 'p' );
					$root->insertBefore( $state->p, $next );
				}
				$state->p->appendChild( $n );
			} elseif ( $pwrap === false ) {
				// Stays outside of any paragraph
				$state->reset();
				$root->insertBefore( $n, $next );
			} elseif ( $pwrap === null ) {
				// Joins the open paragraph if any, never opens one
				if ( $state->p ) {
					$state->p->appendChild( $n );
					$state->processOptionalNode( $n );
				} else {
					$root->insertBefore( $n, $next );
				}
			} else {
				throw new UnreachableException( 'Unexpected value for pwrap.' );
			}
		}
	}
	$state->reset();
}
251 | |
/**
 * This function walks the DOM tree rooted at '$root'
 * and uses pWrapDOM to add appropriate paragraph wrapper
 * tags around children of nodes with tag name '$tagName'.
 *
 * An element that matches '$tagName' is handed to pWrapDOM and is not
 * descended into by this walk.
 *
 * @param Element|DocumentFragment $root
 * @param string $tagName
 */
private function pWrapInsideTag( Node $root, string $tagName ) {
	for ( $child = $root->firstChild; $child; $child = $following ) {
		// Read the sibling before the child is processed
		$following = $child->nextSibling;

		if ( !( $child instanceof Element ) ) {
			continue;
		}

		if ( DOMCompat::nodeName( $child ) === $tagName ) {
			$this->pWrapDOM( $child );
		} else {
			$this->pWrapInsideTag( $child, $tagName );
		}
	}
}
274 | |
/**
 * P-wrap the children of $root, then the children of the
 * <blockquote> elements reached by walking the tree below $root.
 * Nothing is done when the 'inlineContext' option is set.
 *
 * @inheritDoc
 */
public function run(
	Env $env, Node $root, array $options = [], bool $atTopLevel = false
): void {
	$inlineContext = (bool)( $options['inlineContext'] ?? false );

	// No p-wrapping in an inline context!
	if ( !$inlineContext ) {
		'@phan-var Element|DocumentFragment $root'; // @var Element|DocumentFragment $root
		$this->pWrapDOM( $root );
		$this->pWrapInsideTag( $root, 'blockquote' );
	}
}
293 | } |