Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
3.09% |
3 / 97 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
| PWrap | |
3.09% |
3 / 97 |
|
0.00% |
0 / 9 |
1805.87 | |
0.00% |
0 / 1 |
| flatten | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
| pWrapOptionalChildren | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
| pWrapOptional | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
72 | |||
| isSplittableTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| mergeRuns | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
90 | |||
| split | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
| pWrapDOM | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
90 | |||
| pWrapInsideTag | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
| run | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
| 5 | |
| 6 | use Wikimedia\Assert\UnreachableException; |
| 7 | use Wikimedia\Parsoid\Config\Env; |
| 8 | use Wikimedia\Parsoid\DOM\Comment; |
| 9 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
| 10 | use Wikimedia\Parsoid\DOM\Element; |
| 11 | use Wikimedia\Parsoid\DOM\Node; |
| 12 | use Wikimedia\Parsoid\DOM\Text; |
| 13 | use Wikimedia\Parsoid\NodeData\TempData; |
| 14 | use Wikimedia\Parsoid\Utils\DOMCompat; |
| 15 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
| 16 | use Wikimedia\Parsoid\Utils\DOMUtils; |
| 17 | use Wikimedia\Parsoid\Wt2Html\Wt2HtmlDOMProcessor; |
| 18 | |
| 19 | class PWrap implements Wt2HtmlDOMProcessor { |
| 20 | |
| 21 | /** |
| 22 | * Flattens an array with other arrays for elements into |
| 23 | * an array without nested arrays. |
| 24 | * |
| 25 | * @param array[] $a |
| 26 | * @return array |
| 27 | */ |
| 28 | private function flatten( array $a ): array { |
| 29 | return $a === [] ? [] : array_merge( ...$a ); |
| 30 | } |
| 31 | |
| 32 | private static function pWrapOptionalChildren( Env $env, Node $n ): bool { |
| 33 | foreach ( $n->childNodes as $c ) { |
| 34 | if ( !self::pWrapOptional( $env, $c ) ) { |
| 35 | return false; |
| 36 | } |
| 37 | } |
| 38 | return true; |
| 39 | } |
| 40 | |
| 41 | /** |
| 42 | * Is a P-wrapper optional for this node? |
| 43 | * |
| 44 | * The following nodes do not need p wrappers of their own: |
| 45 | * - whitespace nodes |
| 46 | * - comment nodes |
| 47 | * - HTML metadata tags generated by wikitext (not always rendering-transparent). |
| 48 | * These metatags don't need p-wrappers of their own. Both Remex and Parsoid |
| 49 | * have identical p-wrapping behavior on these tags. This is a superset of |
| 50 | * \MediaWiki\Tidy\RemexCompatMunger::$metadataElements. |
| 51 | * - parsoid-added span wrappers around pwrap-optional nodes |
| 52 | * |
| 53 | * @param Env $env |
| 54 | * @param Node $n |
| 55 | * @return bool |
| 56 | */ |
| 57 | public static function pWrapOptional( Env $env, Node $n ): bool { |
| 58 | if ( |
| 59 | $n instanceof Element && |
| 60 | DOMDataUtils::getDataParsoid( $n )->getTempFlag( TempData::WRAPPER ) |
| 61 | ) { |
| 62 | if ( DOMUtils::hasTypeOf( $n, 'mw:DOMFragment' ) ) { |
| 63 | $domFragment = $env->getDOMFragment( |
| 64 | DOMDataUtils::getDataParsoid( $n )->html |
| 65 | ); |
| 66 | return self::pWrapOptionalChildren( $env, $domFragment ); |
| 67 | } else { |
| 68 | return self::pWrapOptionalChildren( $env, $n ); |
| 69 | } |
| 70 | } |
| 71 | return $n instanceof Comment || |
| 72 | ( $n instanceof Text && preg_match( '/^\s*$/D', $n->nodeValue ) ) || |
| 73 | ( $n instanceof Element && DOMUtils::isMetaDataTag( $n ) ); |
| 74 | } |
| 75 | |
| 76 | /** |
| 77 | * Can we split the subtree rooted at $n into multiple adjacent |
| 78 | * subtrees rooted in a clone of $n where each of those subtrees |
| 79 | * get a contiguous subset of $n's children? |
| 80 | * |
| 81 | * This is probably equivalent to asking if this node supports the |
| 82 | * adoption agency algorithm in the HTML5 spec. |
| 83 | * |
| 84 | * @param Node $n |
| 85 | * @return bool |
| 86 | */ |
| 87 | private function isSplittableTag( Node $n ): bool { |
| 88 | // Seems safe to split span, sub, sup, cite tags |
| 89 | // |
| 90 | // However, if we want to mimic Parsoid and HTML5 spec |
| 91 | // precisely, we should only use isFormattingElt(n) |
| 92 | return DOMUtils::isFormattingElt( $n ); |
| 93 | } |
| 94 | |
| 95 | /** |
| 96 | * Merge a contiguous run of split subtrees that have identical pwrap properties |
| 97 | * |
| 98 | * @param Element $n |
| 99 | * @param array $a |
| 100 | * @return array |
| 101 | */ |
| 102 | private function mergeRuns( Element $n, array $a ): array { |
| 103 | $ret = []; |
| 104 | // This flag should be transferred to the rightmost |
| 105 | // clone of this node in the loop below. |
| 106 | $ndp = DOMDataUtils::getDataParsoid( $n ); |
| 107 | $origAIEnd = $ndp->autoInsertedEnd ?? null; |
| 108 | $origEndTSR = $ndp->tmp->endTSR ?? null; |
| 109 | $i = -1; |
| 110 | foreach ( $a as $v ) { |
| 111 | if ( $i < 0 ) { |
| 112 | $ret[] = [ 'pwrap' => $v['pwrap'], 'node' => $n ]; |
| 113 | $i++; |
| 114 | } elseif ( $ret[$i]['pwrap'] === null ) { |
| 115 | // @phan-suppress-previous-line PhanTypeInvalidDimOffset |
| 116 | $ret[$i]['pwrap'] = $v['pwrap']; |
| 117 | } elseif ( $ret[$i]['pwrap'] !== $v['pwrap'] && $v['pwrap'] !== null ) { |
| 118 | // @phan-suppress-previous-line PhanTypeInvalidDimOffset |
| 119 | // @phan-suppress-next-line PhanTypeInvalidDimOffset |
| 120 | $dp = DOMDataUtils::getDataParsoid( $ret[$i]['node'] ); |
| 121 | $dp->autoInsertedEnd = true; |
| 122 | unset( $dp->tmp->endTSR ); |
| 123 | $cnode = DOMDataUtils::cloneNode( $n, false ); |
| 124 | $ret[] = [ 'pwrap' => $v['pwrap'], 'node' => $cnode ]; |
| 125 | $i++; |
| 126 | DOMDataUtils::getDataParsoid( $ret[$i]['node'] )->autoInsertedStart = true; |
| 127 | } |
| 128 | $ret[$i]['node']->appendChild( $v['node'] ); |
| 129 | } |
| 130 | if ( $i >= 0 ) { |
| 131 | $dp = DOMDataUtils::getDataParsoid( $ret[$i]['node'] ); |
| 132 | if ( $origAIEnd ) { |
| 133 | $dp->autoInsertedEnd = true; |
| 134 | unset( $dp->tmp->endTSR ); |
| 135 | } else { |
| 136 | unset( $dp->autoInsertedEnd ); |
| 137 | if ( $origEndTSR ) { |
| 138 | $dp->getTemp()->endTSR = $origEndTSR; |
| 139 | } |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | return $ret; |
| 144 | } |
| 145 | |
| 146 | /** |
| 147 | * Implements the split operation described in the algorithm below. |
| 148 | * |
| 149 | * The values of 'pwrap' here bear out in pWrapDOM below. |
| 150 | * |
| 151 | * true: opens a paragraph or continues adding to a paragraph |
| 152 | * false: closes a paragraph |
| 153 | * null: agnostic, doesn't open or close a paragraph |
| 154 | * |
| 155 | * @param Env $env |
| 156 | * @param Node $n |
| 157 | * @return array |
| 158 | */ |
| 159 | private function split( Env $env, Node $n ): array { |
| 160 | if ( $this->pWrapOptional( $env, $n ) ) { |
| 161 | // Set 'pwrap' to null so p-wrapping doesn't break |
| 162 | // a run of wrappable nodes because of these. |
| 163 | return [ [ 'pwrap' => null, 'node' => $n ] ]; |
| 164 | } elseif ( $n instanceof Text ) { |
| 165 | return [ [ 'pwrap' => true, 'node' => $n ] ]; |
| 166 | } elseif ( !$this->isSplittableTag( $n ) || count( $n->childNodes ) === 0 ) { |
| 167 | // block tag OR non-splittable inline tag |
| 168 | return [ |
| 169 | [ 'pwrap' => !DOMUtils::hasBlockTag( $n ), 'node' => $n ] |
| 170 | ]; |
| 171 | } else { |
| 172 | DOMUtils::assertElt( $n ); |
| 173 | // splittable inline tag |
| 174 | // split for each child and merge runs |
| 175 | $children = $n->childNodes; |
| 176 | $splits = []; |
| 177 | foreach ( $children as $child ) { |
| 178 | $splits[] = $this->split( $env, $child ); |
| 179 | } |
| 180 | return $this->mergeRuns( $n, $this->flatten( $splits ) ); |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | /** |
| 185 | * Wrap children of '$root' with paragraph tags |
| 186 | * so that the final output has the following properties: |
| 187 | * |
| 188 | * 1. A paragraph will have at least one non-whitespace text |
| 189 | * node or a non-block element node in its subtree. |
| 190 | * |
| 191 | * 2. Two paragraph nodes aren't siblings of each other. |
| 192 | * |
| 193 | * 3. If a child of $root is not a paragraph node, it is one of: |
| 194 | * - a white-space only text node |
| 195 | * - a comment node |
| 196 | * - a block element |
| 197 | * - a splittable inline element which has some block node |
| 198 | * on *all* paths from it to all leaves in its subtree. |
| 199 | * - a non-splittable inline element which has some block node |
| 200 | * on *some* path from it to a leaf in its subtree. |
| 201 | * |
| 202 | * This output is generated with the following algorithm |
| 203 | * |
| 204 | * 1. Block nodes are skipped over |
| 205 | * 2. Non-splittable inline nodes that have a block tag |
| 206 | * in their subtree are skipped over. |
| 207 | * 3. A splittable inline node, I, that has at least one block tag |
| 208 | * in its subtree is split into multiple trees such that |
| 209 | * - each new tree is rooted in I |
| 210 | * - the trees alternate between two kinds |
| 211 | * (a) it has no block node inside |
| 212 | * => pwrap is true |
| 213 | * (b) all paths from I to its leaves have some block node inside |
| 214 | * => pwrap is false |
| 215 | * 4. A paragraph tag is wrapped around adjacent runs of comment nodes, |
| 216 | * text nodes, and an inline node that has no block node embedded inside. |
| 217 | * This paragraph tag does not start with nodes for which p-wrapping is |
| 218 | * optional (as determined by the pWrapOptional helper). The current |
| 219 | * algorithm also ensures that it doesn't end with one of those either |
| 220 | * (if it impacts template / param / annotation range building). |
| 221 | * |
| 222 | * @param Env $env |
| 223 | * @param Element|DocumentFragment $root |
| 224 | */ |
| 225 | private function pWrapDOM( Env $env, Node $root ) { |
| 226 | $state = new PWrapState( $env ); |
| 227 | $c = $root->firstChild; |
| 228 | while ( $c ) { |
| 229 | $next = $c->nextSibling; |
| 230 | if ( DOMUtils::isRemexBlockNode( $c ) ) { |
| 231 | $state->reset(); |
| 232 | } else { |
| 233 | $vs = $this->split( $env, $c ); |
| 234 | foreach ( $vs as $v ) { |
| 235 | $n = $v['node']; |
| 236 | if ( $v['pwrap'] === false ) { |
| 237 | $state->reset(); |
| 238 | $root->insertBefore( $n, $next ); |
| 239 | } elseif ( $v['pwrap'] === null ) { |
| 240 | if ( $state->p ) { |
| 241 | $state->p->appendChild( $n ); |
| 242 | $state->processOptionalNode( $n ); |
| 243 | } else { |
| 244 | $root->insertBefore( $n, $next ); |
| 245 | } |
| 246 | } elseif ( $v['pwrap'] === true ) { |
| 247 | if ( !$state->p ) { |
| 248 | $state->p = $root->ownerDocument->createElement( 'p' ); |
| 249 | $root->insertBefore( $state->p, $next ); |
| 250 | } |
| 251 | $state->p->appendChild( $n ); |
| 252 | } else { |
| 253 | throw new UnreachableException( 'Unexpected value for pwrap.' ); |
| 254 | } |
| 255 | } |
| 256 | } |
| 257 | $c = $next; |
| 258 | } |
| 259 | $state->reset(); |
| 260 | } |
| 261 | |
| 262 | /** |
| 263 | * This function walks the DOM tree rooted at '$root' |
| 264 | * and uses pWrapDOM to add appropriate paragraph wrapper |
| 265 | * tags around children of nodes with tag name '$tagName'. |
| 266 | * |
| 267 | * @param Env $env |
| 268 | * @param Element|DocumentFragment $root |
| 269 | * @param string $tagName |
| 270 | */ |
| 271 | private function pWrapInsideTag( Env $env, Node $root, string $tagName ) { |
| 272 | $c = $root->firstChild; |
| 273 | while ( $c ) { |
| 274 | $next = $c->nextSibling; |
| 275 | if ( $c instanceof Element ) { |
| 276 | if ( DOMCompat::nodeName( $c ) === $tagName ) { |
| 277 | $this->pWrapDOM( $env, $c ); |
| 278 | } else { |
| 279 | $this->pWrapInsideTag( $env, $c, $tagName ); |
| 280 | } |
| 281 | } |
| 282 | $c = $next; |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | /** |
| 287 | * Wrap children of <body> as well as children of |
| 288 | * <blockquote> found anywhere in the DOM tree. |
| 289 | * |
| 290 | * @inheritDoc |
| 291 | */ |
| 292 | public function run( |
| 293 | Env $env, Node $root, array $options = [], bool $atTopLevel = false |
| 294 | ): void { |
| 295 | // No p-wrapping in an inline context! |
| 296 | if ( !empty( $options['inlineContext'] ) ) { |
| 297 | return; |
| 298 | } |
| 299 | |
| 300 | '@phan-var Element|DocumentFragment $root'; // @var Element|DocumentFragment $root |
| 301 | $this->pWrapDOM( $env, $root ); |
| 302 | $this->pWrapInsideTag( $env, $root, 'blockquote' ); |
| 303 | } |
| 304 | } |