Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 96 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
| MigrateTrailingNLs | |
0.00% |
0 / 96 |
|
0.00% |
0 / 6 |
2162 | |
0.00% |
0 / 1 |
| nodeEndsLineInWT | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| getTableParent | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
| canMigrateNLOutOfNode | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
156 | |||
| hasZeroWidthWT | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
42 | |||
| doMigrateTrailingNLs | |
0.00% |
0 / 57 |
|
0.00% |
0 / 1 |
420 | |||
| run | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Wt2Html\DOM\Processors; |
| 5 | |
| 6 | use Wikimedia\Parsoid\Config\Env; |
| 7 | use Wikimedia\Parsoid\DOM\Comment; |
| 8 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
| 9 | use Wikimedia\Parsoid\DOM\Element; |
| 10 | use Wikimedia\Parsoid\DOM\Node; |
| 11 | use Wikimedia\Parsoid\DOM\Text; |
| 12 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
| 13 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
| 14 | use Wikimedia\Parsoid\Utils\DOMUtils; |
| 15 | use Wikimedia\Parsoid\Utils\PHPUtils; |
| 16 | use Wikimedia\Parsoid\Utils\WTUtils; |
| 17 | use Wikimedia\Parsoid\Wt2Html\Wt2HtmlDOMProcessor; |
| 18 | |
/**
 * DOM pass that moves trailing newlines (and the comments interleaved with
 * them) out of the end of an element and re-inserts them in the element's
 * parent, immediately after the element.
 */
class MigrateTrailingNLs implements Wt2HtmlDOMProcessor {
	/** Lazily built lookup set, keyed by node name; see nodeEndsLineInWT(). */
	private static ?array $nodesToMigrateFrom = null;

	/**
	 * Is $node one of the element kinds that newlines may be migrated out of,
	 * and not flagged as literal HTML by WTUtils::hasLiteralHTMLMarker()?
	 *
	 * @param Node $node
	 * @param DataParsoid $dp data-parsoid of $node
	 * @return bool
	 */
	private function nodeEndsLineInWT( Node $node, DataParsoid $dp ): bool {
		// The listed nodes either end a line in wikitext (tr, li, dd, ol, ul,
		// dl, caption, p) or have implicit closing tags that can leak newlines
		// to those that end a line (th, td). 'pre' is part of the set as well.
		//
		// SSS FIXME: Given condition 2 of canMigrateNLOutOfNode(), we may not
		// need to check th/td anymore (if we can rely on auto inserted
		// start/end tags being present always).
		if ( self::$nodesToMigrateFrom === null ) {
			self::$nodesToMigrateFrom = PHPUtils::makeSet( [
				'pre', 'th', 'td', 'tr', 'li', 'dd', 'ol', 'ul', 'dl', 'caption', 'p'
			] );
		}

		if ( !isset( self::$nodesToMigrateFrom[DOMUtils::nodeName( $node )] ) ) {
			return false;
		}
		return !WTUtils::hasLiteralHTMLMarker( $dp );
	}

	/**
	 * Walk up from a table cell / row / section / caption to the table that
	 * contains it. Each ancestor level is climbed at most once, in the order
	 * cell -> row -> section (or caption) -> table.
	 *
	 * @param Node $node
	 * @return ?Node the node reached if it is a 'table' (which may be $node
	 *   itself), null otherwise
	 */
	private function getTableParent( Node $node ): ?Node {
		$ancestorLevels = [
			[ 'td', 'th' ],
			[ 'tr' ],
			[ 'tbody', 'thead', 'tfoot', 'caption' ],
		];

		$currentName = DOMUtils::nodeName( $node );
		foreach ( $ancestorLevels as $levelNames ) {
			if ( in_array( $currentName, $levelNames, true ) ) {
				$node = $node->parentNode;
				$currentName = DOMUtils::nodeName( $node );
			}
		}

		if ( $currentName !== 'table' ) {
			return null;
		}
		return $node;
	}

	/**
	 * We can migrate a newline out of a node if one of the following is true:
	 * (1) The node ends a line in wikitext (=> not a literal html tag)
	 * (2) The node has an auto-closed end-tag (wikitext-generated or literal html tag)
	 *     and hasn't been fostered out of a table.
	 * (3) It is the rightmost node in the DOM subtree rooted at a node
	 *     that ends a line in wikitext
	 *
	 * Tables and nodes for which DOMUtils::atTheTop() holds never qualify.
	 *
	 * @param Element|DocumentFragment $node
	 * @return bool
	 */
	private function canMigrateNLOutOfNode( Node $node ): bool {
		if ( DOMUtils::nodeName( $node ) === 'table' || DOMUtils::atTheTop( $node ) ) {
			return false;
		}

		// Don't allow migration out of a table if the table has had
		// content fostered out of it.
		$tableParent = $this->getTableParent( $node );
		if ( $tableParent && $tableParent->previousSibling instanceof Element ) {
			$previousSibling = $tableParent->previousSibling;
			'@phan-var Element $previousSibling'; // @var Element $previousSibling
			if ( !empty( DOMDataUtils::getDataParsoid( $previousSibling )->fostered ) ) {
				return false;
			}
		}

		$dp = DOMDataUtils::getDataParsoid( $node );

		// Fostered nodes never qualify.
		if ( !empty( $dp->fostered ) ) {
			return false;
		}

		// Conditions (1) and (2).
		if ( $this->nodeEndsLineInWT( $node, $dp ) || !empty( $dp->autoInsertedEnd ) ) {
			return true;
		}

		// Condition (3): only the rightmost child can inherit the answer
		// from its parent.
		if ( $node->nextSibling ) {
			return false;
		}
		// FIXME: bug compatibility, previously the end meta caused
		// $node->nextSibling to be true for elements with end tags
		if ( !empty( $dp->tmp->endTSR ) ) {
			return false;
		}
		if ( !$node->parentNode ) {
			return false;
		}
		return $this->canMigrateNLOutOfNode( $node->parentNode );
	}

	/**
	 * A node has zero wt width if:
	 * - tsr->start == tsr->end
	 * - only has children with zero wt width
	 *
	 * A missing tsr, or a tsr whose start is null, means "not zero width".
	 *
	 * @param Element $node
	 * @return bool
	 */
	private function hasZeroWidthWT( Element $node ): bool {
		$tsr = DOMDataUtils::getDataParsoid( $node )->tsr ?? null;
		if ( !$tsr || $tsr->start === null || $tsr->start !== $tsr->end ) {
			return false;
		}

		// Every child must itself be a zero-width element; any other
		// kind of child disqualifies the node.
		for ( $kid = $node->firstChild; $kid !== null; $kid = $kid->nextSibling ) {
			if ( !( $kid instanceof Element ) || !$this->hasZeroWidthWT( $kid ) ) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Recursively migrate trailing newlines out of the subtree rooted at $elt.
	 * Nodes that are neither an Element nor a DocumentFragment are ignored.
	 *
	 * @param Node $elt
	 * @param Env $env only forwarded to the recursive calls
	 */
	public function doMigrateTrailingNLs( Node $elt, Env $env ): void {
		if ( !( $elt instanceof Element || $elt instanceof DocumentFragment ) ) {
			return;
		}

		// 1. Process DOM rooted at 'elt' first
		//
		// Process children backward so that a table
		// is processed before its fostered content.
		// See subtle changes in newline migration with this wikitext:
		// "<table>\n<tr> || ||\n<td> a\n</table>"
		// when walking backward vs. forward.
		//
		// Separately, walking backward also lets us ignore
		// newly added children after child (because of
		// migrated newline nodes from child's DOM tree).
		for ( $child = $elt->lastChild; $child !== null; $child = $child->previousSibling ) {
			$this->doMigrateTrailingNLs( $child, $env );
		}

		// 2. Process 'elt' itself after, provided newlines may leave it at all.
		if ( !$this->canMigrateNLOutOfNode( $elt ) ) {
			return;
		}

		$firstEltToMigrate = null;
		$migrationBarrier = null;
		$partialContent = false;

		// We can migrate trailing newlines across nodes that have
		// zero-wikitext-width: skip over them, remembering the leftmost one.
		$n = $elt->lastChild;
		while ( $n instanceof Element && $this->hasZeroWidthWT( $n ) ) {
			$migrationBarrier = $n;
			$n = $n->previousSibling;
		}

		// In td/th, a comment stops the scan and a whitespace-only text node
		// is not moved wholesale; only its trailing newlines are.
		$isTdTh = in_array( DOMUtils::nodeName( $elt ), [ 'td', 'th' ], true );

		// Find nodes that need to be migrated out:
		// - a sequence of comment and newline nodes that is preceded by
		//   a non-migratable node (text node with non-white-space content
		//   or an element node).
		$foundNL = false;
		$tsrCorrection = 0;
		while ( $n instanceof Text || $n instanceof Comment ) {
			if ( $n instanceof Comment ) {
				if ( $isTdTh ) {
					break;
				}
				$firstEltToMigrate = $n;
				$tsrCorrection += WTUtils::decodedCommentLength( $n );
			} else {
				$text = $n->nodeValue;
				$movesEntirely = !$isTdTh &&
					preg_match( '/^[ \t\r\n]*\n[ \t\r\n]*$/D', $text );
				if ( !$movesEntirely && !str_ends_with( $text, "\n" ) ) {
					// Nothing migratable in this text node: end of the run.
					break;
				}

				$foundNL = true;
				$firstEltToMigrate = $n;
				if ( $movesEntirely ) {
					// all whitespace is moved
					$partialContent = false;
					$tsrCorrection += strlen( $text );
				} else {
					// only newlines moved
					$partialContent = true;
					preg_match( '/\n+$/D', $text, $matches );
					$tsrCorrection += strlen( $matches[0] ?? '' );
					break;
				}
			}

			$n = $n->previousSibling;
		}

		if ( !$firstEltToMigrate || !$foundNL ) {
			return;
		}

		// Re-home everything from firstEltToMigrate up to (but excluding) the
		// barrier so that it directly follows 'elt' in elt's parent.
		$eltParent = $elt->parentNode;
		$insertPosition = $elt->nextSibling;
		for ( $n = $firstEltToMigrate; $n !== $migrationBarrier; $n = $next ) {
			$next = $n->nextSibling;
			if ( $partialContent ) {
				// Only the first node can be partial: strip its trailing
				// newlines in place and move them out as a new text node.
				$nls = $n->nodeValue;
				$n->nodeValue = preg_replace( '/\n+$/D', '', $n->nodeValue, 1 );
				$nls = substr( $nls, strlen( $n->nodeValue ) );
				$n = $n->ownerDocument->createTextNode( $nls );
				$partialContent = false;
			}
			$eltParent->insertBefore( $n, $insertPosition );
		}

		// Adjust tsr of any nodes after migrationBarrier.
		// Ex: zero-width nodes that have valid tsr on them
		// By definition (zero-width), these are synthetic nodes added by Parsoid
		// that aren't present in the original wikitext.
		for ( $n = $migrationBarrier; $n; $n = $n->nextSibling ) {
			// TSR is guaranteed to exist and be valid
			// (checked by hasZeroWidthWT above)
			'@phan-var Element $n'; // @var Element $n
			$dp = DOMDataUtils::getDataParsoid( $n );
			$dp->tsr = $dp->tsr->offset( -$tsrCorrection );
		}
	}

	/**
	 * Processor entry point: runs the migration over the tree rooted at $root.
	 * $options and $atTopLevel are not used by this pass.
	 *
	 * @inheritDoc
	 */
	public function run(
		Env $env, Node $root, array $options = [], bool $atTopLevel = false
	): void {
		$this->doMigrateTrailingNLs( $root, $env );
	}
}