Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 93 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
EncapsulatedContentHandler | |
0.00% |
0 / 93 |
|
0.00% |
0 / 7 |
1892 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
handle | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
90 | |||
before | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
110 | |||
after | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
42 | |||
handleListPrefix | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
56 | |||
isTplListWithoutSharedPrefix | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
42 | |||
parentBulletsHaveBeenEmitted | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Html2Wt\DOMHandlers; |
5 | |
6 | use LogicException; |
7 | use Wikimedia\Assert\Assert; |
8 | use Wikimedia\Parsoid\Core\ClientError; |
9 | use Wikimedia\Parsoid\DOM\Element; |
10 | use Wikimedia\Parsoid\DOM\Node; |
11 | use Wikimedia\Parsoid\Html2Wt\SerializerState; |
12 | use Wikimedia\Parsoid\Utils\DiffDOMUtils; |
13 | use Wikimedia\Parsoid\Utils\DOMCompat; |
14 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
15 | use Wikimedia\Parsoid\Utils\DOMUtils; |
16 | use Wikimedia\Parsoid\Utils\PHPUtils; |
17 | use Wikimedia\Parsoid\Utils\WTUtils; |
18 | |
/**
 * html2wt DOM handler for encapsulated content: transclusions and
 * parameters (mw:Transclusion / mw:Param), extension tags, and
 * language variant markup (mw:LanguageVariant).
 */
class EncapsulatedContentHandler extends DOMHandler {

	/** @var array[] Maps list item element names to the names of their expected parent elements */
	private $parentMap = [
		'li' => [ 'ul', 'ol' ],
		'dt' => [ 'dl' ],
		'dd' => [ 'dl' ],
	];

	public function __construct() {
		parent::__construct( false );
	}

	/**
	 * @inheritDoc
	 * @throws ClientError if a transclusion / param node carries neither an
	 *   array in data-mw.parts nor a data-parsoid.src to fall back on
	 */
	public function handle(
		Element $node, SerializerState $state, bool $wrapperUnmodified = false
	): ?Node {
		$env = $state->getEnv();
		$serializer = $state->serializer;
		$dp = DOMDataUtils::getDataParsoid( $node );
		$dataMw = DOMDataUtils::getDataMw( $node );
		$src = null;
		$transclusionType = DOMUtils::matchTypeOf( $node, '/^mw:(Transclusion|Param)$/' );
		$extTagName = WTUtils::getExtTagName( $node );
		if ( $transclusionType ) {
			if ( is_array( $dataMw->parts ?? null ) ) {
				$src = $serializer->serializeFromParts( $state, $node, $dataMw->parts );
			} elseif ( isset( $dp->src ) ) {
				// Malformed data-mw: log it and fall back to the recorded source
				$env->log( 'error', 'data-mw.parts is not an array: ', DOMCompat::getOuterHTML( $node ),
					PHPUtils::jsonEncode( $dataMw ) );
				$src = $dp->src;
			} else {
				throw new ClientError(
					"Cannot serialize $transclusionType without data-mw.parts or data-parsoid.src"
				);
			}
		} elseif ( $extTagName ) {
			// Set name since downstream code assumes it
			if ( ( $dataMw->name ?? '' ) === '' ) {
				$dataMw->name = $extTagName;
			}
			// false means "no extension-provided wikitext"; in that case
			// the serializer's default extension handler is used instead.
			$src = false;
			$ext = $env->getSiteConfig()->getExtTagImpl( $extTagName );
			if ( $ext ) {
				$src = $ext->domToWikitext( $state->extApi, $node, $wrapperUnmodified );
			}
			if ( $src === false ) {
				$src = $serializer->defaultExtensionHandler( $node, $state );
			}
		} elseif ( DOMUtils::hasTypeOf( $node, 'mw:LanguageVariant' ) ) {
			// Language variant markup is delegated entirely to the serializer;
			// nothing is emitted from here.
			$state->serializer->languageVariantHandler( $node );
			return $node->nextSibling;
		} else {
			throw new LogicException( 'Should never reach here' );
		}
		// Single-line context is disabled only while emitting this source
		// and restored by the matching pop() below.
		$state->singleLineContext->disable();
		// FIXME: https://phabricator.wikimedia.org/T184779
		$src = ( $dataMw->extPrefix ?? '' ) . $src
			. ( $dataMw->extSuffix ?? '' );
		$serializer->emitWikitext( $this->handleListPrefix( $node, $state ) . $src, $node );
		$state->singleLineContext->pop();
		return WTUtils::skipOverEncapsulatedContent( $node );
	}

	// XXX: This is questionable, as the template can expand
	// to newlines too. Which default should we pick for new
	// content? We don't really want to make separator
	// newlines in HTML significant for the semantics of the
	// template content.

	/** @inheritDoc */
	public function before( Element $node, Node $otherNode, SerializerState $state ): array {
		// Handle native extension constraints. Only apply to plain extension tags.
		$extTagName = WTUtils::getExtTagName( $node );
		if ( $extTagName && !DOMUtils::hasTypeOf( $node, 'mw:Transclusion' ) ) {
			$extConfig = $state->getEnv()->getSiteConfig()->getExtTagConfig( $extTagName );
			if (
				( $extConfig['options']['html2wt']['format'] ?? '' ) === 'block' &&
				WTUtils::isNewElt( $node )
			) {
				return [ 'min' => 1, 'max' => 2 ];
			}
		}

		// If this content came from a multi-part-template-block
		// use the first node in that block for determining
		// newline constraints.
		$dp = DOMDataUtils::getDataParsoid( $node );
		if ( isset( $dp->firstWikitextNode ) ) {
			// Note: this should match the case returned by DOMCompat::nodeName
			// so that this is effectively a case-insensitive comparison here.
			// (ie, data-parsoid could have either uppercase tag names or
			// lowercase tag names and this code should still work.)
			$ftn = mb_strtolower( $dp->firstWikitextNode, "UTF-8" );
			$h = ( new DOMHandlerFactory() )->newFromTagHandler( $ftn );
			if ( !$h && ( $dp->stx ?? null ) === 'html' && $ftn !== 'a_html' ) {
				$h = new FallbackHTMLHandler();
			}
			if ( $h ) {
				return $h->before( $node, $otherNode, $state );
			}
		}

		// default behavior
		return [];
	}

	/** @inheritDoc */
	public function after( Element $node, Node $otherNode, SerializerState $state ): array {
		// Handle native extension constraints. Only apply to plain extension tags.
		$extTagName = WTUtils::getExtTagName( $node );
		if ( $extTagName && !DOMUtils::hasTypeOf( $node, 'mw:Transclusion' ) ) {
			$extConfig = $state->getEnv()->getSiteConfig()->getExtTagConfig( $extTagName );
			if (
				( $extConfig['options']['html2wt']['format'] ?? '' ) === 'block' &&
				WTUtils::isNewElt( $node ) && !DOMUtils::atTheTop( $otherNode )
			) {
				return [ 'min' => 1, 'max' => 2 ];
			}
		}

		// default behavior
		return [];
	}

	/**
	 * Compute the list bullets to emit in front of the serialized source
	 * of an encapsulated list or list item.
	 *
	 * Bullets are taken from the parent node (via getListBullets) only when
	 * every condition in the guard below holds; otherwise the prefix is empty.
	 *
	 * @param Element $node
	 * @param SerializerState $state
	 * @return string The bullet prefix, or '' when none should be emitted
	 */
	private function handleListPrefix( Element $node, SerializerState $state ): string {
		$bullets = '';
		if ( DOMUtils::isListOrListItem( $node )
			&& !$this->parentBulletsHaveBeenEmitted( $node )
			&& !DiffDOMUtils::previousNonSepSibling( $node ) // Maybe consider parentNode.
			&& $this->isTplListWithoutSharedPrefix( $node )
			// Nothing to do for definition list rows,
			// since we're emitting for the parent node.
			&& !( DOMCompat::nodeName( $node ) === 'dd'
				&& ( DOMDataUtils::getDataParsoid( $node )->stx ?? null ) === 'row' )
		) {
			// phan fails to infer that the parent of an Element is always an Element
			$parentNode = $node->parentNode;
			'@phan-var Element $parentNode';
			$bullets = $this->getListBullets( $state, $parentNode );
		}
		return $bullets;
	}

	/**
	 * Normally we wait until hitting the deepest nested list element before
	 * emitting bullets. However, if one of those list elements is about-id
	 * marked, the tag handler will serialize content from data-mw parts or src.
	 * This is a problem when a list wasn't assigned the shared prefix of bullets.
	 * For example,
	 *
	 * ** a
	 * ** b
	 *
	 * Will assign bullets as,
	 *
	 * <ul><li-*>
	 *   <ul>
	 *     <li-*> a</li>   <!-- no shared prefix  -->
	 *     <li-**> b</li>  <!-- gets both bullets -->
	 *   </ul>
	 * </li></ul>
	 *
	 * For the b-li, getListBullets will walk up and emit the two bullets it was
	 * assigned. If it was about-id marked, the parts would contain the two bullet
	 * start tag it was assigned. However, for the a-li, only one bullet is
	 * associated. When it's about-id marked, serializing the data-mw parts or
	 * src would miss the bullet assigned to the container li.
	 *
	 * @param Element $node
	 * @return bool
	 */
	private function isTplListWithoutSharedPrefix( Element $node ): bool {
		if ( !WTUtils::isEncapsulationWrapper( $node ) ) {
			return false;
		}

		if ( DOMUtils::hasTypeOf( $node, 'mw:Transclusion' ) ) {
			// If the first part is a string, template ranges were expanded to
			// include this list element. That may be trouble. Otherwise,
			// containers aren't part of the template source and we should emit
			// them.
			$dataMw = DOMDataUtils::getDataMw( $node );
			// handle() tolerates data-mw.parts that is missing or not an array
			// (it falls back to data-parsoid.src), so don't index into parts
			// blindly here: only look at the first entry of a real array, and
			// treat a missing entry like a non-string first part.
			$parts = $dataMw->parts ?? null;
			$firstPart = is_array( $parts ) ? ( $parts[0] ?? null ) : null;
			if ( !is_string( $firstPart ) ) {
				return true;
			}
			// Less than two bullets indicates that a shared prefix was not
			// assigned to this element. A safe indication that we should call
			// getListBullets on the containing list element.
			return !preg_match( '/^[*#:;]{2,}$/D', $firstPart );
		} elseif ( DOMUtils::matchTypeOf( $node, '/^mw:(Extension|Param)/' ) ) {
			// Containers won't ever be part of the src here, so emit them.
			return true;
		} else {
			return false;
		}
	}

	/**
	 * Decide whether bullets for $node's ancestors need not be emitted here.
	 *
	 * Returns true for literal HTML nodes, for lists whose parent is not a
	 * list item, and for list items whose nearest non-builder-inserted
	 * ancestor is not one of the parents expected by $parentMap.
	 *
	 * @param Element $node A list, list item or literal HTML node
	 * @return bool
	 */
	private function parentBulletsHaveBeenEmitted( Element $node ): bool {
		if ( WTUtils::isLiteralHTMLNode( $node ) ) {
			return true;
		} elseif ( DOMUtils::isList( $node ) ) {
			return !DOMUtils::isListItem( $node->parentNode );
		} else {
			Assert::invariant( DOMUtils::isListItem( $node ),
				'$node must be a list, list item or literal html node' );
			$parentNode = $node->parentNode;
			// Skip builder-inserted wrappers
			while ( $this->isBuilderInsertedElt( $parentNode ) ) {
				$parentNode = $parentNode->parentNode;
			}
			return !in_array(
				DOMCompat::nodeName( $parentNode ),
				$this->parentMap[DOMCompat::nodeName( $node )],
				true
			);
		}
	}
}