Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 107 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
EncapsulatedContentHandler | |
0.00% |
0 / 107 |
|
0.00% |
0 / 7 |
2450 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
handle | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
182 | |||
before | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
132 | |||
after | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
56 | |||
handleListPrefix | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
56 | |||
isTplListWithoutSharedPrefix | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
42 | |||
parentBulletsHaveBeenEmitted | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Html2Wt\DOMHandlers; |
5 | |
6 | use LogicException; |
7 | use Wikimedia\Assert\Assert; |
8 | use Wikimedia\Parsoid\Core\ClientError; |
9 | use Wikimedia\Parsoid\DOM\Element; |
10 | use Wikimedia\Parsoid\DOM\Node; |
11 | use Wikimedia\Parsoid\Html2Wt\SerializerState; |
12 | use Wikimedia\Parsoid\Utils\DOMCompat; |
13 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
14 | use Wikimedia\Parsoid\Utils\DOMUtils; |
15 | use Wikimedia\Parsoid\Utils\PHPUtils; |
16 | use Wikimedia\Parsoid\Utils\WTUtils; |
17 | |
/**
 * Html2Wt DOM handler for encapsulated content: nodes whose typeof is
 * mw:Transclusion, mw:Param, mw:Extension/* or mw:LanguageVariant.
 *
 * Instead of serializing the node's DOM subtree, the wikitext is recovered
 * from data-mw (template parts / extension name), from the extension's own
 * domToWikitext() implementation, or, as a last resort, from data-parsoid.src.
 */
class EncapsulatedContentHandler extends DOMHandler {

	/** @var array[] Maps list item HTML elements to the expected parent element */
	private const PARENT_MAP = [
		'li' => [ 'ul', 'ol' ],
		'dt' => [ 'dl' ],
		'dd' => [ 'dl' ],
	];

	public function __construct() {
		parent::__construct( false );
	}

	/**
	 * Serialize an encapsulated node and return the node at which
	 * serialization should resume.
	 *
	 * - mw:Transclusion / mw:Param: serialized from data-mw.parts, falling back
	 *   to data-parsoid.src (with an error log) when parts is not an array.
	 * - mw:Extension/*: serialized by the registered extension tag
	 *   implementation if there is one, otherwise (or if it returns false) by
	 *   the serializer's default extension handler; falls back to
	 *   data-parsoid.src when data-mw.name is unavailable.
	 * - mw:LanguageVariant: delegated to the serializer's language variant
	 *   handler.
	 *
	 * @inheritDoc
	 * @throws ClientError if neither data-mw nor data-parsoid.src provides
	 *   enough information to serialize the node
	 */
	public function handle(
		Element $node, SerializerState $state, bool $wrapperUnmodified = false
	): ?Node {
		$env = $state->getEnv();
		$serializer = $state->serializer;
		$dp = DOMDataUtils::getDataParsoid( $node );
		$dataMw = DOMDataUtils::getDataMw( $node );
		$src = null;
		$transclusionType = DOMUtils::matchTypeOf( $node, '/^mw:(Transclusion|Param)$/' );
		$extType = DOMUtils::matchTypeOf( $node, '!^mw:Extension/!' );
		if ( $transclusionType ) {
			if ( is_array( $dataMw->parts ?? null ) ) {
				$src = $serializer->serializeFromParts( $state, $node, $dataMw->parts );
			} elseif ( isset( $dp->src ) ) {
				$env->log( 'error', 'data-mw.parts is not an array: ', DOMCompat::getOuterHTML( $node ),
					PHPUtils::jsonEncode( $dataMw ) );
				$src = $dp->src;
			} else {
				throw new ClientError(
					"Cannot serialize $transclusionType without data-mw.parts or data-parsoid.src"
				);
			}
		} elseif ( $extType ) {
			if ( ( $dataMw->name ?? null ) == '' && !isset( $dp->src ) ) {
				// If there was no typeOf name, and no dp.src, try getting
				// the name out of the mw:Extension type. This will
				// generate an empty extension tag, but it's better than
				// just an error.
				$extGivenName = substr( $extType, strlen( 'mw:Extension/' ) );
				if ( $extGivenName ) {
					$env->log( 'error', 'no data-mw name for extension in: ', DOMCompat::getOuterHTML( $node ) );
					$dataMw->name = $extGivenName;
				}
			}
			if ( ( $dataMw->name ?? null ) != '' ) {
				$ext = $env->getSiteConfig()->getExtTagImpl( $dataMw->name );
				if ( $ext ) {
					$src = $ext->domToWikitext( $state->extApi, $node, $wrapperUnmodified );
					if ( $src === false ) {
						// The extension declined to serialize; use the generic path.
						$src = $serializer->defaultExtensionHandler( $node, $state );
					}
				} else {
					$src = $serializer->defaultExtensionHandler( $node, $state );
				}
			} elseif ( isset( $dp->src ) ) {
				$env->log( 'error', 'data-mw missing in: ' . DOMCompat::getOuterHTML( $node ) );
				$src = $dp->src;
			} else {
				throw new ClientError( 'Cannot serialize extension without data-mw.name or data-parsoid.src.' );
			}
		} elseif ( DOMUtils::hasTypeOf( $node, 'mw:LanguageVariant' ) ) {
			$state->serializer->languageVariantHandler( $node );
			return $node->nextSibling;
		} else {
			throw new LogicException( 'Should never reach here' );
		}

		$state->singleLineContext->disable();
		try {
			// FIXME: https://phabricator.wikimedia.org/T184779
			$src = ( $dataMw->extPrefix ?? '' ) . $src
				. ( $dataMw->extSuffix ?? '' );
			$serializer->emitWikitext( $this->handleListPrefix( $node, $state ) . $src, $node );
		} finally {
			// Always undo the disable() above so the context stack stays
			// balanced even if emitting the wikitext throws.
			$state->singleLineContext->pop();
		}
		return WTUtils::skipOverEncapsulatedContent( $node );
	}

	/**
	 * Whether $node is a plain extension tag (mw:Extension/* that is not also
	 * a mw:Transclusion) with a data-mw name, whose extension tag config
	 * requests the 'block' html2wt format, and which WTUtils::isNewElt()
	 * reports as a new element.
	 *
	 * @param Element $node
	 * @param SerializerState $state
	 * @return bool
	 */
	private function isNewBlockFormatExtension( Element $node, SerializerState $state ): bool {
		$dataMw = DOMDataUtils::getDataMw( $node );
		if ( !DOMUtils::matchTypeOf( $node, '!^mw:Extension/!' )
			// Only apply to plain extension tags.
			|| DOMUtils::hasTypeOf( $node, 'mw:Transclusion' )
			|| !isset( $dataMw->name )
		) {
			return false;
		}
		$extConfig = $state->getEnv()->getSiteConfig()->getExtTagConfig( $dataMw->name );
		return ( $extConfig['options']['html2wt']['format'] ?? '' ) === 'block'
			&& WTUtils::isNewElt( $node );
	}

	// XXX: This is questionable, as the template can expand
	// to newlines too. Which default should we pick for new
	// content? We don't really want to make separator
	// newlines in HTML significant for the semantics of the
	// template content.

	/**
	 * Newline constraints to apply before this node.
	 *
	 * New block-format extension tags ask for between 1 and 2 newlines.
	 * Otherwise, if data-parsoid records a firstWikitextNode, the constraints
	 * are delegated to the handler for that tag name; failing that, no
	 * constraints are returned.
	 *
	 * @inheritDoc
	 */
	public function before( Element $node, Node $otherNode, SerializerState $state ): array {
		// Handle native extension constraints.
		if ( $this->isNewBlockFormatExtension( $node, $state ) ) {
			return [ 'min' => 1, 'max' => 2 ];
		}

		// If this content came from a multi-part-template-block
		// use the first node in that block for determining
		// newline constraints.
		$dp = DOMDataUtils::getDataParsoid( $node );
		if ( isset( $dp->firstWikitextNode ) ) {
			// Note: this should match the case returned by DOMCompat::nodeName
			// so that this is effectively a case-insensitive comparison here.
			// (ie, data-parsoid could have either uppercase tag names or
			// lowercase tag names and this code should still work.)
			$ftn = mb_strtolower( $dp->firstWikitextNode, "UTF-8" );
			$h = ( new DOMHandlerFactory )->newFromTagHandler( $ftn );
			if ( !$h && ( $dp->stx ?? null ) === 'html' && $ftn !== 'a_html' ) {
				$h = new FallbackHTMLHandler();
			}
			if ( $h ) {
				return $h->before( $node, $otherNode, $state );
			}
		}

		// default behavior
		return [];
	}

	/**
	 * Newline constraints to apply after this node.
	 *
	 * New block-format extension tags ask for between 1 and 2 newlines unless
	 * DOMUtils::atTheTop() holds for $otherNode; everything else gets no
	 * constraints.
	 *
	 * @inheritDoc
	 */
	public function after( Element $node, Node $otherNode, SerializerState $state ): array {
		// Handle native extension constraints.
		if ( $this->isNewBlockFormatExtension( $node, $state )
			&& !DOMUtils::atTheTop( $otherNode )
		) {
			return [ 'min' => 1, 'max' => 2 ];
		}

		// default behavior
		return [];
	}

	/**
	 * Compute the list bullets that must be emitted in front of the
	 * serialized source of an encapsulated list or list item.
	 *
	 * @param Element $node
	 * @param SerializerState $state
	 * @return string The bullets of the parent node, or '' when none are needed
	 */
	private function handleListPrefix( Element $node, SerializerState $state ): string {
		$bullets = '';
		if ( DOMUtils::isListOrListItem( $node )
			&& !$this->parentBulletsHaveBeenEmitted( $node )
			&& !DOMUtils::previousNonSepSibling( $node ) // Maybe consider parentNode.
			&& $this->isTplListWithoutSharedPrefix( $node )
			// Nothing to do for definition list rows,
			// since we're emitting for the parent node.
			&& !( DOMCompat::nodeName( $node ) === 'dd'
				&& ( DOMDataUtils::getDataParsoid( $node )->stx ?? null ) === 'row' )
		) {
			// phan fails to infer that the parent of a Element is always a Element
			$parentNode = $node->parentNode;
			'@phan-var Element $parentNode';
			$bullets = $this->getListBullets( $state, $parentNode );
		}
		return $bullets;
	}

	/**
	 * Normally we wait until hitting the deepest nested list element before
	 * emitting bullets. However, if one of those list elements is about-id
	 * marked, the tag handler will serialize content from data-mw parts or src.
	 * This is a problem when a list wasn't assigned the shared prefix of bullets.
	 * For example,
	 *
	 * ** a
	 * ** b
	 *
	 * Will assign bullets as,
	 *
	 * <ul><li-*>
	 *   <ul>
	 *     <li-*> a</li>   <!-- no shared prefix  -->
	 *     <li-**> b</li>  <!-- gets both bullets -->
	 *   </ul>
	 * </li></ul>
	 *
	 * For the b-li, getListBullets will walk up and emit the two bullets it was
	 * assigned. If it was about-id marked, the parts would contain the two bullet
	 * start tag it was assigned. However, for the a-li, only one bullet is
	 * associated. When it's about-id marked, serializing the data-mw parts or
	 * src would miss the bullet assigned to the container li.
	 *
	 * @param Element $node
	 * @return bool
	 */
	private function isTplListWithoutSharedPrefix( Element $node ): bool {
		if ( !WTUtils::isEncapsulationWrapper( $node ) ) {
			return false;
		}

		if ( DOMUtils::hasTypeOf( $node, 'mw:Transclusion' ) ) {
			// If the first part is a string, template ranges were expanded to
			// include this list element. That may be trouble. Otherwise,
			// containers aren't part of the template source and we should emit
			// them.
			$dataMw = DOMDataUtils::getDataMw( $node );
			// `??` also covers an empty parts array, which would otherwise
			// raise an undefined-key warning on index 0.
			$firstPart = $dataMw->parts[0] ?? null;
			if ( !is_string( $firstPart ) ) {
				return true;
			}
			// Less than two bullets indicates that a shared prefix was not
			// assigned to this element. A safe indication that we should call
			// getListBullets on the containing list element.
			return !preg_match( '/^[*#:;]{2,}$/D', $firstPart );
		} elseif ( DOMUtils::matchTypeOf( $node, '/^mw:(Extension|Param)/' ) ) {
			// Containers won't ever be part of the src here, so emit them.
			return true;
		} else {
			return false;
		}
	}

	/**
	 * Whether the bullets of $node's ancestors need no further emission:
	 * true for literal HTML nodes, for lists whose parent is not a list item,
	 * and for list items whose (non-builder-inserted) parent is not one of the
	 * expected list containers in PARENT_MAP.
	 *
	 * @param Element $node A list, list item or literal html node
	 * @return bool
	 */
	private function parentBulletsHaveBeenEmitted( Element $node ): bool {
		if ( WTUtils::isLiteralHTMLNode( $node ) ) {
			return true;
		} elseif ( DOMUtils::isList( $node ) ) {
			return !DOMUtils::isListItem( $node->parentNode );
		} else {
			Assert::invariant( DOMUtils::isListItem( $node ),
				'$node must be a list, list item or literal html node' );
			$parentNode = $node->parentNode;
			// Skip builder-inserted wrappers
			while ( $this->isBuilderInsertedElt( $parentNode ) ) {
				$parentNode = $parentNode->parentNode;
			}
			return !in_array(
				DOMCompat::nodeName( $parentNode ),
				self::PARENT_MAP[DOMCompat::nodeName( $node )],
				true
			);
		}
	}
}