Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
15.86% |
23 / 145 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
MetaHandler | |
15.86% |
23 / 145 |
|
0.00% |
0 / 7 |
3701.80 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
handle | |
32.39% |
23 / 71 |
|
0.00% |
0 / 1 |
308.09 | |||
needToWriteStartMeta | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
110 | |||
needToWriteEndMeta | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
90 | |||
needNewLineSepBeforeMeta | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
30 | |||
before | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
156 | |||
after | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
132 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Html2Wt\DOMHandlers; |
5 | |
6 | use Wikimedia\Parsoid\DOM\Element; |
7 | use Wikimedia\Parsoid\DOM\Node; |
8 | use Wikimedia\Parsoid\DOM\Text; |
9 | use Wikimedia\Parsoid\Html2Wt\DiffUtils; |
10 | use Wikimedia\Parsoid\Html2Wt\SerializerState; |
11 | use Wikimedia\Parsoid\Html2Wt\WTSUtils; |
12 | use Wikimedia\Parsoid\Utils\DiffDOMUtils; |
13 | use Wikimedia\Parsoid\Utils\DOMCompat; |
14 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
15 | use Wikimedia\Parsoid\Utils\DOMUtils; |
16 | use Wikimedia\Parsoid\Utils\WTUtils; |
17 | |
/**
 * Html2Wt DOM handler that turns a meta node back into wikitext.
 *
 * Depending on the node's `property` / `typeof` attributes it emits page
 * property magic words, annotation range tags, `<includeonly>` /
 * `<noinclude>` / `<onlyinclude>` markers, or falls back to
 * FallbackHTMLHandler. The before()/after() hooks report the minimum number
 * of newlines wanted between the node and its neighbour.
 */
class MetaHandler extends DOMHandler {

	/**
	 * Typeof pattern of metas (placeholders, <*include*> tags, annotations)
	 * that do not need to be serialized on their own line.
	 */
	private const INLINE_TYPEOF_REGEXP = '#^mw:(Placeholder|Includes|Annotation)(/|$)#';

	public function __construct() {
		// NOTE(review): the meaning of the `false` flag is defined by
		// DOMHandler::__construct, which is not visible from this file.
		parent::__construct( false );
	}

	/** @inheritDoc */
	public function handle(
		Element $node, SerializerState $state, bool $wrapperUnmodified = false
	): ?Node {
		$property = DOMCompat::getAttribute( $node, 'property' ) ?? '';
		$dp = DOMDataUtils::getDataParsoid( $node );
		$dmw = DOMDataUtils::getDataMw( $node );

		// Placeholders that carry their original source are emitted verbatim.
		// emitPlaceholderSrc() is not defined in this class (presumably
		// inherited from DOMHandler).
		if ( isset( $dp->src ) &&
			DOMUtils::matchTypeOf( $node, '#^mw:Placeholder(/|$)#' )
		) {
			$this->emitPlaceholderSrc( $node, $state );
			return $node->nextSibling;
		}

		// Check for property before type so that page properties with
		// templated attrs roundtrip properly.
		// Ex: {{DEFAULTSORT:{{1x|foo}} }}
		if ( $property ) {
			$this->emitPageProp( $node, $state, $property, $dp );
		} elseif ( WTUtils::isAnnotationStartMarkerMeta( $node ) ) {
			$this->emitAnnotationStart( $node, $state );
		} elseif ( WTUtils::isAnnotationEndMarkerMeta( $node ) ) {
			if ( $this->needToWriteEndMeta( $state, $node ) ) {
				$annType = WTUtils::extractAnnotationType( $node );
				$state->emitChunk( '</' . $annType . '>', $node );
				$state->closeAnnotationRange( $annType );
			}
		} else {
			switch ( DOMCompat::getAttribute( $node, 'typeof' ) ) {
				case 'mw:Includes/IncludeOnly':
					// Remove the dp.src when older revisions of HTML expire in RESTBase
					$state->emitChunk( $dmw->src ?? $dp->src ?? '', $node );
					break;
				case 'mw:Includes/NoInclude':
					$state->emitChunk( $dp->src ?? '<noinclude>', $node );
					break;
				case 'mw:Includes/NoInclude/End':
					$state->emitChunk( $dp->src ?? '</noinclude>', $node );
					break;
				case 'mw:Includes/OnlyInclude':
					$state->emitChunk( $dp->src ?? '<onlyinclude>', $node );
					break;
				case 'mw:Includes/OnlyInclude/End':
					$state->emitChunk( $dp->src ?? '</onlyinclude>', $node );
					break;
				case 'mw:Includes/IncludeOnly/End':
				case 'mw:DiffMarker/inserted':
				case 'mw:DiffMarker/deleted':
				case 'mw:DiffMarker/moved':
				case 'mw:Separator':
					// These produce no wikitext of their own: just ignore them.
					break;
				default:
					( new FallbackHTMLHandler() )->handle( $node, $state );
			}
		}
		return $node->nextSibling;
	}

	/**
	 * Serializes a meta that carries a `property` attribute.
	 *
	 * `mw:PageProp/...` properties are emitted as wikitext; any other
	 * property value is delegated to FallbackHTMLHandler.
	 *
	 * @param Element $node
	 * @param SerializerState $state
	 * @param string $property Non-empty value of the `property` attribute
	 * @param mixed $dp Result of DOMDataUtils::getDataParsoid() for $node
	 */
	private function emitPageProp(
		Element $node, SerializerState $state, string $property, $dp
	): void {
		preg_match( '#^mw\:PageProp/(.*)$#D', $property, $switchType );
		if ( !$switchType ) {
			( new FallbackHTMLHandler() )->handle( $node, $state );
			return;
		}

		$out = $switchType[1];
		$cat = preg_match( '/^(?:category)?(.*)/', $out, $catMatch );
		if ( $cat && (
			// Need this b/c support while RESTBase has Parsoid HTML
			// in storage with meta tags for these.
			// Can be removed as part of T335843
			$catMatch[1] === 'defaultsort' || $catMatch[1] === 'displaytitle'
		) ) {
			$contentInfo = $state->serializer->serializedAttrVal( $node, 'content' );
			if ( WTUtils::hasExpandedAttrsType( $node ) ) {
				$out = '{{' . $contentInfo['value'] . '}}';
			} elseif ( isset( $dp->src ) ) {
				$src = $dp->src;
				// On PHP 8, strpos() throws a ValueError when the offset lies
				// beyond the end of the haystack. A source of two bytes or
				// fewer cannot hold a colon at offset >= 2 anyway.
				$colon = strlen( $src ) > 2 ? strpos( $src, ':', 2 ) : false;
				// Keep the original spelling of everything before the first
				// ':' or '}' of the source.
				$out = preg_replace( '/^([^:}]+).*$/D', "$1", $src, 1 );
				if ( ( $colon === false ) && ( $contentInfo['value'] === '' ) ) {
					$out .= '}}';
				} else {
					$out .= ':' . $contentInfo['value'] . '}}';
				}
			} else {
				$magicWord = mb_strtoupper( $catMatch[1] );
				$out = '{{' . $magicWord . ':' . $contentInfo['value'] . '}}';
			}
		} else {
			$out = $state->getEnv()->getSiteConfig()->getMagicWordWT(
				$switchType[1], $dp->magicSrc ?? '' );
		}
		$state->emitChunk( $out, $node );
	}

	/**
	 * Emits the opening tag of an annotation range, including the attributes
	 * recorded in data-mw, and registers the range as open on the state.
	 * Nothing is emitted when needToWriteStartMeta() says the tag must not be
	 * written here.
	 *
	 * @param Element $node
	 * @param SerializerState $state
	 */
	private function emitAnnotationStart( Element $node, SerializerState $state ): void {
		$annType = WTUtils::extractAnnotationType( $node );
		if ( !$this->needToWriteStartMeta( $state, $node ) ) {
			return;
		}

		$datamw = DOMDataUtils::getDataMw( $node );
		$attrs = "";
		if ( isset( $datamw->attrs ) ) {
			foreach ( get_object_vars( $datamw->attrs ) as $k => $v ) {
				// An empty value is written as a bare attribute name.
				$attrs .= ( $v === "" ) ? ' ' . $k : ' ' . $k . '="' . $v . '"';
			}
		}
		// Follow-up on attributes sanitation to happen in T295168
		$state->emitChunk( '<' . $annType . $attrs . '>', $node );
		$state->openAnnotationRange( $annType, $datamw->extendedRange ?? false );
	}

	/**
	 * Decides if we need to write an annotation start meta at the place we encounter it
	 * @param SerializerState $state
	 * @param Element $node
	 * @return bool
	 */
	private function needToWriteStartMeta( SerializerState $state, Element $node ): bool {
		if ( !$state->selserMode || !WTUtils::isMovedMetaTag( $node ) ) {
			return true;
		}

		// If the meta tag has been moved, it comes from its next element.... "almost".
		// First exception is if we have several marker annotations in a row - then we need
		// to pass them all. Second exception is if we have fostered content: then we're
		// interested in what happens in the table, which happens _after_ the fostered content.
		$next = DOMCompat::getNextElementSibling( $node );
		while ( $next !== null && (
			WTUtils::isMarkerAnnotation( $next ) ||
			!empty( DOMDataUtils::getDataParsoid( $next )->fostered )
		) ) {
			$next = DOMCompat::getNextElementSibling( $next );
		}

		// When the content from which the meta tag comes gets
		// deleted or modified, we emit _now_ so that we don't risk losing it. The range
		// stays extended in the round-tripped version of the wikitext.
		return $next === null || $this->siblingRequiresEmit( $state, $next );
	}

	/**
	 * Decides if we need to write an annotation end meta at the place we encounter it
	 * @param SerializerState $state
	 * @param Element $node
	 * @return bool
	 */
	private function needToWriteEndMeta( SerializerState $state, Element $node ): bool {
		if ( !$state->selserMode || !WTUtils::isMovedMetaTag( $node ) ) {
			return true;
		}

		// Walk backwards past any run of marker annotations.
		$prev = DOMCompat::getPreviousElementSibling( $node );
		while ( $prev !== null && WTUtils::isMarkerAnnotation( $prev ) ) {
			$prev = DOMCompat::getPreviousElementSibling( $prev );
		}

		return $prev === null || $this->siblingRequiresEmit( $state, $prev );
	}

	/**
	 * Shared tail of needToWriteStartMeta() / needToWriteEndMeta(): tells
	 * whether the element a moved meta was found next to forces the meta to
	 * be written at its current position.
	 *
	 * That is the case when the sibling is a diff marker, carries non-empty
	 * diff data, or when WTSUtils::origSrcValidInEditedContext() rejects it.
	 *
	 * @param SerializerState $state
	 * @param Element $sibling
	 * @return bool
	 */
	private function siblingRequiresEmit( SerializerState $state, Element $sibling ): bool {
		$diffData = DOMDataUtils::getDataParsoidDiff( $sibling );
		if (
			DiffUtils::isDiffMarker( $sibling ) ||
			( $diffData && !$diffData->isEmpty() )
		) {
			return true;
		}
		return !WTSUtils::origSrcValidInEditedContext( $state, $sibling );
	}

	/**
	 * We create a newline (or two) if:
	 * * the previous element is a block element
	 * * the previous element is text, AND we're not in an inline-text situation: this
	 *   corresponds to text having been added in VE without creating a paragraph, which happens
	 *   when inserting a new line before the <meta> tag in VE. The "we're not in an inline text"
	 *   is a heuristic and doesn't work for the ends of line for instance, but it shouldn't add
	 *   semantic whitespace either.
	 * @param Node $meta
	 * @param Node $otherNode
	 * @return bool
	 */
	private function needNewLineSepBeforeMeta( Node $meta, Node $otherNode ): bool {
		if ( $otherNode === $meta->parentNode ) {
			return false;
		}
		$afterBlockElement = $otherNode instanceof Element &&
			DOMUtils::isWikitextBlockNode( $otherNode );
		return $afterBlockElement || (
			$otherNode instanceof Text &&
			DOMUtils::isWikitextBlockNode( DiffDOMUtils::nextNonSepSibling( $meta ) )
		);
	}

	/**
	 * Whether $otherNode is a wikitext block element other than the parent
	 * of $node.
	 *
	 * @param Element $node
	 * @param Node $otherNode
	 * @return bool
	 */
	private function isNonParentBlockElement( Element $node, Node $otherNode ): bool {
		return $otherNode !== $node->parentNode && $otherNode instanceof Element &&
			DOMUtils::isWikitextBlockNode( $otherNode );
	}

	/**
	 * Whether $node is a new element of a kind that has to be serialized on
	 * its own line.
	 *
	 * @param Element $node
	 * @return bool
	 */
	private function needsOwnLine( Element $node ): bool {
		// Placeholder and annotation metas or <*include*> tags don't need to
		// be serialized on their own line
		return WTUtils::isNewElt( $node ) &&
			!DOMUtils::matchTypeOf( $node, self::INLINE_TYPEOF_REGEXP );
	}

	/** @inheritDoc */
	public function before( Element $node, Node $otherNode, SerializerState $state ): array {
		if ( WTUtils::isAnnotationStartMarkerMeta( $node ) ) {
			return $this->needNewLineSepBeforeMeta( $node, $otherNode ) ? [ 'min' => 2 ] : [];
		}
		if ( WTUtils::isAnnotationEndMarkerMeta( $node ) ) {
			return $this->needNewLineSepBeforeMeta( $node, $otherNode ) ? [ 'min' => 1 ] : [];
		}

		$type = DOMCompat::getAttribute( $node, 'typeof' ) ??
			DOMCompat::getAttribute( $node, 'property' );
		if ( $type && str_contains( $type, 'mw:PageProp/categorydefaultsort' ) ) {
			$afterWikitextParagraph = $otherNode instanceof Element
				&& DOMCompat::nodeName( $otherNode ) === 'p'
				&& ( DOMDataUtils::getDataParsoid( $otherNode )->stx ?? null ) !== 'html';
			// Since defaultsort is outside the p-tag, we need 2 newlines
			// to ensure that it goes back into the p-tag when parsed.
			return [ 'min' => $afterWikitextParagraph ? 2 : 1 ];
		}

		return $this->needsOwnLine( $node ) ? [ 'min' => 1 ] : [];
	}

	/** @inheritDoc */
	public function after( Element $node, Node $otherNode, SerializerState $state ): array {
		if ( WTUtils::isAnnotationEndMarkerMeta( $node ) ) {
			return $this->isNonParentBlockElement( $node, $otherNode ) ? [ 'min' => 2 ] : [];
		}
		if ( WTUtils::isAnnotationStartMarkerMeta( $node ) ) {
			return $this->isNonParentBlockElement( $node, $otherNode ) ? [ 'min' => 1 ] : [];
		}

		// Any other new meta gets a newline after it unless it is one of the
		// inline kinds.
		return $this->needsOwnLine( $node ) ? [ 'min' => 1 ] : [];
	}
}