Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
18.15% |
45 / 248 |
|
14.06% |
9 / 64 |
CRAP | |
0.00% |
0 / 1 |
WTUtils | |
18.15% |
45 / 248 |
|
14.06% |
9 / 64 |
15096.41 | |
0.00% |
0 / 1 |
hasLiteralHTMLMarker | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
isLiteralHTMLNode | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isZeroWidthWikitextElt | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isBlockNodeWithVisibleWT | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isATagFromWikiLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
isATagFromExtLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
isATagFromURLLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isATagFromMagicLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
matchTplType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasExpandedAttrsType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isTplMarkerMeta | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isTplStartMarkerMeta | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isTplEndMarkerMeta | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
findFirstEncapsulationWrapperNode | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
isNewElt | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
isIndentPre | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
isInlineMedia | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isGeneratedFigure | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
indentPreDSRCorrection | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
isEncapsulatedDOMForestRoot | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
12 | |||
isRedirectLink | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
isCategoryLink | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
isSolTransparentLink | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
emitsSolTransparentSingleLineWT | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
isFallbackIdSpan | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isRenderingTransparentNode | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
56 | |||
inHTMLTableTag | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
isFirstEncapsulationWrapperNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isFirstExtensionWrapperNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isExtensionOutputtingCoreMwDomSpec | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
isEncapsulationWrapper | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
isDOMFragmentWrapper | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isSealedFragmentOfType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isParsoidSectionTag | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
fromExtensionContent | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
5 | |||
fromEncapsulatedContent | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
getWTSource | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getAboutSiblings | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
90 | |||
skipOverEncapsulatedContent | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
encodeComment | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
decodeComment | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
decodedCommentLength | |
58.33% |
7 / 12 |
|
0.00% |
0 / 1 |
8.60 | |||
getExtTagName | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getPFragmentHandlerKey | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getNativeExt | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isIncludeTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
12 | |||
isAnnotationTag | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
isAnnOrExtTag | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
createEmptyLocalizationFragment | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
createPageContentI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
createInterfaceI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
createLangI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
addPageContentI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
addInterfaceI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
addLangI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
matchAnnotationMeta | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
extractAnnotationType | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
isAnnotationStartMarkerMeta | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isAnnotationEndMarkerMeta | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isMovedMetaTag | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isMarkerAnnotation | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
getMediaFormat | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
hasVisibleCaption | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
textContentFromCaption | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Utils; |
5 | |
6 | use DOMException; |
7 | use Wikimedia\Assert\UnreachableException; |
8 | use Wikimedia\Bcp47Code\Bcp47Code; |
9 | use Wikimedia\Parsoid\Config\Env; |
10 | use Wikimedia\Parsoid\DOM\Comment; |
11 | use Wikimedia\Parsoid\DOM\Document; |
12 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
13 | use Wikimedia\Parsoid\DOM\Element; |
14 | use Wikimedia\Parsoid\DOM\Node; |
15 | use Wikimedia\Parsoid\DOM\Text; |
16 | use Wikimedia\Parsoid\Ext\ExtensionTagHandler; |
17 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
18 | use Wikimedia\Parsoid\NodeData\I18nInfo; |
19 | use Wikimedia\Parsoid\NodeData\TempData; |
20 | use Wikimedia\Parsoid\Tokens\CommentTk; |
21 | use Wikimedia\Parsoid\Wikitext\Consts; |
22 | use Wikimedia\Parsoid\Wt2Html\Frame; |
23 | |
24 | /** |
25 | * These utilities pertain to querying / extracting / modifying wikitext information from the DOM.
26 | * |
27 | * @note Many of these methods are not safe to use unless the DOM has been |
28 | * loaded and prepared, as they consult DataParsoid from the NodeData. |
29 | */ |
30 | class WTUtils { |
31 | private const FIRST_ENCAP_REGEXP = |
32 | '#(?:^|\s)(mw:(?:Transclusion|Param|LanguageVariant|Extension(/\S+)))(?=$|\s)#D'; |
33 | |
34 | /** |
35 | * Regex corresponding to FIRST_ENCAP_REGEXP, but excluding extensions. If FIRST_ENCAP_REGEXP is |
36 | * updated, this one should be as well. |
37 | */ |
38 | private const NON_EXTENSION_ENCAP_REGEXP = |
39 | '#(?:^|\s)(mw:(?:Transclusion|Param|LanguageVariant))(?=$|\s)#D'; |
40 | |
41 | /** |
42 | * Regexp for checking marker metas typeofs representing |
43 | * transclusion markup or template param markup. |
44 | */ |
45 | private const TPL_META_TYPE_REGEXP = '#^mw:(?:Transclusion|Param)(?:/End)?$#D'; |
46 | |
47 | /** |
48 | * Regexp for checking marker metas typeofs representing |
49 | * annotation markup |
50 | */ |
51 | public const ANNOTATION_META_TYPE_REGEXP = '#^mw:(?:Annotation/([\w\d]+))(?:/End)?$#uD'; |
52 | |
53 | /** |
54 | * Check whether a node's data-parsoid object includes |
55 | * an indicator that the original wikitext was a literal |
56 | * HTML element (like table or p) |
57 | */ |
public static function hasLiteralHTMLMarker( DataParsoid $dp ): bool {
    // An unset (or null) stx collapses to null and so can never equal 'html'.
    return ( $dp->stx ?? null ) === 'html';
}
61 | |
62 | /** |
63 | * Run a node through {@link #hasLiteralHTMLMarker}. |
64 | */ |
public static function isLiteralHTMLNode( ?Node $node ): bool {
    // Anything that is not an Element (including null) is rejected up front.
    if ( !( $node instanceof Element ) ) {
        return false;
    }
    return self::hasLiteralHTMLMarker( DOMDataUtils::getDataParsoid( $node ) );
}
69 | |
/**
 * Is $node's tag name listed in Consts::$ZeroWidthWikitextTags while the
 * node itself is not flagged as literal HTML?
 */
public static function isZeroWidthWikitextElt( Node $node ): bool {
    $tagName = DOMCompat::nodeName( $node );
    if ( !isset( Consts::$ZeroWidthWikitextTags[$tagName] ) ) {
        return false;
    }
    return !self::isLiteralHTMLNode( $node );
}
74 | |
75 | /** |
76 | * Is `$node` a block node that is also visible in wikitext? |
77 | * An example of an invisible block node is a `<p>`-tag that |
78 | * Parsoid generated, or a `<ul>`, `<ol>` tag. |
79 | */ |
public static function isBlockNodeWithVisibleWT( Node $node ): bool {
    // Non-block nodes never qualify.
    if ( !DOMUtils::isWikitextBlockNode( $node ) ) {
        return false;
    }
    // A block node counts only if it is not a zero-width wikitext element.
    return !self::isZeroWidthWikitextElt( $node );
}
84 | |
85 | /** |
86 | * Helper functions to detect when an A-$node uses [[..]]/[..]/... style |
87 | * syntax (for wikilinks, ext links, url links). rel-type is not sufficient |
88 | * anymore since mw:ExtLink is used for all the three link syntaxes. |
89 | */ |
public static function isATagFromWikiLinkSyntax( Element $node ): bool {
    if ( DOMCompat::nodeName( $node ) !== 'a' ) {
        return false;
    }

    $dp = DOMDataUtils::getDataParsoid( $node );
    if ( DOMUtils::hasRel( $node, 'mw:WikiLink' ) ) {
        return true;
    }

    // Without the rel, any recorded stx other than url / magiclink still
    // counts as wikilink syntax.
    $stx = $dp->stx ?? null;
    return $stx !== null && $stx !== 'url' && $stx !== 'magiclink';
}
99 | |
100 | /** |
101 | * Helper function to detect when an A-node uses ext-link syntax. |
102 | * rel attribute is not sufficient anymore since mw:ExtLink is used for |
103 | * multiple link types |
104 | */ |
public static function isATagFromExtLinkSyntax( Element $node ): bool {
    if ( DOMCompat::nodeName( $node ) !== 'a' ) {
        return false;
    }

    $dp = DOMDataUtils::getDataParsoid( $node );
    if ( !DOMUtils::hasRel( $node, 'mw:ExtLink' ) ) {
        return false;
    }

    // A missing stx is acceptable; only url / magiclink syntax is excluded.
    $stx = $dp->stx ?? null;
    return $stx !== 'url' && $stx !== 'magiclink';
}
114 | |
115 | /** |
116 | * Helper function to detect when an A-node uses url-link syntax. |
117 | * rel attribute is not sufficient anymore since mw:ExtLink is used for |
118 | * multiple link types |
119 | */ |
public static function isATagFromURLLinkSyntax( Element $node ): bool {
    if ( DOMCompat::nodeName( $node ) !== 'a' ) {
        return false;
    }

    $dp = DOMDataUtils::getDataParsoid( $node );
    if ( !DOMUtils::hasRel( $node, 'mw:ExtLink' ) ) {
        return false;
    }
    // The rel must be paired with stx === 'url'.
    return ( $dp->stx ?? null ) === 'url';
}
129 | |
130 | /** |
131 | * Helper function to detect when an A-node uses magic-link syntax. |
132 | * rel attribute is not sufficient anymore since mw:ExtLink is used for |
133 | * multiple link types |
134 | */ |
public static function isATagFromMagicLinkSyntax( Element $node ): bool {
    if ( DOMCompat::nodeName( $node ) !== 'a' ) {
        return false;
    }

    $dp = DOMDataUtils::getDataParsoid( $node );
    if ( !DOMUtils::hasRel( $node, 'mw:ExtLink' ) ) {
        return false;
    }
    // The rel must be paired with stx === 'magiclink'.
    return ( $dp->stx ?? null ) === 'magiclink';
}
144 | |
145 | /** |
146 | * Check whether a node's typeof indicates that it is a template expansion. |
147 | * |
148 | * @param Element $node |
149 | * @return ?string The matched type, or null if no match. |
150 | */ |
public static function matchTplType( Element $node ): ?string {
    // Delegates to the typeof matcher using the template/param marker regexp.
    $matchedType = DOMUtils::matchTypeOf( $node, self::TPL_META_TYPE_REGEXP );
    return $matchedType;
}
154 | |
155 | /** |
156 | * Check whether a typeof indicates that it signifies an |
157 | * expanded attribute. |
158 | */ |
public static function hasExpandedAttrsType( Element $node ): bool {
    $matchedType = DOMUtils::matchTypeOf( $node, '/^mw:ExpandedAttrs(\/\S+)*$/' );
    // A null result from the matcher means no typeof matched.
    return $matchedType !== null;
}
162 | |
163 | /** |
164 | * Check whether a node is a meta tag that signifies a template expansion. |
165 | */ |
public static function isTplMarkerMeta( Node $node ): bool {
    // Covers both start and end markers; TPL_META_TYPE_REGEXP allows an optional "/End".
    $matchedType = DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP );
    return $matchedType !== null;
}
169 | |
170 | /** |
171 | * Check whether a node is a meta signifying the start of a template expansion. |
172 | */ |
public static function isTplStartMarkerMeta( Node $node ): bool {
    $matchedType = DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP );
    if ( $matchedType === null ) {
        return false;
    }
    // Start markers are the matches that do not carry the "/End" suffix.
    return !str_ends_with( $matchedType, '/End' );
}
177 | |
178 | /** |
179 | * Check whether a node is a meta signifying the end of a template expansion. |
180 | */ |
public static function isTplEndMarkerMeta( Node $node ): bool {
    $matchedType = DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP );
    if ( $matchedType === null ) {
        return false;
    }
    // End markers are the matches that carry the "/End" suffix.
    return str_ends_with( $matchedType, '/End' );
}
185 | |
186 | /** |
187 | * Find the first wrapper element of encapsulated content. |
188 | */ |
public static function findFirstEncapsulationWrapperNode( Node $node ): ?Element {
    if ( !self::isEncapsulatedDOMForestRoot( $node ) ) {
        return null;
    }
    '@phan-var Element $node'; // @var Element $node
    $about = DOMCompat::getAttribute( $node, 'about' );

    // Walk backwards over non-deleted siblings for as long as they are
    // elements sharing the same about id; $first ends up on the earliest one.
    $first = $node;
    $candidate = DiffDOMUtils::previousNonDeletedSibling( $first );
    while (
        $candidate instanceof Element &&
        DOMCompat::getAttribute( $candidate, 'about' ) === $about
    ) {
        $first = $candidate;
        $candidate = DiffDOMUtils::previousNonDeletedSibling( $first );
    }

    // NOTE: findFirstEncapsulationWrapperNode can be called by code
    // even before templates have been fully encapsulated everywhere.
    // ProcessTreeBuilderFixups::removeAutoInsertedEmptyTags is the main
    // culprit here and it makes the contract for this helper murky
    // by hiding potential brokenness since this should never return null
    // once all templates have been encapsulated!
    if ( !self::isFirstEncapsulationWrapperNode( $first ) ) {
        return null;
    }
    '@phan-var Element $first'; // @var Element $first
    return $first;
}
213 | |
214 | /** |
215 | * This tests whether a DOM node is a new node added during an edit session |
216 | * or an existing node from parsed wikitext. |
217 | * |
218 | * As written, this function can only be used on non-template/extension content |
219 | * or on the top-level nodes of template/extension content. This test will |
220 | * return the wrong results on non-top-level $nodes of template/extension content. |
221 | */ |
public static function isNewElt( Node $node ): bool {
    // Newness cannot be determined for text/comment nodes.
    if ( !( $node instanceof Element ) ) {
        return false;
    }

    // For template/extension content the flag is read from the first
    // encapsulation wrapper; otherwise from the node itself.
    $wrapper = self::findFirstEncapsulationWrapperNode( $node );
    $target = $wrapper ?? $node;
    return DOMDataUtils::getDataParsoid( $target )->getTempFlag( TempData::IS_NEW );
}
233 | |
234 | /** |
235 | * Check whether a pre is caused by indentation in the original wikitext. |
236 | */ |
public static function isIndentPre( Node $node ): bool {
    if ( DOMCompat::nodeName( $node ) !== 'pre' ) {
        return false;
    }
    // A <pre> flagged as literal HTML is not an indent-pre.
    return !self::isLiteralHTMLNode( $node );
}
240 | |
/**
 * Is $node a <span> that also passes {@link isGeneratedFigure}?
 */
public static function isInlineMedia( Node $node ): bool {
    if ( DOMCompat::nodeName( $node ) !== 'span' ) {
        return false;
    }
    return self::isGeneratedFigure( $node );
}
245 | |
/**
 * Does $node have a typeof of `mw:File` or `mw:File/...`?
 */
public static function isGeneratedFigure( Node $node ): bool {
    $matchedType = DOMUtils::matchTypeOf( $node, '#^mw:File($|/)#D' );
    return $matchedType !== null;
}
249 | |
250 | /** |
251 | * Find how much offset is necessary for the DSR of an |
252 | * indent-originated pre tag. |
253 | */ |
public static function indentPreDSRCorrection( Node $textNode ): int {
    // NOTE: This assumes a text-node and doesn't check that it is one.
    //
    // FIXME: Doesn't handle text nodes that are not direct children of the pre
    $parent = $textNode->parentNode;

    // A detached node has no parent; isIndentPre() requires a Node, so
    // bail out here instead of triggering a TypeError.
    if ( $parent === null || !self::isIndentPre( $parent ) ) {
        return 0;
    }

    // nodeValue can be null for some node kinds; treat that as empty text.
    $text = $textNode->nodeValue ?? '';
    $numNLs = substr_count( $text, "\n" );

    // We don't want the trailing newline of the last child of the pre
    // to contribute a pre-correction since it doesn't add new content
    // in the pre-node after the text.
    if ( $parent->lastChild === $textNode && str_ends_with( $text, "\n" ) ) {
        $numNLs--;
    }

    return $numNLs;
}
273 | |
274 | /** |
275 | * Check if $node is a root in an encapsulated DOM forest. |
276 | */ |
public static function isEncapsulatedDOMForestRoot( Node $node ): bool {
    // Only elements can carry an about attribute.
    if ( !( $node instanceof Element ) ) {
        return false;
    }
    $about = DOMCompat::getAttribute( $node, 'about' );
    // FIXME: Ensure that our DOM spec clarifies this expectation
    return $about !== null && Utils::isParsoidObjectId( $about );
}
282 | |
283 | /** |
284 | * Does $node represent a redirect link? |
285 | */ |
public static function isRedirectLink( ?Node $node ): bool {
    // Must be a <link> element ...
    if ( !( $node instanceof Element ) || DOMCompat::nodeName( $node ) !== 'link' ) {
        return false;
    }
    // ... whose rel matches the redirect page-property pattern.
    return DOMUtils::matchRel( $node, '#\bmw:PageProp/redirect\b#' ) !== null;
}
291 | |
292 | /** |
293 | * Does $node represent a category link? |
294 | */ |
public static function isCategoryLink( ?Node $node ): bool {
    // Must be a <link> element ...
    if ( !( $node instanceof Element ) || DOMCompat::nodeName( $node ) !== 'link' ) {
        return false;
    }
    // ... whose rel matches the category page-property pattern.
    return DOMUtils::matchRel( $node, '#\bmw:PageProp/Category\b#' ) !== null;
}
300 | |
301 | /** |
302 | * Does $node represent a link that is sol-transparent? |
303 | */ |
public static function isSolTransparentLink( ?Node $node ): bool {
    // Must be a <link> element ...
    if ( !( $node instanceof Element ) || DOMCompat::nodeName( $node ) !== 'link' ) {
        return false;
    }
    // ... whose rel matches the shared sol-transparent link regex.
    return DOMUtils::matchRel( $node, TokenUtils::SOL_TRANSPARENT_LINK_REGEX ) !== null;
}
309 | |
310 | /** |
311 | * Check if '$node' emits wikitext that is sol-transparent in wikitext form. |
312 | * This is a test for wikitext that doesn't introduce line breaks. |
313 | * |
314 | * Comment, whitespace text $nodes, category links, redirect links, behavior |
315 | * switches, and include directives currently satisfy this definition. |
316 | * |
317 | * This should come close to matching TokenUtils.isSolTransparent() |
318 | */ |
public static function emitsSolTransparentSingleLineWT( Node $node ): bool {
    if ( $node instanceof Text ) {
        // NB: We differ here to meet the nl condition: only spaces and
        // tabs (no newlines) are accepted.
        return preg_match( '/^[ \t]*$/D', $node->nodeValue ) === 1;
    }

    // NB: The only metas in a DOM should be for behavior switches and
    // include directives, other than explicit HTML meta tags. This
    // differs from our counterpart in Util where ref meta tokens
    // haven't been expanded to spans yet.
    return self::isRenderingTransparentNode( $node );
}
333 | |
334 | /** |
335 | * This is the span added to headings to add fallback ids for when legacy |
336 | * and HTML5 ids don't match up. This prevents broken links to legacy ids. |
337 | */ |
public static function isFallbackIdSpan( Node $node ): bool {
    // A <span> whose typeof includes mw:FallbackId.
    $isFallback = DOMUtils::hasNameAndTypeOf( $node, 'span', 'mw:FallbackId' );
    return $isFallback;
}
341 | |
342 | /** |
343 | * These are primarily 'metadata'-like $nodes that don't show up in output rendering. |
344 | * - In Parsoid output, they are represented by link/meta tags. |
345 | * - In the PHP parser, they are completely stripped from the input early on. |
346 | * Because of this property, these rendering-transparent $nodes are also |
347 | * SOL-transparent for the purposes of parsing behavior. |
348 | */ |
public static function isRenderingTransparentNode( Node $node ): bool {
    // FIXME: Can we change this entire thing to
    // $node instanceof Comment ||
    // DOMUtils::getDataParsoid($node).stx !== 'html' &&
    // (DOMCompat::nodeName($node) === 'meta' || DOMCompat::nodeName($node) === 'link')
    //
    if ( $node instanceof Comment ) {
        return true;
    }
    if ( self::isSolTransparentLink( $node ) ) {
        return true;
    }

    // Catch-all for everything else: a <meta> that is neither a marker
    // annotation nor literal HTML.
    if (
        $node instanceof Element &&
        DOMCompat::nodeName( $node ) === 'meta' &&
        !self::isMarkerAnnotation( $node ) &&
        ( DOMDataUtils::getDataParsoid( $node )->stx ?? '' ) !== 'html'
    ) {
        return true;
    }

    return self::isFallbackIdSpan( $node );
}
364 | |
365 | /** |
366 | * Is $node nested inside a table tag that uses HTML instead of native |
367 | * wikitext? |
368 | */ |
public static function inHTMLTableTag( Node $node ): bool {
    // Climb ancestors for as long as they are table tags.
    for ( $p = $node->parentNode; DOMUtils::isTableTag( $p ); $p = $p->parentNode ) {
        if ( self::isLiteralHTMLNode( $p ) ) {
            return true;
        }
        if ( DOMCompat::nodeName( $p ) === 'table' ) {
            // Don't cross <table> boundaries
            return false;
        }
    }

    return false;
}
383 | |
384 | /** |
385 | * Is $node the first wrapper element of encapsulated content? |
386 | */ |
public static function isFirstEncapsulationWrapperNode( Node $node ): bool {
    // True when a typeof matches FIRST_ENCAP_REGEXP.
    $matchedType = DOMUtils::matchTypeOf( $node, self::FIRST_ENCAP_REGEXP );
    return $matchedType !== null;
}
390 | |
391 | /** |
392 | * Is $node the first wrapper element of extension content? |
393 | */ |
public static function isFirstExtensionWrapperNode( Node $node ): bool {
    // True when a typeof matches the (unanchored) "mw:Extension/" pattern.
    $matchedType = DOMUtils::matchTypeOf( $node, "#mw:Extension/#" );
    return $matchedType !== null;
}
397 | |
398 | /** |
399 | * Checks whether a first encapsulation wrapper node is encapsulating an extension |
400 | * that outputs MediaWiki Core DOM Spec HTML (https://www.mediawiki.org/wiki/Specs/HTML) |
401 | */ |
public static function isExtensionOutputtingCoreMwDomSpec( Node $node, Env $env ): bool {
    // Transclusion / Param / LanguageVariant wrappers are not extensions.
    $nonExtType = DOMUtils::matchTypeOf( $node, self::NON_EXTENSION_ENCAP_REGEXP );
    if ( $nonExtType !== null ) {
        return false;
    }

    $extTagName = self::getExtTagName( $node );
    $extConfig = $env->getSiteConfig()->getExtTagConfig( $extTagName );

    // Only an explicit boolean true in the extension's options counts.
    return ( $extConfig['options']['outputHasCoreMwDomSpecMarkup'] ?? null ) === true;
}
411 | |
412 | /** |
413 | * Is $node an encapsulation wrapper elt? |
414 | * |
415 | * All root-level $nodes of generated content are considered |
416 | * encapsulation wrappers and share an about-id. |
417 | */ |
public static function isEncapsulationWrapper( Node $node ): bool {
    if ( !( $node instanceof Element ) ) {
        return false;
    }
    // True if it has an encapsulation type or while walking backwards
    // over elts with identical about ids, we run into a $node with an
    // encapsulation type.
    return self::findFirstEncapsulationWrapperNode( $node ) !== null;
}
424 | |
425 | /** |
426 | * Is $node a DOMFragment wrapper? |
427 | */ |
public static function isDOMFragmentWrapper( Node $node ): bool {
    // See TokenUtils::hasDOMFragmentType
    $matchedType = DOMUtils::matchTypeOf( $node, '#^mw:DOMFragment(/sealed/\w+)?$#D' );
    return $matchedType !== null;
}
432 | |
433 | /** |
434 | * Is $node a sealed DOMFragment of a specific type? |
435 | */ |
public static function isSealedFragmentOfType( Node $node, string $type ): bool {
    // The sealed-fragment typeof is the fixed prefix followed by $type.
    $expectedTypeOf = 'mw:DOMFragment/sealed/' . $type;
    return DOMUtils::hasTypeOf( $node, $expectedTypeOf );
}
439 | |
440 | /** |
441 | * Is $node a Parsoid-generated <section> tag? |
442 | */ |
public static function isParsoidSectionTag( Node $node ): bool {
    if ( DOMCompat::nodeName( $node ) !== 'section' ) {
        return false;
    }
    // The data-mw-section-id attribute is what this check keys on.
    // @phan-suppress-next-line PhanUndeclaredMethod
    return $node->hasAttribute( 'data-mw-section-id' );
}
448 | |
449 | /** |
450 | * Is the $node from extension content? |
451 | * @param Node $node |
452 | * @param ?string $extType If non-null, checks for that specific extension |
453 | * @return bool |
454 | */ |
public static function fromExtensionContent( Node $node, ?string $extType = null ): bool {
    // Quote the caller-supplied extension name so that regex
    // metacharacters in it are matched literally and a stray '#' cannot
    // terminate the pattern. A null or empty $extType means "any extension".
    $re = ( $extType !== null && $extType !== '' )
        ? '#mw:Extension/' . preg_quote( $extType, '#' ) . '#'
        : '#mw:Extension/\w+#';

    // Check $node and then each ancestor, stopping at the top of the tree.
    while ( $node && !DOMUtils::atTheTop( $node ) ) {
        if ( DOMUtils::matchTypeOf( $node, $re ) !== null ) {
            return true;
        }
        $node = $node->parentNode;
    }
    return false;
}
465 | |
466 | /** |
467 | * Is $node from encapsulated (template, extension, etc.) content? |
468 | */ |
public static function fromEncapsulatedContent( Node $node ): bool {
    // Check $node and then each ancestor, stopping at the top of the tree.
    for ( $cur = $node; $cur && !DOMUtils::atTheTop( $cur ); $cur = $cur->parentNode ) {
        if ( self::findFirstEncapsulationWrapperNode( $cur ) !== null ) {
            return true;
        }
    }
    return false;
}
478 | |
479 | /** |
480 | * Compute, when possible, the wikitext source for a $node in |
481 | * an environment env. Returns null if the source cannot be |
482 | * extracted. |
483 | */ |
public static function getWTSource( Frame $frame, Element $node ): ?string {
    $dp = DOMDataUtils::getDataParsoid( $node );
    $dsr = $dp->dsr ?? null;

    // FIXME: We could probably change the null return to ''
    // Just need to verify that code that uses this won't break
    if ( !Utils::isValidDSR( $dsr ) ) {
        return null;
    }
    // Slice the frame's source text using the node's DSR.
    return $dsr->substr( $frame->getSrcText() );
}
492 | |
493 | /** |
494 | * Gets all siblings that follow '$node' that have an 'about' as |
495 | * their about id. |
496 | * |
497 | * This is used to fetch transclusion/extension content by using |
498 | * the about-id as the key. This works because |
499 | * transclusion/extension content is a forest of dom-trees formed |
500 | * by adjacent dom-nodes. This is the contract that template |
501 | * encapsulation, dom-reuse, and VE code all have to abide by. |
502 | * |
503 | * The only exception to this adjacency rule is IEW nodes in |
504 | * fosterable positions (in tables) which are not span-wrapped to |
505 | * prevent them from getting fostered out. |
506 | * |
507 | * @param Node $node |
508 | * @param ?string $about |
509 | * @return Node[] |
510 | */ |
public static function getAboutSiblings( Node $node, ?string $about ): array {
    $nodes = [ $node ];

    if ( $about === null ) {
        return $nodes;
    }

    // Collect following siblings while they either share the about id or
    // are IEW sitting in a fosterable position.
    for ( $sib = $node->nextSibling; $sib; $sib = $sib->nextSibling ) {
        $sharesAbout = $sib instanceof Element &&
            DOMCompat::getAttribute( $sib, 'about' ) === $about;
        if (
            !$sharesAbout &&
            !( DOMUtils::isFosterablePosition( $sib ) && DOMUtils::isIEW( $sib ) )
        ) {
            break;
        }
        $nodes[] = $sib;
    }

    // Remove already consumed trailing IEW, if any
    while ( $nodes !== [] && DOMUtils::isIEW( $nodes[array_key_last( $nodes )] ) ) {
        array_pop( $nodes );
    }

    return $nodes;
}
534 | |
535 | /** |
536 | * This function is only intended to be used on encapsulated $nodes |
537 | * (Template/Extension/Param content). |
538 | * |
539 | * Given a '$node' that has an about-id, it is assumed that it is generated |
540 | * by templates or extensions. This function skips over all |
541 | * following content nodes and returns the first non-template node |
542 | * that follows it. |
543 | */ |
public static function skipOverEncapsulatedContent( Node $node ): ?Node {
    $about = null;
    if ( $node instanceof Element ) {
        $about = DOMCompat::getAttribute( $node, 'about' );
    }

    // No about id: there is nothing to skip over.
    if ( $about === null ) {
        return $node->nextSibling;
    }

    // Guaranteed not to be empty. It will at least include $node.
    $aboutSiblings = self::getAboutSiblings( $node, $about );
    $lastSibling = end( $aboutSiblings );
    return $lastSibling->nextSibling;
}
555 | |
556 | /** |
557 | * Comment encoding/decoding. |
558 | * |
559 | * * Some relevant phab tickets: T94055, T70146, T60184, T95039 |
560 | * |
561 | * The wikitext comment rule is very simple: <!-- starts a comment, |
562 | * and --> ends a comment. This means we can have almost anything as the |
563 | * contents of a comment (except the string "-->", but see below), including |
564 | * several things that are not valid in HTML5 comments: |
565 | * |
566 | * * For one, the html5 comment parsing algorithm [0] leniently accepts |
567 | * --!> as a closing comment tag, which differs from the php+tidy combo. |
568 | * |
569 | * * If the comment's data matches /^-?>/, html5 will end the comment. |
570 | * For example, <!-->stuff<--> breaks up as |
571 | * <!--> (the comment) followed by, stuff<--> (as text). |
572 | * |
573 | * * Finally, comment data shouldn't contain two consecutive hyphen-minus |
574 | * characters (--), nor end in a hyphen-minus character (/-$/) as defined |
575 | * in the spec [1]. |
576 | * |
577 | * We work around all these problems by using HTML entity encoding inside |
578 | * the comment body. The characters -, >, and & must be encoded in order |
579 | * to prevent premature termination of the comment by one of the cases |
580 | * above. Encoding other characters is optional; all entities will be |
581 | * decoded during wikitext serialization. |
582 | * |
583 | * In order to allow *arbitrary* content inside a wikitext comment, |
584 | * including the forbidden string "-->" we also do some minimal entity |
585 | * decoding on the wikitext. We are also limited by our inability |
586 | * to encode DSR attributes on the comment $node, so our wikitext entity |
587 | * decoding must be 1-to-1: that is, there must be a unique "decoded" |
588 | * string for every wikitext sequence, and for every decoded string there |
589 | * must be a unique wikitext which creates it. |
590 | * |
591 | * The basic idea here is to replace every string ab*c with the string with |
592 | * one more b in it. This creates a string with no instance of "ac", |
593 | * so you can use 'ac' to encode one more code point. In this case |
594 | * a is "--&", "b" is "amp;", and "c" is "gt;" and we use ac to |
595 | * encode "-->" (which is otherwise unspeakable in wikitext). |
596 | * |
597 | * Note that any user content which does not match the regular |
598 | * expression /--(>|&(amp;)*gt;)/ is unchanged in its wikitext |
599 | * representation, as shown in the first two examples below. |
600 | * |
601 | * User-authored comment text      Wikitext        HTML5 DOM
602 | * --------------------------      -------------   ----------------------
603 | * & - >                           & - >           &amp; &#45; &gt;
604 | * Use &gt; here                   Use &gt; here   Use &amp;gt; here
605 | * -->                             --&gt;          &#45;&#45;&gt;
606 | * --&gt;                          --&amp;gt;      &#45;&#45;&amp;gt;
607 | * --&amp;gt;                      --&amp;amp;gt;  &#45;&#45;&amp;amp;gt;
608 | * |
609 | * [0] http://www.w3.org/TR/html5/syntax.html#comment-start-state |
610 | * [1] http://www.w3.org/TR/html5/syntax.html#comments |
611 | * |
612 | * Map a wikitext-escaped comment to an HTML DOM-escaped comment. |
613 | * |
614 | * @param string $comment Wikitext-escaped comment. |
615 | * @return string DOM-escaped comment. |
616 | */ |
public static function encodeComment( string $comment ): string {
    // Step 1: undo the wikitext escaping of "--&(amp;)*gt;" sequences to
    // recover the "true value" of the comment.
    $undoWikitextEscape = static fn ( $m ) => Utils::decodeWtEntities( $m[0] );
    $trueValue = preg_replace_callback( '/--&(amp;)*gt;/', $undoWikitextEscape, $comment );

    // Step 2: entity-encode every '-', '>' and '&' of the true value so it
    // can be embedded safely in an HTML comment.
    // This part doesn't have to map strings 1-to-1.
    $encodeChar = static fn ( $m ) => Utils::entityEncodeAll( $m[0] );
    return preg_replace_callback( '/[->&]/', $encodeChar, $trueValue );
}
630 | |
631 | /** |
632 | * Map an HTML DOM-escaped comment to a wikitext-escaped comment. |
633 | * @param string $comment DOM-escaped comment. |
634 | * @return string Wikitext-escaped comment. |
635 | */ |
public static function decodeComment( string $comment ): string {
    // Undo HTML entity escaping to obtain "true value" of comment.
    $trueValue = Utils::decodeWtEntities( $comment );

    // Now encode this "true value" of the comment in such a way that the
    // string "-->" never shows up in the resulting wikitext:
    //   "-->"             becomes "--&gt;"
    //   "--&(amp;)*gt;"   gains one extra "amp;" right after the "&"
    // This is the "ab*c gets one more b" scheme (a = "--&", b = "amp;",
    // c = "gt;"), which keeps the mapping 1-to-1.
    return preg_replace_callback( '/--(&(amp;)*gt;|>)/', static function ( $m ) {
        $s = $m[0];
        if ( $s === '-->' ) {
            return '--&gt;';
        }
        // $s starts with "--&"; keep the rest and splice "amp;" in.
        return '--&amp;' . substr( $s, 3 );
    }, $trueValue );
}
647 | |
/**
 * Compute the wikitext DSR length of an HTML DOM comment value.
 *
 * The length is the byte length of the wikitext-escaped comment text
 * (see ::decodeComment()) plus the length of the comment delimiters:
 * 7 for `<!--` and `-->`, or 4 when the comment is flagged as unclosed
 * (presumably only the opening `<!--` is present in that case).
 *
 * @param Comment|CommentTk $node A comment node containing a DOM-escaped comment.
 * @return int The wikitext length in UTF-8 bytes necessary to encode this
 *  comment, delimiters included.
 */
public static function decodedCommentLength( $node ): int {
	if ( $node instanceof Comment ) {
		$text = $node->nodeValue;
		// On the DOM, an unclosed comment is flagged by a placeholder
		// element immediately preceding the comment node.
		$before = $node->previousSibling;
		$unclosed = $before &&
			DOMUtils::hasTypeOf( $before, "mw:Placeholder/UnclosedComment" );
	} elseif ( $node instanceof CommentTk ) {
		// On tokens, the flag lives in the token's data-parsoid.
		$unclosed = isset( $node->dataParsoid->unclosedComment );
		$text = $node->value;
	} else {
		throw new UnreachableException( 'Should not be here!' );
	}

	$delimiterLen = $unclosed ? 4 : 7;
	return $delimiterLen + strlen( self::decodeComment( $text ) );
}
676 | |
/**
 * Return the lower-cased extension tag name encoded in a node's
 * "mw:Extension/<name>" typeof, or null when no such typeof matches.
 */
public static function getExtTagName( Node $node ): ?string {
	$typeOf = DOMUtils::matchTypeOf( $node, '#^mw:Extension/(.+?)$#D' );
	if ( !$typeOf ) {
		return null;
	}
	// Drop the fixed prefix; what remains is the tag name.
	$prefixLen = strlen( 'mw:Extension/' );
	return mb_strtolower( substr( $typeOf, $prefixLen ) );
}
681 | |
/**
 * Return the key encoded in a node's "mw:ParserFunction/<key>" typeof
 * (case preserved), or null when no such typeof matches.
 */
public static function getPFragmentHandlerKey( Node $node ): ?string {
	// TODO (T390342): use ::getExtTagName() to look up extension tag
	// PFragment handlers
	$typeOf = DOMUtils::matchTypeOf( $node, '#^mw:ParserFunction/(.+?)$#D' );
	if ( !$typeOf ) {
		return null;
	}
	$prefixLen = strlen( 'mw:ParserFunction/' );
	return substr( $typeOf, $prefixLen );
}
688 | |
/**
 * Look up, through the site configuration, the implementation registered
 * for the extension tag named by the node's typeof. Returns null when the
 * node carries no (non-empty) extension tag name.
 */
public static function getNativeExt( Env $env, Node $node ): ?ExtensionTagHandler {
	$tagName = self::getExtTagName( $node );
	if ( !$tagName ) {
		return null;
	}
	return $env->getSiteConfig()->getExtTagImpl( $tagName );
}
693 | |
/**
 * Is this an include directive?
 *
 * The comparison is exact: $name must already be one of the lower-case
 * names "includeonly", "noinclude" or "onlyinclude".
 */
public static function isIncludeTag( string $name ): bool {
	return in_array( $name, [ 'includeonly', 'noinclude', 'onlyinclude' ], true );
}
700 | |
/**
 * Check whether the (case-insensitive) tag name is an annotation tag
 * according to the site configuration. When the full name is not one,
 * the part preceding the first '|' is tried as well.
 */
public static function isAnnotationTag( Env $env, string $name ): bool {
	$lowerName = mb_strtolower( $name );
	$config = $env->getSiteConfig();

	$known = $config->isAnnotationTag( $lowerName );
	if ( $known ) {
		return $known;
	}

	// avoid crashing on <tvar|name> even if we don't support that syntax explicitly
	$pipeAt = strpos( $lowerName, '|' );
	if ( !$pipeAt ) {
		// No pipe at all, or a pipe in first position: nothing more to try.
		return $known;
	}
	return $config->isAnnotationTag( substr( $lowerName, 0, $pipeAt ) );
}
715 | |
/**
 * Check if tag is an extension tag, an include directive or an
 * annotation tag. The name is lower-cased before any lookup.
 * Adapted from similar grammar function
 */
public static function isAnnOrExtTag( Env $env, string $name ): bool {
	$lowerName = mb_strtolower( $name );
	$extTagMap = $env->getSiteConfig()->getExtensionTagNameMap();

	if ( isset( $extTagMap[$lowerName] ) ) {
		return true;
	}
	if ( self::isIncludeTag( $lowerName ) ) {
		return true;
	}
	return self::isAnnotationTag( $env, $lowerName );
}
727 | |
/**
 * Build a DocumentFragment whose only child is a <span> typed "mw:I18n".
 * The span carries no message information yet; it must be completed
 * with setDataNodeI18n to be valid.
 * @param Document $doc
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createEmptyLocalizationFragment( Document $doc ): DocumentFragment {
	$placeholder = $doc->createElement( 'span' );
	DOMUtils::addTypeOf( $placeholder, 'mw:I18n' );

	$fragment = $doc->createDocumentFragment();
	$fragment->appendChild( $placeholder );
	return $fragment;
}
742 | |
/**
 * Creates an i18n message to be localized into the page content language.
 * The returned DocumentFragment holds a single span child (see
 * ::createEmptyLocalizationFragment()) on which the page-content i18n
 * information has been set for later localization.
 * @param Document $doc
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createPageContentI18nFragment(
	Document $doc, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	DOMDataUtils::setDataNodeI18n(
		$fragment->firstChild,
		I18nInfo::createPageContentI18n( $key, $params )
	);
	return $fragment;
}
761 | |
/**
 * Creates an i18n message to be localized into the user interface language.
 * The returned DocumentFragment holds a single span child (see
 * ::createEmptyLocalizationFragment()) on which the interface i18n
 * information has been set for later localization.
 * @param Document $doc
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createInterfaceI18nFragment(
	Document $doc, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	DOMDataUtils::setDataNodeI18n(
		$fragment->firstChild,
		I18nInfo::createInterfaceI18n( $key, $params )
	);
	return $fragment;
}
780 | |
/**
 * Creates an i18n message to be localized into an arbitrary language.
 * The returned DocumentFragment holds a single span child (see
 * ::createEmptyLocalizationFragment()) on which the language-specific
 * i18n information has been set for later localization.
 *
 * The use of this method is discouraged; use ::createPageContentI18nFragment(...) and
 * ::createInterfaceI18nFragment(...) where possible rather than, respectively,
 * ::createLangI18nFragment(..., $wgContLang, ...) and
 * ::createLangI18nFragment(..., $wgLang, ...).
 * @param Document $doc
 * @param Bcp47Code $lang language for the localization
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createLangI18nFragment(
	Document $doc, Bcp47Code $lang, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	DOMDataUtils::setDataNodeI18n(
		$fragment->firstChild,
		I18nInfo::createLangI18n( $lang, $key, $params )
	);
	return $fragment;
}
804 | |
/**
 * Marks $element as "mw:LocalizedAttrs" and records on it the i18n
 * information needed for the attribute $name to be localized in a later
 * pass into the page content language.
 * @param Element $element element on which to add internationalization information
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addPageContentI18nAttribute(
	Element $element, string $name, string $key, ?array $params = null
): void {
	// Build the message info first, then touch the element.
	$messageInfo = I18nInfo::createPageContentI18n( $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $messageInfo );
}
820 | |
/**
 * Marks $element as "mw:LocalizedAttrs" and records on it the i18n
 * information needed for the attribute $name to be localized in a later
 * pass into the user interface language.
 * @param Element $element element on which to add internationalization information
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addInterfaceI18nAttribute(
	Element $element, string $name, string $key, ?array $params = null
): void {
	// Build the message info first, then touch the element.
	$messageInfo = I18nInfo::createInterfaceI18n( $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $messageInfo );
}
835 | |
/**
 * Marks $element as "mw:LocalizedAttrs" and records on it the i18n
 * information needed for the attribute $name to be localized in a later
 * pass into the provided language.
 *
 * The use of this method is discouraged; use ::addPageContentI18nAttribute(...) and
 * ::addInterfaceI18nAttribute(...) where possible rather than, respectively,
 * ::addLangI18nAttribute(..., $wgContLang, ...) and ::addLangI18nAttribute(..., $wgLang, ...).
 * @param Element $element element on which to add internationalization information
 * @param Bcp47Code $lang language in which the message will be localized
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addLangI18nAttribute(
	Element $element, Bcp47Code $lang, string $name, string $key, ?array $params = null
): void {
	// Build the message info first, then touch the element.
	$messageInfo = I18nInfo::createLangI18n( $lang, $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $messageInfo );
}
855 | |
/**
 * Check whether a node is an annotation meta, i.e. a <meta> whose typeof
 * matches ANNOTATION_META_TYPE_REGEXP; if yes, returns its type.
 *
 * @param Node $node
 * @return ?string The matched type, or null if the node is not an annotation meta.
 */
public static function matchAnnotationMeta( Node $node ): ?string {
	$matchedType = DOMUtils::matchNameAndTypeOf(
		$node, 'meta', self::ANNOTATION_META_TYPE_REGEXP
	);
	return $matchedType;
}
861 | |
/**
 * Extract the annotation type of a node: the first capture group of
 * ANNOTATION_META_TYPE_REGEXP applied to the node's matching typeof.
 * Returns null if the node has no matching typeof; $isStart is left
 * untouched in that case. Otherwise $isStart is set to false when the
 * matched typeof ends with "/End" and to true when it does not.
 *
 * @param Node $node
 * @param bool &$isStart
 * @return ?string The matched type, or null if no match.
 */
public static function extractAnnotationType( Node $node, bool &$isStart = false ): ?string {
	$typeOf = DOMUtils::matchTypeOf( $node, self::ANNOTATION_META_TYPE_REGEXP );
	if (
		$typeOf === null ||
		!preg_match( self::ANNOTATION_META_TYPE_REGEXP, $typeOf, $captures )
	) {
		return null;
	}
	$isStart = !str_ends_with( $typeOf, '/End' );
	return $captures[1];
}
878 | |
/**
 * Check whether a node is a meta signifying the start of an annotated part of the DOM:
 * a <meta> element with an annotation type that is not an "/End" marker.
 */
public static function isAnnotationStartMarkerMeta( Node $node ): bool {
	if ( $node instanceof Element && DOMCompat::nodeName( $node ) === 'meta' ) {
		// The flag is filled in by reference by extractAnnotationType().
		$startFlag = false;
		return self::extractAnnotationType( $node, $startFlag ) !== null && $startFlag;
	}
	return false;
}
890 | |
/**
 * Check whether a node is a meta signifying the end of an annotated part of the DOM:
 * a <meta> element with an annotation type that is an "/End" marker.
 */
public static function isAnnotationEndMarkerMeta( Node $node ): bool {
	if ( $node instanceof Element && DOMCompat::nodeName( $node ) === 'meta' ) {
		// The flag is filled in by reference by extractAnnotationType().
		$startFlag = false;
		return self::extractAnnotationType( $node, $startFlag ) !== null && !$startFlag;
	}
	return false;
}
902 | |
/**
 * Check whether the meta tag was moved from its initial position.
 * Only annotation metas are considered; the answer is read from the
 * "wasMoved" entry of the node's data-parsoid and defaults to false.
 */
public static function isMovedMetaTag( Node $node ): bool {
	if ( !( $node instanceof Element ) || self::matchAnnotationMeta( $node ) === null ) {
		return false;
	}
	return DOMDataUtils::getDataParsoid( $node )->wasMoved ?? false;
}
915 | |
/**
 * Returns true if a node is a (start or end) annotation meta tag.
 * A null node is never one.
 */
public static function isMarkerAnnotation( ?Node $n ): bool {
	if ( $n === null ) {
		return false;
	}
	return self::matchAnnotationMeta( $n ) !== null;
}
920 | |
/**
 * Extracts the media format from the node's "mw:File" typeof: the
 * segment following the first '/' (e.g. "Thumb" for "mw:File/Thumb").
 * Returns an empty string when no typeof matches or it has no such segment.
 */
public static function getMediaFormat( Element $node ): string {
	$typeOf = DOMUtils::matchTypeOf( $node, '#^mw:File(/|$)#' ) ?? '';
	$segments = explode( '/', $typeOf );
	return $segments[1] ?? '';
}
927 | |
/**
 * Whether the media format of the node (see ::getMediaFormat()) is one
 * of the formats listed here as having a visible caption.
 */
public static function hasVisibleCaption( Element $node ): bool {
	return match ( self::getMediaFormat( $node ) ) {
		// 'Manualthumb' is deliberately not listed, FIXME(T305759)
		'Thumb', 'Frame' => true,
		default => false,
	};
}
934 | |
/**
 * Ref dom post-processing happens after adding media info, so the
 * linkbacks aren't available in the textContent added to the alt.
 * However, when serializing, they are in the caption elements. So, this
 * special handler drops the linkbacks for the purpose of comparison.
 *
 * Concretely: text nodes are concatenated in document order, recursing
 * into child elements except metadata tags and elements typed
 * "mw:Extension/ref"; all other node types are ignored.
 */
public static function textContentFromCaption( Node $node ): string {
	$text = '';
	for ( $child = $node->firstChild; $child; $child = $child->nextSibling ) {
		if ( $child instanceof Text ) {
			$text .= $child->nodeValue;
			continue;
		}
		$descend = $child instanceof Element &&
			!DOMUtils::isMetaDataTag( $child ) &&
			!DOMUtils::hasTypeOf( $child, "mw:Extension/ref" );
		if ( $descend ) {
			$text .= self::textContentFromCaption( $child );
		}
	}
	return $text;
}
958 | |
959 | } |