Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
17.93% |
45 / 251 |
|
14.52% |
9 / 62 |
CRAP | |
0.00% |
0 / 1 |
WTUtils | |
17.93% |
45 / 251 |
|
14.52% |
9 / 62 |
14850.76 | |
0.00% |
0 / 1 |
hasLiteralHTMLMarker | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
isLiteralHTMLNode | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isZeroWidthWikitextElt | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isBlockNodeWithVisibleWT | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isATagFromWikiLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
isATagFromExtLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
isATagFromURLLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isATagFromMagicLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
matchTplType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasExpandedAttrsType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isTplMarkerMeta | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isTplStartMarkerMeta | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isTplEndMarkerMeta | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
findFirstEncapsulationWrapperNode | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
20 | |||
isNewElt | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
isIndentPre | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
isInlineMedia | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isGeneratedFigure | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
indentPreDSRCorrection | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
isEncapsulatedDOMForestRoot | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
isRedirectLink | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
isCategoryLink | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
isSolTransparentLink | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
emitsSolTransparentSingleLineWT | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
isFallbackIdSpan | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isRenderingTransparentNode | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
56 | |||
inHTMLTableTag | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
isFirstEncapsulationWrapperNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isFirstExtensionWrapperNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isExtensionOutputtingCoreMwDomSpec | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
isEncapsulationWrapper | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
isDOMFragmentWrapper | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isSealedFragmentOfType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isParsoidSectionTag | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
fromExtensionContent | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
5 | |||
fromEncapsulatedContent | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
getWTSource | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getAboutSiblings | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
90 | |||
skipOverEncapsulatedContent | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
encodeComment | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
decodeComment | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
decodedCommentLength | |
58.33% |
7 / 12 |
|
0.00% |
0 / 1 |
8.60 | |||
getExtTagName | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getNativeExt | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isIncludeTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
12 | |||
isAnnOrExtTag | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
createEmptyLocalizationFragment | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
createPageContentI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
createInterfaceI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
createLangI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
addPageContentI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
addInterfaceI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
addLangI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
matchAnnotationMeta | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
extractAnnotationType | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
isAnnotationStartMarkerMeta | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isAnnotationEndMarkerMeta | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isMovedMetaTag | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isMarkerAnnotation | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
getMediaFormat | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
hasVisibleCaption | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
textContentFromCaption | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | // Suppress UnusedPluginSuppression because |
4 | // Phan on PHP 7.4 and PHP 8.1 need different suppressions |
5 | // @phan-file-suppress UnusedPluginSuppression,UnusedPluginFileSuppression |
6 | |
7 | namespace Wikimedia\Parsoid\Utils; |
8 | |
9 | use DOMException; |
10 | use Wikimedia\Assert\UnreachableException; |
11 | use Wikimedia\Bcp47Code\Bcp47Code; |
12 | use Wikimedia\Parsoid\Config\Env; |
13 | use Wikimedia\Parsoid\DOM\Comment; |
14 | use Wikimedia\Parsoid\DOM\Document; |
15 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
16 | use Wikimedia\Parsoid\DOM\Element; |
17 | use Wikimedia\Parsoid\DOM\Node; |
18 | use Wikimedia\Parsoid\DOM\Text; |
19 | use Wikimedia\Parsoid\Ext\ExtensionTagHandler; |
20 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
21 | use Wikimedia\Parsoid\NodeData\I18nInfo; |
22 | use Wikimedia\Parsoid\NodeData\TempData; |
23 | use Wikimedia\Parsoid\Tokens\CommentTk; |
24 | use Wikimedia\Parsoid\Wikitext\Consts; |
25 | use Wikimedia\Parsoid\Wt2Html\Frame; |
26 | |
27 | /** |
28 | * These utilities pertain to querying / extracting / modifying wikitext information from the DOM. |
29 | */ |
30 | class WTUtils { |
/**
 * Regexp matching a typeof token that marks a first encapsulation
 * wrapper: mw:Transclusion, mw:Param, mw:LanguageVariant, or
 * mw:Extension/<non-whitespace>. The token must be delimited by
 * whitespace or by the start/end of the subject string.
 * Used by isFirstEncapsulationWrapperNode().
 */
private const FIRST_ENCAP_REGEXP =
	'#(?:^|\s)(mw:(?:Transclusion|Param|LanguageVariant|Extension(/\S+)))(?=$|\s)#D';

/**
 * Regex corresponding to FIRST_ENCAP_REGEXP, but excluding extensions. If FIRST_ENCAP_REGEXP is
 * updated, this one should be as well.
 */
private const NON_EXTENSION_ENCAP_REGEXP =
	'#(?:^|\s)(mw:(?:Transclusion|Param|LanguageVariant))(?=$|\s)#D';

/**
 * Regexp for checking marker metas typeofs representing
 * transclusion markup or template param markup.
 * An optional "/End" suffix is accepted.
 */
private const TPL_META_TYPE_REGEXP = '#^mw:(?:Transclusion|Param)(?:/End)?$#D';

/**
 * Regexp for checking marker metas typeofs representing
 * annotation markup. The first capture group holds the annotation
 * name; an optional "/End" suffix is accepted.
 */
public const ANNOTATION_META_TYPE_REGEXP = '#^mw:(?:Annotation/([\w\d]+))(?:/End)?$#uD';
52 | |
/**
 * Tell whether a data-parsoid object records that the original
 * wikitext used a literal HTML element (like table or p), i.e.
 * whether its `stx` property is set to 'html'.
 *
 * @param DataParsoid $dp
 * @return bool
 */
public static function hasLiteralHTMLMarker( DataParsoid $dp ): bool {
	// A missing (or null) stx never compares equal to 'html'.
	return ( $dp->stx ?? null ) === 'html';
}
64 | |
/**
 * Apply {@link #hasLiteralHTMLMarker} to the data-parsoid of a node.
 * Anything that is not an Element (including null) yields false.
 *
 * @param ?Node $node
 * @return bool
 */
public static function isLiteralHTMLNode( ?Node $node ): bool {
	if ( !( $node instanceof Element ) ) {
		return false;
	}
	$dp = DOMDataUtils::getDataParsoid( $node );
	return self::hasLiteralHTMLMarker( $dp );
}
74 | |
/**
 * Is `$node` named like one of the tags listed in
 * Consts::$ZeroWidthWikitextTags, without being a literal HTML node?
 *
 * @param Node $node
 * @return bool
 */
public static function isZeroWidthWikitextElt( Node $node ): bool {
	$name = DOMCompat::nodeName( $node );
	if ( !isset( Consts::$ZeroWidthWikitextTags[$name] ) ) {
		return false;
	}
	return !self::isLiteralHTMLNode( $node );
}
83 | |
/**
 * Is `$node` a wikitext block node whose markup is visible in wikitext?
 *
 * Block nodes that are zero-width wikitext elements do not qualify;
 * examples of such invisible block nodes are a `<p>`-tag that Parsoid
 * generated, or a `<ul>`, `<ol>` tag.
 *
 * @param Node $node
 * @return bool
 */
public static function isBlockNodeWithVisibleWT( Node $node ): bool {
	if ( !DOMUtils::isWikitextBlockNode( $node ) ) {
		return false;
	}
	return !self::isZeroWidthWikitextElt( $node );
}
96 | |
/**
 * Detect whether an A-node was written with [[..]] wikilink syntax.
 *
 * The rel attribute alone is not sufficient anymore since mw:ExtLink is
 * used for all three link syntaxes (wikilinks, ext links, url links).
 * The node qualifies when it has rel mw:WikiLink, or when its
 * data-parsoid `stx` is set to something other than "url" / "magiclink".
 *
 * @param Element $node
 * @return bool
 */
public static function isATagFromWikiLinkSyntax( Element $node ): bool {
	if ( DOMCompat::nodeName( $node ) !== 'a' ) {
		return false;
	}

	$dp = DOMDataUtils::getDataParsoid( $node );
	if ( DOMUtils::hasRel( $node, 'mw:WikiLink' ) ) {
		return true;
	}

	$stx = $dp->stx ?? null;
	return $stx !== null && $stx !== 'url' && $stx !== 'magiclink';
}
114 | |
/**
 * Detect whether an A-node was written with ext-link syntax.
 *
 * The rel attribute is not sufficient anymore since mw:ExtLink is used
 * for multiple link types: the node must have rel mw:ExtLink and its
 * data-parsoid `stx` must be neither "url" nor "magiclink" (an unset
 * `stx` is accepted).
 *
 * @param Element $node
 * @return bool
 */
public static function isATagFromExtLinkSyntax( Element $node ): bool {
	if ( DOMCompat::nodeName( $node ) !== 'a' ) {
		return false;
	}

	$dp = DOMDataUtils::getDataParsoid( $node );
	if ( !DOMUtils::hasRel( $node, 'mw:ExtLink' ) ) {
		return false;
	}

	$stx = $dp->stx ?? null;
	return $stx !== 'url' && $stx !== 'magiclink';
}
132 | |
/**
 * Detect whether an A-node was written with url-link syntax.
 *
 * The rel attribute is not sufficient anymore since mw:ExtLink is used
 * for multiple link types: the node must have rel mw:ExtLink and a
 * data-parsoid `stx` equal to "url".
 *
 * @param Element $node
 * @return bool
 */
public static function isATagFromURLLinkSyntax( Element $node ): bool {
	if ( DOMCompat::nodeName( $node ) !== 'a' ) {
		return false;
	}

	$dp = DOMDataUtils::getDataParsoid( $node );
	if ( !DOMUtils::hasRel( $node, 'mw:ExtLink' ) ) {
		return false;
	}
	return ( $dp->stx ?? null ) === 'url';
}
150 | |
/**
 * Detect whether an A-node was written with magic-link syntax.
 *
 * The rel attribute is not sufficient anymore since mw:ExtLink is used
 * for multiple link types: the node must have rel mw:ExtLink and a
 * data-parsoid `stx` equal to "magiclink".
 *
 * @param Element $node
 * @return bool
 */
public static function isATagFromMagicLinkSyntax( Element $node ): bool {
	if ( DOMCompat::nodeName( $node ) !== 'a' ) {
		return false;
	}

	$dp = DOMDataUtils::getDataParsoid( $node );
	if ( !DOMUtils::hasRel( $node, 'mw:ExtLink' ) ) {
		return false;
	}
	return ( $dp->stx ?? null ) === 'magiclink';
}
168 | |
/**
 * Match a node's typeof against TPL_META_TYPE_REGEXP, i.e. the typeofs
 * used for transclusion / template param markup.
 *
 * @param Element $node
 * @return ?string The matched type, or null if no match.
 */
public static function matchTplType( Element $node ): ?string {
	$matchedType = DOMUtils::matchTypeOf( $node, self::TPL_META_TYPE_REGEXP );
	return $matchedType;
}
178 | |
/**
 * Check whether a node's typeof signifies an expanded attribute,
 * i.e. contains a type of the form mw:ExpandedAttrs, optionally
 * followed by one or more "/suffix" segments.
 *
 * @param Element $node
 * @return bool
 */
public static function hasExpandedAttrsType( Element $node ): bool {
	// The D modifier makes `$` match only at the very end of the subject
	// (not before a trailing newline), consistent with the other anchored
	// typeof regexps in this class.
	return DOMUtils::matchTypeOf( $node, '/^mw:ExpandedAttrs(\/\S+)*$/D' ) !== null;
}
189 | |
/**
 * Is `$node` a meta tag whose typeof matches TPL_META_TYPE_REGEXP,
 * i.e. a marker meta for a template expansion (start or end)?
 *
 * @param Node $node
 * @return bool
 */
public static function isTplMarkerMeta( Node $node ): bool {
	$type = DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP );
	return $type !== null;
}
199 | |
/**
 * Is `$node` a meta marking the start of a template expansion?
 * That is: a template marker meta whose matched type does not end
 * in "/End".
 *
 * @param Node $node
 * @return bool
 */
public static function isTplStartMarkerMeta( Node $node ): bool {
	$type = DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP );
	if ( $type === null ) {
		return false;
	}
	return !str_ends_with( $type, '/End' );
}
210 | |
/**
 * Is `$node` a meta marking the end of a template expansion?
 * That is: a template marker meta whose matched type ends in "/End".
 *
 * @param Node $node
 * @return bool
 */
public static function isTplEndMarkerMeta( Node $node ): bool {
	$type = DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP );
	if ( $type === null ) {
		return false;
	}
	return str_ends_with( $type, '/End' );
}
221 | |
/**
 * Locate the first wrapper element of the encapsulated content that
 * `$node` belongs to.
 *
 * Starting at `$node`, walks backwards over non-deleted previous
 * siblings for as long as they are elements carrying the same about-id,
 * and returns the earliest such node if it is a first encapsulation
 * wrapper.
 *
 * @param Node $node
 * @return Element|null Null if `$node` is not an encapsulated DOM forest
 *   root, or if the earliest same-about sibling is not a first
 *   encapsulation wrapper.
 */
public static function findFirstEncapsulationWrapperNode( Node $node ): ?Element {
	if ( !self::isEncapsulatedDOMForestRoot( $node ) ) {
		return null;
	}
	/** @var Element $node */
	DOMUtils::assertElt( $node );

	$about = DOMCompat::getAttribute( $node, 'about' );

	// Rewind $cursor to the earliest adjacent sibling sharing the about-id.
	$cursor = $node;
	$prev = DiffDOMUtils::previousNonDeletedSibling( $cursor );
	while (
		$prev instanceof Element &&
		DOMCompat::getAttribute( $prev, 'about' ) === $about
	) {
		$cursor = $prev;
		$prev = DiffDOMUtils::previousNonDeletedSibling( $cursor );
	}

	// NOTE: findFirstEncapsulationWrapperNode can be called by code
	// even before templates have been fully encapsulated everywhere.
	// ProcessTreeBuilderFixups::removeAutoInsertedEmptyTags is the main
	// culprit here and it makes the contract for this helper murky
	// by hiding potential brokenness since this should never return null
	// once all templates have been encapsulated!
	$elt = self::isFirstEncapsulationWrapperNode( $cursor ) ? $cursor : null;
	'@phan-var ?Element $elt'; // @var ?Element $elt
	return $elt;
}
253 | |
/**
 * Tell whether a DOM node is a new node added during an edit session
 * (as opposed to an existing node from parsed wikitext).
 *
 * As written, this function can only be used on non-template/extension
 * content or on the top-level nodes of template/extension content. It
 * will return the wrong results on non-top-level nodes of
 * template/extension content.
 *
 * @param Node $node
 * @return bool Always false for non-element nodes (newness cannot be
 *   determined on text/comment nodes).
 */
public static function isNewElt( Node $node ): bool {
	if ( $node instanceof Element ) {
		// For template/extension content, newness is recorded on the
		// encapsulation wrapper, so consult that node when there is one.
		$wrapper = self::findFirstEncapsulationWrapperNode( $node );
		$target = $wrapper ?? $node;
		return DOMDataUtils::getDataParsoid( $target )->getTempFlag( TempData::IS_NEW );
	}

	return false;
}
276 | |
/**
 * Is `$node` a pre caused by indentation in the original wikitext,
 * i.e. a "pre" node that is not a literal HTML node?
 *
 * @param Node $node
 * @return bool
 */
public static function isIndentPre( Node $node ): bool {
	if ( DOMCompat::nodeName( $node ) !== 'pre' ) {
		return false;
	}
	return !self::isLiteralHTMLNode( $node );
}
285 | |
/**
 * Is `$node` a generated figure that is not a `figure` element
 * (i.e. span, figure-inline)?
 *
 * @param Node $node
 * @return bool
 */
public static function isInlineMedia( Node $node ): bool {
	if ( !self::isGeneratedFigure( $node ) ) {
		return false;
	}
	return DOMCompat::nodeName( $node ) !== 'figure';
}
294 | |
/**
 * Does `$node` carry a media typeof (mw:File, or the older mw:Image,
 * mw:Video, mw:Audio), either bare or followed by a "/..." suffix?
 *
 * @param Node $node
 * @return bool
 */
public static function isGeneratedFigure( Node $node ): bool {
	// TODO: Remove "Image|Video|Audio" when version 2.4.0 of the content
	// is no longer supported
	$mediaType = DOMUtils::matchTypeOf( $node, '#^mw:(File|Image|Video|Audio)($|/)#D' );
	return $mediaType !== null;
}
304 | |
/**
 * Find how much offset is necessary for the DSR of an
 * indent-originated pre tag.
 *
 * The correction is the number of newlines in the text of `$textNode`,
 * except that a trailing newline is not counted when `$textNode` is the
 * last child of the pre.
 *
 * @param Node $textNode Expected to be a text node; this is not checked.
 * @return int 0 when the node has no parent or its parent is not an
 *   indent-pre.
 */
public static function indentPreDSRCorrection( Node $textNode ): int {
	// NOTE: This assumes a text-node and doesn't check that it is one.
	//
	// FIXME: Doesn't handle text nodes that are not direct children of the pre
	$parent = $textNode->parentNode;

	// A detached node has no enclosing pre, so no correction applies.
	// (Passing null on to isIndentPre() would raise a TypeError.)
	if ( $parent === null || !self::isIndentPre( $parent ) ) {
		return 0;
	}

	// nodeValue is null for node types without a value; treat as empty.
	$text = $textNode->nodeValue ?? '';
	$numNLs = substr_count( $text, "\n" );

	// We don't want the trailing newline of the last child of the pre
	// to contribute a pre-correction since it doesn't add new content
	// in the pre-node after the text
	if ( $parent->lastChild === $textNode && str_ends_with( $text, "\n" ) ) {
		$numNLs--;
	}

	return $numNLs;
}
331 | |
/**
 * Check if `$node` is a root in an encapsulated DOM forest: an element
 * whose `about` attribute is a Parsoid object id.
 *
 * @param Node $node
 * @return bool
 */
public static function isEncapsulatedDOMForestRoot( Node $node ): bool {
	if ( !( $node instanceof Element ) ) {
		return false;
	}

	$about = DOMCompat::getAttribute( $node, 'about' );
	if ( $about === null ) {
		return false;
	}

	// FIXME: Ensure that our DOM spec clarifies this expectation
	return Utils::isParsoidObjectId( $about );
}
348 | |
/**
 * Is `$node` a `link` element whose rel matches mw:PageProp/redirect,
 * i.e. does it represent a redirect link?
 *
 * @param Node $node
 * @return bool
 */
public static function isRedirectLink( Node $node ): bool {
	if ( !( $node instanceof Element ) ) {
		return false;
	}
	if ( DOMCompat::nodeName( $node ) !== 'link' ) {
		return false;
	}
	return DOMUtils::matchRel( $node, '#\bmw:PageProp/redirect\b#' ) !== null;
}
360 | |
/**
 * Is `$node` a `link` element whose rel matches mw:PageProp/Category,
 * i.e. does it represent a category link? Null yields false.
 *
 * @param ?Node $node
 * @return bool
 */
public static function isCategoryLink( ?Node $node ): bool {
	if ( !( $node instanceof Element ) ) {
		return false;
	}
	if ( DOMCompat::nodeName( $node ) !== 'link' ) {
		return false;
	}
	return DOMUtils::matchRel( $node, '#\bmw:PageProp/Category\b#' ) !== null;
}
372 | |
/**
 * Is `$node` a `link` element whose rel matches
 * TokenUtils::SOL_TRANSPARENT_LINK_REGEX, i.e. a sol-transparent link?
 *
 * @param Node $node
 * @return bool
 */
public static function isSolTransparentLink( Node $node ): bool {
	if ( !( $node instanceof Element ) ) {
		return false;
	}
	if ( DOMCompat::nodeName( $node ) !== 'link' ) {
		return false;
	}
	return DOMUtils::matchRel( $node, TokenUtils::SOL_TRANSPARENT_LINK_REGEX ) !== null;
}
384 | |
/**
 * Check if `$node` emits wikitext that is sol-transparent in wikitext
 * form. This is a test for wikitext that doesn't introduce line breaks.
 *
 * Comment, whitespace text nodes, category links, redirect links,
 * behavior switches, and include directives currently satisfy this
 * definition.
 *
 * This should come close to matching TokenUtils.isSolTransparent()
 *
 * @param Node $node
 * @return bool
 */
public static function emitsSolTransparentSingleLineWT( Node $node ): bool {
	if ( $node instanceof Text ) {
		// NB: We differ here to meet the nl condition: only spaces and
		// tabs are accepted, so text containing a newline is rejected.
		return preg_match( '/^[ \t]*$/D', $node->nodeValue ) === 1;
	}

	// NB: The only metas in a DOM should be for behavior switches and
	// include directives, other than explicit HTML meta tags. This
	// differs from our counterpart in Util where ref meta tokens
	// haven't been expanded to spans yet.
	return self::isRenderingTransparentNode( $node );
}
411 | |
/**
 * Is `$node` a span with typeof mw:FallbackId?
 *
 * This is the span added to headings to add fallback ids for when legacy
 * and HTML5 ids don't match up. This prevents broken links to legacy ids.
 *
 * @param Node $node
 * @return bool
 */
public static function isFallbackIdSpan( Node $node ): bool {
	$isFallback = DOMUtils::hasNameAndTypeOf( $node, 'span', 'mw:FallbackId' );
	return $isFallback;
}
422 | |
/**
 * Is `$node` one of the 'metadata'-like nodes that don't show up in
 * output rendering?
 * - In Parsoid output, they are represented by link/meta tags.
 * - In the PHP parser, they are completely stripped from the input early on.
 * Because of this property, these rendering-transparent nodes are also
 * SOL-transparent for the purposes of parsing behavior.
 *
 * Accepted nodes: comments, sol-transparent links, meta elements that
 * are neither marker annotations nor of literal-HTML origin, and
 * fallback-id spans.
 *
 * @param Node $node
 * @return bool
 */
public static function isRenderingTransparentNode( Node $node ): bool {
	// FIXME: Can we change this entire thing to
	// $node instanceof Comment ||
	// DOMUtils::getDataParsoid($node).stx !== 'html' &&
	// (DOMCompat::nodeName($node) === 'meta' || DOMCompat::nodeName($node) === 'link')
	//
	if ( $node instanceof Comment || self::isSolTransparentLink( $node ) ) {
		return true;
	}

	// Catch-all for everything else.
	if (
		$node instanceof Element &&
		DOMCompat::nodeName( $node ) === 'meta' &&
		!self::isMarkerAnnotation( $node ) &&
		( DOMDataUtils::getDataParsoid( $node )->stx ?? '' ) !== 'html'
	) {
		return true;
	}

	return self::isFallbackIdSpan( $node );
}
448 | |
/**
 * Is `$node` nested inside a table tag that uses HTML instead of native
 * wikitext?
 *
 * Walks up the ancestors for as long as they are table tags, stopping
 * at the nearest `<table>`.
 *
 * @param Node $node
 * @return bool
 */
public static function inHTMLTableTag( Node $node ): bool {
	for ( $p = $node->parentNode; DOMUtils::isTableTag( $p ); $p = $p->parentNode ) {
		if ( self::isLiteralHTMLNode( $p ) ) {
			return true;
		}
		if ( DOMCompat::nodeName( $p ) === 'table' ) {
			// Don't cross <table> boundaries
			return false;
		}
	}

	return false;
}
470 | |
/**
 * Is `$node` the first wrapper element of encapsulated content, i.e.
 * does its typeof match FIRST_ENCAP_REGEXP?
 *
 * @param Node $node
 * @return bool
 */
public static function isFirstEncapsulationWrapperNode( Node $node ): bool {
	$encapType = DOMUtils::matchTypeOf( $node, self::FIRST_ENCAP_REGEXP );
	return $encapType !== null;
}
480 | |
/**
 * Is `$node` the first wrapper element of extension content, i.e.
 * does its typeof contain "mw:Extension/"?
 *
 * @param Node $node
 * @return bool
 */
public static function isFirstExtensionWrapperNode( Node $node ): bool {
	$extType = DOMUtils::matchTypeOf( $node, '#mw:Extension/#' );
	return $extType !== null;
}
490 | |
/**
 * Checks whether a first encapsulation wrapper node is encapsulating an extension
 * that outputs MediaWiki Core DOM Spec HTML (https://www.mediawiki.org/wiki/Specs/HTML).
 *
 * Nodes with a non-extension encapsulation type always yield false.
 * Otherwise the answer comes from the extension tag config option
 * `outputHasCoreMwDomSpecMarkup`, which must be exactly `true`.
 *
 * @param Node $node
 * @param Env $env
 * @return bool
 */
public static function isExtensionOutputtingCoreMwDomSpec( Node $node, Env $env ): bool {
	$nonExtType = DOMUtils::matchTypeOf( $node, self::NON_EXTENSION_ENCAP_REGEXP );
	if ( $nonExtType !== null ) {
		return false;
	}

	$siteConfig = $env->getSiteConfig();
	$extConfig = $siteConfig->getExtTagConfig( self::getExtTagName( $node ) );
	return ( $extConfig['options']['outputHasCoreMwDomSpecMarkup'] ?? null ) === true;
}
507 | |
/**
 * Is `$node` an encapsulation wrapper elt?
 *
 * All root-level nodes of generated content are considered
 * encapsulation wrappers and share an about-id.
 *
 * @param Node $node
 * @return bool
 */
public static function isEncapsulationWrapper( Node $node ): bool {
	// True if it has an encapsulation type or while walking backwards
	// over elts with identical about ids, we run into a node with an
	// encapsulation type.
	return $node instanceof Element &&
		self::findFirstEncapsulationWrapperNode( $node ) !== null;
}
526 | |
/**
 * Is `$node` a DOMFragment wrapper, i.e. does its typeof match
 * mw:DOMFragment, optionally followed by "/sealed/<word>"?
 *
 * @param Node $node
 * @return bool
 */
public static function isDOMFragmentWrapper( Node $node ): bool {
	// See TokenUtils::hasDOMFragmentType
	$fragmentType = DOMUtils::matchTypeOf( $node, '#^mw:DOMFragment(/sealed/\w+)?$#D' );
	return $fragmentType !== null;
}
537 | |
/**
 * Is `$node` a sealed DOMFragment of a specific type, i.e. does it
 * have the typeof "mw:DOMFragment/sealed/<type>"?
 *
 * @param Node $node
 * @param string $type
 * @return bool
 */
public static function isSealedFragmentOfType( Node $node, string $type ): bool {
	$sealedType = 'mw:DOMFragment/sealed/' . $type;
	return DOMUtils::hasTypeOf( $node, $sealedType );
}
548 | |
/**
 * Is `$node` a Parsoid-generated <section> tag, i.e. a `section`
 * element carrying a `data-mw-section-id` attribute?
 *
 * @param Node $node
 * @return bool
 */
public static function isParsoidSectionTag( Node $node ): bool {
	if ( !( $node instanceof Element ) ) {
		return false;
	}
	if ( DOMCompat::nodeName( $node ) !== 'section' ) {
		return false;
	}
	return $node->hasAttribute( 'data-mw-section-id' );
}
560 | |
/**
 * Is `$node` from extension content?
 *
 * Checks `$node` and then each of its ancestors, stopping once
 * DOMUtils::atTheTop() reports the top, for an extension typeof.
 *
 * @param Node $node
 * @param ?string $extType If non-null, checks for that specific extension
 * @return bool
 */
public static function fromExtensionContent( Node $node, ?string $extType = null ): bool {
	// NOTE(review): $extType is interpolated into the regexp unescaped
	// and unanchored, so a given name also matches longer names that
	// contain it — confirm that callers rely on / tolerate this.
	if ( $extType ) {
		$re = '#mw:Extension/' . $extType . '#';
	} else {
		$re = '#mw:Extension/\w+#';
	}

	for ( $cur = $node; $cur && !DOMUtils::atTheTop( $cur ); $cur = $cur->parentNode ) {
		if ( DOMUtils::matchTypeOf( $cur, $re ) ) {
			return true;
		}
	}

	return false;
}
577 | |
/**
 * Is `$node` from encapsulated (template, extension, etc.) content?
 *
 * Checks `$node` and then each of its ancestors, stopping once
 * DOMUtils::atTheTop() reports the top, for a first encapsulation
 * wrapper.
 *
 * @param Node $node
 * @return bool
 */
public static function fromEncapsulatedContent( Node $node ): bool {
	for ( $cur = $node; $cur && !DOMUtils::atTheTop( $cur ); $cur = $cur->parentNode ) {
		if ( self::findFirstEncapsulationWrapperNode( $cur ) !== null ) {
			return true;
		}
	}

	return false;
}
592 | |
/**
 * Compute, when possible, the wikitext source for `$node` by applying
 * its DSR to the source text of `$frame`. Returns null if the source
 * cannot be extracted (the node has no valid DSR).
 *
 * @param Frame $frame
 * @param Element $node
 * @return string|null
 */
public static function getWTSource( Frame $frame, Element $node ): ?string {
	$dsr = DOMDataUtils::getDataParsoid( $node )->dsr ?? null;

	// FIXME: We could probably change the null return to ''
	// Just need to verify that code that uses this won't break
	if ( !Utils::isValidDSR( $dsr ) ) {
		return null;
	}

	return $dsr->substr( $frame->getSrcText() );
}
609 | |
/**
 * Collect `$node` together with the siblings following it that carry
 * `$about` as their about id.
 *
 * This is used to fetch transclusion/extension content by using
 * the about-id as the key. This works because
 * transclusion/extension content is a forest of dom-trees formed
 * by adjacent dom-nodes. This is the contract that template
 * encapsulation, dom-reuse, and VE code all have to abide by.
 *
 * The only exception to this adjacency rule is IEW nodes in
 * fosterable positions (in tables) which are not span-wrapped to
 * prevent them from getting fostered out. Such nodes are collected
 * as well, but any IEW left at the end of the list is dropped again.
 *
 * @param Node $node
 * @param ?string $about If null, only `$node` itself is returned.
 * @return Node[]
 */
public static function getAboutSiblings( Node $node, ?string $about ): array {
	$nodes = [ $node ];

	if ( $about === null ) {
		return $nodes;
	}

	for ( $sib = $node->nextSibling; $sib; $sib = $sib->nextSibling ) {
		$sameAbout = $sib instanceof Element &&
			DOMCompat::getAttribute( $sib, 'about' ) === $about;
		if (
			!$sameAbout &&
			!( DOMUtils::isFosterablePosition( $sib ) && DOMUtils::isIEW( $sib ) )
		) {
			// First sibling that belongs to neither category ends the run.
			break;
		}
		$nodes[] = $sib;
	}

	// Remove already consumed trailing IEW, if any
	while ( $nodes !== [] && DOMUtils::isIEW( end( $nodes ) ) ) {
		array_pop( $nodes );
	}

	return $nodes;
}
651 | |
/**
 * This function is only intended to be used on encapsulated nodes
 * (Template/Extension/Param content).
 *
 * Given a `$node` that has an about-id, it is assumed that it is
 * generated by templates or extensions. This function skips over all
 * following content nodes and returns the first non-template node
 * that follows it. For a node without an about-id (or a non-element),
 * the plain next sibling is returned.
 *
 * @param Node $node
 * @return Node|null
 */
public static function skipOverEncapsulatedContent( Node $node ): ?Node {
	if ( !( $node instanceof Element ) ) {
		return $node->nextSibling;
	}

	$about = DOMCompat::getAttribute( $node, 'about' );
	if ( $about === null ) {
		return $node->nextSibling;
	}

	// Guaranteed not to be empty. It will at least include $node.
	$aboutSiblings = self::getAboutSiblings( $node, $about );
	return end( $aboutSiblings )->nextSibling;
}
675 | |
676 | /** |
677 | * Comment encoding/decoding. |
678 | * |
679 | * * Some relevant phab tickets: T94055, T70146, T60184, T95039 |
680 | * |
681 | * The wikitext comment rule is very simple: <!-- starts a comment, |
682 | * and --> ends a comment. This means we can have almost anything as the |
683 | * contents of a comment (except the string "-->", but see below), including |
684 | * several things that are not valid in HTML5 comments: |
685 | * |
686 | * * For one, the html5 comment parsing algorithm [0] leniently accepts |
687 | * --!> as a closing comment tag, which differs from the php+tidy combo. |
688 | * |
689 | * * If the comment's data matches /^-?>/, html5 will end the comment. |
690 | * For example, <!-->stuff<--> breaks up as |
691 | * <!--> (the comment) followed by, stuff<--> (as text). |
692 | * |
693 | * * Finally, comment data shouldn't contain two consecutive hyphen-minus |
694 | * characters (--), nor end in a hyphen-minus character (/-$/) as defined |
695 | * in the spec [1]. |
696 | * |
697 | * We work around all these problems by using HTML entity encoding inside |
698 | * the comment body. The characters -, >, and & must be encoded in order |
699 | * to prevent premature termination of the comment by one of the cases |
700 | * above. Encoding other characters is optional; all entities will be |
701 | * decoded during wikitext serialization. |
702 | * |
703 | * In order to allow *arbitrary* content inside a wikitext comment, |
704 | * including the forbidden string "-->" we also do some minimal entity |
705 | * decoding on the wikitext. We are also limited by our inability |
706 | * to encode DSR attributes on the comment $node, so our wikitext entity |
707 | * decoding must be 1-to-1: that is, there must be a unique "decoded" |
708 | * string for every wikitext sequence, and for every decoded string there |
709 | * must be a unique wikitext which creates it. |
710 | * |
711 | * The basic idea here is to replace every string ab*c with the string with |
712 | * one more b in it. This creates a string with no instance of "ac", |
713 | * so you can use 'ac' to encode one more code point. In this case |
714 | * a is "--&", "b" is "amp;", and "c" is "gt;" and we use ac to |
715 | * encode "-->" (which is otherwise unspeakable in wikitext). |
716 | * |
717 | * Note that any user content which does not match the regular |
718 | * expression /--(>|&(amp;)*gt;)/ is unchanged in its wikitext |
719 | * representation, as shown in the first two examples below. |
720 | * |
721 | * User-authored comment text    Wikitext        HTML5 DOM |
722 | * --------------------------    --------------  --------------------------- |
723 | * & - >                         & - >           &amp; &#45; &gt; |
724 | * Use &gt; here                 Use &gt; here   Use &amp;gt; here |
725 | * -->                           --&gt;          &#45;&#45;&gt; |
726 | * --&gt;                        --&amp;gt;      &#45;&#45;&amp;gt; |
727 | * --&amp;gt;                    --&amp;amp;gt;  &#45;&#45;&amp;amp;gt; |
728 | * |
729 | * [0] http://www.w3.org/TR/html5/syntax.html#comment-start-state |
730 | * [1] http://www.w3.org/TR/html5/syntax.html#comments |
731 | * |
732 | * Map a wikitext-escaped comment to an HTML DOM-escaped comment. |
733 | * |
734 | * @param string $comment Wikitext-escaped comment. |
735 | * @return string DOM-escaped comment. |
736 | */ |
public static function encodeComment( string $comment ): string {
	// Step 1: recover the comment's "true value" by running every
	// wikitext-escaped "--&(amp;)*gt;" sequence through the wikitext
	// entity decoder.
	$unescapeMatch = static function ( $match ) {
		return Utils::decodeWtEntities( $match[0] );
	};
	$unescaped = preg_replace_callback( '/--&(amp;)*gt;/', $unescapeMatch, $comment );

	// Step 2: entity-encode each '-', '>' and '&' of the true value so the
	// result can sit inside an HTML comment without terminating it early.
	// Unlike step 1, this mapping is not required to be 1-to-1.
	$encodeChar = static function ( $match ) {
		return Utils::entityEncodeAll( $match[0] );
	};
	return preg_replace_callback( '/[->&]/', $encodeChar, $unescaped );
}
750 | |
/**
 * Map an HTML DOM-escaped comment to a wikitext-escaped comment.
 *
 * The DOM value is first entity-decoded to its "true value". Every
 * sequence matching /--(&(amp;)*gt;|>)/ is then re-escaped so that the
 * literal string "-->" can never appear in the wikitext comment body:
 *  - "-->" is written as "--&gt;"
 *  - "--&gt;", "--&amp;gt;", ... each gain one extra "amp;"
 * Text that does not match that pattern is returned as decoded.
 *
 * @param string $comment DOM-escaped comment.
 * @return string Wikitext-escaped comment.
 */
public static function decodeComment( string $comment ): string {
	// Undo HTML entity escaping to obtain "true value" of comment.
	$trueValue = Utils::decodeWtEntities( $comment );

	// Now encode this "true value" in such a way that the string "-->"
	// never shows up in the output.
	return preg_replace_callback( '/--(&(amp;)*gt;|>)/', static function ( $m ) {
		$s = $m[0];
		if ( $s === '-->' ) {
			// The comment terminator itself gets the shortest escaped form.
			return '--&gt;';
		}
		// $s starts with "--&"; keep everything after that prefix and
		// insert one additional "amp;" so the mapping stays reversible.
		return '--&amp;' . substr( $s, 3 );
	}, $trueValue );
}
767 | |
/**
 * Compute the wikitext DSR length corresponding to an HTML DOM comment
 * value.
 *
 * The length is the byte length of the wikitext-escaped comment body plus
 * the delimiter length: 7 for `<!--` and `-->`, or 4 when the comment is
 * flagged as unclosed (presumably because only `<!--` is present in the
 * source).
 *
 * @param Comment|CommentTk $node A comment node containing a DOM-escaped comment.
 * @return int The wikitext length in UTF-8 bytes necessary to encode this
 *  comment, including its delimiters.
 */
public static function decodedCommentLength( $node ): int {
	if ( $node instanceof Comment ) {
		$escapedValue = $node->nodeValue;
		// DOM comments are marked unclosed by a placeholder sitting
		// immediately before them.
		$before = $node->previousSibling;
		$unclosed = $before &&
			DOMUtils::hasTypeOf( $before, "mw:Placeholder/UnclosedComment" );
	} elseif ( $node instanceof CommentTk ) {
		$escapedValue = $node->value;
		// Tokens carry the marker as a flag in their data-parsoid.
		// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
		$unclosed = isset( $node->dataParsoid->unclosedComment );
	} else {
		throw new UnreachableException( 'Should not be here!' );
	}

	// 7 covers "<!--" plus "-->"; 4 is used for unclosed comments.
	$delimiterLen = $unclosed ? 4 : 7;
	return strlen( self::decodeComment( $escapedValue ) ) + $delimiterLen;
}
797 | |
/**
 * Return the lower-cased extension tag name taken from a node's
 * "mw:Extension/<name>" typeof.
 *
 * @param Node $node
 * @return ?string The tag name, or null when DOMUtils::matchTypeOf()
 *  reports no matching typeof.
 */
public static function getExtTagName( Node $node ): ?string {
	$extType = DOMUtils::matchTypeOf( $node, '#^mw:Extension/(.+?)$#D' );
	if ( !$extType ) {
		return null;
	}
	// Drop the fixed "mw:Extension/" prefix; the remainder is the tag name.
	$prefixLen = strlen( 'mw:Extension/' );
	return mb_strtolower( substr( $extType, $prefixLen ) );
}
806 | |
/**
 * Fetch the extension tag implementation for the extension a node
 * represents, as reported by the site configuration.
 *
 * @param Env $env
 * @param Node $node
 * @return ?ExtensionTagHandler null when the node yields no extension tag
 *  name; otherwise the result of SiteConfig::getExtTagImpl().
 */
public static function getNativeExt( Env $env, Node $node ): ?ExtensionTagHandler {
	$tagName = self::getExtTagName( $node );
	if ( !$tagName ) {
		return null;
	}
	return $env->getSiteConfig()->getExtTagImpl( $tagName );
}
816 | |
/**
 * Tell whether a tag name is one of the include directives:
 * includeonly, noinclude or onlyinclude.
 *
 * The comparison is exact, so callers must pass a lower-cased name.
 *
 * @param string $name
 * @return bool
 */
public static function isIncludeTag( string $name ): bool {
	return in_array( $name, [ 'includeonly', 'noinclude', 'onlyinclude' ], true );
}
825 | |
/**
 * Check whether a tag name denotes an installed extension tag, an include
 * directive, or an annotation tag. The name is lower-cased before any
 * lookup.
 * Adapted from a similar grammar function.
 *
 * @param Env $env
 * @param string $name
 * @return bool
 */
public static function isAnnOrExtTag( Env $env, string $name ): bool {
	$lcName = mb_strtolower( $name );
	$config = $env->getSiteConfig();

	$extTagMap = $config->getExtensionTagNameMap();
	$installedExt = isset( $extTagMap[$lcName] );
	$includeTag = self::isIncludeTag( $lcName );
	$annotationTag = $config->isAnnotationTag( $lcName );

	if ( !$annotationTag ) {
		// avoid crashing on <tvar|name> even if we don't support that syntax explicitly
		$pipeOffset = strpos( $lcName, '|' );
		// Neither "no pipe" (false) nor "leading pipe" (0) leaves a
		// usable prefix to test.
		if ( $pipeOffset !== false && $pipeOffset !== 0 ) {
			$prefix = substr( $lcName, 0, $pipeOffset );
			$annotationTag = $config->isAnnotationTag( $prefix );
		}
	}

	return $installedExt || $includeTag || $annotationTag;
}
852 | |
/**
 * Build a DocumentFragment whose only child is a span typed "mw:I18n".
 * The span carries no localization data yet; it should be filled in with
 * setDataNodeI18n to be valid.
 *
 * @param Document $doc
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createEmptyLocalizationFragment( Document $doc ): DocumentFragment {
	$fragment = $doc->createDocumentFragment();
	$i18nSpan = $doc->createElement( 'span' );
	DOMUtils::addTypeOf( $i18nSpan, 'mw:I18n' );
	$fragment->appendChild( $i18nSpan );

	return $fragment;
}
867 | |
/**
 * Create an i18n message to be localized into the page content language.
 * The result is a DocumentFragment whose single child is a span carrying
 * the information needed for later localization.
 *
 * @param Document $doc
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createPageContentI18nFragment(
	Document $doc, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	$info = I18nInfo::createPageContentI18n( $key, $params );
	// The fragment's first child is the mw:I18n span created above.
	DOMDataUtils::setDataNodeI18n( $fragment->firstChild, $info );

	return $fragment;
}
886 | |
/**
 * Create an i18n message to be localized into the user interface language.
 * The result is a DocumentFragment whose single child is a span carrying
 * the information needed for later localization.
 *
 * @param Document $doc
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createInterfaceI18nFragment(
	Document $doc, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	$info = I18nInfo::createInterfaceI18n( $key, $params );
	// The fragment's first child is the mw:I18n span created above.
	DOMDataUtils::setDataNodeI18n( $fragment->firstChild, $info );

	return $fragment;
}
905 | |
/**
 * Create an i18n message to be localized into an arbitrary language.
 * The result is a DocumentFragment whose single child is a span carrying
 * the information needed for later localization.
 *
 * Use of this method is discouraged. Where possible, call
 * ::createPageContentI18nFragment(...) instead of
 * ::createLangI18nFragment(..., $wgContLang, ...), and
 * ::createInterfaceI18nFragment(...) instead of
 * ::createLangI18nFragment(..., $wgLang, ...).
 *
 * @param Document $doc
 * @param Bcp47Code $lang language for the localization
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createLangI18nFragment(
	Document $doc, Bcp47Code $lang, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	$info = I18nInfo::createLangI18n( $lang, $key, $params );
	// The fragment's first child is the mw:I18n span created above.
	DOMDataUtils::setDataNodeI18n( $fragment->firstChild, $info );

	return $fragment;
}
929 | |
/**
 * Record on $element the i18n information needed to localize attribute
 * $name, in a later pass, into the page content language. The element is
 * also tagged with the "mw:LocalizedAttrs" typeof.
 *
 * @param Element $element element on which to add internationalization information
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addPageContentI18nAttribute(
	Element $element, string $name, string $key, ?array $params = null
): void {
	$attrInfo = I18nInfo::createPageContentI18n( $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $attrInfo );
}
945 | |
/**
 * Record on $element the i18n information needed to localize attribute
 * $name, in a later pass, into the user interface language. The element is
 * also tagged with the "mw:LocalizedAttrs" typeof.
 *
 * @param Element $element element on which to add internationalization information
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addInterfaceI18nAttribute(
	Element $element, string $name, string $key, ?array $params = null
): void {
	$attrInfo = I18nInfo::createInterfaceI18n( $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $attrInfo );
}
960 | |
/**
 * Record on $element the i18n information needed to localize attribute
 * $name, in a later pass, into the provided language. The element is also
 * tagged with the "mw:LocalizedAttrs" typeof.
 *
 * Use of this method is discouraged. Where possible, call
 * ::addPageContentI18nAttribute(...) instead of
 * ::addLangI18nAttribute(..., $wgContLang, ...), and
 * ::addInterfaceI18nAttribute(...) instead of
 * ::addLangI18nAttribute(..., $wgLang, ...).
 *
 * @param Element $element element on which to add internationalization information
 * @param Bcp47Code $lang language in which the message will be localized
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addLangI18nAttribute(
	Element $element, Bcp47Code $lang, string $name, string $key, ?array $params = null
): void {
	$attrInfo = I18nInfo::createLangI18n( $lang, $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $attrInfo );
}
980 | |
/**
 * Check whether a node is an annotation meta, i.e. a "meta" element whose
 * typeof matches ANNOTATION_META_TYPE_REGEXP; if so, return its type.
 *
 * @param Node $node
 * @return ?string The result of DOMUtils::matchNameAndTypeOf(), null on no match.
 */
public static function matchAnnotationMeta( Node $node ): ?string {
	$annotationType = DOMUtils::matchNameAndTypeOf(
		$node, 'meta', self::ANNOTATION_META_TYPE_REGEXP
	);
	return $annotationType;
}
988 | |
/**
 * Extract the annotation type of a node, i.e. the first capture group of
 * ANNOTATION_META_TYPE_REGEXP applied to its matching typeof (the type
 * without a potential "/End" suffix).
 *
 * On a successful match, $isStart is set to true unless the typeof ends in
 * "/End". When null is returned, $isStart is left untouched.
 *
 * @param Node $node
 * @param bool &$isStart
 * @return ?string The matched type, or null if no match.
 */
public static function extractAnnotationType( Node $node, bool &$isStart = false ): ?string {
	$typeOf = DOMUtils::matchTypeOf( $node, self::ANNOTATION_META_TYPE_REGEXP );
	if ( $typeOf === null ) {
		return null;
	}

	// Re-run the pattern on the matched typeof to get at its capture group.
	if ( !preg_match( self::ANNOTATION_META_TYPE_REGEXP, $typeOf, $groups ) ) {
		return null;
	}

	$isStart = !str_ends_with( $typeOf, '/End' );
	return $groups[1];
}
1005 | |
/**
 * Check whether a node is a meta element marking the start of an annotated
 * part of the DOM.
 *
 * @param Node $node
 * @return bool
 */
public static function isAnnotationStartMarkerMeta( Node $node ): bool {
	if ( $node instanceof Element && DOMCompat::nodeName( $node ) === 'meta' ) {
		$startFlag = false;
		$annType = self::extractAnnotationType( $node, $startFlag );
		// A start marker has an annotation type and no "/End" suffix.
		return $annType !== null && $startFlag;
	}
	return false;
}
1020 | |
/**
 * Check whether a node is a meta element marking the end of an annotated
 * part of the DOM.
 *
 * @param Node $node
 * @return bool
 */
public static function isAnnotationEndMarkerMeta( Node $node ): bool {
	if ( $node instanceof Element && DOMCompat::nodeName( $node ) === 'meta' ) {
		$startFlag = false;
		$annType = self::extractAnnotationType( $node, $startFlag );
		// An end marker has an annotation type and the "/End" suffix.
		return $annType !== null && !$startFlag;
	}
	return false;
}
1035 | |
/**
 * Check whether an annotation meta tag was moved from its initial
 * position, as recorded by the "wasMoved" flag in its data-parsoid.
 *
 * @param Node $node
 * @return bool The stored flag; false for non-annotation nodes or when the
 *  flag is not set.
 */
public static function isMovedMetaTag( Node $node ): bool {
	if ( !( $node instanceof Element ) || self::matchAnnotationMeta( $node ) === null ) {
		return false;
	}

	$dp = DOMDataUtils::getDataParsoid( $node );
	return isset( $dp->wasMoved ) ? $dp->wasMoved : false;
}
1050 | |
/**
 * Tell whether a node is a (start or end) annotation meta tag.
 *
 * @param ?Node $n
 * @return bool Always false for a null node.
 */
public static function isMarkerAnnotation( ?Node $n ): bool {
	if ( $n === null ) {
		return false;
	}
	return self::matchAnnotationMeta( $n ) !== null;
}
1058 | |
/**
 * Extract the media format from a node's media typeof: the path segment
 * that follows the first "/" (e.g. the part after "mw:File/").
 *
 * @param Element $node
 * @return string The format, or '' when there is no matching typeof or the
 *  typeof has no "/" segment.
 */
public static function getMediaFormat( Element $node ): string {
	// TODO: Remove "Image|Video|Audio" when version 2.4.0 of the content
	// is no longer supported
	$mediaTypeOf = DOMUtils::matchTypeOf( $node, '#^mw:(File|Image|Video|Audio)(/|$)#' );
	if ( $mediaTypeOf === null ) {
		return '';
	}

	$segments = explode( '/', $mediaTypeOf );
	return $segments[1] ?? '';
}
1072 | |
/**
 * Tell whether a media node's format is one of those listed here as
 * having a visible caption: "Thumb" or "Frame".
 *
 * @param Element $node
 * @return bool
 */
public static function hasVisibleCaption( Element $node ): bool {
	$mediaFormat = self::getMediaFormat( $node );
	// 'Manualthumb' is deliberately left out, FIXME(T305759)
	return $mediaFormat === 'Thumb' || $mediaFormat === 'Frame';
}
1083 | |
/**
 * Collect the text content of a caption, skipping metadata tags and
 * "mw:Extension/ref" elements together with everything inside them.
 *
 * Ref dom post-processing happens after adding media info, so the
 * linkbacks aren't available in the textContent added to the alt.
 * When serializing, however, they are present in the caption elements,
 * so they are dropped here for the purpose of comparison.
 *
 * @param Node $node
 * @return string
 */
public static function textContentFromCaption( Node $node ): string {
	$text = '';
	for ( $child = $node->firstChild; $child; $child = $child->nextSibling ) {
		if ( $child instanceof Text ) {
			$text .= $child->nodeValue;
			continue;
		}

		// Only descend into ordinary elements; other node types are ignored.
		$descend = $child instanceof Element &&
			!DOMUtils::isMetaDataTag( $child ) &&
			!DOMUtils::hasTypeOf( $child, "mw:Extension/ref" );
		if ( $descend ) {
			$text .= self::textContentFromCaption( $child );
		}
	}
	return $text;
}
1110 | |
1111 | } |