Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
17.79% |
45 / 253 |
|
14.29% |
9 / 63 |
CRAP | |
0.00% |
0 / 1 |
WTUtils | |
17.79% |
45 / 253 |
|
14.29% |
9 / 63 |
15109.70 | |
0.00% |
0 / 1 |
hasLiteralHTMLMarker | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
isLiteralHTMLNode | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isZeroWidthWikitextElt | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isBlockNodeWithVisibleWT | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isATagFromWikiLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
isATagFromExtLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
isATagFromURLLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isATagFromMagicLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
matchTplType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasExpandedAttrsType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isTplMarkerMeta | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isTplStartMarkerMeta | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isTplEndMarkerMeta | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
findFirstEncapsulationWrapperNode | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
20 | |||
isNewElt | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
isIndentPre | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
isInlineMedia | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isGeneratedFigure | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
indentPreDSRCorrection | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
isEncapsulatedDOMForestRoot | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
isRedirectLink | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
isCategoryLink | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
isSolTransparentLink | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
emitsSolTransparentSingleLineWT | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
isFallbackIdSpan | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isRenderingTransparentNode | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
56 | |||
inHTMLTableTag | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
isFirstEncapsulationWrapperNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isFirstExtensionWrapperNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isExtensionOutputtingCoreMwDomSpec | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
isEncapsulationWrapper | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
isDOMFragmentWrapper | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isSealedFragmentOfType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isParsoidSectionTag | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
fromExtensionContent | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
5 | |||
fromEncapsulatedContent | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
getWTSource | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getAboutSiblings | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
90 | |||
skipOverEncapsulatedContent | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
encodeComment | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
decodeComment | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
decodedCommentLength | |
58.33% |
7 / 12 |
|
0.00% |
0 / 1 |
8.60 | |||
getExtTagName | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getNativeExt | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isIncludeTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
12 | |||
isAnnotationTag | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
isAnnOrExtTag | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
createEmptyLocalizationFragment | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
createPageContentI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
createInterfaceI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
createLangI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
addPageContentI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
addInterfaceI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
addLangI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
matchAnnotationMeta | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
extractAnnotationType | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
isAnnotationStartMarkerMeta | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isAnnotationEndMarkerMeta | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isMovedMetaTag | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
isMarkerAnnotation | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
getMediaFormat | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
hasVisibleCaption | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
textContentFromCaption | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | // Suppress UnusedPluginSuppression because |
4 | // Phan on PHP 7.4 and PHP 8.1 need different suppressions |
5 | // @phan-file-suppress UnusedPluginSuppression,UnusedPluginFileSuppression |
6 | |
7 | namespace Wikimedia\Parsoid\Utils; |
8 | |
9 | use DOMException; |
10 | use Wikimedia\Assert\UnreachableException; |
11 | use Wikimedia\Bcp47Code\Bcp47Code; |
12 | use Wikimedia\Parsoid\Config\Env; |
13 | use Wikimedia\Parsoid\DOM\Comment; |
14 | use Wikimedia\Parsoid\DOM\Document; |
15 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
16 | use Wikimedia\Parsoid\DOM\Element; |
17 | use Wikimedia\Parsoid\DOM\Node; |
18 | use Wikimedia\Parsoid\DOM\Text; |
19 | use Wikimedia\Parsoid\Ext\ExtensionTagHandler; |
20 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
21 | use Wikimedia\Parsoid\NodeData\I18nInfo; |
22 | use Wikimedia\Parsoid\NodeData\TempData; |
23 | use Wikimedia\Parsoid\Tokens\CommentTk; |
24 | use Wikimedia\Parsoid\Wikitext\Consts; |
25 | use Wikimedia\Parsoid\Wt2Html\Frame; |
26 | |
27 | /** |
28 | * These utilities pertain to querying / extracting / modifying wikitext information from the DOM.
29 | * |
30 | * @note Many of these methods are not safe to use unless the DOM has been |
31 | * loaded and prepared, as they consult DataParsoid from the NodeData. |
32 | */ |
33 | class WTUtils { |
34 | private const FIRST_ENCAP_REGEXP = |
35 | '#(?:^|\s)(mw:(?:Transclusion|Param|LanguageVariant|Extension(/\S+)))(?=$|\s)#D'; |
36 | |
37 | /** |
38 | * Regex corresponding to FIRST_ENCAP_REGEXP, but excluding extensions. If FIRST_ENCAP_REGEXP is |
39 | * updated, this one should be as well. |
40 | */ |
41 | private const NON_EXTENSION_ENCAP_REGEXP = |
42 | '#(?:^|\s)(mw:(?:Transclusion|Param|LanguageVariant))(?=$|\s)#D'; |
43 | |
44 | /** |
45 | * Regexp for checking marker metas typeofs representing |
46 | * transclusion markup or template param markup. |
47 | */ |
48 | private const TPL_META_TYPE_REGEXP = '#^mw:(?:Transclusion|Param)(?:/End)?$#D'; |
49 | |
50 | /** |
51 | * Regexp for checking marker metas typeofs representing |
52 | * annotation markup |
53 | */ |
54 | public const ANNOTATION_META_TYPE_REGEXP = '#^mw:(?:Annotation/([\w\d]+))(?:/End)?$#uD'; |
55 | |
/**
 * Tell whether a data-parsoid object records that the original wikitext
 * was written as a literal HTML element (for example a table or p tag).
 *
 * @param DataParsoid $dp
 * @return bool True when `$dp->stx` is set and equals 'html'.
 */
public static function hasLiteralHTMLMarker( DataParsoid $dp ): bool {
	$syntax = $dp->stx ?? null;
	return $syntax === 'html';
}
67 | |
/**
 * Apply {@link #hasLiteralHTMLMarker} to the data-parsoid of a node.
 *
 * @param ?Node $node
 * @return bool False for null and for anything that is not an Element.
 */
public static function isLiteralHTMLNode( ?Node $node ): bool {
	if ( !( $node instanceof Element ) ) {
		return false;
	}
	$dp = DOMDataUtils::getDataParsoid( $node );
	return self::hasLiteralHTMLMarker( $dp );
}
77 | |
/**
 * Is `$node` named in Consts::$ZeroWidthWikitextTags and not written
 * as literal HTML?
 *
 * @param Node $node
 * @return bool
 */
public static function isZeroWidthWikitextElt( Node $node ): bool {
	$name = DOMCompat::nodeName( $node );
	if ( !isset( Consts::$ZeroWidthWikitextTags[$name] ) ) {
		return false;
	}
	return !self::isLiteralHTMLNode( $node );
}
86 | |
/**
 * Is `$node` a wikitext block node whose markup is visible in wikitext?
 *
 * Invisible block nodes include a `<p>`-tag that Parsoid generated,
 * or a `<ul>` / `<ol>` tag.
 *
 * @param Node $node
 * @return bool
 */
public static function isBlockNodeWithVisibleWT( Node $node ): bool {
	if ( !DOMUtils::isWikitextBlockNode( $node ) ) {
		return false;
	}
	return !self::isZeroWidthWikitextElt( $node );
}
99 | |
/**
 * Detect an A-node that came from [[..]] wikilink syntax.
 *
 * The rel attribute alone is not sufficient because mw:ExtLink is used
 * for wikilink, ext-link and url-link syntaxes alike. A node qualifies
 * when its rel includes mw:WikiLink, or when its data-parsoid `stx` is
 * set to anything other than "url" or "magiclink".
 *
 * @param Element $node
 * @return bool
 */
public static function isATagFromWikiLinkSyntax( Element $node ): bool {
	if ( DOMCompat::nodeName( $node ) !== 'a' ) {
		return false;
	}

	// Fetch data-parsoid before the rel check, matching the original call order.
	$dp = DOMDataUtils::getDataParsoid( $node );
	if ( DOMUtils::hasRel( $node, 'mw:WikiLink' ) ) {
		return true;
	}

	$stx = $dp->stx ?? null;
	return !in_array( $stx, [ null, 'url', 'magiclink' ], true );
}
117 | |
/**
 * Detect an A-node that came from [..] ext-link syntax.
 *
 * The rel attribute alone is not sufficient because mw:ExtLink is used
 * for several link types. The node must carry rel mw:ExtLink and its
 * data-parsoid `stx` must be neither "url" nor "magiclink" (an unset
 * `stx` qualifies).
 *
 * @param Element $node
 * @return bool
 */
public static function isATagFromExtLinkSyntax( Element $node ): bool {
	if ( DOMCompat::nodeName( $node ) !== 'a' ) {
		return false;
	}

	// Fetch data-parsoid before the rel check, matching the original call order.
	$dp = DOMDataUtils::getDataParsoid( $node );
	if ( !DOMUtils::hasRel( $node, 'mw:ExtLink' ) ) {
		return false;
	}

	$stx = $dp->stx ?? null;
	return $stx !== 'url' && $stx !== 'magiclink';
}
135 | |
/**
 * Detect an A-node that came from bare url-link syntax.
 *
 * The rel attribute alone is not sufficient because mw:ExtLink is used
 * for several link types; the data-parsoid `stx` must also be "url".
 *
 * @param Element $node
 * @return bool
 */
public static function isATagFromURLLinkSyntax( Element $node ): bool {
	if ( DOMCompat::nodeName( $node ) !== 'a' ) {
		return false;
	}

	// Fetch data-parsoid before the rel check, matching the original call order.
	$dp = DOMDataUtils::getDataParsoid( $node );
	if ( !DOMUtils::hasRel( $node, 'mw:ExtLink' ) ) {
		return false;
	}
	return ( $dp->stx ?? null ) === 'url';
}
153 | |
/**
 * Detect an A-node that came from magic-link syntax.
 *
 * The rel attribute alone is not sufficient because mw:ExtLink is used
 * for several link types; the data-parsoid `stx` must also be
 * "magiclink".
 *
 * @param Element $node
 * @return bool
 */
public static function isATagFromMagicLinkSyntax( Element $node ): bool {
	if ( DOMCompat::nodeName( $node ) !== 'a' ) {
		return false;
	}

	// Fetch data-parsoid before the rel check, matching the original call order.
	$dp = DOMDataUtils::getDataParsoid( $node );
	if ( !DOMUtils::hasRel( $node, 'mw:ExtLink' ) ) {
		return false;
	}
	return ( $dp->stx ?? null ) === 'magiclink';
}
171 | |
/**
 * Match a node's typeof against TPL_META_TYPE_REGEXP, i.e. the
 * transclusion / template-param marker types (with optional "/End").
 *
 * @param Element $node
 * @return ?string The matched type, or null if no match.
 */
public static function matchTplType( Element $node ): ?string {
	$matched = DOMUtils::matchTypeOf( $node, self::TPL_META_TYPE_REGEXP );
	return $matched;
}
181 | |
/**
 * Does the node's typeof mark it as carrying expanded attributes,
 * i.e. `mw:ExpandedAttrs` optionally followed by `/`-prefixed suffixes?
 *
 * @param Element $node
 * @return bool
 */
public static function hasExpandedAttrsType( Element $node ): bool {
	$matched = DOMUtils::matchTypeOf( $node, '/^mw:ExpandedAttrs(\/\S+)*$/' );
	return $matched !== null;
}
192 | |
/**
 * Is `$node` a meta tag whose typeof matches TPL_META_TYPE_REGEXP
 * (a start or end marker of a template expansion)?
 *
 * @param Node $node
 * @return bool
 */
public static function isTplMarkerMeta( Node $node ): bool {
	$type = DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP );
	return $type !== null;
}
202 | |
/**
 * Is `$node` a meta marking the start of a template expansion, i.e. a
 * template marker meta whose matched type does not end in "/End"?
 *
 * @param Node $node
 * @return bool
 */
public static function isTplStartMarkerMeta( Node $node ): bool {
	$type = DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP );
	if ( $type === null ) {
		return false;
	}
	return !str_ends_with( $type, '/End' );
}
213 | |
/**
 * Is `$node` a meta marking the end of a template expansion, i.e. a
 * template marker meta whose matched type ends in "/End"?
 *
 * @param Node $node
 * @return bool
 */
public static function isTplEndMarkerMeta( Node $node ): bool {
	$type = DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP );
	if ( $type === null ) {
		return false;
	}
	return str_ends_with( $type, '/End' );
}
224 | |
/**
 * Locate the first wrapper element of the encapsulated content that
 * `$node` belongs to.
 *
 * Starting from `$node`, walks backwards over non-deleted previous
 * siblings for as long as they are elements sharing `$node`'s about id,
 * then checks that the earliest one carries an encapsulation typeof.
 *
 * @param Node $node
 * @return Element|null Null when `$node` is not an encapsulated forest
 *   root, or when the earliest sibling lacks an encapsulation typeof.
 */
public static function findFirstEncapsulationWrapperNode( Node $node ): ?Element {
	if ( !self::isEncapsulatedDOMForestRoot( $node ) ) {
		return null;
	}
	/** @var Element $node */
	DOMUtils::assertElt( $node );

	$about = DOMCompat::getAttribute( $node, 'about' );
	$first = $node;
	for ( ; ; ) {
		$candidate = DiffDOMUtils::previousNonDeletedSibling( $first );
		$sameGroup = $candidate instanceof Element &&
			DOMCompat::getAttribute( $candidate, 'about' ) === $about;
		if ( !$sameGroup ) {
			break;
		}
		$first = $candidate;
	}

	// NOTE: findFirstEncapsulationWrapperNode can be called by code
	// even before templates have been fully encapsulated everywhere.
	// ProcessTreeBuilderFixups::removeAutoInsertedEmptyTags is the main
	// culprit here and it makes the contract for this helper murky
	// by hiding potential brokenness since this should never return null
	// once all templates have been encapsulated!
	$elt = null;
	if ( self::isFirstEncapsulationWrapperNode( $first ) ) {
		$elt = $first;
	}
	'@phan-var ?Element $elt'; // @var ?Element $elt
	return $elt;
}
256 | |
/**
 * Tell whether a DOM node was added during an edit session, as opposed
 * to being an existing node from parsed wikitext.
 *
 * This is only reliable on non-template/extension content, or on the
 * top-level nodes of template/extension content; it gives wrong results
 * on non-top-level nodes of template/extension content.
 *
 * @param Node $node
 * @return bool Always false for non-elements (text/comment nodes).
 */
public static function isNewElt( Node $node ): bool {
	if ( $node instanceof Element ) {
		// For template/extension content the flag lives on the
		// encapsulation wrapper, so prefer that node when there is one.
		$wrapper = self::findFirstEncapsulationWrapperNode( $node );
		$target = $wrapper ?? $node;
		return DOMDataUtils::getDataParsoid( $target )->getTempFlag( TempData::IS_NEW );
	}

	// Newness cannot be determined on text/comment nodes.
	return false;
}
279 | |
/**
 * Is `$node` a `pre` produced by indentation in the original wikitext,
 * i.e. a `pre` that is not marked as literal HTML?
 *
 * @param Node $node
 * @return bool
 */
public static function isIndentPre( Node $node ): bool {
	if ( DOMCompat::nodeName( $node ) !== "pre" ) {
		return false;
	}
	return !self::isLiteralHTMLNode( $node );
}
288 | |
/**
 * Is `$node` generated media that is not a block-level `figure`
 * (i.e. a span or figure-inline)?
 *
 * @param Node $node
 * @return bool
 */
public static function isInlineMedia( Node $node ): bool {
	if ( !self::isGeneratedFigure( $node ) ) {
		return false;
	}
	// span, figure-inline
	return DOMCompat::nodeName( $node ) !== 'figure';
}
297 | |
/**
 * Does `$node` have a media typeof: mw:File, mw:Image, mw:Video or
 * mw:Audio, either bare or followed by a `/` subtype?
 *
 * @param Node $node
 * @return bool
 */
public static function isGeneratedFigure( Node $node ): bool {
	// TODO: Remove "Image|Video|Audio" when version 2.4.0 of the content
	// is no longer supported
	$mediaType = DOMUtils::matchTypeOf( $node, '#^mw:(File|Image|Video|Audio)($|/)#D' );
	return $mediaType !== null;
}
307 | |
/**
 * Find how much offset is necessary for the DSR of an
 * indent-originated pre tag.
 *
 * The correction is the number of newlines in the text, except that a
 * trailing newline on the last child of the pre is not counted since
 * it adds no new content in the pre after the text.
 *
 * @param Node $textNode Expected to be a text node; this is not checked.
 * @return int 0 when the node is detached or its parent is not an
 *   indent-pre.
 */
public static function indentPreDSRCorrection( Node $textNode ): int {
	// NOTE: This assumes a text-node and doesn't check that it is one.
	//
	// FIXME: Doesn't handle text nodes that are not direct children of the pre
	$parent = $textNode->parentNode;
	// A detached node has no parent; isIndentPre() requires a Node, so
	// bail out instead of raising a TypeError.
	if ( $parent === null || !self::isIndentPre( $parent ) ) {
		return 0;
	}

	// nodeValue can be null on non-text nodes; treat that as empty text.
	$text = $textNode->nodeValue ?? '';
	$numNLs = substr_count( $text, "\n" );
	if ( $parent->lastChild === $textNode && str_ends_with( $text, "\n" ) ) {
		// We don't want the trailing newline of the last child of the pre
		// to contribute a pre-correction since it doesn't add new content
		// in the pre-node after the text
		$numNLs--;
	}
	return $numNLs;
}
334 | |
/**
 * Is `$node` a root in an encapsulated DOM forest? That is: an element
 * whose `about` attribute is a Parsoid object id.
 *
 * @param Node $node
 * @return bool
 */
public static function isEncapsulatedDOMForestRoot( Node $node ): bool {
	if ( !( $node instanceof Element ) ) {
		return false;
	}
	$about = DOMCompat::getAttribute( $node, 'about' );
	if ( $about === null ) {
		return false;
	}
	// FIXME: Ensure that our DOM spec clarifies this expectation
	return Utils::isParsoidObjectId( $about );
}
351 | |
/**
 * Is `$node` a `link` element whose rel contains mw:PageProp/redirect?
 */
public static function isRedirectLink( ?Node $node ): bool {
	if ( !( $node instanceof Element ) || DOMCompat::nodeName( $node ) !== 'link' ) {
		return false;
	}
	return DOMUtils::matchRel( $node, '#\bmw:PageProp/redirect\b#' ) !== null;
}
360 | |
/**
 * Is `$node` a `link` element whose rel contains mw:PageProp/Category?
 */
public static function isCategoryLink( ?Node $node ): bool {
	if ( !( $node instanceof Element ) || DOMCompat::nodeName( $node ) !== 'link' ) {
		return false;
	}
	return DOMUtils::matchRel( $node, '#\bmw:PageProp/Category\b#' ) !== null;
}
369 | |
/**
 * Is `$node` a `link` element whose rel matches
 * TokenUtils::SOL_TRANSPARENT_LINK_REGEX?
 */
public static function isSolTransparentLink( ?Node $node ): bool {
	if ( !( $node instanceof Element ) || DOMCompat::nodeName( $node ) !== 'link' ) {
		return false;
	}
	return DOMUtils::matchRel( $node, TokenUtils::SOL_TRANSPARENT_LINK_REGEX ) !== null;
}
378 | |
/**
 * Does `$node` emit wikitext that is sol-transparent and introduces no
 * line breaks?
 *
 * Comments, whitespace-only text nodes (spaces and tabs), category
 * links, redirect links, behavior switches, and include directives
 * currently satisfy this definition.
 *
 * This should come close to matching TokenUtils.isSolTransparent()
 *
 * @param Node $node
 * @return bool
 */
public static function emitsSolTransparentSingleLineWT( Node $node ): bool {
	if ( $node instanceof Text ) {
		// NB: We differ here to meet the nl condition: only spaces and
		// tabs are allowed, so newlines disqualify the text node.
		return preg_match( '/^[ \t]*$/D', $node->nodeValue ) === 1;
	}

	// NB: The only metas in a DOM should be for behavior switches and
	// include directives, other than explicit HTML meta tags. This
	// differs from our counterpart in Util where ref meta tokens
	// haven't been expanded to spans yet.
	return self::isRenderingTransparentNode( $node );
}
405 | |
/**
 * Is `$node` the `span` with typeof mw:FallbackId that is added to
 * headings? That span supplies fallback ids for when legacy and HTML5
 * ids don't match up, which prevents broken links to legacy ids.
 *
 * @param Node $node
 * @return bool
 */
public static function isFallbackIdSpan( Node $node ): bool {
	$isFallback = DOMUtils::hasNameAndTypeOf( $node, 'span', 'mw:FallbackId' );
	return $isFallback;
}
416 | |
/**
 * Is `$node` a 'metadata'-like node that doesn't show up in output
 * rendering?
 *
 * - In Parsoid output, such nodes are represented by link/meta tags.
 * - In the PHP parser, they are completely stripped from the input early on.
 * Because of this property, these rendering-transparent nodes are also
 * SOL-transparent for the purposes of parsing behavior.
 *
 * @param Node $node
 * @return bool
 */
public static function isRenderingTransparentNode( Node $node ): bool {
	// FIXME: Can we change this entire thing to
	// $node instanceof Comment ||
	// DOMUtils::getDataParsoid($node).stx !== 'html' &&
	// (DOMCompat::nodeName($node) === 'meta' || DOMCompat::nodeName($node) === 'link')
	//
	if ( $node instanceof Comment ) {
		return true;
	}
	if ( self::isSolTransparentLink( $node ) ) {
		return true;
	}

	// Catch-all for everything else: a non-annotation meta that was not
	// written as literal HTML.
	if (
		$node instanceof Element &&
		DOMCompat::nodeName( $node ) === 'meta' &&
		!self::isMarkerAnnotation( $node ) &&
		( DOMDataUtils::getDataParsoid( $node )->stx ?? '' ) !== 'html'
	) {
		return true;
	}

	return self::isFallbackIdSpan( $node );
}
442 | |
/**
 * Is `$node` nested inside a table tag that uses HTML instead of native
 * wikitext?
 *
 * Walks up through ancestors for as long as they are table tags and
 * stops at the nearest `<table>`.
 *
 * @param Node $node
 * @return bool
 */
public static function inHTMLTableTag( Node $node ): bool {
	for (
		$ancestor = $node->parentNode;
		DOMUtils::isTableTag( $ancestor );
		$ancestor = $ancestor->parentNode
	) {
		if ( self::isLiteralHTMLNode( $ancestor ) ) {
			return true;
		}
		if ( DOMCompat::nodeName( $ancestor ) === 'table' ) {
			// Don't cross <table> boundaries
			return false;
		}
	}

	return false;
}
464 | |
/**
 * Is `$node` the first wrapper element of encapsulated content, i.e.
 * does its typeof match FIRST_ENCAP_REGEXP (mw:Transclusion, mw:Param,
 * mw:LanguageVariant or mw:Extension/...)?
 *
 * @param Node $node
 * @return bool
 */
public static function isFirstEncapsulationWrapperNode( Node $node ): bool {
	$encapType = DOMUtils::matchTypeOf( $node, self::FIRST_ENCAP_REGEXP );
	return $encapType !== null;
}
474 | |
/**
 * Is `$node` the first wrapper element of extension content, i.e. does
 * its typeof contain "mw:Extension/"?
 *
 * @param Node $node
 * @return bool
 */
public static function isFirstExtensionWrapperNode( Node $node ): bool {
	$extType = DOMUtils::matchTypeOf( $node, "#mw:Extension/#" );
	return $extType !== null;
}
484 | |
/**
 * Checks whether a first encapsulation wrapper node is encapsulating an
 * extension that outputs MediaWiki Core DOM Spec HTML
 * (https://www.mediawiki.org/wiki/Specs/HTML).
 *
 * Non-extension encapsulations (transclusion, param, language variant)
 * never qualify. Otherwise the extension tag's site configuration must
 * have `options.outputHasCoreMwDomSpecMarkup` set to exactly true.
 *
 * @param Node $node
 * @param Env $env
 * @return bool
 */
public static function isExtensionOutputtingCoreMwDomSpec( Node $node, Env $env ): bool {
	$nonExtType = DOMUtils::matchTypeOf( $node, self::NON_EXTENSION_ENCAP_REGEXP );
	if ( $nonExtType !== null ) {
		return false;
	}

	$tagName = self::getExtTagName( $node );
	$tagConfig = $env->getSiteConfig()->getExtTagConfig( $tagName );
	return ( $tagConfig['options']['outputHasCoreMwDomSpecMarkup'] ?? null ) === true;
}
501 | |
/**
 * Is `$node` an encapsulation wrapper element?
 *
 * All root-level nodes of generated content are considered
 * encapsulation wrappers and share an about-id. A node qualifies if it
 * has an encapsulation type itself, or if walking backwards over
 * elements with an identical about id reaches one that does.
 *
 * @param Node $node
 * @return bool
 */
public static function isEncapsulationWrapper( Node $node ): bool {
	return $node instanceof Element &&
		self::findFirstEncapsulationWrapperNode( $node ) !== null;
}
520 | |
/**
 * Is `$node` a DOMFragment wrapper, i.e. is its typeof mw:DOMFragment
 * or mw:DOMFragment/sealed/<name>?
 *
 * @param Node $node
 * @return bool
 */
public static function isDOMFragmentWrapper( Node $node ): bool {
	// See TokenUtils::hasDOMFragmentType
	$fragmentType = DOMUtils::matchTypeOf( $node, '#^mw:DOMFragment(/sealed/\w+)?$#D' );
	return $fragmentType !== null;
}
531 | |
/**
 * Is `$node` a sealed DOMFragment of the given type, i.e. does it have
 * the typeof mw:DOMFragment/sealed/<type>?
 *
 * @param Node $node
 * @param string $type
 * @return bool
 */
public static function isSealedFragmentOfType( Node $node, string $type ): bool {
	$expected = 'mw:DOMFragment/sealed/' . $type;
	return DOMUtils::hasTypeOf( $node, $expected );
}
542 | |
/**
 * Is `$node` a Parsoid-generated <section> tag, i.e. a `section`
 * element carrying a `data-mw-section-id` attribute?
 *
 * @param Node $node
 * @return bool
 */
public static function isParsoidSectionTag( Node $node ): bool {
	if ( !( $node instanceof Element ) ) {
		return false;
	}
	if ( DOMCompat::nodeName( $node ) !== 'section' ) {
		return false;
	}
	return $node->hasAttribute( 'data-mw-section-id' );
}
554 | |
/**
 * Is the $node from extension content?
 *
 * Checks `$node` and then each ancestor, stopping once
 * DOMUtils::atTheTop() reports the top, for an mw:Extension/... typeof.
 *
 * @param Node $node
 * @param ?string $extType If non-null (and non-empty), checks for that
 *   specific extension; the name is matched literally and in full, so
 *   'ref' does not match 'mw:Extension/references'.
 * @return bool
 */
public static function fromExtensionContent( Node $node, ?string $extType = null ): bool {
	if ( $extType === null || $extType === '' ) {
		// No specific extension requested: any extension typeof will do.
		$re = '#mw:Extension/\w+#';
	} else {
		// Quote the name so regex metacharacters are taken literally, and
		// anchor both ends so one extension name cannot match as a prefix
		// of another.
		// NOTE(review): assumes DOMUtils::matchTypeOf tests each typeof
		// token separately, as the other anchored patterns in this class
		// imply -- confirm.
		$re = '#^mw:Extension/' . preg_quote( $extType, '#' ) . '$#D';
	}

	while ( $node && !DOMUtils::atTheTop( $node ) ) {
		if ( DOMUtils::matchTypeOf( $node, $re ) !== null ) {
			return true;
		}
		$node = $node->parentNode;
	}
	return false;
}
571 | |
/**
 * Is $node from encapsulated (template, extension, etc.) content?
 *
 * Checks `$node` and then each ancestor, stopping once
 * DOMUtils::atTheTop() reports the top, for an encapsulation wrapper.
 *
 * @param Node $node
 * @return bool
 */
public static function fromEncapsulatedContent( Node $node ): bool {
	for (
		$current = $node;
		$current && !DOMUtils::atTheTop( $current );
		$current = $current->parentNode
	) {
		if ( self::findFirstEncapsulationWrapperNode( $current ) !== null ) {
			return true;
		}
	}
	return false;
}
586 | |
/**
 * Compute, when possible, the wikitext source for `$node` by slicing
 * the frame's source text with the node's DSR.
 *
 * @param Frame $frame
 * @param Element $node
 * @return string|null Null when the node has no valid DSR.
 */
public static function getWTSource( Frame $frame, Element $node ): ?string {
	$dsr = DOMDataUtils::getDataParsoid( $node )->dsr ?? null;

	// FIXME: We could probably change the null return to ''
	// Just need to verify that code that uses this won't break
	if ( !Utils::isValidDSR( $dsr ) ) {
		return null;
	}
	return $dsr->substr( $frame->getSrcText() );
}
603 | |
/**
 * Collect `$node` together with the run of following siblings that
 * carry `$about` as their about id.
 *
 * This is used to fetch transclusion/extension content by using the
 * about-id as the key. It works because transclusion/extension content
 * is a forest of dom-trees formed by adjacent dom-nodes; template
 * encapsulation, dom-reuse, and VE code all have to abide by that
 * contract.
 *
 * The only exception to the adjacency rule is IEW nodes in fosterable
 * positions (in tables), which are not span-wrapped to prevent them
 * from getting fostered out. Such nodes are accepted inside the run,
 * but trailing ones are dropped from the result.
 *
 * @param Node $node
 * @param ?string $about
 * @return Node[] Just `[ $node ]` when `$about` is null.
 */
public static function getAboutSiblings( Node $node, ?string $about ): array {
	if ( $about === null ) {
		return [ $node ];
	}

	$group = [ $node ];
	for ( $next = $node->nextSibling; $next; $next = $next->nextSibling ) {
		$sharesAbout = $next instanceof Element &&
			DOMCompat::getAttribute( $next, 'about' ) === $about;
		if (
			!$sharesAbout &&
			!( DOMUtils::isFosterablePosition( $next ) && DOMUtils::isIEW( $next ) )
		) {
			break;
		}
		$group[] = $next;
	}

	// Remove already consumed trailing IEW, if any
	while ( $group !== [] && DOMUtils::isIEW( $group[array_key_last( $group )] ) ) {
		array_pop( $group );
	}

	return $group;
}
645 | |
/**
 * Return the first node following the encapsulated content that starts
 * at `$node`.
 *
 * This is only intended for encapsulated nodes (Template/Extension/Param
 * content). A node with an about-id is assumed to be generated by
 * templates or extensions; all of its about-siblings are skipped. A
 * node without an about-id simply yields its next sibling.
 *
 * @param Node $node
 * @return Node|null
 */
public static function skipOverEncapsulatedContent( Node $node ): ?Node {
	if ( $node instanceof Element ) {
		$about = DOMCompat::getAttribute( $node, 'about' );
		if ( $about !== null ) {
			// Guaranteed not to be empty. It will at least include $node.
			$group = self::getAboutSiblings( $node, $about );
			$last = end( $group );
			return $last->nextSibling;
		}
	}
	return $node->nextSibling;
}
669 | |
670 | /** |
671 | * Comment encoding/decoding. |
672 | * |
673 | * * Some relevant phab tickets: T94055, T70146, T60184, T95039 |
674 | * |
675 | * The wikitext comment rule is very simple: <!-- starts a comment, |
676 | * and --> ends a comment. This means we can have almost anything as the |
677 | * contents of a comment (except the string "-->", but see below), including |
678 | * several things that are not valid in HTML5 comments: |
679 | * |
680 | * * For one, the html5 comment parsing algorithm [0] leniently accepts |
681 | * --!> as a closing comment tag, which differs from the php+tidy combo. |
682 | * |
683 | * * If the comment's data matches /^-?>/, html5 will end the comment. |
684 | * For example, <!-->stuff<--> breaks up as |
685 | * <!--> (the comment) followed by, stuff<--> (as text). |
686 | * |
687 | * * Finally, comment data shouldn't contain two consecutive hyphen-minus |
688 | * characters (--), nor end in a hyphen-minus character (/-$/) as defined |
689 | * in the spec [1]. |
690 | * |
691 | * We work around all these problems by using HTML entity encoding inside |
692 | * the comment body. The characters -, >, and & must be encoded in order |
693 | * to prevent premature termination of the comment by one of the cases |
694 | * above. Encoding other characters is optional; all entities will be |
695 | * decoded during wikitext serialization. |
696 | * |
697 | * In order to allow *arbitrary* content inside a wikitext comment, |
698 | * including the forbidden string "-->" we also do some minimal entity |
699 | * decoding on the wikitext. We are also limited by our inability |
700 | * to encode DSR attributes on the comment $node, so our wikitext entity |
701 | * decoding must be 1-to-1: that is, there must be a unique "decoded" |
702 | * string for every wikitext sequence, and for every decoded string there |
703 | * must be a unique wikitext which creates it. |
704 | * |
705 | * The basic idea here is to replace every string ab*c with the string with |
706 | * one more b in it. This creates a string with no instance of "ac", |
707 | * so you can use 'ac' to encode one more code point. In this case |
708 | * a is "--&", "b" is "amp;", and "c" is "gt;" and we use ac to |
709 | * encode "-->" (which is otherwise unspeakable in wikitext). |
710 | * |
711 | * Note that any user content which does not match the regular |
712 | * expression /--(>|&(amp;)*gt;)/ is unchanged in its wikitext |
713 | * representation, as shown in the first two examples below. |
714 | * |
715 | * User-authored comment text    Wikitext        HTML5 DOM |
716 | * --------------------------    -------------   ---------------------- |
717 | * & - >                         & - >           &amp; &#43; &gt; |
718 | * Use &gt; here                 Use &gt; here   Use &amp;gt; here |
719 | * -->                           --&gt;          &#43;&#43;&gt; |
720 | * --&gt;                        --&amp;gt;      &#43;&#43;&amp;gt; |
721 | * --&amp;gt;                    --&amp;amp;gt;  &#43;&#43;&amp;amp;gt; |
722 | * |
723 | * [0] http://www.w3.org/TR/html5/syntax.html#comment-start-state |
724 | * [1] http://www.w3.org/TR/html5/syntax.html#comments |
725 | * |
726 | * Map a wikitext-escaped comment to an HTML DOM-escaped comment. |
727 | * |
728 | * @param string $comment Wikitext-escaped comment. |
729 | * @return string DOM-escaped comment. |
730 | */ |
public static function encodeComment( string $comment ): string {
	// Callback: reverse the wikitext-level escaping of one matched
	// "--&(amp;)*gt;" sequence.
	$unescapeMatch = static function ( $match ) {
		return Utils::decodeWtEntities( $match[0] );
	};
	// Callback: turn a single '-', '>' or '&' into its HTML entity form.
	$entityEncodeMatch = static function ( $match ) {
		return Utils::entityEncodeAll( $match[0] );
	};

	// Step 1: recover the "true value" of the comment.
	$unescaped = preg_replace_callback( '/--&(amp;)*gt;/', $unescapeMatch, $comment );

	// Step 2: entity-encode the characters that could end an HTML comment
	// early. Unlike step 1, this mapping does not need to be 1-to-1.
	return preg_replace_callback( '/[->&]/', $entityEncodeMatch, $unescaped );
}
744 | |
/**
 * Map an HTML DOM-escaped comment to a wikitext-escaped comment.
 *
 * The DOM value is first entity-decoded to obtain the comment's "true
 * value". Every occurrence of "--" followed by either ">" or
 * "&(amp;)*gt;" is then rewritten with one additional level of escaping,
 * so that the returned wikitext never contains the comment terminator:
 *
 *   "--" + ">"          becomes  "--" + "&gt;"
 *   "--" + "&gt;"       becomes  "--" + "&amp;gt;"
 *   "--" + "&amp;gt;"   becomes  "--" + "&amp;amp;gt;"
 *
 * This is the inverse of the un-escaping step done by encodeComment().
 *
 * @param string $comment DOM-escaped comment.
 * @return string Wikitext-escaped comment.
 */
public static function decodeComment( string $comment ): string {
	// Undo HTML entity escaping to obtain "true value" of comment.
	$trueValue = Utils::decodeWtEntities( $comment );

	// Re-escape the "true value" so that the string "-->" never shows up.
	return preg_replace_callback( '/--(&(amp;)*gt;|>)/', static function ( $m ) {
		$s = $m[0];
		// A bare terminator gets the shortest escaped form; anything that
		// is already escaped gains one more "amp;" after the leading "--&"
		// (substr( $s, 3 ) is everything after those three bytes).
		return $s === '-->' ? '--&gt;' : '--&amp;' . substr( $s, 3 );
	}, $trueValue );
}
761 | |
/**
 * Compute the wikitext length of a comment, given its DOM-escaped value.
 *
 * The result is the byte length of the wikitext-escaped comment text plus
 * the delimiters: 7 bytes for `<!--` and `-->`, or only 4 when the comment
 * is flagged as unclosed (no trailing `-->` is counted).
 *
 * @param Comment|CommentTk $node A comment node containing a DOM-escaped comment.
 * @return int The wikitext length in UTF-8 bytes necessary to encode this
 *  comment, delimiters included.
 */
public static function decodedCommentLength( $node ): int {
	if ( $node instanceof Comment ) {
		$text = $node->nodeValue;
		// A DOM comment counts as unclosed when the sibling right before it
		// carries the unclosed-comment placeholder type.
		$prev = $node->previousSibling;
		$unclosed = $prev &&
			DOMUtils::hasTypeOf( $prev, "mw:Placeholder/UnclosedComment" );
	} elseif ( $node instanceof CommentTk ) {
		// @phan-suppress-next-line PhanUndeclaredProperty dynamic property
		$unclosed = isset( $node->dataParsoid->unclosedComment );
		$text = $node->value;
	} else {
		throw new UnreachableException( 'Should not be here!' );
	}

	// "<!--" + "-->" is 7 bytes; an unclosed comment only has the 4-byte opener.
	$delimiterLen = $unclosed ? 4 : 7;
	return strlen( self::decodeComment( $text ) ) + $delimiterLen;
}
791 | |
/**
 * Get the lower-cased extension tag name encoded in a node's
 * "mw:Extension/<name>" type.
 *
 * @param Node $node
 * @return ?string The tag name, or null when no such type matched.
 */
public static function getExtTagName( Node $node ): ?string {
	$typeOf = DOMUtils::matchTypeOf( $node, '#^mw:Extension/(.+?)$#D' );
	if ( !$typeOf ) {
		return null;
	}
	// Drop the fixed "mw:Extension/" prefix; what remains is the tag name.
	$prefixLen = strlen( 'mw:Extension/' );
	return mb_strtolower( substr( $typeOf, $prefixLen ) );
}
800 | |
/**
 * Look up the site-config extension tag implementation for a node.
 *
 * @param Env $env
 * @param Node $node
 * @return ?ExtensionTagHandler Null when the node yields no extension tag
 *  name; otherwise whatever the site config returns for that name.
 */
public static function getNativeExt( Env $env, Node $node ): ?ExtensionTagHandler {
	$tagName = self::getExtTagName( $node );
	if ( !$tagName ) {
		return null;
	}
	return $env->getSiteConfig()->getExtTagImpl( $tagName );
}
810 | |
/**
 * Is this an include directive, i.e. exactly one of `includeonly`,
 * `noinclude` or `onlyinclude`? The comparison is case-sensitive.
 *
 * @param string $name
 * @return bool
 */
public static function isIncludeTag( string $name ): bool {
	$includeTags = [ 'includeonly', 'noinclude', 'onlyinclude' ];
	return in_array( $name, $includeTags, true );
}
819 | |
/**
 * Check whether a tag name is an annotation tag according to the site
 * config. The name is lower-cased first; if it is not recognised as-is
 * and contains a pipe past the first character, the part before the pipe
 * is checked instead.
 *
 * @param Env $env
 * @param string $name
 * @return bool
 */
public static function isAnnotationTag( Env $env, string $name ): bool {
	$lowered = mb_strtolower( $name );
	$config = $env->getSiteConfig();

	$result = $config->isAnnotationTag( $lowered );
	if ( $result ) {
		return $result;
	}

	// avoid crashing on <tvar|name> even if we don't support that syntax explicitly
	$barAt = strpos( $lowered, '|' );
	if ( !$barAt ) {
		// Either no pipe at all, or a pipe at offset 0 (nothing before it).
		return $result;
	}
	return $config->isAnnotationTag( substr( $lowered, 0, $barAt ) );
}
834 | |
/**
 * Check if tag is an extension tag, an include directive or an annotation
 * tag. The name is lower-cased before any of the checks.
 * Adapted from similar grammar function
 *
 * @param Env $env
 * @param string $name
 * @return bool
 */
public static function isAnnOrExtTag( Env $env, string $name ): bool {
	$lowered = mb_strtolower( $name );
	$extTags = $env->getSiteConfig()->getExtensionTagNameMap();

	if ( isset( $extTags[$lowered] ) ) {
		return true;
	}
	if ( self::isIncludeTag( $lowered ) ) {
		return true;
	}
	return self::isAnnotationTag( $env, $lowered );
}
850 | |
/**
 * Build a DocumentFragment whose only child is a <span> of type "mw:I18n".
 * The span carries no message yet; it should be filled in with
 * setDataNodeI18n to be valid.
 *
 * @param Document $doc
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createEmptyLocalizationFragment( Document $doc ): DocumentFragment {
	$fragment = $doc->createDocumentFragment();

	$placeholder = $doc->createElement( 'span' );
	DOMUtils::addTypeOf( $placeholder, 'mw:I18n' );

	$fragment->appendChild( $placeholder );
	return $fragment;
}
865 | |
/**
 * Create a fragment for an internationalization (i18n) message that will be
 * localized into the page content language. The fragment's single child is
 * a span holding the information needed for later localization.
 *
 * @param Document $doc
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createPageContentI18nFragment(
	Document $doc, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	$message = I18nInfo::createPageContentI18n( $key, $params );
	// The empty fragment's first (and only) child is the mw:I18n span.
	$span = $fragment->firstChild;
	DOMDataUtils::setDataNodeI18n( $span, $message );
	return $fragment;
}
884 | |
/**
 * Create a fragment for an internationalization (i18n) message that will be
 * localized into the user interface language. The fragment's single child
 * is a span holding the information needed for later localization.
 *
 * @param Document $doc
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createInterfaceI18nFragment(
	Document $doc, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	$message = I18nInfo::createInterfaceI18n( $key, $params );
	// The empty fragment's first (and only) child is the mw:I18n span.
	$span = $fragment->firstChild;
	DOMDataUtils::setDataNodeI18n( $span, $message );
	return $fragment;
}
903 | |
/**
 * Create a fragment for an internationalization (i18n) message that will be
 * localized into an arbitrary language. The fragment's single child is a
 * span holding the information needed for later localization.
 *
 * The use of this method is discouraged; use
 * ::createPageContentI18nFragment(...) and ::createInterfaceI18nFragment(...)
 * where possible rather than, respectively,
 * ::createLangI18nFragment(..., $wgContLang, ...) and
 * ::createLangI18nFragment(..., $wgLang, ...).
 *
 * @param Document $doc
 * @param Bcp47Code $lang language for the localization
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createLangI18nFragment(
	Document $doc, Bcp47Code $lang, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	$message = I18nInfo::createLangI18n( $lang, $key, $params );
	// The empty fragment's first (and only) child is the mw:I18n span.
	$span = $fragment->firstChild;
	DOMDataUtils::setDataNodeI18n( $span, $message );
	return $fragment;
}
927 | |
/**
 * Record on $element the internationalization information needed for the
 * attribute $name to be localized, in a later pass, into the page content
 * language. The element is also given the "mw:LocalizedAttrs" type.
 *
 * @param Element $element element on which to add internationalization information
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addPageContentI18nAttribute(
	Element $element, string $name, string $key, ?array $params = null
): void {
	$message = I18nInfo::createPageContentI18n( $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $message );
}
943 | |
/**
 * Record on $element the internationalization information needed for the
 * attribute $name to be localized, in a later pass, into the user interface
 * language. The element is also given the "mw:LocalizedAttrs" type.
 *
 * @param Element $element element on which to add internationalization information
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addInterfaceI18nAttribute(
	Element $element, string $name, string $key, ?array $params = null
): void {
	$message = I18nInfo::createInterfaceI18n( $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $message );
}
958 | |
/**
 * Record on $element the internationalization information needed for the
 * attribute $name to be localized, in a later pass, into the provided
 * language. The element is also given the "mw:LocalizedAttrs" type.
 *
 * The use of this method is discouraged; use
 * ::addPageContentI18nAttribute(...) and ::addInterfaceI18nAttribute(...)
 * where possible rather than, respectively,
 * ::addLangI18nAttribute(..., $wgContLang, ...) and
 * ::addLangI18nAttribute(..., $wgLang, ...).
 *
 * @param Element $element element on which to add internationalization information
 * @param Bcp47Code $lang language in which the message will be localized
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addLangI18nAttribute(
	Element $element, Bcp47Code $lang, string $name, string $key, ?array $params = null
): void {
	$message = I18nInfo::createLangI18n( $lang, $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $message );
}
978 | |
/**
 * Check whether a node is an annotation meta; if yes, returns its type.
 *
 * The check is delegated to DOMUtils::matchNameAndTypeOf() with the node
 * name 'meta' and the annotation meta type pattern.
 *
 * @param Node $node
 * @return ?string
 */
public static function matchAnnotationMeta( Node $node ): ?string {
	$matchedType = DOMUtils::matchNameAndTypeOf(
		$node, 'meta', self::ANNOTATION_META_TYPE_REGEXP
	);
	return $matchedType;
}
986 | |
/**
 * Extract the annotation type of a node, taken from the first capture
 * group of the annotation meta type pattern. Returns null if the node has
 * no matching type.
 *
 * On a successful match, $isStart is set to false when the matched type
 * ends in "/End" and to true otherwise; it is left untouched when there is
 * no match.
 *
 * @param Node $node
 * @param bool &$isStart
 * @return ?string The matched type, or null if no match.
 */
public static function extractAnnotationType( Node $node, bool &$isStart = false ): ?string {
	$typeOf = DOMUtils::matchTypeOf( $node, self::ANNOTATION_META_TYPE_REGEXP );
	if ( $typeOf === null ) {
		return null;
	}
	if ( !preg_match( self::ANNOTATION_META_TYPE_REGEXP, $typeOf, $groups ) ) {
		return null;
	}
	$isStart = !str_ends_with( $typeOf, '/End' );
	return $groups[1];
}
1003 | |
/**
 * Check whether a node is a meta signifying the start of an annotated part
 * of the DOM: a 'meta' element whose annotation type is found and is not
 * an end marker.
 *
 * @param Node $node
 * @return bool
 */
public static function isAnnotationStartMarkerMeta( Node $node ): bool {
	$isMeta = $node instanceof Element && DOMCompat::nodeName( $node ) === 'meta';
	if ( !$isMeta ) {
		return false;
	}

	$startFlag = false;
	$type = self::extractAnnotationType( $node, $startFlag );
	if ( $type === null ) {
		return false;
	}
	return $startFlag;
}
1018 | |
/**
 * Check whether a node is a meta signifying the end of an annotated part
 * of the DOM: a 'meta' element whose annotation type is found and is not
 * a start marker.
 *
 * @param Node $node
 * @return bool
 */
public static function isAnnotationEndMarkerMeta( Node $node ): bool {
	$isMeta = $node instanceof Element && DOMCompat::nodeName( $node ) === 'meta';
	if ( !$isMeta ) {
		return false;
	}

	$startFlag = false;
	$type = self::extractAnnotationType( $node, $startFlag );
	if ( $type === null ) {
		return false;
	}
	return !$startFlag;
}
1033 | |
/**
 * Check whether the meta tag was moved from its initial position.
 *
 * Only annotation metas are considered; for those, the answer is the
 * `wasMoved` value of the node's parsoid data, or false when it is not set.
 *
 * @param Node $node
 * @return bool
 */
public static function isMovedMetaTag( Node $node ): bool {
	if ( !( $node instanceof Element ) || self::matchAnnotationMeta( $node ) === null ) {
		return false;
	}
	$dp = DOMDataUtils::getDataParsoid( $node );
	return $dp->wasMoved ?? false;
}
1048 | |
/**
 * Returns true if a node is a (start or end) annotation meta tag.
 * A null node is never one.
 *
 * @param ?Node $n
 * @return bool
 */
public static function isMarkerAnnotation( ?Node $n ): bool {
	if ( $n === null ) {
		return false;
	}
	return self::matchAnnotationMeta( $n ) !== null;
}
1056 | |
/**
 * Extracts the media format from the node's media type: the segment after
 * the first slash, e.g. the "Thumb" in "mw:File/Thumb".
 *
 * @param Element $node
 * @return string The format, or '' when the node has no media type or the
 *  type has no slash-separated suffix.
 */
public static function getMediaFormat( Element $node ): string {
	// TODO: Remove "Image|Video|Audio" when version 2.4.0 of the content
	// is no longer supported
	$typeOf = DOMUtils::matchTypeOf( $node, '#^mw:(File|Image|Video|Audio)(/|$)#' );
	if ( $typeOf === null ) {
		return '';
	}
	$segments = explode( '/', $typeOf );
	return $segments[1] ?? '';
}
1070 | |
/**
 * Does the media node use a format listed here as having a visible
 * caption ('Thumb' or 'Frame')?
 *
 * @param Element $node
 * @return bool
 */
public static function hasVisibleCaption( Element $node ): bool {
	$captionedFormats = [ 'Thumb', /* 'Manualthumb', FIXME(T305759) */ 'Frame' ];
	return in_array( self::getMediaFormat( $node ), $captionedFormats, true );
}
1081 | |
/**
 * Collect the text content of a caption, leaving out linkbacks.
 *
 * Ref dom post-processing happens after adding media info, so the
 * linkbacks aren't available in the textContent added to the alt.
 * However, when serializing, they are in the caption elements. So, this
 * special handler drops the linkbacks for the purpose of comparison.
 *
 * Text nodes are concatenated in document order; element children are
 * descended into unless they are metadata tags or have the type
 * "mw:Extension/ref". All other node kinds are skipped.
 *
 * @param Node $node
 * @return string
 */
public static function textContentFromCaption( Node $node ): string {
	$text = '';
	for ( $child = $node->firstChild; $child; $child = $child->nextSibling ) {
		if ( $child instanceof Text ) {
			$text .= $child->nodeValue;
			continue;
		}
		if ( !( $child instanceof Element ) ) {
			continue;
		}
		if ( DOMUtils::isMetaDataTag( $child ) ) {
			continue;
		}
		if ( DOMUtils::hasTypeOf( $child, "mw:Extension/ref" ) ) {
			continue;
		}
		$text .= self::textContentFromCaption( $child );
	}
	return $text;
}
1108 | |
1109 | } |