Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
16.67% |
43 / 258 |
|
16.18% |
11 / 68 |
CRAP | |
0.00% |
0 / 1 |
| WTUtils | |
16.67% |
43 / 258 |
|
16.18% |
11 / 68 |
16501.33 | |
0.00% |
0 / 1 |
| hasLiteralHTMLMarker | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
| isLiteralHTMLNode | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| isZeroWidthWikitextElt | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| isBlockNodeWithVisibleWT | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| isATagFromWikiLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
| isATagFromExtLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
| isATagFromURLLinkSyntax | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
| isATagFromMagicLinkSyntax | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
| matchTplType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| hasExpandedAttrsType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isTplMarkerMeta | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isTplStartMarkerMeta | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| isTplEndMarkerMeta | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| isNewElt | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| isIndentPre | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
| isInlineMedia | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| isGeneratedFigure | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| indentPreDSRCorrection | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
| isEncapsulatedDOMForestRoot | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
12 | |||
| isRedirectLink | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
| isCategoryLink | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
| isSolTransparentLink | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
| emitsSolTransparentSingleLineWT | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| isFallbackIdSpan | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isRenderingTransparentNode | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
72 | |||
| inHTMLTableTag | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
| serializeChildTableTagAsHTML | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
| isFirstEncapsulationWrapperNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isFirstExtensionWrapperNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isExtensionOutputtingCoreMwDomSpec | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
| isEncapsulationWrapper | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
| isDOMFragmentWrapper | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isSealedFragmentOfType | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getDOMFragmentContents | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
| isParsoidSectionTag | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| findFirstEncapsulationWrapperNode | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
| fromEncapsulatedContentHelper | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
| fromTemplatedContent | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| fromExtensionContent | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| fromEncapsulatedContent | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getWTSource | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| getAboutSiblings | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
42 | |||
| skipOverEncapsulatedContent | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| encodeComment | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
| decodeComment | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
| decodedCommentLength | |
58.33% |
7 / 12 |
|
0.00% |
0 / 1 |
8.60 | |||
| getExtTagName | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| getPFragmentHandlerKey | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| getNativeExt | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| isIncludeTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
12 | |||
| isAnnotationTag | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
| isAnnOrExtTag | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| createEmptyLocalizationFragment | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
| createPageContentI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| createInterfaceI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| createLangI18nFragment | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| addPageContentI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| addInterfaceI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| addLangI18nAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| matchAnnotationMeta | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| extractAnnotationType | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| isAnnotationStartMarkerMeta | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
| isAnnotationEndMarkerMeta | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
| isMovedMetaTag | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
| isMarkerAnnotation | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
| getMediaFormat | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| hasVisibleCaption | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| textContentFromCaption | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
42 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Utils; |
| 5 | |
| 6 | use Wikimedia\Assert\Assert; |
| 7 | use Wikimedia\Assert\UnreachableException; |
| 8 | use Wikimedia\Bcp47Code\Bcp47Code; |
| 9 | use Wikimedia\Parsoid\Config\Env; |
| 10 | use Wikimedia\Parsoid\Core\DOMCompat; |
| 11 | use Wikimedia\Parsoid\DOM\Comment; |
| 12 | use Wikimedia\Parsoid\DOM\Document; |
| 13 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
| 14 | use Wikimedia\Parsoid\DOM\DOMException; |
| 15 | use Wikimedia\Parsoid\DOM\Element; |
| 16 | use Wikimedia\Parsoid\DOM\Node; |
| 17 | use Wikimedia\Parsoid\DOM\Text; |
| 18 | use Wikimedia\Parsoid\Ext\ExtensionTagHandler; |
| 19 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
| 20 | use Wikimedia\Parsoid\NodeData\I18nInfo; |
| 21 | use Wikimedia\Parsoid\NodeData\TempData; |
| 22 | use Wikimedia\Parsoid\Tokens\CommentTk; |
| 23 | use Wikimedia\Parsoid\Wikitext\Consts; |
| 24 | use Wikimedia\Parsoid\Wt2Html\Frame; |
| 25 | |
| 26 | /** |
| 27 | * These utilities pertain to querying / extracting / modifying wikitext information from the DOM.
| 28 | * |
| 29 | * @note Many of these methods are not safe to use unless the DOM has been |
| 30 | * loaded and prepared, as they consult DataParsoid from the NodeData. |
| 31 | */ |
| 32 | class WTUtils { |
| 33 | private const FIRST_ENCAP_REGEXP = |
| 34 | '#(?:^|\s)(mw:(?:Transclusion|Param|LanguageVariant|Extension(/\S+)))(?=$|\s)#D'; |
| 35 | |
| 36 | /** |
| 37 | * Regexp for checking marker metas typeofs representing |
| 38 | * transclusion markup or template param markup. |
| 39 | */ |
| 40 | private const TPL_META_TYPE_REGEXP = '#^mw:(?:Transclusion|Param)(?:/End)?$#D'; |
| 41 | |
| 42 | /** |
| 43 | * Regexp for checking marker metas typeofs representing |
| 44 | * annotation markup |
| 45 | */ |
| 46 | public const ANNOTATION_META_TYPE_REGEXP = '#^mw:(?:Annotation/([\w\d]+))(?:/End)?$#uD'; |
| 47 | |
| 48 | /** |
| 49 | * Checks if a token/node has a rel attribute that matches this regexp. |
| 50 | * Tokens matching this and embedded in a table-cell attribute position |
| 51 | * stops attribute processing in that cell. |
| 52 | */ |
| 53 | // phpcs:ignore Generic.Files.LineLength.TooLong |
| 54 | public const WIKILINK_SYNTAX_CONSTRUCTS_REGEXP = '#^mw:(WikiLink(/Interwiki)?|MediaLink|PageProp/(Category|Language))$#'; |
| 55 | |
| 56 | /** |
| 57 | * Check whether a node's data-parsoid object includes |
| 58 | * an indicator that the original wikitext was a literal |
| 59 | * HTML element (like table or p) |
| 60 | */ |
| 61 | public static function hasLiteralHTMLMarker( DataParsoid $dp ): bool { |
| 62 | return isset( $dp->stx ) && $dp->stx === 'html'; |
| 63 | } |
| 64 | |
| 65 | /** |
| 66 | * Run a node through {@link #hasLiteralHTMLMarker}. |
| 67 | */ |
| 68 | public static function isLiteralHTMLNode( ?Node $node ): bool { |
| 69 | return $node instanceof Element && |
| 70 | self::hasLiteralHTMLMarker( DOMDataUtils::getDataParsoid( $node ) ); |
| 71 | } |
| 72 | |
| 73 | public static function isZeroWidthWikitextElt( Node $node ): bool { |
| 74 | return isset( Consts::$ZeroWidthWikitextTags[DOMUtils::nodeName( $node )] ) && |
| 75 | !self::isLiteralHTMLNode( $node ); |
| 76 | } |
| 77 | |
| 78 | /** |
| 79 | * Is `$node` a block node that is also visible in wikitext? |
| 80 | * An example of an invisible block node is a `<p>`-tag that |
| 81 | * Parsoid generated, or a `<ul>`, `<ol>` tag. |
| 82 | */ |
| 83 | public static function isBlockNodeWithVisibleWT( Node $node ): bool { |
| 84 | return DOMUtils::isWikitextBlockNode( $node ) && |
| 85 | !self::isZeroWidthWikitextElt( $node ); |
| 86 | } |
| 87 | |
| 88 | /** |
| 89 | * Helper functions to detect when an A-$node uses [[..]]/[..]/... style |
| 90 | * syntax (for wikilinks, ext links, url links). rel-type is not sufficient |
| 91 | * anymore since mw:ExtLink is used for all the three link syntaxes. |
| 92 | */ |
| 93 | public static function isATagFromWikiLinkSyntax( Element $node ): bool { |
| 94 | if ( DOMUtils::nodeName( $node ) !== 'a' ) { |
| 95 | return false; |
| 96 | } |
| 97 | |
| 98 | $dp = DOMDataUtils::getDataParsoid( $node ); |
| 99 | return DOMUtils::hasRel( $node, 'mw:WikiLink' ) || |
| 100 | ( isset( $dp->stx ) && $dp->stx !== "url" && $dp->stx !== "magiclink" ); |
| 101 | } |
| 102 | |
| 103 | /** |
| 104 | * Helper function to detect when an A-node uses ext-link syntax. |
| 105 | * rel attribute is not sufficient anymore since mw:ExtLink is used for |
| 106 | * multiple link types |
| 107 | */ |
| 108 | public static function isATagFromExtLinkSyntax( Element $node ): bool { |
| 109 | if ( DOMUtils::nodeName( $node ) !== 'a' ) { |
| 110 | return false; |
| 111 | } |
| 112 | |
| 113 | $dp = DOMDataUtils::getDataParsoid( $node ); |
| 114 | return DOMUtils::hasRel( $node, 'mw:ExtLink' ) && |
| 115 | ( !isset( $dp->stx ) || ( $dp->stx !== "url" && $dp->stx !== "magiclink" ) ); |
| 116 | } |
| 117 | |
| 118 | /** |
| 119 | * Helper function to detect when an A-node uses url-link syntax. |
| 120 | * rel attribute is not sufficient anymore since mw:ExtLink is used for |
| 121 | * multiple link types |
| 122 | */ |
| 123 | public static function isATagFromURLLinkSyntax( Element $node ): bool { |
| 124 | if ( DOMUtils::nodeName( $node ) !== 'a' ) { |
| 125 | return false; |
| 126 | } |
| 127 | |
| 128 | $dp = DOMDataUtils::getDataParsoid( $node ); |
| 129 | return DOMUtils::hasRel( $node, 'mw:ExtLink' ) && |
| 130 | isset( $dp->stx ) && $dp->stx === "url"; |
| 131 | } |
| 132 | |
| 133 | /** |
| 134 | * Helper function to detect when an A-node uses magic-link syntax. |
| 135 | * rel attribute is not sufficient anymore since mw:ExtLink is used for |
| 136 | * multiple link types |
| 137 | */ |
| 138 | public static function isATagFromMagicLinkSyntax( Element $node ): bool { |
| 139 | if ( DOMUtils::nodeName( $node ) !== 'a' ) { |
| 140 | return false; |
| 141 | } |
| 142 | |
| 143 | $dp = DOMDataUtils::getDataParsoid( $node ); |
| 144 | return isset( $dp->stx ) && $dp->stx === 'magiclink'; |
| 145 | } |
| 146 | |
| 147 | /** |
| 148 | * Check whether a node's typeof indicates that it is a template expansion. |
| 149 | * |
| 150 | * @param Element $node |
| 151 | * @return ?string The matched type, or null if no match. |
| 152 | */ |
| 153 | public static function matchTplType( Element $node ): ?string { |
| 154 | return DOMUtils::matchTypeOf( $node, self::TPL_META_TYPE_REGEXP ); |
| 155 | } |
| 156 | |
| 157 | /** |
| 158 | * Check whether a typeof indicates that it signifies an |
| 159 | * expanded attribute. |
| 160 | */ |
| 161 | public static function hasExpandedAttrsType( Element $node ): bool { |
| 162 | return DOMUtils::matchTypeOf( $node, '/^mw:ExpandedAttrs(\/\S+)*$/' ) !== null; |
| 163 | } |
| 164 | |
| 165 | /** |
| 166 | * Check whether a node is a meta tag that signifies a template expansion. |
| 167 | */ |
| 168 | public static function isTplMarkerMeta( Node $node ): bool { |
| 169 | return DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP ) !== null; |
| 170 | } |
| 171 | |
| 172 | /** |
| 173 | * Check whether a node is a meta signifying the start of a template expansion. |
| 174 | */ |
| 175 | public static function isTplStartMarkerMeta( Node $node ): bool { |
| 176 | $t = DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP ); |
| 177 | return $t !== null && !str_ends_with( $t, '/End' ); |
| 178 | } |
| 179 | |
| 180 | /** |
| 181 | * Check whether a node is a meta signifying the end of a template expansion. |
| 182 | */ |
| 183 | public static function isTplEndMarkerMeta( Node $node ): bool { |
| 184 | $t = DOMUtils::matchNameAndTypeOf( $node, 'meta', self::TPL_META_TYPE_REGEXP ); |
| 185 | return $t !== null && str_ends_with( $t, '/End' ); |
| 186 | } |
| 187 | |
| 188 | /** |
| 189 | * This tests whether a DOM node is a new node added during an edit session |
| 190 | * or an existing node from parsed wikitext. |
| 191 | * |
| 192 | * As written, this function can only be used on non-template/extension content |
| 193 | * or on the top-level nodes of template/extension content. This test will |
| 194 | * return the wrong results on non-top-level $nodes of template/extension content. |
| 195 | */ |
| 196 | public static function isNewElt( Node $node ): bool { |
| 197 | // We cannot determine newness on text/comment $nodes. |
| 198 | if ( !( $node instanceof Element ) ) { |
| 199 | return false; |
| 200 | } |
| 201 | |
| 202 | // For template/extension content, newness should be |
| 203 | // checked on the encapsulation wrapper $node. |
| 204 | $node = self::findFirstEncapsulationWrapperNode( $node ) ?? $node; |
| 205 | return DOMDataUtils::getDataParsoid( $node )->getTempFlag( TempData::IS_NEW ); |
| 206 | } |
| 207 | |
| 208 | /** |
| 209 | * Check whether a pre is caused by indentation in the original wikitext. |
| 210 | */ |
| 211 | public static function isIndentPre( Node $node ): bool { |
| 212 | return DOMUtils::nodeName( $node ) === "pre" && !self::isLiteralHTMLNode( $node ); |
| 213 | } |
| 214 | |
| 215 | public static function isInlineMedia( Node $node ): bool { |
| 216 | return DOMUtils::nodeName( $node ) === 'span' && |
| 217 | self::isGeneratedFigure( $node ); |
| 218 | } |
| 219 | |
| 220 | public static function isGeneratedFigure( Node $node ): bool { |
| 221 | return DOMUtils::matchTypeOf( $node, '#^mw:File($|/)#D' ) !== null; |
| 222 | } |
| 223 | |
| 224 | /** |
| 225 | * Find how much offset is necessary for the DSR of an |
| 226 | * indent-originated pre tag. |
| 227 | */ |
| 228 | public static function indentPreDSRCorrection( Node $textNode ): int { |
| 229 | // NOTE: This assumes a text-node and doesn't check that it is one. |
| 230 | // |
| 231 | // FIXME: Doesn't handle text nodes that are not direct children of the pre |
| 232 | if ( self::isIndentPre( $textNode->parentNode ) ) { |
| 233 | $numNLs = substr_count( $textNode->nodeValue, "\n" ); |
| 234 | if ( $textNode->parentNode->lastChild === $textNode ) { |
| 235 | // We dont want the trailing newline of the last child of the pre |
| 236 | // to contribute a pre-correction since it doesn't add new content |
| 237 | // in the pre-node after the text |
| 238 | if ( str_ends_with( $textNode->nodeValue, "\n" ) ) { |
| 239 | $numNLs--; |
| 240 | } |
| 241 | } |
| 242 | return $numNLs; |
| 243 | } else { |
| 244 | return 0; |
| 245 | } |
| 246 | } |
| 247 | |
| 248 | /** |
| 249 | * Check if $node is a root in an encapsulated DOM forest. |
| 250 | */ |
| 251 | public static function isEncapsulatedDOMForestRoot( Node $node ): bool { |
| 252 | $about = $node instanceof Element ? DOMCompat::getAttribute( $node, 'about' ) : null; |
| 253 | // FIXME: Ensure that our DOM spec clarifies this expectation |
| 254 | return $about !== null && CounterType::TRANSCLUSION_ABOUT->matches( $about ); |
| 255 | } |
| 256 | |
| 257 | /** |
| 258 | * Does $node represent a redirect link? |
| 259 | */ |
| 260 | public static function isRedirectLink( ?Node $node ): bool { |
| 261 | return $node instanceof Element && |
| 262 | DOMUtils::nodeName( $node ) === 'link' && |
| 263 | DOMUtils::matchRel( $node, '#\bmw:PageProp/redirect\b#' ) !== null; |
| 264 | } |
| 265 | |
| 266 | /** |
| 267 | * Does $node represent a category link? |
| 268 | */ |
| 269 | public static function isCategoryLink( ?Node $node ): bool { |
| 270 | return $node instanceof Element && |
| 271 | DOMUtils::nodeName( $node ) === 'link' && |
| 272 | DOMUtils::matchRel( $node, '#\bmw:PageProp/Category\b#' ) !== null; |
| 273 | } |
| 274 | |
| 275 | /** |
| 276 | * Does $node represent a link that is sol-transparent? |
| 277 | */ |
| 278 | public static function isSolTransparentLink( ?Node $node ): bool { |
| 279 | return $node instanceof Element && |
| 280 | DOMUtils::nodeName( $node ) === 'link' && |
| 281 | ( DOMUtils::matchRel( $node, TokenUtils::SOL_TRANSPARENT_LINK_REGEX ) !== null || |
| 282 | // Empty extension content are given a synthetic link for roundtripping. |
| 283 | ( DOMDataUtils::getDataParsoid( $node )->getTemp()->empty ?? false ) ); |
| 284 | } |
| 285 | |
| 286 | /** |
| 287 | * Check if '$node' emits wikitext that is sol-transparent in wikitext form. |
| 288 | * This is a test for wikitext that doesn't introduce line breaks. |
| 289 | * |
| 290 | * Comment, whitespace text $nodes, category links, redirect links, behavior |
| 291 | * switches, and include directives currently satisfy this definition. |
| 292 | * |
| 293 | * This should come close to matching TokenUtils.isSolTransparent() |
| 294 | */ |
| 295 | public static function emitsSolTransparentSingleLineWT( Node $node ): bool { |
| 296 | if ( $node instanceof Text ) { |
| 297 | // NB: We differ here to meet the nl condition. |
| 298 | return (bool)preg_match( '/^[ \t]*$/D', $node->nodeValue ); |
| 299 | } elseif ( self::isRenderingTransparentNode( $node ) ) { |
| 300 | // NB: The only metas in a DOM should be for behavior switches and |
| 301 | // include directives, other than explicit HTML meta tags. This |
| 302 | // differs from our counterpart in Util where ref meta tokens |
| 303 | // haven't been expanded to spans yet. |
| 304 | return true; |
| 305 | } else { |
| 306 | return false; |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | /** |
| 311 | * This is the span added to headings to add fallback ids for when legacy |
| 312 | * and HTML5 ids don't match up. This prevents broken links to legacy ids. |
| 313 | */ |
| 314 | public static function isFallbackIdSpan( Node $node ): bool { |
| 315 | return DOMUtils::hasNameAndTypeOf( $node, 'span', 'mw:FallbackId' ); |
| 316 | } |
| 317 | |
| 318 | /** |
| 319 | * These are primarily 'metadata'-like $nodes that don't show up in output rendering. |
| 320 | * - In Parsoid output, they are represented by link/meta tags. |
| 321 | * - In the PHP parser, they are completely stripped from the input early on. |
| 322 | * Because of this property, these rendering-transparent $nodes are also |
| 323 | * SOL-transparent for the purposes of parsing behavior. |
| 324 | */ |
| 325 | public static function isRenderingTransparentNode( Node $node ): bool { |
| 326 | // FIXME: Can we change this entire thing to |
| 327 | // $node instanceof Comment || |
| 328 | // DOMUtils::getDataParsoid($node).stx !== 'html' && |
| 329 | // (DOMUtils::nodeName($node) === 'meta' || DOMUtils::nodeName($node) === 'link') |
| 330 | // |
| 331 | return $node instanceof Comment || |
| 332 | self::isSolTransparentLink( $node ) || ( |
| 333 | // Catch-all for everything else. |
| 334 | $node instanceof Element && |
| 335 | DOMUtils::nodeName( $node ) === 'meta' && |
| 336 | !self::isMarkerAnnotation( $node ) && |
| 337 | !DOMUtils::hasTypeOf( $node, 'mw:DOMFragment' ) && |
| 338 | ( DOMDataUtils::getDataParsoid( $node )->stx ?? '' ) !== 'html' |
| 339 | ) || self::isFallbackIdSpan( $node ); |
| 340 | } |
| 341 | |
| 342 | /** |
| 343 | * Is $node nested inside a table tag that uses HTML instead of native |
| 344 | * wikitext? |
| 345 | */ |
| 346 | public static function inHTMLTableTag( Node $node ): bool { |
| 347 | $p = $node->parentNode; |
| 348 | while ( DOMUtils::isTableTag( $p ) ) { |
| 349 | if ( self::isLiteralHTMLNode( $p ) ) { |
| 350 | return true; |
| 351 | } elseif ( DOMUtils::nodeName( $p ) === 'table' ) { |
| 352 | // Don't cross <table> boundaries |
| 353 | return false; |
| 354 | } |
| 355 | $p = $p->parentNode; |
| 356 | } |
| 357 | |
| 358 | return false; |
| 359 | } |
| 360 | |
| 361 | public static function serializeChildTableTagAsHTML( Element $elt ): bool { |
| 362 | $name = DOMUtils::nodeName( $elt ); |
| 363 | return isset( Consts::$HTML['ChildTableTags'][$name] ) && |
| 364 | !isset( Consts::$ZeroWidthWikitextTags[$name] ) && |
| 365 | self::inHTMLTableTag( $elt ); |
| 366 | } |
| 367 | |
| 368 | /** |
| 369 | * Is $node the first wrapper element of encapsulated content? |
| 370 | */ |
| 371 | public static function isFirstEncapsulationWrapperNode( Node $node ): bool { |
| 372 | return DOMUtils::matchTypeOf( $node, self::FIRST_ENCAP_REGEXP ) !== null; |
| 373 | } |
| 374 | |
| 375 | /** |
| 376 | * Is $node the first wrapper element of extension content? |
| 377 | */ |
| 378 | public static function isFirstExtensionWrapperNode( Node $node ): bool { |
| 379 | return DOMUtils::matchTypeOf( $node, "#mw:Extension/#" ) !== null; |
| 380 | } |
| 381 | |
| 382 | /** |
| 383 | * Checks whether a first encapsulation wrapper node is encapsulating an extension |
| 384 | * that outputs MediaWiki Core DOM Spec HTML (https://www.mediawiki.org/wiki/Specs/HTML) |
| 385 | */ |
| 386 | public static function isExtensionOutputtingCoreMwDomSpec( Node $node, Env $env ): bool { |
| 387 | $extTagName = self::getExtTagName( $node ); |
| 388 | if ( $extTagName === null ) { |
| 389 | return false; |
| 390 | } |
| 391 | $extConfig = $env->getSiteConfig()->getExtTagConfig( $extTagName ); |
| 392 | $htmlType = $extConfig['options']['outputHasCoreMwDomSpecMarkup'] ?? null; |
| 393 | return $htmlType === true; |
| 394 | } |
| 395 | |
| 396 | /** |
| 397 | * Is $node an encapsulation wrapper elt? |
| 398 | * |
| 399 | * All root-level $nodes of generated content are considered |
| 400 | * encapsulation wrappers and share an about-id. |
| 401 | */ |
| 402 | public static function isEncapsulationWrapper( Node $node ): bool { |
| 403 | // True if it has an encapsulation type or while walking backwards |
| 404 | // over elts with identical about ids, we run into a $node with an |
| 405 | // encapsulation type. |
| 406 | return $node instanceof Element && self::findFirstEncapsulationWrapperNode( $node ) !== null; |
| 407 | } |
| 408 | |
| 409 | /** |
| 410 | * Is $node a DOMFragment wrapper? |
| 411 | */ |
| 412 | public static function isDOMFragmentWrapper( Node $node ): bool { |
| 413 | // See TokenUtils::hasDOMFragmentType |
| 414 | return DOMUtils::matchTypeOf( $node, '#^mw:DOMFragment(/sealed/\w+)?$#D' ) !== null; |
| 415 | } |
| 416 | |
| 417 | /** |
| 418 | * Is $node a sealed DOMFragment of a specific type? |
| 419 | */ |
| 420 | public static function isSealedFragmentOfType( Node $node, string $type ): bool { |
| 421 | return DOMUtils::hasTypeOf( $node, "mw:DOMFragment/sealed/$type" ); |
| 422 | } |
| 423 | |
| 424 | /** |
| 425 | * Return the contents of a DOMFragment wrapper. |
| 426 | */ |
| 427 | public static function getDOMFragmentContents( Node $node, bool $clearAfter = false ): DocumentFragment { |
| 428 | Assert::invariant( self::isDOMFragmentWrapper( $node ), "not a mw:DOMFragment" ); |
| 429 | '@phan-var Element $node'; // if it is a wrapper |
| 430 | $dp = DOMDataUtils::getDataParsoid( $node ); |
| 431 | $df = $dp->html; |
| 432 | '@phan-var DocumentFragment $df'; // non-null |
| 433 | if ( $clearAfter ) { |
| 434 | unset( $dp->html ); |
| 435 | } |
| 436 | return $df; |
| 437 | } |
| 438 | |
| 439 | /** |
| 440 | * Is $node a Parsoid-generated <section> tag? |
| 441 | */ |
| 442 | public static function isParsoidSectionTag( Node $node ): bool { |
| 443 | return DOMUtils::nodeName( $node ) === 'section' && |
| 444 | // @phan-suppress-next-line PhanUndeclaredMethod |
| 445 | $node->hasAttribute( 'data-mw-section-id' ); |
| 446 | } |
| 447 | |
| 448 | /** |
| 449 | * Find the first wrapper element of encapsulated content. |
| 450 | */ |
| 451 | public static function findFirstEncapsulationWrapperNode( |
| 452 | Node $node, |
| 453 | string $encapTypeofRE = self::FIRST_ENCAP_REGEXP |
| 454 | ): ?Element { |
| 455 | if ( !$node instanceof Element ) { |
| 456 | return null; |
| 457 | } |
| 458 | $about = DOMCompat::getAttribute( $node, 'about' ); |
| 459 | // No need to check if this is the right about id. |
| 460 | // We are validated by the typeof below. |
| 461 | if ( $about === null ) { |
| 462 | return null; |
| 463 | } |
| 464 | $prev = $node; |
| 465 | do { |
| 466 | $node = $prev; |
| 467 | $prev = DiffDOMUtils::previousNonDeletedSibling( $node ); |
| 468 | } while ( |
| 469 | $prev instanceof Element && |
| 470 | DOMCompat::getAttribute( $prev, 'about' ) === $about |
| 471 | ); |
| 472 | '@phan-var ?Element $node'; // @var ?Element $node |
| 473 | return DOMUtils::matchTypeOf( $node, $encapTypeofRE ) ? $node : null; |
| 474 | } |
| 475 | |
| 476 | /** |
| 477 | * Is $node from encapsulated (template, extension, etc.) content? |
| 478 | */ |
| 479 | public static function fromEncapsulatedContentHelper( Node $node, string $typeofRE ): bool { |
| 480 | if ( !( $node instanceof Element ) ) { |
| 481 | $node = $node->parentNode; |
| 482 | } |
| 483 | while ( !DOMUtils::atTheTop( $node ) ) { |
| 484 | if ( self::findFirstEncapsulationWrapperNode( $node, $typeofRE ) !== null ) { |
| 485 | return true; |
| 486 | } |
| 487 | $node = $node->parentNode; |
| 488 | } |
| 489 | return false; |
| 490 | } |
| 491 | |
| 492 | /** |
| 493 | * Is the $node from templated content? |
| 494 | * @param Node $node |
| 495 | * @return bool |
| 496 | */ |
| 497 | public static function fromTemplatedContent( Node $node ): bool { |
| 498 | return self::fromEncapsulatedContentHelper( $node, "#mw:Transclusion#" ); |
| 499 | } |
| 500 | |
| 501 | /** |
| 502 | * Is the $node from extension content? |
| 503 | * @param Node $node |
| 504 | * @param ?string $extType If non-null, checks for that specific extension |
| 505 | * @return bool |
| 506 | */ |
| 507 | public static function fromExtensionContent( Node $node, ?string $extType = null ): bool { |
| 508 | $re = $extType ? "#mw:Extension/$extType#" : "#mw:Extension/\w+#"; |
| 509 | return self::fromEncapsulatedContentHelper( $node, $re ); |
| 510 | } |
| 511 | |
| 512 | /** |
| 513 | * Is $node from encapsulated (template, extension, etc.) content? |
| 514 | */ |
| 515 | public static function fromEncapsulatedContent( Node $node ): bool { |
| 516 | return self::fromEncapsulatedContentHelper( $node, self::FIRST_ENCAP_REGEXP ); |
| 517 | } |
| 518 | |
| 519 | /** |
| 520 | * Compute, when possible, the wikitext source for a $node in |
| 521 | * an environment env. Returns null if the source cannot be |
| 522 | * extracted. |
| 523 | */ |
| 524 | public static function getWTSource( Frame $frame, Element $node ): ?string { |
| 525 | $dp = DOMDataUtils::getDataParsoid( $node ); |
| 526 | $dsr = $dp->dsr ?? null; |
| 527 | // FIXME: We could probably change the null return to '' |
| 528 | // Just need to verify that code that uses this won't break |
| 529 | return Utils::isValidDSR( $dsr ) ? |
| 530 | $dsr->substr( $frame->getSource() ) : null; |
| 531 | } |
| 532 | |
| 533 | /** |
| 534 | * Gets all siblings that follow '$node' that have an 'about' as |
| 535 | * their about id. |
| 536 | * |
| 537 | * This is used to fetch transclusion/extension content by using |
| 538 | * the about-id as the key. This works because |
| 539 | * transclusion/extension content is a forest of dom-trees formed |
| 540 | * by adjacent dom-nodes. This is the contract that template |
| 541 | * encapsulation, dom-reuse, and VE code all have to abide by. |
| 542 | * |
| 543 | * @param Node $node |
| 544 | * @param ?string $about |
| 545 | * @return Node[] |
| 546 | */ |
| 547 | public static function getAboutSiblings( Node $node, ?string $about ): array { |
| 548 | $nodes = [ $node ]; |
| 549 | |
| 550 | if ( $about === null ) { |
| 551 | return $nodes; |
| 552 | } |
| 553 | |
| 554 | $node = $node->nextSibling; |
| 555 | while ( $node instanceof Element && DOMCompat::getAttribute( $node, 'about' ) === $about ) { |
| 556 | $nodes[] = $node; |
| 557 | $node = $node->nextSibling; |
| 558 | } |
| 559 | |
| 560 | // Remove already consumed trailing IEW, if any |
| 561 | while ( count( $nodes ) > 0 && DOMUtils::isIEW( $nodes[count( $nodes ) - 1] ) ) { |
| 562 | array_pop( $nodes ); |
| 563 | } |
| 564 | |
| 565 | return $nodes; |
| 566 | } |
| 567 | |
| 568 | /** |
| 569 | * This function is only intended to be used on encapsulated $nodes |
| 570 | * (Template/Extension/Param content). |
| 571 | * |
| 572 | * Given a '$node' that has an about-id, it is assumed that it is generated |
| 573 | * by templates or extensions. This function skips over all |
| 574 | * following content nodes and returns the first non-template node |
| 575 | * that follows it. |
| 576 | */ |
| 577 | public static function skipOverEncapsulatedContent( Node $node ): ?Node { |
| 578 | $about = $node instanceof Element ? |
| 579 | DOMCompat::getAttribute( $node, 'about' ) : null; |
| 580 | if ( $about !== null ) { |
| 581 | // Guaranteed not to be empty. It will at least include $node. |
| 582 | $aboutSiblings = self::getAboutSiblings( $node, $about ); |
| 583 | return end( $aboutSiblings )->nextSibling; |
| 584 | } else { |
| 585 | return $node->nextSibling; |
| 586 | } |
| 587 | } |
| 588 | |
| 589 | /** |
| 590 | * Comment encoding/decoding. |
| 591 | * |
| 592 | * * Some relevant phab tickets: T94055, T70146, T60184, T95039 |
| 593 | * |
| 594 | * The wikitext comment rule is very simple: <!-- starts a comment, |
| 595 | * and --> ends a comment. This means we can have almost anything as the |
| 596 | * contents of a comment (except the string "-->", but see below), including |
| 597 | * several things that are not valid in HTML5 comments: |
| 598 | * |
| 599 | * * For one, the html5 comment parsing algorithm [0] leniently accepts |
| 600 | * --!> as a closing comment tag, which differs from the php+tidy combo. |
| 601 | * |
| 602 | * * If the comment's data matches /^-?>/, html5 will end the comment. |
| 603 | * For example, <!-->stuff<--> breaks up as |
| 604 | * <!--> (the comment) followed by, stuff<--> (as text). |
| 605 | * |
| 606 | * * Finally, comment data shouldn't contain two consecutive hyphen-minus |
| 607 | * characters (--), nor end in a hyphen-minus character (/-$/) as defined |
| 608 | * in the spec [1]. |
| 609 | * |
| 610 | * We work around all these problems by using HTML entity encoding inside |
| 611 | * the comment body. The characters -, >, and & must be encoded in order |
| 612 | * to prevent premature termination of the comment by one of the cases |
| 613 | * above. Encoding other characters is optional; all entities will be |
| 614 | * decoded during wikitext serialization. |
| 615 | * |
| 616 | * In order to allow *arbitrary* content inside a wikitext comment, |
| 617 | * including the forbidden string "-->" we also do some minimal entity |
| 618 | * decoding on the wikitext. We are also limited by our inability |
| 619 | * to encode DSR attributes on the comment $node, so our wikitext entity |
| 620 | * decoding must be 1-to-1: that is, there must be a unique "decoded" |
| 621 | * string for every wikitext sequence, and for every decoded string there |
| 622 | * must be a unique wikitext which creates it. |
| 623 | * |
| 624 | * The basic idea here is to replace every string ab*c with the string with |
| 625 | * one more b in it. This creates a string with no instance of "ac", |
| 626 | * so you can use 'ac' to encode one more code point. In this case |
| 627 | * a is "--&", "b" is "amp;", and "c" is "gt;" and we use ac to |
| 628 | * encode "-->" (which is otherwise unspeakable in wikitext). |
| 629 | * |
| 630 | * Note that any user content which does not match the regular |
| 631 | * expression /--(>|&(amp;)*gt;)/ is unchanged in its wikitext |
| 632 | * representation, as shown in the first two examples below. |
| 633 | * |
| 634 | * User-authored comment text    Wikitext        HTML5 DOM |
| 635 | * --------------------------    --------------  ------------------------- |
| 636 | * & - >                         & - >           &amp; &#45; &gt; |
| 637 | * Use &gt; here                 Use &gt; here   Use &amp;gt; here |
| 638 | * -->                           --&gt;          &#45;&#45;&gt; |
| 639 | * --&gt;                        --&amp;gt;      &#45;&#45;&amp;gt; |
| 640 | * --&amp;gt;                    --&amp;amp;gt;  &#45;&#45;&amp;amp;gt; |
| 641 | * |
| 642 | * [0] http://www.w3.org/TR/html5/syntax.html#comment-start-state |
| 643 | * [1] http://www.w3.org/TR/html5/syntax.html#comments |
| 644 | * |
| 645 | * Map a wikitext-escaped comment to an HTML DOM-escaped comment. |
| 646 | * |
| 647 | * @param string $comment Wikitext-escaped comment. |
| 648 | * @return string DOM-escaped comment. |
| 649 | */ |
public static function encodeComment( string $comment ): string {
	// Step 1: strip the wikitext-level escaping. Only sequences of the form
	// "--&" + zero or more "amp;" + "gt;" are touched; each such sequence is
	// passed through the wikitext entity decoder.
	$unescaped = preg_replace_callback(
		'/--&(amp;)*gt;/',
		static fn ( $match ) => Utils::decodeWtEntities( $match[0] ),
		$comment
	);

	// Step 2: entity-encode every '-', '>' and '&' so the result can be
	// embedded in an HTML comment. This direction need not be 1-to-1.
	return preg_replace_callback(
		'/[->&]/',
		static fn ( $match ) => Utils::entityEncodeAll( $match[0] ),
		$unescaped
	);
}
| 663 | |
/**
 * Map an HTML DOM-escaped comment to a wikitext-escaped comment.
 *
 * The DOM value is first entity-decoded to recover the comment's
 * "true value". That value is then re-escaped so that the comment
 * terminator "-->" can never appear in the emitted wikitext:
 *
 *  - a literal "-->" is written as "--&gt;";
 *  - any existing "--&gt;", "--&amp;gt;", ... gains one additional
 *    "amp;" so that it remains distinguishable from an escaped "-->".
 *
 * Text that does not match /--(&(amp;)*gt;|>)/ is left unchanged.
 *
 * @param string $comment DOM-escaped comment.
 * @return string Wikitext-escaped comment.
 */
public static function decodeComment( string $comment ): string {
	// Undo HTML entity escaping to obtain "true value" of comment.
	$trueValue = Utils::decodeWtEntities( $comment );

	// Re-encode the "true value" so that the string "-->" never shows up.
	return preg_replace_callback( '/--(&(amp;)*gt;|>)/', static function ( $m ) {
		$s = $m[0];
		if ( $s === '-->' ) {
			// The bare terminator becomes the shortest escaped form.
			return '--&gt;';
		}
		// $s starts with "--&"; keep everything after it and insert one
		// more "amp;" directly behind the ampersand.
		return '--&amp;' . substr( $s, 3 );
	}, $trueValue );
}
| 680 | |
/**
 * Compute the wikitext DSR length of an HTML DOM comment value.
 *
 * The comment text is converted back to its wikitext-escaped form via
 * ::decodeComment() and measured in bytes; the length of the comment
 * delimiters is then added on top.
 *
 * @param Comment|CommentTk $node A comment node containing a DOM-escaped comment.
 * @return int The wikitext length in UTF-8 bytes necessary to encode this
 *  comment, including 7 characters for the `<!--` and `-->` delimiters
 *  (only 4 when the comment is flagged as unclosed).
 */
public static function decodedCommentLength( $node ): int {
	if ( $node instanceof Comment ) {
		$escaped = $node->nodeValue;
		$prev = $node->previousSibling;
		// DOM comments are treated as unclosed when the preceding sibling
		// carries the UnclosedComment placeholder type.
		$unclosed = $prev &&
			DOMUtils::hasTypeOf( $prev, "mw:Placeholder/UnclosedComment" );
	} elseif ( $node instanceof CommentTk ) {
		// Tokens record the same information in their data-parsoid.
		$unclosed = isset( $node->dataParsoid->unclosedComment );
		$escaped = $node->value;
	} else {
		throw new UnreachableException( 'Should not be here!' );
	}

	// 7 = strlen( "<!--" ) + strlen( "-->" ); 4 = strlen( "<!--" ) alone.
	$delimiterLen = $unclosed ? 4 : 7;
	return strlen( self::decodeComment( $escaped ) ) + $delimiterLen;
}
| 709 | |
/**
 * Return the lowercased extension tag name encoded in the node's
 * "mw:Extension/<name>" type, or null when no such type matches.
 *
 * @param Node $node
 * @return ?string
 */
public static function getExtTagName( Node $node ): ?string {
	$type = DOMUtils::matchTypeOf( $node, '#^mw:Extension/(.+?)$#D' );
	if ( !$type ) {
		return null;
	}
	// Drop the "mw:Extension/" prefix and normalize the case.
	$tagName = substr( $type, strlen( 'mw:Extension/' ) );
	return mb_strtolower( $tagName );
}
| 714 | |
/**
 * Return the handler key encoded in the node's
 * "mw:ParserFunction/<key>" type, or null when no such type matches.
 * Unlike ::getExtTagName(), the key is returned without case folding.
 *
 * @param Node $node
 * @return ?string
 */
public static function getPFragmentHandlerKey( Node $node ): ?string {
	// TODO (T390342): use ::getExtTagName() to look up extension tag
	// PFragment handlers
	$type = DOMUtils::matchTypeOf( $node, '#^mw:ParserFunction/(.+?)$#D' );
	if ( !$type ) {
		return null;
	}
	return substr( $type, strlen( 'mw:ParserFunction/' ) );
}
| 721 | |
/**
 * Look up the extension tag implementation for a node.
 *
 * The node's extension tag name is resolved with ::getExtTagName() and
 * passed to the site config's getExtTagImpl(); null is returned when the
 * node yields no extension tag name.
 *
 * @param Env $env
 * @param Node $node
 * @return ?ExtensionTagHandler
 */
public static function getNativeExt( Env $env, Node $node ): ?ExtensionTagHandler {
	$tagName = self::getExtTagName( $node );
	if ( !$tagName ) {
		return null;
	}
	return $env->getSiteConfig()->getExtTagImpl( $tagName );
}
| 726 | |
/**
 * Is this an include directive?
 *
 * The comparison is exact and case-sensitive; callers are expected to
 * pass an already lowercased name.
 *
 * @param string $name Tag name to test.
 * @return bool True for "includeonly", "noinclude" and "onlyinclude".
 */
public static function isIncludeTag( string $name ): bool {
	return in_array( $name, [ 'includeonly', 'noinclude', 'onlyinclude' ], true );
}
| 733 | |
/**
 * Check whether a tag name is an annotation tag according to the site
 * config. The name is lowercased first. If the full name is not an
 * annotation tag and it contains a pipe after its first character, the
 * part before the pipe is checked instead.
 *
 * @param Env $env
 * @param string $name
 * @return bool
 */
public static function isAnnotationTag( Env $env, string $name ): bool {
	$lcName = mb_strtolower( $name );
	$config = $env->getSiteConfig();

	$result = $config->isAnnotationTag( $lcName );
	if ( $result ) {
		return $result;
	}

	// avoid crashing on <tvar|name> even if we don't support that syntax explicitly
	$pipeAt = strpos( $lcName, '|' );
	if ( !$pipeAt ) {
		// No pipe at all, or a pipe in the very first position.
		return $result;
	}
	return $config->isAnnotationTag( substr( $lcName, 0, $pipeAt ) );
}
| 748 | |
/**
 * Check if tag is annotation or extension directive
 * Adapted from similar grammar function
 *
 * The name is lowercased and then tested, in order, against the site
 * config's extension tag name map, the include directives, and the
 * annotation tags.
 *
 * @param Env $env
 * @param string $name
 * @return bool
 */
public static function isAnnOrExtTag( Env $env, string $name ): bool {
	$lcName = mb_strtolower( $name );
	$extTagMap = $env->getSiteConfig()->getExtensionTagNameMap();

	if ( isset( $extTagMap[$lcName] ) ) {
		return true;
	}
	if ( self::isIncludeTag( $lcName ) ) {
		return true;
	}
	return self::isAnnotationTag( $env, $lcName );
}
| 760 | |
/**
 * Build a DocumentFragment whose only child is a <span> typed "mw:I18n".
 * The span carries no message data yet; it should be filled in with
 * setDataNodeI18n to be valid.
 * @param Document $doc
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createEmptyLocalizationFragment( Document $doc ): DocumentFragment {
	$placeholder = $doc->createElement( 'span' );
	DOMUtils::addTypeOf( $placeholder, 'mw:I18n' );

	$fragment = $doc->createDocumentFragment();
	$fragment->appendChild( $placeholder );
	return $fragment;
}
| 775 | |
/**
 * Build a fragment holding a message to be localized into the page content
 * language. The returned DocumentFragment has a single child: a span
 * annotated with the information needed for later localization.
 * @param Document $doc
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createPageContentI18nFragment(
	Document $doc, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	// Attach the message description to the placeholder span.
	DOMDataUtils::setDataNodeI18n(
		$fragment->firstChild,
		I18nInfo::createPageContentI18n( $key, $params )
	);
	return $fragment;
}
| 794 | |
/**
 * Build a fragment holding a message to be localized into the user
 * interface language. The returned DocumentFragment has a single child: a
 * span annotated with the information needed for later localization.
 * @param Document $doc
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createInterfaceI18nFragment(
	Document $doc, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	// Attach the message description to the placeholder span.
	DOMDataUtils::setDataNodeI18n(
		$fragment->firstChild,
		I18nInfo::createInterfaceI18n( $key, $params )
	);
	return $fragment;
}
| 813 | |
/**
 * Build a fragment holding a message to be localized into an arbitrary
 * language. The returned DocumentFragment has a single child: a span
 * annotated with the information needed for later localization.
 * The use of this method is discouraged; use ::createPageContentI18nFragment(...) and
 * ::createInterfaceI18nFragment(...) where possible rather than, respectively,
 * ::createLangI18nFragment(..., $wgContLang, ...) and
 * ::createLangI18nFragment(..., $wgLang, ...).
 * @param Document $doc
 * @param Bcp47Code $lang language for the localization
 * @param string $key message key for the message to be localized
 * @param ?array $params parameters for localization
 * @return DocumentFragment
 * @throws DOMException
 */
public static function createLangI18nFragment(
	Document $doc, Bcp47Code $lang, string $key, ?array $params = null
): DocumentFragment {
	$fragment = self::createEmptyLocalizationFragment( $doc );
	// Attach the message description to the placeholder span.
	DOMDataUtils::setDataNodeI18n(
		$fragment->firstChild,
		I18nInfo::createLangI18n( $lang, $key, $params )
	);
	return $fragment;
}
| 837 | |
/**
 * Record on $element that attribute $name is to be localized, in a later
 * pass, into the page content language. The element is also tagged with
 * the "mw:LocalizedAttrs" type.
 * @param Element $element element on which to add internationalization information
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addPageContentI18nAttribute(
	Element $element, string $name, string $key, ?array $params = null
): void {
	$messageInfo = I18nInfo::createPageContentI18n( $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $messageInfo );
}
| 853 | |
/**
 * Record on $element that attribute $name is to be localized, in a later
 * pass, into the user interface language. The element is also tagged with
 * the "mw:LocalizedAttrs" type.
 * @param Element $element element on which to add internationalization information
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addInterfaceI18nAttribute(
	Element $element, string $name, string $key, ?array $params = null
): void {
	$messageInfo = I18nInfo::createInterfaceI18n( $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $messageInfo );
}
| 868 | |
/**
 * Record on $element that attribute $name is to be localized, in a later
 * pass, into the provided language. The element is also tagged with the
 * "mw:LocalizedAttrs" type.
 * The use of this method is discouraged; use ::addPageContentI18nAttribute(...) and
 * ::addInterfaceI18nAttribute(...) where possible rather than, respectively,
 * ::addLangI18nAttribute(..., $wgContLang, ...) and ::addLangI18nAttribute(..., $wgLang, ...).
 * @param Element $element element on which to add internationalization information
 * @param Bcp47Code $lang language in which the message will be localized
 * @param string $name name of the attribute whose value will be localized
 * @param string $key message key used for the attribute value localization
 * @param ?array $params parameters for localization
 */
public static function addLangI18nAttribute(
	Element $element, Bcp47Code $lang, string $name, string $key, ?array $params = null
): void {
	$messageInfo = I18nInfo::createLangI18n( $lang, $key, $params );

	DOMUtils::addTypeOf( $element, 'mw:LocalizedAttrs' );
	DOMDataUtils::setDataAttrI18n( $element, $name, $messageInfo );
}
| 888 | |
/**
 * Check whether a node is an annotation meta; if yes, returns its type.
 *
 * The node must be a <meta> whose type matches ANNOTATION_META_TYPE_REGEXP.
 *
 * @param Node $node
 * @return ?string The matching type, or null if there is none.
 */
public static function matchAnnotationMeta( Node $node ): ?string {
	$annotationType = DOMUtils::matchNameAndTypeOf(
		$node, 'meta', self::ANNOTATION_META_TYPE_REGEXP
	);
	return $annotationType;
}
| 894 | |
/**
 * Extract the annotation type, excluding potential "/End" suffix; returns null if not a valid
 * annotation meta. On success &$isStart is set to true if the annotation is a start tag and
 * to false otherwise; when null is returned, &$isStart is left untouched.
 *
 * @param Node $node
 * @param bool &$isStart
 * @return ?string The matched type, or null if no match.
 */
public static function extractAnnotationType( Node $node, bool &$isStart = false ): ?string {
	$type = DOMUtils::matchTypeOf( $node, self::ANNOTATION_META_TYPE_REGEXP );
	if ( $type === null ) {
		return null;
	}

	// Re-run the pattern on the matched type to obtain its capture groups.
	$groups = [];
	if ( !preg_match( self::ANNOTATION_META_TYPE_REGEXP, $type, $groups ) ) {
		return null;
	}

	$isStart = !str_ends_with( $type, '/End' );
	return $groups[1];
}
| 911 | |
/**
 * Check whether a node is a meta signifying the start of an annotated part of the DOM
 *
 * @param Node $node
 * @return bool
 */
public static function isAnnotationStartMarkerMeta( Node $node ): bool {
	$isMeta = $node instanceof Element && DOMUtils::nodeName( $node ) === 'meta';
	if ( !$isMeta ) {
		return false;
	}

	// $startFlag is filled in by reference by extractAnnotationType().
	$startFlag = false;
	$annotationType = self::extractAnnotationType( $node, $startFlag );
	if ( $annotationType === null ) {
		return false;
	}
	return $startFlag;
}
| 923 | |
/**
 * Check whether a node is a meta signifying the end of an annotated part of the DOM
 *
 * @param Node $node
 * @return bool
 */
public static function isAnnotationEndMarkerMeta( Node $node ): bool {
	$isMeta = $node instanceof Element && DOMUtils::nodeName( $node ) === 'meta';
	if ( !$isMeta ) {
		return false;
	}

	// $startFlag is filled in by reference by extractAnnotationType().
	$startFlag = false;
	$annotationType = self::extractAnnotationType( $node, $startFlag );
	if ( $annotationType === null ) {
		return false;
	}
	return !$startFlag;
}
| 935 | |
/**
 * Check whether the meta tag was moved from its initial position
 *
 * Only annotation metas are considered; the answer is read from the
 * `wasMoved` field of the node's data-parsoid and defaults to false when
 * that field is not set.
 *
 * @param Node $node
 * @return bool
 */
public static function isMovedMetaTag( Node $node ): bool {
	if ( !( $node instanceof Element ) || self::matchAnnotationMeta( $node ) === null ) {
		return false;
	}
	$dp = DOMDataUtils::getDataParsoid( $node );
	return $dp->wasMoved ?? false;
}
| 948 | |
/**
 * Returns true if a node is a (start or end) annotation meta tag
 *
 * @param ?Node $n Node to test; null yields false.
 * @return bool
 */
public static function isMarkerAnnotation( ?Node $n ): bool {
	if ( $n === null ) {
		return false;
	}
	return self::matchAnnotationMeta( $n ) !== null;
}
| 953 | |
/**
 * Extracts the media format from the attribute string
 *
 * The format is the path segment following "mw:File/" in the node's
 * matching type; a plain "mw:File" type, or no matching type at all,
 * yields the empty string.
 *
 * @param Element $node
 * @return string
 */
public static function getMediaFormat( Element $node ): string {
	$fileType = DOMUtils::matchTypeOf( $node, '#^mw:File(/|$)#' );
	if ( $fileType === null ) {
		return '';
	}
	$segments = explode( '/', $fileType );
	return $segments[1] ?? '';
}
| 960 | |
/**
 * Tell whether the node's media format is one of the formats listed here
 * as having a visible caption ("Thumb" or "Frame").
 *
 * @param Element $node
 * @return bool
 */
public static function hasVisibleCaption( Element $node ): bool {
	$format = self::getMediaFormat( $node );
	// 'Manualthumb' is deliberately not listed, FIXME(T305759)
	return $format === 'Thumb' || $format === 'Frame';
}
| 967 | |
/**
 * Ref dom post-processing happens after adding media info, so the
 * linkbacks aren't available in the textContent added to the alt.
 * However, when serializing, they are in the caption elements. So, this
 * special handler drops the linkbacks for the purpose of comparison.
 *
 * Concretely: the text of all descendant Text nodes is concatenated in
 * document order, skipping the subtrees of metadata tags and of elements
 * typed "mw:Extension/ref". Non-element, non-text children are ignored.
 *
 * @param Node $node
 * @return string
 */
public static function textContentFromCaption( Node $node ): string {
	$text = '';
	for ( $child = $node->firstChild; $child; $child = $child->nextSibling ) {
		if ( $child instanceof Text ) {
			$text .= $child->nodeValue;
			continue;
		}
		if ( !( $child instanceof Element ) ) {
			continue;
		}
		if (
			DOMUtils::isMetaDataTag( $child ) ||
			DOMUtils::hasTypeOf( $child, "mw:Extension/ref" )
		) {
			continue;
		}
		// Recurse into ordinary elements.
		$text .= self::textContentFromCaption( $child );
	}
	return $text;
}
| 991 | |
| 992 | } |