Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 134 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
| ExtensionHandler | |
0.00% |
0 / 134 |
|
0.00% |
0 / 6 |
870 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| normalizeExtOptions | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
| onExtension | |
0.00% |
0 / 69 |
|
0.00% |
0 / 1 |
210 | |||
| onDocumentFragment | |
0.00% |
0 / 51 |
|
0.00% |
0 / 1 |
72 | |||
| onTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
| stripAnnotations | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
| 5 | |
| 6 | use Wikimedia\Assert\Assert; |
| 7 | use Wikimedia\Assert\UnreachableException; |
| 8 | use Wikimedia\Parsoid\Config\SiteConfig; |
| 9 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
| 10 | use Wikimedia\Parsoid\Ext\ExtensionError; |
| 11 | use Wikimedia\Parsoid\Ext\ExtensionTag; |
| 12 | use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; |
| 13 | use Wikimedia\Parsoid\NodeData\DataMw; |
| 14 | use Wikimedia\Parsoid\NodeData\DataMwError; |
| 15 | use Wikimedia\Parsoid\Tokens\Token; |
| 16 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
| 17 | use Wikimedia\Parsoid\Utils\DOMUtils; |
| 18 | use Wikimedia\Parsoid\Utils\PipelineUtils; |
| 19 | use Wikimedia\Parsoid\Utils\TokenUtils; |
| 20 | use Wikimedia\Parsoid\Utils\Utils; |
| 21 | use Wikimedia\Parsoid\Utils\WTUtils; |
| 22 | use Wikimedia\Parsoid\Wt2Html\TokenTransformManager; |
| 23 | |
| 24 | class ExtensionHandler extends TokenHandler { |
| 25 | |
/**
 * Build the handler. No extra state is set up here: both arguments are
 * handed straight to the parent TokenHandler constructor.
 *
 * @param TokenTransformManager $manager
 * @param array $options
 */
public function __construct(
	TokenTransformManager $manager, array $options
) {
	parent::__construct( $manager, $options );
}
| 29 | |
/**
 * Rewrite the value of every extension option as normalized plain text.
 *
 * Mimics Sanitizer::decodeTagAttributes from the PHP parser.
 *
 * Extension options should always be interpreted as plain text. The
 * tokenizer parses them to tokens in case they are for an HTML tag,
 * but here we use the text source instead.
 *
 * @param array $options Option entries; each entry's `v` is overwritten in place.
 * @return array The entries that were passed in.
 */
private static function normalizeExtOptions( array $options ): array {
	foreach ( $options as $option ) {
		// Prefer the source text when there is one. Otherwise flatten the
		// value to a string: it can be a token stream if the parser has
		// recognized it as a directive.
		$text = $option->vsrc ??
			TokenUtils::tokensToString( $option->v, false, [ 'includeEntities' => true ] );

		// Collapse each run of tabs, CRs, LFs and spaces to one space,
		// then trim both ends.
		// FIXME: If the option is parsed as wikitext, this normalization
		// can mess with src offsets.
		$text = trim( preg_replace( '/[\t\r\n ]+/', ' ', $text ) );

		// Decode character references
		$option->v = Utils::decodeWtEntities( $text );
	}

	return $options;
}
| 52 | |
/**
 * Expand one extension token.
 *
 * In order:
 *  - if the site config exposes a metrics object, count this use of the
 *    extension (per wiki, namespace bucket and extension name);
 *  - normalize the token's options and derive data-mw from the result;
 *  - if a native implementation is registered for the tag, call its
 *    sourceToDom(): a fragment is encapsulated with onDocumentFragment(),
 *    null produces an empty result, false falls through to the next step;
 *  - fetch HTML for the token's 'source' attribute through
 *    PipelineUtils::fetchHTML() and encapsulate that instead.
 *
 * @param Token $token
 * @return TokenHandlerResult
 * @throws UnreachableException When no native result was returned and
 *   $env->extensionCache holds a truthy entry for the token's source.
 */
private function onExtension( Token $token ): TokenHandlerResult {
	$env = $this->env;
	$siteConfig = $env->getSiteConfig();
	$extensionName = $token->getAttributeV( 'name' );
	$extConfig = $siteConfig->getExtTagConfig( $extensionName );

	$metrics = $siteConfig->metrics();
	if ( $metrics ) {
		// Track uses of extensions
		$wiki = $siteConfig->iwp();
		$ns = $env->getContextTitle()->getNamespace();
		if ( $ns === 0 ) {
			// Article space
			$nsName = 'main';
		} elseif ( $siteConfig->namespaceIsTalk( $ns ) ) {
			// Any talk namespace
			$nsName = 'talk';
		} else {
			// Everything else
			$nsName = "ns-$ns";
		}
		$metrics->increment( "extension.{$wiki}.{$nsName}.{$extensionName}" );
		$siteConfig->incrementCounter( "extension_total", [
			"wiki" => $wiki,
			"namespace" => $nsName,
			"name" => $extensionName,
		] );
	}

	$nativeExt = $siteConfig->getExtTagImpl( $extensionName );
	$cachedExpansion = $env->extensionCache[$token->dataParsoid->src] ?? null;

	$options = $token->getAttributeV( 'options' );
	$token->setAttribute( 'options', self::normalizeExtOptions( $options ) );

	// Call after normalizing extension options, since that can affect the result
	$dataMw = Utils::getExtArgInfo( $token );

	if ( $nativeExt !== null ) {
		$extArgs = $token->getAttributeV( 'options' );
		$extApi = new ParsoidExtensionAPI( $env, [
			'wt2html' => [
				'frame' => $this->manager->getFrame(),
				'parseOpts' => $this->options,
				'extTag' => new ExtensionTag( $token ),
			],
		] );
		try {
			$extSrc = $dataMw->body->extsrc ?? '';
			// Tags configured with a falsy 'hasWikitextInput' option get
			// annotations stripped from their source before it is handed over.
			if ( !( $extConfig['options']['hasWikitextInput'] ?? true ) ) {
				$extSrc = $this->stripAnnotations( $extSrc, $siteConfig );
			}
			$domFragment = $nativeExt->sourceToDom( $extApi, $extSrc, $extArgs );
			$errors = $extApi->getErrors();
			if ( $extConfig['options']['wt2html']['customizesDataMw'] ?? false ) {
				// The extension built its own data-mw on the first node; use it.
				// NOTE(review): this dereferences $domFragment before the
				// false / null checks below -- confirm that tags setting
				// 'customizesDataMw' always return a fragment.
				$firstNode = $domFragment->firstChild;
				DOMUtils::assertElt( $firstNode );
				$dataMw = DOMDataUtils::getDataMw( $firstNode );
			}
		} catch ( ExtensionError $e ) {
			$domFragment = WTUtils::createInterfaceI18nFragment(
				$env->topLevelDoc, $e->err->key, $e->err->params ?: null
			);
			$errors = [ $e->err ];
			// FIXME: Should we include any errors collected
			// from $extApi->getErrors() here? Also, what's the correct $dataMw
			// to apply in this case?
		}

		if ( $domFragment === null ) {
			// The extension dropped this instance completely (!!)
			// Should be a rarity and presumably the extension
			// knows what it is doing. Ex: nested refs are dropped
			// in some scenarios.
			return new TokenHandlerResult( [] );
		}

		if ( $domFragment !== false ) {
			// Turn this document fragment into a token
			return new TokenHandlerResult(
				$this->onDocumentFragment( $token, $domFragment, $dataMw, $errors )
			);
		}

		// Fall through: this extension is electing not to use
		// a custom sourceToDom method (by returning false from
		// sourceToDom).
	}

	if ( $cachedExpansion ) {
		// WARNING: THIS HAS BEEN UNUSED SINCE 2015, SEE T98995.
		// THIS CODE WAS WRITTEN BUT APPARENTLY NEVER TESTED.
		// NO WARRANTY. MAY HALT AND CATCH ON FIRE.
		throw new UnreachableException( 'Should not be here!' );
	}

	$start = microtime( true );
	$domFragment = PipelineUtils::fetchHTML( $env, $token->getAttributeV( 'source' ) );
	if ( $env->profiling() ) {
		$profile = $env->getCurrentProfile();
		// microtime() is in seconds; the profile is given milliseconds.
		$profile->bumpMWTime( "Extension", 1000 * ( microtime( true ) - $start ), "api" );
		$profile->bumpCount( "Extension" );
	}
	if ( !$domFragment ) {
		// Nothing came back: substitute a fragment parsed from the empty string.
		$domFragment = DOMUtils::parseHTMLToFragment( $env->topLevelDoc, '' );
	}

	return new TokenHandlerResult(
		$this->onDocumentFragment( $token, $domFragment, $dataMw, [] )
	);
}
| 169 | |
/**
 * DOMFragment-based encapsulation
 *
 * For every tag except <nowiki>, the fragment's first node is turned into
 * the extension wrapper (typeof, data-mw, plus tsr / src in data-parsoid)
 * and all top-level nodes are given one shared about id. The fragment is
 * then passed to PipelineUtils::tunnelDOMThroughTokens().
 *
 * @param Token $extToken
 * @param DocumentFragment $domFragment
 * @param DataMw $dataMw
 * @param list<DataMwError> $errors
 * @return array
 */
private function onDocumentFragment(
	Token $extToken, DocumentFragment $domFragment, DataMw $dataMw,
	array $errors
): array {
	$env = $this->env;
	$extensionName = $extToken->getAttributeV( 'name' );

	if ( $env->hasDumpFlag( 'extoutput' ) ) {
		$log = $env->getSiteConfig()->getLogger();
		$log->warning( str_repeat( '=', 80 ) );
		$log->warning( 'EXTENSION INPUT: ' . $extToken->getAttributeV( 'source' ) );
		$log->warning( str_repeat( '=', 80 ) );
		$log->warning( "EXTENSION OUTPUT:\n" );
		$log->warning( DOMUtils::getFragmentInnerHTML( $domFragment ) );
		$log->warning( str_repeat( '-', 80 ) );
	}

	$tunnelOpts = [
		'setDSR' => true,
		'wrapperName' => $extensionName,
	];

	// Check if the tag wants its DOM fragment not to be unpacked.
	// The default setting is to unpack the content DOM fragment automatically.
	// Keys already present in $tunnelOpts win over the tag's own settings.
	$extConfig = $env->getSiteConfig()->getExtTagConfig( $extensionName );
	$wt2htmlOpts = $extConfig['options']['wt2html'] ?? null;
	if ( $wt2htmlOpts !== null ) {
		$tunnelOpts += $wt2htmlOpts;
	}

	// This special case is only because, from the beginning, Parsoid has
	// treated <nowiki>s as core functionality with lean markup (no about,
	// no data-mw, custom typeof).
	//
	// We'll keep this hardcoded to avoid exposing the functionality to
	// other native extensions until it's needed.
	if ( $extensionName === 'nowiki' ) {
		return PipelineUtils::tunnelDOMThroughTokens(
			$env, $extToken, $domFragment, $tunnelOpts
		);
	}

	if ( !$domFragment->hasChildNodes() ) {
		// RT extensions expanding to nothing.
		$placeholder = $domFragment->ownerDocument->createElement( 'link' );
		$domFragment->appendChild( $placeholder );
	}

	// Wrap the top-level nodes so that we have a first element
	// to annotate with the typeof and to apply about ids.
	PipelineUtils::addSpanWrappers( $domFragment->childNodes );

	$wrapper = $domFragment->firstChild;
	DOMUtils::assertElt( $wrapper );

	// Adds the wrapper attributes to the first element
	DOMUtils::addTypeOf( $wrapper, "mw:Extension/{$extensionName}" );

	// FIXME: What happens if the first node is template generated, since
	// they have higher precedence? These questions and more in T214241
	Assert::invariant(
		!DOMUtils::hasTypeOf( $wrapper, 'mw:Transclusion' ),
		'First node of extension content is transcluded.'
	);

	if ( count( $errors ) > 0 ) {
		DOMUtils::addTypeOf( $wrapper, 'mw:Error' );
		// Append to errors already recorded in data-mw, if there are any.
		$existing = $dataMw->errors ?? null;
		if ( is_array( $existing ) ) {
			$dataMw->errors = array_merge( $existing, $errors );
		} else {
			$dataMw->errors = $errors;
		}
	}

	// Set data-mw
	// FIXME: Similar to T214241, we're clobbering the first node
	DOMDataUtils::setDataMw( $wrapper, $dataMw );

	// Add about to all wrapper tokens.
	$about = $env->newAboutId();
	for ( $node = $wrapper; $node; $node = $node->nextSibling ) {
		$node->setAttribute( 'about', $about );
	}

	// Update data-parsoid
	$dataParsoid = DOMDataUtils::getDataParsoid( $wrapper );
	$dataParsoid->tsr = clone $extToken->dataParsoid->tsr;
	$dataParsoid->src = $extToken->dataParsoid->src;
	DOMDataUtils::setDataParsoid( $wrapper, $dataParsoid );

	return PipelineUtils::tunnelDOMThroughTokens(
		$env, $extToken, $domFragment, $tunnelOpts
	);
}
| 274 | |
/**
 * @inheritDoc
 */
public function onTag( Token $token ): ?TokenHandlerResult {
	// Only tokens named 'extension' are processed; null is returned for
	// every other tag.
	if ( $token->getName() !== 'extension' ) {
		return null;
	}

	return $this->onExtension( $token );
}
| 281 | |
/**
 * Run a string through every annotation stripper the site config provides.
 *
 * The strippers are chained: each one receives the output of the previous
 * one, so the returned string has been processed by all of them. When the
 * site config provides no strippers, $s is returned unchanged.
 *
 * @param string $s
 * @param SiteConfig $siteConfig
 * @return string
 */
private function stripAnnotations( string $s, SiteConfig $siteConfig ): string {
	$res = $s;
	foreach ( $siteConfig->getAnnotationStrippers() as $annotationStripper ) {
		// Feed the accumulated result forward. Passing the original $s on
		// every iteration would discard the work of all but the last stripper.
		$res = $annotationStripper->stripAnnotations( $res );
	}
	return $res;
}
| 291 | } |