Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 272 |
|
0.00% |
0 / 19 |
CRAP | |
0.00% |
0 / 1 |
| PipelineUtils | |
0.00% |
0 / 272 |
|
0.00% |
0 / 19 |
6642 | |
0.00% |
0 / 1 |
| pFragmentToParsoidFragmentMarkers | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
| getDOMFragmentToken | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
| processContentInPipeline | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
2 | |||
| dumpTplSrc | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
12 | |||
| preparePFragment | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
| processTemplateSource | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
6 | |||
| expandAttrValueToDOM | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
6 | |||
| expandAttrValuesToDOM | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| domAttrsToTagAttrs | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
| convertDOMtoTokens | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
56 | |||
| getWrapperTokens | |
0.00% |
0 / 49 |
|
0.00% |
0 / 1 |
506 | |||
| encapsulateExpansionHTML | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
42 | |||
| wrapAccum | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 | |||
| addSpanWrappers | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
110 | |||
| tunnelDOMThroughTokens | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| makeExpansion | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| doExtractExpansions | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
72 | |||
| extractExpansions | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
| parseToHTML | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Utils; |
| 5 | |
| 6 | use Wikimedia\Assert\Assert; |
| 7 | use Wikimedia\Assert\UnreachableException; |
| 8 | use Wikimedia\Parsoid\Config\Env; |
| 9 | use Wikimedia\Parsoid\DOM\Comment; |
| 10 | use Wikimedia\Parsoid\DOM\Document; |
| 11 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
| 12 | use Wikimedia\Parsoid\DOM\Element; |
| 13 | use Wikimedia\Parsoid\DOM\Node; |
| 14 | use Wikimedia\Parsoid\DOM\NodeList; |
| 15 | use Wikimedia\Parsoid\DOM\Text; |
| 16 | use Wikimedia\Parsoid\Fragments\PFragment; |
| 17 | use Wikimedia\Parsoid\Fragments\WikitextPFragment; |
| 18 | use Wikimedia\Parsoid\NodeData\DataMw; |
| 19 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
| 20 | use Wikimedia\Parsoid\NodeData\TempData; |
| 21 | use Wikimedia\Parsoid\Tokens\CommentTk; |
| 22 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
| 23 | use Wikimedia\Parsoid\Tokens\EOFTk; |
| 24 | use Wikimedia\Parsoid\Tokens\KV; |
| 25 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
| 26 | use Wikimedia\Parsoid\Tokens\SourceRange; |
| 27 | use Wikimedia\Parsoid\Tokens\TagTk; |
| 28 | use Wikimedia\Parsoid\Tokens\Token; |
| 29 | use Wikimedia\Parsoid\Wt2Html\Frame; |
| 30 | |
| 31 | /** |
| 32 | * This file contains parsing pipeline related utilities. |
| 33 | */ |
| 34 | class PipelineUtils { |
| 35 | // keep in sync with internal_strip_marker in Grammar.pegphp |
| 36 | public const PARSOID_FRAGMENT_PREFIX = '{{#parsoid\0fragment:'; |
| 37 | |
| 38 | /** |
| 39 | * Returns a wikitext string with embedded parsoid fragment markers, |
| 40 | * as well as a mapping from the marker IDs to PFragment objects. |
| 41 | * @return array{0:string,1:array<string,PFragment>} An array consisting of
| 42 | * the wikitext string, followed by the id-to-PFragment map. |
| 43 | */ |
public static function pFragmentToParsoidFragmentMarkers( PFragment $fragment ): array {
	// Shared across all calls so that each marker id is handed out only once.
	static $counter = 0;

	$pieces = WikitextPFragment::castFromPFragment( $fragment )->split();
	$total = count( $pieces );

	// Even-indexed pieces are concatenated as-is; each odd-indexed piece is
	// replaced in the text by a fresh marker and remembered in $markerMap.
	$chunks = [ $pieces[0] ];
	$markerMap = [];
	for ( $idx = 1; $idx < $total; $idx += 2 ) {
		$marker = self::PARSOID_FRAGMENT_PREFIX . ( $counter++ ) . '}}';
		$markerMap[$marker] = $pieces[$idx];
		array_push( $chunks, $marker, $pieces[$idx + 1] );
	}

	return [ implode( '', $chunks ), $markerMap ];
}
| 57 | |
| 58 | /** |
| 59 | * Creates a dom-fragment-token for processing 'content' (an array of tokens) |
| 60 | * in its own subpipeline all the way to DOM. These tokens will be processed |
| 61 | * by their own handler (DOMFragmentBuilder) in the last stage of the async |
| 62 | * pipeline. |
| 63 | * |
| 64 | * srcOffsets should always be provided to process top-level page content in a |
| 65 | * subpipeline. Without it, DSR computation and template wrapping cannot be done |
| 66 | * in the subpipeline. While unpackDOMFragment can do this on unwrapping, that can |
| 67 | * be a bit fragile and makes dom-fragments a leaky abstraction by leaking subpipeline |
| 68 | * processing into the top-level pipeline. |
| 69 | * |
| 70 | * @param string|Token|array<Token|string> $content The array of tokens to process. |
| 71 | * @param SourceRange $srcOffsets Wikitext source offsets (start/end) of these tokens. |
| 72 | * @param array $opts Parsing options. |
| 73 | * - Token token The token that generated the content. |
| 74 | * - bool inlineContext Is this DOM fragment used in an inline context? |
| 75 | * @return SelfclosingTagTk |
| 76 | */ |
public static function getDOMFragmentToken(
	$content, SourceRange $srcOffsets, array $opts = []
): SelfclosingTagTk {
	// NOTE: although $opts defaults to [], 'token' is read unconditionally,
	// so callers have to provide it (and it must carry a tsr).
	$contextTok = $opts['token'];

	// Boolean options are serialized as "1"/"0" strings on the token.
	// 'inPHPBlock' is accepted here in addition to the documented options.
	$inlineFlag = ( $opts['inlineContext'] ?? false ) ? "1" : "0";
	$phpBlockFlag = ( $opts['inPHPBlock'] ?? false ) ? "1" : "0";

	$attribs = [
		new KV( 'contextTok', $contextTok, $contextTok->dataParsoid->tsr->expandTsrV() ),
		new KV( 'content', $content, $srcOffsets->expandTsrV() ),
		new KV( 'inlineContext', $inlineFlag ),
		new KV( 'inPHPBlock', $phpBlockFlag ),
	];

	return new SelfclosingTagTk( 'mw:dom-fragment-token', $attribs );
}
| 88 | |
| 89 | /** |
| 90 | * Processes content (wikitext, array of tokens, whatever) in its own |
| 91 | * pipeline based on options. |
| 92 | * |
| 93 | * @param Env $env The environment/context for the expansion. |
| 94 | * @param Frame $frame |
| 95 | * The parent frame within which the expansion is taking place. |
| 96 | * Used for template expansion and source text tracking. |
| 97 | * @param string|Token|array<Token|string>|DocumentFragment|PFragment $content |
| 98 | * How this content is processed depends on what kind of pipeline |
| 99 | * is constructed specified by opts. |
| 100 | * @param array $opts |
| 101 | * Processing options that specify pipeline-type, opts, and callbacks. |
| 102 | * - string pipelineType |
| 103 | * - array pipelineOpts |
| 104 | * - array tplArgs - if set, defines parameters for the child frame |
| 105 | * - string tplArgs['name'] |
| 106 | * - KV[] tplArgs['attribs'] |
| 107 | * - string srcText - if set, defines the source text for the expansion |
| 108 | * - SourceRange srcOffsets - if set, defines the range within the |
| 109 | * source text that $content corresponds to |
| 110 | * - bool sol Whether tokens should be processed in start-of-line context. |
| 111 | * - bool toplevel Whether the pipeline is considered atTopLevel |
| 112 | * @return array<Token|string>|DocumentFragment (depending on pipeline type) |
| 113 | */ |
public static function processContentInPipeline(
	Env $env, Frame $frame, $content, array $opts
) {
	// Construct the requested pipeline. 'pipelineType' and 'pipelineOpts'
	// are read without defaults, so both must be present in $opts.
	$factory = $env->getPipelineFactory();
	$pipeline = $factory->getPipeline( $opts['pipelineType'], $opts['pipelineOpts'] );

	$initOpts = [
		// NOTE: some pipelines force toplevel to true
		'toplevel' => $opts['toplevel'] ?? false,
		'frame' => $frame,
		'tplArgs' => $opts['tplArgs'] ?? null,
		// Fall back to the parent frame's source when none is given.
		'srcText' => $opts['srcText'] ?? $frame->getSrcText(),
		'srcOffsets' => $opts['srcOffsets'] ?? null,
	];
	$pipeline->init( $initOpts );

	// 'sol' is likewise read without a default.
	$parseOpts = [ 'sol' => $opts['sol'] ];
	return $pipeline->parse( $content, $parseOpts );
}
| 135 | |
| 136 | /** |
| 137 | * Dump template source if '--dump tplsrc' flag was set |
| 138 | */ |
public static function dumpTplSrc(
	Env $env, Token $token, string $templateName, string $src,
	bool $fragmentMode = false
): void {
	// Serialize the token's data-parsoid so the transclusion source can be shown.
	$codec = DOMDataUtils::getCodec( $env->getTopLevelDoc() );
	$dp = $codec->toJsonArray( $token->dataParsoid, DataParsoid::class );

	$banner = str_repeat( '=', 28 );
	$rule = str_repeat( '-', 80 ) . "\n";
	$modeTag = $fragmentMode ? '(FRAGMENT)' : '';

	// Header, template name + transclusion source, then the template source.
	$out = $banner . " template source " . $modeTag . $banner . "\n";
	$out .= 'TEMPLATE:' . $templateName . 'TRANSCLUSION:' .
		PHPUtils::jsonEncode( $dp['src'] ) . "\n";
	$out .= $rule;
	$out .= $src . "\n";

	// The p-fragment map section is only emitted when the map string is non-empty.
	$mapStr = $env->pFragmentMapToString();
	if ( $mapStr ) {
		$out .= "----- P-FRAGMENT MAP -----\n";
		$out .= $mapStr;
	}
	$out .= $rule;

	$env->writeDump( $out );
}
| 159 | |
| 160 | /** |
| 161 | * Prepare a PFragment for our parsing pipeline: split the fragment, |
| 162 | * convert it to embedded fragment markers, and add those markers to |
| 163 | * the pfragment map in the env. |
| 164 | * @param Env $env |
| 165 | * @param Frame $frame |
| 166 | * @param PFragment $pFragment |
| 167 | * @param array $opts |
| 168 | * @return array{frame:Frame,wikitext:string,srcOffsets:?SourceRange} |
| 169 | */ |
public static function preparePFragment(
	Env $env,
	Frame $frame,
	PFragment $pFragment,
	array $opts
): array {
	$converted = self::pFragmentToParsoidFragmentMarkers( $pFragment );
	$wikitext = $converted[0];
	$markerMap = $converted[1];

	// FUTURE WORK: Fragment should probably contain a Frame pointer as
	// well, since srcOffsets are only meaningful in relation to a specific
	// Frame::$srcText. When that happens, we should assign an appropriate
	// $frame here.
	//
	// Offsets carried by the fragment win over the ones passed in $opts.
	$srcOffsets = $pFragment->getSrcOffsets() ?? $opts['srcOffsets'] ?? null;

	if ( !empty( $opts['processInNewFrame'] ) ) {
		// In a fresh child frame the marker-laden wikitext *is* the source
		// text, so the offsets simply span all of it.
		$frame = $frame->newChild( $frame->getTitle(), [], $wikitext );
		$srcOffsets = new SourceRange( 0, strlen( $wikitext ) );
	}

	// Register the markers so they can be resolved back to their fragments.
	$env->addToPFragmentMap( $markerMap );

	return [
		'frame' => $frame,
		'wikitext' => $wikitext,
		'srcOffsets' => $srcOffsets,
	];
}
| 194 | |
/**
 * Run template source wikitext through a nested
 * 'wikitext-to-expanded-tokens' pipeline and return the result.
 *
 * @param Env $env
 * @param Frame $frame Parent frame handed to the nested pipeline.
 * @param Token $token Currently not referenced by this method's body.
 * @param ?array $tplArgs Passed through as the pipeline's 'tplArgs'.
 * @param string $src Template source; an empty string short-circuits to [].
 * @param array $opts
 *  - expandTemplates (defaults to false)
 *  - extTag (defaults to null)
 * @return array Output of the nested pipeline ([] for empty source).
 */
public static function processTemplateSource(
	Env $env, Frame $frame, Token $token, ?array $tplArgs,
	string $src, array $opts = []
): array {
	// Nothing to tokenize.
	if ( $src === '' ) {
		return [];
	}

	// We need a nested transformation pipeline for this wikitext that
	// takes it through stages 1-2 with the right pipeline options.
	//
	// Just returning the tokenized source (which might be fine with the
	// legacy preprocessor, since no templates or include directives are
	// expected to be tokenized and skipping those handlers would be ok)
	// doesn't work: the options of the pipeline we are currently in are
	// probably not the ones we want.
	$pipelineOpts = [
		'inTemplate' => true,
		// FIXME: In reality, this is broken for parser tests where
		// templates are expanded natively. All nested templates should
		// be expanded there, but setting this to !usePHPPreProcessor
		// seems to break a number of tests. Not pursued for now, since
		// the parserTests vs production distinction goes away with
		// parser integration. We'll bear the stench until then.
		//
		// NOTE: No expansion required for nested templates.
		'expandTemplates' => $opts['expandTemplates'] ?? false,
		'extTag' => $opts['extTag'] ?? null,
	];

	// HEADS UP: "sol" is forced to true below, regardless of whether the
	// transclusion itself appears in a start-of-line (SOL) context.
	//
	// Ex: in "foo {{1x|*bar}}", "*bar" is not in SOL context relative to
	// the top-level page, so should it really be parsed as a list item?
	// One could argue that sol should depend on the page-level context
	// of the transclusion: false for "foo {{1x|*bar}}" and true for
	// "foo\n{{1x|*bar}}". That is effectively what the legacy parser
	// does. (Ignore T2529 for the moment.)
	//
	// Parsoid is a different beast. Since the Parsoid/JS days, templates
	// have been processed asynchronously, so {{1x|*bar}} would be
	// expanded and tokenized before its preceding context had even been
	// processed. From the start, Parsoid has aimed to decouple the
	// processing of fragment generators (templates, extensions, or
	// anything else) from the processing of the page embedding them.
	// This has been the starting point of many a wikitext 2.0 proposal
	// on mediawiki.org; see also
	// [[mw:Parsing/Notes/Wikitext_2.0#Implications_of_this_model]].
	//
	// The main performance implication: a transclusion can be processed
	// concurrently *and* the output of {{1x|*bar}} can be cached, since
	// it is the same wherever it appears on the page. Without this
	// decoupling, for "{{mystery-template-that-takes-30-secs}}{{1x|*bar}}"
	// you'd have to wait 30 secs before expanding {{1x|*bar}}, just to
	// learn whether the mystery template leaves you in SOL state or not.
	//
	// In a stroke of good luck, wikitext editors seem to have agreed
	// that templates are better expanded in a consistent SOL state,
	// independent of their context: phab task T2529 tried (via a fragile
	// hack: inserting a newline) to ensure that every template starting
	// with start-of-line-sensitive markup was evaluated in SOL context.
	// Not everyone was satisfied with that hack (see T14974), but it has
	// been how things work for over a decade (as evidenced by T14974
	// never having been "fixed").
	//
	// So, given that we'd rather *not* use page context to pick the
	// initial SOL value for tokenizing a template, what *should* it be?
	//
	// * Treat every transclusion as a fresh document starting in SOL
	//   state, ie always "sol" => true. Most current wiki use supports
	//   this, and it is the intent behind the original T2529 hack
	//   (which left a number of edge cases, described below).
	//
	// * Use `"sol" => false` for templates -- the solution the original
	//   T2529 rejected as being contrary to editor expectations.
	//
	// * In the future, the template itself might be allowed to specify
	//   what its initial SOL state should be, via a mechanism similar
	//   to what typed templates might need. That could also address
	//   T14974. Parsoid doesn't exclude this at this point, but it would
	//   probably be signaled by a template "return type" which is *not*
	//   DOM, so the template wouldn't get parsed "as wikitext" (ie,
	//   T14974 wants an "attribute-value" return type which is a plain
	//   string, and some wikitext 2.0 proposals anticipate an "attribute
	//   name/value" dictionary as a possible return type).
	//
	// In support of sol=>true as the default initial state, consider the
	// sol-sensitive wikitext constructs and, implicitly, the corner
	// cases left open by the T2529 hack. (For constructs that are not
	// sol-sensitive, the initial SOL state is irrelevant.)
	//
	// - SOL-sensitive constructs include lists, headings, indent-pre,
	//   and table syntax.
	// - Of these, only lists, headings, and table syntax are actually
	//   handled in the PEG tokenizer and impacted by SOL state.
	// - Indent-Pre has its own handler that operates in a full page
	//   token context and isn't impacted.
	// - T2529 effectively means that for *#:; (lists) and {| (table
	//   start) newlines are added, so whatever we set here, they get
	//   processed in sol state.
	// - That leaves headings (=), table heading (!), table row (|), and
	//   table close (|}) syntax as what is impacted by the value here.
	// - Table row/heading/close templates are very very common on wikis
	//   and are used to build complex tables; sol => true lets us handle
	//   them without hacks. We aren't fully off the hook -- see the code
	//   in TokenStreamPatcher, AttributeExpander, TableFixups, all of
	//   which exist to work around the fact that decoupled processing
	//   isn't the wikitext default. But without sol => true, we'd likely
	//   be in deeper trouble.
	// - This can, however, cause occasional bad parses where "=|!"
	//   aren't meant to be processed as a sol-wikitext construct.
	// - Note also that the traditional workaround for T14974 (ie, the
	//   T2529 hack applying where sol=false is actually desired) is an
	//   initial <nowiki/>, which ensures the "T2529 characters" are not
	//   initial. There are alternative mechanisms to accomplish this
	//   (ie, HTML-encode the first character).
	//
	// To honor the spirit of T2529 it seems plausible to try to lint
	// away the remaining corner cases where T2529 does *not* result in
	// start-of-line state for template expansion, and to use the various
	// workarounds for compatibility in the meantime.
	//
	// We should also pick *one* of the workarounds for T14974 (probably
	// `<nowiki/>` at the first position in the template), support that
	// (until a better mechanism exists), and (if possible) lint away
	// any others.
	$sol = true;

	return self::processContentInPipeline(
		$env,
		$frame,
		$src,
		[
			'pipelineType' => 'wikitext-to-expanded-tokens',
			'pipelineOpts' => $pipelineOpts,
			// The template source is its own source text; offsets span all of it.
			'srcText' => $src,
			'srcOffsets' => new SourceRange( 0, strlen( $src ) ),
			'tplArgs' => $tplArgs,
			'sol' => $sol
		]
	);
}
| 345 | |
| 346 | /** |
| 347 | * Expands value all the way to DOM. |
| 348 | * |
| 349 | * @param Env $env |
| 350 | * The environment/context for the expansion. |
| 351 | * @param Frame $frame |
| 352 | * The parent frame within which the expansion is taking place. |
| 353 | * Used for template expansion and source text tracking. |
| 354 | * @param array $v |
| 355 | * The value to process. |
| 356 | * The value is expected to be an associative array with a "html" property. |
| 357 | * The html property is expanded to DOM only if it is an array (of tokens). |
| 358 | * Non-arrays are passed back unexpanded. |
| 359 | * @param bool $expandTemplates |
| 360 | * Should any templates encountered here be expanded |
| 361 | * (usually false for nested templates since they are never directly editable). |
| 362 | * @param bool $inTemplate |
| 363 | * Unexpanded templates can occur in the content of extension tags. |
| 364 | * @return array |
| 365 | */ |
public static function expandAttrValueToDOM(
	Env $env, Frame $frame, array $v, bool $expandTemplates, bool $inTemplate
): array {
	$html = $v['html'] ?? null;

	// Only token arrays get expanded; any other 'html' value is left alone.
	if ( is_array( $html ) ) {
		$pipelineOpts = [
			'attrExpansion' => true,
			'inlineContext' => true,
			'expandTemplates' => $expandTemplates,
			'inTemplate' => $inTemplate
		];

		// Terminate the token stream before handing it to the pipeline.
		$tokens = array_merge( $html, [ new EOFTk() ] );

		$fragment = self::processContentInPipeline(
			$env,
			$frame,
			$tokens,
			[
				'pipelineType' => 'expanded-tokens-to-fragment',
				'pipelineOpts' => $pipelineOpts,
				'srcOffsets' => $v['srcOffsets'],
				'sol' => true
			]
		);

		// Since we aren't at the top level, data attrs were not applied
		// in cleanup. However, tmp was stripped.
		$xmlOpts = [ 'innerXML' => true, 'fragment' => true ];
		$v['html'] = ContentUtils::ppToXML( $fragment, $xmlOpts );
	}

	// srcOffsets have served their purpose once the value is expanded;
	// drop them so they don't show up in the output data-mw attribute.
	unset( $v['srcOffsets'] );

	return $v;
}
| 398 | |
| 399 | /** |
| 400 | * @param Env $env |
| 401 | * The environment/context for the expansion. |
| 402 | * @param Frame $frame |
| 403 | * The parent frame within which the expansion is taking place. |
| 404 | * Used for template expansion and source text tracking. |
| 405 | * @param array $vals |
| 406 | * Array of values to expand. |
| 407 | * Non-array elements of $vals are passed back unmodified. |
| 408 | * If an array element, it is expected to be an associative array with a "html" property. |
| 409 | * The html property is expanded to DOM only if it is an array (of tokens). |
| 410 | * @param bool $expandTemplates |
| 411 | * Should any templates encountered here be expanded |
| 412 | * (usually false for nested templates since they are never directly editable). |
| 413 | * @param bool $inTemplate |
| 414 | * Unexpanded templates can occur in the content of extension tags. |
| 415 | * @return array |
| 416 | */ |
public static function expandAttrValuesToDOM(
	Env $env, $frame, array $vals, bool $expandTemplates, bool $inTemplate
): array {
	$ret = [];
	foreach ( $vals as $v ) {
		// The documented contract is that non-array elements are passed
		// back unmodified. expandAttrValueToDOM() declares `array $v`, so
		// forwarding a non-array would raise a TypeError instead; only
		// arrays are therefore sent through the expansion.
		$ret[] = is_array( $v )
			? self::expandAttrValueToDOM( $env, $frame, $v, $expandTemplates, $inTemplate )
			: $v;
	}
	// The result is a re-indexed list in the same order as $vals.
	return $ret;
}
| 426 | |
| 427 | /** |
| 428 | * Convert a DOM node's attributes to token attributes (KVs) and fetch the node's
| 429 | * data-parsoid and data-mw. The node comes from a DOM whose data attributes are stored outside the DOM.
| 430 | * |
| 431 | * @param Element $node |
| 432 | * @param array<string,string> $attrs |
| 433 | * @return array{attrs:KV[],dataParsoid:?DataParsoid,dataMw:?DataMw} |
| 434 | */ |
private static function domAttrsToTagAttrs( Element $node, array $attrs ): array {
	$kvs = [];
	foreach ( $attrs as $attrName => $attrValue ) {
		// The data-object attribute is skipped; the node's data is
		// returned separately below.
		if ( $attrName === DOMDataUtils::DATA_OBJECT_ATTR_NAME ) {
			continue;
		}
		$kvs[] = new KV( $attrName, $attrValue );
	}

	$dataMw = DOMDataUtils::getDataMw( $node );
	$dataParsoid = DOMDataUtils::getDataParsoid( $node );

	return [
		'attrs' => $kvs,
		'dataParsoid' => $dataParsoid,
		// An empty data-mw is reported as null.
		'dataMw' => $dataMw->isEmpty() ? null : $dataMw,
	];
}
| 449 | |
| 450 | /** |
| 451 | * Convert a DOM to tokens. Data attributes for nodes are stored outside the DOM. |
| 452 | * |
| 453 | * @param Node $node The root of the DOM tree to convert to tokens |
| 454 | * @param array<Token|string> $tokBuf This is where the tokens get stored |
| 455 | * @return array |
| 456 | */ |
private static function convertDOMtoTokens( Node $node, array $tokBuf ): array {
	// Text: converted via TokenUtils::newlinesToNlTks and appended.
	if ( $node instanceof Text ) {
		PHPUtils::pushArray( $tokBuf, TokenUtils::newlinesToNlTks( $node->nodeValue ) );
		return $tokBuf;
	}

	// Comments map one-to-one onto comment tokens.
	if ( $node instanceof Comment ) {
		$tokBuf[] = new CommentTk( $node->nodeValue );
		return $tokBuf;
	}

	if ( !( $node instanceof Element ) ) {
		// getWrapperTokens calls convertDOMToTokens with an Element,
		// and children of DOM elements are always text/comment/elements,
		// all of which are handled in this method.
		throw new UnreachableException( "Should never get here!" );
	}

	$tagName = DOMCompat::nodeName( $node );
	$info = self::domAttrsToTagAttrs( $node, DOMUtils::attributes( $node ) );

	// Void elements have neither children nor an end tag.
	if ( Utils::isVoidElement( $tagName ) ) {
		$tokBuf[] = new SelfclosingTagTk(
			$tagName, $info['attrs'], $info['dataParsoid'], $info['dataMw']
		);
		return $tokBuf;
	}

	// Start tag, then the children (recursively), then the end tag.
	$tokBuf[] = new TagTk(
		$tagName, $info['attrs'], $info['dataParsoid'], $info['dataMw']
	);
	$child = $node->firstChild;
	while ( $child ) {
		$tokBuf = self::convertDOMtoTokens( $child, $tokBuf );
		$child = $child->nextSibling;
	}

	$endTag = new EndTagTk( $tagName );
	// Keep stx parity
	if ( WTUtils::isLiteralHTMLNode( $node ) ) {
		$endTag->dataParsoid->stx = 'html';
	}
	$tokBuf[] = $endTag;

	return $tokBuf;
}
| 495 | |
| 496 | /** |
| 497 | * Get tokens representing a DOM forest (from transclusions, extensions, |
| 498 | * whatever that were generated as part of a separate processing pipeline) |
| 499 | * in the token stream. These tokens will tunnel the subtree through the |
| 500 | * token processing while preserving token stream semantics as if |
| 501 | * the DOM had been converted to tokens. |
| 502 | * |
| 503 | * @param DocumentFragment $domFragment List of DOM nodes that need to be tunneled through. |
| 504 | * @param array $opts |
| 505 | * @see encapsulateExpansionHTML's doc. for more info about these options. |
| 506 | * @return array<Token|string> List of token representatives. |
| 507 | */ |
| 508 | private static function getWrapperTokens( |
| 509 | DocumentFragment $domFragment, array $opts |
| 510 | ): array { |
| 511 | if ( !$domFragment->hasChildNodes() ) { |
| 512 | return [ new TagTk( 'span' ), new EndTagTk( 'span' ) ]; |
| 513 | } |
| 514 | |
| 515 | $node = $domFragment->firstChild; |
| 516 | |
| 517 | // Do we represent this with inline or block elements? |
| 518 | // This is to ensure that we get p-wrapping correct. |
| 519 | // |
| 520 | // * If all content is inline, we use inline-elements to represent this |
| 521 | // so that this content gets swallowed into the P tag that wraps |
| 522 | // adjacent inline content. |
| 523 | // |
| 524 | // * If any part of this is a block content, we treat extension content |
| 525 | // independent of surrounding content and don't want inline content |
| 526 | // here to be swallowed into a P tag that wraps adjacent inline content. |
| 527 | // |
| 528 | // This behavior ensures that we and clients can "drop-in" extension content |
| 529 | // into the DOM without messing with fixing up paragraph tags of surrounding |
| 530 | // content. It could potentially introduce minor rendering differences when |
| 531 | // compared to PHP parser output, but we'll swallow it for now. |
| 532 | $wrapperType = 'INLINE'; |
| 533 | if ( !empty( $opts['pipelineOpts']['inlineContext'] ) ) { |
| 534 | // If the DOM fragment is being processed in the context where P wrapping |
| 535 | // has been suppressed, we represent the DOM fragment with inline-tokens. |
| 536 | // |
| 537 | // FIXME(SSS): Looks like we have some "impedance mismatch" here. But, this |
| 538 | // is correct in scenarios where link-content or image-captions are being |
| 539 | // processed in a sub-pipeline and we don't want a <div> in the link-caption |
| 540 | // to cause the <a>..</a> to get split apart. |
| 541 | // |
| 542 | // Filed as T49963 |
| 543 | } elseif ( !$opts['unpackOutput'] ) { |
| 544 | // Fragments that won't be unpacked aren't amenable to inspection, since |
| 545 | // the ultimate content is unknown. For example, refs shuttle content |
| 546 | // through treebuilding that ends up in the references list. |
| 547 | // |
| 548 | // FIXME(arlolra): Do we need a mechanism to specify content |
| 549 | // categories? |
| 550 | } else { |
| 551 | foreach ( $domFragment->childNodes as $n ) { |
| 552 | if ( |
| 553 | DOMUtils::isWikitextBlockNode( $n ) || |
| 554 | DOMUtils::hasBlockElementDescendant( $n ) |
| 555 | ) { |
| 556 | $wrapperType = 'BLOCK'; |
| 557 | break; |
| 558 | } |
| 559 | } |
| 560 | } |
| 561 | |
| 562 | $wrapperName = null; |
| 563 | if ( $wrapperType === 'BLOCK' && !DOMUtils::isWikitextBlockNode( $node ) ) { |
| 564 | $wrapperName = 'div'; |
| 565 | } elseif ( DOMCompat::nodeName( $node ) === 'a' ) { |
| 566 | // Do not use 'A' as a wrapper node because it could |
| 567 | // end up getting nested inside another 'A' and the DOM |
| 568 | // structure can change where the wrapper tokens are no |
| 569 | // longer siblings. |
| 570 | // Ex: "[http://foo.com Bad nesting [[Here]]]. |
| 571 | $wrapperName = 'span'; |
| 572 | } elseif ( |
| 573 | in_array( DOMCompat::nodeName( $node ), [ 'style', 'script' ], true ) && |
| 574 | ( $node->nextSibling !== null ) |
| 575 | ) { |
| 576 | // <style>/<script> tags are not fostered, so if we're wrapping |
| 577 | // more than a single node, they aren't a good representation for |
| 578 | // the content. It can lead to fosterable content being inserted |
| 579 | // in a fosterable position after treebuilding is done, which isn't |
| 580 | // roundtrippable. |
| 581 | $wrapperName = 'span'; |
| 582 | } elseif ( !( $node instanceof Element ) ) { |
| 583 | $wrapperName = 'span'; |
| 584 | } else { |
| 585 | $wrapperName = DOMCompat::nodeName( $node ); |
| 586 | } |
| 587 | |
| 588 | if ( $node instanceof Element ) { |
| 589 | Assert::invariant( |
| 590 | // No need to look for data-mw as well. |
| 591 | // Nodes that have data-mw also have data-parsoid. |
| 592 | !$node->hasAttribute( 'data-parsoid' ), |
| 593 | "Expected node to have its data attributes loaded" ); |
| 594 | |
| 595 | $nodeData = clone DOMDataUtils::getNodeData( $node ); |
| 596 | |
| 597 | if ( $wrapperName !== DOMCompat::nodeName( $node ) ) { |
| 598 | // Create a copy of the node without children |
| 599 | $workNode = $node->ownerDocument->createElement( $wrapperName ); |
| 600 | |
| 601 | // Copy over attributes |
| 602 | foreach ( DOMUtils::attributes( $node ) as $name => $value ) { |
| 603 | // "typeof" is ignored since it'll be removed below. |
| 604 | if ( $name !== 'typeof' ) { |
| 605 | $workNode->setAttribute( $name, $value ); |
| 606 | } |
| 607 | } |
| 608 | |
| 609 | // We are applying a different wrapper. |
| 610 | // So, node's data-parsoid isn't applicable. |
| 611 | $nodeData->parsoid = new DataParsoid; |
| 612 | } else { |
| 613 | // Shallow clone since we don't want to convert the whole tree to tokens. |
| 614 | $workNode = $node->cloneNode( false ); |
| 615 | |
| 616 | // Reset 'tsr' since it isn't applicable. Neither is |
| 617 | // any auxiliary info like 'endTSR'. |
| 618 | // FIXME: The above comment is only true if we are reusing |
| 619 | // DOM fragments from cache from previous revisions in |
| 620 | // incremental parsing scenarios. See T98992 |
| 621 | if ( isset( $nodeData->parsoid->tsr ) ) { |
| 622 | $nodeData->parsoid->tsr = null; |
| 623 | } |
| 624 | if ( isset( $nodeData->parsoid->tmp->endTSR ) ) { |
| 625 | unset( $nodeData->parsoid->tmp->endTSR ); |
| 626 | } |
| 627 | |
| 628 | // The "in transclusion" flag was set on the first child for template |
| 629 | // wrapping in the nested pipeline, and doesn't apply to the dom |
| 630 | // fragment wrapper in this pipeline. Keeping it around can induce |
| 631 | // template wrapping of a foster box if the dom fragment is found in |
| 632 | // a fosterable position. |
| 633 | if ( |
| 634 | $nodeData->parsoid !== null && |
| 635 | $nodeData->parsoid->getTempFlag( TempData::IN_TRANSCLUSION ) |
| 636 | ) { |
| 637 | $nodeData->parsoid->tmp->setFlag( TempData::IN_TRANSCLUSION, false ); |
| 638 | } |
| 639 | // Similarly for "fostered", it applies to the nested pipeline and, |
| 640 | // if transferred, can interfere when unpacking |
| 641 | if ( isset( $nodeData->parsoid->fostered ) ) { |
| 642 | unset( $nodeData->parsoid->fostered ); |
| 643 | } |
| 644 | |
| 645 | // Note that the TempData::WRAPPER flag may be transferred to the |
| 646 | // fragment wrapper. Depending on the contents of the fragment, |
| 647 | // it's questionable if that's truly representative. Our modeling |
| 648 | // based on the first node of the fragment has limitations. |
| 649 | } |
| 650 | |
| 651 | DOMDataUtils::setNodeData( $workNode, $nodeData ); |
| 652 | } else { |
| 653 | $workNode = $node->ownerDocument->createElement( $wrapperName ); |
| 654 | } |
| 655 | |
| 656 | $tokens = self::convertDOMtoTokens( $workNode, [] ); |
| 657 | |
| 658 | // Remove the typeof attribute from the first token. |
| 659 | // It will be replaced with mw:DOMFragment. |
| 660 | $tokens[0]->removeAttribute( 'typeof' ); |
| 661 | |
| 662 | // Remove the about attribute from the first token. |
| 663 | // We want to be able to distinguish when this wrapper was template |
| 664 | // annotated. |
| 665 | $tokens[0]->removeAttribute( 'about' ); |
| 666 | |
| 667 | return $tokens; |
| 668 | } |
| 669 | |
/**
 * Produce the placeholder ("wrapper") tokens that stand in for an
 * already-expanded DOM fragment while the remaining token
 * transformations run. The wrappers are unwrapped again on the DOM.
 *
 * The first wrapper token is given a `mw:DOMFragment` typeof
 * (`mw:DOMFragment/sealed/<wrapperName>` when `unpackOutput` is false)
 * and stores `$expansion['html']` in its data-parsoid `html` field.
 *
 * @param Env $env
 *   The active environment/context.
 * @param Token $token
 *   The token that generated the DOM.
 * @param array $expansion
 *   - string html
 *       Value recorded on the first wrapper token's data-parsoid
 *       (makeExpansion() supplies a fragment id here).
 *   - DocumentFragment domFragment
 *       Outermost nodes of the expansion.
 * @param array $opts
 *   - SourceRange tsr
 *       TSR to put on the generated tokens; falls back to the TSR of
 *       $token. It is used to compute DSR on the placeholder tokens,
 *       which is transferred to the unpacked DOM if setDSR is true.
 *   - bool setDSR
 *       Whether the DSR of the placeholder node is transferred to the
 *       unpacked DOM when the fragment is unpacked
 *       (for example: Cite, reused transclusions).
 *   - bool fromCache
 *   - array pipelineOpts
 *   - bool unpackOutput (defaults to true)
 *   - string wrapperName (read when unpackOutput is false)
 * @return array<Token|string>
 */
public static function encapsulateExpansionHTML(
	Env $env, Token $token, array $expansion, array $opts
): array {
	// Unpacking is the default behaviour.
	$opts['unpackOutput'] ??= true;

	$wrapperTokens = self::getWrapperTokens( $expansion['domFragment'], $opts );
	$head = $wrapperTokens[0];

	// Mark the head token as a DOM fragment so it gets unwrapped later.
	$typeOf = 'mw:DOMFragment';
	if ( !$opts['unpackOutput'] ) {
		$typeOf .= '/sealed/' . $opts['wrapperName'];
	}
	$head->setAttribute( 'typeof', $typeOf );

	// The head token carries the fragment reference in data-parsoid.html.
	$head->dataParsoid->html = $expansion['html'];

	// Forward the setDSR option as a temp flag.
	if ( !empty( $opts['setDSR'] ) ) {
		$head->dataParsoid->setTempFlag( TempData::SET_DSR, $opts['setDSR'] );
	}

	// Forward the fromCache option as a temp flag.
	if ( !empty( $opts['fromCache'] ) ) {
		$head->dataParsoid->setTempFlag( TempData::FROM_CACHE, $opts['fromCache'] );
	}

	$sourceTsr = $opts['tsr'] ?? $token->dataParsoid->tsr ?? null;
	if ( !$sourceTsr ) {
		// No source range available: nothing to transfer.
		return $wrapperTokens;
	}

	// The head token spans the full source range ...
	$head->dataParsoid->tsr = $sourceTsr;
	$head->dataParsoid->extTagOffsets = $token->dataParsoid->extTagOffsets ?? null;

	// ... and every following token gets a zero-width range at its end.
	// XXX to investigate: if $sourceTsr->end is null, then we're losing
	// the 'hint' we'd like to provide here that this is a zero-width
	// source range.
	// ->end can be set to null by WikiLinkHandler::bailTokens()
	$zeroWidthTsr = new SourceRange( $sourceTsr->end, $sourceTsr->end );
	for ( $idx = 1, $total = count( $wrapperTokens ); $idx < $total; $idx++ ) {
		$wrapperTokens[$idx]->dataParsoid->tsr = clone $zeroWidthTsr;
	}

	return $wrapperTokens;
}
| 746 | |
/**
 * Move the accumulated nodes into a freshly created <span> that is
 * inserted where the first accumulated node used to be, flag the span
 * as a wrapper in its data-parsoid, and empty the accumulator.
 *
 * @param Document $doc Document used to create the span.
 * @param array &$textCommentAccum Non-empty list of nodes to wrap;
 *   reset to an empty array on return.
 */
private static function wrapAccum(
	Document $doc, array &$textCommentAccum
): void {
	$wrapper = $doc->createElement( 'span' );

	// The span takes the position of the first accumulated node.
	$firstAccumulated = $textCommentAccum[0];
	$firstAccumulated->parentNode->insertBefore( $wrapper, $firstAccumulated );

	foreach ( $textCommentAccum as $accumulated ) {
		$wrapper->appendChild( $accumulated );
	}

	$wrapperDp = new DataParsoid;
	$wrapperDp->setTempFlag( TempData::WRAPPER );
	DOMDataUtils::setDataParsoid( $wrapper, $wrapperDp );

	$textCommentAccum = [];
}
| 762 | |
/**
 * Wrap text and comment nodes in a node list into spans, so that all
 * top-level nodes are elements.
 *
 * Consecutive text/comment nodes share a single span. When $startAt is
 * given, nodes before it are left alone; when $stopAt is given,
 * processing ends after that node. An empty node list is a no-op.
 *
 * @param NodeList $nodes List of DOM nodes to wrap, mix of node types.
 * @param ?Node $startAt First node to consider (null: start at the beginning).
 * @param ?Node $stopAt Last node to consider (null: run to the end).
 */
public static function addSpanWrappers(
	$nodes,
	?Node $startAt = null,
	?Node $stopAt = null
): void {
	// item( 0 ) yields null for an empty list; bail out instead of
	// dereferencing null to find the owner document.
	$firstNode = $nodes->item( 0 );
	if ( $firstNode === null ) {
		return;
	}
	$doc = $firstNode->ownerDocument;

	// Build a real array out of nodes.
	//
	// Operating directly on DOM child-nodes array
	// and manipulating them by adding span wrappers
	// changes the traversal itself
	$nodeBuf = [];
	foreach ( $nodes as $node ) {
		$nodeBuf[] = $node;
	}

	$textCommentAccum = [];
	$start = ( $startAt === null );
	foreach ( $nodeBuf as $node ) {
		if ( !$start ) {
			// Skip everything in front of $startAt.
			if ( $startAt !== $node ) {
				continue;
			}
			$start = true;
		}
		if ( $node instanceof Text || $node instanceof Comment ) {
			$textCommentAccum[] = $node;
		} elseif ( count( $textCommentAccum ) ) {
			// A non-text, non-comment node ends the current run.
			self::wrapAccum( $doc, $textCommentAccum );
		}
		if ( $node === $stopAt ) {
			break;
		}
	}

	// Wrap whatever run is still pending at the end.
	if ( count( $textCommentAccum ) ) {
		self::wrapAccum( $doc, $textCommentAccum );
	}
}
| 811 | |
/**
 * Turn a HTML5 DOM fragment into a mw:DOMFragment and return the tokens
 * that represent it in the token stream for further processing.
 *
 * The DOMProcessorPipeline will unpack the fragment and insert the HTML
 * back into the DOM.
 *
 * @param Env $env
 *   The active environment/context.
 * @param Token $token
 *   The token that generated the DOM.
 * @param DocumentFragment $domFragment
 *   The DOM that the token expanded to.
 * @param array $opts
 *   Options handed on to the encapsulation code; they are documented
 *   on encapsulateExpansionHTML.
 * @return array<Token|string>
 */
public static function tunnelDOMThroughTokens(
	Env $env, Token $token, DocumentFragment $domFragment, array $opts
): array {
	// Register the fragment, then wrap it in placeholder tokens that get
	// our subdom through the token processing stages. These will be
	// finally unwrapped on the DOM.
	return self::encapsulateExpansionHTML(
		$env,
		$token,
		self::makeExpansion( $env, $domFragment ),
		$opts
	);
}
| 838 | |
/**
 * Register a DOM fragment with the environment under a newly allocated
 * fragment id and describe it as an expansion array.
 *
 * @param Env $env
 * @param DocumentFragment $domFragment
 * @return array
 *   - DocumentFragment domFragment The fragment that was passed in.
 *   - html The fragment id returned by $env->newFragmentId().
 */
public static function makeExpansion(
	Env $env, DocumentFragment $domFragment
): array {
	$id = $env->newFragmentId();
	$env->setDOMFragment( $id, $domFragment );

	$expansion = [];
	$expansion['domFragment'] = $domFragment;
	$expansion['html'] = $id;
	return $expansion;
}
| 846 | |
/**
 * Walk $node and its following siblings (descending into elements that
 * are not themselves expansions) looking for transclusion and extension
 * output, identified by a mw:Transclusion / mw:Extension/* typeof
 * together with an about attribute.
 *
 * Recording an expansion has not been ported yet: as soon as an
 * expansion with a usable key (its data-parsoid src) is found, an
 * UnreachableException is thrown, so $expansions is never modified.
 *
 * @param Env $env
 * @param array &$expansions Buckets to fill (see extractExpansions).
 * @param ?Node $node First node to inspect; null (e.g. the firstChild
 *   of a childless element) is accepted and makes this a no-op.
 * @throws UnreachableException When a keyed expansion is encountered.
 */
private static function doExtractExpansions( Env $env, array &$expansions, ?Node $node ): void {
	while ( $node ) {
		if ( $node instanceof Element ) {
			if (
				DOMUtils::matchTypeOf( $node, '#^mw:(Transclusion$|Extension/)#' ) &&
				$node->hasAttribute( 'about' )
			) {
				$dp = DOMDataUtils::getDataParsoid( $node );
				$about = DOMCompat::getAttribute( $node, 'about' );
				$nodes = WTUtils::getAboutSiblings( $node, $about );

				// Transclusions belong in $expansions['transclusions'] and
				// extensions in $expansions['extensions']; both are keyed
				// by source. Anything else would be media, for which there
				// is no key yet.
				// XXX gwicke: use proper key that is not
				// source-based? This also needs to work for
				// transclusion output.
				$isKeyedBySrc = DOMUtils::hasTypeOf( $node, 'mw:Transclusion' ) ||
					DOMUtils::matchTypeOf( $node, '#^mw:Extension/#' );
				$key = $isKeyedBySrc ? $dp->src : null;

				if ( $key ) {
					// FIXME: makeExpansion return type changed, so
					// storing makeExpansion( $env, $nodes ) under $key in
					// the matching $expansions bucket no longer works.
					throw new UnreachableException( 'Callsite was not ported!' );
				}

				// Continue after the last node of this about group.
				$node = end( $nodes );
			} else {
				self::doExtractExpansions( $env, $expansions, $node->firstChild );
			}
		}
		$node = $node->nextSibling;
	}
}
| 887 | |
/**
 * Extract transclusion and extension expansions from a DOM, and return
 * them grouped into buckets shaped like this:
 *   [
 *     'transclusions' => [ 'key1' => <expansion>, ... ],
 *     'extensions' => [ 'key2' => <expansion>, ... ],
 *     'media' => [ 'key3' => <expansion>, ... ],
 *   ]
 *
 * NOTE: storing expansions has not been ported yet. The traversal
 * throws an UnreachableException when it finds an expansion with a
 * source key, so a returned array currently has three empty buckets.
 *
 * @param Env $env
 * @param Element $body
 * @return array
 */
public static function extractExpansions( Env $env, Element $body ): array {
	$expansions = [
		'transclusions' => [],
		'extensions' => [],
		'media' => [],
	];

	// A body without children has a null firstChild; there is nothing
	// to traverse in that case.
	$firstChild = $body->firstChild;
	if ( $firstChild !== null ) {
		// Kick off the extraction
		self::doExtractExpansions( $env, $expansions, $firstChild );
	}

	return $expansions;
}
| 926 | |
/**
 * Fetches output of encapsulations that return HTML from the legacy parser
 *
 * The source is handed to the data access layer's parseWikitext(); the
 * result is passed through DOMUtils::stripPWrapper() and parsed into a
 * fragment of the top-level document.
 *
 * @param Env $env
 * @param string $source
 * @return ?DocumentFragment Null when parseWikitext() returns an empty string.
 */
public static function parseToHTML( Env $env, string $source ): ?DocumentFragment {
	$html = $env->getDataAccess()->parseWikitext(
		$env->getPageConfig(), $env->getMetadata(), $source
	);

	if ( $html === '' ) {
		return null;
	}

	$topLevelDoc = $env->getTopLevelDoc();
	$unwrappedHtml = DOMUtils::stripPWrapper( $html );
	return DOMUtils::parseHTMLToFragment( $topLevelDoc, $unwrappedHtml );
}
| 938 | } |