Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 306 |
|
0.00% |
0 / 16 |
CRAP | |
0.00% |
0 / 1 |
| PipelineUtils | |
0.00% |
0 / 306 |
|
0.00% |
0 / 16 |
6972 | |
0.00% |
0 / 1 |
| pFragmentToParsoidFragmentMarkers | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
| getDOMFragmentToken | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
| processContentInPipeline | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
6 | |||
| dumpTplSrc | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
12 | |||
| preparePFragment | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
6 | |||
| handleAsyncResult | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
6 | |||
| processTemplateSource | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
12 | |||
| expandAttrValueToDOM | |
0.00% |
0 / 66 |
|
0.00% |
0 / 1 |
156 | |||
| expandAttrValuesToDOM | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| domAttrsToTagAttrs | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
| convertDOMtoTokens | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
56 | |||
| getWrapperTokens | |
0.00% |
0 / 50 |
|
0.00% |
0 / 1 |
552 | |||
| tunnelDOMThroughTokens | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
30 | |||
| wrapAccum | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 | |||
| addSpanWrappers | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
90 | |||
| parseToHTML | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Utils; |
| 5 | |
| 6 | use Wikimedia\Assert\Assert; |
| 7 | use Wikimedia\Assert\UnreachableException; |
| 8 | use Wikimedia\Parsoid\Config\Env; |
| 9 | use Wikimedia\Parsoid\Core\DOMCompat; |
| 10 | use Wikimedia\Parsoid\Core\DomSourceRange; |
| 11 | use Wikimedia\Parsoid\Core\Source; |
| 12 | use Wikimedia\Parsoid\Core\SourceRange; |
| 13 | use Wikimedia\Parsoid\Core\SourceString; |
| 14 | use Wikimedia\Parsoid\DOM\Comment; |
| 15 | use Wikimedia\Parsoid\DOM\Document; |
| 16 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
| 17 | use Wikimedia\Parsoid\DOM\Element; |
| 18 | use Wikimedia\Parsoid\DOM\Node; |
| 19 | use Wikimedia\Parsoid\DOM\Text; |
| 20 | use Wikimedia\Parsoid\Ext\AsyncResult; |
| 21 | use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; |
| 22 | use Wikimedia\Parsoid\Fragments\DomPFragment; |
| 23 | use Wikimedia\Parsoid\Fragments\PFragment; |
| 24 | use Wikimedia\Parsoid\Fragments\WikitextPFragment; |
| 25 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
| 26 | use Wikimedia\Parsoid\NodeData\TempData; |
| 27 | use Wikimedia\Parsoid\Tokens\CommentTk; |
| 28 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
| 29 | use Wikimedia\Parsoid\Tokens\EOFTk; |
| 30 | use Wikimedia\Parsoid\Tokens\KV; |
| 31 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
| 32 | use Wikimedia\Parsoid\Tokens\TagTk; |
| 33 | use Wikimedia\Parsoid\Tokens\Token; |
| 34 | use Wikimedia\Parsoid\Wt2Html\Frame; |
| 35 | |
| 36 | /** |
| 37 | * This file contains parsing pipeline related utilities. |
| 38 | */ |
| 39 | class PipelineUtils { |
| 40 | // keep in sync with internal_strip_marker in Grammar.pegphp |
| 41 | public const PARSOID_FRAGMENT_PREFIX = "{{#parsoid\0fragment:"; |
| 42 | |
/**
 * Serialize a PFragment to a wikitext string in which the embedded
 * fragments are replaced by parsoid fragment markers.
 *
 * The fragment is cast to a WikitextPFragment and split. The pieces are
 * consumed in alternation: the piece at index 0 and every even index is
 * emitted as-is, while each piece at an odd index is replaced in the
 * output by a freshly numbered marker and recorded in the returned map.
 * Marker numbers come from a function-static counter, so they keep
 * increasing across calls.
 *
 * @param PFragment $fragment
 * @return array{0:string,1:array<string,PFragment>} An array consisting of
 *   the wikitext string, followed by the marker-to-PFragment map.
 */
public static function pFragmentToParsoidFragmentMarkers( PFragment $fragment ): array {
	static $counter = 0;
	$pieces = WikitextPFragment::castFromPFragment( $fragment )->split();
	$numPieces = count( $pieces );
	$chunks = [ $pieces[0] ];
	$markerMap = [];
	$idx = 1;
	while ( $idx < $numPieces ) {
		$marker = self::PARSOID_FRAGMENT_PREFIX . $counter . '}}';
		$counter++;
		$markerMap[$marker] = $pieces[$idx];
		// Emit the marker followed by the piece that trails it.
		array_push( $chunks, $marker, $pieces[$idx + 1] );
		$idx += 2;
	}
	return [ implode( '', $chunks ), $markerMap ];
}
| 62 | |
/**
 * Creates a dom-fragment-token for processing 'content' (an array of tokens)
 * in its own subpipeline all the way to DOM. These tokens will be processed
 * by their own handler (DOMFragmentBuilder) in the last stage of the async
 * pipeline.
 *
 * srcOffsets should always be provided to process top-level page content in a
 * subpipeline. Without it, DSR computation and template wrapping cannot be done
 * in the subpipeline. While unpackDOMFragment can do this on unwrapping, that can
 * be a bit fragile and makes dom-fragments a leaky abstraction by leaking subpipeline
 * processing into the top-level pipeline.
 *
 * @param string|Token|array<Token|string> $content The array of tokens to process.
 * @param SourceRange $srcOffsets Wikitext source offsets (start/end) of these tokens.
 * @param array $opts Parsing options.
 *    - Token token The token that generated the content. Although $opts
 *      defaults to an empty array, this entry is dereferenced
 *      unconditionally (its getName() is called), so it must be supplied.
 *    - bool inlineContext Is this DOM fragment used in an inline context?
 *      Stored as "1"/"0"; absent or falsy becomes "0".
 *    - bool inPHPBlock Stored as "1"/"0"; absent or falsy becomes "0".
 * @return SelfclosingTagTk A 'mw:dom-fragment-token' carrying the above as attributes.
 */
public static function getDOMFragmentToken(
	$content, SourceRange $srcOffsets, array $opts = []
): SelfclosingTagTk {
	$contextToken = $opts['token'];

	$attribs = [];
	$attribs[] = new KV( 'contextTokName', $contextToken->getName() );
	$attribs[] = new KV( 'content', $content, $srcOffsets->expandTsrV() );
	// Boolean options are serialized as the strings "1" / "0".
	$attribs[] = new KV( 'inlineContext', !empty( $opts['inlineContext'] ) ? "1" : "0" );
	$attribs[] = new KV( 'inPHPBlock', !empty( $opts['inPHPBlock'] ) ? "1" : "0" );

	return new SelfclosingTagTk( 'mw:dom-fragment-token', $attribs );
}
| 93 | |
/**
 * Processes content (wikitext, array of tokens, whatever) in its own
 * pipeline based on options.
 *
 * A pipeline of the requested type is obtained from the env's pipeline
 * factory, initialized, and then asked to parse $content.
 *
 * @param Env $env The environment/context for the expansion.
 * @param Frame $frame
 *    The parent frame within which the expansion is taking place.
 *    Used for template expansion and source text tracking.
 * @param string|Token|array<Token|string>|DocumentFragment|PFragment $content
 *    How this content is processed depends on what kind of pipeline
 *    is constructed specified by opts.
 * @param array $opts
 *    Processing options that specify pipeline-type, opts, and callbacks.
 *    - string pipelineType (read without a default)
 *    - array pipelineOpts (read without a default)
 *    - array tplArgs - if set, defines parameters for the child frame
 *      - string tplArgs['name']
 *      - KV[] tplArgs['attribs']
 *    - SourceRange srcOffsets - if set, defines the range and
 *      source text that $content corresponds to
 *    - string startRule The start rule to use when tokenizing;
 *      only forwarded to parse() when set
 *    - bool sol Whether tokens should be processed in start-of-line context
 *      (read without a default)
 *    - bool toplevel Whether the pipeline is considered atTopLevel
 *      (defaults to false)
 *    - stdClass tplInfo Template info for pipelines created when DOM processing
 * @return array<Token|string>|DocumentFragment (depending on pipeline type)
 */
public static function processContentInPipeline(
	Env $env, Frame $frame, $content, array $opts
) {
	$factory = $env->getPipelineFactory();
	$pipeline = $factory->getPipeline( $opts['pipelineType'], $opts['pipelineOpts'] );

	$initOpts = [
		// NOTE: some pipelines force toplevel to true
		'toplevel' => $opts['toplevel'] ?? false,
		'tplInfo' => $opts['tplInfo'] ?? null,
		'frame' => $frame,
		'tplArgs' => $opts['tplArgs'] ?? null,
		'srcOffsets' => $opts['srcOffsets'] ?? null,
	];
	$pipeline->init( $initOpts );

	// 'startRule' is only passed along when the caller provided one.
	$parseOpts = [ 'sol' => $opts['sol'] ];
	if ( isset( $opts['startRule'] ) ) {
		$parseOpts['startRule'] = $opts['startRule'];
	}

	return $pipeline->parse( $content, $parseOpts );
}
| 145 | |
/**
 * Dump template source if '--dump tplsrc' flag was set.
 *
 * Builds a text report (banner, template name plus the JSON-encoded 'src'
 * of the token's data-parsoid, the source text, and the env's p-fragment
 * map when it is non-empty) and hands it to the env's writeDump().
 *
 * @param Env $env
 * @param Token $token Token whose dataParsoid 'src' is reported
 * @param string $templateName
 * @param string $src Source text to dump
 * @param bool $fragmentMode When true, the banner is tagged "(FRAGMENT)"
 */
public static function dumpTplSrc(
	Env $env, Token $token, string $templateName, string $src,
	bool $fragmentMode = false
): void {
	$codec = DOMDataUtils::getCodec( $env->getTopLevelDoc() );
	$bannerRule = str_repeat( '=', 28 );
	$separator = str_repeat( '-', 80 ) . "\n";
	$modeTag = $fragmentMode ? '(FRAGMENT)' : '';

	$parts = [ $bannerRule . " template source " . $modeTag . $bannerRule . "\n" ];

	$dp = $codec->toJsonArray( $token->dataParsoid, DataParsoid::class );
	$parts[] = 'TEMPLATE:' . $templateName . 'TRANSCLUSION:' .
		PHPUtils::jsonEncode( $dp['src'] ) . "\n";
	$parts[] = $separator;
	$parts[] = $src . "\n";

	$pfragMapStr = $env->pFragmentMapToString();
	if ( $pfragMapStr ) {
		$parts[] = "----- P-FRAGMENT MAP -----\n";
		$parts[] = $pfragMapStr;
	}
	$parts[] = $separator;

	$env->writeDump( implode( '', $parts ) );
}
| 169 | |
/**
 * Prepare a PFragment for our parsing pipeline: split the fragment,
 * convert it to embedded fragment markers, and add those markers to
 * the pfragment map in the env.
 *
 * @param Env $env
 * @param Frame $frame
 * @param PFragment $pFragment
 * @param array $opts
 *  - SourceRange srcOffsets Used only when the fragment has no source
 *    offsets of its own.
 *  - bool processInNewFrame When truthy, a child frame is created over a
 *    new SourceString built from the marker-laden wikitext, and the
 *    source offsets are recomputed from that source.
 * @return array{frame:Frame,wikitext:string,srcOffsets:?SourceRange}
 */
public static function preparePFragment(
	Env $env,
	Frame $frame,
	PFragment $pFragment,
	array $opts
): array {
	[ $wikitext, $markerMap ] = self::pFragmentToParsoidFragmentMarkers( $pFragment );

	// Note that the fragment's srcOffsets may have its own Source,
	// which differs from the top level Source. This is separate from
	// the `Frame`, which would determine (for example) how `{{{1}}}`
	// is evaluated.
	$srcOffsets = $pFragment->getSrcOffsets() ?? ( $opts['srcOffsets'] ?? null );

	$useNewFrame = !empty( $opts['processInNewFrame'] );
	if ( $useNewFrame ) {
		$source = new SourceString( $wikitext );
		$srcOffsets = SourceRange::fromSource( $source );
		$frame = $frame->newChild( $frame->getTitle(), [], $source );
	}

	// $srcOffsets shouldn't really be null, but if it is...
	if ( $srcOffsets === null ) {
		$srcOffsets = SourceRange::fromSource( new SourceString( $wikitext ) );
	}

	$env->addToPFragmentMap( $markerMap );

	return [
		'frame' => $frame,
		'wikitext' => $wikitext,
		'srcOffsets' => $srcOffsets,
	];
}
| 207 | |
/**
 * Resolve an AsyncResult to a PFragment that can be used right away.
 *
 * The 'async-not-ready' output flag is always set on the metadata. The
 * result's own fallback content is returned when it provides one;
 * otherwise a fragment holding a <span class="mw-async-not-ready">
 * wrapping the site's localized async fallback message is built.
 *
 * @param ParsoidExtensionAPI $extApi
 * @param AsyncResult $fragment
 * @param ?DomSourceRange $srcOffsets Passed through to the generated
 *   DomPFragment when the default fallback message is used.
 * @return PFragment
 */
public static function handleAsyncResult(
	ParsoidExtensionAPI $extApi, AsyncResult $fragment,
	?DomSourceRange $srcOffsets
): PFragment {
	$extApi->getMetadata()->setOutputFlag( 'async-not-ready' );

	$fallback = $fragment->fallbackContent( $extApi );
	if ( $fallback !== null ) {
		return $fallback;
	}

	// Create localized fallback message
	$doc = $extApi->getTopLevelDoc();
	$wrapper = $doc->createDocumentFragment();
	$span = $doc->createElement( 'span' );
	$span->setAttribute( 'class', 'mw-async-not-ready' );
	$msgKey = $extApi->getSiteConfig()->getAsyncFallbackMessageKey();
	DOMCompat::append( $span, WTUtils::createPageContentI18nFragment( $doc, $msgKey ) );
	$wrapper->appendChild( $span );

	return DomPFragment::newFromDocumentFragment( $wrapper, $srcOffsets );
}
| 232 | |
/**
 * Run template source text through a nested
 * 'wikitext-to-expanded-tokens' pipeline.
 *
 * @param Env $env
 * @param Frame $frame
 * @param Token $token Currently not read by this method.
 * @param ?array $tplArgs Forwarded as the pipeline's 'tplArgs' option.
 * @param string|Source $src Source to process. A string is wrapped in a
 *   SourceString; the empty string short-circuits to an empty result.
 * @param array $opts
 *  - expandTemplates Forwarded as a pipeline option (defaults to false)
 *  - extTag Forwarded as a pipeline option (defaults to null)
 * @return array Whatever the pipeline produced, or [] for an empty string.
 */
public static function processTemplateSource(
	Env $env, Frame $frame, Token $token, ?array $tplArgs,
	string|Source $src, array $opts = []
): array {
	if ( is_string( $src ) ) {
		if ( $src === '' ) {
			return [];
		}
		// Helper: this should probably be pushed to the caller in
		// the case where the source is a Template or substring of
		// the original source.
		$src = new SourceString( $src );
	}
	$srcOffsets = SourceRange::fromSource( $src );

	$pipelineOpts = [
		'inTemplate' => true,
		// FIXME: In reality, this is broken for parser tests where
		// we expand templates natively. We do want all nested templates
		// to be expanded. But, setting this to !usePHPPreProcessor seems
		// to break a number of tests. Not pursuing this line of enquiry
		// for now since this parserTests vs production distinction will
		// disappear with parser integration. We'll just bear the stench
		// till that time.
		//
		// NOTE: No expansion required for nested templates.
		'expandTemplates' => $opts['expandTemplates'] ?? false,
		'extTag' => $opts['extTag'] ?? null,
	];

	// Get a nested transformation pipeline for the wikitext that takes
	// us through stages 1-2, with the appropriate pipeline options set.
	//
	// Simply returning the tokenized source here (which may be correct
	// when using the legacy preprocessor because we don't expect to
	// tokenize any templates or include directives so skipping those
	// handlers should be ok) won't work since the options for the pipeline
	// we're in probably aren't what we want.
	return self::processContentInPipeline(
		$env,
		$frame,
		$src->getSrcText(),
		[
			'pipelineType' => 'wikitext-to-expanded-tokens',
			'pipelineOpts' => $pipelineOpts,
			'srcOffsets' => $srcOffsets,
			'tplArgs' => $tplArgs,
			// HEADS UP: You might be wondering why we are forcing "sol" => true without
			// using information about whether the transclusion is used in a SOL context.
			//
			// Ex: "foo {{1x|*bar}}"  Here, "*bar" is not in SOL context relative to the
			// top-level page and so, should it actually be parsed as a list item?
			//
			// So, there is a use-case where one could argue that the sol value here
			// should be conditioned on the page-level context where "{{1x|*bar}}" showed
			// up. So, in this example "foo {{1x|*bar}}", sol would be false and in this
			// example "foo\n{{1x|*bar}}", sol would be true. That is effectively how
			// the legacy parser behaves. (Ignore T2529 for the moment.)
			//
			// But, Parsoid is a different beast. Since the Parsoid/JS days, templates
			// have been processed asynchronously. So, {{1x|*bar}} would be expanded and
			// tokenized before even its preceding context might have been processed.
			// From the start, Parsoid has aimed to decouple the processing of fragment
			// generators (be it templates, extensions, or something else) from the
			// processing of the page they are embedded in. This has been the
			// starting point of many a wikitext 2.0 proposal on mediawiki.org;
			// see also [[mw:Parsing/Notes/Wikitext_2.0#Implications_of_this_model]].
			//
			// The main performance implication is that you can process a transclusion
			// concurrently *and* cache the output of {{1x|*bar}} since its output is
			// the same no matter where on the page it appears. Without this decoupled
			// model, if you got "{{mystery-template-that-takes-30-secs}}{{1x|*bar}}"
			// you have to wait 30 secs before you get to expand {{1x|*bar}}
			// because you have to wait and see whether the mystery template will
			// leave you in SOL state or non-SOL state.
			//
			// In a stroke of good luck, wikitext editors seem to have agreed
			// that it is better for all templates to be expanded in a
			// consistent SOL state and not be dependent on their context;
			// turn now to phab task T2529 which (via a fragile hack) tried
			// to ensure that every template which started with
			// start-of-line-sensitive markup was evaluated in a
			// start-of-line context (by hackily inserting a newline). Not
			// everyone was satisfied with this hack (see T14974), but it's
			// been the way things work for over a decade now (as evidenced
			// by T14974 never having been "fixed").
			//
			// So, while we've established we would prefer *not* to use page
			// context to set the initial SOL value for tokenizing the
			// template, what *should* the initial SOL value be?
			//
			// * Treat every transclusion as a fresh document starting in SOL
			//   state, ie set "sol" => true always. This is supported by
			//   most current wiki use, and is the intent behind the original
			//   T2529 hack (although that hack left a number of edge cases,
			//   described below).
			//
			// * Use `"sol" => false` for templates -- this was the solution
			//   rejected by the original T2529 as being contrary to editor
			//   expectations.
			//
			// * In the future, one might allow the template itself to
			//   specify what its initial SOL state should be, using a
			//   mechanism similar to what might be necessary for typed
			//   templates. This could also address T14974. This is not
			//   excluded by Parsoid at this point; but it would probably be
			//   signaled by a template "return type" which is *not* DOM
			//   therefore the template wouldn't get parsed "as wikitext"
			//   (ie, T14974 wants an "attribute-value" return type which is
			//   a plain string, and some of the wikitext 2.0 proposals
			//   anticipate an "attribute name/value" dictionary as a possible
			//   return type).
			//
			// In support of using sol=>true as the default initial state,
			// let's examine the sol-sensitive wikitext constructs, and
			// implicitly the corner cases left open by the T2529 hack. (For
			// non-sol-sensitive constructs, the initial SOL state is
			// irrelevant.)
			//
			// - SOL-sensitive constructs include lists, headings, indent-pre,
			//   and table syntax.
			// - Of these, only lists, headings, and table syntax are actually handled in
			//   the PEG tokenizer and are impacted by SOL state.
			// - Indent-Pre has its own handler that operates in a full page token context
			//   and isn't impacted.
			// - T2529 effectively means for *#:; (lists) and {| (table start), newlines
			//   are added which means no matter what value we set here, they will get
			//   processed in sol state.
			// - This leaves us with headings (=), table heading (!), table row (|), and
			//   table close (|}) syntax that would be impacted by what we set here.
			// - Given that table row/heading/close templates are very very common on wikis
			//   and used for constructing complex tables, sol => true will let us handle
			//   those without hacks. We aren't fully off the hook there -- see the code
			//   in TokenStreamPatcher, AttributeExpander, TableFixups that all exist
			//   to work around the fact that decoupled processing isn't the wikitext
			//   default. But, without sol => true, we'll likely be in deeper trouble.
			// - But, this can cause some occasional bad parses where "=|!" aren't meant
			//   to be processed as a sol-wikitext construct.
			// - Note also that the workaround for T14974 (ie, the T2529 hack applying
			//   where sol=false is actually desired) has traditionally been to add an
			//   initial <nowiki/> which ensures that the "T2529 characters" are not
			//   initial. There are a number of alternative mechanisms to accomplish
			//   this (ie, HTML-encode the first character).
			//
			// To honor the spirit of T2529 it seems plausible to try to lint
			// away the remaining corner cases where T2529 does *not* result
			// in start-of-line state for template expansion, and to use the
			// various workarounds for compatibility in the meantime.
			//
			// We should also pick *one* of the workarounds for T14974
			// (probably `<nowiki/>` at the first position in the template),
			// support that (until a better mechanism exists), and (if
			// possible) lint away any others.
			'sol' => true
		]
	);
}
| 389 | |
/**
 * Expands value all the way to DOM.
 *
 * Expansions are memoized in the env's "AttributeExpansion" cache, keyed
 * on the two pipeline flags plus the source text covered by the value's
 * source offsets. On a cache hit, the DSR values of the cached fragment
 * are shifted by the difference between this value's start offset and
 * the start offset recorded with the cached entry.
 *
 * @param Env $env
 *    The environment/context for the expansion.
 * @param Frame $frame
 *    The parent frame within which the expansion is taking place.
 *    Used for template expansion and source text tracking.
 * @param array $v
 *    The value to process.
 *    The value is expected to be an associative array with a "html" property.
 *    The html property is expanded to DOM only if it is an array (of tokens)
 *    or a (wikitext) string.
 *    Non-array/non-strings are passed back unexpanded.
 *    When "html" is expanded, a "srcOffsets" entry is read as well; it is
 *    always removed from the returned array.
 * @param bool $expandTemplates
 *    Should any templates encountered here be expanded
 *    (usually false for nested templates since they are never directly editable).
 * @param bool $inTemplate
 *    Unexpanded templates can occur in the content of extension tags.
 * @return array{html:DocumentFragment}
 */
public static function expandAttrValueToDOM(
	Env $env, Frame $frame, array $v, bool $expandTemplates, bool $inTemplate
): array {
	$html = $v['html'] ?? null;
	if ( is_string( $html ) ) {
		// A bare string is treated as a one-element token array.
		$html = [ $html ];
	}

	if ( is_array( $html ) ) {
		$srcRange = $v['srcOffsets'];
		$startOffset = $srcRange->start;

		// A non-null $cacheKey doubles as the "this expansion is cacheable" flag.
		$cache = null;
		$cacheKey = null;
		if ( $startOffset >= 0 && $srcRange->length() > 0 ) {
			$srcText = $srcRange->substr( $frame->getSource() );
			$cache = $env->getCache(
				"AttributeExpansion",
				[
					"repeatThreshold" => 4,
					"cloneValue" => static function ( array $value ) {
						$value['fragment'] = DOMDataUtils::cloneNode( $value['fragment'], true );
						return $value;
					}
				]
			);
			if ( strlen( $srcText ) > 0 ) {
				// $expandTemplates & $inTemplate are pipeline options below
				// and should be part of the cache key
				$expandTag = $expandTemplates ? 'e1-' : 'e0-';
				$tplTag = $inTemplate ? 't1-' : 't0-';
				$cacheKey = $expandTag . $tplTag . $srcText;
			}
		}

		$fragment = null;
		if ( $cacheKey !== null ) {
			$hit = $cache->lookup( $cacheKey );
			if ( $hit !== null ) {
				$delta = $startOffset - $hit['value']['start'];
				$fragment = $hit['value']['fragment'];
				// Re-home the cached DSR values onto this frame's source
				// and move them to where this value actually sits.
				ContentUtils::shiftDSR(
					$env, $fragment,
					static function ( DomSourceRange $dsr ) use ( $delta, $frame ) {
						$dsr->source = $frame->getSource();
						return $dsr->offset( $delta );
					}
				);
			}
		}

		if ( $fragment === null ) {
			$tokens = array_merge( $html, [ new EOFTk() ] );
			$fragment = self::processContentInPipeline(
				$env,
				$frame,
				$tokens,
				[
					'pipelineType' => 'expanded-tokens-to-fragment',
					'pipelineOpts' => [
						'attrExpansion' => true,
						'inlineContext' => true,
						'expandTemplates' => $expandTemplates,
						'inTemplate' => $inTemplate
					],
					'srcOffsets' => $srcRange,
					'sol' => true
				]
			);
			if ( $cacheKey !== null ) {
				$cache->cache(
					$cacheKey,
					[
						'start' => $startOffset,
						'fragment' => $fragment
					],
					$frame->getSource()
				);
			}
		}

		// Since we aren't at the top level, data attrs
		// were not applied in cleanup. However, tmp
		// was stripped.
		$v['html'] = $fragment;
	}

	// Remove srcOffsets after value is expanded, so they don't show
	// up in the output data-mw attribute
	unset( $v['srcOffsets'] );
	return $v;
}
| 501 | |
/**
 * Expand each of a list of attribute values to DOM by calling
 * expandAttrValueToDOM() on it.
 *
 * @param Env $env
 *    The environment/context for the expansion.
 * @param Frame $frame
 *    The parent frame within which the expansion is taking place.
 *    Used for template expansion and source text tracking.
 * @param array $vals
 *    Array of values to expand.
 *    Every element must itself be an array: it is handed straight to
 *    expandAttrValueToDOM(), whose `array $v` parameter rejects anything
 *    else. Each element is expected to be an associative array with a
 *    "html" property; see expandAttrValueToDOM() for when that property
 *    is expanded.
 *    Keys of $vals are not preserved; the result is re-indexed from 0.
 * @param bool $expandTemplates
 *    Should any templates encountered here be expanded
 *    (usually false for nested templates since they are never directly editable).
 * @param bool $inTemplate
 *    Unexpanded templates can occur in the content of extension tags.
 *
 * @return list<array{html:DocumentFragment}>
 */
public static function expandAttrValuesToDOM(
	Env $env, Frame $frame, array $vals, bool $expandTemplates, bool $inTemplate
): array {
	$expanded = [];
	foreach ( $vals as $val ) {
		$expanded[] = self::expandAttrValueToDOM(
			$env, $frame, $val, $expandTemplates, $inTemplate
		);
	}
	return $expanded;
}
| 530 | |
/**
 * Gather what is needed to build a tag token from a DOM element: its
 * attributes as KV pairs plus its data-parsoid and data-mw objects. The
 * node comes from a DOM whose data attributes are stored outside the DOM.
 *
 * The attribute named DOMDataUtils::DATA_OBJECT_ATTR_NAME is left out of
 * the KV list. An empty data-mw is reported as null.
 *
 * @param Element $node
 * @param array<string,string> $attrs Attribute name => value map for $node
 *
 * @return array{attrs: list<KV>, dataParsoid: DataParsoid, dataMw: ?\Wikimedia\Parsoid\NodeData\DataMw}
 */
private static function domAttrsToTagAttrs( Element $node, array $attrs ): array {
	$kvs = [];
	foreach ( $attrs as $attrName => $attrValue ) {
		if ( $attrName === DOMDataUtils::DATA_OBJECT_ATTR_NAME ) {
			continue;
		}
		$kvs[] = new KV( $attrName, $attrValue );
	}

	$dataMw = DOMDataUtils::getDataMw( $node );
	$dataParsoid = DOMDataUtils::getDataParsoid( $node );

	return [
		'attrs' => $kvs,
		'dataParsoid' => $dataParsoid,
		'dataMw' => $dataMw->isEmpty() ? null : $dataMw,
	];
}
| 554 | |
/**
 * Convert a DOM to tokens. Data attributes for nodes are stored outside the DOM.
 *
 * Elements become a SelfclosingTagTk (void elements) or a TagTk / EndTagTk
 * pair around the recursively converted children; text nodes go through
 * TokenUtils::newlinesToNlTks(); comments become a CommentTk.
 *
 * @param Node $node The root of the DOM tree to convert to tokens
 * @param array<Token|string> $tokBuf Tokens collected so far; received by
 *   value, so callers must use the returned array
 * @return array $tokBuf with the tokens for $node appended
 */
private static function convertDOMtoTokens( Node $node, array $tokBuf ): array {
	if ( $node instanceof Text ) {
		PHPUtils::pushArray( $tokBuf, TokenUtils::newlinesToNlTks( $node->nodeValue ) );
		return $tokBuf;
	}

	if ( $node instanceof Comment ) {
		$tokBuf[] = new CommentTk( $node->nodeValue );
		return $tokBuf;
	}

	if ( !( $node instanceof Element ) ) {
		// getWrapperTokens calls convertDOMtoTokens with an Element
		// and children of DOM elements are always text/comment/elements
		// which are all covered above.
		throw new UnreachableException( "Should never get here!" );
	}

	$tagName = DOMUtils::nodeName( $node );
	$info = self::domAttrsToTagAttrs( $node, DOMCompat::attributes( $node ) );

	if ( Utils::isVoidElement( $tagName ) ) {
		$tokBuf[] = new SelfclosingTagTk(
			$tagName, $info['attrs'], $info['dataParsoid'], $info['dataMw']
		);
		return $tokBuf;
	}

	$tokBuf[] = new TagTk(
		$tagName, $info['attrs'], $info['dataParsoid'], $info['dataMw']
	);

	$child = $node->firstChild;
	while ( $child ) {
		$tokBuf = self::convertDOMtoTokens( $child, $tokBuf );
		$child = $child->nextSibling;
	}

	$endTag = new EndTagTk( $tagName );
	// Keep stx parity
	if ( WTUtils::isLiteralHTMLNode( $node ) ) {
		$endTag->dataParsoid->stx = 'html';
	}
	$tokBuf[] = $endTag;

	return $tokBuf;
}
| 600 | |
| 601 | /** |
| 602 | * Get tokens representing a DOM forest (from transclusions, extensions, |
| 603 | * whatever that were generated as part of a separate processing pipeline) |
| 604 | * in the token stream. These tokens will tunnel the subtree through the |
| 605 | * token processing while preserving token stream semantics as if |
| 606 | * the DOM had been converted to tokens. |
| 607 | * |
| 608 | * @param DocumentFragment $domFragment List of DOM nodes that need to be tunneled through. |
| 609 | * @param array $opts |
| 610 | * @see tunnelDOMThroughTokens's doc. for more info about these options. |
| 611 | * @return array<Token|string> List of token representatives. |
| 612 | */ |
| 613 | private static function getWrapperTokens( |
| 614 | DocumentFragment $domFragment, array $opts |
| 615 | ): array { |
| 616 | if ( !$domFragment->hasChildNodes() ) { |
| 617 | return [ new TagTk( 'span' ), new EndTagTk( 'span' ) ]; |
| 618 | } |
| 619 | |
| 620 | $node = $domFragment->firstChild; |
| 621 | |
| 622 | // Do we represent this with inline or block elements? |
| 623 | // This is to ensure that we get p-wrapping correct. |
| 624 | // |
| 625 | // * If all content is inline, we use inline-elements to represent this |
| 626 | // so that this content gets swallowed into the P tag that wraps |
| 627 | // adjacent inline content. |
| 628 | // |
| 629 | // * If any part of this is a block content, we treat extension content |
| 630 | // independent of surrounding content and don't want inline content |
| 631 | // here to be swallowed into a P tag that wraps adjacent inline content. |
| 632 | // |
| 633 | // This behavior ensures that we and clients can "drop-in" extension content |
| 634 | // into the DOM without messing with fixing up paragraph tags of surrounding |
| 635 | // content. It could potentially introduce minor rendering differences when |
| 636 | // compared to PHP parser output, but we'll swallow it for now. |
| 637 | $wrapperType = 'INLINE'; |
| 638 | if ( !empty( $opts['pipelineOpts']['inlineContext'] ) ) { |
| 639 | // If the DOM fragment is being processed in the context where P wrapping |
| 640 | // has been suppressed, we represent the DOM fragment with inline-tokens. |
| 641 | // |
| 642 | // FIXME(SSS): Looks like we have some "impedance mismatch" here. But, this |
| 643 | // is correct in scenarios where link-content or image-captions are being |
| 644 | // processed in a sub-pipeline and we don't want a <div> in the link-caption |
| 645 | // to cause the <a>..</a> to get split apart. |
| 646 | // |
| 647 | // Filed as T49963 |
| 648 | } elseif ( !$opts['unpackOutput'] ) { |
| 649 | // Fragments that won't be unpacked aren't amenable to inspection, since |
| 650 | // the ultimate content is unknown. For example, refs shuttle content |
| 651 | // through treebuilding that ends up in the references list. |
| 652 | // |
| 653 | // FIXME(arlolra): Do we need a mechanism to specify content |
| 654 | // categories? |
| 655 | } else { |
| 656 | foreach ( DOMUtils::childNodes( $domFragment ) as $n ) { |
| 657 | if ( |
| 658 | DOMUtils::isWikitextBlockNode( $n ) || |
| 659 | DOMUtils::hasBlockElementDescendant( $n ) |
| 660 | ) { |
| 661 | $wrapperType = 'BLOCK'; |
| 662 | break; |
| 663 | } |
| 664 | } |
| 665 | } |
| 666 | |
| 667 | if ( $wrapperType === 'BLOCK' && !DOMUtils::isWikitextBlockNode( $node ) ) { |
| 668 | $wrapperName = 'div'; |
| 669 | } elseif ( DOMUtils::nodeName( $node ) === 'a' ) { |
| 670 | // Do not use 'A' as a wrapper node because it could |
| 671 | // end up getting nested inside another 'A' and the DOM |
| 672 | // structure can change where the wrapper tokens are no |
| 673 | // longer siblings. |
| 674 | // Ex: "[http://foo.com Bad nesting [[Here]]]. |
| 675 | $wrapperName = 'span'; |
| 676 | } elseif ( |
| 677 | in_array( DOMUtils::nodeName( $node ), [ 'style', 'script' ], true ) && |
| 678 | ( $node->nextSibling !== null ) |
| 679 | ) { |
| 680 | // <style>/<script> tags are not fostered, so if we're wrapping |
| 681 | // more than a single node, they aren't a good representation for |
| 682 | // the content. It can lead to fosterable content being inserted |
| 683 | // in a fosterable position after treebuilding is done, which isn't |
| 684 | // roundtrippable. |
| 685 | $wrapperName = 'span'; |
| 686 | } elseif ( !( $node instanceof Element ) ) { |
| 687 | $wrapperName = 'span'; |
| 688 | } else { |
| 689 | $wrapperName = DOMUtils::nodeName( $node ); |
| 690 | } |
| 691 | |
| 692 | if ( $node instanceof Element ) { |
| 693 | Assert::invariant( |
| 694 | // No need to look for data-mw as well. |
| 695 | // Nodes that have data-mw also have data-parsoid. |
| 696 | !$node->hasAttribute( 'data-parsoid' ), |
| 697 | "Expected node to have its data attributes loaded" ); |
| 698 | |
| 699 | $nodeData = clone DOMDataUtils::getNodeData( $node ); |
| 700 | |
| 701 | if ( $wrapperName !== DOMUtils::nodeName( $node ) ) { |
| 702 | // Create a copy of the node without children |
| 703 | $workNode = $node->ownerDocument->createElement( $wrapperName ); |
| 704 | |
| 705 | // Copy over attributes |
| 706 | foreach ( DOMCompat::attributes( $node ) as $name => $value ) { |
| 707 | // "typeof" is ignored since it'll be removed below. |
| 708 | if ( $name !== 'typeof' ) { |
| 709 | $workNode->setAttribute( $name, $value ); |
| 710 | } |
| 711 | } |
| 712 | |
| 713 | // We are applying a different wrapper. |
| 714 | // So, node's data-parsoid isn't applicable. |
| 715 | $nodeData->parsoid = new DataParsoid; |
| 716 | } else { |
| 717 | // Shallow clone since we don't want to convert the whole tree to tokens. |
| 718 | $workNode = $node->cloneNode( false ); |
| 719 | |
| 720 | // Reset 'tsr' since it isn't applicable. Neither is |
| 721 | // any auxiliary info like 'endTSR'. |
| 722 | // FIXME: The above comment is only true if we are reusing |
| 723 | // DOM fragments from cache from previous revisions in |
| 724 | // incremental parsing scenarios. See T98992 |
| 725 | if ( isset( $nodeData->parsoid->tsr ) ) { |
| 726 | $nodeData->parsoid->tsr = null; |
| 727 | } |
| 728 | if ( isset( $nodeData->parsoid->tmp->endTSR ) ) { |
| 729 | unset( $nodeData->parsoid->tmp->endTSR ); |
| 730 | } |
| 731 | if ( isset( $nodeData->parsoid->html ) ) { |
| 732 | unset( $nodeData->parsoid->html ); |
| 733 | } |
| 734 | |
| 735 | // The "in transclusion" flag was set on the first child for template |
| 736 | // wrapping in the nested pipeline, and doesn't apply to the dom |
| 737 | // fragment wrapper in this pipeline. Keeping it around can induce |
| 738 | // template wrapping of a foster box if the dom fragment is found in |
| 739 | // a fosterable position. |
| 740 | if ( |
| 741 | $nodeData->parsoid !== null && |
| 742 | $nodeData->parsoid->getTempFlag( TempData::IN_TRANSCLUSION ) |
| 743 | ) { |
| 744 | $nodeData->parsoid->tmp->setFlag( TempData::IN_TRANSCLUSION, false ); |
| 745 | } |
| 746 | // Similarly for "fostered", it applies to the nested pipeline and, |
| 747 | // if transferred, can interfere when unpacking |
| 748 | if ( isset( $nodeData->parsoid->fostered ) ) { |
| 749 | unset( $nodeData->parsoid->fostered ); |
| 750 | } |
| 751 | |
| 752 | // Note that the TempData::WRAPPER flag may be transferred to the |
| 753 | // fragment wrapper. Depending on the contents of the fragment, |
| 754 | // it's questionable if that's truly representative. Our modeling |
| 755 | // based on the first node of the fragment has limitations. |
| 756 | } |
| 757 | |
| 758 | DOMDataUtils::setNodeData( $workNode, $nodeData ); |
| 759 | } else { |
| 760 | $workNode = $node->ownerDocument->createElement( $wrapperName ); |
| 761 | } |
| 762 | |
| 763 | $tokens = self::convertDOMtoTokens( $workNode, [] ); |
| 764 | |
| 765 | // Remove the typeof attribute from the first token. |
| 766 | // It will be replaced with mw:DOMFragment. |
| 767 | $tokens[0]->removeAttribute( 'typeof' ); |
| 768 | |
| 769 | // Remove the about attribute from the first token. |
| 770 | // We want to be able to distinguish when this wrapper was template |
| 771 | // annotated. |
| 772 | $tokens[0]->removeAttribute( 'about' ); |
| 773 | |
| 774 | return $tokens; |
| 775 | } |
| 776 | |
/**
 * Build the placeholder ("wrapper") tokens that carry an already-built
 * DOM fragment through the remaining token transformations.
 *
 * The wrapper tokens stand in for the fragment so that later token
 * stages see something with adequate semantics; the
 * DOMProcessorPipeline later unpacks the fragment and puts the HTML
 * back into the DOM.
 *
 * @param Env $env
 *   The active environment/context (not read by this method's body).
 * @param Token $token
 *   The token that generated the DOM. Its data-parsoid supplies the
 *   fallback tsr and the extTagOffsets copied onto the first wrapper.
 * @param DocumentFragment $domFragment Outermost nodes of the HTML
 * @param array $opts
 *   - SourceRange tsr
 *     The TSR to set on the generated tokens; falls back to the tsr of
 *     $token when absent. This TSR is used to compute DSR on the
 *     placeholder tokens. The computed DSR is transferred over to the
 *     unpacked DOM if setDSR is true (see below).
 *   - bool setDSR
 *     When the DOM fragment is unpacked, this option governs whether
 *     the DSR from the placeholder node is transferred over to the
 *     unpacked DOM or not. For example: Cite, reused transclusions.
 *     Only recorded (as TempData::SET_DSR) when non-empty.
 *   - array pipelineOpts
 *     Forwarded untouched to getWrapperTokens().
 *   - bool unpackOutput
 *     Defaults to true. When false, the wrapper is typed as
 *     'mw:DOMFragment/sealed/<wrapperName>'.
 *   - string wrapperName
 *     Read only when unpackOutput is false (see above).
 * @return array<Token|string>
 */
public static function tunnelDOMThroughTokens(
	Env $env, Token $token, DocumentFragment $domFragment, array $opts
): array {
	// Unpacking is the default unless the caller opted out explicitly.
	$opts['unpackOutput'] ??= true;

	// Placeholder tokens that shepherd the sub-DOM through the token
	// stages; they are unwrapped again once we are back on the DOM.
	$wrapperTokens = self::getWrapperTokens( $domFragment, $opts );
	$head = $wrapperTokens[0];

	// Tag the first wrapper as a DOMFragment so it gets unwrapped later.
	$typeOf = 'mw:DOMFragment';
	if ( !$opts['unpackOutput'] ) {
		$typeOf .= '/sealed/' . $opts['wrapperName'];
	}
	$head->setAttribute( 'typeof', $typeOf );

	// Stash the fragment on the first wrapper token's data-parsoid
	// ('html'), making sure nothing is already stored there.
	Assert::invariant(
		!isset( $head->dataParsoid->html ),
		"Overwriting existing DOMFragment"
	);
	$head->dataParsoid->html = $domFragment;

	// Pass through the setDSR flag, if any.
	$setDSR = $opts['setDSR'] ?? null;
	if ( $setDSR ) {
		$head->dataParsoid->setTempFlag( TempData::SET_DSR, $setDSR );
	}

	// Without a source range there is nothing left to transfer.
	$sourceRange = $opts['tsr'] ?? $token->dataParsoid->tsr ?? null;
	if ( !$sourceRange ) {
		return $wrapperTokens;
	}

	// The first token gets the full width ...
	$head->dataParsoid->tsr = $sourceRange;
	$head->dataParsoid->extTagOffsets = $token->dataParsoid->extTagOffsets ?? null;

	// ... and every following token a zero-width range at its end.
	// XXX to investigate: if $sourceRange->end is null, then we're
	// losing the 'hint' we'd like to provide here that this is a
	// zero-width source range.
	// ->end can be set to null by WikiLinkHandler::bailTokens()
	$zeroWidth = new SourceRange(
		$sourceRange->end, $sourceRange->end, $sourceRange->source
	);
	foreach ( $wrapperTokens as $idx => $wrapperToken ) {
		if ( $idx === 0 ) {
			continue;
		}
		$wrapperToken->dataParsoid->tsr = clone $zeroWidth;
	}

	return $wrapperTokens;
}
| 851 | |
/**
 * Move the accumulated nodes into a freshly created <span> and reset
 * the accumulator.
 *
 * The span is inserted immediately before the first accumulated node
 * (under that node's parent), the accumulated nodes are then appended
 * to it in order, and the span receives a new DataParsoid carrying the
 * TempData::WRAPPER flag.
 *
 * @param Document $doc Document used to create the span.
 * @param array &$textCommentAccum Nodes to wrap; must be non-empty on
 *   entry (index 0 is dereferenced) and is emptied on return.
 */
private static function wrapAccum(
	Document $doc, array &$textCommentAccum
): void {
	$wrapper = $doc->createElement( 'span' );

	// Park the wrapper where the run begins, then pull the run into it.
	$firstNode = $textCommentAccum[0];
	$firstNode->parentNode->insertBefore( $wrapper, $firstNode );
	foreach ( $textCommentAccum as $accumulated ) {
		$wrapper->appendChild( $accumulated );
	}

	// Flag the span as a wrapper in its data-parsoid.
	$wrapperDp = new DataParsoid;
	$wrapperDp->setTempFlag( TempData::WRAPPER );
	DOMDataUtils::setDataParsoid( $wrapper, $wrapperDp );

	// Start a fresh run for the caller.
	$textCommentAccum = [];
}
| 867 | |
/**
 * Wrap text and comment nodes in a node list into spans, so that all
 * top-level nodes are elements.
 *
 * Each run of Text/Comment nodes that are consecutive in $nodes is moved
 * into one new <span> (see wrapAccum()); any other node ends the current
 * run. An empty $nodes list is a no-op.
 *
 * NOTE(review): wrapAccum() inserts the span before the first node of a
 * run and appends the rest to it, so this presumably expects $nodes to
 * be adjacent siblings in document order -- confirm against callers.
 *
 * @param list<Node> $nodes List of DOM nodes to wrap, mix of node types.
 * @param ?Node $startAt If given, nodes listed before it are skipped and
 *   processing starts at this node (inclusive). If it does not occur in
 *   $nodes, nothing is wrapped.
 * @param ?Node $stopAt If given, processing ends after this node
 *   (inclusive).
 */
public static function addSpanWrappers(
	array $nodes,
	?Node $startAt = null,
	?Node $stopAt = null
): void {
	// Nothing to wrap. Bail out before $nodes[0] is dereferenced below,
	// which would otherwise raise "Undefined array key 0" and
	// "Attempt to read property on null" warnings.
	if ( !$nodes ) {
		return;
	}

	$textCommentAccum = [];
	$doc = $nodes[0]->ownerDocument;

	// With no explicit start node, processing begins right away.
	$start = ( $startAt === null );
	foreach ( $nodes as $node ) {
		if ( !$start ) {
			if ( $startAt !== $node ) {
				continue;
			}
			$start = true;
		}
		if ( $node instanceof Text || $node instanceof Comment ) {
			$textCommentAccum[] = $node;
		} elseif ( count( $textCommentAccum ) ) {
			// A non-text, non-comment node closes the pending run.
			self::wrapAccum( $doc, $textCommentAccum );
		}
		if ( $node === $stopAt ) {
			break;
		}
	}

	// Flush a run that reached the end of the list (or $stopAt).
	if ( count( $textCommentAccum ) ) {
		self::wrapAccum( $doc, $textCommentAccum );
	}
}
| 906 | |
/**
 * Fetches output of encapsulations that return HTML from the legacy parser
 *
 * The wikitext is handed to the environment's data access object
 * (parseWikitext()); the returned HTML is passed through
 * DOMUtils::stripPWrapper() and parsed into a fragment of the
 * top-level document.
 *
 * @param Env $env The active environment/context.
 * @param string $source Wikitext to parse.
 * @return ?DocumentFragment The parsed fragment, or null when
 *   parseWikitext() returned an empty string.
 */
public static function parseToHTML( Env $env, string $source ): ?DocumentFragment {
	$html = $env->getDataAccess()->parseWikitext(
		$env->getPageConfig(), $env->getMetadata(), $source
	);

	// No output at all: there is nothing to build a fragment from.
	if ( $html === '' ) {
		return null;
	}

	return DOMUtils::parseHTMLToFragment(
		$env->getTopLevelDoc(), DOMUtils::stripPWrapper( $html )
	);
}
| 918 | } |