MediaWiki master
ContentDOMTransformStage.php
Go to the documentation of this file.
1<?php
2
4
8use stdClass;
9use Wikimedia\Parsoid\Core\PageBundle;
10use Wikimedia\Parsoid\DOM\Document;
11use Wikimedia\Parsoid\Mocks\MockEnv;
12use Wikimedia\Parsoid\Utils\ContentUtils;
13use Wikimedia\Parsoid\Utils\DOMCompat;
14use Wikimedia\Parsoid\Utils\DOMDataUtils;
15use Wikimedia\Parsoid\Utils\DOMUtils;
16
26
/**
 * Transforms the input ParserOutput into the returned ParserOutput by
 * round-tripping its content holder text through a DOM document.
 *
 * Steps visible here: build a Document from $po, hand it to
 * transformDOM(), serialize the body of the returned Document (inner XML
 * only) and write that string back with $po->setContentHolderText().
 *
 * NOTE(review): this listing skips some original lines. The assignments of
 * $hasPageBundle and $pb, and the arguments passed to
 * ContentUtils::createAndLoadDocument(), are not visible in this view.
 *
 * @param ParserOutput $po Output whose content holder text is read and then
 *   overwritten; the same object is returned.
 * @param ParserOptions|null $popts Passed through unchanged to transformDOM().
 * @param array &$options Passed through (by reference) to transformDOM().
 * @return ParserOutput The $po instance that was passed in, after its
 *   content holder text has been replaced.
 */
30 public function transform( ParserOutput $po, ?ParserOptions $popts, array &$options ): ParserOutput {
31 // TODO will use HTMLHolder in the future
32 $doc = null;
// NOTE(review): $hasPageBundle is set on a line not shown in this listing.
34 if ( $hasPageBundle ) {
// Page-bundle path: parse the stored text, apply $pb to the document, then
// prepare the document and load the data attributes found under <body>.
// NOTE(review): $pb is assigned on a line not shown in this listing.
36 $doc = DOMUtils::parseHTML( $po->getContentHolderText() );
37 PageBundle::apply( $doc, $pb );
38 DOMDataUtils::prepareDoc( $doc );
39 DOMDataUtils::visitAndLoadDataAttribs(
40 DOMCompat::getBody( $doc )
41 );
42 } else {
// No page bundle: ContentUtils builds and loads the document in one call.
// NOTE(review): the argument line of this call is missing from the listing.
43 $doc = ContentUtils::createAndLoadDocument(
45 );
46 }
47
// Delegate the actual modification to the subclass; the returned Document
// (not necessarily the same instance) is what gets serialized below.
48 $doc = $this->transformDOM( $doc, $po, $popts, $options );
49
50 // TODO will use HTMLHolder in the future
51 if ( $hasPageBundle ) {
// Store data attributes back with 'storeInPageBundle' enabled; the 'env'
// option is given a freshly constructed MockEnv with an empty config array.
52 DOMDataUtils::visitAndStoreDataAttribs(
53 DOMCompat::getBody( $doc ),
54 [
55 'storeInPageBundle' => true,
56 'env' => new MockEnv( [] ),
57 ]
58 );
// Read the page bundle off the document and pass it through the T365036
// workaround, which hands back a stdClass.
59 $pb = DOMDataUtils::getPageBundle( $doc );
60 $pb = self::workaroundT365036( $doc, $pb );
61
// NOTE(review): a line is missing from the listing here; presumably it writes
// $pb back onto $po, since $pb is not used again below. Confirm against the file.
63 $text = ContentUtils::toXML( DOMCompat::getBody( $doc ), [
64 'innerXML' => true,
65 ] );
66 } else {
// Without a page bundle the body is serialized with ppToXML() instead of
// toXML(), using the same 'innerXML' option.
67 $text = ContentUtils::ppToXML( DOMCompat::getBody( $doc ), [
68 'innerXML' => true,
69 ] );
70 }
71 $po->setContentHolderText( $text );
72 return $po;
73 }
74
/**
 * Round-trips a PageBundle through the document and returns what comes back.
 *
 * The bundle is injected into $doc with DOMDataUtils::injectPageBundle() and
 * immediately read out again with DOMDataUtils::extractPageBundle(); the
 * extracted value is returned and is declared to be a stdClass.
 *
 * NOTE(review): the method name points at task T365036; the reason the
 * round-trip is needed is not visible in this listing.
 *
 * @param Document $doc Document the bundle is injected into and extracted from.
 * @param PageBundle $pb Bundle to convert.
 * @return stdClass The value extracted from $doc after injection.
 */
81 private function workaroundT365036( Document $doc, PageBundle $pb ): stdClass {
82 DOMDataUtils::injectPageBundle( $doc, $pb );
83 $convertedPageBundle = DOMDataUtils::extractPageBundle( $doc );
84 // Tell phan that $convertedPageBundle is non-null since $pb was non-null
// The bare string statement below is an inline annotation for phan and must
// keep naming the same variable as the assignment above.
85 '@phan-var stdClass $convertedPageBundle';
86 return $convertedPageBundle;
87 }
88
/**
 * Applies the transformation to a DOM document.
 *
 * Called by transform() with the document built from the ParserOutput's
 * content holder text; the Document returned here is the one transform()
 * serializes back into the ParserOutput.
 *
 * @param Document $dom Document to transform.
 * @param ParserOutput $po The ParserOutput given to transform().
 * @param ParserOptions|null $popts The ParserOptions given to transform(), if any.
 * @param array &$options The options array given to transform(), by reference.
 * @return Document The transformed document.
 */
90 abstract public function transformDOM(
91 Document $dom, ParserOutput $po, ?ParserOptions $popts, array &$options
92 ): Document;
93
94}
OutputTransformStages that modify the content as a HTML DOM tree.
transformDOM(Document $dom, ParserOutput $po, ?ParserOptions $popts, array &$options)
Applies the transformation to a DOM document.
transform(ParserOutput $po, ?ParserOptions $popts, array &$options)
Transforms the input ParserOutput into the returned ParserOutput. The returned ParserOutput can explic...
ParserOutput is a rendering of a Content object or a message.
getContentHolderText()
Returns the content holder text of the ParserOutput.
setContentHolderText(string $s)
Sets the content holder text of the ParserOutput.
Provides methods for conversion between PageBundle and ParserOutput. TODO: Convert to a trait once we ...
static pageBundleFromParserOutput(ParserOutput $parserOutput)
Returns a Parsoid PageBundle equivalent to the given ParserOutput.
static applyPageBundleDataToParserOutput( $pageBundle, ParserOutput $parserOutput)
Given an existing ParserOutput and a PageBundle, applies the PageBundle data to the ParserOutput.
Set options of the Parser.
Classes implementing the OutputTransformStage aim at being added to a pipeline of transformations tha...