Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 95 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
| PageBundle | |
0.00% |
0 / 95 |
|
0.00% |
0 / 9 |
342 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| newEmpty | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
2 | |||
| validate | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
42 | |||
| responseData | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
6 | |||
| fromDomPageBundle | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
12 | |||
| toSingleDocumentHtml | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| toInlineAttributeHtml | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| toJsonArray | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
| newFromJsonArray | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Core; |
| 5 | |
| 6 | use Composer\Semver\Semver; |
| 7 | use Wikimedia\JsonCodec\JsonCodecable; |
| 8 | use Wikimedia\JsonCodec\JsonCodecableTrait; |
| 9 | use Wikimedia\Parsoid\Utils\DOMCompat; |
| 10 | use Wikimedia\Parsoid\Wt2Html\XMLSerializer; |
| 11 | |
/**
 * An HTML document, held as a string, bundled with the data-parsoid and
 * (optionally) data-mw content that has been separated out of it.
 *
 * The separated data-parsoid and data-mw content is indexed by the id
 * attributes on individual nodes, and has to be loaded before that
 * information can be used.
 *
 * The parsoid/mw properties of the bundle are kept in "serialized array"
 * form: flat arrays appropriate for json-encoding, which do not contain
 * DataParsoid or DataMw objects.
 *
 * See DomPageBundle for the similar structure used where the HTML string
 * has been parsed into a DOM.
 */
class PageBundle implements JsonCodecable {
	use JsonCodecableTrait;

	/** The document, as an HTML string. */
	public string $html;

	/**
	 * Array serialization of DataParsoid, keyed (under 'ids') by the ID of
	 * the Node it belongs to.
	 *
	 * @var null|array{counter?:int,offsetType?:'byte'|'ucs2'|'char',ids:array<string,array>}
	 */
	public $parsoid;

	/**
	 * Array serialization of DataMw, keyed (under 'ids') by the ID of the
	 * Node it belongs to.
	 *
	 * @var null|array{ids:array<string,array>}
	 */
	public $mw;

	/**
	 * Version string; used as the content version in the profile URLs
	 * built by responseData().
	 * @var ?string
	 */
	public $version;

	/**
	 * HTTP headers as a name => value map; both sides should be strings.
	 * @var array<string,string>|null
	 */
	public $headers;

	/** @var string|null */
	public $contentmodel;

	/**
	 * Store each argument on the property of the same name.
	 *
	 * @param string $html The document, as an HTML string
	 * @param ?array $parsoid Serialized data-parsoid, or null
	 * @param ?array $mw Serialized data-mw, or null
	 * @param ?string $version
	 * @param ?array $headers
	 * @param ?string $contentmodel
	 */
	public function __construct(
		string $html, ?array $parsoid = null, ?array $mw = null,
		?string $version = null, ?array $headers = null,
		?string $contentmodel = null
	) {
		$this->html = $html;
		$this->version = $version;
		$this->headers = $headers;
		$this->contentmodel = $contentmodel;
		$this->parsoid = $parsoid;
		$this->mw = $mw;
	}

	/**
	 * Create a bundle around $html whose data-parsoid and data-mw maps
	 * have no entries; the data-parsoid counter starts at -1.
	 *
	 * @param string $html
	 * @param ?string $version
	 * @param ?array $headers
	 * @param ?string $contentmodel
	 * @return self
	 */
	public static function newEmpty(
		string $html,
		?string $version = null,
		?array $headers = null,
		?string $contentmodel = null
	): self {
		$emptyParsoid = [
			'counter' => -1,
			'ids' => [],
		];
		$emptyMw = [
			'ids' => [],
		];
		return new self(
			$html, $emptyParsoid, $emptyMw, $version, $headers, $contentmodel
		);
	}

	/**
	 * Check if this pagebundle is valid.
	 *
	 * The data-parsoid content must be non-empty and carry an 'ids' entry.
	 * The same is demanded of the data-mw content, but only when
	 * $contentVersion satisfies the '^999.0.0' constraint.
	 *
	 * @param string $contentVersion Document content version to validate against.
	 * @param ?string &$errorMessage Error message will be returned here;
	 *   it is left untouched when the bundle is valid.
	 * @return bool
	 */
	public function validate(
		string $contentVersion, ?string &$errorMessage = null
	) {
		$parsoidOk = $this->parsoid && isset( $this->parsoid['ids'] );
		if ( !$parsoidOk ) {
			$errorMessage = 'Invalid data-parsoid was provided.';
			return false;
		}

		// data-mw is only inspected for content versions in the 999.x range.
		if ( !Semver::satisfies( $contentVersion, '^999.0.0' ) ) {
			return true;
		}
		if ( $this->mw && isset( $this->mw['ids'] ) ) {
			return true;
		}

		$errorMessage = 'Invalid data-mw was provided.';
		return false;
	}

	/**
	 * Build the response structure for this bundle: a 'contentmodel'
	 * string plus 'html' and 'data-parsoid' parts, each made of 'headers'
	 * and 'body'. A 'data-mw' part is appended when the version satisfies
	 * '^999.0.0'. A missing version is treated as '0.0.0' and a missing
	 * content model as the empty string.
	 *
	 * @return array
	 */
	public function responseData() {
		$version = $this->version ?? '0.0.0';

		$htmlContentType = 'text/html; charset=utf-8; '
			. 'profile="https://www.mediawiki.org/wiki/Specs/HTML/'
			. $version . '"';
		$dataParsoidContentType = 'application/json; charset=utf-8; '
			. 'profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/'
			. $version . '"';

		// Headers stored on the bundle are merged last, so they take
		// precedence over the generated content-type.
		$htmlHeaders = array_merge(
			[ 'content-type' => $htmlContentType ],
			$this->headers ?? []
		);

		$responseData = [
			'contentmodel' => $this->contentmodel ?? '',
			'html' => [
				'headers' => $htmlHeaders,
				'body' => $this->html,
			],
			'data-parsoid' => [
				'headers' => [
					'content-type' => $dataParsoidContentType,
				],
				'body' => $this->parsoid,
			],
		];

		if ( !Semver::satisfies( $version, '^999.0.0' ) ) {
			return $responseData;
		}

		$dataMwContentType = 'application/json; charset=utf-8; ' .
			'profile="https://www.mediawiki.org/wiki/Specs/data-mw/' .
			$version . '"';
		$responseData['data-mw'] = [
			'headers' => [
				'content-type' => $dataMwContentType,
			],
			'body' => $this->mw,
		];
		return $responseData;
	}

	/**
	 * Convert a DomPageBundle to a PageBundle.
	 *
	 * The DOM held by the DomPageBundle is serialized with the given
	 * $options; with a truthy 'body_only' option only the body is
	 * serialized, and 'innerXML' defaults to true. The 'contentversion',
	 * 'headers', 'contentmodel' and 'offsetType' options act as defaults:
	 * they are used only where the DomPageBundle has no value of its own.
	 *
	 * @param DomPageBundle $dpb
	 * @param array $options XMLSerializer options
	 * @return PageBundle
	 */
	public static function fromDomPageBundle( DomPageBundle $dpb, array $options = [] ): PageBundle {
		if ( $options['body_only'] ?? false ) {
			// Do not override an explicit 'innerXML' choice by the caller.
			$options += [ 'innerXML' => true ];
			$node = DOMCompat::getBody( $dpb->doc );
		} else {
			$node = $dpb->doc;
		}
		$serialized = XMLSerializer::serialize( $node, $options );

		$version = $dpb->version ?? $options['contentversion'] ?? null;
		$headers = $dpb->headers ?? $options['headers'] ?? null;
		$contentmodel = $dpb->contentmodel ?? $options['contentmodel'] ?? null;

		$pb = new PageBundle(
			$serialized['html'], $dpb->parsoid, $dpb->mw,
			$version, $headers, $contentmodel
		);

		$offsetType = $options['offsetType'] ?? null;
		if ( $offsetType !== null ) {
			// Only fills in the offset type when none is recorded yet.
			$pb->parsoid['offsetType'] ??= $offsetType;
		}
		return $pb;
	}

	/**
	 * Convert this PageBundle to "single document" form, where page bundle
	 * information is embedded in the <head> of the document. The work is
	 * delegated to a DomPageBundle built from this bundle.
	 *
	 * @param array $options XMLSerializer options
	 * @return string an HTML string
	 */
	public function toSingleDocumentHtml( array $options = [] ): string {
		$domBundle = DomPageBundle::fromPageBundle( $this );
		return $domBundle->toSingleDocumentHtml( $options );
	}

	/**
	 * Convert this PageBundle to "inline attribute" form, where page bundle
	 * information is represented as inline JSON-valued attributes. The work
	 * is delegated to a DomPageBundle built from this bundle.
	 *
	 * @param array $options XMLSerializer options
	 * @return string an HTML string
	 */
	public function toInlineAttributeHtml( array $options = [] ): string {
		$domBundle = DomPageBundle::fromPageBundle( $this );
		return $domBundle->toInlineAttributeHtml( $options );
	}

	// ------------- JsonCodecable -------------

	/** @inheritDoc */
	public function toJsonArray(): array {
		// One entry per public property, under the property's own name.
		$json = [];
		$json['html'] = $this->html;
		$json['parsoid'] = $this->parsoid;
		$json['mw'] = $this->mw;
		$json['version'] = $this->version;
		$json['headers'] = $this->headers;
		$json['contentmodel'] = $this->contentmodel;
		return $json;
	}

	/** @inheritDoc */
	public static function newFromJsonArray( array $json ): PageBundle {
		// Forward-compatibility with Parsoid 0.23: a counter found under
		// counters.nodedata is copied to parsoid.counter.
		$nodeDataCounter = $json['counters']['nodedata'] ?? null;
		if ( $nodeDataCounter !== null ) {
			$json['parsoid']['counter'] = $nodeDataCounter;
		}

		$html = $json['html'] ?? '';
		$parsoid = $json['parsoid'] ?? null;
		$mw = $json['mw'] ?? null;
		$version = $json['version'] ?? null;
		$headers = $json['headers'] ?? null;
		$contentmodel = $json['contentmodel'] ?? null;

		return new self( $html, $parsoid, $mw, $version, $headers, $contentmodel );
	}
}