Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 93 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
PageBundle | |
0.00% |
0 / 93 |
|
0.00% |
0 / 9 |
306 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
newEmpty | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
2 | |||
validate | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
42 | |||
responseData | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
6 | |||
fromDomPageBundle | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
12 | |||
toSingleDocumentHtml | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
toInlineAttributeHtml | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
toJsonArray | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
newFromJsonArray | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Core; |
5 | |
6 | use Composer\Semver\Semver; |
7 | use Wikimedia\JsonCodec\JsonCodecable; |
8 | use Wikimedia\JsonCodec\JsonCodecableTrait; |
9 | use Wikimedia\Parsoid\Utils\DOMCompat; |
10 | use Wikimedia\Parsoid\Wt2Html\XMLSerializer; |
11 | |
/**
 * Pairs a document, held as an HTML string, with its data-parsoid and
 * (optional) data-mw content stored separately from the markup.
 *
 * The separated data is keyed by the id attributes of individual nodes,
 * and has to be loaded before the data-parsoid and/or data-mw information
 * can be used.
 *
 * The parsoid/mw properties are kept in "serialized array" form: flat
 * arrays suitable for json-encoding, which do not contain DataParsoid or
 * DataMw objects.
 *
 * DomPageBundle is the similar structure used when the HTML string has
 * been parsed into a DOM.
 */
class PageBundle implements JsonCodecable {
	use JsonCodecableTrait;

	/** HTML string for the document. */
	public string $html;

	/**
	 * Array serialization of DataParsoid, looked up under 'ids' by the ID
	 * of the Node it belongs to.
	 *
	 * @var null|array{counter?:int,offsetType?:'byte'|'ucs2'|'char',ids:array<string,array>}
	 */
	public $parsoid;

	/**
	 * Array serialization of DataMw, looked up under 'ids' by the ID of
	 * the Node it belongs to.
	 *
	 * @var null|array{ids:array<string,array>}
	 */
	public $mw;

	/**
	 * Content version of the document, if known.
	 * @var ?string
	 */
	public $version;

	/**
	 * HTTP headers as a name => value map; names and values should both
	 * be strings.
	 * @var array<string,string>|null
	 */
	public $headers;

	/**
	 * Content model of the document, if known.
	 * @var string|null
	 */
	public $contentmodel;

	/**
	 * Store the given values on the new bundle without any validation.
	 *
	 * @param string $html The document, as an HTML string
	 * @param ?array $parsoid Serialized data-parsoid content
	 * @param ?array $mw Serialized data-mw content
	 * @param ?string $version Content version
	 * @param ?array $headers Map of HTTP headers
	 * @param ?string $contentmodel Content model
	 */
	public function __construct(
		string $html, ?array $parsoid = null, ?array $mw = null,
		?string $version = null, ?array $headers = null,
		?string $contentmodel = null
	) {
		// Separated node data.
		$this->parsoid = $parsoid;
		$this->mw = $mw;
		// Document and its metadata.
		$this->html = $html;
		$this->version = $version;
		$this->headers = $headers;
		$this->contentmodel = $contentmodel;
	}

	/**
	 * Create a bundle for $html whose data-parsoid and data-mw maps are
	 * present but contain no ids; the data-parsoid counter starts at -1.
	 *
	 * @param string $html The document, as an HTML string
	 * @param ?string $version Content version
	 * @param ?array $headers Map of HTTP headers
	 * @param ?string $contentmodel Content model
	 * @return self
	 */
	public static function newEmpty(
		string $html,
		?string $version = null,
		?array $headers = null,
		?string $contentmodel = null
	): self {
		$emptyParsoid = [ 'counter' => -1, 'ids' => [] ];
		$emptyMw = [ 'ids' => [] ];
		return new self(
			$html, $emptyParsoid, $emptyMw, $version, $headers, $contentmodel
		);
	}

	/**
	 * Check whether this page bundle carries the data it needs.
	 *
	 * data-parsoid must be non-empty and have an 'ids' entry. If
	 * $contentVersion satisfies the semver constraint ^999.0.0, the same
	 * is required of data-mw.
	 *
	 * @param string $contentVersion Document content version to validate against.
	 * @param ?string &$errorMessage Receives a description of the problem
	 *   when validation fails; it is not modified on success.
	 * @return bool True if the bundle is valid
	 */
	public function validate(
		string $contentVersion, ?string &$errorMessage = null
	) {
		$parsoidOk = $this->parsoid && isset( $this->parsoid['ids'] );
		if ( !$parsoidOk ) {
			$errorMessage = 'Invalid data-parsoid was provided.';
			return false;
		}

		// The version is only consulted once data-parsoid has passed.
		$needsMw = Semver::satisfies( $contentVersion, '^999.0.0' );
		$mwOk = $this->mw && isset( $this->mw['ids'] );
		if ( $needsMw && !$mwOk ) {
			$errorMessage = 'Invalid data-mw was provided.';
			return false;
		}

		return true;
	}

	/**
	 * Build an array describing this bundle as a set of parts, each with
	 * its own 'headers' and 'body'.
	 *
	 * The result always has 'contentmodel', 'html' and 'data-parsoid'
	 * keys; a 'data-mw' part is added only when the version satisfies
	 * ^999.0.0. A missing version is treated as '0.0.0', and a missing
	 * content model as the empty string.
	 *
	 * @return array
	 */
	public function responseData() {
		$version = $this->version ?? '0.0.0';

		$htmlContentType = 'text/html; charset=utf-8; '
			. 'profile="https://www.mediawiki.org/wiki/Specs/HTML/'
			. $version . '"';
		$parsoidContentType = 'application/json; charset=utf-8; '
			. 'profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/'
			. $version . '"';

		// Headers stored on the bundle are merged last, so they take
		// precedence over the generated content-type.
		$htmlHeaders = array_merge(
			[ 'content-type' => $htmlContentType ],
			$this->headers ?? []
		);

		$parts = [
			'contentmodel' => $this->contentmodel ?? '',
			'html' => [
				'headers' => $htmlHeaders,
				'body' => $this->html,
			],
			'data-parsoid' => [
				'headers' => [ 'content-type' => $parsoidContentType ],
				'body' => $this->parsoid,
			],
		];

		if ( !Semver::satisfies( $version, '^999.0.0' ) ) {
			return $parts;
		}

		$mwContentType = 'application/json; charset=utf-8; ' .
			'profile="https://www.mediawiki.org/wiki/Specs/data-mw/' .
			$version . '"';
		$parts['data-mw'] = [
			'headers' => [ 'content-type' => $mwContentType ],
			'body' => $this->mw,
		];
		return $parts;
	}

	/**
	 * Build a PageBundle out of a DomPageBundle.
	 *
	 * The DOM held by the DomPageBundle is serialized using $options.
	 * $options may additionally supply fallbacks ('contentversion',
	 * 'headers', 'contentmodel', 'offsetType') which are used only where
	 * the DomPageBundle does not already provide a value. When the
	 * 'body_only' option is truthy, the document body is serialized
	 * instead of the whole document, and 'innerXML' defaults to true.
	 *
	 * @param DomPageBundle $dpb
	 * @param array $options XMLSerializer options
	 * @return PageBundle
	 */
	public static function fromDomPageBundle( DomPageBundle $dpb, array $options = [] ): PageBundle {
		$bodyOnly = $options['body_only'] ?? false;
		if ( $bodyOnly ) {
			$root = DOMCompat::getBody( $dpb->doc );
			// Array union: an explicit 'innerXML' from the caller is kept.
			$options += [ 'innerXML' => true ];
		} else {
			$root = $dpb->doc;
		}
		$serialized = XMLSerializer::serialize( $root, $options );

		$version = $dpb->version ?? $options['contentversion'] ?? null;
		$headers = $dpb->headers ?? $options['headers'] ?? null;
		$contentmodel = $dpb->contentmodel ?? $options['contentmodel'] ?? null;

		$bundle = new self(
			$serialized['html'],
			$dpb->parsoid,
			$dpb->mw,
			$version,
			$headers,
			$contentmodel
		);

		// Fill in the offset type only if data-parsoid doesn't have one.
		if ( isset( $options['offsetType'] ) && !isset( $bundle->parsoid['offsetType'] ) ) {
			$bundle->parsoid['offsetType'] = $options['offsetType'];
		}
		return $bundle;
	}

	/**
	 * Render this PageBundle in "single document" form, in which the page
	 * bundle information is embedded in the <head> of the document.
	 *
	 * The work is delegated to a DomPageBundle created from this bundle.
	 *
	 * @param array $options XMLSerializer options
	 * @return string an HTML string
	 */
	public function toSingleDocumentHtml( array $options = [] ): string {
		$domBundle = DomPageBundle::fromPageBundle( $this );
		return $domBundle->toSingleDocumentHtml( $options );
	}

	/**
	 * Render this PageBundle in "inline attribute" form, in which the page
	 * bundle information is represented as inline JSON-valued attributes.
	 *
	 * The work is delegated to a DomPageBundle created from this bundle.
	 *
	 * @param array $options XMLSerializer options
	 * @return string an HTML string
	 */
	public function toInlineAttributeHtml( array $options = [] ): string {
		$domBundle = DomPageBundle::fromPageBundle( $this );
		return $domBundle->toInlineAttributeHtml( $options );
	}

	// ------------- JsonCodecable implementation -------------

	/** @inheritDoc */
	public function toJsonArray(): array {
		// Every property is emitted, including those which are null.
		$fields = [];
		$fields['html'] = $this->html;
		$fields['parsoid'] = $this->parsoid;
		$fields['mw'] = $this->mw;
		$fields['version'] = $this->version;
		$fields['headers'] = $this->headers;
		$fields['contentmodel'] = $this->contentmodel;
		return $fields;
	}

	/** @inheritDoc */
	public static function newFromJsonArray( array $json ): PageBundle {
		// Absent keys fall back to the empty string for the HTML and to
		// null for everything else.
		$html = $json['html'] ?? '';
		$parsoid = $json['parsoid'] ?? null;
		$mw = $json['mw'] ?? null;
		$version = $json['version'] ?? null;
		$headers = $json['headers'] ?? null;
		$contentmodel = $json['contentmodel'] ?? null;

		return new self( $html, $parsoid, $mw, $version, $headers, $contentmodel );
	}
}