Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 62 |
|
0.00% |
0 / 8 |
CRAP | |
0.00% |
0 / 1 |
DataParsoid | |
0.00% |
0 / 62 |
|
0.00% |
0 / 8 |
870 | |
0.00% |
0 / 1 |
__clone | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
56 | |||
isModified | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTemp | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getTempFlag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
setTempFlag | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
toJsonArray | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
jsonClassHintFor | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
newFromJsonArray | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
90 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\NodeData; |
5 | |
6 | use Wikimedia\JsonCodec\Hint; |
7 | use Wikimedia\JsonCodec\JsonCodecable; |
8 | use Wikimedia\JsonCodec\JsonCodecableTrait; |
9 | use Wikimedia\Parsoid\Core\DomSourceRange; |
10 | use Wikimedia\Parsoid\Tokens\SourceRange; |
11 | use Wikimedia\Parsoid\Tokens\Token; |
12 | use Wikimedia\Parsoid\Utils\Utils; |
13 | |
14 | /** |
15 | * Parsoid data for a DOM node. Managed by DOMDataUtils::get/setDataParsoid(). |
16 | * |
17 | * To reduce memory usage, most of the properties need to be undeclared, but we can |
18 | * use the property declarations below to satisfy phan and to provide type |
19 | * information to IDEs. |
20 | * |
21 | * TODO: Declaring common properties would be beneficial for memory usage, but |
22 | * changes the JSON serialized output and breaks tests. |
23 | * |
24 | * == Miscellaneous / General properties == |
25 | * |
26 | * Used to emit original wikitext in some scenarios (entities, placeholder spans) |
27 | * Porting note: this can be '0', handle emptiness checks with care |
28 | * @property string|null $src |
29 | * |
30 | * Tag widths for all tokens. |
31 | * Temporarily present in data-parsoid, but not in final DOM output. |
32 | * @see ComputeDSR::computeNodeDSR() |
33 | * @property SourceRange|null $tsr |
34 | * |
35 | * Wikitext source ranges that generated this DOM node. |
36 | * In the form [ start-offset, end-offset ] or |
37 | * [ start-offset, end-offset, start-tag-width, end-tag-width ]. |
38 | * |
39 | * Consider input wikitext: `abcdef ''foo'' something else`. Let us look at the `''foo''` |
40 | * part of the input. It generates `<i data-parsoid='{"dsr":[7,14,2,2]}'>foo</i>` . The dsr |
41 | * property of the data-parsoid attribute of this i-tag tells us the following. This HTML node |
42 | * maps to input wikitext substring 7..14. The opening tag <i> was 2 characters wide in wikitext |
43 | * and the closing tag </i> was also 2 characters wide in wikitext. |
44 | * @property DomSourceRange|null $dsr |
45 | * |
46 | * Denotes special syntax. Possible values: |
47 | * - 'html' for html tags. Ex: `<div>foo</div>` |
48 | * - 'row' for dt/dd that show on the same line. Ex: `;a:b` (but not `;a\n:b`) |
49 | * - 'piped' for piped wikilinks with explicit content Ex: `[[Foo|bar]]` (but not `[[Foo]]`) |
50 | * - 'magiclink', 'url' - legacy, not used anymore |
51 | * @property string|null $stx |
52 | * |
53 | * Template parameter infos produced by TemplateHandler. After unserialization, |
54 | * the objects are not fully populated. |
55 | * @property ParamInfo[][]|null $pi |
56 | * |
57 | * DOM fragment identifier for DocumentFragment tunneled through Tokens. |
58 | * The identifier here indexes into Env::$fragmentMap to map to a |
59 | * DocumentFragment. |
60 | * @property string|null $html |
61 | * |
62 | * On mw:Entity spans this is set to the decoded entity value. |
63 | * @property string|null $srcContent |
64 | * |
65 | * An array of associative arrays describing image rendering options, attached |
66 | * to the image container (span or figure). |
67 | * - ck: Canonical key for the image option. |
68 | * - ak: Aliased key. |
69 | * @property array|null $optList |
70 | * |
71 | * Rendered attributes (shadow info). The key is the attribute name. The value |
72 | * is documented as "mixed" but seems to be coerced to string in |
73 | * Sanitizer::sanitizeTagAttrs(). |
74 | * @property array|null $a Rendered attributes |
75 | * |
76 | * Source attributes (shadow info). The key is the attribute name. The value |
77 | * is documented as "mixed" but may possibly be a nullable string. |
78 | * @property array|null $sa Source attributes |
79 | * |
80 | * FIXME never written |
81 | * @property bool|null $strippedNL |
82 | * |
83 | * The number of extra dashes in the source of an hr |
84 | * @property int|null $extra_dashes |
85 | * |
86 | * The complete text of a double-underscore behavior switch |
87 | * @property string|null $magicSrc |
88 | * |
89 | * True if the input heading element had an id attribute, preventing automatic |
90 | * assignment of a new id attribute. |
91 | * @property bool|null $reusedId |
92 | * |
93 | * FIXME: Get rid of this property and the code that reads it after content |
94 | * version 2.2.0 has expired from caches. |
95 | * @property mixed $liHackSrc |
96 | * |
97 | * The link token associated with a redirect |
98 | * @property Token|null $linkTk |
99 | * |
100 | * On a meta mw:EmptyLine, the associated comment and whitespace tokens. Used |
101 | * in this sense by both the tokenizer and TokenStreamPatcher. |
102 | * @property array $tokens |
103 | * |
104 | * This is set to "extlink" on auto URL (external hotlink) image links. |
105 | * @property string|null $type |
106 | * |
107 | * On a meta mw:Placeholder/StrippedTag, this is the name of the stripped tag. |
108 | * @property string|null $name |
109 | * |
110 | * This is set on image containers in which a template expands to multiple |
111 | * image parameters. It is converted to a typeof attribute later in the same |
112 | * function, so it's unclear why it needs to persist in data-parsoid. |
113 | * @property bool|null $uneditable |
114 | * |
115 | * == WrapTemplates == |
116 | * |
117 | * The wikitext source which was not included in a template wrapper. |
118 | * @property string|null $unwrappedWT |
119 | * |
120 | * The token or DOM node name, optionally suffixed with the syntax name from |
121 | * $this->stx, of the first node within the encapsulated content. |
122 | * @property string|null $firstWikitextNode |
123 | * |
124 | * == Extensions == |
125 | * |
126 | * Offsets of opening and closing tags for extension tags, in the form |
127 | * [ opening tag start , closing tag end, opening tag width, closing tag width ] |
128 | * Temporarily present in data-parsoid, but not in final DOM output. |
129 | * @property DomSourceRange|null $extTagOffsets |
130 | * |
131 | * This is true on the extension output wrapper if the extension input wikitext |
132 | * was an empty string. Consumed by <references/>. |
133 | * @property bool $empty |
134 | * |
135 | * The reference group. This is attached to the <ol> or its wrapper <div>, |
136 | * redundantly with the data-mw-group attribute on the <ol>. It is produced by |
137 | * the extension's sourceToDom() and consumed by wtPostprocess(). |
138 | * @property string $group |
139 | * |
140 | * == Annotations == |
141 | * This is used on annotation meta tags to indicate that the corresponding |
142 | * tag has been moved compared to its initial location defined by wikitext. |
143 | * An annotation tag can be moved either as the result of fostering or as |
144 | * the result of annotation range extension to enclose a contiguous DOM |
145 | * forest. |
146 | * @property bool|null $wasMoved |
147 | * |
148 | * == HTML tags == |
149 | * |
150 | * Are void tags self-closed? (Ex: `<br>` vs `<br />`) |
151 | * @property bool|null $selfClose |
152 | * |
153 | * Void tags that are not self-closed (Ex: `<br>`) |
154 | * @property bool|null $noClose |
155 | * |
156 | * Whether this start HTML tag has no corresponding wikitext and was auto-inserted by a token |
157 | * handler to generate well-formed html. Usually happens when a token handler fixes up misnesting. |
158 | * @property bool|null $autoInsertedStartToken |
159 | * |
160 | * Whether this end HTML tag has no corresponding wikitext and was auto-inserted by a token |
161 | * handler to generate well-formed html. Usually happens when a token handler fixes up misnesting. |
162 | * @property bool|null $autoInsertedEndToken |
163 | * |
164 | * Whether this start HTML tag has no corresponding wikitext and was auto-inserted to generate |
165 | * well-formed html. Usually happens when treebuilder fixes up badly nested HTML. |
166 | * @property bool|null $autoInsertedStart |
167 | * |
168 | * Whether this end HTML tag has no corresponding wikitext and was auto-inserted to generate |
169 | * well-formed html. Ex: `<tr>`, `<th>`, `<td>`, `<li>`, etc. that have no explicit closing |
170 | * markup. Or, html tags that aren't closed. |
171 | * @property bool|null $autoInsertedEnd |
172 | * |
173 | * Source tag name for HTML tags. Records case variations (`<div>` vs `<DiV>` vs `<DIV>`). |
174 | * @property string|null $srcTagName |
175 | * |
176 | * UnpackDomFragments sets this on misnested elements |
177 | * @property bool|null $misnested |
178 | * |
179 | * This is set by MarkFosteredContent to indicate fostered content and content |
180 | * wrappers. |
181 | * @property bool|null $fostered |
182 | * |
183 | * == Links == |
184 | * |
185 | * Link trail source (Ex: the "l" in `[[Foo]]l`) |
186 | * Porting note: this can be '0', handle emptiness checks with care |
187 | * @property string|null $tail |
188 | * |
189 | * Link prefix source |
190 | * Porting note: this can be '0', handle emptiness checks with care |
191 | * @property string|null $prefix |
192 | * |
193 | * True if the link was a pipetrick (`[[Foo|]]`). |
194 | * @note This will likely be removed soon since this should not show up in saved wikitext since |
195 | * this is a pre-save transformation trick. |
196 | * @property bool|null $pipeTrick |
197 | * |
198 | * Did the link use interwiki syntax? |
199 | * Probably redundant with the rel=mw:WikiLink/Interwiki |
200 | * @property bool|null $isIW |
201 | * |
202 | * == Tables == |
203 | * |
204 | * Source for start-text separators in table wikitext. |
205 | * @property string|null $startTagSrc |
206 | * |
207 | * Source for end-text separators in table wikitext. |
208 | * @property string|null $endTagSrc |
209 | * |
210 | * Source for attribute-text separators in table wikitext. |
211 | * @property string|null $attrSepSrc |
212 | * |
213 | * 'row' for td/th cells that show up on the same line, null otherwise |
214 | * @property string|null $stx_v |
215 | * |
216 | * == Language variant token properties == |
217 | * |
218 | * @property array|null $flags Flags with their human-readable names |
219 | * @property array|null $variants The variant names |
220 | * @property array|null $original Original flags |
221 | * @property array|null $flagSp Spaces around flags, uncompressed |
222 | * |
223 | * An array of associative arrays describing the parts of the variant rule. |
224 | * - text: (string) The text |
225 | * - semi: (bool) A semicolon marker |
226 | * - sp: (array|string) An array of strings containing spaces |
227 | * - oneway: (bool) A one-way rule definition |
228 | * - twoway: (bool) A two-way rule definition |
229 | * - from: (array) An associative array: |
230 | * - tokens: (array) A token array |
231 | * - srcOffsets: SourceRange |
232 | * - to: (array) An associative array same as "from" |
233 | * - lang: (string) |
234 | * @property array|null $texts |
235 | * |
236 | * == Language variant data-parsoid properties == |
237 | * |
238 | * @property array|null $flSp Spaces around flags, compressed with compressSpArray(). |
239 | * @property array|null $tSp Spaces around texts, compressed with compressSpArray(). |
240 | * @property array|null $fl Original flags, copied from $this->original on the token. |
241 | */ |
#[\AllowDynamicProperties]
class DataParsoid implements JsonCodecable {
	use JsonCodecableTrait;

	/**
	 * Container for transient properties that pass information between stages
	 * of the wt->html pipeline. It is dropped before serialization.
	 */
	public ?TempData $tmp;

	/**
	 * Make the copy produced by `clone` independent of the original for the
	 * object-valued properties handled below.
	 */
	public function __clone() {
		// These are copied with Utils::clone()
		foreach ( [ 'tmp', 'linkTk', 'tokens' ] as $name ) {
			if ( isset( $this->$name ) ) {
				$this->$name = Utils::clone( $this->$name );
			}
		}

		// For these, the native PHP clone operator is used
		foreach ( [ 'tsr', 'dsr', 'extTagOffsets' ] as $name ) {
			if ( isset( $this->$name ) ) {
				$this->$name = clone $this->$name;
			}
		}
	}

	/**
	 * Report whether this object differs from a freshly constructed instance.
	 *
	 * @return bool
	 */
	public function isModified(): bool {
		$pristine = new self;
		// NOTE: the comparison is deliberately loose; a strict (identity)
		// comparison against a new object would not work here.
		// @phan-suppress-next-line PhanPluginComparisonObjectEqualityNotStrict
		return $this != $pristine;
	}

	/**
	 * Return the TempData object for temporary properties, creating it on
	 * first use.
	 *
	 * @return TempData
	 */
	public function getTemp(): TempData {
		// Although declared, tmp may still be unset at this point
		if ( !isset( $this->tmp ) ) {
			$this->tmp = new TempData();
		}
		return $this->tmp;
	}

	/**
	 * Test whether a bit is set in $this->tmp->bits. When there is no tmp
	 * object, no bit is considered set.
	 *
	 * @param int $flag
	 * @return bool
	 */
	public function getTempFlag( $flag ): bool {
		if ( !isset( $this->tmp ) ) {
			return false;
		}
		return (bool)( $this->tmp->bits & $flag );
	}

	/**
	 * Set or clear a bit in $this->tmp->bits.
	 *
	 * Setting a bit creates the tmp object if needed; clearing a bit does
	 * nothing when there is no tmp object.
	 *
	 * @param int $flag
	 * @param bool $value
	 */
	public function setTempFlag( $flag, $value = true ): void {
		if ( !$value ) {
			if ( isset( $this->tmp ) ) {
				$this->tmp->bits &= ~$flag;
			}
			return;
		}

		if ( isset( $this->tmp ) ) {
			$this->tmp->bits |= $flag;
		} else {
			$fresh = new TempData;
			$fresh->bits = $flag;
			$this->tmp = $fresh;
		}
	}

	/**
	 * Convert this object's properties to an array for JSON serialization.
	 * The 'tmp' entry is never included.
	 *
	 * @inheritDoc
	 */
	public function toJsonArray(): array {
		$data = (array)$this;
		unset( $data['tmp'] );
		// Conciseness: for these properties a `null` value is omitted from
		// the output instead of being emitted.
		foreach ( [ 'dsr', 'tsr', 'extTagOffsets' ] as $name ) {
			if ( ( $data[$name] ?? null ) === null ) {
				unset( $data[$name] );
			}
		}
		return $data;
	}

	/**
	 * Look up the class hint for a serialized key; keys without a hint
	 * yield null.
	 *
	 * @inheritDoc
	 */
	public static function jsonClassHintFor( string $keyname ) {
		// The hint table is built on the first call and reused afterwards
		static $table = null;
		if ( $table === null ) {
			$domRangeHint = DomSourceRange::hint();
			$rangeHint = SourceRange::hint();
			$table = [
				'dsr' => $domRangeHint,
				'extTagOffsets' => $domRangeHint,
				'tsr' => $rangeHint,
				'pi' => Hint::build( ParamInfo::class, Hint::LIST, Hint::LIST ),
				'linkTk' => Token::class,
			];
		}
		return $table[$keyname] ?? null;
	}

	/**
	 * Build a DataParsoid from its array form, copying each entry onto the
	 * new object as a property.
	 *
	 * @inheritDoc
	 */
	public static function newFromJsonArray( array $json ): DataParsoid {
		$dp = new DataParsoid;
		foreach ( $json as $key => $value ) {
			switch ( $key ) {
				case 'dsr':
				case 'extTagOffsets':
				case 'tsr':
					// Backward compatibility: a null value leaves the
					// property unset instead of assigning null.
					if ( $value === null ) {
						break;
					}
					$dp->$key = $value;
					break;
				case 'tmp':
					// toJsonArray() never emits this key, but accepting it
					// here is useful for tests.
					$tmp = new TempData;
					foreach ( $value as $tmpKey => $tmpValue ) {
						$tmp->$tmpKey = $tmpValue;
					}
					$dp->$key = $tmp;
					break;
				default:
					$dp->$key = $value;
					break;
			}
		}
		return $dp;
	}
}