Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 62 |
|
0.00% |
0 / 8 |
CRAP | |
0.00% |
0 / 1 |
DataParsoid | |
0.00% |
0 / 62 |
|
0.00% |
0 / 8 |
870 | |
0.00% |
0 / 1 |
__clone | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
56 | |||
isModified | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTemp | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getTempFlag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
setTempFlag | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
toJsonArray | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
jsonClassHintFor | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
newFromJsonArray | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
90 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\NodeData; |
5 | |
6 | use Wikimedia\JsonCodec\Hint; |
7 | use Wikimedia\JsonCodec\JsonCodecable; |
8 | use Wikimedia\JsonCodec\JsonCodecableTrait; |
9 | use Wikimedia\Parsoid\Core\DomSourceRange; |
10 | use Wikimedia\Parsoid\Tokens\SourceRange; |
11 | use Wikimedia\Parsoid\Tokens\Token; |
12 | use Wikimedia\Parsoid\Utils\Utils; |
13 | |
14 | /** |
15 | * Parsoid data for a DOM node. Managed by DOMDataUtils::get/setDataParsoid(). |
16 | * |
17 | * To reduce memory usage, most of the properties need to be undeclared, but we can |
18 | * use the property declarations below to satisfy phan and to provide type |
19 | * information to IDEs. |
20 | * |
21 | * TODO: Declaring common properties would be beneficial for memory usage, but |
22 | * changes the JSON serialized output and breaks tests. |
23 | * |
24 | * == Miscellaneous / General properties == |
25 | * |
26 | * Used to emit original wikitext in some scenarios (entities, placeholder spans) |
27 | * Porting note: this can be '0', handle emptiness checks with care |
28 | * @property string|null $src |
29 | * |
30 | * Tag widths for all tokens. |
31 | * Temporarily present in data-parsoid, but not in final DOM output. |
32 | * @see ComputeDSR::computeNodeDSR() |
33 | * @property SourceRange|null $tsr |
34 | * |
35 | * Wikitext source ranges that generated this DOM node. |
36 | * In the form [ start-offset, end-offset ] or |
37 | * [ start-offset, end-offset, start-tag-width, end-tag-width ]. |
38 | * |
39 | * Consider input wikitext: `abcdef ''foo'' something else`. Let us look at the `''foo''` |
40 | * part of the input. It generates `<i data-parsoid='{"dsr":[7,14,2,2]}'>foo</i>` . The dsr |
41 | * property of the data-parsoid attribute of this i-tag tells us the following. This HTML node |
42 | * maps to input wikitext substring 7..14. The opening tag <i> was 2 characters wide in wikitext |
43 | * and the closing tag </i> was also 2 characters wide in wikitext. |
44 | * @property DomSourceRange|null $dsr |
45 | * |
46 | * Denotes special syntax. Possible values: |
47 | * - 'html' for html tags. Ex: `<div>foo</div>` |
48 | * - 'row' for dt/dd that show on the same line. Ex: `;a:b` (but not `;a\n:b`) |
49 | * - 'piped' for piped wikilinks with explicit content Ex: `[[Foo|bar]]` (but not `[[Foo]]`) |
50 | * - 'magiclink', 'url' - legacy, not used anymore |
51 | * @property string|null $stx |
52 | * |
53 | * Template parameter infos produced by TemplateHandler. After unserialization, |
54 | * the objects are not fully populated. |
55 | * @property ParamInfo[][]|null $pi |
56 | * |
57 | * DOM fragment identifier for DocumentFragment tunneled through Tokens. |
58 | * The identifier here indexes into Env::$fragmentMap to map to a |
59 | * DocumentFragment. |
60 | * @property string|null $html |
61 | * |
62 | * On mw:Entity spans this is set to the decoded entity value. |
63 | * @property string|null $srcContent |
64 | * |
65 | * An array of associative arrays describing image rendering options, attached |
66 | * to the image container (span or figure). |
67 | * - ck: Canonical key for the image option. |
68 | * - ak: Aliased key. |
69 | * @property array|null $optList |
70 | * |
71 | * Rendered attributes (shadow info). The key is the attribute name. The value |
72 | * is documented as "mixed" but seems to be coerced to string in |
73 | * Sanitizer::sanitizeTagAttrs(). |
74 | * @property array|null $a Rendered attributes |
75 | * |
76 | * Source attributes (shadow info). The key is the attribute name. The value |
77 | * is documented as "mixed" but may possibly be a nullable string. |
78 | * @property array|null $sa Source attributes |
79 | * |
80 | * The number of extra dashes in the source of an hr |
81 | * @property int|null $extra_dashes |
82 | * |
83 | * The complete text of a double-underscore behavior switch |
84 | * @property string|null $magicSrc |
85 | * |
86 | * True if the input heading element had an id attribute, preventing automatic |
87 | * assignment of a new id attribute. |
88 | * @property bool|null $reusedId |
89 | * |
90 | * The link token associated with a redirect |
91 | * @property Token|null $linkTk |
92 | * |
93 | * On a meta mw:EmptyLine, the associated comment and whitespace tokens. Used |
94 | * in this sense by both the tokenizer and TokenStreamPatcher. |
95 | * @property array $tokens |
96 | * |
97 | * This is set to "extlink" on auto URL (external hotlink) image links. |
98 | * @property string|null $type |
99 | * |
100 | * On a meta mw:Placeholder/StrippedTag, this is the name of the stripped tag. |
101 | * @property string|null $name |
102 | * |
103 | * This is set on image containers in which a template expands to multiple |
104 | * image parameters. It is converted to a typeof attribute later in the same |
105 | * function, so it's unclear why it needs to persist in data-parsoid. |
106 | * @property bool|null $uneditable |
107 | * |
108 | * == WrapTemplates == |
109 | * |
110 | * The wikitext source which was not included in a template wrapper. |
111 | * @property string|null $unwrappedWT |
112 | * |
113 | * The token or DOM node name, optionally suffixed with the syntax name from |
114 | * $this->stx, of the first node within the encapsulated content. |
115 | * @property string|null $firstWikitextNode |
116 | * |
117 | * == Extensions == |
118 | * |
119 | * Offsets of opening and closing tags for extension tags, in the form |
120 | * [ opening tag start , closing tag end, opening tag width, closing tag width ] |
121 | * Temporarily present in data-parsoid, but not in final DOM output. |
122 | * @property DomSourceRange|null $extTagOffsets |
123 | * |
124 | * This is true on the extension output wrapper if the extension input wikitext |
125 | * was an empty string. Consumed by <references/>. |
126 | * @property bool $empty |
127 | * |
128 | * The reference group. This is attached to the <ol> or its wrapper <div>, |
129 | * redundantly with the data-mw-group attribute on the <ol>. It is produced by |
130 | * the extension's sourceToDom() and consumed by wtPostprocess(). |
131 | * @property string $group |
132 | * |
133 | * == Annotations == |
134 | * This is used on annotation meta tags to indicate that the corresponding |
135 | * tag has been moved compared to its initial location defined by wikitext. |
136 | * An annotation tag can be moved either as the result of fostering or as |
137 | * the result of annotation range extension to enclose a contiguous DOM |
138 | * forest. |
139 | * @property bool|null $wasMoved |
140 | * |
141 | * == HTML tags == |
142 | * |
143 | * Are void tags self-closed? (Ex: `<br>` vs `<br />`) |
144 | * @property bool|null $selfClose |
145 | * |
146 | * Void tags that are not self-closed (Ex: `<br>`) |
147 | * @property bool|null $noClose |
148 | * |
149 | * Whether this start HTML tag has no corresponding wikitext and was auto-inserted by a token |
150 | * handler to generate well-formed html. Usually happens when a token handler fixes up misnesting. |
151 | * @property bool|null $autoInsertedStartToken |
152 | * |
153 | * Whether this end HTML tag has no corresponding wikitext and was auto-inserted by a token |
154 | * handler to generate well-formed html. Usually happens when a token handler fixes up misnesting. |
155 | * @property bool|null $autoInsertedEndToken |
156 | * |
157 | * Whether this start HTML tag has no corresponding wikitext and was auto-inserted to generate |
158 | * well-formed html. Usually happens when treebuilder fixes up badly nested HTML. |
159 | * @property bool|null $autoInsertedStart |
160 | * |
161 | * Whether this end HTML tag has no corresponding wikitext and was auto-inserted to generate |
162 | * well-formed html. Ex: `<tr>`, `<th>`, `<td>`, `<li>`, etc. that have no explicit closing |
163 | * markup. Or, html tags that aren't closed. |
164 | * @property bool|null $autoInsertedEnd |
165 | * |
166 | * Source tag name for HTML tags. Records case variations (`<div>` vs `<DiV>` vs `<DIV>`). |
167 | * @property string|null $srcTagName |
168 | * |
169 | * UnpackDomFragments sets this on misnested elements |
170 | * @property bool|null $misnested |
171 | * |
172 | * This is set by MarkFosteredContent to indicate fostered content and content |
173 | * wrappers. |
174 | * @property bool|null $fostered |
175 | * |
176 | * == Links == |
177 | * |
178 | * Link trail source (Ex: the "l" in `[[Foo]]l`) |
179 | * Porting note: this can be '0', handle emptiness checks with care |
180 | * @property string|null $tail |
181 | * |
182 | * Link prefix source |
183 | * Porting note: this can be '0', handle emptiness checks with care |
184 | * @property string|null $prefix |
185 | * |
186 | * Did the link use interwiki syntax? |
187 | * Probably redundant with the rel=mw:WikiLink/Interwiki |
188 | * @property bool|null $isIW |
189 | * |
190 | * Source for first separator in a wikilink to account for variation |
191 | * Ex. [[Test{{!}}123]] |
192 | * @property string|null $firstPipeSrc |
193 | * |
194 | * == Tables == |
195 | * |
196 | * Source for start-text separators in table wikitext. |
197 | * @property string|null $startTagSrc |
198 | * |
199 | * Source for end-text separators in table wikitext. |
200 | * @property string|null $endTagSrc |
201 | * |
202 | * Source for attribute-text separators in table wikitext. |
203 | * @property string|null $attrSepSrc |
204 | * |
205 | * 'row' for td/th cells that show up on the same line, null otherwise |
206 | * @property string|null $stx_v |
207 | * |
208 | * == Language variant token properties == |
209 | * |
210 | * @property array|null $flags Flags with their human-readable names |
211 | * @property array|null $variants The variant names |
212 | * @property array|null $original Original flags |
213 | * @property array|null $flagSp Spaces around flags, uncompressed |
214 | * |
215 | * An array of associative arrays describing the parts of the variant rule. |
216 | * - text: (string) The text |
217 | * - semi: (bool) A semicolon marker |
218 | * - sp: (array|string) An array of strings containing spaces |
219 | * - oneway: (bool) A one-way rule definition |
220 | * - twoway: (bool) A two-way rule definition |
221 | * - from: (array) An associative array: |
222 | * - tokens: (array) A token array |
223 | * - srcOffsets: SourceRange |
224 | * - to: (array) An associative array same as "from" |
225 | * - lang: (string) |
226 | * @property array|null $texts |
227 | * |
228 | * == Language variant data-parsoid properties == |
229 | * |
230 | * @property array|null $flSp Spaces around flags, compressed with compressSpArray(). |
231 | * @property array|null $tSp Spaces around texts, compressed with compressSpArray(). |
232 | * @property array|null $fl Original flags, copied from $this->original on the token. |
233 | */ |
#[\AllowDynamicProperties]
class DataParsoid implements JsonCodecable {
	use JsonCodecableTrait;

	/**
	 * Container for transient values handed from one wt->html pipeline stage
	 * to the next. It is stripped out by toJsonArray() and therefore never
	 * appears in the serialized form.
	 *
	 * The property is declared without a default, so it may be uninitialized;
	 * always test it with isset() before reading it.
	 */
	public ?TempData $tmp;

	/**
	 * Make the copy independent of the original by cloning nested values.
	 *
	 * tmp, linkTk and tokens go through Utils::clone(); tsr, dsr and
	 * extTagOffsets are copied with the native clone operator. Properties
	 * that are not set are left untouched.
	 */
	public function __clone() {
		// Values copied via Utils::clone()
		foreach ( [ 'tmp', 'linkTk', 'tokens' ] as $name ) {
			if ( isset( $this->$name ) ) {
				$this->$name = Utils::clone( $this->$name );
			}
		}

		// Values for which the native clone operator is used
		foreach ( [ 'tsr', 'dsr', 'extTagOffsets' ] as $name ) {
			if ( isset( $this->$name ) ) {
				$this->$name = clone $this->$name;
			}
		}
	}

	/**
	 * Report whether this object would serialize to anything other than an
	 * empty array.
	 *
	 * @return bool
	 */
	public function isModified(): bool {
		$serialized = $this->toJsonArray();
		return $serialized !== [];
	}

	/**
	 * Return the TempData object for this node, creating and storing a new
	 * one on first use.
	 *
	 * @return TempData
	 */
	public function getTemp(): TempData {
		// The declared property can still be uninitialized or null here.
		if ( !isset( $this->tmp ) ) {
			$this->tmp = new TempData();
		}
		return $this->tmp;
	}

	/**
	 * Test a bit of $this->tmp->bits.
	 *
	 * @param int $flag Bit mask to test
	 * @return bool False when no TempData is attached or when none of the
	 *   bits in $flag are set; true otherwise.
	 */
	public function getTempFlag( $flag ): bool {
		if ( !isset( $this->tmp ) ) {
			return false;
		}
		return (bool)( $this->tmp->bits & $flag );
	}

	/**
	 * Turn a bit of $this->tmp->bits on or off.
	 *
	 * Turning a bit on creates the TempData object if there is none yet.
	 * Turning a bit off when there is no TempData object does nothing.
	 *
	 * @param int $flag Bit mask to change
	 * @param bool $value True to set the bit(s), false to clear them
	 */
	public function setTempFlag( $flag, $value = true ): void {
		if ( !$value ) {
			// Nothing to clear when no temporary data exists.
			if ( isset( $this->tmp ) ) {
				$this->tmp->bits &= ~$flag;
			}
			return;
		}

		if ( isset( $this->tmp ) ) {
			$this->tmp->bits |= $flag;
			return;
		}

		$fresh = new TempData;
		$fresh->bits = $flag;
		$this->tmp = $fresh;
	}

	/** @inheritDoc */
	public function toJsonArray(): array {
		$data = (array)$this;

		// Temporary data is never part of the serialized output.
		unset( $data['tmp'] );

		// For brevity, omit these source-range properties when they are null.
		foreach ( [ 'dsr', 'tsr', 'extTagOffsets' ] as $rangeKey ) {
			if ( ( $data[$rangeKey] ?? null ) === null ) {
				unset( $data[$rangeKey] );
			}
		}

		return $data;
	}

	/** @inheritDoc */
	public static function jsonClassHintFor( string $keyname ) {
		// Built on the first call and reused afterwards.
		static $hintTable = null;

		if ( $hintTable === null ) {
			$domRangeHint = DomSourceRange::hint();
			$rangeHint = SourceRange::hint();
			$paramInfoHint = Hint::build( ParamInfo::class, Hint::LIST, Hint::LIST );

			$hintTable = [
				'dsr' => $domRangeHint,
				'extTagOffsets' => $domRangeHint,
				'tsr' => $rangeHint,
				'pi' => $paramInfoHint,
				'linkTk' => Token::class,
			];
		}

		// Keys without an entry carry no class hint.
		return $hintTable[$keyname] ?? null;
	}

	/** @inheritDoc */
	public static function newFromJsonArray( array $json ): DataParsoid {
		$dp = new DataParsoid;

		foreach ( $json as $name => $val ) {
			// A switch (loose comparison, same case order as before) is kept
			// on purpose so that key matching is unchanged.
			switch ( $name ) {
				case 'dsr':
				case 'extTagOffsets':
				case 'tsr':
					// Backward compatibility: a null range stays unset.
					if ( $val === null ) {
						continue 2;
					}
					break;

				case 'tmp':
					// Never produced by toJsonArray(), but accepted on input
					// so tests can supply temporary data.
					$temp = new TempData;
					foreach ( $val as $tmpName => $tmpVal ) {
						$temp->$tmpName = $tmpVal;
					}
					$val = $temp;
					break;
			}

			$dp->$name = $val;
		}

		return $dp;
	}
}