Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 28 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
DataParsoid | |
0.00% |
0 / 28 |
|
0.00% |
0 / 5 |
272 | |
0.00% |
0 / 1 |
clone | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
72 | |||
isModified | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTemp | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getTempFlag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
setTempFlag | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\NodeData; |
5 | |
6 | use Wikimedia\Parsoid\Core\DomSourceRange; |
7 | use Wikimedia\Parsoid\Tokens\SourceRange; |
8 | use Wikimedia\Parsoid\Tokens\Token; |
9 | use Wikimedia\Parsoid\Utils\Utils; |
10 | |
11 | /** |
12 | * Parsoid data for a DOM node. Managed by DOMDataUtils::get/setDataParsoid(). |
13 | * |
14 | * To reduce memory usage, most of the properties need to be undeclared, but we can |
15 | * use the property declarations below to satisfy phan and to provide type |
16 | * information to IDEs. |
17 | * |
18 | * TODO: Declaring common properties would be beneficial for memory usage, but |
19 | * changes the JSON serialized output and breaks tests. |
20 | * |
21 | * == Miscellaneous / General properties == |
22 | * |
23 | * Used to emit original wikitext in some scenarios (entities, placeholder spans) |
24 | * Porting note: this can be '0', handle emptiness checks with care |
25 | * @property string|null $src |
26 | * |
27 | * Tag widths for all tokens. |
28 | * Temporarily present in data-parsoid, but not in final DOM output. |
29 | * @see ComputeDSR::computeNodeDSR() |
30 | * @property SourceRange|null $tsr |
31 | * |
32 | * Wikitext source ranges that generated this DOM node. |
33 | * In the form [ start-offset, end-offset ] or |
34 | * [ start-offset, end-offset, start-tag-width, end-tag-width ]. |
35 | * |
36 | * Consider input wikitext: `abcdef ''foo'' something else`. Let us look at the `''foo''` |
37 | * part of the input. It generates `<i data-parsoid='{"dsr":[7,14,2,2]}'>foo</i>` . The dsr |
38 | * property of the data-parsoid attribute of this i-tag tells us the following. This HTML node |
39 | * maps to input wikitext substring 7..14. The opening tag <i> was 2 characters wide in wikitext |
40 | * and the closing tag </i> was also 2 characters wide in wikitext. |
41 | * @property DomSourceRange|null $dsr |
42 | * |
43 | * Denotes special syntax. Possible values: |
44 | * - 'html' for html tags. Ex: `<div>foo</div>` |
45 | * - 'row' for dt/dd that show on the same line. Ex: `;a:b` (but not `;a\n:b`) |
46 | * - 'piped' for piped wikilinks with explicit content Ex: `[[Foo|bar]]` (but not `[[Foo]]`) |
47 | * - 'magiclink', 'url' - legacy, not used anymore |
48 | * @property string|null $stx |
49 | * |
50 | * Template parameter infos produced by TemplateHandler. After unserialization, |
51 | * the objects are not fully populated. |
52 | * @property ParamInfo[][]|null $pi |
53 | * |
54 | * Expanded template HTML (native preprocessor only). |
55 | * @property string|null $html |
56 | * |
57 | * On mw:Entity spans this is set to the decoded entity value. |
58 | * @property string|null $srcContent |
59 | * |
60 | * An array of associative arrays describing image rendering options, attached |
61 | * to the image container (span or figure). |
62 | * - ck: Canonical key for the image option. |
63 | * - ak: Aliased key. |
64 | * @property array|null $optList |
65 | * |
66 | * Rendered attributes (shadow info). The key is the attribute name. The value |
67 | * is documented as "mixed" but seems to be coerced to string in |
68 | * Sanitizer::sanitizeTagAttrs(). |
69 | * @property array|null $a Rendered attributes |
70 | * |
71 | * Source attributes (shadow info). The key is the attribute name. The value |
72 | * is documented as "mixed" but may possibly be a nullable string. |
73 | * @property array|null $sa Source attributes |
74 | * |
75 | * FIXME never written |
76 | * @property bool|null $strippedNL |
77 | * |
78 | * The number of extra dashes in the source of an hr |
79 | * @property int|null $extra_dashes |
80 | * |
81 | * The complete text of a double-underscore behavior switch |
82 | * @property string|null $magicSrc |
83 | * |
84 | * True if the input heading element had an id attribute, preventing automatic |
85 | * assignment of a new id attribute. |
86 | * @property bool|null $reusedId |
87 | * |
88 | * FIXME: Get rid of this property and the code that reads it after content |
89 | * version 2.2.0 has expired from caches. |
90 | * @property mixed $liHackSrc |
91 | * |
92 | * The link token associated with a redirect |
93 | * @property Token|null $linkTk |
94 | * |
95 | * On a meta mw:EmptyLine, the associated comment and whitespace tokens. Used |
96 | * in this sense by both the tokenizer and TokenStreamPatcher. |
97 | * @property array $tokens |
98 | * |
99 | * This is set to "extlink" on auto URL (external hotlink) image links. |
100 | * @property string|null $type |
101 | * |
102 | * On a meta mw:Placeholder/StrippedTag, this is the name of the stripped tag. |
103 | * @property string|null $name |
104 | * |
105 | * This is set on image containers in which a template expands to multiple |
106 | * image parameters. It is converted to a typeof attribute later in the same |
107 | * function, so it's unclear why it needs to persist in data-parsoid. |
108 | * @property bool|null $uneditable |
109 | * |
110 | * == WrapTemplates == |
111 | * |
112 | * The wikitext source which was not included in a template wrapper. |
113 | * @property string|null $unwrappedWT |
114 | * |
115 | * The token or DOM node name, optionally suffixed with the syntax name from |
116 | * $this->stx, of the first node within the encapsulated content. |
117 | * @property string|null $firstWikitextNode |
118 | * |
119 | * == Extensions == |
120 | * |
121 | * Offsets of opening and closing tags for extension tags, in the form |
122 | * [ opening tag start , closing tag end, opening tag width, closing tag width ] |
123 | * Temporarily present in data-parsoid, but not in final DOM output. |
124 | * @property DomSourceRange|null $extTagOffsets |
125 | * |
126 | * This is true on the extension output wrapper if the extension input wikitext |
127 | * was an empty string. Consumed by <references/>. |
128 | * @property bool $empty |
129 | * |
130 | * The reference group. This is attached to the <ol> or its wrapper <div>, |
131 | * redundantly with the data-mw-group attribute on the <ol>. It is produced by |
132 | * the extension's sourceToDom() and consumed by wtPostprocess(). |
133 | * @property string $group |
134 | * |
135 | * == Annotations == |
136 | * This is used on annotation meta tags to indicate that the corresponding |
137 | * tag has been moved compared to its initial location defined by wikitext. |
138 | * An annotation tag can be moved either as the result of fostering or as |
139 | * the result of annotation range extension to enclose a contiguous DOM |
140 | * forest. |
141 | * @property bool|null $wasMoved |
142 | * |
143 | * == HTML tags == |
144 | * |
145 | * Are void tags self-closed? (Ex: `<br>` vs `<br />`) |
146 | * @property bool|null $selfClose |
147 | * |
148 | * Void tags that are not self-closed (Ex: `<br>`) |
149 | * @property bool|null $noClose |
150 | * |
151 | * Whether this start HTML tag has no corresponding wikitext and was auto-inserted by a token |
152 | * handler to generate well-formed html. Usually happens when a token handler fixes up misnesting. |
153 | * @property bool|null $autoInsertedStartToken |
154 | * |
155 | * Whether this end HTML tag has no corresponding wikitext and was auto-inserted by a token |
156 | * handler to generate well-formed html. Usually happens when a token handler fixes up misnesting. |
157 | * @property bool|null $autoInsertedEndToken |
158 | * |
159 | * Whether this start HTML tag has no corresponding wikitext and was auto-inserted to generate |
160 | * well-formed html. Usually happens when treebuilder fixes up badly nested HTML. |
161 | * @property bool|null $autoInsertedStart |
162 | * |
163 | * Whether this end HTML tag has no corresponding wikitext and was auto-inserted to generate |
164 | * well-formed html. Ex: `<tr>`, `<th>`, `<td>`, `<li>`, etc. that have no explicit closing |
165 | * markup. Or, html tags that aren't closed. |
166 | * @property bool|null $autoInsertedEnd |
167 | * |
168 | * Source tag name for HTML tags. Records case variations (`<div>` vs `<DiV>` vs `<DIV>`). |
169 | * @property string|null $srcTagName |
170 | * |
171 | * UnpackDomFragments sets this on misnested elements |
172 | * @property bool|null $misnested |
173 | * |
174 | * This is set by MarkFosteredContent to indicate fostered content and content |
175 | * wrappers. |
176 | * @property bool|null $fostered |
177 | * |
178 | * == Links == |
179 | * |
180 | * Link trail source (Ex: the "l" in `[[Foo]]l`) |
181 | * Porting note: this can be '0', handle emptiness checks with care |
182 | * @property string|null $tail |
183 | * |
184 | * Link prefix source |
185 | * Porting note: this can be '0', handle emptiness checks with care |
186 | * @property string|null $prefix |
187 | * |
188 | * True if the link was a pipetrick (`[[Foo|]]`). |
189 | * @note This will likely be removed soon since this should not show up in saved wikitext since |
190 | * this is a pre-save transformation trick. |
191 | * @property bool|null $pipeTrick |
192 | * |
193 | * Offsets of external link content. |
194 | * Temporarily present in data-parsoid, but not in final DOM output. |
195 | * @property SourceRange|null $extLinkContentOffsets |
196 | * |
197 | * Did the link use interwiki syntax? |
198 | * Probably redundant with the rel=mw:WikiLink/Interwiki |
199 | * @property bool|null $isIW |
200 | * |
201 | * == Tables == |
202 | * |
203 | * Source for start-text separators in table wikitext. |
204 | * @property string|null $startTagSrc |
205 | * |
206 | * Source for end-text separators in table wikitext. |
207 | * @property string|null $endTagSrc |
208 | * |
209 | * Source for attribute-text separators in table wikitext. |
210 | * @property string|null $attrSepSrc |
211 | * |
212 | * 'row' for td/th cells that show up on the same line, null otherwise |
213 | * @property string|null $stx_v |
214 | * |
215 | * == Language variant token properties == |
216 | * |
217 | * @property array|null $flags Flags with their human-readable names |
218 | * @property array|null $variants The variant names |
219 | * @property array|null $original Original flags |
220 | * @property array|null $flagSp Spaces around flags, uncompressed |
221 | * |
222 | * An array of associative arrays describing the parts of the variant rule. |
223 | * - text: (string) The text |
224 | * - semi: (bool) A semicolon marker |
225 | * - sp: (array|string) An array of strings containing spaces |
226 | * - oneway: (bool) A one-way rule definition |
227 | * - twoway: (bool) A two-way rule definition |
228 | * - from: (array) An associative array: |
229 | * - tokens: (array) A token array |
230 | * - srcOffsets: SourceRange |
231 | * - to: (array) An associative array same as "from" |
232 | * - lang: (string) |
233 | * @property array|null $texts |
234 | * |
235 | * == Language variant data-parsoid properties == |
236 | * |
237 | * @property array|null $flSp Spaces around flags, compressed with compressSpArray(). |
238 | * @property array|null $tSp Spaces around texts, compressed with compressSpArray(). |
239 | * @property array|null $fl Original flags, copied from $this->original on the token. |
240 | */ |
#[\AllowDynamicProperties]
class DataParsoid {
	/**
	 * Transient, per-node scratch data used to hand information from one
	 * stage of the wt->html pipeline to the next. It is dropped before
	 * serialization.
	 * @var TempData|null
	 */
	public $tmp;

	/**
	 * Produce a copy of this object that does not share its object-valued
	 * properties with the original.
	 *
	 * @return DataParsoid
	 */
	public function clone(): self {
		$copy = clone $this;

		// These properties are handed to Utils::clone() for a deep copy.
		foreach ( [ 'tmp', 'linkTk', 'tokens' ] as $deepProp ) {
			if ( isset( $copy->$deepProp ) ) {
				$copy->$deepProp = Utils::clone( $copy->$deepProp );
			}
		}

		// For these properties a plain (shallow) clone of the value is enough.
		foreach ( [ 'tsr', 'dsr', 'extTagOffsets', 'extLinkContentOffsets' ] as $shallowProp ) {
			if ( isset( $copy->$shallowProp ) ) {
				$copy->$shallowProp = clone $copy->$shallowProp;
			}
		}

		// Everything else was already copied adequately by the clone operator.
		return $copy;
	}

	/**
	 * Report whether this object differs from a freshly constructed instance.
	 *
	 * @return bool
	 */
	public function isModified(): bool {
		$pristine = new self;
		// NOTE: the comparison has to be loose (property-wise); a strict
		// comparison would test object identity and so would not work here.
		// @phan-suppress-next-line PhanPluginComparisonObjectEqualityNotStrict
		return $this != $pristine;
	}

	/**
	 * Return the object that temporary properties can be written to,
	 * creating it on first use.
	 * @return TempData
	 */
	public function getTemp(): TempData {
		// Although tmp is a declared property, it may still be unset/null.
		if ( !isset( $this->tmp ) ) {
			$this->tmp = new TempData();
		}
		return $this->tmp;
	}

	/**
	 * Test whether a bit is set in $this->tmp->bits
	 *
	 * @param int $flag
	 * @return bool False when there is no tmp object at all.
	 */
	public function getTempFlag( $flag ): bool {
		if ( !isset( $this->tmp ) ) {
			return false;
		}
		return (bool)( $this->tmp->bits & $flag );
	}

	/**
	 * Set or clear a bit in $this->tmp->bits
	 *
	 * @param int $flag
	 * @param bool $value True to set the bit, false to clear it.
	 */
	public function setTempFlag( $flag, $value = true ): void {
		$hasTmp = isset( $this->tmp );

		if ( !$value ) {
			// Clearing a bit is a no-op when there is nothing to clear it from.
			if ( $hasTmp ) {
				$this->tmp->bits &= ~$flag;
			}
			return;
		}

		if ( $hasTmp ) {
			$this->tmp->bits |= $flag;
			return;
		}

		// No tmp object yet: create one that carries just this flag.
		$fresh = new TempData;
		$fresh->bits = $flag;
		$this->tmp = $fresh;
	}
}