Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 60 |
|
0.00% |
0 / 8 |
CRAP | |
0.00% |
0 / 1 |
| DataParsoid | |
0.00% |
0 / 60 |
|
0.00% |
0 / 8 |
870 | |
0.00% |
0 / 1 |
| __clone | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
56 | |||
| isModified | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getTemp | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getTempFlag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
| setTempFlag | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
| toJsonArray | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
| jsonClassHintFor | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
| newFromJsonArray | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
90 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\NodeData; |
| 5 | |
| 6 | use Wikimedia\JsonCodec\Hint; |
| 7 | use Wikimedia\JsonCodec\JsonCodecable; |
| 8 | use Wikimedia\JsonCodec\JsonCodecableTrait; |
| 9 | use Wikimedia\Parsoid\Core\DomSourceRange; |
| 10 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
| 11 | use Wikimedia\Parsoid\Tokens\SourceRange; |
| 12 | use Wikimedia\Parsoid\Tokens\Token; |
| 13 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
| 14 | use Wikimedia\Parsoid\Utils\Utils; |
| 15 | |
| 16 | /** |
| 17 | * Parsoid data for a DOM node. Managed by DOMDataUtils::get/setDataParsoid(). |
| 18 | * |
| 19 | * To reduce memory usage, most of the properties need to be undeclared, but we can
| 20 | * use the property declarations below to satisfy phan and to provide type |
| 21 | * information to IDEs. |
| 22 | * |
| 23 | * TODO: Declaring common properties would be beneficial for memory usage, but |
| 24 | * changes the JSON serialized output and breaks tests. |
| 25 | * |
| 26 | * == Miscellaneous / General properties == |
| 27 | * |
| 28 | * Used to emit original wikitext in some scenarios (entities, placeholder spans) |
| 29 | * Porting note: this can be '0', handle emptiness checks with care |
| 30 | * @property string|null $src |
| 31 | * |
| 32 | * Tag widths for all tokens. |
| 33 | * Temporarily present in data-parsoid, but not in final DOM output. |
| 34 | * @see ComputeDSR::computeNodeDSR() |
| 35 | * @property SourceRange|null $tsr |
| 36 | * |
| 37 | * Wikitext source ranges that generated this DOM node. |
| 38 | * In the form [ start-offset, end-offset ] or |
| 39 | * [ start-offset, end-offset, start-tag-width, end-tag-width ]. |
| 40 | * |
| 41 | * Consider input wikitext: `abcdef ''foo'' something else`. Let us look at the `''foo''` |
| 42 | * part of the input. It generates `<i data-parsoid='{"dsr":[7,14,2,2]}'>foo</i>` . The dsr |
| 43 | * property of the data-parsoid attribute of this i-tag tells us the following. This HTML node |
| 44 | * maps to input wikitext substring 7..14. The opening tag <i> was 2 characters wide in wikitext |
| 45 | * and the closing tag </i> was also 2 characters wide in wikitext. |
| 46 | * @property DomSourceRange|null $dsr |
| 47 | * |
| 48 | * Denotes special syntax. Possible values: |
| 49 | * - 'html' for html tags. Ex: `<div>foo</div>` |
| 50 | * - 'row' for dt/dd that show on the same line. Ex: `;a:b` (but not `;a\n:b`) |
| 51 | * - 'piped' for piped wikilinks with explicit content Ex: `[[Foo|bar]]` (but not `[[Foo]]`) |
| 52 | * - 'magiclink', 'url' - legacy, not used anymore |
| 53 | * @property string|null $stx |
| 54 | * |
| 55 | * Template parameter infos produced by TemplateHandler. After unserialization, |
| 56 | * the objects are not fully populated. |
| 57 | * @property list<list<ParamInfo>>|null $pi |
| 58 | * |
| 59 | * DocumentFragment content tunneled through for DOMFragment Token. |
| 60 | * @property DocumentFragment|null $html |
| 61 | * |
| 62 | * On mw:Entity spans this is set to the decoded entity value. |
| 63 | * @property string|null $srcContent |
| 64 | * |
| 65 | * An array of associative arrays describing image rendering options, attached |
| 66 | * to the image container (span or figure). |
| 67 | * - ck: Canonical key for the image option. |
| 68 | * - ak: Aliased key. |
| 69 | * @property array|null $optList |
| 70 | * |
| 71 | * Rendered attributes (shadow info). The key is the attribute name. The value |
| 72 | * is documented as "mixed" but seems to be coerced to string in |
| 73 | * Sanitizer::sanitizeTagAttrs(). |
| 74 | * @property array|null $a Rendered attributes |
| 75 | * |
| 76 | * Source attributes (shadow info). The key is the attribute name. The value |
| 77 | * is documented as "mixed" but may possibly be a nullable string. |
| 78 | * @property array|null $sa Source attributes |
| 79 | * |
| 80 | * The number of extra dashes in the source of an hr |
| 81 | * @property int|null $extra_dashes |
| 82 | * |
| 83 | * The complete text of a double-underscore behavior switch |
| 84 | * @property string|null $magicSrc |
| 85 | * |
| 86 | * True if the input heading element had an id attribute, preventing automatic |
| 87 | * assignment of a new id attribute. |
| 88 | * @property bool|null $reusedId |
| 89 | * |
| 90 | * The link token associated with a redirect |
| 91 | * @property Token|null $linkTk |
| 92 | * |
| 93 | * This is set to "extlink" on auto URL (external hotlink) image links. |
| 94 | * @property string|null $type |
| 95 | * |
| 96 | * On a meta mw:Placeholder/StrippedTag, this is the name of the stripped tag. |
| 97 | * @property string|null $name |
| 98 | * |
| 99 | * This is set on image containers in which a template expands to multiple |
| 100 | * image parameters. It is converted to a typeof attribute later in the same |
| 101 | * function, so it's unclear why it needs to persist in data-parsoid. |
| 102 | * @property bool|null $uneditable |
| 103 | * |
| 104 | * == WrapTemplates == |
| 105 | * |
| 106 | * The wikitext source which was not included in a template wrapper. |
| 107 | * @property string|null $unwrappedWT |
| 108 | * |
| 109 | * The token or DOM node name, optionally suffixed with the syntax name from |
| 110 | * $this->stx, of the first node within the encapsulated content. |
| 111 | * @property string|null $firstWikitextNode |
| 112 | * |
| 113 | * == Extensions == |
| 114 | * |
| 115 | * Offsets of opening and closing tags for extension tags, in the form |
| 116 | * [ opening tag start , closing tag end, opening tag width, closing tag width ] |
| 117 | * Temporarily present in data-parsoid, but not in final DOM output. |
| 118 | * @property DomSourceRange|null $extTagOffsets |
| 119 | * |
| 120 | * This is true on the extension output wrapper if the extension input wikitext |
| 121 | * was an empty string. Consumed by <references/>. |
| 122 | * @property bool $empty |
| 123 | * |
| 124 | * The reference group. This is attached to the <ol> or its wrapper <div>, |
| 125 | * redundantly with the data-mw-group attribute on the <ol>. It is produced by |
| 126 | * the extension's sourceToDom() and consumed by wtPostprocess(). |
| 127 | * @property string $group |
| 128 | * |
| 129 | * == Annotations == |
| 130 | * This is used on annotation meta tags to indicate that the corresponding |
| 131 | * tag has been moved compared to its initial location defined by wikitext.
| 132 | * An annotation tag can be moved either as the result of fostering or as |
| 133 | * the result of annotation range extension to enclose a contiguous DOM |
| 134 | * forest. |
| 135 | * @property bool|null $wasMoved |
| 136 | * |
| 137 | * == HTML tags == |
| 138 | * |
| 139 | * Are void tags self-closed? (Ex: `<br>` vs `<br />`) |
| 140 | * @property bool|null $selfClose |
| 141 | * |
| 142 | * Void tags that are not self-closed (Ex: `<br>`) |
| 143 | * @property bool|null $noClose |
| 144 | * |
| 145 | * Whether this start HTML tag has no corresponding wikitext and was auto-inserted by a token |
| 146 | * handler to generate well-formed html. Usually happens when a token handler fixes up misnesting. |
| 147 | * @property bool|null $autoInsertedStartToken |
| 148 | * |
| 149 | * Whether this end HTML tag has no corresponding wikitext and was auto-inserted by a token |
| 150 | * handler to generate well-formed html. Usually happens when a token handler fixes up misnesting. |
| 151 | * @property bool|null $autoInsertedEndToken |
| 152 | * |
| 153 | * Whether this start HTML tag has no corresponding wikitext and was auto-inserted to generate |
| 154 | * well-formed html. Usually happens when treebuilder fixes up badly nested HTML. |
| 155 | * @property bool|null $autoInsertedStart |
| 156 | * |
| 157 | * Whether this end HTML tag has no corresponding wikitext and was auto-inserted to generate |
| 158 | * well-formed html. Ex: `<tr>`, `<th>`, `<td>`, `<li>`, etc. that have no explicit closing |
| 159 | * markup. Or, html tags that aren't closed. |
| 160 | * @property bool|null $autoInsertedEnd |
| 161 | * |
| 162 | * Source tag name for HTML tags. Records case variations (`<div>` vs `<DiV>` vs `<DIV>`). |
| 163 | * @property string|null $srcTagName |
| 164 | * |
| 165 | * UnpackDomFragments sets this on misnested elements |
| 166 | * @property bool|null $misnested |
| 167 | * |
| 168 | * This is set by MarkFosteredContent to indicate fostered content and content |
| 169 | * wrappers. |
| 170 | * @property bool|null $fostered |
| 171 | * |
| 172 | * == Links == |
| 173 | * |
| 174 | * Link trail source (Ex: the "l" in `[[Foo]]l`) |
| 175 | * Porting note: this can be '0', handle emptiness checks with care |
| 176 | * @property string|null $tail |
| 177 | * |
| 178 | * Link prefix source |
| 179 | * Porting note: this can be '0', handle emptiness checks with care |
| 180 | * @property string|null $prefix |
| 181 | * |
| 182 | * Did the link use interwiki syntax? |
| 183 | * Probably redundant with the rel=mw:WikiLink/Interwiki |
| 184 | * @property bool|null $isIW |
| 185 | * |
| 186 | * Source for first separator in a wikilink to account for variation |
| 187 | * Ex. [[Test{{!}}123]] |
| 188 | * @property string|null $firstPipeSrc |
| 189 | * |
| 190 | * == Tables == |
| 191 | * |
| 192 | * Source for start-text separators in table wikitext. |
| 193 | * @property string|null $startTagSrc |
| 194 | * |
| 195 | * Source for end-text separators in table wikitext. |
| 196 | * @property string|null $endTagSrc |
| 197 | * |
| 198 | * Source for attribute-text separators in table wikitext. |
| 199 | * @property string|null $attrSepSrc |
| 200 | * |
| 201 | * 'row' for td/th cells that show up on the same line, null otherwise |
| 202 | * @property string|null $stx_v |
| 203 | * |
| 204 | * == Language variant token properties == |
| 205 | * |
| 206 | * @property array|null $flags Flags with their human-readable names |
| 207 | * @property array|null $variants The variant names |
| 208 | * @property array|null $original Original flags |
| 209 | * @property array|null $flagSp Spaces around flags, uncompressed |
| 210 | * |
| 211 | * An array of associative arrays describing the parts of the variant rule. |
| 212 | * - text: (string) The text |
| 213 | * - semi: (bool) A semicolon marker |
| 214 | * - sp: (array|string) An array of strings containing spaces |
| 215 | * - oneway: (bool) A one-way rule definition |
| 216 | * - twoway: (bool) A two-way rule definition |
| 217 | * - from: (array) An associative array: |
| 218 | * - tokens: (array) A token array |
| 219 | * - srcOffsets: SourceRange |
| 220 | * - to: (array) An associative array same as "from" |
| 221 | * - lang: (string) |
| 222 | * @property array|null $texts |
| 223 | * |
| 224 | * == Language variant data-parsoid properties == |
| 225 | * |
| 226 | * @property array|null $flSp Spaces around flags, compressed with compressSpArray(). |
| 227 | * @property array|null $tSp Spaces around texts, compressed with compressSpArray(). |
| 228 | * @property array|null $fl Original flags, copied from $this->original on the token. |
| 229 | */ |
| 230 | #[\AllowDynamicProperties]
| 231 | class DataParsoid implements JsonCodecable {
| 232 | use JsonCodecableTrait;
| 233 | 
| 234 | /**
| 235 | * Holds a number of transient properties in the wt->html pipeline to pass information between
| 236 | * stages. Dropped before serialization (toJsonArray() removes the 'tmp' key).
| 237 | */
| 238 | public ?TempData $tmp;
| 239 | 
| 240 | /**
| 241 | * Deeply clone this object: nested lists, objects and fragments are copied, not shared.
| 242 | */
| 243 | public function __clone() {
| 244 | // Deep clone non-primitive properties; properties that are not set are skipped.
| 245 | 
| 246 | // 1. Properties which are lists of cloneable objects (copied via Utils::cloneArray)
| 247 | foreach ( [ 'pi' ] as $prop ) {
| 248 | if ( isset( $this->$prop ) ) {
| 249 | $this->$prop = Utils::cloneArray( $this->$prop );
| 250 | }
| 251 | }
| 252 | 
| 253 | // 2. Properties which are cloneable objects (copied with the clone operator)
| 254 | foreach ( [ 'tmp', 'linkTk', 'tsr', 'dsr', 'extTagOffsets' ] as $prop ) {
| 255 | if ( isset( $this->$prop ) ) {
| 256 | $this->$prop = clone $this->$prop;
| 257 | }
| 258 | }
| 259 | // 3. Properties which are DocumentFragments (copied via DOMDataUtils::cloneDocumentFragment)
| 260 | foreach ( [ 'html' ] as $field ) {
| 261 | if ( isset( $this->$field ) ) {
| 262 | $this->$field = DOMDataUtils::cloneDocumentFragment( $this->$field );
| 263 | }
| 264 | }
| 265 | }
| 266 | 
| 267 | public function isModified(): bool { // True when toJsonArray() yields at least one entry
| 268 | return $this->toJsonArray() !== [];
| 269 | }
| 270 | 
| 271 | /**
| 272 | * Get a lazy-initialized object to which temporary properties can be written.
| 273 | * @return TempData The existing $this->tmp, or a new TempData stored there on first use
| 274 | */
| 275 | public function getTemp(): TempData {
| 276 | // tmp can be unset despite being declared, so create it on demand
| 277 | $this->tmp ??= new TempData();
| 278 | return $this->tmp;
| 279 | }
| 280 | 
| 281 | /**
| 282 | * Check whether a bit is set in $this->tmp->bits; returns false when tmp is not set.
| 283 | *
| 284 | * @param int $flag Bit mask that is ANDed with $this->tmp->bits
| 285 | * @return bool
| 286 | */
| 287 | public function getTempFlag( $flag ): bool {
| 288 | return isset( $this->tmp ) && ( $this->tmp->bits & $flag );
| 289 | }
| 290 | 
| 291 | /**
| 292 | * Set or clear a bit in $this->tmp->bits
| 293 | *
| 294 | * @param int $flag Bit mask to set or clear
| 295 | * @param bool $value True (the default) sets the bit; false clears it
| 296 | */
| 297 | public function setTempFlag( $flag, $value = true ): void {
| 298 | if ( $value ) {
| 299 | if ( !isset( $this->tmp ) ) { // No TempData yet: create one holding only this flag
| 300 | $tmp = new TempData;
| 301 | $tmp->bits = $flag;
| 302 | $this->tmp = $tmp;
| 303 | } else {
| 304 | $this->tmp->bits |= $flag;
| 305 | }
| 306 | } elseif ( isset( $this->tmp ) ) { // Clearing is a no-op when tmp is not set
| 307 | $this->tmp->bits &= ~$flag;
| 308 | }
| 309 | }
| 310 | 
| 311 | /** @inheritDoc */
| 312 | public function toJsonArray(): array {
| 313 | static $clearNullsFrom = [
| 314 | 'dsr', 'tsr', 'extTagOffsets',
| 315 | ];
| 316 | $result = (array)$this; // Array cast of the object; dynamically-set properties are included
| 317 | unset( $result['tmp'] ); // tmp is transient and must not be serialized
| 318 | // Conciseness: don't include `null` values from certain properties.
| 319 | foreach ( $clearNullsFrom as $prop ) {
| 320 | if ( !isset( $result[$prop] ) ) {
| 321 | unset( $result[$prop] );
| 322 | }
| 323 | }
| 324 | return $result;
| 325 | }
| 326 | 
| 327 | /** @inheritDoc */
| 328 | public static function jsonClassHintFor( string $keyname ) {
| 329 | static $hints = null; // Hint table is built on the first call and reused afterwards
| 330 | if ( $hints === null ) {
| 331 | $dsr = DomSourceRange::hint();
| 332 | $sr = SourceRange::hint();
| 333 | $hints = [
| 334 | 'dsr' => $dsr,
| 335 | 'extTagOffsets' => $dsr,
| 336 | 'tsr' => $sr,
| 337 | 'pi' => Hint::build( ParamInfo::class, Hint::LIST, Hint::LIST ),
| 338 | 'linkTk' => Token::class,
| 339 | 'html' => DocumentFragment::class,
| 340 | ];
| 341 | }
| 342 | return $hints[$keyname] ?? null; // null when there is no hint for this key
| 343 | }
| 344 | 
| 345 | /** @inheritDoc */
| 346 | public static function newFromJsonArray( array $json ): DataParsoid {
| 347 | $dp = new DataParsoid;
| 348 | foreach ( $json as $key => $value ) {
| 349 | switch ( $key ) {
| 350 | case 'dsr':
| 351 | case 'extTagOffsets':
| 352 | case 'tsr':
| 353 | // For backward compatibility, leave these unset if null.
| 354 | if ( $value !== null ) {
| 355 | $dp->$key = $value;
| 356 | }
| 357 | break;
| 358 | case 'tmp':
| 359 | // This isn't serialized, but we can deserialize it
| 360 | // for tests. Each entry of $value is copied onto a fresh TempData.
| 361 | $tmp = new TempData;
| 362 | foreach ( $value as $key2 => $value2 ) {
| 363 | $tmp->$key2 = $value2;
| 364 | }
| 365 | $dp->$key = $tmp;
| 366 | break;
| 367 | default:
| 368 | $dp->$key = $value; // Every other key is assigned as-is
| 369 | break;
| 370 | }
| 371 | }
| 372 | return $dp;
| 373 | }
| 374 | }