Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
5.41% |
2 / 37 |
|
13.33% |
2 / 15 |
CRAP | |
0.00% |
0 / 1 |
PFragment | |
5.41% |
2 / 37 |
|
13.33% |
2 / 15 |
791.80 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isEmpty | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isAtomic | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isValid | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getSrcOffsets | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
asDom | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
asHtmlString | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
asMarkedWikitext | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
fromSplitWt | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
56 | |||
joinSourceRange | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
registerFragmentClass | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
toJsonArray | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
newFromJsonArray | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
jsonClassHintFor | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 | |||
hint | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Fragments; |
5 | |
6 | use JsonException; |
7 | use Wikimedia\JsonCodec\Hint; |
8 | use Wikimedia\JsonCodec\JsonCodecable; |
9 | use Wikimedia\JsonCodec\JsonCodecableTrait; |
10 | use Wikimedia\Parsoid\Core\DomSourceRange; |
11 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
12 | use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; |
13 | |
14 | /** |
15 | * A PFragment is a MediaWiki content fragment. |
16 | * |
17 | * PFragment is the input and output type for fragment generators in |
18 | * MediaWiki: magic variables, parser functions, templates, and |
19 | * extension tags. You can imagine that the `P` stands for "Parsoid", |
20 | * "Page", or "MediaWiki Content" but in reality it simply |
21 | * disambiguates this fragment type from the DOM DocumentFragment and |
22 | * any other fragments you might encounter. |
23 | * |
24 | * PFragment is an abstract class, and content is lazily converted to the |
25 | * form demanded by a consumer. Converting forms often loses information |
26 | * or introduces edge cases, so we avoid conversion to intermediate forms |
27 | * and defer conversion in general as late as possible. |
28 | * |
29 | * For example, in this invocation: |
30 | * {{1x|'''bold''' <nowiki>fragment</nowiki>}} |
31 | * |
32 | * If we were to flatten this "as string" (traditionally) we would |
33 | * lose the bold face and the <nowiki> would get tunneled as strip |
34 | * state. Alternatively we could ask for this "as a source string" |
35 | * which corresponds to the original "raw" form: "'''bold''' |
36 | * <nowiki>fragment</nowiki>", which is often used to pass literal |
37 | * arguments, bypassing wikitext processing. Or we could |
38 | * ask for the argument "as HTML" or "as DOM" in which case it would |
39 | * get parsed as wikitext and returned as |
40 | * `<b>bold</b> <span>fragment</span>`, either as a possibly-unbalanced |
41 | * string ("as HTML") or as a balanced DOM tree ("as DOM"). These |
42 | * transformations can be irreversible: once we've converted to one |
43 | * representation we can't always recover the others. |
44 | * |
45 | * But now consider if `{{1x|...}}` simply wants to return its argument: |
46 | * it doesn't need to force a specific representation, instead |
47 | * it can return the PFragment directly without losing information |
48 | * and allow the downstream customer to choose the type it prefers. |
49 | * This also works for composition: a composite PFragment can be |
50 | * defined which defers evaluation of its components until demanded, |
51 | * and then applies the appropriate composition operation depending |
52 | * on the demanded result. |
53 | * |
54 | * (WikitextPFragment is one such composite fragment type, which uses |
55 | * Parsoid to do the composition of wikitext and other fragments.) |
56 | * |
57 | * Parsoid defines only those fragment types relevant to itself, and |
58 | * defines conversions (`as*()` methods) only for those formats it |
59 | * needs for HTML rendering. Extensions should feel free to define |
60 | * their own fragment types: as long as they are JsonCodecable and |
61 | * define one of ::asDom() or ::asHtmlString() they will interoperate |
62 | * with Parsoid and other extensions, albeit possibly as an opaque |
63 | * strip marker. |
64 | * |
65 | * For example, Wikifunctions might define a PFragment for ZObjects, |
66 | * which would allow nested wikifunction invocations to transfer |
67 | * ZObjects between themselves without conversion through wikitext. |
68 | * For example, given: |
69 | * {{#function:sum| {{#function:one}} }} |
70 | * then the `sum` function will be given a ZObjectPFragment containing |
71 | * the output of the `one` function, without forcing that value to |
72 | * serialize to a wikitext string and deserialize. With its special |
73 | * knowledge of the ZObjectPFragment type, Wikifunctions can use this |
74 | * to (say) preserve type information of the values. But if this |
75 | * same function is embedded into a wikitext template: |
76 | * {{1x| {{#function:one}} }} |
77 | * then the value will be converted to wikitext or DOM as appropriate |
78 | * and composed onto the page in that form. |
79 | */ |
abstract class PFragment implements JsonCodecable {
	use JsonCodecableTrait;

	/**
	 * The original wikitext source range for this fragment, or `null` for
	 * synthetic content that corresponds to no part of the original
	 * authored text.
	 */
	protected ?DomSourceRange $srcOffsets;

	/**
	 * Registry of known fragment types, used for serialization.
	 * @see ::registerFragmentClass()
	 * @var list<class-string<PFragment>>
	 */
	protected static array $FRAGMENT_TYPES = [
		WikitextPFragment::class,
		HtmlPFragment::class,
		DomPFragment::class,
		LiteralStringPFragment::class,
	];

	/**
	 * @param ?DomSourceRange $srcOffsets The source range this fragment
	 *   corresponds to, or `null` for synthetic content.
	 */
	protected function __construct( ?DomSourceRange $srcOffsets ) {
		$this->srcOffsets = $srcOffsets;
	}

	/**
	 * Returns true if this fragment is empty.  This enables optimizations
	 * if implemented, but returns false by default.
	 *
	 * @return bool
	 */
	public function isEmpty(): bool {
		return false;
	}

	/**
	 * Returns true if this fragment contains no wikitext elements; that is,
	 * if `::asMarkedWikitext()` given an empty strip state
	 * would return a single strip marker and add a single item to the
	 * strip state (representing $this).  Otherwise, returns false.
	 *
	 * @return bool
	 */
	public function isAtomic(): bool {
		// This is consistent with the default implementation of
		// ::asMarkedWikitext()
		return true;
	}

	/**
	 * As an optimization to avoid unnecessary copying, certain
	 * operations on fragments may be destructive or lead to aliasing.
	 * For ease of debugging, fragments so affected will return `false`
	 * from `::isValid()` and code is encouraged to assert the validity
	 * of fragments where convenient to do so.
	 *
	 * @see the $release parameter to `::asDom()` and `DomPFragment::concat`,
	 *  but other PFragment types with mutable non-value types might also
	 *  provide accessors with `$release` parameters that interact with
	 *  fragment validity.
	 *
	 * @return bool
	 */
	public function isValid(): bool {
		// By default, fragments are valid forever.

		// See DomPFragment for an example of a fragment which may become
		// invalid.
		return true;
	}

	/**
	 * Return the region of the source document that corresponds to this
	 * fragment.
	 *
	 * @return ?DomSourceRange The value given to the constructor; `null`
	 *   for synthetic content.
	 */
	public function getSrcOffsets(): ?DomSourceRange {
		return $this->srcOffsets;
	}

	/**
	 * Return the fragment as a (prepared and loaded) DOM
	 * DocumentFragment belonging to the Parsoid top-level document.
	 *
	 * If $release is true, then this PFragment will become invalid
	 * after this method returns.
	 *
	 * @note The default implementation of ::asDom() calls ::asHtmlString().
	 *  Subclasses must implement either ::asDom() or ::asHtmlString()
	 *  to avoid infinite mutual recursion.
	 *
	 * @param ParsoidExtensionAPI $ext
	 * @param bool $release Not consulted by this default implementation,
	 *   which always builds its result from ::asHtmlString().
	 * @return DocumentFragment
	 */
	public function asDom( ParsoidExtensionAPI $ext, bool $release = false ): DocumentFragment {
		return $ext->htmlToDom( $this->asHtmlString( $ext ) );
	}

	/**
	 * Return the fragment as a string of HTML.  This method is very
	 * similar to asDom() but also supports fragmentary and unbalanced
	 * HTML, and therefore composition may yield unexpected results.
	 * This is a common type in legacy MediaWiki code, but use in
	 * new code should be discouraged.  Data attributes will be
	 * represented as inline attributes, which may be suboptimal.
	 *
	 * @note The default implementation of ::asHtmlString() calls ::asDom().
	 *  Subclasses must implement either ::asDom() or ::asHtmlString()
	 *  to avoid infinite mutual recursion.
	 *
	 * @param ParsoidExtensionAPI $ext
	 * @return string
	 */
	public function asHtmlString( ParsoidExtensionAPI $ext ): string {
		return $ext->domToHtml( $this->asDom( $ext ), true );
	}

	/**
	 * This method returns a "wikitext string" in the legacy format.
	 * Wikitext constructs will be parsed in the result.
	 * Constructs which are not representable in wikitext will be replaced
	 * with strip markers, and you will get a strip state which maps
	 * those markers back to PFragment objects.  When you (for example)
	 * compose two marked strings and then ask for the result `asDom`,
	 * the strip markers in the marked strings will first be conceptually
	 * replaced with the PFragment from the StripState, and then
	 * the resulting interleaved strings and fragments will be composed.
	 *
	 * @param StripState $stripState Strip state which receives this
	 *   fragment.
	 * @return string The value returned by StripState::addWtItem() for
	 *   this fragment.
	 */
	public function asMarkedWikitext( StripState $stripState ): string {
		// By default just adds this fragment to the strip state and
		// returns a strip marker.  Non-atomic fragments can be
		// more clever.
		return $stripState->addWtItem( $this );
	}

	/**
	 * Helper function to create a new fragment from a mixed array of
	 * strings and fragments.
	 *
	 * Unlike WikitextPFragment::newFromSplitWt() this method will not
	 * always return a WikitextPFragment; for example if only one
	 * non-empty piece is provided this method will just return that
	 * piece without casting it to a WikitextPFragment.
	 *
	 * @param list<string|PFragment> $pieces
	 * @param ?DomSourceRange $srcOffset Passed through to
	 *   WikitextPFragment::newFromSplitWt() when a composite is built;
	 *   unused when a single fragment piece is returned as-is.
	 * @return PFragment
	 */
	public static function fromSplitWt( array $pieces, ?DomSourceRange $srcOffset = null ): PFragment {
		$result = [];
		// Remove empty pieces
		foreach ( $pieces as $p ) {
			if ( $p === '' ) {
				continue;
			}
			if ( $p instanceof PFragment && $p->isEmpty() ) {
				continue;
			}
			$result[] = $p;
		}
		// Optimize!  A lone fragment needs no wrapper; a lone string
		// still falls through to WikitextPFragment below.
		if ( count( $result ) === 1 && $result[0] instanceof PFragment ) {
			return $result[0];
		}
		return WikitextPFragment::newFromSplitWt( $result, $srcOffset );
	}

	/**
	 * Helper function to append two source ranges.
	 *
	 * @param ?DomSourceRange $first
	 * @param ?DomSourceRange $second
	 * @return ?DomSourceRange A range running from the start of $first
	 *   to the end of $second, or `null` if either argument is `null`.
	 */
	protected static function joinSourceRange( ?DomSourceRange $first, ?DomSourceRange $second ): ?DomSourceRange {
		if ( $first === null || $second === null ) {
			return null;
		}
		return new DomSourceRange( $first->start, $second->end, null, null );
	}

	// JsonCodec support

	/**
	 * Register a fragment type with the JSON deserialization code.
	 *
	 * The given class should have a static constant named TYPE_HINT
	 * which gives the unique string property name which will distinguish
	 * serialized fragments of the given class.
	 *
	 * This method is static because it only touches the static registry;
	 * PFragment is abstract with a protected constructor, so requiring an
	 * instance just to register a type would be needlessly awkward.
	 * Calling it through an existing instance continues to work.
	 *
	 * Registering the same class more than once has no additional effect.
	 *
	 * @param class-string<PFragment> $className
	 */
	public static function registerFragmentClass( string $className ): void {
		if ( !in_array( $className, self::$FRAGMENT_TYPES, true ) ) {
			self::$FRAGMENT_TYPES[] = $className;
		}
	}

	/**
	 * @inheritDoc
	 * @return array An array holding the source range under the `dsr`
	 *   key, or an empty array when there is no source range.
	 */
	protected function toJsonArray(): array {
		return $this->srcOffsets === null ? [] : [
			'dsr' => $this->srcOffsets
		];
	}

	/**
	 * @inheritDoc
	 *
	 * Dispatches to the first registered fragment class whose TYPE_HINT
	 * key is set in $json.
	 *
	 * @throws JsonException if no registered fragment class matches.
	 */
	public static function newFromJsonArray( array $json ): PFragment {
		foreach ( self::$FRAGMENT_TYPES as $c ) {
			if ( isset( $json[$c::TYPE_HINT] ) ) {
				return $c::newFromJsonArray( $json );
			}
		}
		throw new JsonException( "unknown fragment type" );
	}

	/**
	 * @inheritDoc
	 *
	 * The `dsr` key is hinted as a DomSourceRange; a key equal to a
	 * registered class's TYPE_HINT is delegated to that class; any other
	 * key yields `null`.
	 */
	public static function jsonClassHintFor( string $keyName ) {
		if ( $keyName === 'dsr' ) {
			return DomSourceRange::hint();
		}
		foreach ( self::$FRAGMENT_TYPES as $c ) {
			if ( $keyName === $c::TYPE_HINT ) {
				return $c::jsonClassHintFor( $keyName );
			}
		}
		return null;
	}

	/**
	 * Build a JsonCodec hint for PFragment using the Hint::INHERITED
	 * modifier.
	 *
	 * @return Hint
	 */
	public static function hint(): Hint {
		return Hint::build( self::class, Hint::INHERITED );
	}
}