Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 48 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
SectionMetadata | |
0.00% |
0 / 48 |
|
0.00% |
0 / 9 |
210 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
setExtensionData | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
20 | |||
appendExtensionData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getExtensionData | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
toArray | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
fromArray | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
fromLegacy | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
toLegacy | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
6 | |||
jsonSerialize | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Core; |
5 | |
6 | /** |
7 | * Section metadata for generating TOC. |
8 | * |
9 | * This is not the complete data for the article section, just the |
10 | * information needed to generate the table of contents. |
11 | * |
12 | * For now, this schema matches whatever is generated by Parser.php. |
13 | * Parsoid will attempt to match this output for now. |
14 | * |
15 | * Parser.php::finalizeHeadings() is the authoritative source for how |
16 | * some of these properties are computed right now, especially for the |
17 | * $line, $anchor, and $linkAnchor properties below. |
18 | * |
19 | * Linker.php::tocLine() and ::makeHeadline() demonstrate how these |
20 | * properties are used to create headings and table of contents lines. |
21 | */ |
22 | class SectionMetadata implements \JsonSerializable {
23 | /**
24 | * The heading tag level: a 1 here means an <H1> tag was used, a
25 | * 2 means an <H2> tag was used, etc.
26 | */
27 | public int $hLevel;
28 | 
29 | /**
30 | * This is a zero-indexed TOC level and the nesting level.
31 | * So, if a page has a H2-H4-H6, then, those levels 2,4,6
32 | * correspond to TOC-levels 0,1,2.
33 | */
34 | public int $tocLevel;
35 | 
36 | /**
37 | * HTML heading of the section. Only a narrow set of HTML tags are allowed here.
38 | *
39 | * This starts with the parsed headline seen in wikitext and
40 | * - replaces links with link text
41 | * - processes extension strip markers
42 | * - removes style, script tags
43 | * - strips all HTML tags except the following tags (from Parser.php)
44 | * . <sup> and <sub> (T10393)
45 | * . <i> (T28375)
46 | * . <b> (r105284)
47 | * . <bdi> (T74884)
48 | * . <span dir="rtl"> and <span dir="ltr"> (T37167)
49 | * . <s> and <strike> (T35715)
50 | * . <q> (T251672)
51 | * We strip any parameter from accepted tags, except dir="rtl|ltr" from <span>,
52 | * to allow setting directionality in toc items.
53 | *
54 | * @note This should be converted into the proper target variant.
55 | */
56 | public string $line;
57 | 
58 | /**
59 | * TOC number string (3.1.3, 4.5.2, etc.)
60 | *
61 | * @note This should be localized into the parser target language.
62 | */
63 | public string $number;
64 | 
65 | /**
66 | * Section id (integer, assigned in depth first traversal order)
67 | * Template generated sections get a "T-" prefix.
68 | */
69 | public string $index;
70 | 
71 | /**
72 | * The title of the page that generated this heading.
73 | * For template-generated sections, this will be the template title.
74 | */
75 | public ?string $fromTitle;
76 | 
77 | /**
78 | * Byte offset where the section shows up in wikitext; this is null
79 | * if this section comes from a template, if it comes from a literal
80 | * HTML <h_> tag, or otherwise doesn't correspond to a "preprocessor
81 | * section".
82 | */
83 | public ?int $byteOffset;
84 | 
85 | /**
86 | * Anchor attribute.
87 | *
88 | * This property is the "true" value of the ID attribute, and should be
89 | * used when looking up a heading or setting an attribute, for example
90 | * using Document.getElementById() or Element.setAttribute('id',...).
91 | *
92 | * This value is *not* HTML-entity escaped; if you are writing HTML
93 | * as a literal string, you should still entity-escape ampersands and
94 | * single/double quotes as appropriate.
95 | *
96 | * This value is *not* URL-escaped either; instead use the `linkAnchor`
97 | * property if you are constructing a URL to target this section.
98 | *
99 | * The anchor attribute is based on the $line property, but does extra
100 | * processing to turn it into a valid attribute:
101 | * - strip all HTML tags,
102 | * - normalizes section name
103 | * - normalizes section name whitespace
104 | * - decodes char references
105 | * - makes it a valid HTML id attribute value
106 | * (HTML5 / HTML4 based on $wgFragmentMode property)
107 | * - dedupes (case-insensitively) identical anchors by adding "_$n" suffixes
108 | */
109 | public string $anchor;
110 | 
111 | /**
112 | * Anchor URL fragment.
113 | *
114 | * This is very similar to the $anchor property, but is appropriately
115 | * URL-escaped to make it appropriate to use in constructing a URL
116 | * fragment link. You should almost always prepend a `#` symbol
117 | * to `linkAnchor` if you are using it correctly.
118 | */
119 | public string $linkAnchor;
120 | 
121 | /**
122 | * Arbitrary data attached to this section by extensions. This
123 | * data will be stored and cached in the ParserOutput object along
124 | * with the rest of the section data, and made available to external
125 | * clients via the action API.
126 | *
127 | * This property exists to overcome the unsafe practice of attaching
128 | * extra information to a section by directly assigning member variables.
129 | *
130 | * See ParserOutput::setExtensionData() for more information on typical
131 | * use.
132 | */
133 | private array $extensionData;
134 | 
135 | /**
136 | * @param int $tocLevel Zero-indexed TOC level and the nesting level
137 | * @param int $hLevel The heading tag level
138 | * @param string $line Stripped headline text
139 | * @param string $number TOC number string (3.1.3, 4.5.2, etc)
140 | * @param string $index Section id
141 | * @param ?string $fromTitle The title of the page or template that
142 | * generated this heading, or null.
143 | * @param ?int $byteOffset Byte offset where the section shows up in
144 | * wikitext, or null if this doesn't correspond to a "preprocessor section"
145 | * @param string $anchor "True" value of the ID attribute
146 | * @param string $linkAnchor URL-escaped value of the anchor, for use in
147 | * constructing a URL fragment link
148 | * @param ?array $extensionData Extension data as an associative array; null is stored as an empty array
149 | */
150 | public function __construct(
151 | // This is a great candidate for named arguments in PHP 8.0+
152 | int $tocLevel,
153 | int $hLevel,
154 | string $line,
155 | string $number,
156 | string $index,
157 | ?string $fromTitle,
158 | ?int $byteOffset,
159 | string $anchor,
160 | string $linkAnchor,
161 | ?array $extensionData = null
162 | ) {
163 | $this->tocLevel = $tocLevel;
164 | $this->line = $line;
165 | $this->hLevel = $hLevel;
166 | $this->number = $number;
167 | $this->index = $index;
168 | $this->fromTitle = $fromTitle;
169 | $this->byteOffset = $byteOffset;
170 | $this->anchor = $anchor;
171 | $this->linkAnchor = $linkAnchor;
172 | $this->extensionData = $extensionData ?? [];
173 | }
174 | 
175 | /**
176 | * Attaches arbitrary data to this SectionMetadata object. This
177 | * can be used to store some information about this section in the
178 | * ParserOutput object for later use during page output. The data
179 | * will be cached along with the ParserOutput object.
180 | *
181 | * This method is provided to overcome the unsafe practice of
182 | * attaching extra information to a section by directly assigning
183 | * member variables.
184 | *
185 | * See ParserOutput::setExtensionData() in core for further information
186 | * about typical usage in hooks.
187 | *
188 | * Setting conflicting values for the same key is not allowed.
189 | * If you call ::setExtensionData() multiple times with the same key
190 | * on a SectionMetadata, it is expected that the value will be identical
191 | * each time. If you want to collect multiple pieces of data under a
192 | * single key, use ::appendExtensionData().
193 | *
194 | * @note Only simple values (numbers, strings, or arrays) are
195 | * supported as a value. (A future revision will allow anything
196 | * that core's JsonCodec can handle.) Attempts to set other types
197 | * as extension data values will break ParserCache for the page.
198 | *
199 | * @param string $key The key for accessing the data. Extensions
200 | * should take care to avoid conflicts in naming keys. It is
201 | * suggested to use the extension's name as a prefix. Using
202 | * the prefix `mw:` is reserved for core.
203 | *
204 | * @param mixed $value The value to set.
205 | * A null value is not stored; null is still rejected as a conflict if the key already holds a non-null value.
206 | */
207 | public function setExtensionData( string $key, $value ): void {
208 | if (
209 | array_key_exists( $key, $this->extensionData ) &&
210 | $this->extensionData[$key] !== $value
211 | ) {
212 | throw new \InvalidArgumentException( "Conflicting data for $key" );
213 | }
214 | if ( $value === null ) { // a null value is dropped rather than stored
215 | unset( $this->extensionData[$key] );
216 | } else {
217 | $this->extensionData[$key] = $value;
218 | }
219 | }
220 | 
221 | /**
222 | * Appends arbitrary data to this SectionMetadata. This can be used
223 | * to store some information about the section in the ParserOutput object for later
224 | * use during page output.
225 | *
226 | * See ::setExtensionData() for more details on rationale and use.
227 | *
228 | * @param string $key The key for accessing the data. Extensions should take care to avoid
229 | * conflicts in naming keys. It is suggested to use the extension's name as a prefix.
230 | *
231 | * @param int|string $value The value to append to the list.
232 | * @return never This method is not yet implemented and always throws \InvalidArgumentException.
233 | */
234 | public function appendExtensionData( string $key, $value ): void {
235 | // This implementation would mirror that of
236 | // ParserOutput::appendExtensionData, but let's defer implementing
237 | // this until we're sure we need it. In particular, we might need
238 | // to figure out how a merge on section data is expected to work
239 | // before we can determine the right semantics for this.
240 | throw new \InvalidArgumentException( "Not yet implemented" );
241 | }
242 | 
243 | /**
244 | * Gets extension data previously attached to this SectionMetadata.
245 | *
246 | * @param string $key The key to look up
247 | * @return mixed|null The value(s) previously set for the given key using
248 | * ::setExtensionData() or ::appendExtensionData(), or null if no
249 | * value was set for this key.
250 | */
251 | public function getExtensionData( $key ) {
252 | $value = $this->extensionData[$key] ?? null;
253 | return $value;
254 | }
255 | 
256 | /**
257 | * Alias for ::toLegacy(), for b/c compatibility only.
258 | * @deprecated
259 | * @return array
260 | */
261 | public function toArray(): array {
262 | return $this->toLegacy();
263 | }
264 | 
265 | /**
266 | * Alias for ::fromLegacy(), for b/c compatibility only.
267 | * @deprecated
268 | * @param array $data
269 | * @return SectionMetadata
270 | */
271 | public static function fromArray( array $data ): SectionMetadata {
272 | return self::fromLegacy( $data );
273 | }
274 | 
275 | /**
276 | * Create a new SectionMetadata object from an array in the legacy
277 | * format returned by the action API.
278 | *
279 | * This is useful for backward-compatibility, but is expected to
280 | * be replaced by conversion to/from JSON in the future.
281 | *
282 | * @param array $data Associative array with section metadata
283 | * @return SectionMetadata
284 | */
285 | public static function fromLegacy( array $data ): SectionMetadata {
286 | return new SectionMetadata(
287 | $data['toclevel'] ?? 0,
288 | (int)( $data['level'] ?? -1 ), // -1 when 'level' is missing or null
289 | $data['line'] ?? '',
290 | $data['number'] ?? '',
291 | $data['index'] ?? '',
292 | ( $data['fromtitle'] ?? false ) ?: null, // any falsy legacy value (e.g. false) becomes null
293 | $data['byteoffset'] ?? null,
294 | $data['anchor'] ?? '',
295 | $data['linkAnchor'] ?? $data['anchor'] ?? '', // falls back to the 'anchor' value
296 | $data['extensionData'] ?? null
297 | );
298 | }
299 | 
300 | /**
301 | * Return as associative array, in the format returned by the
302 | * action API (including the order of fields and the value types).
303 | *
304 | * This is helpful as b/c support while we transition to objects.
305 | * @return array
306 | */
307 | public function toLegacy(): array {
308 | $ret = [
309 | 'toclevel' => $this->tocLevel,
310 | // cast $hLevel to string in order to keep b/c for the parse api
311 | 'level' => (string)$this->hLevel,
312 | 'line' => $this->line,
313 | 'number' => $this->number,
314 | 'index' => $this->index,
315 | 'fromtitle' => $this->fromTitle ?? false, // a null title is reported as false
316 | 'byteoffset' => $this->byteOffset,
317 | 'anchor' => $this->anchor,
318 | 'linkAnchor' => $this->linkAnchor,
319 | ];
320 | // Micro-opt: Output 'extensionData' conditionally to avoid bloat
321 | if ( $this->extensionData ) {
322 | $ret['extensionData'] = $this->extensionData;
323 | }
324 | return $ret;
325 | }
326 | 
327 | /**
328 | * @inheritDoc
329 | */
330 | public function jsonSerialize(): array {
331 | return $this->toLegacy();
332 | }
333 | }