Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
87.60% |
113 / 129 |
|
72.73% |
8 / 11 |
CRAP | |
0.00% |
0 / 1 |
ReferenceStack | |
87.60% |
113 / 129 |
|
72.73% |
8 / 11 |
57.16 | |
0.00% |
0 / 1 |
pushInvalidRef | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
pushRef | |
96.72% |
59 / 61 |
|
0.00% |
0 / 1 |
19 | |||
rollbackRefs | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
rollbackRef | |
81.58% |
31 / 38 |
|
0.00% |
0 / 1 |
18.81 | |||
popGroup | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
hasGroup | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getGroups | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
getGroupRefs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
resolveFollow | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
listDefinedRef | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
nextRefSequence | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace Cite; |
4 | |
5 | use LogicException; |
6 | use MediaWiki\Parser\StripState; |
7 | |
8 | /** |
9 | * Encapsulates most of Cite state during parsing. This includes metadata about each ref tag, |
10 | * and a rollback stack to correct confusion caused by lost context when `{{#tag` is used. |
11 | * |
12 | * @license GPL-2.0-or-later |
13 | */ |
14 | class ReferenceStack { |
15 | |
16 | /**
17 | * Data structure representing all <ref> tags parsed so far, indexed by group name (an empty
18 | * string for the default group) and reference name.
19 | *
20 | * References without a name get a numeric index, starting from 0. Conflicts are avoided by
21 | * disallowing numeric names (e.g. <ref name="1">) in {@see Validator::validateRef}.
22 | *
23 | * @var array<string,array<string|int,ReferenceStackItem>>
24 | */
25 | private array $refs = [];
26 | 
27 | /**
28 | * Auto-incrementing sequence number for all <ref>, no matter which group
29 | */
30 | private int $refSequence = 0;
31 | 
32 | /** @var int[] Running counter behind ReferenceStackItem::$number, one per group name */
33 | private array $groupRefSequence = [];
34 | 
35 | /**
36 | * <ref> call stack. pushRef() pushes [ action, key, group, name, text, argv ] entries and
37 | * pushInvalidRef() pushes `false`. Used to clean up out-of-sequence ref calls created by #tag,
38 | * see the description of rollbackRef().
39 | *
40 | * @var (array|false)[]
41 | * @phan-var array<array{0:string,1:int,2:string,3:?string,4:?string,5:array}|false>
42 | */
43 | private array $refCallStack = [];
44 | 
45 | private const ACTION_ASSIGN = 'assign'; // Text was added to an existing named ref that had none
46 | private const ACTION_INCREMENT = 'increment'; // An existing named ref was reused once more
47 | private const ACTION_NEW_FROM_PLACEHOLDER = 'new-from-placeholder'; // A placeholder got replaced
48 | private const ACTION_NEW = 'new'; // A new entry was added to $this->refs
49 | |
50 | /**
51 | * Mark an invalid ref tag on the call stack with `false`; rollbackRefs() pops but never redoes it.
52 | */
53 | public function pushInvalidRef(): void {
54 | $this->refCallStack[] = false;
55 | }
56 | |
57 | /**
58 | * Populate $this->refs and $this->refCallStack based on input and arguments to <ref>
59 | *
60 | * @param StripState $stripState Used to compare texts that are hidden behind strip markers
61 | * @param ?string $text Content from the <ref> tag
62 | * @param string[] $argv Only recorded on the call stack, to be handed back by rollbackRefs()
63 | * @param string $group Group name, an empty string for the default group
64 | * @param ?string $name Falsy values (null, "" and "0") make this an anonymous reference
65 | * @param ?string $extends Name of the parent reference in the same group
66 | * @param ?string $follow Guaranteed to not be a numeric string
67 | * @param ?string $dir ref direction
68 | *
69 | * @return ?ReferenceStackItem ref structure, or null if no footnote marker should be rendered
70 | */
71 | public function pushRef(
72 | StripState $stripState,
73 | ?string $text,
74 | array $argv,
75 | string $group,
76 | ?string $name,
77 | ?string $extends,
78 | ?string $follow,
79 | ?string $dir
80 | ): ?ReferenceStackItem {
81 | $this->refs[$group] ??= [];
82 | $this->groupRefSequence[$group] ??= 0;
83 | 
84 | $ref = new ReferenceStackItem();
85 | $ref->count = 1;
86 | $ref->dir = $dir;
87 | // TODO: Read from this group field or deprecate it.
88 | $ref->group = $group;
89 | $ref->name = $name;
90 | $ref->text = $text;
91 | 
92 | if ( $follow ) {
93 | if ( !isset( $this->refs[$group][$follow] ) ) {
94 | // Mark an incomplete follow="…" as such. This is valid e.g. in the Page:… namespace
95 | // on Wikisource.
96 | $ref->follow = $follow;
97 | $ref->key = $this->nextRefSequence();
98 | $this->refs[$group][] = $ref;
99 | $this->refCallStack[] = [ self::ACTION_NEW, $ref->key, $group, $name, $text, $argv ];
100 | } elseif ( $text !== null ) {
101 | // We know the parent already, so just perform the follow="…" and bail out
102 | $this->resolveFollow( $group, $follow, $text );
103 | }
104 | // A follow="…" never gets its own footnote marker
105 | return null;
106 | }
107 | 
108 | if ( !$name ) {
109 | // This is an anonymous reference, which will be given a numeric index.
110 | $this->refs[$group][] = &$ref;
111 | $ref->key = $this->nextRefSequence();
112 | $action = self::ACTION_NEW;
113 | } elseif ( !isset( $this->refs[$group][$name] ) ) {
114 | // Valid key with first occurrence
115 | $this->refs[$group][$name] = &$ref;
116 | $ref->key = $this->nextRefSequence();
117 | $action = self::ACTION_NEW;
118 | } elseif ( $this->refs[$group][$name]->placeholder ) {
119 | // Replace the placeholder, but keep the number and sub-reference count it collected.
120 | $ref->extendsCount = $this->refs[$group][$name]->extendsCount;
121 | $ref->key = $this->nextRefSequence();
122 | $ref->number = $this->refs[$group][$name]->number;
123 | $this->refs[$group][$name] =& $ref;
124 | $action = self::ACTION_NEW_FROM_PLACEHOLDER;
125 | } else {
126 | // Change an existing entry. From here on $ref is the stored item, not the new one.
127 | $ref = &$this->refs[$group][$name];
128 | $ref->count++;
129 | 
130 | if ( $ref->dir && $dir && $ref->dir !== $dir ) {
131 | $ref->warnings[] = [ 'cite_error_ref_conflicting_dir', $name ];
132 | }
133 | 
134 | if ( $ref->text === null && $text !== null ) {
135 | // If no text was set before, use this text
136 | $ref->text = $text;
137 | // Use the dir parameter only from the full definition of a named ref tag
138 | $ref->dir = $dir;
139 | $action = self::ACTION_ASSIGN;
140 | } else {
141 | if ( $text !== null
142 | // T205803 different strip markers might hide the same text
143 | && $stripState->unstripBoth( $text )
144 | !== $stripState->unstripBoth( $ref->text )
145 | ) {
146 | // two refs with same name and different text
147 | $ref->warnings[] = [ 'cite_error_references_duplicate_key', $name ];
148 | }
149 | $action = self::ACTION_INCREMENT;
150 | }
151 | }
152 | 
153 | $ref->number ??= ++$this->groupRefSequence[$group]; // Keeps a number that is already set
154 | 
155 | // Do not mess with a known parent a second time
156 | if ( $extends && $ref->extendsIndex === null ) {
157 | $parentRef =& $this->refs[$group][$extends];
158 | if ( $parentRef === null ) {
159 | // Create a new placeholder and give it the current sequence number.
160 | $parentRef = new ReferenceStackItem();
161 | $parentRef->name = $extends;
162 | $parentRef->number = $ref->number;
163 | $parentRef->placeholder = true;
164 | } else {
165 | $ref->number = $parentRef->number;
166 | // Roll back the group sequence number.
167 | --$this->groupRefSequence[$group];
168 | }
169 | $parentRef->extendsCount ??= 0;
170 | $ref->extends = $extends;
171 | $ref->extendsIndex = ++$parentRef->extendsCount;
172 | } elseif ( $extends && $ref->extends !== $extends ) {
173 | // TODO: Change the error message to talk about "conflicting content or parent"?
174 | $ref->warnings[] = [ 'cite_error_references_duplicate_key', $name ];
175 | }
176 | 
177 | $this->refCallStack[] = [ $action, $ref->key, $group, $name, $text, $argv ];
178 | return $ref;
179 | }
180 | |
181 | /**
182 | * Undo the changes made by the last $count ref tags. This is used when we discover that the
183 | * last few tags were actually inside of a references tag.
184 | *
185 | * @param int $count Number of call stack entries to pop, invalid ref tags included
186 | *
187 | * @return array[] Refs to restore under the correct context, as a list of [ $text, $argv ]
188 | * @phan-return array<array{0:?string,1:array}>
189 | */
190 | public function rollbackRefs( int $count ): array {
191 | $redoStack = [];
192 | while ( $count-- && $this->refCallStack ) {
193 | $call = array_pop( $this->refCallStack );
194 | if ( $call ) {
195 | // @phan-suppress-next-line PhanParamTooFewUnpack
196 | $redoStack[] = $this->rollbackRef( ...$call );
197 | }
198 | }
199 | 
200 | // Drop unused rollbacks, this group is finished.
201 | $this->refCallStack = [];
202 | 
203 | return array_reverse( $redoStack ); // Popped newest first, so flip back to document order
204 | }
205 | |
206 | /**
207 | * Partially undoes the effect of calls to pushRef()
208 | *
209 | * The option to define <ref> within <references> makes the
210 | * behavior of <ref> context dependent. This is normally fine
211 | * but certain operations (especially #tag) lead to out-of-order
212 | * parser evaluation with the <ref> tags being processed before
213 | * their containing <references> element is read. This leads to
214 | * stack corruption that this function works to fix.
215 | *
216 | * This function is not a total rollback since some internal
217 | * counters remain incremented. Doing so prevents accidentally
218 | * corrupting certain links.
219 | *
220 | * @param string $action One of the self::ACTION_… constants, anything else throws
221 | * @param int $key Autoincrement counter for this ref.
222 | * @param string $group
223 | * @param ?string $name The name attribute passed in the ref tag.
224 | * @param ?string $text Not inspected, only handed back in the redo item
225 | * @param array $argv Not inspected, only handed back in the redo item
226 | *
227 | * @return array [ $text, $argv ] Ref redo item.
228 | */
229 | private function rollbackRef(
230 | string $action,
231 | int $key,
232 | string $group,
233 | ?string $name,
234 | ?string $text,
235 | array $argv
236 | ): array {
237 | if ( !$this->hasGroup( $group ) ) {
238 | throw new LogicException( "Cannot roll back ref with unknown group \"$group\"." );
239 | }
240 | 
241 | $lookup = $name ?: null;
242 | if ( $lookup === null ) {
243 | // Find anonymous ref by key.
244 | foreach ( $this->refs[$group] as $k => $v ) {
245 | if ( $v->key === $key ) {
246 | $lookup = $k;
247 | break;
248 | }
249 | }
250 | }
251 | 
252 | // Obsessive sanity checks that the specified element exists.
253 | if ( $lookup === null ) {
254 | throw new LogicException( "Cannot roll back unknown ref by key $key." );
255 | } elseif ( !isset( $this->refs[$group][$lookup] ) ) {
256 | throw new LogicException( "Cannot roll back missing named ref \"$lookup\"." );
257 | } elseif ( $this->refs[$group][$lookup]->key !== $key ) {
258 | throw new LogicException(
259 | "Cannot roll back corrupt named ref \"$lookup\" which should have had key $key." );
260 | }
261 | $ref =& $this->refs[$group][$lookup];
262 | 
263 | switch ( $action ) {
264 | case self::ACTION_NEW:
265 | // Rollback the addition of new elements to the stack
266 | unset( $this->refs[$group][$lookup] );
267 | if ( !$this->refs[$group] ) {
268 | $this->popGroup( $group );
269 | } elseif ( isset( $this->groupRefSequence[$group] ) ) {
270 | $this->groupRefSequence[$group]--;
271 | }
272 | if ( $ref->extends ) {
273 | $this->refs[$group][$ref->extends]->extendsCount--;
274 | }
275 | break;
276 | case self::ACTION_NEW_FROM_PLACEHOLDER: // Demote the ref back to an unused placeholder
277 | $ref->placeholder = true;
278 | $ref->count = 0;
279 | break;
280 | case self::ACTION_ASSIGN:
281 | // Rollback assignment of text to pre-existing elements
282 | $ref->text = null;
283 | $ref->count--;
284 | break;
285 | case self::ACTION_INCREMENT:
286 | // Rollback increase in named ref occurrences
287 | $ref->count--;
288 | break;
289 | default:
290 | throw new LogicException( "Unknown call stack action \"$action\"" );
291 | }
292 | return [ $text, $argv ];
293 | }
294 | |
295 | /**
296 | * Clear state for a single group: its references as well as its number counter.
297 | *
298 | * @param string $group
299 | *
300 | * @return array<string|int,ReferenceStackItem> The references from the removed group
301 | */
302 | public function popGroup( string $group ): array {
303 | $refs = $this->getGroupRefs( $group );
304 | unset( $this->refs[$group] );
305 | unset( $this->groupRefSequence[$group] );
306 | return $refs;
307 | }
308 | |
309 | /**
310 | * Returns true if the group exists and contains references.
311 | */
312 | public function hasGroup( string $group ): bool {
313 | return (bool)( $this->refs[$group] ?? false ); // An empty array casts to false
314 | }
315 | |
316 | /**
317 | * @return string[] List of group names that contain at least one reference
318 | */
319 | public function getGroups(): array {
320 | $groups = [];
321 | foreach ( $this->refs as $group => $refs ) {
322 | if ( $refs ) { // Skip groups that are registered but empty
323 | $groups[] = $group;
324 | }
325 | }
326 | return $groups;
327 | }
328 | |
329 | /**
330 | * Return all references for a group, or an empty array if the group is unknown.
331 | *
332 | * @param string $group
333 | *
334 | * @return array<string|int,ReferenceStackItem>
335 | */
336 | public function getGroupRefs( string $group ): array {
337 | return $this->refs[$group] ?? [];
338 | }
339 | |
340 | private function resolveFollow( string $group, string $follow, string $text ): void {
341 | $previousRef =& $this->refs[$group][$follow];
342 | $previousRef->text ??= ''; // The parent's text can still be null at this point
343 | $previousRef->text .= " $text"; // Append the continuation after a single space
344 | }
345 | |
346 | public function listDefinedRef( string $group, string $name, string $text ): void {
347 | $ref =& $this->refs[$group][$name];
348 | $ref ??= new ReferenceStackItem(); // Unknown name: start from a blank item
349 | $ref->placeholder = false; // No longer (or never was) a placeholder
350 | if ( $ref->text === null ) {
351 | $ref->text = $text;
352 | } elseif ( $ref->text !== $text ) {
353 | // two refs with same key and different content
354 | $ref->warnings[] = [ 'cite_error_references_duplicate_key', $name ];
355 | }
356 | }
357 | |
358 | private function nextRefSequence(): int {
359 | return ++$this->refSequence; // Pre-increment, so the first value handed out is 1
360 | }
361 | |
362 | } |