Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 215 |
|
0.00% |
0 / 14 |
CRAP | |
0.00% |
0 / 1 |
ListHandler | |
0.00% |
0 / 215 |
|
0.00% |
0 / 14 |
4556 | |
0.00% |
0 / 1 |
generateImpliedEndTags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
reset | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
resetCurrListFrame | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
onTag | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
onAny | |
0.00% |
0 / 55 |
|
0.00% |
0 / 1 |
600 | |||
onEnd | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 | |||
closeLists | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
onListItem | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
30 | |||
commonPrefixLength | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
pushList | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
popTags | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
isDtDd | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
doListItem | |
0.00% |
0 / 93 |
|
0.00% |
0 / 1 |
306 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
7 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
8 | use Wikimedia\Parsoid\Tokens\EOFTk; |
9 | use Wikimedia\Parsoid\Tokens\NlTk; |
10 | use Wikimedia\Parsoid\Tokens\SourceRange; |
11 | use Wikimedia\Parsoid\Tokens\TagTk; |
12 | use Wikimedia\Parsoid\Tokens\Token; |
13 | use Wikimedia\Parsoid\Utils\PHPUtils; |
14 | use Wikimedia\Parsoid\Utils\TokenUtils; |
15 | use Wikimedia\Parsoid\Wt2Html\TokenTransformManager; |
16 | |
17 | /** |
18 | * Create list tag around list items and map wiki bullet levels to html. |
19 | */ |
20 | class ListHandler extends TokenHandler { |
21 | /** @var array<ListFrame> */ |
22 | private array $listFrames = []; |
23 | /** @var ?ListFrame */ |
24 | private $currListFrame; |
25 | /** @var int */ |
26 | private $nestedTableCount; |
27 | /** |
28 | * Debug string output of bullet character mappings. |
29 | * @var array<string,array<string,string>> |
30 | */ |
31 | private static $bullet_chars_map = [ |
32 | '*' => [ 'list' => 'ul', 'item' => 'li' ], |
33 | '#' => [ 'list' => 'ol', 'item' => 'li' ], |
34 | ';' => [ 'list' => 'dl', 'item' => 'dt' ], |
35 | ':' => [ 'list' => 'dl', 'item' => 'dd' ] |
36 | ]; |
37 | |
/**
 * Decide whether a closing tag with the given name should implicitly end
 * open list items.
 *
 * The HTML5 parsing spec generates implied end tags for list items when it
 * encounters the closing tag of certain elements:
 * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody:generate-implied-end-tags-5
 *
 * To roundtrip accurately we follow suit, but use an ostensible superset of
 * those tags -- the wikitext block tags -- instead. Hopefully the
 * differences aren't relevant.
 *
 * @param string $tagName
 * @return bool Whatever TokenUtils::isWikitextBlockTag() reports for $tagName
 */
private static function generateImpliedEndTags( string $tagName ): bool {
	$isBlockTag = TokenUtils::isWikitextBlockTag( $tagName );
	return $isBlockTag;
}
53 | |
/**
 * Set up the handler and put it into its initial (no open list) state.
 *
 * @param TokenTransformManager $manager manager environment
 * @param array $options options
 */
public function __construct( TokenTransformManager $manager, array $options ) {
	parent::__construct( $manager, $options );

	// Start out with onAny disabled, no nested tables and no list frame.
	$this->reset();
}
62 | |
/**
 * Return the handler to its initial state: no current list frame, no
 * nested tables counted, and the onAny handler disabled.
 */
private function reset(): void {
	$this->resetCurrListFrame();
	$this->nestedTableCount = 0;
	$this->onAnyEnabled = false;
}
71 | |
/**
 * Drop the current list frame (sets it to null).
 */
private function resetCurrListFrame(): void {
	$this->currListFrame = null;
}
78 | |
/**
 * @inheritDoc
 */
public function onTag( Token $token ): ?TokenHandlerResult {
	// Only 'listItem' tokens are of interest here; everything else is
	// left untouched (null result).
	if ( $token->getName() !== 'listItem' ) {
		return null;
	}
	return $this->onListItem( $token );
}
85 | |
/**
 * @inheritDoc
 */
public function onAny( $token ): ?TokenHandlerResult {
	$this->env->log( 'trace/list', $this->pipelineId,
		'ANY:', static function () use ( $token ) {
			return PHPUtils::jsonEncode( $token );
		} );

	if ( !$this->currListFrame ) {
		// $this->currListFrame will be null only when we are in a table
		// that in turn was seen in a list context.
		//
		// Since we are not in a list within the table, nothing to do
		// except track table nesting. The token is passed through unchanged.
		if ( $token instanceof EndTagTk && $token->getName() === 'table' ) {
			if ( $this->nestedTableCount === 0 ) {
				// Resume the list frame that was stashed when the table opened
				$this->currListFrame = array_pop( $this->listFrames );
			} else {
				$this->nestedTableCount--;
			}
		} elseif ( $token instanceof TagTk && $token->getName() === 'table' ) {
			$this->nestedTableCount++;
		}

		$this->env->log( 'trace/list', $this->pipelineId, 'RET: ', $token );
		return null;
	}

	// Keep track of open tags per list frame in order to prevent colons
	// starting lists illegally. Php's findColonNoLinks.
	if ( $token instanceof TagTk
		// Table tokens will push the frame and remain balanced.
		// They're safe to ignore in the bookkeeping.
		&& $token->getName() !== 'table'
	) {
		$this->currListFrame->numOpenTags += 1;
	} elseif ( $token instanceof EndTagTk && $this->currListFrame->numOpenTags > 0 ) {
		$this->currListFrame->numOpenTags -= 1;
	}

	if ( $token instanceof EndTagTk ) {
		$name = $token->getName();

		if ( $name === 'table' ) {
			// close all open lists and pop a frame
			$ret = $this->closeLists( $token );
			$this->currListFrame = array_pop( $this->listFrames );
			return new TokenHandlerResult( $ret );
		}

		if ( self::generateImpliedEndTags( $name ) ) {
			if ( $this->currListFrame->numOpenBlockTags === 0 ) {
				// Unbalanced closing block tag in a list context ==> close all previous lists
				return new TokenHandlerResult( $this->closeLists( $token ) );
			}

			$this->currListFrame->numOpenBlockTags--;
			if ( $this->currListFrame->atEOL ) {
				// Non-list item in newline context ==> close all previous lists
				return new TokenHandlerResult( $this->closeLists( $token ) );
			}

			$this->env->log( 'trace/list', $this->pipelineId, 'RET: ', $token );
			return null;
		}

		/* Non-block tag -- fall-through to other tests below */
	}

	if ( $this->currListFrame->atEOL ) {
		if ( !$token instanceof NlTk && TokenUtils::isSolTransparent( $this->env, $token ) ) {
			// Hold on to see where the token stream goes from here
			// - another list item, or
			// - end of list
			if ( $this->currListFrame->nlTk ) {
				$this->currListFrame->solTokens[] = $this->currListFrame->nlTk;
				$this->currListFrame->nlTk = null;
			}
			$this->currListFrame->solTokens[] = $token;
			return new TokenHandlerResult( [] );
		}

		// Non-list item in newline context ==> close all previous lists
		return new TokenHandlerResult( $this->closeLists( $token ) );
	}

	if ( $token instanceof NlTk ) {
		// Stash the newline until we know whether the list continues
		$this->currListFrame->atEOL = true;
		$this->currListFrame->nlTk = $token;
		// php's findColonNoLinks is run in doBlockLevels, which examines
		// the text line-by-line. At nltk, any open tags will cease having
		// an effect.
		$this->currListFrame->numOpenTags = 0;
		return new TokenHandlerResult( [] );
	}

	if ( $token instanceof TagTk ) {
		if ( $token->getName() === 'table' ) {
			// Suspend the current list while inside the table
			$this->listFrames[] = $this->currListFrame;
			$this->resetCurrListFrame();
		} elseif ( self::generateImpliedEndTags( $token->getName() ) ) {
			$this->currListFrame->numOpenBlockTags++;
		}
		$this->env->log( 'trace/list', $this->pipelineId, 'RET: ', $token );
		return null;
	}

	// Nothing else left to do
	$this->env->log( 'trace/list', $this->pipelineId, 'RET: ', $token );
	return null;
}
194 | |
/**
 * @inheritDoc
 */
public function onEnd( EOFTk $token ): ?TokenHandlerResult {
	$this->env->log(
		'trace/list', $this->pipelineId, 'END:',
		static fn () => PHPUtils::jsonEncode( $token )
	);

	// Any stashed frames are discarded at end of input
	$this->listFrames = [];

	// Init here so we don't have to have a check in closeLists.
	// That way, if we get a null frame there, we know we have a bug.
	$this->currListFrame ??= new ListFrame();

	$closing = $this->closeLists( $token );
	$this->reset();

	return new TokenHandlerResult( $closing );
}
213 | |
/**
 * Close every list that is open in the current list frame.
 *
 * The result is, in order: the end tags for all open list levels, the
 * stashed start-of-line tokens, the stashed newline token (if any), and
 * finally $token itself. The current list frame is dropped afterwards, and
 * onAny is disabled when no stashed list frames remain.
 *
 * @param Token|string $token The token that triggered the close
 * @return array
 */
private function closeLists( $token ): array {
	$frame = $this->currListFrame;

	// One pop per nesting level recorded on the bullet stack
	$out = $this->popTags( count( $frame->bstack ) );

	// Flush everything that was held back while waiting for the next line
	PHPUtils::pushArray( $out, $frame->solTokens );
	if ( $frame->nlTk ) {
		$out[] = $frame->nlTk;
	}
	$out[] = $token;

	// No stashed list frames left ==> no need for the onAny transform
	if ( !$this->listFrames ) {
		$this->onAnyEnabled = false;
	}

	$this->resetCurrListFrame();

	$this->env->log( 'trace/list', $this->pipelineId, '----closing all lists----' );
	$this->env->log( 'trace/list', $this->pipelineId, 'RET: ', $out );

	return $out;
}
243 | |
/**
 * Process a 'listItem' token.
 *
 * Only start tags are converted; any other token kind is passed through
 * (null result). A start tag enables onAny and is expanded into list and
 * list item tokens, except for a trailing ':' bullet seen while other tags
 * are open in the current frame, which is emitted as a literal ':'.
 *
 * @param Token $token
 * @return TokenHandlerResult|null
 */
private function onListItem( Token $token ): ?TokenHandlerResult {
	if ( !( $token instanceof TagTk ) ) {
		$this->env->log( 'trace/list', $this->pipelineId, 'RET: ', $token );
		return null;
	}

	$this->onAnyEnabled = true;
	$bullets = $token->getAttributeV( 'bullets' );

	if ( $this->currListFrame === null ) {
		$this->currListFrame = new ListFrame;
	} elseif ( PHPUtils::lastItem( $bullets ) === ':'
		&& $this->currListFrame->numOpenTags > 0
	) {
		// Ignoring colons inside tags to prevent illegal overlapping.
		// Attempts to mimic findColonNoLinks in the php parser.
		$this->env->log( 'trace/list', $this->pipelineId, 'RET: ', ':' );
		return new TokenHandlerResult( [ ':' ] );
	}

	// convert listItem to list and list item tokens
	$expanded = $this->doListItem( $this->currListFrame->bstack, $bullets, $token );
	return new TokenHandlerResult( $expanded );
}
275 | |
/**
 * Count how many leading elements two lists have in common.
 *
 * Elements are compared position by position with strict equality; the
 * count stops at the first mismatch or at the end of the shorter list.
 *
 * @param array $x
 * @param array $y
 * @return int
 */
private function commonPrefixLength( array $x, array $y ): int {
	$limit = min( count( $x ), count( $y ) );
	$shared = 0;
	while ( $shared < $limit && $x[$shared] === $y[$shared] ) {
		$shared++;
	}
	return $shared;
}
293 | |
/**
 * Open a new list level.
 *
 * Records the matching end tags (list first, then item) on the current
 * list frame and returns the corresponding start tags.
 *
 * @param array $container Entry with 'list' and 'item' tag names
 * @param DataParsoid $dp1 data-parsoid for the list start tag
 * @param DataParsoid $dp2 data-parsoid for the list item start tag
 * @return array The list start tag followed by the item start tag
 */
private function pushList( array $container, DataParsoid $dp1, DataParsoid $dp2 ): array {
	[ 'list' => $listName, 'item' => $itemName ] = $container;

	$this->currListFrame->endtags[] = new EndTagTk( $listName );
	$this->currListFrame->endtags[] = new EndTagTk( $itemName );

	$listTag = new TagTk( $listName, [], $dp1 );
	$itemTag = new TagTk( $itemName, [], $dp2 );

	return [ $listTag, $itemTag ];
}
311 | |
/**
 * Pop the end tags for $n list levels off the current list frame.
 *
 * Each level accounts for two stacked end tags, popped in order: the list
 * item end tag, then the list end tag. Empty pops (stack exhausted) are
 * skipped.
 *
 * @param int $n Number of list levels to close
 * @return array The popped end tags, in pop order
 */
private function popTags( int $n ): array {
	$popped = [];

	for ( $level = $n; $level > 0; $level-- ) {
		// first the list item end tag, then the list end tag
		for ( $k = 0; $k < 2; $k++ ) {
			$endTag = array_pop( $this->currListFrame->endtags );
			if ( !empty( $endTag ) ) {
				$popped[] = $endTag;
			}
		}
	}

	return $popped;
}
336 | |
/**
 * Check whether two bullets form a dt/dd pair, i.e. one is ';' and the
 * other is ':' (in either order).
 *
 * @param string $a
 * @param string $b
 * @return bool
 */
private function isDtDd( string $a, string $b ): bool {
	if ( $a === ':' ) {
		return $b === ';';
	}
	return $a === ';' && $b === ':';
}
349 | |
/**
 * Convert a listItem token into list and list item start/end tag tokens.
 *
 * Compares the bullet stack of the previous list item ($bs) with the
 * bullets of the new item ($bn) and emits end tags for the levels that are
 * closed and start tags for the levels that are opened. Start-of-line
 * tokens and the newline token stashed on the current list frame are
 * flushed into the result, and the frame's bullet stack becomes $bn.
 *
 * @param array $bs Bullet stack of the previous list item
 * @param array $bn Bullets of the list item being processed
 * @param Token $token The listItem token
 * @return array
 * @throws \InvalidArgumentException If a newly opened level uses a bullet
 *   that has no entry in the bullet character map
 */
private function doListItem( array $bs, array $bn, Token $token ): array {
	$this->env->log( 'trace/list', $this->pipelineId,
		'BEGIN:', static function () use ( $token ) {
			return PHPUtils::jsonEncode( $token );
		} );

	$prefixLen = $this->commonPrefixLength( $bs, $bn );
	$bnLen = count( $bn );
	$dp = $token->dataParsoid;

	// Clone the listItem's data-parsoid; if it carries a tsr, replace it
	// with a range at offsets $k and $j from the original tsr start.
	$makeDP = static function ( $k, $j ) use ( $dp ) {
		$newDP = $dp->clone();
		$tsr = $dp->tsr ?? null;
		if ( $tsr ) {
			$newDP->tsr = new SourceRange(
				$tsr->start + $k, $tsr->start + $j
			);
		}
		return $newDP;
	};

	$this->currListFrame->bstack = $bn;

	$res = null;
	$itemToken = null;

	// emit close tag tokens for closed lists
	$this->env->log( 'trace/list', $this->pipelineId, static function () use ( $bs, $bn ) {
		return '    bs: ' . PHPUtils::jsonEncode( $bs ) . '; bn: ' . PHPUtils::jsonEncode( $bn );
	} );

	if ( $prefixLen === count( $bs ) && $bnLen === count( $bs ) ) {
		$this->env->log( 'trace/list', $this->pipelineId, '    -> no nesting change' );

		// same list item types and same nesting level
		$itemToken = array_pop( $this->currListFrame->endtags );
		$this->currListFrame->endtags[] = new EndTagTk( $itemToken->getName() );
		$res = array_merge( [ $itemToken ],
			$this->currListFrame->solTokens,
			[
				// this list item gets all the bullets since this is
				// a list item at the same level
				//
				// **a
				// **b
				$this->currListFrame->nlTk ?: '',
				new TagTk( $itemToken->getName(), [], $makeDP( 0, $bnLen ) )
			]
		);
	} else {
		$prefixCorrection = 0;
		$tokens = [];
		if ( count( $bs ) > $prefixLen
			&& $bnLen > $prefixLen
			&& $this->isDtDd( $bs[$prefixLen], $bn[$prefixLen] )
		) {
			/* ------------------------------------------------
			 * Handle dd/dt transitions
			 *
			 * Example:
			 *
			 * **;:: foo
			 * **::: bar
			 *
			 * the 3rd bullet is the dt-dd transition
			 * ------------------------------------------------ */

			$tokens = $this->popTags( count( $bs ) - $prefixLen - 1 );
			$tokens = array_merge( $this->currListFrame->solTokens, $tokens );
			$newName = self::$bullet_chars_map[$bn[$prefixLen]]['item'];
			$endTag = array_pop( $this->currListFrame->endtags );
			$this->currListFrame->endtags[] = new EndTagTk( $newName );

			$newTag = null;
			if ( isset( $dp->stx ) && $dp->stx === 'row' ) {
				// stx='row' is only set for single-line dt-dd lists (see tokenizer)
				// In this scenario, the dd token we are building a token for has no prefix
				// Ex: ;a:b, *;a:b, #**;a:b, etc. Compare with *;a\n*:b, #**;a\n#**:b
				$this->env->log( 'trace/list', $this->pipelineId,
					'    -> single-line dt->dd transition' );
				$newTag = new TagTk( $newName, [], $makeDP( 0, 1 ) );
			} else {
				$this->env->log( 'trace/list', $this->pipelineId, '    -> other dt/dd transition' );
				$newTag = new TagTk( $newName, [], $makeDP( 0, $prefixLen + 1 ) );
			}

			$tokens[] = $endTag;
			$tokens[] = $this->currListFrame->nlTk ?: '';
			$tokens[] = $newTag;

			$prefixCorrection = 1;
		} else {
			$this->env->log( 'trace/list', $this->pipelineId, '    -> reduced nesting' );
			$tokens = array_merge(
				$this->currListFrame->solTokens,
				$tokens,
				$this->popTags( count( $bs ) - $prefixLen )
			);
			if ( $this->currListFrame->nlTk ) {
				$tokens[] = $this->currListFrame->nlTk;
			}
			if ( $prefixLen > 0 && $bnLen === $prefixLen ) {
				$itemToken = array_pop( $this->currListFrame->endtags );
				$tokens[] = $itemToken;
				// this list item gets all bullets upto the shared prefix
				$tokens[] = new TagTk( $itemToken->getName(), [], $makeDP( 0, $bnLen ) );
				$this->currListFrame->endtags[] = new EndTagTk( $itemToken->getName() );
			}
		}

		for ( $i = $prefixLen + $prefixCorrection; $i < $bnLen; $i++ ) {
			$bullet = $bn[$i];
			// isset() avoids an undefined-key warning for unknown bullets,
			// and the message reports the offending bullet itself.
			if ( !isset( self::$bullet_chars_map[$bullet] ) ) {
				throw new \InvalidArgumentException( 'Unknown node prefix ' . $bullet );
			}

			// Each list item in the chain gets one bullet.
			// However, the first item also includes the shared prefix.
			//
			// Example:
			//
			// **a
			// ****b
			//
			// Yields:
			//
			// <ul><li-*>
			// <ul><li-*>a
			// <ul><li-FIRST-ONE-gets-***>
			// <ul><li-*>b</li></ul>
			// </li></ul>
			// </li></ul>
			// </li></ul>
			//
			// Unless prefixCorrection is > 0, in which case we've
			// already accounted for the initial bullets.
			//
			// prefixCorrection is for handling dl-dts like this
			//
			// ;a:b
			// ;;c:d
			//
			// ";c:d" is embedded within a dt that is 1 char wide(;)

			$listDP = null;
			$listItemDP = null;
			if ( $i === $prefixLen ) {
				$this->env->log( 'trace/list', $this->pipelineId,
					'    -> increased nesting: first'
				);
				$listDP = $makeDP( 0, 0 );
				$listItemDP = $makeDP( 0, $i + 1 );
			} else {
				$this->env->log( 'trace/list', $this->pipelineId,
					'    -> increased nesting: 2nd and higher'
				);
				$listDP = $makeDP( $i, $i );
				$listItemDP = $makeDP( $i, $i + 1 );
			}

			PHPUtils::pushArray( $tokens, $this->pushList(
				self::$bullet_chars_map[$bullet], $listDP, $listItemDP
			) );
		}
		$res = $tokens;
	}

	// clear out sol-tokens
	$this->currListFrame->solTokens = [];
	$this->currListFrame->nlTk = null;
	$this->currListFrame->atEOL = false;

	$this->env->log( 'trace/list', $this->pipelineId,
		'RET:', static function () use ( $res ) {
			return PHPUtils::jsonEncode( $res );
		} );
	return $res;
}
532 | } |