Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 217 |
|
0.00% |
0 / 14 |
CRAP | |
0.00% |
0 / 1 |
ParagraphWrapper | |
0.00% |
0 / 217 |
|
0.00% |
0 / 14 |
8010 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
onNewline | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
onEnd | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
reset | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
resetBuffers | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
resetCurrLine | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
processBuffers | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 | |||
flushBuffers | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
2 | |||
processOneNlTk | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
openPTag | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
132 | |||
closeOpenPTag | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
132 | |||
onNewlineOrEOF | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
72 | |||
processPendingNLs | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
110 | |||
onAny | |
0.00% |
0 / 74 |
|
0.00% |
0 / 1 |
1260 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Assert\UnreachableException; |
8 | use Wikimedia\Parsoid\Tokens\CommentTk; |
9 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
10 | use Wikimedia\Parsoid\Tokens\EOFTk; |
11 | use Wikimedia\Parsoid\Tokens\NlTk; |
12 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
13 | use Wikimedia\Parsoid\Tokens\TagTk; |
14 | use Wikimedia\Parsoid\Tokens\Token; |
15 | use Wikimedia\Parsoid\Utils\PHPUtils; |
16 | use Wikimedia\Parsoid\Utils\TokenUtils; |
17 | use Wikimedia\Parsoid\Wikitext\Consts; |
18 | use Wikimedia\Parsoid\Wt2Html\TokenTransformManager; |
19 | |
20 | /** |
21 | * Insert paragraph tags where needed -- smartly and carefully |
22 | * -- there is much fun to be had mimicking "wikitext visual newlines" |
23 | * behavior as implemented by the PHP parser. |
24 | */ |
25 | class ParagraphWrapper extends TokenHandler { |
26 | |
/** True between an indent-pre (non-HTML `pre`) start tag that is kept and its end tag. */
private bool $inPre = false;
/** True once a `p` start tag has been inserted and not yet closed. */
private bool $hasOpenPTag = false;
/** Block state carried across lines; updated in resetCurrLine() when the line contained a block tag. */
private bool $inBlockElem = false;
/** True when the last `blockquote` tag seen was not an end tag. */
private bool $inBlockquote = false;

/**
 * The state machine in the PreHandler is line based and only suppresses
 * indent-pres when encountering blocks on a line.  However, the legacy
 * parser's `doBlockLevels` has a concept of being "$inBlockElem", which
 * is mimicked here.  Rather than replicate that awareness in both passes,
 * we piggyback on it here to undo indent-pres when they're found to be
 * undesirable.
 */
private bool $undoIndentPre = false;
/** Tokens from previous lines that have not been emitted yet. */
private array $tokenBuffer = [];
/** Buffered newline tokens plus the whitespace/comment/sol-transparent tokens seen between them. */
private array $nlWsTokens = [];
/** Number of newline tokens currently pending in $nlWsTokens. */
private int $newLineCount = 0;

/** Tokens of the line currently being accumulated (everything after $nlWsTokens). */
private array $currLineTokens = [];
/** True when the current line has at least one token that can be wrapped in a paragraph. */
private bool $currLineHasWrappableTokens = false;
/** True when a block tag (or a kept indent-pre tag) was seen on the current line. */
private bool $currLineBlockTagSeen = false;
/** Value copied into $inBlockElem at the end of a line on which a block tag was seen. */
private bool $currLineBlockTagOpen = false;
53 | |
/**
 * Build the paragraph wrapper and put it into its initial state.
 *
 * @param TokenTransformManager $manager manager environment
 * @param array $options various configuration options; a non-empty
 *   'inlineContext' entry marks this handler as disabled
 */
public function __construct( TokenTransformManager $manager, array $options ) {
	parent::__construct( $manager, $options );
	// No p-wrapping when the pipeline runs in an inline context.
	$inlineContext = !empty( $this->options['inlineContext'] );
	$this->disabled = $inlineContext;
	$this->reset();
}
65 | |
/**
 * @inheritDoc
 */
public function onNewline( NlTk $token ): ?TokenHandlerResult {
	if ( $this->inPre ) {
		// Newlines inside a kept indent-pre are not handled here.
		return null;
	}
	return $this->onNewlineOrEOF( $token );
}
72 | |
/**
 * @inheritDoc
 */
public function onEnd( EOFTk $token ): ?TokenHandlerResult {
	// End of input goes through the same code path as a newline.
	$result = $this->onNewlineOrEOF( $token );
	return $result;
}
79 | |
/**
 * Return the handler to its initial state: empty buffers, empty current
 * line, and all state flags cleared.
 *
 * Buffered tokens are kept in, and must be emitted in, this order:
 *
 *   token-buffer         (from previous lines if newLineCount > 0)
 *   newline-ws-tokens    (buffered nl+sol-transparent tokens since last non-nl-token)
 *   current-line-tokens  (all tokens after newline-ws-tokens)
 *
 * newline-token-count is > 0 only when we encounter multiple "empty lines".
 *
 * Periodically, when it is clear where an open/close p-tag is required, the
 * buffers are collapsed and emitted. Wherever tokens are buffered/emitted,
 * verify that this order is preserved.
 */
private function reset(): void {
	$this->resetBuffers();
	// resetCurrLine() may copy the line's block state into inBlockElem,
	// so that flag is cleared only after this call.
	$this->resetCurrLine();
	// NOTE: inBlockElem is the local equivalent of what we're mimicking
	// with the 'inlineContext' pipeline option.
	$this->inBlockElem = false;
	$this->inBlockquote = false;
	$this->inPre = false;
	$this->undoIndentPre = false;
	$this->hasOpenPTag = false;
}
105 | |
/**
 * Empty the token buffer and the newline/whitespace buffer, and clear the
 * pending newline count.
 */
private function resetBuffers(): void {
	$this->newLineCount = 0;
	$this->nlWsTokens = [];
	$this->tokenBuffer = [];
}
115 | |
/**
 * Start a fresh current line.
 *
 * If a block tag was seen on the line being discarded, its open/closed
 * state becomes the new value of inBlockElem; otherwise inBlockElem is
 * left as it was.
 */
private function resetCurrLine(): void {
	$sawBlockTag = $this->currLineBlockTagSeen;
	$blockLeftOpen = $this->currLineBlockTagOpen;

	$this->currLineBlockTagOpen = false;
	$this->currLineBlockTagSeen = false;
	$this->currLineHasWrappableTokens = false;
	$this->currLineTokens = [];

	if ( $sawBlockTag ) {
		$this->inBlockElem = $blockLeftOpen;
	}
}
129 | |
/**
 * Resolve pending newlines, then queue the given token on the current line.
 *
 * @param Token|string $token token to append to the current line
 * @param bool $flushCurrentLine when true, the current line (including
 *   $token) is appended to the result and then reset; when false it stays
 *   buffered
 * @return array tokens ready to be emitted
 */
private function processBuffers( $token, bool $flushCurrentLine ): array {
	// Pending newlines must be processed before the token joins the line.
	$emitted = $this->processPendingNLs();
	$this->currLineTokens[] = $token;

	if ( $flushCurrentLine ) {
		PHPUtils::pushArray( $emitted, $this->currLineTokens );
		$this->resetCurrLine();
	}

	$this->env->log(
		'trace/p-wrap',
		$this->pipelineId,
		'----> ',
		static fn () => PHPUtils::jsonEncode( $emitted )
	);
	return $emitted;
}
149 | |
/**
 * Emit everything held in the token buffer and the newline/whitespace
 * buffer. Only valid while no newlines are pending.
 *
 * Note that $token itself is not part of the returned array: it is only
 * appended to the current line.
 *
 * @param Token|string $token token to append to the current line
 * @return array the former token buffer followed by the former
 *   newline/whitespace tokens
 */
private function flushBuffers( $token ): array {
	Assert::invariant( $this->newLineCount === 0, "PWrap: Trying to flush buffers with pending newlines" );

	$this->currLineTokens[] = $token;

	// Take local copies of both buffers before clearing them, then
	// concatenate the copies.
	$flushed = $this->tokenBuffer;
	$pendingWs = $this->nlWsTokens;
	$this->resetBuffers();
	PHPUtils::pushArray( $flushed, $pendingWs );

	$this->env->log(
		'trace/p-wrap',
		$this->pipelineId,
		'----> ',
		static fn () => PHPUtils::jsonEncode( $flushed )
	);
	return $flushed;
}
172 | |
/**
 * Append tokens from the newline/whitespace buffer to the output array
 * until a newline is encountered. The offset is advanced past every token
 * consumed, including the newline. The newline token itself is returned
 * rather than appended.
 *
 * @param array &$out array to append the non-newline tokens to
 * @param int &$offset index into the newline/whitespace buffer at which
 *   to start; updated in place
 * @return NlTk the next newline token found at or after $offset
 * @throws UnreachableException if no newline token remains in the buffer
 */
public function processOneNlTk( array &$out, int &$offset ): NlTk {
	$n = count( $this->nlWsTokens );
	while ( $offset < $n ) {
		$t = $this->nlWsTokens[$offset++];
		if ( $t instanceof NlTk ) {
			return $t;
		}
		$out[] = $t;
	}
	throw new UnreachableException( 'nlWsTokens was expected to contain an NlTk.' );
}
194 | |
/**
 * Insert an opening paragraph tag into $out, unless a paragraph is
 * already open.
 *
 * The tag goes after any leading newline / sol-transparent tokens, but in
 * front of a transclusion start marker found among them, so that template
 * ranges are not expanded unnecessarily.
 *
 * @param array &$out token array to update in place
 */
private function openPTag( array &$out ): void {
	if ( $this->hasOpenPTag ) {
		return;
	}

	// Index of a transclusion start meta that has not been closed yet.
	$tplStart = null;
	$total = count( $out );
	for ( $pos = 0; $pos < $total; $pos++ ) {
		$tok = $out[$pos];
		if ( !is_string( $tok ) && $tok->getName() === 'meta' ) {
			if ( TokenUtils::hasTypeOf( $tok, 'mw:Transclusion' ) ) {
				// Start marker; everything before it is sol-transparent.
				$tplStart = $pos;
				continue;
			}
			if ( TokenUtils::matchTypeOf( $tok, '#^mw:Transclusion/#' ) ) {
				// End marker; everything before it is sol-transparent
				// and stays outside the paragraph.
				$tplStart = null;
				continue;
			}
			if ( TokenUtils::isAnnotationStartToken( $tok ) ) {
				break;
			}
		}
		// Skip over newline and sol-transparent tokens; stop at the
		// first token that is neither.
		if ( !( TokenUtils::isSolTransparent( $this->env, $tok ) || $tok instanceof NlTk ) ) {
			break;
		}
	}

	// Prefer the position of an unclosed transclusion start marker.
	$insertAt = $tplStart ?? $pos;
	array_splice( $out, $insertAt, 0, [ new TagTk( 'p' ) ] );
	$this->hasOpenPTag = true;
}
235 | |
/**
 * Insert a closing paragraph tag into $out if a paragraph is open.
 *
 * The tag goes in front of any trailing newline / sol-transparent tokens,
 * but after a transclusion end marker found among them, so that template
 * ranges are not expanded unnecessarily.
 *
 * @param array &$out token array to update in place
 */
private function closeOpenPTag( array &$out ): void {
	if ( !$this->hasOpenPTag ) {
		return;
	}

	// Index of a transclusion end meta whose start marker has not been
	// passed yet while scanning backwards.
	$tplEnd = null;
	for ( $pos = count( $out ) - 1; $pos >= 0; $pos-- ) {
		$tok = $out[$pos];
		if ( !is_string( $tok ) && $tok->getName() === 'meta' ) {
			if ( TokenUtils::hasTypeOf( $tok, 'mw:Transclusion' ) ) {
				// Start marker; everything after it is sol-transparent.
				// Leave both those tokens and the marker outside.
				$tplEnd = null;
				continue;
			}
			if ( TokenUtils::matchTypeOf( $tok, '#^mw:Transclusion/#' ) ) {
				// End marker; everything after it is sol-transparent
				// and stays outside the paragraph.
				$tplEnd = $pos;
				continue;
			}
			if ( TokenUtils::isAnnotationEndToken( $tok ) ) {
				break;
			}
		}
		// Skip over newline and sol-transparent tokens; stop at the
		// first token that is neither.
		if ( !( TokenUtils::isSolTransparent( $this->env, $tok ) || $tok instanceof NlTk ) ) {
			break;
		}
	}

	// The end tag is placed right after the chosen token.
	$insertAfter = $tplEnd ?? $pos;
	array_splice( $out, $insertAfter + 1, 0, [ new EndTagTk( 'p' ) ] );
	$this->hasOpenPTag = false;
}
276 | |
/**
 * Shared handling for newline and end-of-input tokens.
 *
 * Finalizes the current line (closing or opening a paragraph as needed)
 * and moves it into the token buffer. A newline is then buffered and
 * nothing is emitted; at end of input everything is emitted and the
 * handler is reset.
 *
 * @param Token $token the newline or EOF token
 * @return TokenHandlerResult
 */
private function onNewlineOrEOF( Token $token ): TokenHandlerResult {
	$this->env->log(
		'trace/p-wrap',
		$this->pipelineId,
		'NL    |',
		static fn () => PHPUtils::jsonEncode( $token )
	);

	if ( $this->currLineBlockTagSeen ) {
		// A block tag on this line ends any open paragraph.
		$this->closeOpenPTag( $this->currLineTokens );
	} elseif ( $this->currLineHasWrappableTokens && !$this->inBlockElem && !$this->hasOpenPTag ) {
		$this->openPTag( $this->currLineTokens );
	}

	// Assertion to catch bugs in p-wrapping; both cannot be true.
	if ( $this->newLineCount > 0 && count( $this->currLineTokens ) > 0 ) {
		$this->env->log( 'error/p-wrap', 'Failed assertion in onNewlineOrEOF: newline-count:',
			$this->newLineCount, '; current line tokens: ', PHPUtils::jsonEncode( $this->currLineTokens ) );
	}

	PHPUtils::pushArray( $this->tokenBuffer, $this->currLineTokens );

	if ( !( $token instanceof EOFTk ) ) {
		// Plain newline: start a new line and keep buffering.
		$this->resetCurrLine();
		$this->newLineCount++;
		$this->nlWsTokens[] = $token;
		return new TokenHandlerResult( [] );
	}

	// End of input: close any open paragraph and emit everything.
	$this->nlWsTokens[] = $token;
	$this->closeOpenPTag( $this->tokenBuffer );
	$emitted = $this->processPendingNLs();
	$this->reset();
	$this->env->log(
		'trace/p-wrap',
		$this->pipelineId,
		'----> ',
		static fn () => PHPUtils::jsonEncode( $emitted )
	);
	return new TokenHandlerResult( $emitted, true );
}
318 | |
/**
 * Process pending newlines.
 *
 * Combines the token buffer with the buffered newline/whitespace tokens,
 * inserting paragraph end/start tags and `br` tags between the newlines
 * when two or more newlines are pending outside a block element. Both
 * buffers and the newline count are reset before returning.
 *
 * @return array the tokens to emit, in buffer order
 */
private function processPendingNLs(): array {
	$resToks = $this->tokenBuffer;
	$newLineCount = $this->newLineCount;
	$nlTk = null;
	// Read position in $this->nlWsTokens; advanced by processOneNlTk().
	$nlOffset = 0;

	$this->env->log( 'trace/p-wrap', $this->pipelineId, ' NL-count: ',
		$newLineCount );

	if ( $newLineCount >= 2 && !$this->inBlockElem ) {
		$this->closeOpenPTag( $resToks );

		// First is emitted as a literal newline
		// (processOneNlTk() first appends any tokens that precede the
		// newline to $resToks, then the returned newline is appended).
		$resToks[] = $this->processOneNlTk( $resToks, $nlOffset );
		$newLineCount -= 1;

		// Parity of the remaining count; iterations with this parity
		// emit paragraph tags, the others emit a <br>.
		$remainder = $newLineCount % 2;

		while ( $newLineCount > 0 ) {
			$nlTk = $this->processOneNlTk( $resToks, $nlOffset );
			if ( $newLineCount % 2 === $remainder ) {
				if ( $this->hasOpenPTag ) {
					$resToks[] = new EndTagTk( 'p' );
					$this->hasOpenPTag = false;
				}
				// Only open a new paragraph if more newlines follow
				// this one.
				if ( $newLineCount > 1 ) {
					$resToks[] = new TagTk( 'p' );
					$this->hasOpenPTag = true;
				}
			} else {
				$resToks[] = new SelfclosingTagTk( 'br' );
			}
			$resToks[] = $nlTk;
			$newLineCount -= 1;
		}
	}

	if ( $this->currLineBlockTagSeen ) {
		// A block tag on the current line ends any open paragraph; the
		// end tag is placed before a single pending newline.
		$this->closeOpenPTag( $resToks );
		if ( $newLineCount === 1 ) {
			$resToks[] = $this->processOneNlTk( $resToks, $nlOffset );
		}
	}

	// Gather remaining ws and nl tokens
	for ( $i = $nlOffset; $i < count( $this->nlWsTokens ); $i++ ) {
		$resToks[] = $this->nlWsTokens[$i];
	}

	// reset buffers
	$this->resetBuffers();

	return $resToks;
}
378 | |
/**
 * @inheritDoc
 *
 * Dispatches on the kind of token, in this order:
 *  - indent-pre (non-HTML `pre`) start and end tags,
 *  - EOF, or any token while inside a kept indent-pre (passed through),
 *  - comments, whitespace-only strings and empty-line metas,
 *  - other sol-transparent tokens and `style` tags,
 *  - everything else (treated as wrappable content).
 */
public function onAny( $token ): ?TokenHandlerResult {
	$this->env->log( 'trace/p-wrap', $this->pipelineId, 'ANY   |',
		static function () use( $token ) {
			return PHPUtils::jsonEncode( $token );
		} );
	$res = null;
	if ( $token instanceof TagTk && $token->getName() === 'pre'
		&& !TokenUtils::isHTMLTag( $token )
	) {
		if ( $this->inBlockElem || $this->inBlockquote ) {
			// Indent-pre inside a block element or blockquote: the
			// start tag is not emitted and the indent-pre is undone.
			$this->undoIndentPre = true;
			if ( $this->newLineCount === 0 ) {
				// An empty string is queued on the current line in
				// place of the dropped tag.
				return new TokenHandlerResult( $this->flushBuffers( '' ) );
			} else {
				return new TokenHandlerResult( [] );
			}
		} else {
			$this->inPre = true;
			// This will put us `inBlockElem`, so we need the extra `!inPre`
			// condition below.  Presumably, we couldn't have entered
			// `inBlockElem` while being `inPre`.  Alternatively, we could say
			// that indent-pre is "never suppressing" and set the `blockTagOpen`
			// flag to false. The point of all this is that we want to close
			// any open p-tags.
			$this->currLineBlockTagSeen = true;
			$this->currLineBlockTagOpen = true;
			// skip ensures this doesn't hit the AnyHandler
			// NOTE(review): no "skip" flag is set in this call -- the
			// comment above may be stale; confirm against TokenHandlerResult.
			return new TokenHandlerResult( $this->processBuffers( $token, true ) );
		}
	} elseif ( $token instanceof EndTagTk && $token->getName() === 'pre' &&
		!TokenUtils::isHTMLTag( $token )
	) {
		if ( ( $this->inBlockElem && !$this->inPre ) || $this->inBlockquote ) {
			$this->undoIndentPre = false;
			// No pre-tokens inside block tags -- swallow it.
			return new TokenHandlerResult( [] );
		} else {
			// End of a kept indent-pre: it counts as a block tag that
			// is now closed.
			$this->inPre = false;
			$this->currLineBlockTagSeen = true;
			$this->currLineBlockTagOpen = false;
			$this->env->log( 'trace/p-wrap', $this->pipelineId, '---->  ',
				static function () use( $token ) {
					return PHPUtils::jsonEncode( $token );
				} );
			$res = [ $token ];
			return new TokenHandlerResult( $res );
		}
	} elseif ( $token instanceof EOFTk || $this->inPre ) {
		// Pass the token through untouched.
		$this->env->log( 'trace/p-wrap', $this->pipelineId, '---->  ',
			static function () use( $token ) {
				return PHPUtils::jsonEncode( $token );
			}
		);
		$res = [ $token ];
		return new TokenHandlerResult( $res );
	} elseif (
		$token instanceof CommentTk
		|| ( is_string( $token ) && preg_match( '/^[\t ]*$/D', $token ) )
		|| TokenUtils::isEmptyLineMetaToken( $token )
	) {
		if ( $this->newLineCount === 0 ) {
			// Since we have no pending newlines to trip us up,
			// no need to buffer -- just flush everything
			return new TokenHandlerResult( $this->flushBuffers( $token ) );
		} else {
			// We are in buffering mode waiting till we are ready to
			// process pending newlines.
			$this->nlWsTokens[] = $token;
			return new TokenHandlerResult( [] );
		}
	} elseif (
		// T186965: <style> behaves similarly to sol transparent tokens in
		// that it doesn't open/close paragraphs, but also doesn't induce
		// a new paragraph by itself.
		TokenUtils::isSolTransparent( $this->env, $token ) ||
		( !is_string( $token ) && $token->getName() === 'style' )
	) {
		if ( $this->undoIndentPre && PreHandler::isIndentPreWS( $token ) ) {
			// While undoing an indent-pre, its whitespace marker is
			// replaced by a single space in the newline/ws buffer.
			$this->nlWsTokens[] = ' ';
			return new TokenHandlerResult( [] );
		} elseif ( $this->newLineCount === 0 ) {
			// Since we have no pending newlines to trip us up,
			// no need to buffer -- just flush everything
			return new TokenHandlerResult( $this->flushBuffers( $token ) );
		} elseif ( $this->newLineCount === 1 ) {
			// Swallow newline, whitespace, comments, and the current line
			PHPUtils::pushArray( $this->tokenBuffer, $this->nlWsTokens );
			PHPUtils::pushArray( $this->tokenBuffer, $this->currLineTokens );
			$this->newLineCount = 0;
			$this->nlWsTokens = [];
			$this->resetCurrLine();

			// But, don't process the new token yet.
			$this->currLineTokens[] = $token;
			return new TokenHandlerResult( [] );
		} else {
			return new TokenHandlerResult( $this->processBuffers( $token, false ) );
		}
	} else {
		if ( !is_string( $token ) ) {
			$name = $token->getName();
			if ( isset( Consts::$wikitextBlockElems[$name] ) ) {
				// Record the block tag; it is treated as leaving a
				// block open unless one of the conditions below holds.
				$this->currLineBlockTagSeen = true;
				$this->currLineBlockTagOpen = true;
				if (
					( isset( Consts::$blockElems[$name] ) && $token instanceof EndTagTk ) ||
					( isset( Consts::$antiBlockElems[$name] ) && !$token instanceof EndTagTk ) ||
					isset( Consts::$neverBlockElems[$name] )
				) {
					$this->currLineBlockTagOpen = false;
				}
			}
			if ( $name === 'blockquote' ) {
				// Any blockquote tag other than an end tag puts us
				// inside a blockquote.
				$this->inBlockquote = !( $token instanceof EndTagTk );
			}
		}
		// Anything reaching this branch makes the line eligible for a
		// paragraph.
		$this->currLineHasWrappableTokens = true;
		return new TokenHandlerResult( $this->processBuffers( $token, false ) );
	}
}
502 | } |