Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 200 |
|
0.00% |
0 / 16 |
CRAP | |
0.00% |
0 / 1 |
PreHandler | |
0.00% |
0 / 200 |
|
0.00% |
0 / 16 |
5256 | |
0.00% |
0 / 1 |
stateStr | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
newIndentPreWS | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
isIndentPreWS | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
resetState | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
reset | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
moveToIgnoreState | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
genPre | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
132 | |||
processCurrLine | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
30 | |||
purgeBuffers | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
discardCurrLinePre | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
initPreTSR | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
onNewline | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
72 | |||
onEnd | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
72 | |||
getUpdatedPreTSR | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
42 | |||
onAny | |
0.00% |
0 / 61 |
|
0.00% |
0 / 1 |
462 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
5 | |
6 | use Wikimedia\Parsoid\DOM\Node; |
7 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
8 | use Wikimedia\Parsoid\Tokens\CommentTk; |
9 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
10 | use Wikimedia\Parsoid\Tokens\EOFTk; |
11 | use Wikimedia\Parsoid\Tokens\KV; |
12 | use Wikimedia\Parsoid\Tokens\NlTk; |
13 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
14 | use Wikimedia\Parsoid\Tokens\SourceRange; |
15 | use Wikimedia\Parsoid\Tokens\TagTk; |
16 | use Wikimedia\Parsoid\Tokens\Token; |
17 | use Wikimedia\Parsoid\Utils\DOMUtils; |
18 | use Wikimedia\Parsoid\Utils\PHPUtils; |
19 | use Wikimedia\Parsoid\Utils\TokenUtils; |
20 | use Wikimedia\Parsoid\Utils\WTUtils; |
21 | use Wikimedia\Parsoid\Wt2Html\TokenTransformManager; |
22 | |
23 | /** |
24 | * PRE-handling relies on the following 6-state FSM. |
25 | * |
26 | * States |
27 | * ------ |
28 | * ``` |
29 | * SOL -- start-of-line |
30 | * (white-space, comments, meta-tags are all SOL transparent) |
31 | * The FSM always starts in this state. |
32 | * PRE -- we might need a pre-block |
33 | * (if we enter the PRE_COLLECT state) |
34 | * PRE_COLLECT -- we will need to generate a pre-block and are collecting |
35 | * content for it. |
36 | * SOL_AFTER_PRE -- we might need to extend the pre-block to multiple lines. |
37 | * (depending on whether we see a white-space tok or not) |
38 | * MULTILINE_PRE -- We will wrap one or more previous lines with <pre> |
39 | * This line could be part of that pre if we enter PRE_COLLECT state |
40 | * IGNORE -- nothing to do for the rest of the line. |
41 | * ``` |
42 | * |
43 | * Action helpers |
44 | * -------------- |
45 | * |
46 | * genPre : return merge("<pre>$TOKS</pre>" while skipping sol-tr toks, sol-tr toks) |
47 | * processCurrLine : $TOKS += $PRE_TOKS; $PRE_TOKS = []; |
48 | * purgeBuffers : convert meta token to ' '; processCurrLine; RET = $TOKS; $TOKS = []; return RET |
49 | * discardCurrLinePre : return merge(genPre, purgeBuffers) |
50 | * |
51 | * Transitions |
52 | * ----------- |
53 | * |
54 | * ``` |
55 | * + --------------+-----------------+---------------+-------------------------+ |
56 | * | Start state | Token | End state | Action | |
57 | * + --------------+-----------------+---------------+-------------------------+ |
58 | * | SOL | --- nl --> | SOL | purgeBuffers | |
59 | * | SOL | --- eof --> | --- | purgeBuffers | |
60 | * | SOL | --- sol-tr --> | SOL | TOKS << tok | |
61 | * | SOL | --- ws --> | PRE | PRE_TOKS = [ wsTok(#) ] | |
62 | * | SOL | --- other --> | IGNORE | purgeBuffers | |
63 | * + --------------+-----------------+---------------+-------------------------+ |
64 | * | PRE | --- nl --> | SOL | purgeBuffers | |
65 | * | PRE | --- eof --> | --- | purgeBuffers | |
66 | * | PRE | --- sol-tr --> | PRE | PRE_TOKS << tok | |
67 | * | PRE | --- blk tag --> | IGNORE | purgeBuffers | |
68 | * | PRE | --- other --> | PRE_COLLECT | PRE_TOKS << tok | |
69 | * + --------------+-----------------+---------------+-------------------------+ |
70 | * | PRE_COLLECT | --- nl --> | SOL_AFTER_PRE | processCurrLine | |
71 | * | PRE_COLLECT | --- eof --> | --- | processCurrLine; genPre | |
72 | * | PRE_COLLECT | --- blk tag --> | IGNORE | discardCurrLinePre | |
73 | * | PRE_COLLECT | --- other --> | PRE_COLLECT | PRE_TOKS << tok | |
74 | * + --------------+-----------------+---------------+-------------------------+ |
75 | * | SOL_AFTER_PRE | --- nl --> | SOL | discardCurrLinePre | |
76 | * | SOL_AFTER_PRE | --- eof --> | --- | discardCurrLinePre | |
77 | * | SOL_AFTER_PRE | --- sol-tr --> | SOL_AFTER_PRE | PRE_TOKS << tok | |
78 | * | SOL_AFTER_PRE | --- ws --> | MULTILINE_PRE | PRE_TOKS << wsTok(#) | |
79 | * | SOL_AFTER_PRE | --- other --> | IGNORE | discardCurrLinePre | |
80 | * + --------------+-----------------+---------------+-------------------------+ |
81 | * | MULTILINE_PRE | --- nl --> | SOL_AFTER_PRE | processCurrLine | |
82 | * | MULTILINE_PRE | --- eof --> | --- | discardCurrLinePre | |
83 | * | MULTILINE_PRE | --- sol-tr --> | MULTILINE_PRE | PRE_TOKS << tok | |
84 | * | MULTILINE_PRE | --- blk tag --> | IGNORE | discardCurrLinePre | |
85 | * | MULTILINE_PRE | --- other --> | PRE_COLLECT | PRE_TOKS << tok | |
86 | * + --------------+-----------------+---------------+-------------------------+ |
87 | * | IGNORE | --- eof --> | --- | purgeBuffers | |
88 | * | IGNORE | --- nl --> | SOL | purgeBuffers | |
89 | * + --------------+-----------------+---------------+-------------------------+ |
90 | * |
91 | * # In these states, we assume that the whitespace char is split off from |
92 | * the rest of the string. |
93 | * ``` |
94 | */ |
95 | class PreHandler extends TokenHandler { |
96 | // FSM states |
97 | private const STATE_SOL = 1; |
98 | private const STATE_PRE = 2; |
99 | private const STATE_PRE_COLLECT = 3; |
100 | private const STATE_SOL_AFTER_PRE = 4; |
101 | private const STATE_MULTILINE_PRE = 5; |
102 | private const STATE_IGNORE = 6; |
103 | |
104 | /** @var int */ |
105 | private $state; |
106 | /** @var int */ |
107 | private $preTSR; |
108 | /** @var array<Token|string> */ |
109 | private $tokens; |
110 | /** @var array<Token|string> */ |
111 | private $currLinePreToks; |
112 | /** @var int index of the whitespace token in $currLinePreToks */ |
113 | private $wsTkIndex; |
114 | |
/**
 * Debug labels for the FSM states, as printed by the trace logging in
 * onNewline(), onEnd() and onAny().
 *
 * @return array<int,string> map from a STATE_* constant to its label
 */
private static function stateStr(): array {
	// Keyed by the STATE_* constants rather than bare integers so that the
	// labels cannot silently drift if the constants are ever renumbered.
	return [
		self::STATE_SOL => 'sol ',
		self::STATE_PRE => 'pre ',
		self::STATE_PRE_COLLECT => 'pre_collect ',
		self::STATE_SOL_AFTER_PRE => 'sol_after_pre',
		self::STATE_MULTILINE_PRE => 'multiline_pre',
		self::STATE_IGNORE => 'ignore '
	];
}
129 | |
/**
 * Build the placeholder token that stands in for the single whitespace
 * character which introduces an indent-pre line.
 *
 * Why a meta tag
 * --------------
 * The token never reaches the final output. It exists only so that DSR
 * computation can account for the whitespace character, and it is removed
 * again in CleanUp::stripMarkerMetas().
 *
 * Since it is purely an internal bookkeeping placeholder, the exact
 * representation does not matter as long as
 * (a) it doesn't impede code comprehension
 * (b) it is more or less consistent with how other instances of this token behave
 * (c) it doesn't introduce a lot of special-case handling and checks to deal with it.
 *
 * A meta tag satisfies these because meta tags are transparent to most
 * token and DOM handlers.
 *
 * DSR computation
 * ---------------
 * After all DOM processing, indent-pre <pre> tags are expected to have a DSR
 * of the form [ _, _, 1, 0 ], i.e. an opening tag width of 1 char and a
 * closing tag width of 0 chars. Because the whitespace char is now explicitly
 * represented by this meta tag, the <pre> tag will not get its 1-char width
 * during DSR computation: the meta tag consumes that width. Once the meta tag
 * is stripped in the cleanup pass, its width is reassigned to the opening tag
 * width of the <pre> tag.
 *
 * @return Token
 */
public static function newIndentPreWS(): Token {
	$attribs = [ new KV( 'typeof', 'mw:IndentPreWS' ) ];
	return new SelfclosingTagTk( 'meta', $attribs );
}
162 | |
/**
 * Check whether the argument is the indent-pre whitespace marker, i.e. a
 * token or DOM node carrying typeof 'mw:IndentPreWS'.
 *
 * @param Token|Node|string $tokenOrNode
 * @return bool
 */
public static function isIndentPreWS( $tokenOrNode ): bool {
	if ( $tokenOrNode instanceof Token ) {
		return TokenUtils::hasTypeOf( $tokenOrNode, 'mw:IndentPreWS' );
	}
	if ( $tokenOrNode instanceof Node ) {
		return DOMUtils::hasTypeOf( $tokenOrNode, 'mw:IndentPreWS' );
	}
	// Plain strings (and anything else) are never the marker.
	return false;
}
177 | |
/**
 * @param TokenTransformManager $manager manager environment
 * @param array $options various configuration options
 */
public function __construct( TokenTransformManager $manager, array $options ) {
	parent::__construct( $manager, $options );

	// The handler is switched off when the 'inlineContext' option is set
	// to a non-empty value; FSM state is only initialized when it is active.
	$this->disabled = !empty( $this->options['inlineContext'] );
	if ( !$this->disabled ) {
		$this->resetState( [] );
	}
}
191 | |
/**
 * Reinitialize the handler state.
 *
 * @param array $opts accepted but not consulted here; all state is
 *   reinitialized through reset()
 */
public function resetState( array $opts ): void {
	$this->reset();
}
195 | |
/**
 * Put the FSM back into its start-of-line state: both token buffers are
 * emptied, the whitespace-token index is cleared and onAnyEnabled is set.
 */
private function reset(): void {
	$this->onAnyEnabled = true;
	$this->state = self::STATE_SOL;

	$this->tokens = [];
	$this->currLinePreToks = [];
	$this->wsTkIndex = -1;

	// Start at zero so that an indent-pre on the very first line works:
	// there is no preceding newline there to initialize this value.
	$this->preTSR = 0;
}
210 | |
/**
 * Enter STATE_IGNORE and clear onAnyEnabled.
 *
 * onAny() treats being called in STATE_IGNORE as an error, so the flag is
 * presumably what keeps it from being invoked until the next reset().
 */
private function moveToIgnoreState(): void {
	$this->state = self::STATE_IGNORE;
	$this->onAnyEnabled = false;
}
218 | |
/**
 * Wrap the buffered tokens in <pre>..</pre> and empty the buffer.
 *
 * Trailing newline / sol-transparent tokens are left outside the wrapper and
 * appended after the closing tag; any indent-pre whitespace marker among
 * them is turned back into a literal ' '.
 *
 * @return array the wrapped token stream, or [] when nothing is buffered
 */
private function genPre(): array {
	$total = count( $this->tokens );
	if ( $total === 0 ) {
		// pre only if we have tokens to enclose
		return [];
	}

	$env = $this->env;

	// Scan backwards for the last token that belongs inside the <pre>.
	// Index 0 is never inspected, so at least one token is always wrapped.
	for ( $last = $total - 1; $last > 0; $last-- ) {
		$tok = $this->tokens[$last];
		$skippable = $tok instanceof NlTk || TokenUtils::isSolTransparent( $env, $tok );
		if ( !$skippable ) {
			break;
		}
		// A transclusion end marker stops the scan and stays inside the wrapper.
		if ( $tok instanceof Token && TokenUtils::matchTypeOf( $tok, '#^mw:Transclusion/End#' ) ) {
			break;
		}
	}

	// The opening tag gets a zero-width tsr at preTSR, when that is known.
	$dp = null;
	if ( $this->preTSR !== -1 ) {
		$dp = new DataParsoid;
		$dp->tsr = new SourceRange( $this->preTSR, $this->preTSR );
	}

	$wrapped = array_slice( $this->tokens, 0, $last + 1 );
	$trailing = array_slice( $this->tokens, $last + 1 );

	$out = array_merge(
		[ new TagTk( 'pre', [], $dp ) ],
		$wrapped,
		[ new EndTagTk( 'pre' ) ]
	);
	foreach ( $trailing as $tok ) {
		$out[] = self::isIndentPreWS( $tok ) ? ' ' : $tok;
	}

	$this->tokens = [];
	return $out;
}
268 | |
/**
 * Move the current line's pending tokens onto the main token buffer and
 * then, if given, append $token after them.
 *
 * @param Token|string|null $token
 * @param bool $metaToWS
 *  - if true, the IndentPreWS meta token of the current line is replaced
 *    by ' ' before the move.
 *  - if false, the meta token is kept as is (it will later be stripped
 *    by CleanUp::stripMarkerMetas() and the DSR updated)
 */
private function processCurrLine( $token = null, bool $metaToWS = false ): void {
	if ( $this->currLinePreToks !== [] ) {
		$wsIdx = $this->wsTkIndex;
		if ( $metaToWS && $wsIdx !== -1 ) {
			// replace meta token with ' '
			$this->currLinePreToks[$wsIdx] = ' ';
		}
		PHPUtils::pushArray( $this->tokens, $this->currLinePreToks );
		$this->wsTkIndex = -1;
		$this->currLinePreToks = [];
	}

	if ( $token !== null ) {
		$this->tokens[] = $token;
	}
}
289 | |
/**
 * Flush everything that is buffered, without any <pre> wrapping.
 *
 * The current line is folded into the main buffer with its indent-pre
 * marker turned back into ' ', $token is appended, and the main buffer is
 * handed back and emptied.
 *
 * @param Token|string $token
 * @return array
 */
private function purgeBuffers( $token ): array {
	$this->processCurrLine( $token, true );

	$flushed = $this->tokens;
	$this->tokens = [];
	return $flushed;
}
303 | |
/**
 * Give up on pre for the current line: emit the <pre> block for previously
 * buffered lines (if any), followed by the current line unwrapped and
 * $token.
 *
 * @param Token|string $token
 * @return array
 */
private function discardCurrLinePre( $token ): array {
	// genPre() must run first: it consumes the main buffer before
	// purgeBuffers() pushes the current line into it.
	$out = $this->genPre();
	$unwrapped = $this->purgeBuffers( $token );
	PHPUtils::pushArray( $out, $unwrapped );
	return $out;
}
315 | |
/**
 * Derive the initial pre TSR offset from a newline token.
 *
 * Static because it reads nothing from $this and is invoked as
 * self::initPreTSR() by its callers.
 *
 * @param NlTk $nltk
 * @return int the end offset of the newline's tsr, or -1 if none is recorded
 */
private static function initPreTSR( NlTk $nltk ): int {
	// `??` falls back only when the chain is null/unset, so a legitimate
	// offset (including 0) is returned unchanged.
	return $nltk->dataParsoid->tsr->end ?? -1;
}
327 | |
/**
 * Newline transition of the FSM.
 *
 * Whenever the FSM moves into the SOL state, preTSR is re-initialized from
 * the newline's tsr->end; it is later used to assign 'tsr' to the <pre>
 * token.
 *
 * @inheritDoc
 */
public function onNewline( NlTk $token ): ?TokenHandlerResult {
	$env = $this->env;

	$env->log( 'trace/pre', $this->pipelineId, 'NL |',
		$this->state, ':',
		self::stateStr()[$this->state], '|',
		static function () use ( $token ) {
			return PHPUtils::jsonEncode( $token );
		}
	);

	$state = $this->state;
	if ( $state === self::STATE_SOL || $state === self::STATE_PRE ) {
		// No pre on this line: flush everything as-is.
		$out = $this->purgeBuffers( $token );
		$this->state = self::STATE_SOL;
		$this->preTSR = self::initPreTSR( $token );
	} elseif ( $state === self::STATE_MULTILINE_PRE || $state === self::STATE_PRE_COLLECT ) {
		// Keep buffering: the next line may extend this pre.
		$this->processCurrLine( $token );
		$this->state = self::STATE_SOL_AFTER_PRE;
		$out = [];
	} elseif ( $state === self::STATE_SOL_AFTER_PRE ) {
		// The line after the pre had no leading whitespace: close the pre.
		$out = $this->discardCurrLinePre( $token );
		$this->preTSR = self::initPreTSR( $token );
		$this->state = self::STATE_SOL;
	} elseif ( $state === self::STATE_IGNORE ) {
		// Signals unmodified token
		$out = null;
		$this->reset();
		$this->preTSR = self::initPreTSR( $token );
	} else {
		// probably unreachable but makes phan happy
		$out = [];
	}

	$env->log( 'debug/pre', $this->pipelineId, 'saved :', $this->tokens );
	$env->log( 'debug/pre', $this->pipelineId, '----> ',
		static function () use ( $out ) {
			return PHPUtils::jsonEncode( $out );
		}
	);

	return new TokenHandlerResult( $out, true );
}
387 | |
/**
 * End-of-input transition of the FSM: flush whatever is buffered, wrapping
 * it in <pre> when the current state calls for it.
 *
 * @inheritDoc
 */
public function onEnd( EOFTk $token ): ?TokenHandlerResult {
	$env = $this->env;

	$env->log( 'trace/pre', $this->pipelineId, 'eof |',
		$this->state, ':',
		self::stateStr()[$this->state], '|',
		static function () use ( $token ) {
			return PHPUtils::jsonEncode( $token );
		}
	);

	$state = $this->state;
	if ( $state === self::STATE_SOL || $state === self::STATE_PRE ) {
		$out = $this->purgeBuffers( $token );
	} elseif ( $state === self::STATE_SOL_AFTER_PRE || $state === self::STATE_MULTILINE_PRE ) {
		$out = $this->discardCurrLinePre( $token );
	} elseif ( $state === self::STATE_PRE_COLLECT ) {
		// The current line is part of the pre; the EOF token goes after it.
		$this->processCurrLine();
		$out = $this->genPre();
		$out[] = $token;
	} elseif ( $state === self::STATE_IGNORE ) {
		$out = null;
	} else {
		// Probably unreachable but makes phan happy
		$out = [];
	}

	$env->log( 'debug/pre', $this->pipelineId, 'saved :', $this->tokens );
	$env->log( 'debug/pre', $this->pipelineId, '----> ',
		static function () use ( $out ) {
			return PHPUtils::jsonEncode( $out );
		}
	);

	return new TokenHandlerResult( $out, true );
}
435 | |
/**
 * Advance the pre TSR offset past a sol-transparent token.
 *
 * -1 means "unknown" and is sticky, except that a comment carrying its own
 * tsr re-establishes a known offset.
 *
 * @param int $tsr current offset, or -1 if unknown
 * @param Token|string $token
 * @return int updated offset, or -1 if it cannot be computed
 */
private function getUpdatedPreTSR( int $tsr, $token ): int {
	if ( $token instanceof CommentTk ) {
		if ( isset( $token->dataParsoid->tsr ) ) {
			return $token->dataParsoid->tsr->end;
		}
		return $tsr === -1 ? -1 : WTUtils::decodedCommentLength( $token ) + $tsr;
	}

	if ( !is_string( $token ) ) {
		// meta-tag or any other non-string token (cannot compute).
		// Checking for "not a string" rather than for one specific token
		// class keeps strlen() below from ever receiving an object, which
		// would be a TypeError under strict_types.
		return -1;
	}

	// string
	return $tsr === -1 ? -1 : $tsr + strlen( $token );
}
456 | |
/**
 * Buffer whatever follows the leading indent-pre space of a string token.
 *
 * Everything after the first space is treated as a new token (`substr` not
 * `mb_substr` since we know space is ASCII). This mirrors what the
 * PRE / MULTILINE_PRE branch of onAny() does for a string token.
 *
 * @param string $token string token whose first character is ' '
 */
private function collectTextAfterIndentWS( string $token ): void {
	if ( strlen( $token ) > 1 ) {
		$rest = substr( $token, 1 );
		$this->currLinePreToks[] = $rest;
		if ( !TokenUtils::isSolTransparent( $this->env, $rest ) ) {
			$this->state = self::STATE_PRE_COLLECT;
		}
	}
}

/**
 * FSM transition for every token that is neither a newline nor EOF.
 *
 * @inheritDoc
 */
public function onAny( $token ): ?TokenHandlerResult {
	$env = $this->env;

	$env->log( 'trace/pre', $this->pipelineId, 'any |',
		$this->state, ':',
		self::stateStr()[$this->state], '|',
		static function () use ( $token ) {
			return PHPUtils::jsonEncode( $token );
		}
	);

	$state = $this->state;
	if ( $state === self::STATE_IGNORE ) {
		$env->log( 'error', static function () use ( $token ) {
			return '!ERROR! IGNORE! Cannot get here: ' . PHPUtils::jsonEncode( $token );
		} );
		return null;
	}

	$startsWithSpace = is_string( $token ) && ( $token[0] ?? '' ) === ' ';
	$out = [];

	if ( $state === self::STATE_SOL ) {
		if ( $startsWithSpace ) {
			// Release what was buffered so far and start a candidate pre
			// line whose first entry is the whitespace marker.
			$out = $this->tokens;
			$this->tokens = [];
			$this->currLinePreToks = [ self::newIndentPreWS() ];
			$this->wsTkIndex = 0;
			$this->state = self::STATE_PRE;
			$this->collectTextAfterIndentWS( $token );
		} elseif ( TokenUtils::isSolTransparent( $env, $token ) ) {
			// continue watching ...
			// update pre-tsr since we haven't transitioned to PRE yet
			$this->preTSR = $this->getUpdatedPreTSR( $this->preTSR, $token );
			$this->tokens[] = $token;
		} else {
			$out = $this->purgeBuffers( $token );
			$this->moveToIgnoreState();
		}
	} elseif (
		$state === self::STATE_PRE ||
		$state === self::STATE_PRE_COLLECT ||
		$state === self::STATE_MULTILINE_PRE
	) {
		if ( !is_string( $token ) && TokenUtils::isWikitextBlockTag( $token->getName() ) ) {
			// A block tag ends any pre on this line. In STATE_PRE nothing
			// has been collected yet, so there is no <pre> to generate.
			if ( $state === self::STATE_PRE ) {
				$out = $this->purgeBuffers( $token );
			} else {
				$out = $this->discardCurrLinePre( $token );
			}
			$this->moveToIgnoreState();
		} else {
			$this->currLinePreToks[] = $token;
			if ( !TokenUtils::isSolTransparent( $this->env, $token ) ) {
				$this->state = self::STATE_PRE_COLLECT;
			}
		}
	} elseif ( $state === self::STATE_SOL_AFTER_PRE ) {
		if ( $startsWithSpace ) {
			// Another indented line: remember where its marker sits.
			$this->wsTkIndex = count( $this->currLinePreToks );
			$this->currLinePreToks[] = self::newIndentPreWS();
			$this->state = self::STATE_MULTILINE_PRE;
			$this->collectTextAfterIndentWS( $token );
		} elseif ( TokenUtils::isSolTransparent( $env, $token ) ) {
			// continue watching
			$this->currLinePreToks[] = $token;
		} else {
			$out = $this->discardCurrLinePre( $token );
			$this->moveToIgnoreState();
		}
	}

	$env->log( 'debug/pre', $this->pipelineId, 'saved :', $this->tokens );
	$env->log( 'debug/pre', $this->pipelineId, '----> ',
		static function () use ( $out ) {
			return PHPUtils::jsonEncode( $out );
		}
	);

	return new TokenHandlerResult( $out );
}
558 | } |