Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 182 |
|
0.00% |
0 / 15 |
CRAP | |
0.00% |
0 / 1 |
| PreHandler | |
0.00% |
0 / 182 |
|
0.00% |
0 / 15 |
5700 | |
0.00% |
0 / 1 |
| newIndentPreWS | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| isIndentPreWS | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| __construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| resetState | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| reset | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| moveToIgnoreState | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| genPre | |
0.00% |
0 / 32 |
|
0.00% |
0 / 1 |
182 | |||
| processCurrLine | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
30 | |||
| purgeBuffers | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| discardCurrLinePre | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| initPreTSR | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| onNewline | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
72 | |||
| onEnd | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
72 | |||
| getUpdatedPreTSR | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
56 | |||
| onAny | |
0.00% |
0 / 54 |
|
0.00% |
0 / 1 |
462 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Wt2Html\TT; |
| 5 | |
| 6 | use Wikimedia\Parsoid\Core\SourceRange; |
| 7 | use Wikimedia\Parsoid\DOM\Node; |
| 8 | use Wikimedia\Parsoid\NodeData\DataParsoid; |
| 9 | use Wikimedia\Parsoid\Tokens\CommentTk; |
| 10 | use Wikimedia\Parsoid\Tokens\EmptyLineTk; |
| 11 | use Wikimedia\Parsoid\Tokens\EndTagTk; |
| 12 | use Wikimedia\Parsoid\Tokens\EOFTk; |
| 13 | use Wikimedia\Parsoid\Tokens\IndentPreTk; |
| 14 | use Wikimedia\Parsoid\Tokens\KV; |
| 15 | use Wikimedia\Parsoid\Tokens\NlTk; |
| 16 | use Wikimedia\Parsoid\Tokens\SelfclosingTagTk; |
| 17 | use Wikimedia\Parsoid\Tokens\TagTk; |
| 18 | use Wikimedia\Parsoid\Tokens\Token; |
| 19 | use Wikimedia\Parsoid\Tokens\XMLTagTk; |
| 20 | use Wikimedia\Parsoid\Utils\DOMUtils; |
| 21 | use Wikimedia\Parsoid\Utils\PHPUtils; |
| 22 | use Wikimedia\Parsoid\Utils\TokenUtils; |
| 23 | use Wikimedia\Parsoid\Utils\WTUtils; |
| 24 | use Wikimedia\Parsoid\Wt2Html\TokenHandlerPipeline; |
| 25 | |
| 26 | /** |
| 27 | * PRE-handling relies on the following 6-state FSM. |
| 28 | * |
| 29 | * States |
| 30 | * ------ |
| 31 | * ``` |
| 32 | * SOL -- start-of-line |
| 33 | * (white-space, comments, meta-tags are all SOL transparent) |
| 34 | * The FSM always starts in this state. |
| 35 | * PRE -- we might need a pre-block |
| 36 | * (if we enter the PRE_COLLECT state) |
| 37 | * PRE_COLLECT -- we will need to generate a pre-block and are collecting |
| 38 | * content for it. |
| 39 | * SOL_AFTER_PRE -- we might need to extend the pre-block to multiple lines. |
| 40 | * (depending on whether we see a white-space tok or not) |
| 41 | * MULTILINE_PRE -- We will wrap one or more previous lines with <pre> |
| 42 | * This line could be part of that pre if we enter PRE_COLLECT state |
| 43 | * IGNORE -- nothing to do for the rest of the line. |
| 44 | * ``` |
| 45 | * |
| 46 | * Action helpers |
| 47 | * -------------- |
| 48 | * |
| 49 | * genPre : return merge("<pre>$TOKS</pre>" while skipping sol-tr toks, sol-tr toks) |
| 50 | * processCurrLine : $TOKS += $PRE_TOKS; $PRE_TOKS = []; |
| 51 | * purgeBuffers : convert meta token to ' '; processCurrLine; RET = $TOKS; $TOKS = []; return RET |
| 52 | * discardCurrLinePre : return merge(genPre, purgeBuffers) |
| 53 | * |
| 54 | * Transitions |
| 55 | * ----------- |
| 56 | * |
| 57 | * ``` |
| 58 | * + --------------+-----------------+---------------+-------------------------+ |
| 59 | * | Start state | Token | End state | Action | |
| 60 | * + --------------+-----------------+---------------+-------------------------+ |
| 61 | * | SOL | --- nl --> | SOL | purgeBuffers | |
| 62 | * | SOL | --- eof --> | --- | purgeBuffers | |
| 63 | * | SOL | --- sol-tr --> | SOL | TOKS << tok | |
| 64 | * | SOL | --- ws --> | PRE | PRE_TOKS = [ wsTok(#) ] | |
| 65 | * | SOL | --- other --> | IGNORE | purgeBuffers | |
| 66 | * + --------------+-----------------+---------------+-------------------------+ |
| 67 | * | PRE | --- nl --> | SOL | purgeBuffers | |
| 68 | * | PRE | --- eof --> | --- | purgeBuffers | |
| 69 | * | PRE | --- sol-tr --> | PRE | PRE_TOKS << tok | |
| 70 | * | PRE | --- blk tag --> | IGNORE | purgeBuffers | |
| 71 | * | PRE | --- other --> | PRE_COLLECT | PRE_TOKS << tok | |
| 72 | * + --------------+-----------------+---------------+-------------------------+ |
| 73 | * | PRE_COLLECT | --- nl --> | SOL_AFTER_PRE | processCurrLine | |
| 74 | * | PRE_COLLECT | --- eof --> | --- | processCurrLine; genPre | |
| 75 | * | PRE_COLLECT | --- blk tag --> | IGNORE | discardCurrLinePre | |
| 76 | * | PRE_COLLECT | --- other --> | PRE_COLLECT | PRE_TOKS << tok | |
| 77 | * + --------------+-----------------+---------------+-------------------------+ |
| 78 | * | SOL_AFTER_PRE | --- nl --> | SOL | discardCurrLinePre | |
| 79 | * | SOL_AFTER_PRE | --- eof --> | --- | discardCurrLinePre | |
| 80 | * | SOL_AFTER_PRE | --- sol-tr --> | SOL_AFTER_PRE | PRE_TOKS << tok | |
| 81 | * | SOL_AFTER_PRE | --- ws --> | MULTILINE_PRE | PRE_TOKS << wsTok(#) | |
| 82 | * | SOL_AFTER_PRE | --- other --> | IGNORE | discardCurrLinePre | |
| 83 | * + --------------+-----------------+---------------+-------------------------+ |
| 84 | * | MULTILINE_PRE | --- nl --> | SOL_AFTER_PRE | processCurrLine | |
| 85 | * | MULTILINE_PRE | --- eof --> | --- | discardCurrLinePre | |
| 86 | * | MULTILINE_PRE | --- sol-tr --> | MULTILINE_PRE | PRE_TOKS << tok |
| 87 | * | MULTILINE_PRE | --- blk tag --> | IGNORE | discardCurrLinePre | |
| 88 | * | MULTILINE_PRE | --- other --> | PRE_COLLECT | PRE_TOKS << tok | |
| 89 | * + --------------+-----------------+---------------+-------------------------+ |
| 90 | * | IGNORE | --- eof --> | --- | purgeBuffers | |
| 91 | * | IGNORE | --- nl --> | SOL | purgeBuffers | |
| 92 | * + --------------+-----------------+---------------+-------------------------+ |
| 93 | * |
| 94 | * # In these states, we assume that the whitespace char is split off from
| 95 | * the rest of the string.
| 96 | * ``` |
| 97 | */ |
| 98 | class PreHandler extends LineBasedHandler { |
| 99 | // FSM states (the values double as keys of STATE_STR below)
| 100 | private const STATE_SOL = 1;
| 101 | private const STATE_PRE = 2;
| 102 | private const STATE_PRE_COLLECT = 3;
| 103 | private const STATE_SOL_AFTER_PRE = 4;
| 104 | private const STATE_MULTILINE_PRE = 5;
| 105 | private const STATE_IGNORE = 6;
| 106 | 
| 107 | /** @var int current FSM state, one of the STATE_* constants */
| 108 | private $state;
| 109 | private ?SourceRange $preTSR; // copied onto the opening <pre> tag by genPre(); null when it cannot be computed
| 110 | /** @var array<Token|string> buffered tokens, flushed by purgeBuffers() or wrapped by genPre() */
| 111 | private $tokens;
| 112 | /** @var array<Token|string> tokens of the current line, merged into $tokens by processCurrLine() */
| 113 | private $currLinePreToks;
| 114 | /** @var int index of the whitespace token in $currLinePreToks, or -1 when there is none */
| 115 | private $wsTkIndex;
| 116 | 
| 117 | /**
| 118 | * Debug string output of FSM states, keyed by the STATE_* values.
| 119 | * Only used when building trace output.
| 120 | */
| 120 | private const STATE_STR = [
| 121 | 1 => 'sol ',
| 122 | 2 => 'pre ',
| 123 | 3 => 'pre_collect ',
| 124 | 4 => 'sol_after_pre',
| 125 | 5 => 'multiline_pre',
| 126 | 6 => 'ignore '
| 127 | ];
| 128 | |
| 129 | /** |
| 130 | * Create a token to represent the indent-pre whitespace character. |
| 131 | * |
| 132 | * Notes about choice of token representation |
| 133 | * ------------------------------------------- |
| 134 | * This token will not make it to the final output and is only present to ensure |
| 135 | * DSR computation can account for this whitespace character. This meta tag will |
| 136 | * be removed in CleanUp::stripMarkerMetas(). |
| 137 | * |
| 138 | * Given that this token is purely an internal bookkeeping placeholder, |
| 139 | * it really does not matter how we represent it as long as |
| 140 | * (a) it doesn't impede code comprehension |
| 141 | * (b) it is more or less consistent with how other instances of this token behave |
| 142 | * (c) it doesn't introduce a lot of special-case handling and checks to deal with it. |
| 143 | * |
| 144 | * Based on that consideration, we settle for a meta tag because meta tags are transparent |
| 145 | * to most token and DOM handlers. |
| 146 | * |
| 147 | * Notes about DSR computation |
| 148 | * --------------------------- |
| 149 | * Once we are done with all DOM processing, we expect indent-pre <pre> tags to have |
| 150 | * DSR that looks like [ _, _, 1, 0 ], i.e. it has an opening tag width of 1 char and |
| 151 | * closing tag width of 0 char. But, since we are now explicitly representing the ws char |
| 152 | * as a meta-tag, we <pre> tag will not get a 1-char width during DSR computation since |
| 153 | * this meta-tag will consume that width. Accordingly, once we strip this meta-tag in the |
| 154 | * cleanup pass, we will reassign its width to the opening tag width of the <pre> tag. |
| 155 | */ |
| 156 | public static function newIndentPreWS(): Token { |
| 157 | return new SelfclosingTagTk( 'meta', [ new KV( 'typeof', 'mw:IndentPreWS' ) ] ); |
| 158 | } |
| 159 | |
| 160 | /** |
| 161 | * Does this token or node represent an indent-pre whitespace character? |
| 162 | * @param Token|Node|string $tokenOrNode |
| 163 | * @return bool |
| 164 | */ |
| 165 | public static function isIndentPreWS( $tokenOrNode ): bool { |
| 166 | if ( $tokenOrNode instanceof Token ) { |
| 167 | return TokenUtils::hasTypeOf( $tokenOrNode, 'mw:IndentPreWS' ); |
| 168 | } elseif ( $tokenOrNode instanceof Node ) { |
| 169 | return DOMUtils::hasTypeOf( $tokenOrNode, 'mw:IndentPreWS' ); |
| 170 | } else { |
| 171 | return false; |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | /** |
| 176 | * @param TokenHandlerPipeline $manager manager enviroment |
| 177 | * @param array $options various configuration options |
| 178 | */ |
| 179 | public function __construct( TokenHandlerPipeline $manager, array $options ) { |
| 180 | parent::__construct( $manager, $options ); |
| 181 | if ( !empty( $this->options['inlineContext'] ) ) { |
| 182 | $this->disabled = true; |
| 183 | } else { |
| 184 | $this->disabled = false; |
| 185 | $this->resetState( [] ); |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | public function resetState( array $options ): void { |
| 190 | $this->reset(); |
| 191 | } |
| 192 | |
| 193 | /** |
| 194 | * Resets the FSM state with optional any handler enabled |
| 195 | */ |
| 196 | private function reset(): void { |
| 197 | $this->state = self::STATE_SOL; |
| 198 | // Initialize to zero to deal with indent-pre |
| 199 | // on the very first line where there is no |
| 200 | // preceding newline to initialize this. |
| 201 | // XXX: T405759 should initialize source better |
| 202 | $this->preTSR = new SourceRange( 0, 0, null ); |
| 203 | $this->tokens = []; |
| 204 | $this->currLinePreToks = []; |
| 205 | $this->wsTkIndex = -1; |
| 206 | $this->onAnyEnabled = true; |
| 207 | } |
| 208 | |
| 209 | /** |
| 210 | * Switches the FSM to STATE_IGNORE |
| 211 | */ |
| 212 | private function moveToIgnoreState(): void { |
| 213 | $this->onAnyEnabled = false; |
| 214 | $this->state = self::STATE_IGNORE; |
| 215 | } |
| 216 | |
| 217 | /** |
| 218 | * Wrap buffered tokens with <pre>..</pre> |
| 219 | * |
| 220 | * @return list<string|Token> |
| 221 | */ |
| 222 | private function genPre(): array { |
| 223 | $ret = []; |
| 224 | |
| 225 | // pre only if we have tokens to enclose |
| 226 | $n = $i = count( $this->tokens ); |
| 227 | if ( $n > 0 ) { |
| 228 | $env = $this->env; |
| 229 | |
| 230 | // Don't wrap sol-transparent toks. |
| 231 | // Find index for last token to wrap. |
| 232 | $i--; |
| 233 | while ( $i > 0 ) { |
| 234 | $t = $this->tokens[$i]; |
| 235 | if ( !( $t instanceof NlTk ) && !TokenUtils::isSolTransparent( $env, $t ) ) { |
| 236 | break; |
| 237 | } |
| 238 | if ( $t instanceof Token && TokenUtils::matchTypeOf( $t, '#^mw:Transclusion/End#' ) ) { |
| 239 | break; |
| 240 | } |
| 241 | $i--; |
| 242 | } |
| 243 | |
| 244 | // Add pre wrapper around the selected tokens |
| 245 | // and embed them in a compound IndentPre token |
| 246 | $da = null; |
| 247 | if ( $this->preTSR !== null ) { |
| 248 | $da = new DataParsoid; |
| 249 | $da->tsr = clone $this->preTSR; |
| 250 | } |
| 251 | $indentPreTk = new IndentPreTk; |
| 252 | $indentPreTk->addToken( new TagTk( 'pre', [], $da ) ); |
| 253 | for ( $j = 0; $j < $i + 1; $j++ ) { |
| 254 | $t = $this->tokens[$j]; |
| 255 | // The ListHandler will ignore IndentPreTk tokens but |
| 256 | // we might have tokenized a listItem on this line from |
| 257 | // a template so turn it back to text |
| 258 | if ( $t instanceof XMLTagTk && $t->getName() === 'listItem' ) { |
| 259 | $t = $t->getAttributeKV( 'bullets' )->srcOffsets->value->substr(); |
| 260 | } |
| 261 | $indentPreTk->addToken( $t ); |
| 262 | } |
| 263 | $indentPreTk->addToken( new EndTagTk( 'pre' ) ); |
| 264 | |
| 265 | $ret = [ $indentPreTk ]; |
| 266 | for ( $j = $i + 1; $j < $n; $j++ ) { |
| 267 | $t = $this->tokens[$j]; |
| 268 | if ( self::isIndentPreWS( $t ) ) { |
| 269 | $t = ' '; |
| 270 | } |
| 271 | $ret[] = $t; |
| 272 | } |
| 273 | $this->tokens = []; |
| 274 | } |
| 275 | return $ret; |
| 276 | } |
| 277 | |
| 278 | /** |
| 279 | * @param Token|string|null $token |
| 280 | * @param bool $metaToWS |
| 281 | * - if true, convert the IndentPreWS meta token to ' '. |
| 282 | * - if false, leave the meta token as is (it will later be stripped |
| 283 | * by CleanUp::stripMarkerMetas() and the DSR updated) |
| 284 | */ |
| 285 | private function processCurrLine( $token = null, bool $metaToWS = false ): void { |
| 286 | if ( count( $this->currLinePreToks ) > 0 ) { |
| 287 | if ( $metaToWS && $this->wsTkIndex !== -1 ) { |
| 288 | $this->currLinePreToks[$this->wsTkIndex] = ' '; // replace meta token with ' ' |
| 289 | } |
| 290 | PHPUtils::pushArray( $this->tokens, $this->currLinePreToks ); |
| 291 | $this->currLinePreToks = []; |
| 292 | $this->wsTkIndex = -1; |
| 293 | } |
| 294 | if ( $token !== null ) { |
| 295 | $this->tokens[] = $token; |
| 296 | } |
| 297 | } |
| 298 | |
| 299 | /** |
| 300 | * Get results and cleanup state |
| 301 | * |
| 302 | * @param Token|string $token |
| 303 | * @return array<string|Token> |
| 304 | */ |
| 305 | private function purgeBuffers( $token ): array { |
| 306 | $this->processCurrLine( $token, true ); |
| 307 | $ret = $this->tokens; |
| 308 | $this->tokens = []; |
| 309 | |
| 310 | return $ret; |
| 311 | } |
| 312 | |
| 313 | /** |
| 314 | * Discard pre on this line. Generate pre formatting for previous lines, if any. |
| 315 | * |
| 316 | * @param Token|string $token |
| 317 | * @return array<string|Token> |
| 318 | */ |
| 319 | private function discardCurrLinePre( $token ): array { |
| 320 | $ret = $this->genPre(); |
| 321 | PHPUtils::pushArray( $ret, $this->purgeBuffers( $token ) ); |
| 322 | return $ret; |
| 323 | } |
| 324 | |
| 325 | /** |
| 326 | * Initialize a pre TSR |
| 327 | */ |
| 328 | private function initPreTSR( NlTk $nltk ): ?SourceRange { |
| 329 | $da = $nltk->dataParsoid; |
| 330 | // tsr->end can never be zero, so safe to use tsr->end to check for null/undefined |
| 331 | return ( $da->tsr->end ?? null ) !== null ? |
| 332 | new SourceRange( $da->tsr->end, $da->tsr->end, $da->tsr->source ) : |
| 333 | null; |
| 334 | } |
| 335 | |
| 336 | /** |
| 337 | * @inheritDoc |
| 338 | */ |
| 339 | public function onNewline( NlTk $token ): ?array { |
| 340 | $env = $this->env; |
| 341 | |
| 342 | $env->trace( 'pre', $this->pipelineId, 'NL |', |
| 343 | self::STATE_STR[$this->state], '|', $token |
| 344 | ); |
| 345 | |
| 346 | // Whenever we move into SOL-state, init preTSR to |
| 347 | // the newline's tsr->end. This will later be used |
| 348 | // to assign 'tsr' values to the <pre> token. |
| 349 | |
| 350 | switch ( $this->state ) { |
| 351 | case self::STATE_SOL: |
| 352 | case self::STATE_PRE: |
| 353 | $ret = $this->purgeBuffers( $token ); |
| 354 | $this->preTSR = self::initPreTSR( $token ); |
| 355 | $this->state = self::STATE_SOL; |
| 356 | break; |
| 357 | |
| 358 | case self::STATE_MULTILINE_PRE: |
| 359 | case self::STATE_PRE_COLLECT: |
| 360 | $ret = []; |
| 361 | $this->processCurrLine( $token ); |
| 362 | $this->state = self::STATE_SOL_AFTER_PRE; |
| 363 | break; |
| 364 | |
| 365 | case self::STATE_SOL_AFTER_PRE: |
| 366 | $ret = $this->discardCurrLinePre( $token ); |
| 367 | $this->state = self::STATE_SOL; |
| 368 | $this->preTSR = self::initPreTSR( $token ); |
| 369 | break; |
| 370 | |
| 371 | case self::STATE_IGNORE: |
| 372 | // Returning null will invoke the onAny handler. |
| 373 | // Since we want to skip it, return [ $token ]. |
| 374 | $ret = [ $token ]; |
| 375 | $this->reset(); |
| 376 | $this->preTSR = self::initPreTSR( $token ); |
| 377 | break; |
| 378 | |
| 379 | default: |
| 380 | // probably unreachable but makes phan happy |
| 381 | $ret = []; |
| 382 | } |
| 383 | |
| 384 | $env->log( 'debug/pre', $this->pipelineId, 'saved :', $this->tokens ); |
| 385 | $env->log( 'debug/pre', $this->pipelineId, '----> ', $ret ); |
| 386 | |
| 387 | return $ret; |
| 388 | } |
| 389 | |
| 390 | /** |
| 391 | * @inheritDoc |
| 392 | */ |
| 393 | public function onEnd( EOFTk $token ): ?array { |
| 394 | $this->env->trace( 'pre', $this->pipelineId, 'eof |', |
| 395 | self::STATE_STR[$this->state], '|', $token |
| 396 | ); |
| 397 | |
| 398 | switch ( $this->state ) { |
| 399 | case self::STATE_SOL: |
| 400 | case self::STATE_PRE: |
| 401 | $ret = $this->purgeBuffers( $token ); |
| 402 | break; |
| 403 | |
| 404 | case self::STATE_SOL_AFTER_PRE: |
| 405 | case self::STATE_MULTILINE_PRE: |
| 406 | $ret = $this->discardCurrLinePre( $token ); |
| 407 | break; |
| 408 | |
| 409 | case self::STATE_PRE_COLLECT: |
| 410 | $this->processCurrLine(); |
| 411 | $ret = $this->genPre(); |
| 412 | $ret[] = $token; |
| 413 | break; |
| 414 | |
| 415 | case self::STATE_IGNORE: |
| 416 | // Returning null will invoke the onAny handler. |
| 417 | // Since we want to skip it, return [ $token ]. |
| 418 | $ret = [ $token ]; |
| 419 | break; |
| 420 | |
| 421 | default: |
| 422 | // Probably unreachable but makes phan happy |
| 423 | $ret = []; |
| 424 | } |
| 425 | |
| 426 | $this->env->log( 'debug/pre', $this->pipelineId, 'saved :', $this->tokens ); |
| 427 | $this->env->log( 'debug/pre', $this->pipelineId, '----> ', $ret ); |
| 428 | |
| 429 | return $ret; |
| 430 | } |
| 431 | |
| 432 | /** |
| 433 | * Get updated pre TSR value |
| 434 | * |
| 435 | * @param ?SourceRange $tsr |
| 436 | * @param Token|string $token |
| 437 | * @return ?SourceRange |
| 438 | */ |
| 439 | private function getUpdatedPreTSR( ?SourceRange $tsr, $token ): ?SourceRange { |
| 440 | if ( $token instanceof CommentTk ) { |
| 441 | if ( isset( $token->dataParsoid->tsr ) ) { |
| 442 | $tsr = new SourceRange( |
| 443 | $token->dataParsoid->tsr->end, |
| 444 | $token->dataParsoid->tsr->end, |
| 445 | $token->dataParsoid->tsr->source |
| 446 | ); |
| 447 | } elseif ( $tsr !== null ) { |
| 448 | $tsr = $tsr->offset( WTUtils::decodedCommentLength( $token ) ); |
| 449 | } |
| 450 | } elseif ( $token instanceof SelfclosingTagTk || $token instanceof EmptyLineTk ) { |
| 451 | // meta-tag (cannot compute) |
| 452 | $tsr = null; |
| 453 | } elseif ( $tsr !== null ) { |
| 454 | // string |
| 455 | $tsr = $tsr->offset( strlen( $token ) ); |
| 456 | } |
| 457 | return $tsr; |
| 458 | } |
| 459 | |
| 460 | /** |
| 461 | * @inheritDoc |
| 462 | */ |
| 463 | public function onAny( $token ): ?array { |
| 464 | $env = $this->env; |
| 465 | |
| 466 | $env->trace( 'pre', $this->pipelineId, 'any |', |
| 467 | self::STATE_STR[$this->state], '|', $token |
| 468 | ); |
| 469 | |
| 470 | if ( $this->state === self::STATE_IGNORE ) { |
| 471 | $env->log( 'error', |
| 472 | '!ERROR! IGNORE! Cannot get here: ' . PHPUtils::jsonEncode( $token ) |
| 473 | ); |
| 474 | return null; |
| 475 | } |
| 476 | |
| 477 | $ret = []; |
| 478 | switch ( $this->state ) { |
| 479 | case self::STATE_SOL: |
| 480 | if ( is_string( $token ) && ( $token[0] ?? '' ) === ' ' ) { |
| 481 | $ret = $this->tokens; |
| 482 | $this->tokens = []; |
| 483 | $this->wsTkIndex = 0; |
| 484 | $this->currLinePreToks = [ self::newIndentPreWS() ]; |
| 485 | $this->state = self::STATE_PRE; |
| 486 | if ( strlen( $token ) > 1 ) { |
| 487 | // Treat everything after the first space as a new token |
| 488 | // (`substr` not `mb_substr` since we know space is ASCII) |
| 489 | // This is inlined handling of 'case self::PRE' |
| 490 | // scenario for a string. |
| 491 | $token = substr( $token, 1 ); |
| 492 | $this->currLinePreToks[] = $token; |
| 493 | if ( !TokenUtils::isSolTransparent( $this->env, $token ) ) { |
| 494 | $this->state = self::STATE_PRE_COLLECT; |
| 495 | } |
| 496 | } |
| 497 | } elseif ( TokenUtils::isSolTransparent( $env, $token ) ) { |
| 498 | // continue watching ... |
| 499 | // update pre-tsr since we haven't transitioned to PRE yet |
| 500 | $this->preTSR = $this->getUpdatedPreTSR( $this->preTSR, $token ); |
| 501 | $this->tokens[] = $token; |
| 502 | } else { |
| 503 | $ret = $this->purgeBuffers( $token ); |
| 504 | $this->moveToIgnoreState(); |
| 505 | } |
| 506 | break; |
| 507 | |
| 508 | case self::STATE_PRE: |
| 509 | case self::STATE_PRE_COLLECT: |
| 510 | case self::STATE_MULTILINE_PRE: |
| 511 | if ( |
| 512 | $token instanceof XMLTagTk && |
| 513 | TokenUtils::isWikitextBlockTag( $token->getName() ) |
| 514 | ) { |
| 515 | $ret = $this->state === self::STATE_PRE ? |
| 516 | $this->purgeBuffers( $token ) : $this->discardCurrLinePre( $token ); |
| 517 | $this->moveToIgnoreState(); |
| 518 | } else { |
| 519 | $this->currLinePreToks[] = $token; |
| 520 | if ( !TokenUtils::isSolTransparent( $this->env, $token ) ) { |
| 521 | $this->state = self::STATE_PRE_COLLECT; |
| 522 | } |
| 523 | } |
| 524 | break; |
| 525 | |
| 526 | case self::STATE_SOL_AFTER_PRE: |
| 527 | if ( is_string( $token ) && ( $token[0] ?? '' ) === ' ' ) { |
| 528 | $this->wsTkIndex = count( $this->currLinePreToks ); |
| 529 | $this->currLinePreToks[] = self::newIndentPreWS(); |
| 530 | $this->state = self::STATE_MULTILINE_PRE; |
| 531 | if ( strlen( $token ) > 1 ) { |
| 532 | // Treat everything after the first space as a new token |
| 533 | // (`substr` not `mb_substr` since we know space is ASCII) |
| 534 | // This is inlined handling of 'case self::MULTILINE_PRE' |
| 535 | // scenario for a string. |
| 536 | $token = substr( $token, 1 ); |
| 537 | $this->currLinePreToks[] = $token; |
| 538 | if ( !TokenUtils::isSolTransparent( $this->env, $token ) ) { |
| 539 | $this->state = self::STATE_PRE_COLLECT; |
| 540 | } |
| 541 | } |
| 542 | } elseif ( TokenUtils::isSolTransparent( $env, $token ) ) { // continue watching |
| 543 | $this->currLinePreToks[] = $token; |
| 544 | } else { |
| 545 | $ret = $this->discardCurrLinePre( $token ); |
| 546 | $this->moveToIgnoreState(); |
| 547 | } |
| 548 | break; |
| 549 | } |
| 550 | |
| 551 | $env->log( 'debug/pre', $this->pipelineId, 'saved :', $this->tokens ); |
| 552 | $env->log( 'debug/pre', $this->pipelineId, '----> ', $ret ); |
| 553 | |
| 554 | return $ret; |
| 555 | } |
| 556 | } |