Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 10 |
|
0.00% |
0 / 8 |
CRAP | |
0.00% |
0 / 1 |
PipelineStage | |
0.00% |
0 / 10 |
|
0.00% |
0 / 8 |
72 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
setPipelineId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPipelineId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getEnv | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
addTransformer | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
resetState | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
setFrame | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setSourceOffsets | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
process | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
processChunkily | n/a |
0 / 0 |
n/a |
0 / 0 |
0 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html; |
5 | |
6 | use Generator; |
7 | use Wikimedia\Parsoid\Config\Env; |
8 | use Wikimedia\Parsoid\DOM\Document; |
9 | use Wikimedia\Parsoid\Tokens\SourceRange; |
10 | use Wikimedia\Parsoid\Wt2Html\TT\TokenHandler; |
11 | |
12 | /** |
13 | * This represents the abstract interface for a wt2html parsing pipeline stage.
14 | * Currently there are 4 known pipeline stages: |
15 | * - PEG Tokenizer |
16 | * - Token Transform Manager |
17 | * - HTML5 Tree Builder |
18 | * - DOM Post Processor |
19 | * |
20 | * The Token Transform Manager could eventually go away and be directly replaced by |
21 | * the very many token transformers that are represented by the abstract TokenHandler class. |
22 | */ |
23 | |
24 | abstract class PipelineStage {
25 | /**
26 | * Previous pipeline stage that generates input for this stage.
27 | * Will be null for the first pipeline stage. Assigned by the constructor.
28 | * @var ?PipelineStage
29 | */
30 | protected $prevStage;
31 | 
32 | /**
33 | * This is primarily a debugging aid. Stays at -1 until setPipelineId() is called.
34 | * @var int
35 | */
36 | protected $pipelineId = -1;
37 | 
38 | /** @var Env Assigned by the constructor and returned by getEnv() */
39 | protected $env = null;
40 | 
41 | /** Defaults to false; resetState() sets it from the 'toplevel' option */
42 | protected bool $atTopLevel = false;
43 | 
44 | protected bool $toFragment = true; // resetState() sets it from the 'toFragment' option (default true)
45 | 
46 | /** @var Frame Assigned by setFrame(); null until then */
47 | protected $frame;
48 | 
49 | public function __construct( Env $env, ?PipelineStage $prevStage = null ) {
50 | $this->env = $env;
51 | $this->prevStage = $prevStage;
52 | }
53 | 
54 | public function setPipelineId( int $id ): void {
55 | $this->pipelineId = $id;
56 | }
57 | 
58 | public function getPipelineId(): int {
59 | return $this->pipelineId;
60 | }
61 | 
62 | public function getEnv(): Env {
63 | return $this->env;
64 | }
65 | 
66 | /**
67 | * Register a token transformer. This default implementation always throws \BadMethodCallException.
68 | * @param TokenHandler $t
69 | */
70 | public function addTransformer( TokenHandler $t ): void {
71 | throw new \BadMethodCallException( "This pipeline stage doesn't accept token transformers." );
72 | }
73 | 
74 | /**
75 | * Resets any internal state for this pipeline stage.
76 | * This is usually called so a cached pipeline can be reused.
77 | * The default implementation reads 'toplevel' (default false) and 'toFragment' (default true).
78 | * @param array $options
79 | */
80 | public function resetState( array $options ): void {
81 | /* Default implementation */
82 | $this->atTopLevel = $options['toplevel'] ?? false;
83 | $this->toFragment = $options['toFragment'] ?? true;
84 | }
85 | 
86 | /**
87 | * Set frame on this pipeline stage
88 | * @param Frame $frame Pipeline frame
89 | */
90 | public function setFrame( Frame $frame ): void {
91 | $this->frame = $frame;
92 | }
93 | 
94 | /**
95 | * Set the source offsets for the content being processed by this pipeline.
96 | * This matters for when a substring of the top-level page is being processed
97 | * in its own pipeline. This ensures that all source offsets assigned to tokens
98 | * and DOM nodes in this stage are relative to the top-level page.
99 | * The default implementation ignores $so and does nothing.
100 | * @param SourceRange $so
101 | */
102 | public function setSourceOffsets( SourceRange $so ): void {
103 | /* Default implementation: Do nothing */
104 | }
105 | 
106 | /**
107 | * Process wikitext, an array of tokens, or a DOM document depending on
108 | * what pipeline stage this is. This will be the entirety of the input that
109 | * will be processed by this pipeline stage and no further input or an EOF
110 | * signal will follow.
111 | *
112 | * @param string|array|Document $input
113 | * @param array{sol:bool} $options
114 | * - atTopLevel: (bool) Whether we are processing the top-level document
115 | * - sol: (bool) Whether input should be processed in start-of-line context
116 | * - chunky: (bool) Whether we are processing the input chunkily.
117 | * @return array|Document
118 | */
119 | abstract public function process( $input, array $options );
120 | 
121 | /**
122 | * Process wikitext, an array of tokens, or a DOM document depending on
123 | * what pipeline stage this is. This method will either directly or indirectly
124 | * implement a generator that parses the input in chunks and yields output
125 | * in chunks as well.
126 | *
127 | * Implementations that don't consume tokens (ex: Tokenizer, DOMPostProcessor)
128 | * will provide specialized implementations that handle their input type.
129 | *
130 | * @param string|array|Document $input
131 | * @param array{sol:bool} $options
132 | * - atTopLevel: (bool) Whether we are processing the top-level document
133 | * - sol: (bool) Whether input should be processed in start-of-line context
134 | * @return Generator
135 | */
136 | abstract public function processChunkily(
137 | $input, array $options
138 | ): Generator;
139 | }