Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 10 |
|
0.00% |
0 / 8 |
CRAP | |
0.00% |
0 / 1 |
PipelineStage | |
0.00% |
0 / 10 |
|
0.00% |
0 / 8 |
72 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
setPipelineId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPipelineId | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getEnv | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
addTransformer | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
resetState | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setFrame | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setSourceOffsets | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
process | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
processChunkily | n/a |
0 / 0 |
n/a |
0 / 0 |
0 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html; |
5 | |
6 | use Generator; |
7 | use Wikimedia\Parsoid\Config\Env; |
8 | use Wikimedia\Parsoid\DOM\Document; |
9 | use Wikimedia\Parsoid\Tokens\SourceRange; |
10 | use Wikimedia\Parsoid\Wt2Html\TT\TokenHandler; |
11 | |
12 | /** |
13 | * This represents the abstract interface for a wt2html parsing pipeline stage |
14 | * Currently there are 4 known pipeline stages: |
15 | * - PEG Tokenizer |
16 | * - Token Transform Manager |
17 | * - HTML5 Tree Builder |
18 | * - DOM Post Processor |
19 | * |
20 | * The Token Transform Manager could eventually go away and be directly replaced by |
21 | * the very many token transformers that are represented by the abstract TokenHandler class. |
22 | */ |
23 | |
abstract class PipelineStage {
	/**
	 * Previous pipeline stage that generates input for this stage.
	 * Will be null for the first pipeline stage.
	 * @var ?PipelineStage
	 */
	protected $prevStage;

	/**
	 * This is primarily a debugging aid.
	 * Stays at -1 until setPipelineId() is called.
	 * @var int
	 */
	protected $pipelineId = -1;

	/**
	 * Assigned by the constructor.
	 * @var Env
	 */
	protected $env;

	/**
	 * Set to false by the constructor; the default resetState()
	 * implementation overwrites it from the 'toplevel' option.
	 * @var bool
	 */
	protected $atTopLevel;

	/**
	 * Not assigned by the constructor; set through setFrame().
	 * @var Frame
	 */
	protected $frame;

	/**
	 * @param Env $env
	 * @param ?PipelineStage $prevStage Stage feeding this one, or null
	 *   when there is no previous stage.
	 */
	public function __construct( Env $env, ?PipelineStage $prevStage = null ) {
		$this->env = $env;
		$this->prevStage = $prevStage;
		// Defaults to false and resetState initializes it
		$this->atTopLevel = false;
	}

	/**
	 * @param int $id
	 */
	public function setPipelineId( int $id ): void {
		$this->pipelineId = $id;
	}

	/**
	 * @return int
	 */
	public function getPipelineId(): int {
		return $this->pipelineId;
	}

	/**
	 * @return Env
	 */
	public function getEnv(): Env {
		return $this->env;
	}

	/**
	 * Register a token transformer.
	 *
	 * The default implementation rejects every transformer; stages that
	 * do accept token transformers must override this method.
	 *
	 * @param TokenHandler $t
	 * @throws \BadMethodCallException Always, in this default implementation.
	 */
	public function addTransformer( TokenHandler $t ): void {
		throw new \BadMethodCallException( "This pipeline stage doesn't accept token transformers." );
	}

	/**
	 * Resets any internal state for this pipeline stage.
	 * This is usually called so a cached pipeline can be reused.
	 *
	 * @param array $options
	 *  - toplevel: (bool) Stored as this stage's top-level flag;
	 *    treated as false when the key is absent or null.
	 */
	public function resetState( array $options ): void {
		/* Default implementation */
		$this->atTopLevel = $options['toplevel'] ?? false;
	}

	/**
	 * Set frame on this pipeline stage
	 * @param Frame $frame Pipeline frame
	 */
	public function setFrame( Frame $frame ): void {
		$this->frame = $frame;
	}

	/**
	 * Set the source offsets for the content being processed by this pipeline.
	 * This matters for when a substring of the top-level page is being processed
	 * in its own pipeline. This ensures that all source offsets assigned to tokens
	 * and DOM nodes in this stage are relative to the top-level page.
	 *
	 * @param SourceRange $so
	 */
	public function setSourceOffsets( SourceRange $so ): void {
		/* Default implementation: Do nothing */
	}

	/**
	 * Process wikitext, an array of tokens, or a DOM document depending on
	 * what pipeline stage this is. This will be the entirety of the input that
	 * will be processed by this pipeline stage and no further input or an EOF
	 * signal will follow.
	 *
	 * @param string|array|Document $input
	 * @param ?array $options
	 *  - atTopLevel: (bool) Whether we are processing the top-level document
	 *  - sol: (bool) Whether input should be processed in start-of-line context
	 *  - chunky: (bool) Whether we are processing the input chunkily.
	 * @return array|Document
	 */
	abstract public function process( $input, ?array $options = null );

	/**
	 * Process wikitext, an array of tokens, or a DOM document depending on
	 * what pipeline stage this is. This method will either directly or indirectly
	 * implement a generator that parses the input in chunks and yields output
	 * in chunks as well.
	 *
	 * Implementations that don't consume tokens (ex: Tokenizer, DOMPostProcessor)
	 * will provide specialized implementations that handle their input type.
	 *
	 * Unlike process(), $options has no default here and must be passed
	 * explicitly (null is accepted).
	 *
	 * @param string|array|Document $input
	 * @param ?array $options
	 *  - atTopLevel: (bool) Whether we are processing the top-level document
	 *  - sol: (bool) Whether input should be processed in start-of-line context
	 * @return Generator
	 */
	abstract public function processChunkily(
		$input, ?array $options
	): Generator;
}