Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
73.13% |
49 / 67 |
|
80.00% |
8 / 10 |
CRAP | |
0.00% |
0 / 1 |
ParserPipeline | |
73.13% |
49 / 67 |
|
80.00% |
8 / 10 |
39.11 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
getCacheKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
applyToStage | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
setPipelineId | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
resetState | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setSourceOffsets | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setFrame | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
parse | |
50.00% |
9 / 18 |
|
0.00% |
0 / 1 |
13.12 | |||
parseChunkily | |
50.00% |
9 / 18 |
|
0.00% |
0 / 1 |
13.12 | |||
init | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
4 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\Config\Env; |
8 | use Wikimedia\Parsoid\DOM\Document; |
9 | use Wikimedia\Parsoid\Tokens\SourceRange; |
10 | use Wikimedia\Parsoid\Utils\PHPUtils; |
11 | |
12 | /** |
13 | * Wrap some stages into a pipeline. |
14 | */ |
15 | |
class ParserPipeline {
	/** @var int Pipeline id (useful for debugging); -1 until setPipelineId() is called */
	private $id;

	/** @var string Output type; parseChunkily() returns a single result when this is 'DOM' */
	private $outputType;

	/** @var string */
	private $pipelineType;

	/** @var array Ordered list of stage objects making up this pipeline */
	private $stages;

	/** @var Env */
	private $env;

	/** @var string */
	private $cacheKey;

	/** @var Frame */
	private $frame;

	/**
	 * @param string $type Pipeline type
	 * @param string $outType Output type of the pipeline (compared against 'DOM' in parseChunkily())
	 * @param string $cacheKey Key reported by getCacheKey()
	 * @param array $stages Stage objects, in processing order
	 * @param Env $env
	 */
	public function __construct(
		string $type, string $outType, string $cacheKey, array $stages, Env $env
	) {
		$this->id = -1;
		$this->cacheKey = $cacheKey;
		$this->pipelineType = $type;
		$this->outputType = $outType;
		$this->stages = $stages;
		$this->env = $env;
	}

	/**
	 * @return string The cache key this pipeline was constructed with
	 */
	public function getCacheKey(): string {
		return $this->cacheKey;
	}

	/**
	 * Invoke the named method, with the given arguments, on every stage
	 * of this pipeline (in stage order).
	 *
	 * @param string $fn Name of the stage method to call
	 * @param mixed ...$args Arguments forwarded to that method
	 */
	private function applyToStage( string $fn, ...$args ): void {
		foreach ( $this->stages as $stage ) {
			$stage->$fn( ...$args );
		}
	}

	/**
	 * Start a new profile on the environment if profiling is enabled.
	 *
	 * @return mixed|null The started profile object, or null when profiling is off
	 */
	private function startProfile() {
		$profile = $this->env->profiling() ? $this->env->pushNewProfile() : null;
		if ( $profile !== null ) {
			$profile->start();
		}
		return $profile;
	}

	/**
	 * Append the printed profile, as a comment node surrounded by newline
	 * text nodes, to the end of the given node.
	 *
	 * @param mixed $body Node to append to; must expose ownerDocument and appendChild()
	 * @param mixed $profile Profile object whose print() output is embedded
	 */
	private function appendProfileReport( $body, $profile ): void {
		$doc = $body->ownerDocument;
		$body->appendChild( $doc->createTextNode( "\n" ) );
		$body->appendChild( $doc->createComment( $profile->print() ) );
		$body->appendChild( $doc->createTextNode( "\n" ) );
	}

	/**
	 * This is useful for debugging.
	 *
	 * @param int $id
	 */
	public function setPipelineId( int $id ): void {
		$this->id = $id;
		$this->applyToStage( 'setPipelineId', $id );
	}

	/**
	 * Reset any local state in the pipeline stages
	 * @param array $opts Forwarded unchanged to every stage's resetState()
	 */
	public function resetState( array $opts = [] ): void {
		$this->applyToStage( 'resetState', $opts );
	}

	/**
	 * Set source offsets for the source that this pipeline will process.
	 *
	 * This lets us use different pipelines to parse fragments of the same page.
	 * Ex: extension content (found on the same page) is parsed with a different
	 * pipeline than the top-level page.
	 *
	 * Because of this, the source offsets are not always [0, page.length)
	 * and need to be explicitly initialized.
	 *
	 * @param SourceRange $so
	 */
	public function setSourceOffsets( SourceRange $so ): void {
		$this->applyToStage( 'setSourceOffsets', $so );
	}

	/**
	 * Set frame on this pipeline and on all of its stages
	 * (stages decide if they need it or not)
	 * @param Frame $frame frame
	 */
	public function setFrame( Frame $frame ): void {
		$this->frame = $frame;
		$this->applyToStage( 'setFrame', $frame );
	}

	/**
	 * Process input through the pipeline: every stage's process() is called
	 * in order, each one receiving the previous stage's output. Once all
	 * stages have run, the pipeline is handed back to the pipeline factory.
	 *
	 * @param array|string|Document $input wikitext string or array of tokens or Document
	 * @param array $opts Forwarded unchanged to every stage's process().
	 *  - sol (bool) Whether tokens should be processed in start-of-line context.
	 *  - atTopLevel (bool) Whether we are processing the top-level document.
	 *    When true and profiling is enabled, the profile report is appended
	 *    to the output.
	 * @return array|Document Output of the last stage.
	 *   NOTE(review): with profiling at top level the output is used as a node
	 *   with an ownerDocument (i.e. looks like the body element) -- confirm.
	 * @throws \RuntimeException If a stage returns null
	 */
	public function parse( $input, array $opts ) {
		$profile = $this->startProfile();

		$output = $input;
		foreach ( $this->stages as $stage ) {
			$output = $stage->process( $output, $opts );
			if ( $output === null ) {
				throw new \RuntimeException( 'Stage ' . get_class( $stage ) . ' generated null output.' );
			}
		}

		$this->env->getPipelineFactory()->returnPipeline( $this );

		if ( $profile !== null ) {
			$this->env->popProfile();
			$profile->end();

			// Test the flag's value, not just its presence: an explicit
			// 'atTopLevel' => false must not trigger the report.
			if ( !empty( $opts['atTopLevel'] ) ) {
				$this->appendProfileReport( $output, $profile );
			}
		}

		return $output;
	}

	/**
	 * Parse input in chunks
	 *
	 * Only the last stage is invoked directly (via processChunkily()); every
	 * chunk it yields is collected. Afterwards the pipeline is handed back to
	 * the pipeline factory.
	 *
	 * @param string $input Input wikitext
	 * @param array $opts
	 *  - atTopLevel: (bool) Whether we are processing the top-level document
	 *  - sol: (bool) Whether input should be processed in start-of-line context
	 * @return Document|array final DOM or array of token chunks
	 */
	public function parseChunkily( string $input, array $opts ) {
		$profile = $this->startProfile();

		$ret = [];
		$lastStage = PHPUtils::lastItem( $this->stages );
		foreach ( $lastStage->processChunkily( $input, $opts ) as $output ) {
			$ret[] = $output;
		}

		$this->env->getPipelineFactory()->returnPipeline( $this );

		if ( $profile !== null ) {
			$this->env->popProfile();
			$profile->end();

			// Test the flag's value, not just its presence: an explicit
			// 'atTopLevel' => false must not trigger the report.
			if ( !empty( $opts['atTopLevel'] ) ) {
				Assert::invariant( $this->outputType === 'DOM', 'Expected top-level output to be DOM' );
				$this->appendProfileReport( $ret[0], $profile );
			}
		}

		// Return either the DOM or the array of chunks
		return $this->outputType === 'DOM' ? $ret[0] : $ret;
	}

	/**
	 * Initialize the pipeline: reset stage state, set the frame and
	 * (optionally) the source offsets.
	 *
	 * @param array $initialState Once the pipeline is retrieved / constructed
	 * it will be initialized with this state.
	 *  - frame: (Frame) Required. Used as is for top-level pipelines; otherwise
	 *    a child frame is created from it.
	 *  - toplevel: (bool) Whether this pipeline processes the top-level document.
	 *  - tplArgs: (array) Optional. When it has a 'title', its 'title' and
	 *    'attribs' are used for the child frame instead of the parent
	 *    frame's own title and args.
	 *  - srcText: (string) Optional source text passed to the child frame.
	 *  - srcOffsets: (SourceRange) Optional source offsets for this pipeline's content.
	 * @throws \InvalidArgumentException If no Frame is supplied in 'frame'
	 */
	public function init( array $initialState = [] ) {
		// Validate up front so a bad call fails with a clear message before
		// any stage state has been touched.
		$frame = $initialState['frame'] ?? null;
		if ( !( $frame instanceof Frame ) ) {
			throw new \InvalidArgumentException(
				"ParserPipeline::init() requires a Frame in \$initialState['frame']."
			);
		}

		// Reset pipeline state once per top-level doc.
		// This clears state from any per-doc global state
		// maintained across all pipelines used by the document.
		// (Ex: Cite state)
		$toplevel = $initialState['toplevel'] ?? null;
		$this->resetState( [ 'toplevel' => $toplevel ] );

		// Set frame
		if ( !$toplevel ) {
			$tplArgs = $initialState['tplArgs'] ?? null;
			$srcText = $initialState['srcText'] ?? null;
			if ( isset( $tplArgs['title'] ) ) {
				$title = $tplArgs['title'];
				$args = $tplArgs['attribs']; // KV[]
			} else {
				$title = $frame->getTitle();
				$args = $frame->getArgs()->args; // KV[]
			}
			$frame = $frame->newChild( $title, $args, $srcText );
		}
		$this->setFrame( $frame );

		// Set source offsets for this pipeline's content
		$srcOffsets = $initialState['srcOffsets'] ?? null;
		if ( $srcOffsets ) {
			$this->setSourceOffsets( $srcOffsets );
		}
	}
}