Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
45.95% |
34 / 74 |
|
54.55% |
6 / 11 |
CRAP | |
0.00% |
0 / 1 |
ParserPipeline | |
45.95% |
34 / 74 |
|
54.55% |
6 / 11 |
151.82 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
getCacheKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
applyToStage | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
setPipelineId | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
resetState | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setSourceOffsets | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setFrame | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
parse | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
56 | |||
parseChunkily | |
50.00% |
9 / 18 |
|
0.00% |
0 / 1 |
13.12 | |||
selectiveParse | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
init | |
52.63% |
10 / 19 |
|
0.00% |
0 / 1 |
7.66 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\Config\Env; |
8 | use Wikimedia\Parsoid\Core\SelectiveUpdateData; |
9 | use Wikimedia\Parsoid\DOM\Document; |
10 | use Wikimedia\Parsoid\DOM\DocumentFragment; |
11 | use Wikimedia\Parsoid\Tokens\SourceRange; |
12 | use Wikimedia\Parsoid\Tokens\Token; |
13 | use Wikimedia\Parsoid\Utils\DOMCompat; |
14 | use Wikimedia\Parsoid\Utils\PHPUtils; |
15 | |
/**
 * Wrap some stages into a pipeline.
 *
 * A pipeline owns an ordered list of stage objects and fans configuration
 * calls (pipeline id, state reset, source offsets, frame) out to each of
 * them. Input is pushed through the stages by parse() or pulled from the
 * last stage by parseChunkily(). Once processing is done the pipeline hands
 * itself back to the pipeline factory obtained from the Env.
 */
class ParserPipeline {
	/** @var bool When true, init() always treats this pipeline as top-level */
	private bool $alwaysToplevel;
	/** @var bool Computed by init(); gates the profiling comment trailer */
	private bool $atTopLevel;
	/** @var int Debugging id; -1 until setPipelineId() is called */
	private int $id;
	/** @var string parseChunkily() compares this against 'DOM' */
	private string $outputType;
	/** @var string Pipeline type as given to the constructor */
	private string $pipelineType;
	/** @var array Ordered list of stage objects */
	private array $stages;
	private Env $env;
	/** @var string Key exposed through getCacheKey() */
	private string $cacheKey;
	/** @var Frame Last frame passed to setFrame() */
	private Frame $frame;

	/**
	 * @param bool $alwaysToplevel Force top-level handling in init()
	 * @param string $type Pipeline type
	 * @param string $outType Output type of the pipeline (e.g. 'DOM')
	 * @param string $cacheKey Key returned by getCacheKey()
	 * @param array $stages Ordered list of stage objects
	 * @param Env $env
	 */
	public function __construct(
		bool $alwaysToplevel, string $type, string $outType, string $cacheKey, array $stages, Env $env
	) {
		$this->id = -1;
		$this->alwaysToplevel = $alwaysToplevel;
		$this->cacheKey = $cacheKey;
		$this->pipelineType = $type;
		$this->outputType = $outType;
		$this->stages = $stages;
		$this->env = $env;
	}

	/**
	 * @return string The cache key this pipeline was constructed with
	 */
	public function getCacheKey(): string {
		return $this->cacheKey;
	}

	/**
	 * Invoke the named method, with the given arguments, on every stage
	 * in stage order.
	 *
	 * @param string $fn Name of the stage method to call
	 * @param mixed ...$args Arguments forwarded to that method
	 */
	private function applyToStage( string $fn, ...$args ): void {
		foreach ( $this->stages as $stage ) {
			$stage->$fn( ...$args );
		}
	}

	/**
	 * Record the pipeline id and propagate it to every stage.
	 * This is useful for debugging.
	 *
	 * @param int $id
	 */
	public function setPipelineId( int $id ): void {
		$this->id = $id;
		$this->applyToStage( 'setPipelineId', $id );
	}

	/**
	 * Reset any local state in the pipeline stages.
	 *
	 * @param array $opts Options forwarded unchanged to each stage's resetState()
	 */
	public function resetState( array $opts = [] ): void {
		$this->applyToStage( 'resetState', $opts );
	}

	/**
	 * Set source offsets for the source that this pipeline will process.
	 *
	 * This lets us use different pipelines to parse fragments of the same page.
	 * Ex: extension content (found on the same page) is parsed with a different
	 * pipeline than the top-level page.
	 *
	 * Because of this, the source offsets are not always [0, page.length)
	 * and need to be explicitly initialized.
	 *
	 * @param SourceRange $so
	 */
	public function setSourceOffsets( SourceRange $so ): void {
		$this->applyToStage( 'setSourceOffsets', $so );
	}

	/**
	 * Set the frame on this pipeline and on every stage
	 * (stages decide if they need it or not).
	 *
	 * @param Frame $frame
	 */
	public function setFrame( Frame $frame ): void {
		$this->frame = $frame;
		$this->applyToStage( 'setFrame', $frame );
	}

	/**
	 * Begin profiling for one parse run if the Env has profiling enabled.
	 *
	 * @return object|null The profile returned by Env::pushNewProfile(),
	 *   already started, or null when profiling is off
	 */
	private function startProfile() {
		$profile = $this->env->profiling() ? $this->env->pushNewProfile() : null;
		if ( $profile !== null ) {
			$profile->start();
		}
		return $profile;
	}

	/**
	 * Pop the current profile off the Env and stop it.
	 *
	 * @param object $profile Profile previously returned by startProfile()
	 */
	private function endProfile( $profile ): void {
		$this->env->popProfile();
		$profile->end();
	}

	/**
	 * Append the printed profile to $body as a comment node surrounded
	 * by newline text nodes.
	 *
	 * @param object $profile Profile whose print() output is embedded
	 * @param mixed $body Node to append to; its ownerDocument creates the new nodes
	 */
	private function appendProfileComment( $profile, $body ): void {
		$doc = $body->ownerDocument;
		$body->appendChild( $doc->createTextNode( "\n" ) );
		$body->appendChild( $doc->createComment( $profile->print() ) );
		$body->appendChild( $doc->createTextNode( "\n" ) );
	}

	/**
	 * Process input through the pipeline: each stage's process() receives
	 * the previous stage's output (the first stage receives $input).
	 * Afterwards the pipeline is returned to the pipeline factory.
	 *
	 * @param string|Token|array<Token|string>|DocumentFragment $input
	 * @param array $opts Options forwarded unchanged to every stage, e.g.
	 *  - sol (bool) Whether tokens should be processed in start-of-line context.
	 * @return array|Document Output of the last stage
	 * @throws \RuntimeException If any stage returns null
	 */
	public function parse( $input, array $opts ) {
		$profile = $this->startProfile();

		$output = $input;
		foreach ( $this->stages as $stage ) {
			$output = $stage->process( $output, $opts );
			if ( $output === null ) {
				throw new \RuntimeException( 'Stage ' . get_class( $stage ) . ' generated null output.' );
			}
		}

		$this->env->getPipelineFactory()->returnPipeline( $this );

		if ( $profile !== null ) {
			$this->endProfile( $profile );

			if ( $this->atTopLevel ) {
				// NOTE(review): assumes the top-level output is a DOM node with an
				// ownerDocument; unlike parseChunkily() this is not asserted here.
				$this->appendProfileComment( $profile, $output );
			}
		}

		return $output;
	}

	/**
	 * Parse input in chunks.
	 *
	 * Only the last stage is driven directly: every value produced by its
	 * processChunkily() is collected. Afterwards the pipeline is returned
	 * to the pipeline factory.
	 *
	 * @param string $input Input wikitext
	 * @param array $opts Options forwarded unchanged to the last stage, e.g.
	 *  - sol: (bool) Whether input should be processed in start-of-line context
	 * @return Document|array The first collected value when the output type
	 *   is "DOM", otherwise the array of all collected chunks
	 */
	public function parseChunkily( string $input, array $opts ) {
		$profile = $this->startProfile();

		$ret = [];
		$lastStage = PHPUtils::lastItem( $this->stages );
		foreach ( $lastStage->processChunkily( $input, $opts ) as $output ) {
			$ret[] = $output;
		}

		$this->env->getPipelineFactory()->returnPipeline( $this );

		if ( $profile !== null ) {
			$this->endProfile( $profile );

			if ( $this->atTopLevel ) {
				Assert::invariant( $this->outputType === 'DOM', 'Expected top-level output to be DOM' );
				$this->appendProfileComment( $profile, $ret[0] );
			}
		}

		// Return either the DOM or the array of chunks
		return $this->outputType === "DOM" ? $ret[0] : $ret;
	}

	/**
	 * Selectively update parts of the old DOM based on $options.
	 *
	 * The body of $selparData->revDOM is run through parse() with
	 * $selparData added to the options under the 'selparData' key; that
	 * key takes precedence over a same-named entry in $options. The return
	 * value of parse() is discarded and the revision DOM itself is returned.
	 *
	 * FIXME: Document the $options array here.
	 *
	 * @param SelectiveUpdateData $selparData
	 * @param array $options Additional info about what needs updating
	 * @return Document $selparData->revDOM
	 */
	public function selectiveParse(
		SelectiveUpdateData $selparData, array $options
	): Document {
		$dom = $selparData->revDOM;
		$this->parse( DOMCompat::getBody( $dom ), [ 'selparData' => $selparData ] + $options );
		return $dom;
	}

	/**
	 * Initialize the pipeline once it has been retrieved / constructed.
	 *
	 * @param array $initialState State to initialize the pipeline with:
	 *  - frame: (Frame) read unconditionally, so effectively required
	 *  - toplevel: (bool) read whenever the pipeline was not constructed
	 *    with $alwaysToplevel, so effectively required in that case
	 *  - toFragment: (bool) forwarded to resetState(); defaults to true
	 *  - tplArgs: (array) optional; when it has a 'title', its 'title' and
	 *    'attribs' are used for the child frame instead of the parent's
	 *  - srcText: optional; passed to Frame::newChild()
	 *  - srcOffsets: (SourceRange) optional; applied when truthy
	 */
	public function init( array $initialState = [] ) {
		// Reset pipeline state once per top-level doc.
		// This clears state from any per-doc global state
		// maintained across all pipelines used by the document.
		// (Ex: Cite state)
		$this->atTopLevel = $this->alwaysToplevel ?: $initialState['toplevel'];
		$this->resetState( [
			'toplevel' => $this->atTopLevel,
			'toFragment' => $initialState['toFragment'] ?? true,
		] );

		// Set frame: nested pipelines run in a child of the supplied frame
		$frame = $initialState['frame'];
		if ( !$this->atTopLevel ) {
			$tplArgs = $initialState['tplArgs'] ?? null;
			$srcText = $initialState['srcText'] ?? null;
			if ( isset( $tplArgs['title'] ) ) {
				$title = $tplArgs['title'];
				$args = $tplArgs['attribs']; // KV[]
			} else {
				$title = $frame->getTitle();
				$args = $frame->getArgs()->args; // KV[]
			}
			$frame = $frame->newChild( $title, $args, $srcText );
		}
		$this->setFrame( $frame );

		// Set source offsets for this pipeline's content
		$srcOffsets = $initialState['srcOffsets'] ?? null;
		if ( $srcOffsets ) {
			$this->setSourceOffsets( $srcOffsets );
		}
	}
}