Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
45.95% |
34 / 74 |
|
54.55% |
6 / 11 |
CRAP | |
0.00% |
0 / 1 |
| ParserPipeline | |
45.95% |
34 / 74 |
|
54.55% |
6 / 11 |
151.82 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
| getCacheKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| applyToStage | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| setPipelineId | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| resetState | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| setSourceOffsets | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| setFrame | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| parse | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
56 | |||
| parseChunkily | |
50.00% |
9 / 18 |
|
0.00% |
0 / 1 |
13.12 | |||
| selectiveParse | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| init | |
52.63% |
10 / 19 |
|
0.00% |
0 / 1 |
7.66 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace Wikimedia\Parsoid\Wt2Html; |
| 5 | |
| 6 | use Wikimedia\Assert\Assert; |
| 7 | use Wikimedia\Parsoid\Config\Env; |
| 8 | use Wikimedia\Parsoid\Core\SelectiveUpdateData; |
| 9 | use Wikimedia\Parsoid\DOM\Document; |
| 10 | use Wikimedia\Parsoid\DOM\Element; |
| 11 | use Wikimedia\Parsoid\Tokens\SourceRange; |
| 12 | use Wikimedia\Parsoid\Tokens\Token; |
| 13 | use Wikimedia\Parsoid\Utils\DOMCompat; |
| 14 | use Wikimedia\Parsoid\Utils\PHPUtils; |
| 15 | |
/**
 * Wrap some stages into a pipeline.
 *
 * A pipeline owns an ordered list of stages. Configuration calls
 * (pipeline id, state reset, source offsets, frame) are broadcast to
 * every stage. parse() threads the input through all stages in order,
 * whereas parseChunkily() drives only the stage returned by
 * PHPUtils::lastItem() and collects the chunks it produces.
 *
 * init() must be called before parse() / parseChunkily() when profiling
 * is enabled, since those methods then read the top-level flag that
 * init() sets.
 */
class ParserPipeline {
	/** Forces the pipeline to be treated as top-level regardless of init() state */
	private bool $alwaysToplevel;
	/** Whether the current use of the pipeline is top-level; assigned in init() */
	private bool $atTopLevel;
	/** Debugging id; -1 until setPipelineId() is called */
	private int $id;
	/** Output type; 'DOM' makes parseChunkily() return the first chunk only */
	private string $outputType;
	private string $pipelineType;
	/** Ordered list of stage objects */
	private array $stages;
	private Env $env;
	private string $cacheKey;
	/** Frame most recently passed to setFrame() */
	private Frame $frame;

	/**
	 * @param bool $alwaysToplevel If true, init() always marks the pipeline as top-level
	 * @param string $type Pipeline type
	 * @param string $outType Output type (compared against 'DOM' in parseChunkily())
	 * @param string $cacheKey Key returned by getCacheKey()
	 * @param array $stages Ordered stage objects
	 * @param Env $env
	 */
	public function __construct(
		bool $alwaysToplevel, string $type, string $outType, string $cacheKey, array $stages, Env $env
	) {
		$this->id = -1;
		$this->alwaysToplevel = $alwaysToplevel;
		$this->cacheKey = $cacheKey;
		$this->pipelineType = $type;
		$this->outputType = $outType;
		$this->stages = $stages;
		$this->env = $env;
	}

	/**
	 * @return string The cache key this pipeline was constructed with
	 */
	public function getCacheKey(): string {
		return $this->cacheKey;
	}

	/**
	 * Invokes the named method, with the given arguments, on every stage
	 * in pipeline order.
	 *
	 * @param string $fn Name of the stage method to call
	 * @param mixed ...$args Arguments forwarded to each call
	 */
	private function applyToStage( string $fn, ...$args ): void {
		foreach ( $this->stages as $stage ) {
			$stage->$fn( ...$args );
		}
	}

	/**
	 * Records the id on this pipeline and on every stage.
	 * This is useful for debugging.
	 *
	 * @param int $id
	 */
	public function setPipelineId( int $id ): void {
		$this->id = $id;
		$this->applyToStage( 'setPipelineId', $id );
	}

	/**
	 * Reset any local state in the pipeline stages
	 *
	 * @param array $opts Passed unchanged to each stage's resetState()
	 */
	public function resetState( array $opts = [] ): void {
		$this->applyToStage( 'resetState', $opts );
	}

	/**
	 * Set source offsets for the source that this pipeline will process.
	 *
	 * This lets us use different pipelines to parse fragments of the same page
	 * Ex: extension content (found on the same page) is parsed with a different
	 * pipeline than the top-level page.
	 *
	 * Because of this, the source offsets are not always [0, page.length)
	 * and need to be explicitly initialized
	 *
	 * @param SourceRange $so
	 */
	public function setSourceOffsets( SourceRange $so ): void {
		$this->applyToStage( 'setSourceOffsets', $so );
	}

	/**
	 * Set frame on this pipeline and on every stage (stages decide if they
	 * need it or not)
	 *
	 * @param Frame $frame frame
	 */
	public function setFrame( Frame $frame ): void {
		$this->frame = $frame;
		$this->applyToStage( 'setFrame', $frame );
	}

	/**
	 * Starts a new profile if the environment has profiling enabled.
	 *
	 * @return mixed The started profile object, or null when not profiling
	 */
	private function startProfile() {
		$profile = $this->env->profiling() ? $this->env->pushNewProfile() : null;
		if ( $profile !== null ) {
			$profile->start();
		}
		return $profile;
	}

	/**
	 * Pops and ends the profile started by startProfile().
	 *
	 * @param mixed $profile Value returned by startProfile()
	 * @return bool True when a profile was active and this pipeline is at
	 *   top level, i.e. when the caller should attach the profile report
	 */
	private function finishProfile( $profile ): bool {
		if ( $profile === null ) {
			// The top-level flag is deliberately not read here: it is only
			// assigned by init() and is not needed when profiling is off.
			return false;
		}
		$this->env->popProfile();
		$profile->end();
		return $this->atTopLevel;
	}

	/**
	 * Appends the printed profile to $body as a comment node surrounded
	 * by newline text nodes.
	 *
	 * @param mixed $body DOM node exposing ownerDocument and appendChild()
	 * @param mixed $profile Profile object whose print() output is embedded
	 */
	private function appendProfileReport( $body, $profile ): void {
		$body->appendChild( $body->ownerDocument->createTextNode( "\n" ) );
		$body->appendChild( $body->ownerDocument->createComment( $profile->print() ) );
		$body->appendChild( $body->ownerDocument->createTextNode( "\n" ) );
	}

	/**
	 * Process input through every stage of the pipeline, in order. The
	 * output of each stage is the input of the next one.
	 *
	 * Once all stages have run, the pipeline is handed back to the
	 * pipeline factory. When profiling at top level, the profile report
	 * is appended to the output, which must then be a DOM node.
	 *
	 * @param string|Token|array<Token|string>|Element $input
	 * @param array{sol:bool} $opts Forwarded unchanged to each stage's process()
	 *  - sol (bool) Whether tokens should be processed in start-of-line context.
	 * @return array|Document Output of the final stage
	 * @throws \RuntimeException If any stage returns null
	 */
	public function parse( $input, array $opts ) {
		$profile = $this->startProfile();

		$output = $input;
		foreach ( $this->stages as $stage ) {
			$output = $stage->process( $output, $opts );
			if ( $output === null ) {
				throw new \RuntimeException( 'Stage ' . get_class( $stage ) . ' generated null output.' );
			}
		}

		$this->env->getPipelineFactory()->returnPipeline( $this );

		if ( $this->finishProfile( $profile ) ) {
			$this->appendProfileReport( $output, $profile );
		}

		return $output;
	}

	/**
	 * Parse input in chunks
	 *
	 * Only the last stage is driven directly; every chunk it produces is
	 * collected before the pipeline is handed back to the pipeline factory.
	 *
	 * @param string $input Input wikitext
	 * @param array{sol:bool} $opts Forwarded unchanged to processChunkily()
	 *  - atTopLevel: (bool) Whether we are processing the top-level document
	 *  - sol: (bool) Whether input should be processed in start-of-line context
	 * @return Document|array final DOM or array of token chunks
	 * @throws \RuntimeException If the output type is DOM but no chunk was produced
	 */
	public function parseChunkily( string $input, array $opts ) {
		$profile = $this->startProfile();

		$chunks = [];
		$lastStage = PHPUtils::lastItem( $this->stages );
		foreach ( $lastStage->processChunkily( $input, $opts ) as $chunk ) {
			$chunks[] = $chunk;
		}

		$this->env->getPipelineFactory()->returnPipeline( $this );

		$attachReport = $this->finishProfile( $profile );
		if ( $attachReport ) {
			Assert::invariant( $this->outputType === 'DOM', 'Expected top-level output to be DOM' );
		}

		if ( $this->outputType !== "DOM" ) {
			// Non-DOM pipelines return every chunk
			return $chunks;
		}

		// DOM pipelines return their first chunk; fail loudly, like parse()
		// does for null stage output, instead of silently returning null.
		if ( !isset( $chunks[0] ) ) {
			throw new \RuntimeException( 'Stage ' . get_class( $lastStage ) . ' generated no output.' );
		}

		$dom = $chunks[0];
		if ( $attachReport ) {
			$this->appendProfileReport( $dom, $profile );
		}
		return $dom;
	}

	/**
	 * Selectively update parts of the old DOM based on $options
	 * $options has additional info about what needs updating.
	 * FIXME: Document $options array here.
	 *
	 * The body of $selparData->revDOM is run through parse() with
	 * 'selparData' added to the options; the return value of parse() is
	 * discarded and the same document object is returned.
	 *
	 * @param SelectiveUpdateData $selparData
	 * @param array $options Extra options; a 'selparData' entry here is
	 *   overridden by $selparData
	 * @return Document $selparData->revDOM
	 */
	public function selectiveParse(
		SelectiveUpdateData $selparData, array $options
	): Document {
		$dom = $selparData->revDOM;
		$this->parse( DOMCompat::getBody( $dom ), [ 'selparData' => $selparData ] + $options );
		return $dom;
	}

	/**
	 * @param array $initialState Once the pipeline is retrieved / constructed,
	 *   it will be initialized with this state. Keys read here:
	 *   - frame: required; the frame to use (at top level) or the parent
	 *     of the new child frame (otherwise)
	 *   - toplevel: (bool) required unless the pipeline was constructed
	 *     as always-toplevel
	 *   - toFragment: optional, defaults to true; passed to resetState()
	 *   - tplArgs: optional; if it has a 'title' entry, its 'title' and
	 *     'attribs' are used for the child frame
	 *   - srcText: optional; passed to the child frame
	 *   - srcOffsets: optional; forwarded to setSourceOffsets() when truthy
	 *   Although the parameter defaults to [], the required keys above
	 *   must be present.
	 */
	public function init( array $initialState = [] ) {
		// Reset pipeline state once per top-level doc.
		// This clears state from any per-doc global state
		// maintained across all pipelines used by the document.
		// (Ex: Cite state)
		$this->atTopLevel = $this->alwaysToplevel ?: $initialState['toplevel'];
		$this->resetState( [
			'toplevel' => $this->atTopLevel,
			'toFragment' => $initialState['toFragment'] ?? true,
		] );

		// Set frame: nested pipelines get a child of the supplied frame
		$frame = $initialState['frame'];
		if ( !$this->atTopLevel ) {
			$tplArgs = $initialState['tplArgs'] ?? null;
			$srcText = $initialState['srcText'] ?? null;
			if ( isset( $tplArgs['title'] ) ) {
				$title = $tplArgs['title'];
				$args = $tplArgs['attribs']; // KV[]
			} else {
				// No explicit template info: inherit title and args from the parent frame
				$title = $frame->getTitle();
				$args = $frame->getArgs()->args; // KV[]
			}
			$frame = $frame->newChild( $title, $args, $srcText );
		}
		$this->setFrame( $frame );

		// Set source offsets for this pipeline's content
		$srcOffsets = $initialState['srcOffsets'] ?? null;
		if ( $srcOffsets ) {
			$this->setSourceOffsets( $srcOffsets );
		}
	}
}