Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
47.62% |
40 / 84 |
|
28.57% |
2 / 7 |
CRAP | |
0.00% |
0 / 1 |
DOMPostProcessor | |
47.62% |
40 / 84 |
|
28.57% |
2 / 7 |
149.87 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getTimeProfile | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
registerProcessors | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
4 | |||
setSourceOffsets | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
doPostProcess | |
46.77% |
29 / 62 |
|
0.00% |
0 / 1 |
66.86 | |||
process | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
processChunkily | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Wt2Html; |
5 | |
6 | use Generator; |
7 | use Wikimedia\Parsoid\Config\Env; |
8 | use Wikimedia\Parsoid\Core\SelectiveUpdateData; |
9 | use Wikimedia\Parsoid\DOM\Node; |
10 | use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; |
11 | use Wikimedia\Parsoid\Tokens\SourceRange; |
12 | use Wikimedia\Parsoid\Utils\ContentUtils; |
13 | |
/**
 * Perform post-processing steps on an already-built HTML DOM.
 *
 * Holds an ordered list of registered DOM processors and runs each of them
 * over a node in doPostProcess(), optionally dumping the DOM before and after
 * each pass and recording per-pass timings when profiling is enabled.
 */
class DOMPostProcessor extends PipelineStage {
	/** @var array Options merged into the options array given to every processor's run() */
	private array $options;
	/** @var array[] Registered processor specs; the runnable object is stored under 'proc' */
	private array $processors = [];
	/** @var ParsoidExtensionAPI Provides post-processing support to extensions */
	private ParsoidExtensionAPI $extApi;
	/** @var string Per-pass timing report; only written for top-level documents while profiling */
	private string $timeProfile = '';
	/** @var ?SelectiveUpdateData Set by process() from $opts['selparData'] when that key is set */
	private ?SelectiveUpdateData $selparData = null;

	/**
	 * @param Env $env
	 * @param array $options Options passed along to every processor
	 * @param string $stageId Not used by this constructor
	 * @param ?PipelineStage $prevStage Stage that processChunkily() pulls its DOM from, if any
	 */
	public function __construct(
		Env $env, array $options = [], string $stageId = "",
		?PipelineStage $prevStage = null
	) {
		parent::__construct( $env, $prevStage );

		$this->options = $options;
		$this->extApi = new ParsoidExtensionAPI( $env );
	}

	/**
	 * @return string The timing report built by the last doPostProcess() run that
	 *   was profiled at top level; empty if there was none.
	 */
	public function getTimeProfile(): string {
		return $this->timeProfile;
	}

	/**
	 * Append processors to the list that doPostProcess() runs, in the given order.
	 *
	 * Each spec is one of:
	 *  - [ 'Processor' => class name, ... ]: the class is instantiated with
	 *    this stage as its only constructor argument.
	 *  - [ 'handlers' => [ [ 'nodeName' => ..., 'action' => ... ], ... ],
	 *      'tplInfo' => ..., ... ]: a DOMPPTraverser is built and every handler
	 *    is added to it ('tplInfo' defaults to false).
	 *
	 * doPostProcess() additionally reads 'name' and 'shortcut' (for profiling
	 * and dump flags) and the optional 'withAnnotations' flag from each spec.
	 *
	 * @param array[] $processors
	 */
	public function registerProcessors( array $processors ): void {
		foreach ( $processors as $p ) {
			if ( isset( $p['Processor'] ) ) {
				// Internal processor w/ ::run() method, class name given
				$p['proc'] = new $p['Processor']( $this );
			} else {
				$t = new DOMPPTraverser( $this, $p['tplInfo'] ?? false );
				foreach ( $p['handlers'] as $h ) {
					$t->addHandler( $h['nodeName'], $h['action'] );
				}
				$p['proc'] = $t;
			}
			$this->processors[] = $p;
		}
	}

	/* ---------------------------------------------------------------------------
	 * FIXME:
	 * 1. PipelineFactory caches pipelines per env
	 * 2. PipelineFactory.parse uses a default cache key
	 * 3. ParserTests uses a shared/global env object for all tests.
	 * 4. ParserTests also uses PipelineFactory.parse (via env.getContentHandler())
	 *    => the pipeline constructed for the first test that runs wt2html
	 *       is used for all subsequent wt2html tests
	 * 5. If we are selectively turning on/off options on a per-test basis
	 *    in parser tests, those options won't work if those options are
	 *    also used to configure pipeline construction (including which DOM passes
	 *    are enabled).
	 *
	 *    Ex: if (env.wrapSections) { addPP('wrapSections', wrapSections); }
	 *
	 *    This won't do what you expect it to do. This is primarily a
	 *    parser tests script issue -- but given the abstraction layers that
	 *    are on top of the parser pipeline construction, fixing that is
	 *    not straightforward right now. So, this note is a warning to future
	 *    developers to pay attention to how they construct pipelines.
	 * --------------------------------------------------------------------------- */

	/**
	 * @inheritDoc
	 */
	public function setSourceOffsets( SourceRange $so ): void {
		// Stored in the options so that every processor receives it via run()
		$this->options['sourceOffsets'] = $so;
	}

	/**
	 * Run every registered processor over $node, in registration order.
	 *
	 * When the env has dump flags set, the DOM is dumped once up front
	 * ('dom:post-builder') and before/after each pass ('dom:pre-<shortcut>',
	 * 'dom:post-<shortcut>', or the '*' wildcards). When profiling is on, the
	 * elapsed time of each pass (dumps included) is reported to the current
	 * profile and, for top-level documents, appended to the time profile string.
	 *
	 * @param Node $node Root of the DOM to process; handed to each processor's run()
	 */
	public function doPostProcess( Node $node ): void {
		$env = $this->env;

		$hasDumpFlags = $env->hasDumpFlags();

		// FIXME: This works right now, but may not always be the right place to dump
		// if custom DOM pipelines start getting more specialized and we enter this
		// pipeline immediately after tree building.
		if ( $hasDumpFlags && $env->hasDumpFlag( 'dom:post-builder' ) ) {
			$opts = [];
			$env->writeDump( ContentUtils::dumpDOM( $node, 'DOM: after tree builder', $opts ) );
		}

		$prefix = null;
		$resourceCategory = null;

		$profile = null;
		if ( $env->profiling() ) {
			$profile = $env->getCurrentProfile();
			if ( $this->atTopLevel ) {
				// Per-pass timing lines are only collected for top-level documents;
				// start a fresh report with a separator line.
				$this->timeProfile = str_repeat( "-", 85 ) . "\n";
				$prefix = 'TOP';
				$resourceCategory = 'DOMPasses:TOP';
			} else {
				$prefix = '---';
				$resourceCategory = 'DOMPasses:NESTED';
			}
		}

		foreach ( $this->processors as $pp ) {
			// This is an optimization for the 'AddAnnotationIds' handler
			// which is embedded in a DOMTraverser where we cannot check this flag.
			if ( !empty( $pp['withAnnotations'] ) && !$env->hasAnnotations ) {
				continue;
			}

			$ppName = null;
			$ppStart = null;

			// Trace
			if ( $profile ) {
				// Right-pad the pass name to 30 columns so the report lines up;
				// longer names are left untouched.
				$ppName = str_pad( $pp['name'], 30 );
				$ppStart = microtime( true );
			}

			$opts = null;
			if ( $hasDumpFlags ) {
				$opts = [
					'env' => $env,
					'dumpFragmentMap' => $this->atTopLevel,
					'keepTmp' => true
				];

				if ( $env->hasDumpFlag( 'dom:pre-' . $pp['shortcut'] )
					|| $env->hasDumpFlag( 'dom:pre-*' )
				) {
					$env->writeDump(
						ContentUtils::dumpDOM( $node, 'DOM: pre-' . $pp['shortcut'], $opts )
					);
				}
			}

			// FIXME: env, extApi, frame, selparData, options, atTopLevel can all be
			// put into a stdclass or a real class (DOMProcConfig?) and passed around.
			// Note: with array union (+), the three keys listed here take
			// precedence over same-named keys in $this->options.
			$pp['proc']->run(
				$env,
				$node,
				[
					'extApi' => $this->extApi,
					'frame' => $this->frame,
					'selparData' => $this->selparData,
				] + $this->options,
				$this->atTopLevel
			);

			// $opts is always the array built above here, since this branch
			// requires $hasDumpFlags as well.
			if ( $hasDumpFlags && ( $env->hasDumpFlag( 'dom:post-' . $pp['shortcut'] )
				|| $env->hasDumpFlag( 'dom:post-*' ) )
			) {
				$env->writeDump(
					ContentUtils::dumpDOM( $node, 'DOM: post-' . $pp['shortcut'], $opts )
				);
			}

			if ( $profile ) {
				// Elapsed wall-clock time in milliseconds
				$ppElapsed = 1000 * ( microtime( true ) - $ppStart );
				if ( $this->atTopLevel ) {
					$this->timeProfile .= str_pad( $prefix . '; ' . $ppName, 65 ) .
						' time = ' .
						str_pad( number_format( $ppElapsed, 2 ), 10, ' ', STR_PAD_LEFT ) . "\n";
				}
				$profile->bumpTimeUse( $resourceCategory, $ppElapsed, 'DOM' );
			}
		}
	}

	/**
	 * @inheritDoc
	 */
	public function process( $node, array $opts ) {
		// Only overwritten when the key is set; otherwise any previously
		// stored selparData is kept.
		if ( isset( $opts['selparData'] ) ) {
			$this->selparData = $opts['selparData'];
		}
		'@phan-var Node $node'; // @var Node $node
		$this->doPostProcess( $node );
		// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
		return $node;
	}

	/**
	 * @inheritDoc
	 */
	public function processChunkily( $input, array $options ): Generator {
		if ( $this->prevStage ) {
			// The previous stage will yield a DOM.
			// FIXME: Should we change the signature of that to return a DOM
			// If we do so, a pipeline stage returns either a generator or
			// concrete output (in this case, a DOM).
			$node = $this->prevStage->processChunkily( $input, $options )->current();
		} else {
			// No previous stage: the input itself is the DOM to process
			$node = $input;
		}
		$this->process( $node, $options );
		yield $node;
	}
}