Code Coverage for /src/src/Wt2Html/ParserPipelineFactory.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	0.00% covered (danger)	0.00%	0 / 90	0.00% covered (danger)	0.00%	0 / 9	CRAP	0.00% covered (danger)	0.00%	0 / 1
ParserPipelineFactory	0.00% covered (danger)	0.00%	0 / 90	0.00% covered (danger)	0.00%	0 / 9	650	0.00% covered (danger)	0.00%	0 / 1
__construct	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 1	2
defaultOptions	0.00% covered (danger)	0.00%	0 / 8	0.00% covered (danger)	0.00%	0 / 1	6
procNamesToProcs	0.00% covered (danger)	0.00%	0 / 11	0.00% covered (danger)	0.00%	0 / 1	12
makePipeline	0.00% covered (danger)	0.00%	0 / 33	0.00% covered (danger)	0.00%	0 / 1	42
getCacheKey	0.00% covered (danger)	0.00%	0 / 13	0.00% covered (danger)	0.00%	0 / 1	56
parse	0.00% covered (danger)	0.00%	0 / 6	0.00% covered (danger)	0.00%	0 / 1	2
selectiveDOMUpdate	0.00% covered (danger)	0.00%	0 / 6	0.00% covered (danger)	0.00%	0 / 1	2
getPipeline	0.00% covered (danger)	0.00%	0 / 8	0.00% covered (danger)	0.00%	0 / 1	6
returnPipeline	0.00% covered (danger)	0.00%	0 / 4	0.00% covered (danger)	0.00%	0 / 1	6

1	<?php
2	declare( strict_types = 1 );
3
4	namespace Wikimedia\Parsoid\Wt2Html;
5
6	use Wikimedia\Assert\Assert;
7	use Wikimedia\Parsoid\Config\Env;
8	use Wikimedia\Parsoid\Core\InternalException;
9	use Wikimedia\Parsoid\Core\SelectiveUpdateData;
10	use Wikimedia\Parsoid\DOM\Document;
11	use Wikimedia\Parsoid\Utils\PHPUtils;
12	use Wikimedia\Parsoid\Utils\Utils;
13	use Wikimedia\Parsoid\Wt2Html\DOM\Handlers\AddAnnotationIds;
14	use Wikimedia\Parsoid\Wt2Html\DOM\Handlers\AddLinkAttributes;
15	use Wikimedia\Parsoid\Wt2Html\DOM\Handlers\CleanUp;
16	use Wikimedia\Parsoid\Wt2Html\DOM\Handlers\DedupeStyles;
17	use Wikimedia\Parsoid\Wt2Html\DOM\Handlers\DisplaySpace;
18	use Wikimedia\Parsoid\Wt2Html\DOM\Handlers\HandleLinkNeighbours;
19	use Wikimedia\Parsoid\Wt2Html\DOM\Handlers\Headings;
20	use Wikimedia\Parsoid\Wt2Html\DOM\Handlers\LiFixups;
21	use Wikimedia\Parsoid\Wt2Html\DOM\Handlers\TableFixups;
22	use Wikimedia\Parsoid\Wt2Html\DOM\Handlers\UnpackDOMFragments;
23	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\AddMediaInfo;
24	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\AddMetaData;
25	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\AddRedLinks;
26	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\ComputeDSR;
27	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\ConvertOffsets;
28	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\LangConverter;
29	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\Linter;
30	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\MarkFosteredContent;
31	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\MigrateTemplateMarkerMetas;
32	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\MigrateTrailingNLs;
33	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\Normalize;
34	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\ProcessEmbeddedDocs;
35	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\ProcessTreeBuilderFixups;
36	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\PWrap;
37	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\RunExtensionProcessors;
38	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\UpdateTemplateOutput;
39	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\WrapAnnotations;
40	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\WrapSections;
41	use Wikimedia\Parsoid\Wt2Html\DOM\Processors\WrapTemplates;
42	use Wikimedia\Parsoid\Wt2Html\TreeBuilder\TreeBuilderStage;
43	use Wikimedia\Parsoid\Wt2Html\TT\AttributeExpander;
44	use Wikimedia\Parsoid\Wt2Html\TT\BehaviorSwitchHandler;
45	use Wikimedia\Parsoid\Wt2Html\TT\DOMFragmentBuilder;
46	use Wikimedia\Parsoid\Wt2Html\TT\ExtensionHandler;
47	use Wikimedia\Parsoid\Wt2Html\TT\ExternalLinkHandler;
48	use Wikimedia\Parsoid\Wt2Html\TT\LanguageVariantHandler;
49	use Wikimedia\Parsoid\Wt2Html\TT\ListHandler;
50	use Wikimedia\Parsoid\Wt2Html\TT\OnlyInclude;
51	use Wikimedia\Parsoid\Wt2Html\TT\ParagraphWrapper;
52	use Wikimedia\Parsoid\Wt2Html\TT\PreHandler;
53	use Wikimedia\Parsoid\Wt2Html\TT\QuoteTransformer;
54	use Wikimedia\Parsoid\Wt2Html\TT\SanitizerHandler;
55	use Wikimedia\Parsoid\Wt2Html\TT\TemplateHandler;
56	use Wikimedia\Parsoid\Wt2Html\TT\TokenStreamPatcher;
57	use Wikimedia\Parsoid\Wt2Html\TT\WikiLinkHandler;
58
59	/**
60	* This class assembles parser pipelines from parser stages
61	*/
62	class ParserPipelineFactory {
63	private static $globalPipelineId = 0;
64
65	private const DOM_PROCESSOR_CONFIG = [
66	'addmetadata' => AddMetaData::class,
67	'annwrap' => WrapAnnotations::class,
68	'convertoffsets' => ConvertOffsets::class,
69	'dsr' => ComputeDSR::class,
70	'embedded-docs' => ProcessEmbeddedDocs::class,
71	'extpp' => RunExtensionProcessors::class,
72	'fostered' => MarkFosteredContent::class,
73	'linter' => Linter::class,
74	'lang-converter' => LangConverter::class,
75	'media' => AddMediaInfo::class,
76	'migrate-metas' => MigrateTemplateMarkerMetas::class,
77	'migrate-nls' => MigrateTrailingNLs::class,
78	'normalize' => Normalize::class,
79	'process-fixups' => ProcessTreeBuilderFixups::class,
80	'pwrap' => PWrap::class,
81	'redlinks' => AddRedLinks::class,
82	'sections' => WrapSections::class, // Don't process HTML in embedded attributes
83	'tplwrap' => WrapTemplates::class,
84	'update-template' => UpdateTemplateOutput::class,
85	'ann-ids' => [
86	'name' => 'AddAnnotationIds',
87	'handlers' => [
88	[ 'nodeName' => 'meta', 'action' => [ AddAnnotationIds::class, 'handler' ] ]
89	],
90	'withAnnotations' => true
91	],
92	'linkneighbours+dom-unpack' => [
93	'name' => 'HandleLinkNeighbours,UnpackDOMFragments',
94	'handlers' => [
95	// Link prefixes and suffixes
96	[ 'nodeName' => 'a', 'action' => [ HandleLinkNeighbours::class, 'handler' ] ],
97	[ 'nodeName' => null, 'action' => [ UnpackDOMFragments::class, 'handler' ] ]
98	]
99	],
100	'fixups' => [
101	'name' => 'MigrateTrailingCategories,TableFixups',
102	'tplInfo' => true,
103	'handlers' => [
104	// 1. Move trailing categories in <li>s out of the list
105	[ 'nodeName' => 'li', 'action' => [ LiFixups::class, 'migrateTrailingSolTransparentLinks' ] ],
106	[ 'nodeName' => 'dt', 'action' => [ LiFixups::class, 'migrateTrailingSolTransparentLinks' ] ],
107	[ 'nodeName' => 'dd', 'action' => [ LiFixups::class, 'migrateTrailingSolTransparentLinks' ] ],
108	// 2. Fix up issues from templated table cells and table cell attributes
109	[ 'nodeName' => 'td', 'action' => [ TableFixups::class, 'handleTableCellTemplates' ] ],
110	[ 'nodeName' => 'th', 'action' => [ TableFixups::class, 'handleTableCellTemplates' ] ],
111	]
112	],
113	'fixups+dedupe-styles' => [
114	'name' => 'MigrateTrailingCategories,TableFixups,DedupeStyles',
115	'tplInfo' => true,
116	'handlers' => [
117	// 1. Move trailing categories in <li>s out of the list
118	[ 'nodeName' => 'li', 'action' => [ LiFixups::class, 'migrateTrailingSolTransparentLinks' ] ],
119	[ 'nodeName' => 'dt', 'action' => [ LiFixups::class, 'migrateTrailingSolTransparentLinks' ] ],
120	[ 'nodeName' => 'dd', 'action' => [ LiFixups::class, 'migrateTrailingSolTransparentLinks' ] ],
121	// 2. Fix up issues from templated table cells and table cell attributes
122	[ 'nodeName' => 'td', 'action' => [ TableFixups::class, 'handleTableCellTemplates' ] ],
123	[ 'nodeName' => 'th', 'action' => [ TableFixups::class, 'handleTableCellTemplates' ] ],
124	// 3. Deduplicate template styles
125	// (should run after dom-fragment expansion + after extension post-processors)
126	[ 'nodeName' => 'style', 'action' => [ DedupeStyles::class, 'dedupe' ] ]
127	]
128	],
129	// Strip marker metas -- removes left over marker metas (ex: metas
130	// nested in expanded tpl/extension output).
131	'strip-metas' => [
132	'name' => 'CleanUp-stripMarkerMetas',
133	'handlers' => [
134	[ 'nodeName' => 'meta', 'action' => [ CleanUp::class, 'stripMarkerMetas' ] ]
135	]
136	],
137	'displayspace+linkclasses' => [
138	'name' => 'DisplaySpace+AddLinkAttributes',
139	'handlers' => [
140	[ 'nodeName' => null, 'action' => [ DisplaySpace::class, 'leftHandler' ] ],
141	[ 'nodeName' => null, 'action' => [ DisplaySpace::class, 'rightHandler' ] ],
142	[ 'nodeName' => 'a', 'action' => [ AddLinkAttributes::class, 'handler' ] ]
143	]
144	],
145	'gen-anchors' => [
146	'name' => 'Headings-genAnchors',
147	'handlers' => [
148	[ 'nodeName' => null, 'action' => [ Headings::class, 'genAnchors' ] ],
149	]
150	],
151	'dedupe-heading-ids' => [
152	'name' => 'Headings-dedupeIds',
153	'handlers' => [
154	[ 'nodeName' => null, 'action' => [ Headings::class, 'dedupeHeadingIds' ] ]
155	]
156	],
157	'heading-ids' => [
158	'name' => 'Headings-genAnchors',
159	'handlers' => [
160	[ 'nodeName' => null, 'action' => [ Headings::class, 'genAnchors' ] ],
161	[ 'nodeName' => null, 'action' => [ Headings::class, 'dedupeHeadingIds' ] ]
162	]
163	],
164	'cleanup' => [
165	'name' => 'CleanUp-handleEmptyElts,CleanUp-cleanup',
166	'tplInfo' => true,
167	'handlers' => [
168	// Strip empty elements from template content
169	[ 'nodeName' => null, 'action' => [ CleanUp::class, 'handleEmptyElements' ] ],
170	// Additional cleanup
171	[ 'nodeName' => null, 'action' => [ CleanUp::class, 'finalCleanup' ] ]
172	]
173	],
174	'saveDP' => [
175	'name' => 'CleanUp-saveDataParsoid',
176	'tplInfo' => true,
177	'handlers' => [
178	// Mark which data.parsoid's should be serialized into
179	// data-parsoid html attributes.
180	// Make this its own thing so that any changes to the DOM
181	// don't affect other handlers that run alongside it.
182	[ 'nodeName' => null, 'action' => [ CleanUp::class, 'saveDataParsoid' ] ]
183	]
184	]
185	];
186
187	// NOTES about ordering / inclusion:
188	//
189	// media:
190	// This is run at all levels for now - gallery extension's "packed" mode
191	// would otherwise need a post-processing pass to scale media after it
192	// has been fetched. That introduces an ordering dependency that may
193	// or may not complicate things.
194	// migrate-metas:
195	// - Run this after 'pwrap' because it can add additional opportunities for
196	// meta migration which we will miss if we run this before p-wrapping.
197	// - We could potentially move this just before 'tplwrap' by seeing this
198	// as a preprocessing pass for that. But, we will have to update the pass
199	// to update DSR properties where required.
200	// - In summary, this can at most be moved before 'media' or after
201	// 'migrate-nls' without needing any other changes.
202	// dsr, tplwrap:
203	// DSR computation and template wrapping cannot be skipped for top-level content
204	// even if they are part of nested level pipelines, because such content might be
205	// embedded in attributes and they may need to be processed independently.
206	//
207	// Nested (non-top-level) pipelines can never include the following:
208	// - lang-converter, convertoffsets, dedupe-styles, cleanup, saveDP
209	//
210	// FIXME: Perhaps introduce a config flag in the processor config that
211	// verifies this property against a pipeline's 'toplevel' state.
212	public const NESTED_PIPELINE_DOM_TRANSFORMS = [
213	'fostered', 'process-fixups', 'normalize', 'pwrap',
214	'media', 'migrate-metas', 'migrate-nls', 'dsr', 'tplwrap',
215	'ann-ids', 'annwrap', 'linkneighbours+dom-unpack'
216	];
217
218	// NOTES about ordering:
219	// lang-converter, redlinks:
220	// Language conversion and redlink marking are done here
221	// before we cleanup and save data-parsoid because they
222	// are also used in pb2pb/html2html passes, and we want to
223	// keep their input/output formats consistent.
224	public const FULL_PARSE_GLOBAL_DOM_TRANSFORMS = [
225	// FIXME: It should be documented in the spec that an extension's
226	// wtDOMProcess handler is run once on the top level document.
227	'extpp',
228	'fixups+dedupe-styles', 'linter', 'strip-metas',
229	'lang-converter', 'redlinks', 'displayspace+linkclasses',
230	// Benefits from running after determining which media are redlinks
231	'heading-ids',
232	'sections', 'convertoffsets', 'cleanup',
233	'embedded-docs',
234	'saveDP', 'addmetadata'
235	];
236
237	// Skipping sections, addmetadata from the above pipeline
238	//
239	// FIXME: Skip extpp, linter, lang-converter, redlinks, heading-ids, convertoffsets, saveDP for now.
240	// This replicates behavior prior to this refactor.
241	public const FULL_PARSE_EMBEDDED_DOC_DOM_TRANSFORMS = [
242	'fixups+dedupe-styles', 'strip-metas',
243	'displayspace+linkclasses',
244	'cleanup',
245	// Need to run this recursively
246	'embedded-docs',
247	// FIXME This means the data-* from embedded HTML fragments won't end up
248	// in the pagebundle. But, if we try to call this on those fragments,
249	// we get multiple calls to store embedded docs. So, we may need to
250	// write a custom traverser if we want these embedded data* objects
251	// in the pagebundle (this is not a regression since they weren't part
252	// of the pagebundle all this while anyway.)
253	/* 'saveDP' */
254	];
255
256	public const SELECTIVE_UPDATE_FRAGMENT_GLOBAL_DOM_TRANSFORMS = [
257	'extpp', // FIXME: this should be a different processor
258	'fixups', 'strip-metas', 'redlinks', 'displayspace+linkclasses',
259	'gen-anchors', 'convertoffsets', 'cleanup',
260	// FIXME: This will probably need some special-case code to first
261	// strip old metadata before adding fresh metadata.
262	'addmetadata'
263	];
264
265	public const SELECTIVE_UPDATE_GLOBAL_DOM_TRANSFORMS = [
266	'update-template', 'linter', 'lang-converter', /* FIXME: Are lang converters idempotent? */
267	'heading-ids', 'sections', 'saveDP'
268	];
269
270	private static $stages = [
271	"Tokenizer" => [
272	"class" => PegTokenizer::class,
273	],
274	"TokenTransform2" => [
275	"class" => TokenTransformManager::class,
276	"transformers" => [
277	OnlyInclude::class,
278
279	TemplateHandler::class,
280	ExtensionHandler::class,
281
282	// Expand attributes after templates to avoid expanding unused branches.
283	// No expansion of quotes, paragraphs etc in attributes,
284	// as with the legacy parser - up to end of TokenTransform2.
285	AttributeExpander::class,
286
287	// now all attributes expanded to tokens or string
288	// more convenient after attribute expansion
289	WikiLinkHandler::class,
290	ExternalLinkHandler::class,
291	LanguageVariantHandler::class,
292
293	// This converts dom-fragment-token tokens all the way to DOM
294	// and wraps them in DOMFragment wrapper tokens which will then
295	// get unpacked into the DOM by a dom-fragment unpacker.
296	DOMFragmentBuilder::class
297	],
298	],
299	"TokenTransform3" => [
300	"class" => TokenTransformManager::class,
301	"transformers" => [
302	TokenStreamPatcher::class,
303	// add <pre>s
304	PreHandler::class,
305	QuoteTransformer::class,
306	// add before transforms that depend on behavior switches
307	// examples: toc generation, edit sections
308	BehaviorSwitchHandler::class,
309
310	ListHandler::class,
311	SanitizerHandler::class,
312	// Wrap tokens into paragraphs post-sanitization so that
313	// tags that converted to text by the sanitizer have a chance
314	// of getting wrapped into paragraphs. The sanitizer does not
315	// require the existence of p-tags for its functioning.
316	ParagraphWrapper::class
317	],
318	],
319	// Build a tree out of the fully processed token stream
320	"TreeBuilder" => [
321	"class" => TreeBuilderStage::class,
322	],
323	// DOM transformer for top-level documents.
324	// This performs a lot of post-processing of the DOM
325	// (Template wrapping, broken wikitext/html detection, etc.)
326	"FullParseDOMTransform" => [
327	"class" => DOMPostProcessor::class,
328	"processors" => [
329	self::NESTED_PIPELINE_DOM_TRANSFORMS,
330	self::FULL_PARSE_GLOBAL_DOM_TRANSFORMS
331	],
332	],
333	// DOM transformer for fragments of a top-level document
334	"NestedFragmentDOMTransform" => [
335	"class" => DOMPostProcessor::class,
336	"processors" => self::NESTED_PIPELINE_DOM_TRANSFORMS
337	],
338	// DOM transformations to run on attribute-embedded docs of the top level doc
339	"FullParseEmbeddedDocsDOMTransform" => [
340	"class" => DOMPostProcessor::class,
341	"processors" => self::FULL_PARSE_EMBEDDED_DOC_DOM_TRANSFORMS
342	],
343	// DOM transformer for fragments during selective updates.
344	// This may eventually become identical to NestedFragmentDOMTransform,
345	// but at this time, it is unclear if that will materialize.
346	"SelectiveUpdateFragmentDOMTransform" => [
347	"class" => DOMPostProcessor::class,
348	"processors" => [
349	self::NESTED_PIPELINE_DOM_TRANSFORMS,
350	self::SELECTIVE_UPDATE_FRAGMENT_GLOBAL_DOM_TRANSFORMS
351	],
352	],
353	// DOM transformer for the top-level page during selective updates.
354	"SelectiveUpdateDOMTransform" => [
355	// For use in the top-level of the selective-update pipeline
356	"class" => DOMPostProcessor::class,
357	"processors" => self::SELECTIVE_UPDATE_GLOBAL_DOM_TRANSFORMS
358	]
359	];
360
361	private static $pipelineRecipes = [
362	// This pipeline takes wikitext as input and emits a fully
363	// processed DOM as output. This is the pipeline used for
364	// all top-level documents.
365	"fullparse-wikitext-to-dom" => [
366	"alwaysToplevel" => true,
367	"outType" => "DOM",
368	"stages" => [
369	"Tokenizer", "TokenTransform2", "TokenTransform3", "TreeBuilder", "FullParseDOMTransform"
370	]
371	],
372
373	"fullparse-embedded-docs-dom-to-dom" => [
374	"alwaysToplevel" => true,
375	"outType" => "DOM",
376	"stages" => [ "FullParseEmbeddedDocsDOMTransform" ]
377	],
378
379	// This pipeline takes a DOM and emits a fully processed DOM as output.
380	"selective-update-dom-to-dom" => [
381	"alwaysToplevel" => true,
382	"outType" => "DOM",
383	"stages" => [ "SelectiveUpdateDOMTransform" ]
384	],
385
386	// This pipeline takes wikitext as input and emits a partially
387	// processed DOM as output. This is the pipeline used for processing
388	// page fragments to DOM in a selective page update context
389	// This is always toplevel because the wikitext being updated
390	// is found at the toplevel of the page.
391	"selective-update-fragment-wikitext-to-dom" => [
392	"alwaysToplevel" => true,
393	"outType" => "DOM",
394	"stages" => [
395	"Tokenizer", "TokenTransform2", "TokenTransform3", "TreeBuilder", "SelectiveUpdateFragmentDOMTransform"
396	]
397	],
398
399	// This pipeline takes wikitext as input and emits a fully
400	// processed DOM as output. This is the pipeline used for
401	// wikitext fragments of a top-level document that should be
402	// processed to a DOM fragment. This pipeline doesn't run all
403	// of the DOM transformations in the DOMTransform pipeline.
404	// We will likely use a specialized DOMTransform stage here.
405	"wikitext-to-fragment" => [
406	// FIXME: This is known to be always not top-level
407	// We could use a different flag to lock these pipelines too.
408	"outType" => "DOM",
409	"stages" => [
410	"Tokenizer", "TokenTransform2", "TokenTransform3", "TreeBuilder", "NestedFragmentDOMTransform"
411	]
412	],
413
414	// This pipeline takes tokens from stage 2 and emits a DOM fragment
415	// as output - this runs the same DOM transforms as the 'wikitext-to-fragment'
416	// pipeline and will get a specialized DOMTransform stage as above.
417	"expanded-tokens-to-fragment" => [
418	"outType" => "DOM",
419	"stages" => [ "TokenTransform3", "TreeBuilder", "NestedFragmentDOMTransform" ]
420	],
421
422	// This pipeline takes wikitext as input and emits tokens that
423	// have had all templates, extensions, links, images processed
424	"wikitext-to-expanded-tokens" => [
425	"outType" => "Tokens",
426	"stages" => [ "Tokenizer", "TokenTransform2" ]
427	],
428
429	// This pipeline takes tokens from the PEG tokenizer and emits
430	// tokens that have had all templates and extensions processed.
431	"peg-tokens-to-expanded-tokens" => [
432	"outType" => "Tokens",
433	"stages" => [ "TokenTransform2" ]
434	]
435	];
436
437	private static $supportedOptions = [
438	// If true, templates found in content will have their contents expanded
439	'expandTemplates',
440
441	// If true, indicates pipeline is processing the expanded content of a
442	// template or its arguments
443	'inTemplate',
444
445	// The extension tag that is being processed (Ex: ref, references)
446	// (in current usage, only used for native tag implementation)
447	'extTag',
448
449	// Extension-specific options
450	'extTagOpts',
451
452	// Content being parsed is used in an inline context
453	'inlineContext',
454
455	// Are we processing content of attributes?
456	// (in current usage, used for transcluded attr. keys/values)
457	'attrExpansion',
458	];
459
460	private array $pipelineCache = [];
461
462	private Env $env;
463
/**
 * @param Env $env Environment kept on the factory; it is handed to the
 *   stages and pipelines that this factory constructs.
 */
public function __construct( Env $env ) {
	$this->env = $env;
}
467
/**
 * Fill in pipeline option defaults and reject unknown option names.
 *
 * @param array $options Caller-supplied pipeline options
 * @return array The same options, with 'inTemplate' and 'expandTemplates'
 *   filled in wherever the caller left them unset (or null)
 */
private function defaultOptions( array $options ): array {
	// Unless the caller says otherwise, we are not inside a template ...
	$options['inTemplate'] ??= false;

	// ... and templates are expanded.
	$options['expandTemplates'] ??= true;

	// Guard against misspelled option names: every key must be one of
	// the names listed in self::$supportedOptions.
	foreach ( array_keys( $options ) as $optName ) {
		$isSupported = in_array( $optName, self::$supportedOptions, true );
		Assert::invariant( $isSupported, 'Invalid cacheKey option: ' . $optName );
	}

	return $options;
}
491
/**
 * Map DOM processor shortcut names to their processor configurations.
 *
 * Entries of self::DOM_PROCESSOR_CONFIG that are plain class names are
 * expanded into a [ 'name' => ..., 'Processor' => ... ] array; entries
 * that already are arrays are used unchanged. In both cases the shortcut
 * name is recorded under the 'shortcut' key.
 *
 * @param array $procNames List of keys of self::DOM_PROCESSOR_CONFIG
 * @return array List of processor configuration arrays, in input order
 * @throws InternalException If a name has no entry in the processor config
 */
public static function procNamesToProcs( array $procNames ): array {
	$processors = [];
	foreach ( $procNames as $name ) {
		$proc = self::DOM_PROCESSOR_CONFIG[$name] ?? null;
		if ( $proc === null ) {
			// Fail with a clear message rather than an "undefined array key"
			// warning followed by a null being passed further down.
			throw new InternalException( 'Unsupported DOM processor: ' . $name );
		}
		if ( !is_array( $proc ) ) {
			// Plain class name: derive the display name from the class
			$proc = [
				'name' => Utils::stripNamespace( $proc ),
				'Processor' => $proc,
			];
		}
		$proc['shortcut'] = $name;
		$processors[] = $proc;
	}
	return $processors;
}
507
/**
 * Build a fresh pipeline by instantiating and chaining the stages listed
 * in the recipe for the requested pipeline type.
 *
 * @param string $type Key of self::$pipelineRecipes
 * @param string $cacheKey Cache key passed on to the new pipeline
 * @param array $options Options handed to every stage and transformer
 * @return ParserPipeline
 * @throws InternalException If there is no recipe for $type
 */
private function makePipeline(
	string $type, string $cacheKey, array $options
): ParserPipeline {
	$recipe = self::$pipelineRecipes[$type] ?? null;
	if ( $recipe === null ) {
		throw new InternalException( 'Unsupported Pipeline: ' . $type );
	}

	$builtStages = [];
	$previous = null;

	foreach ( $recipe['stages'] as $stageName ) {
		$spec = self::$stages[$stageName];
		$stageClass = $spec['class'];

		// Every stage is told which stage precedes it (null for the first)
		$current = new $stageClass( $this->env, $options, $stageName, $previous );

		if ( isset( $spec['transformers'] ) ) {
			foreach ( $spec['transformers'] as $transformerClass ) {
				$current->addTransformer( new $transformerClass( $current, $options ) );
			}
		} elseif ( isset( $spec['processors'] ) ) {
			// 'processors' is either a flat list of shortcut names or a
			// list of such lists: flatten it, keeping the order.
			$nestedNames = $spec['processors'];
			$flatNames = [];
			array_walk_recursive(
				$nestedNames,
				static function ( $shortcut ) use ( &$flatNames ) {
					$flatNames[] = $shortcut;
				}
			);
			$current->registerProcessors( self::procNamesToProcs( $flatNames ) );
		}

		$builtStages[] = $current;
		$previous = $current;
	}

	$alwaysToplevel = $recipe['alwaysToplevel'] ?? false;

	return new ParserPipeline(
		$alwaysToplevel,
		$type,
		$recipe['outType'],
		$cacheKey,
		$builtStages,
		$this->env
	);
}
559
/**
 * Derive the key under which a pipeline is cached: the given base key
 * followed by one '::'-prefixed suffix per option that is in effect.
 *
 * @param string $cacheKey Base key (getPipeline() passes the pipeline type)
 * @param array $options Pipeline options
 * @return string
 */
private function getCacheKey( string $cacheKey, array $options ): string {
	$key = $cacheKey;

	// Template expansion is the keyed-by-absence case
	if ( empty( $options['expandTemplates'] ) ) {
		$key .= '::noExpand';
	}

	// Boolean flags, appended in this fixed order when they are truthy
	$flagSuffixes = [
		'inlineContext' => '::inlineContext',
		'inTemplate' => '::inTemplate',
		'attrExpansion' => '::attrExpansion',
	];
	foreach ( $flagSuffixes as $flag => $suffix ) {
		if ( !empty( $options[$flag] ) ) {
			$key .= $suffix;
		}
	}

	$extTag = $options['extTag'] ?? null;
	if ( $extTag !== null ) {
		$key .= '::' . $extTag;
		// FIXME: This is not the best strategy. But, instead of
		// premature complexity, let us see how extensions want to
		// use this and then figure out what constraints are needed.
		$extTagOpts = $options['extTagOpts'] ?? null;
		if ( $extTagOpts !== null ) {
			$key .= '::' . PHPUtils::jsonEncode( $extTagOpts );
		}
	}

	return $key;
}
584
/**
 * Parse wikitext with the 'fullparse-wikitext-to-dom' pipeline.
 *
 * @param string $src Wikitext source
 * @return Document The owner document of what the pipeline returned
 */
public function parse( string $src ): Document {
	$pipeline = $this->getPipeline( 'fullparse-wikitext-to-dom' );

	$initOptions = [
		'frame' => $this->env->topFrame,
		'toFragment' => false,
	];
	$pipeline->init( $initOptions );

	// Top-level doc parsing always starts in SOL state
	$result = $pipeline->parseChunkily( $src, [ 'sol' => true ] );
	return $result->ownerDocument;
}
594
/**
 * Run the 'selective-update-dom-to-dom' pipeline on the given update data.
 *
 * @param SelectiveUpdateData $selparData
 * @param array $options Options for selective DOM update
 *  - mode: (string) One of "template", "section", "generic"
 *    For now, defaults to 'template', if absent
 * @return Document
 */
public function selectiveDOMUpdate( SelectiveUpdateData $selparData, array $options = [] ): Document {
	$pipeline = $this->getPipeline( 'selective-update-dom-to-dom' );

	$initOptions = [
		'frame' => $this->env->topFrame,
		'toFragment' => false,
	];
	$pipeline->init( $initOptions );

	return $pipeline->selectiveParse( $selparData, $options );
}
609
/**
 * Hand out a pipeline of the requested type, reusing a cached one when
 * one is available under the same cache key and building a new one
 * otherwise.
 *
 * @param string $type
 * @param array $options Pipeline options; together with $type they
 *   determine the key under which the pipeline is cached for reuse.
 * @return ParserPipeline
 */
public function getPipeline(
	string $type, array $options = []
): ParserPipeline {
	$options = $this->defaultOptions( $options );
	$cacheKey = $this->getCacheKey( $type, $options );

	// Make sure a (possibly empty) pool exists for this key
	$this->pipelineCache[$cacheKey] ??= [];

	// array_pop() yields null for an empty pool, in which case a fresh
	// pipeline is built instead.
	$pipe = array_pop( $this->pipelineCache[$cacheKey] )
		?? $this->makePipeline( $type, $cacheKey, $options );

	// Debugging aid: Assign unique id to the pipeline
	$pipe->setPipelineId( self::$globalPipelineId++ );

	return $pipe;
}
638
/**
 * Callback called by a pipeline at the end of its processing. Puts the
 * pipeline back into the cache unless 100 pipelines are already stored
 * under its cache key.
 *
 * @param ParserPipeline $pipe
 */
public function returnPipeline( ParserPipeline $pipe ): void {
	$key = $pipe->getCacheKey();
	$this->pipelineCache[$key] ??= [];

	// Pool for this key is full: let the pipeline go
	if ( count( $this->pipelineCache[$key] ) >= 100 ) {
		return;
	}

	$this->pipelineCache[$key][] = $pipe;
}
652	}