Code Coverage for /src/src/Html2Wt/WikitextSerializer.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	0.00% covered (danger)	0.00%	0 / 682	0.00% covered (danger)	0.00%	0 / 33	CRAP	0.00% covered (danger)	0.00%	0 / 1
WikitextSerializer	0.00% covered (danger)	0.00%	0 / 682	0.00% covered (danger)	0.00%	0 / 33	77562	0.00% covered (danger)	0.00%	0 / 1
__construct	0.00% covered (danger)	0.00%	0 / 4	0.00% covered (danger)	0.00%	0 / 1	2
linkHandler	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 1	2
languageVariantHandler	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 1	2
escapeWikitext	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 1	2
domToWikitext	0.00% covered (danger)	0.00%	0 / 3	0.00% covered (danger)	0.00%	0 / 1	2
htmlToWikitext	0.00% covered (danger)	0.00%	0 / 4	0.00% covered (danger)	0.00%	0 / 1	2
getAttributeKey	0.00% covered (danger)	0.00%	0 / 8	0.00% covered (danger)	0.00%	0 / 1	20
getAttributeValue	0.00% covered (danger)	0.00%	0 / 10	0.00% covered (danger)	0.00%	0 / 1	30
getAttributeValueAsShadowInfo	0.00% covered (danger)	0.00%	0 / 9	0.00% covered (danger)	0.00%	0 / 1	6
serializedImageAttrVal	0.00% covered (danger)	0.00%	0 / 2	0.00% covered (danger)	0.00%	0 / 1	6
serializedAttrVal	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 1	2
tagNeedsEscaping	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 1	2
wrapAngleBracket	0.00% covered (danger)	0.00%	0 / 7	0.00% covered (danger)	0.00%	0 / 1	20
serializeHTMLTag	0.00% covered (danger)	0.00%	0 / 19	0.00% covered (danger)	0.00%	0 / 1	72
serializeHTMLEndTag	0.00% covered (danger)	0.00%	0 / 13	0.00% covered (danger)	0.00%	0 / 1	42
serializeAttributes	0.00% covered (danger)	0.00%	0 / 56	0.00% covered (danger)	0.00%	0 / 1	992
handleLIHackIfApplicable	0.00% covered (danger)	0.00%	0 / 7	0.00% covered (danger)	0.00%	0 / 1	42
formatStringSubst	0.00% covered (danger)	0.00%	0 / 10	0.00% covered (danger)	0.00%	0 / 1	20
createParamComparator	0.00% covered (danger)	0.00%	0 / 56	0.00% covered (danger)	0.00%	0 / 1	342
serializePart	0.00% covered (danger)	0.00%	0 / 121	0.00% covered (danger)	0.00%	0 / 1	1892
serializeFromParts	0.00% covered (danger)	0.00%	0 / 33	0.00% covered (danger)	0.00%	0 / 1	132
serializeExtensionStartTag	0.00% covered (danger)	0.00%	0 / 17	0.00% covered (danger)	0.00%	0 / 1	30
defaultExtensionHandler	0.00% covered (danger)	0.00%	0 / 18	0.00% covered (danger)	0.00%	0 / 1	20
serializeText	0.00% covered (danger)	0.00%	0 / 13	0.00% covered (danger)	0.00%	0 / 1	42
serializeTextNode	0.00% covered (danger)	0.00%	0 / 4	0.00% covered (danger)	0.00%	0 / 1	2
emitWikitext	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 1	2
serializeNodeInternal	0.00% covered (danger)	0.00%	0 / 63	0.00% covered (danger)	0.00%	0 / 1	870
serializeNode	0.00% covered (danger)	0.00%	0 / 62	0.00% covered (danger)	0.00%	0 / 1	306
stripUnnecessaryHeadingNowikis	0.00% covered (danger)	0.00%	0 / 11	0.00% covered (danger)	0.00%	0 / 1	20
stripUnnecessaryIndentPreNowikis	0.00% covered (danger)	0.00%	0 / 32	0.00% covered (danger)	0.00%	0 / 1	72
stripUnnecessaryQuoteNowikis	0.00% covered (danger)	0.00%	0 / 63	0.00% covered (danger)	0.00%	0 / 1	1482
serializeDOM	0.00% covered (danger)	0.00%	0 / 30	0.00% covered (danger)	0.00%	0 / 1	156
trace	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 1	2

1	<?php
2	declare( strict_types = 1 );
3
4	namespace Wikimedia\Parsoid\Html2Wt;
5
6	use Closure;
7	use Exception;
8	use Wikimedia\Assert\Assert;
9	use Wikimedia\Parsoid\Config\Env;
10	use Wikimedia\Parsoid\Core\InternalException;
11	use Wikimedia\Parsoid\DOM\Comment;
12	use Wikimedia\Parsoid\DOM\Document;
13	use Wikimedia\Parsoid\DOM\DocumentFragment;
14	use Wikimedia\Parsoid\DOM\Element;
15	use Wikimedia\Parsoid\DOM\Node;
16	use Wikimedia\Parsoid\DOM\Text;
17	use Wikimedia\Parsoid\Html2Wt\ConstrainedText\ConstrainedText;
18	use Wikimedia\Parsoid\Html2Wt\DOMHandlers\DOMHandler;
19	use Wikimedia\Parsoid\Html2Wt\DOMHandlers\DOMHandlerFactory;
20	use Wikimedia\Parsoid\NodeData\ParamInfo;
21	use Wikimedia\Parsoid\NodeData\TemplateInfo;
22	use Wikimedia\Parsoid\Tokens\KV;
23	use Wikimedia\Parsoid\Tokens\TagTk;
24	use Wikimedia\Parsoid\Tokens\Token;
25	use Wikimedia\Parsoid\Utils\ContentUtils;
26	use Wikimedia\Parsoid\Utils\DiffDOMUtils;
27	use Wikimedia\Parsoid\Utils\DOMCompat;
28	use Wikimedia\Parsoid\Utils\DOMDataUtils;
29	use Wikimedia\Parsoid\Utils\DOMUtils;
30	use Wikimedia\Parsoid\Utils\PHPUtils;
31	use Wikimedia\Parsoid\Utils\Title;
32	use Wikimedia\Parsoid\Utils\TokenUtils;
33	use Wikimedia\Parsoid\Utils\Utils;
34	use Wikimedia\Parsoid\Utils\WTUtils;
35	use Wikimedia\Parsoid\Wikitext\Consts;
36
37	/**
38	* HTML to wikitext serializer.
39	* Serializes a chunk of tokens or an HTML DOM to MediaWiki's wikitext flavor.
40	*
41	* This serializer is designed to eventually
42	* - accept arbitrary HTML and
43	* - serialize that to wikitext in a way that round-trips back to the same
44	* HTML DOM as far as possible within the limitations of wikitext.
45	*
46	* Not much effort has been invested so far on supporting
47	* non-Parsoid/VE-generated HTML. Some of this involves adaptively switching
48	* between wikitext and HTML representations based on the values of attributes
49	* and DOM context. A few special cases are already handled adaptively
50	* (multi-paragraph list item contents are serialized as HTML tags for
51	* example, generic A elements are serialized to HTML A tags), but in general
52	* support for this is mostly missing.
53	*
54	* Example issue:
55	* ```
56	* <h1><p>foo</p></h1> will serialize to =\nfoo\n= whereas the
57	* correct serialized output would be: =<p>foo</p>=
58	* ```
59	*
60	* What to do about this?
61	* - add a generic 'can this HTML node be serialized to wikitext in this
62	* context' detection method and use that to adaptively switch between
63	* wikitext and HTML serialization.
64	*
65	*/
66	class WikitextSerializer {
67
68	/** @var string[] */
69	private const IGNORED_ATTRIBUTES = [
70	'data-parsoid' => true,
71	'data-ve-changed' => true,
72	'data-parsoid-changed' => true,
73	'data-parsoid-diff' => true,
74	'data-parsoid-serialize' => true,
75	DOMDataUtils::DATA_OBJECT_ATTR_NAME => true,
76	];
77
78	/** @var string[] attribute name => value regexp */
79	private const PARSOID_ATTRIBUTES = [
80	'about' => '/^#mwt\d+$/D',
81	'typeof' => '/(^\|\s)mw:\S+/',
82	];
83
84	/** @var string Regexp */
85	private const TRAILING_COMMENT_OR_WS_AFTER_NL_REGEXP
86	= '/\n(\s\|' . Utils::COMMENT_REGEXP_FRAGMENT . ')*$/D';
87
88	/** @var string Regexp */
89	private const FORMATSTRING_REGEXP =
90	'/^(\n)?(\{\{ _+)(\n? \\|\n? _+ = )(_+)(\n? \}\})(\n)?$/D';
91
92	/** @var string Regexp for testing whether nowiki added around heading-like wikitext is needed */
93	private const COMMENT_OR_WS_REGEXP = '/^(\s\|' . Utils::COMMENT_REGEXP_FRAGMENT . ')*$/D';
94
95	/** @var string Regexp for testing whether nowiki added around heading-like wikitext is needed */
96	private const HEADING_NOWIKI_REGEXP = '/^(?:' . Utils::COMMENT_REGEXP_FRAGMENT . ')*'
97	. '<nowiki>(=+[^=]+=+)<\/nowiki>(.+)$/D';
98
99	/** @var array string[] */
100	private static $separatorREs = [
101	'pureSepRE' => '/^[ \t\r\n]*$/D',
102	'sepPrefixWithNlsRE' => '/^[ \t]\n+[ \t\r\n]/',
103	'sepSuffixWithNlsRE' => '/\n[ \t\r\n]*$/D',
104	];
105
106	/** @var WikitextEscapeHandlers */
107	public $wteHandlers;
108
109	/** @var Env */
110	public $env;
111
112	/** @var SerializerState */
113	private $state;
114
115	/** @var string Log type for trace() */
116	private $logType;
117
118	/**
119	* @param Env $env
120	* @param array $options List of options for serialization:
121	* - logType: (string)
122	* - extName: (string)
123	*/
124	public function __construct( Env $env, $options ) {
125	$this->env = $env;
126	$this->logType = $options['logType'] ?? 'trace/wts';
127	$this->state = new SerializerState( $this, $options );
128	$this->wteHandlers = new WikitextEscapeHandlers( $env, $options['extName'] ?? null );
129	}
130
131	/**
132	* Main link handler.
133	* @param Element $node
134	* Used in multiple tag handlers (<a> and <link>), and hence added as top-level method
135	*/
136	public function linkHandler( Element $node ): void {
137	LinkHandlerUtils::linkHandler( $this->state, $node );
138	}
139
140	/**
141	* @param Element $node
142	*/
143	public function languageVariantHandler( Node $node ): void {
144	LanguageVariantHandler::handleLanguageVariant( $this->state, $node );
145	}
146
147	/**
148	* Escape wikitext-like strings in '$text' so that $text renders as a plain string
149	* when rendered as HTML. The escaping is done based on the context in which $text
150	* is present (ex: start-of-line, in a link, etc.)
151	*
152	* @param SerializerState $state
153	* @param string $text
154	* @param array $opts
155	* - node: (Node)
156	* - isLastChild: (bool)
157	* @return string
158	*/
159	public function escapeWikitext( SerializerState $state, string $text, array $opts ): string {
160	return $this->wteHandlers->escapeWikitext( $state, $text, $opts );
161	}
162
163	public function domToWikitext(
164	array $opts, DocumentFragment $node
165	): string {
166	$opts['logType'] = $this->logType;
167	$serializer = new WikitextSerializer( $this->env, $opts );
168	return $serializer->serializeDOM( $node );
169	}
170
171	public function htmlToWikitext( array $opts, string $html ): string {
172	$domFragment = ContentUtils::createAndLoadDocumentFragment(
173	$this->env->getTopLevelDoc(), $html, [ 'markNew' => true ]
174	);
175	return $this->domToWikitext( $opts, $domFragment );
176	}
177
178	public function getAttributeKey( Element $node, string $key ): string {
179	$tplAttrs = DOMDataUtils::getDataMw( $node )->attribs ?? [];
180	foreach ( $tplAttrs as $attr ) {
181	// If this attribute's key is generated content,
182	// serialize HTML back to generator wikitext.
183	if ( ( $attr->key['txt'] ?? null ) === $key && isset( $attr->key['html'] ) ) {
184	return $this->htmlToWikitext( [
185	'env' => $this->env,
186	'onSOL' => false,
187	], $attr->key['html'] );
188	}
189	}
190	return $key;
191	}
192
193	/**
194	* @param Element $node
195	* @param string $key Attribute name.
196	* @return ?string The wikitext value, or null if the attribute is not present.
197	*/
198	public function getAttributeValue( Element $node, string $key ): ?string {
199	$tplAttrs = DOMDataUtils::getDataMw( $node )->attribs ?? [];
200	foreach ( $tplAttrs as $attr ) {
201	// If this attribute's value is generated content,
202	// serialize HTML back to generator wikitext.
203	// PORT-FIXME: not type safe. Need documentation on attrib format.
204	if ( ( $attr->key === $key \|\| ( $attr->key['txt'] ?? null ) === $key )
205	// Only return here if the value is generated (ie. .html),
206	// it may just be in .txt form.
207	// html:"" will serialize to "" and
208	// will be returned here. This is used to suppress the =".."
209	// string in the attribute in scenarios where the template
210	// generates a "k=v" string.
211	// Ex: <div {{1x\|1=style='color:red'}}>foo</div>
212	&& isset( $attr->value['html'] )
213	) {
214	return $this->htmlToWikitext( [
215	'env' => $this->env,
216	'onSOL' => false,
217	'inAttribute' => true,
218	], $attr->value['html'] );
219	}
220	}
221	return null;
222	}
223
224	/**
225	* @param Element $node
226	* @param string $key
227	* @return array\|null A tuple in {@link WTSUtils::getShadowInfo()} format,
228	* with an extra 'fromDataMW' flag.
229	*/
230	public function getAttributeValueAsShadowInfo( Element $node, string $key ): ?array {
231	$v = $this->getAttributeValue( $node, $key );
232	if ( $v === null ) {
233	return $v;
234	}
235	return [
236	'value' => $v,
237	'modified' => false,
238	'fromsrc' => true,
239	'fromDataMW' => true,
240	];
241	}
242
243	/**
244	* @param Element $dataMWnode
245	* @param Element $htmlAttrNode
246	* @param string $key
247	* @return array A tuple in {@link WTSUtils::getShadowInfo()} format,
248	* possibly with an extra 'fromDataMW' flag.
249	*/
250	public function serializedImageAttrVal(
251	Element $dataMWnode, Element $htmlAttrNode, string $key
252	): array {
253	$v = $this->getAttributeValueAsShadowInfo( $dataMWnode, $key );
254	return $v ?: WTSUtils::getAttributeShadowInfo( $htmlAttrNode, $key );
255	}
256
257	public function serializedAttrVal( Element $node, string $name ): array {
258	return $this->serializedImageAttrVal( $node, $node, $name );
259	}
260
261	/**
262	* Check if token needs escaping
263	*
264	* @param string $name
265	* @return bool
266	*/
267	public function tagNeedsEscaping( string $name ): bool {
268	return WTUtils::isAnnOrExtTag( $this->env, $name );
269	}
270
271	public function wrapAngleBracket( Token $token, string $inner ): string {
272	if (
273	$this->tagNeedsEscaping( $token->getName() ) &&
274	!(
275	// Allow for html tags that shadow extension tags found in source
276	// to roundtrip. They only parse as html tags if they are unclosed,
277	// since extension tags bail on parsing without closing tags.
278	//
279	// This only applies when wrapAngleBracket() is being called for
280	// start tags, but we wouldn't be here if it was autoInsertedEnd
281	// anyways.
282	isset( Consts::$Sanitizer['AllowedLiteralTags'][$token->getName()] ) &&
283	!empty( $token->dataParsoid->autoInsertedEnd )
284	)
285	) {
286	return "<{$inner}>";
287	}
288	return "<$inner>";
289	}
290
291	public function serializeHTMLTag( Element $node, bool $wrapperUnmodified ): string {
292	// TODO(arlolra): As of 1.3.0, html pre is considered an extension
293	// and wrapped in encapsulation. When that version is no longer
294	// accepted for serialization, we can remove this backwards
295	// compatibility code.
296	//
297	// 'inHTMLPre' flag has to be updated always,
298	// even when we are selsering in the wrapperUnmodified case.
299	$token = WTSUtils::mkTagTk( $node );
300	if ( $token->getName() === 'pre' ) {
301	// html-syntax pre is very similar to nowiki
302	$this->state->inHTMLPre = true;
303	}
304
305	if ( $wrapperUnmodified ) {
306	$dsr = DOMDataUtils::getDataParsoid( $node )->dsr;
307	return $this->state->getOrigSrc( $dsr->openRange() ) ?? '';
308	}
309
310	$da = $token->dataParsoid;
311	if ( !empty( $da->autoInsertedStart ) ) {
312	return '';
313	}
314
315	$close = '';
316	if ( ( Utils::isVoidElement( $token->getName() ) && empty( $da->noClose ) ) \|\|
317	!empty( $da->selfClose )
318	) {
319	$close = ' /';
320	}
321
322	$sAttribs = $this->serializeAttributes( $node, $token );
323	if ( strlen( $sAttribs ) > 0 ) {
324	$sAttribs = ' ' . $sAttribs;
325	}
326
327	// srcTagName cannot be '' so, it is okay to use ?? operator
328	$tokenName = $da->srcTagName ?? $token->getName();
329	$inner = "{$tokenName}{$sAttribs}{$close}";
330	return $this->wrapAngleBracket( $token, $inner );
331	}
332
333	/**
334	* @param Element $node
335	* @param bool $wrapperUnmodified
336	* @return string
337	*/
338	public function serializeHTMLEndTag( Element $node, $wrapperUnmodified ): string {
339	if ( $wrapperUnmodified ) {
340	$dsr = DOMDataUtils::getDataParsoid( $node )->dsr;
341	return $this->state->getOrigSrc( $dsr->closeRange() ) ?? '';
342	}
343
344	$token = WTSUtils::mkEndTagTk( $node );
345	if ( $token->getName() === 'pre' ) {
346	$this->state->inHTMLPre = false;
347	}
348
349	// srcTagName cannot be '' so, it is okay to use ?? operator
350	$tokenName = $token->dataParsoid->srcTagName ?? $token->getName();
351	$ret = '';
352
353	if ( empty( $token->dataParsoid->autoInsertedEnd )
354	&& !Utils::isVoidElement( $token->getName() )
355	&& empty( $token->dataParsoid->selfClose )
356	) {
357	$ret = $this->wrapAngleBracket( $token, "/{$tokenName}" );
358	}
359
360	return $ret;
361	}
362
363	public function serializeAttributes( Element $node, Token $token, bool $isWt = false ): string {
364	$attribs = $token->attribs;
365
366	$out = [];
367	foreach ( $attribs as $kv ) {
368	// Tokens created during html2wt don't have nested tokens for keys.
369	// But, they could be integers but we want strings below.
370	$k = (string)$kv->k;
371	$v = null;
372	$vInfo = null;
373
374	// Unconditionally ignore
375	// (all of the IGNORED_ATTRIBUTES should be filtered out earlier,
376	// but ignore them here too just to make sure.)
377	if ( isset( self::IGNORED_ATTRIBUTES[$k] ) \|\| $k === 'data-mw' ) {
378	continue;
379	}
380
381	// Ignore parsoid-like ids. They may have been left behind
382	// by clients and shouldn't be serialized. This can also happen
383	// in v2/v3 API when there is no matching data-parsoid entry found
384	// for this id.
385	if ( $k === 'id' && preg_match( '/^mw[\w-]{2,}$/D', $kv->v ) ) {
386	if ( WTUtils::isNewElt( $node ) ) {
387	// Parsoid id found on element without a matching data-parsoid. Drop it!
388	} else {
389	$vInfo = $token->getAttributeShadowInfo( $k );
390	if ( !$vInfo['modified'] && $vInfo['fromsrc'] ) {
391	$out[] = $k . '=' . '"' . str_replace( '"', '"', $vInfo['value'] ) . '"';
392	}
393	}
394	continue;
395	}
396
397	// Parsoid auto-generates ids for headings and they should
398	// be stripped out, except if this is not auto-generated id.
399	if ( $k === 'id' && DOMUtils::isHeading( $node ) ) {
400	if ( !empty( DOMDataUtils::getDataParsoid( $node )->reusedId ) ) {
401	$vInfo = $token->getAttributeShadowInfo( $k );
402	// PORT-FIXME: is this safe? value could be a token or token array
403	$out[] = $k . '="' . str_replace( '"', '"', $vInfo['value'] ) . '"';
404	}
405	continue;
406	}
407
408	// Strip Parsoid-inserted class="mw-empty-elt" attributes
409	if ( $k === 'class'
410	&& isset( Consts::$Output['FlaggedEmptyElts'][DOMCompat::nodeName( $node )] )
411	) {
412	$kv->v = preg_replace( '/\bmw-empty-elt\b/', '', $kv->v, 1 );
413	if ( !$kv->v ) {
414	continue;
415	}
416	}
417
418	// Strip other Parsoid-generated values
419	//
420	// FIXME: Given that we are currently escaping about/typeof keys
421	// that show up in wikitext, we could unconditionally strip these
422	// away right now.
423	$parsoidValueRegExp = self::PARSOID_ATTRIBUTES[$k] ?? null;
424	if ( $parsoidValueRegExp && preg_match( $parsoidValueRegExp, $kv->v ) ) {
425	$v = preg_replace( $parsoidValueRegExp, '', $kv->v );
426	if ( $v ) {
427	$out[] = $k . '="' . $v . '"';
428	}
429	continue;
430	}
431
432	if ( strlen( $k ) > 0 ) {
433	$vInfo = $token->getAttributeShadowInfo( $k );
434	$v = $vInfo['value'];
435	// Deal with k/v's that were template-generated
436	$kk = $this->getAttributeKey( $node, $k );
437	// Pass in $k, not $kk since $kk can potentially
438	// be original wikitext source for 'k' rather than
439	// the string value of the key.
440	$vv = $this->getAttributeValue( $node, $k ) ?? $v;
441	// Remove encapsulation from protected attributes
442	// in pegTokenizer.pegjs:generic_newline_attribute
443	$kk = preg_replace( '/^data-x-/i', '', $kk, 1 );
444	// PORT-FIXME: is this type safe? $vv could be a ConstrainedText
445	if ( $vv !== null && strlen( $vv ) > 0 ) {
446	if ( !$vInfo['fromsrc'] && !$isWt ) {
447	// Escape wikitext entities
448	$vv = str_replace( '>', '>', Utils::escapeWtEntities( $vv ) );
449	}
450	$out[] = $kk . '="' . str_replace( '"', '"', $vv ) . '"';
451	} elseif ( preg_match( '/[{<]/', $kk ) ) {
452	// Templated, <include>, or <ext-tag> generated
453	$out[] = $kk;
454	} else {
455	$out[] = $kk . '=""';
456	}
457	continue;
458	// PORT-FIXME: is this type safe? $k->v could be a Token or Token array
459	} elseif ( strlen( $kv->v ) ) {
460	// not very likely..
461	$out[] = $kv->v;
462	}
463	}
464
465	// SSS FIXME: It can be reasonably argued that we can permanently delete
466	// dangerous and unacceptable attributes in the interest of safety/security
467	// and the resultant dirty diffs should be acceptable. But, this is
468	// something to do in the future once we have passed the initial tests
469	// of parsoid acceptance.
470	//
471	// 'a' data attribs -- look for attributes that were removed
472	// as part of sanitization and add them back
473	$dataParsoid = $token->dataParsoid;
474	if ( isset( $dataParsoid->a ) && isset( $dataParsoid->sa ) ) {
475	$aKeys = array_keys( $dataParsoid->a );
476	foreach ( $aKeys as $k ) {
477	// Attrib not present -- sanitized away!
478	if ( !KV::lookupKV( $attribs, (string)$k ) ) {
479	$v = $dataParsoid->sa[$k] ?? null;
480	// FIXME: The tokenizer and attribute shadowing currently
481	// don't make much effort towards distinguishing the use
482	// of HTML empty attribute syntax. We can derive whether
483	// empty attribute syntax was used from the attributes
484	// srcOffsets in the Sanitizer, from the key end position
485	// and value start position being different.
486	if ( $v !== null && $v !== '' ) {
487	$out[] = $k . '="' . str_replace( '"', '"', $v ) . '"';
488	} else {
489	$out[] = $k;
490	}
491	}
492	}
493	}
494	// XXX: round-trip optional whitespace / line breaks etc
495	return implode( ' ', $out );
496	}
497
498	/**
499	* FIXME: Get rid of this function after content version 2.2.0 has expired from caches.
500	*
501	* @param Element $node
502	*/
503	public function handleLIHackIfApplicable( Element $node ): void {
504	$liHackSrc = DOMDataUtils::getDataParsoid( $node )->liHackSrc ?? null;
505	$prev = DiffDOMUtils::previousNonSepSibling( $node );
506
507	// If we are dealing with an LI hack, then we must ensure that
508	// we are dealing with either
509	//
510	// 1. A node with no previous sibling inside of a list.
511	//
512	// 2. A node whose previous sibling is a list element.
513	if ( $liHackSrc !== null
514	// Case 1
515	&& ( ( $prev === null && DOMUtils::isList( $node->parentNode ) )
516	// Case 2
517	\|\| ( $prev !== null && DOMUtils::isListItem( $prev ) ) )
518	) {
519	$this->state->emitChunk( $liHackSrc, $node );
520	}
521	}
522
523	private function formatStringSubst( string $format, string $value, bool $forceTrim ): string {
524	// PORT-FIXME: JS is more agressive and removes various unicode whitespaces
525	// (most notably nbsp). Does that matter?
526	if ( $forceTrim ) {
527	$value = trim( $value );
528	}
529	return preg_replace_callback( '/_+/', static function ( $m ) use ( $value ) {
530	if ( $value === '' ) {
531	return $value;
532	}
533	$hole = $m[0];
534	$holeLen = strlen( $hole );
535	$valueLen = mb_strlen( $value );
536	return $holeLen <= $valueLen ? $value : $value . str_repeat( ' ', $holeLen - $valueLen );
537	}, $format, 1 );
538	}
539
540	/**
541	* Generates a template parameter sort function that tries to preserve existing ordering
542	* but also to follow the order prescribed by the templatedata.
543	* @param array $dpArgInfo
544	* @param ?array $tplData
545	* @param array $dataMwKeys
546	* @return Closure
547	*/
548	private function createParamComparator(
549	array $dpArgInfo, ?array $tplData, array $dataMwKeys
550	): Closure {
551	// Record order of parameters in new data-mw
552	$newOrder = [];
553	foreach ( $dataMwKeys as $i => $key ) {
554	$newOrder[$key] = [ 'order' => $i ];
555	}
556	// Record order of parameters in templatedata (if present)
557	$tplDataOrder = [];
558	$aliasMap = [];
559	$keys = [];
560	if ( $tplData && isset( $tplData['paramOrder'] ) ) {
561	foreach ( $tplData['paramOrder'] as $i => $key ) {
562	$tplDataOrder[$key] = [ 'order' => $i ];
563	$aliasMap[$key] = [ 'key' => $key, 'order' => -1 ];
564	$keys[] = $key;
565	// Aliases have the same sort order as the main name.
566	$aliases = $tplData['params'][$key]['aliases'] ?? [];
567	foreach ( $aliases as $j => $alias ) {
568	$aliasMap[$alias] = [ 'key' => $key, 'order' => $j ];
569	}
570	}
571	}
572	// Record order of parameters in original wikitext (from data-parsoid)
573	$origOrder = [];
574	foreach ( $dpArgInfo as $i => $argInfo ) {
575	$origOrder[$argInfo->k] = [ 'order' => $i, 'dist' => 0 ];
576	}
577	// Canonical parameter key gets the same order as an alias parameter
578	// found in the original wikitext.
579	foreach ( $dpArgInfo as $i => $argInfo ) {
580	$canon = $aliasMap[$argInfo->k] ?? null;
581	if ( $canon !== null && !array_key_exists( $canon['key'], $origOrder ) ) {
582	$origOrder[$canon['key']] = $origOrder[$argInfo->k];
583	}
584	}
585	// Find the closest "original parameter" for each templatedata parameter,
586	// so that newly-added parameters are placed near the parameters which
587	// templatedata says they should be adjacent to.
588	$nearestOrder = $origOrder;
589	$reduceF = static function ( $acc, $val ) use ( &$origOrder, &$nearestOrder ) {
590	if ( isset( $origOrder[$val] ) ) {
591	$acc = $origOrder[$val];
592	}
593	if ( !( isset( $nearestOrder[$val] ) && $nearestOrder[$val]['dist'] < $acc['dist'] ) ) {
594	$nearestOrder[$val] = $acc;
595	}
596	return [ 'order' => $acc['order'], 'dist' => $acc['dist'] + 1 ];
597	};
598	// Find closest original parameter before the key.
599	// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
600	array_reduce( $keys, $reduceF, [ 'order' => -1, 'dist' => 2 * count( $keys ) ] );
601	// Find closest original parameter after the key.
602	// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
603	array_reduce( array_reverse( $keys ), $reduceF,
604	[ 'order' => count( $origOrder ), 'dist' => count( $keys ) ] );
605
606	// Helper function to return a large number if the given key isn't
607	// in the sort order map
608	$big = max( count( $nearestOrder ), count( $newOrder ) );
609	$defaultGet = static function ( $map, $key1, $key2 = null ) use ( &$big ) {
610	$key = ( !$key2 \|\| isset( $map[$key1] ) ) ? $key1 : $key2;
611	return $map[$key]['order'] ?? $big;
612	};
613
614	return static function ( $a, $b ) use (
615	&$aliasMap, &$defaultGet, &$nearestOrder, &$tplDataOrder, &$newOrder
616	) {
617	$aCanon = $aliasMap[$a] ?? [ 'key' => $a, 'order' => -1 ];
618	$bCanon = $aliasMap[$b] ?? [ 'key' => $b, 'order' => -1 ];
619	// primary key is `nearestOrder` (nearest original parameter)
620	$aOrder = $defaultGet( $nearestOrder, $a, $aCanon['key'] );
621	$bOrder = $defaultGet( $nearestOrder, $b, $bCanon['key'] );
622	if ( $aOrder !== $bOrder ) {
623	return $aOrder - $bOrder;
624	}
625	// secondary key is templatedata order
626	if ( $aCanon['key'] === $bCanon['key'] ) {
627	return $aCanon['order'] - $bCanon['order'];
628	}
629	$aOrder = $defaultGet( $tplDataOrder, $aCanon['key'] );
630	$bOrder = $defaultGet( $tplDataOrder, $bCanon['key'] );
631	if ( $aOrder !== $bOrder ) {
632	return $aOrder - $bOrder;
633	}
634	// tertiary key is original input order (makes sort stable)
635	$aOrder = $defaultGet( $newOrder, $a );
636	$bOrder = $defaultGet( $newOrder, $b );
637	return $aOrder - $bOrder;
638	};
639	}
640
641	/**
642	* Serialize part of a templatelike expression.
643	* @param SerializerState $state
644	* @param string $buf
645	* @param Element $node
646	* @param TemplateInfo $part The expression fragment to serialize. See $srcParts
647	* in serializeFromParts() for format.
648	* @param ?array $tplData Templatedata, see
649	* https://github.com/wikimedia/mediawiki-extensions-TemplateData/blob/master/Specification.md
650	* @param string\|TemplateInfo $prevPart Previous part. See $srcParts in serializeFromParts().
651	* @param string\|TemplateInfo $nextPart Next part. See $srcParts in serializeFromParts().
652	* @return string
653	*/
654	private function serializePart(
655	SerializerState $state, string $buf, Element $node, TemplateInfo $part,
656	?array $tplData, $prevPart, $nextPart
657	): string {
658	// Parse custom format specification, if present.
659	$defaultBlockSpc = "{{_\n\| _ = _\n}}"; // "block"
660	$defaultInlineSpc = '{{_\|_=_}}'; // "inline"
661
662	$format = isset( $tplData['format'] ) ? strtolower( $tplData['format'] ) : null;
663	if ( $format === 'block' ) {
664	$format = $defaultBlockSpc;
665	} elseif ( $format === 'inline' ) {
666	$format = $defaultInlineSpc;
667	}
668	// Check format string for validity.
669	preg_match( self::FORMATSTRING_REGEXP, $format ?? '', $parsedFormat );
670	if ( !$parsedFormat ) {
671	preg_match( self::FORMATSTRING_REGEXP, $defaultInlineSpc, $parsedFormat );
672	$format = null; // Indicates that no valid custom format was present.
673	}
674	$formatSOL = $parsedFormat[1] ?? '';
675	$formatStart = $parsedFormat[2] ?? '';
676	$formatParamName = $parsedFormat[3] ?? '';
677	$formatParamValue = $parsedFormat[4] ?? '';
678	$formatEnd = $parsedFormat[5] ?? '';
679	$formatEOL = $parsedFormat[6] ?? '';
680	$forceTrim = ( $format !== null ) \|\| WTUtils::isNewElt( $node );
681
682	// Shoehorn formatting of top-level templatearg wikitext into this code.
683	if ( $part->type === 'templatearg' ) {
684	$formatStart = preg_replace( '/{{/', '{{{', $formatStart, 1 );
685	$formatEnd = preg_replace( '/}}/', '}}}', $formatEnd, 1 );
686	}
687
688	// handle SOL newline requirement
689	if ( $formatSOL && !str_ends_with( ( $prevPart !== null ) ? $buf : ( $state->sep->src ?? '' ), "\n" ) ) {
690	$buf .= "\n";
691	}
692
693	// open the transclusion
694	$buf .= $this->formatStringSubst( $formatStart, $part->targetWt, $forceTrim );
695
696	// Short-circuit transclusions without params
697	$paramKeys = array_map( fn ( ParamInfo $pi ) => $pi->k, $part->paramInfos );
698	if ( !$paramKeys ) {
699	if ( substr( $formatEnd, 0, 1 ) === "\n" ) {
700	$formatEnd = substr( $formatEnd, 1 );
701	}
702	return $buf . $formatEnd;
703	}
704
705	// Trim whitespace from data-mw keys to deal with non-compliant
706	// clients. Make sure param info is accessible for the stripped key
707	// since later code will be using the stripped key always.
708	$tplKeysFromDataMw = [];
709	foreach ( $part->paramInfos as $pi ) {
710	$strippedKey = trim( $pi->k );
711	$tplKeysFromDataMw[$strippedKey] = $pi;
712	}
713
714	// Per-parameter info from data-parsoid for pre-existing parameters
715	$dp = DOMDataUtils::getDataParsoid( $node );
716	// Account for clients not setting the `i`, see T238721
717	$dpArgInfo = isset( $part->i ) ? ( $dp->pi[$part->i] ?? [] ) : [];
718
719	// Build a key -> arg info map
720	$dpArgInfoMap = [];
721	foreach ( $dpArgInfo as $info ) {
722	$dpArgInfoMap[$info->k] = $info;
723	}
724
725	// 1. Process all parameters and build a map of
726	// arg-name -> [serializeAsNamed, name, value]
727	//
728	// 2. Serialize tpl args in required order
729	//
730	// 3. Format them according to formatParamName/formatParamValue
731
732	$kvMap = [];
733	foreach ( $tplKeysFromDataMw as $key => $param ) {
734	// Storing keys in an array can turn them into ints; stringify.
735	$key = (string)$key;
736	$argInfo = $dpArgInfoMap[$key] ?? [];
737
738	// TODO: Other formats?
739	// Only consider the html parameter if the wikitext one
740	// isn't present at all. If it's present but empty,
741	// that's still considered a valid parameter.
742	if ( $param->valueWt !== null ) {
743	$value = $param->valueWt;
744	} elseif ( $param->html !== null ) {
745	$value = $this->htmlToWikitext( [ 'env' => $this->env ], $param->html );
746	} else {
747	$this->env->log(
748	'error',
749	"params in data-mw part is missing wt/html for $key. " .
750	"Serializing as empty string.",
751	"data-mw part: " . json_encode( $part->toJsonArray() )
752	);
753	$value = "";
754	}
755
756	Assert::invariant( is_string( $value ), "For param: $key, wt property should be a string '
757	. 'but got: $value" );
758
759	$serializeAsNamed = !empty( $argInfo->named );
760
761	// The name is usually equal to the parameter key, but
762	// if there's a key->wt attribute, use that.
763	$name = null;
764	if ( $param->keyWt !== null ) {
765	$name = $param->keyWt;
766	// And make it appear even if there wasn't any data-parsoid information.
767	$serializeAsNamed = true;
768	} else {
769	$name = $key;
770	}
771
772	// Use 'k' as the key, not 'name'.
773	//
774	// The normalized form of 'k' is used as the key in both
775	// data-parsoid and data-mw. The full non-normalized form
776	// is present in '$param->keyWt'
777	$kvMap[$key] = [ 'serializeAsNamed' => $serializeAsNamed, 'name' => $name, 'value' => $value ];
778	}
779
780	$argOrder = array_keys( $kvMap );
781	usort( $argOrder, $this->createParamComparator( $dpArgInfo, $tplData, $argOrder ) );
782
783	$argIndex = 1;
784	$numericIndex = 1;
785
786	$numPositionalArgs = 0;
787	foreach ( $dpArgInfo as $pi ) {
788	if ( isset( $tplKeysFromDataMw[trim( $pi->k )] ) && empty( $pi->named ) ) {
789	$numPositionalArgs++;
790	}
791	}
792
793	$argBuf = [];
794	foreach ( $argOrder as $param ) {
795	$kv = $kvMap[$param];
796	// Add nowiki escapes for the arg value, as required
797	$escapedValue = $this->wteHandlers->escapeTplArgWT( $kv['value'], [
798	'serializeAsNamed' => $kv['serializeAsNamed'] \|\| $param !== $numericIndex,
799	'type' => $part->type,
800	'argPositionalIndex' => $numericIndex,
801	'numPositionalArgs' => $numPositionalArgs,
802	'argIndex' => $argIndex++,
803	'numArgs' => count( $tplKeysFromDataMw ),
804	] );
805	if ( $escapedValue['serializeAsNamed'] ) {
806	// WS trimming for values of named args
807	$argBuf[] = [ 'dpKey' => $param, 'name' => $kv['name'], 'value' => trim( $escapedValue['v'] ) ];
808	} else {
809	$numericIndex++;
810	// No WS trimming for positional args
811	$argBuf[] = [ 'dpKey' => $param, 'name' => null, 'value' => $escapedValue['v'] ];
812	}
813	}
814
815	// If no explicit format is provided, default format is:
816	// - 'inline' for new args
817	// - whatever format is available from data-parsoid for old args
818	// (aka, overriding formatParamName/formatParamValue)
819	//
820	// If an unedited node OR if paramFormat is unspecified,
821	// this strategy prevents unnecessary normalization
822	// of edited transclusions which don't have valid
823	// templatedata formatting information.
824
825	// "magic case": If the format string ends with a newline, an extra newline is added
826	// between the template name and the first parameter.
827
828	foreach ( $argBuf as $arg ) {
829	$name = $arg['name'];
830	$val = $arg['value'];
831	if ( $name === null ) {
832	// We are serializing a positional parameter.
833	// Whitespace is significant for these and
834	// formatting would change semantics.
835	$name = '';
836	$modFormatParamName = '\|_';
837	$modFormatParamValue = '_';
838	} elseif ( $name === '' ) {
839	// No spacing for blank parameters ({{foo\|=bar}})
840	// This should be an edge case and probably only for
841	// inline-formatted templates, but we are consciously
842	// forcing this default here. Can revisit if this is
843	// ever a problem.
844	$modFormatParamName = '\|_=';
845	$modFormatParamValue = '_';
846	} else {
847	// Preserve existing spacing, esp if there was a comment
848	// embedded in it. Otherwise, follow TemplateData's lead.
849	// NOTE: In either case, we are forcibly normalizing
850	// non-block-formatted transclusions into block formats
851	// by adding missing newlines.
852	$spc = $dpArgInfoMap[$arg['dpKey']]->spc ?? null;
853	if ( $spc && ( !$format \|\| preg_match( Utils::COMMENT_REGEXP, $spc[3] ?? '' ) ) ) {
854	$nl = ( substr( $formatParamName, 0, 1 ) === "\n" ) ? "\n" : '';
855	$modFormatParamName = $nl . '\|' . $spc[0] . '_' . $spc[1] . '=' . $spc[2];
856	$modFormatParamValue = '_' . $spc[3];
857	} else {
858	$modFormatParamName = $formatParamName;
859	$modFormatParamValue = $formatParamValue;
860	}
861	}
862
863	// Don't create duplicate newlines.
864	$trailing = preg_match( self::TRAILING_COMMENT_OR_WS_AFTER_NL_REGEXP, $buf );
865	if ( $trailing && substr( $formatParamName, 0, 1 ) === "\n" ) {
866	$modFormatParamName = substr( $formatParamName, 1 );
867	}
868
869	$buf .= $this->formatStringSubst( $modFormatParamName, $name, $forceTrim );
870	$buf .= $this->formatStringSubst( $modFormatParamValue, $val, $forceTrim );
871	}
872
873	// Don't create duplicate newlines.
874	if ( preg_match( self::TRAILING_COMMENT_OR_WS_AFTER_NL_REGEXP, $buf )
875	&& substr( $formatEnd, 0, 1 ) === "\n"
876	) {
877	$buf .= substr( $formatEnd, 1 );
878	} else {
879	$buf .= $formatEnd;
880	}
881
882	if ( $formatEOL ) {
883	if ( $nextPart === null ) {
884	// This is the last part of the block. Add the \n only
885	// if the next non-comment node is not a text node
886	// of if the text node doesn't have a leading \n.
887	$next = DiffDOMUtils::nextNonDeletedSibling( $node );
888	while ( $next instanceof Comment ) {
889	$next = DiffDOMUtils::nextNonDeletedSibling( $next );
890	}
891	if ( !( $next instanceof Text ) \|\| substr( $next->nodeValue, 0, 1 ) !== "\n" ) {
892	$buf .= "\n";
893	}
894	} elseif ( !is_string( $nextPart ) \|\| substr( $nextPart, 0, 1 ) !== "\n" ) {
895	// If nextPart is another template, and it wants a leading nl,
896	// this \n we add here will count towards that because of the
897	// formatSOL check at the top.
898	$buf .= "\n";
899	}
900	}
901
902	return $buf;
903	}
904
/**
 * Serialize a transclusion from the parts recorded for it.
 *
 * String parts are appended verbatim. Every other part is passed to
 * serializePart(); parts that are not template arguments and carry an
 * href additionally get their TemplateData looked up, but only when the
 * node is new or has diff markers.
 *
 * @param SerializerState $state
 * @param Element $node
 * @param list<string|TemplateInfo> $srcParts Template parts
 * @return string
 */
public function serializeFromParts(
	SerializerState $state, Element $node, array $srcParts
): string {
	$wantsTplData = WTUtils::isNewElt( $node ) || DiffUtils::hasDiffMarkers( $node );
	$wt = '';

	foreach ( $srcParts as $idx => $part ) {
		if ( is_string( $part ) ) {
			$wt .= $part;
			continue;
		}

		$before = $srcParts[$idx - 1] ?? null;
		$after = $srcParts[$idx + 1] ?? null;

		if ( !isset( $part->targetWt ) ) {
			// Maybe we should just raise a ClientError
			$this->env->log( 'error', 'data-mw.parts array is malformed: ',
				DOMCompat::getOuterHTML( $node ), PHPUtils::jsonEncode( $srcParts ) );
			continue;
		}

		// Clients sometimes leave off the params array, presumably when
		// it is empty. See T291741
		$part->paramInfos ??= [];

		// Template arguments never use TemplateData. For transclusions,
		// templates have $part->href while parser functions have $part->func.
		$tplData = null;
		if ( $part->type !== 'templatearg' ) {
			// The API can return several template data objects per call, but
			// we fetch one at a time to benefit from cached responses.
			if ( isset( $part->href ) && $wantsTplData ) {
				// Not a parser function
				try {
					$title = Title::newFromText(
						PHPUtils::stripPrefix( Utils::decodeURIComponent( $part->href ), './' ),
						$this->env->getSiteConfig()
					);
					$tplData = $this->env->getDataAccess()->fetchTemplateData(
						$this->env->getPageConfig(), $title
					);
				} catch ( Exception $err ) {
					// Log the error, and use default serialization mode.
					// Better to misformat a transclusion than to lose an edit.
					$this->env->log( 'error/html2wt/tpldata', $err );
				}
			}

			// A missing template, or one without TemplateData, is treated
			// the same as having no data at all.
			if ( !empty( $tplData['missing'] ) || !empty( $tplData['notemplatedata'] ) ) {
				$tplData = null;
			}
		}

		$wt = $this->serializePart( $state, $wt, $node, $part, $tplData, $before, $after );
	}

	return $wt;
}
977
/**
 * Build the wikitext start tag for an extension node.
 *
 * The tag name and attributes come from the node's data-mw; the node's
 * `about` and `typeof` attributes are added to the token handed to
 * serializeAttributes(). The tag is emitted self-closed (" />") when
 * data-mw has an empty body.
 *
 * @param Element $node
 * @param SerializerState $state Not used by this method
 * @return string
 */
public function serializeExtensionStartTag( Element $node, SerializerState $state ): string {
	$dataMw = DOMDataUtils::getDataMw( $node );
	$tagName = $dataMw->name;

	// Serialize extension attributes in normalized form as:
	// key='value'
	// FIXME: with no dataParsoid, shadow info will mark it as new
	$kvs = [];
	foreach ( (array)( $dataMw->attrs ?? [] ) as $attrName => $attrValue ) {
		$kvs[] = new KV( $attrName, $attrValue );
	}
	$token = new TagTk( $tagName, $kvs );

	foreach ( [ 'about', 'typeof' ] as $attrName ) {
		$attrValue = DOMCompat::getAttribute( $node, $attrName );
		if ( $attrValue !== null ) {
			$token->addAttribute( $attrName, $attrValue );
		}
	}

	$serializedAttrs = $this->serializeAttributes( $node, $token );
	$openTag = '<' . $tagName . ( $serializedAttrs ? ' ' . $serializedAttrs : '' );

	return $openTag . ( empty( $dataMw->body ) ? ' />' : '>' );
}
1006
/**
 * Serialize an extension node as start tag, body source and end tag.
 *
 * Without a data-mw body only the (self-closed) start tag is returned.
 * When the body has no string `extsrc`, the original source recorded in
 * data-parsoid is returned instead if available; otherwise the tag is
 * emitted with an empty body. Both fallbacks are logged as errors.
 *
 * @param Element $node
 * @param SerializerState $state
 * @return string
 */
public function defaultExtensionHandler( Element $node, SerializerState $state ): string {
	$dp = DOMDataUtils::getDataParsoid( $node );
	$dataMw = DOMDataUtils::getDataMw( $node );
	$startTag = $this->serializeExtensionStartTag( $node, $state );

	if ( !isset( $dataMw->body ) ) {
		// We self-closed this already.
		return $startTag;
	}

	$extSrc = $dataMw->body->extsrc ?? null;
	if ( !is_string( $extSrc ) ) {
		if ( isset( $dp->src ) ) {
			$this->env->log(
				'error/html2wt/ext',
				'Extension data-mw missing for: ' . DOMCompat::getOuterHTML( $node )
			);
			return $dp->src;
		}

		$this->env->log(
			'error/html2wt/ext',
			'Extension src unavailable for: ' . DOMCompat::getOuterHTML( $node )
		);
		$extSrc = '';
	}

	return $startTag . $extSrc . '</' . $dataMw->name . '>';
}
1029
/**
 * Emit text while routing separator-like whitespace at either end of
 * it into the separator state instead of the output chunk.
 *
 * @param string $res
 * @param Node $node
 */
private function serializeText( string $res, Node $node ): void {
	$state = $this->state;
	$suffixRE = self::$separatorREs['sepSuffixWithNlsRE'];

	// Peel off trailing separator-like text (at least 1 newline and
	// other whitespace); it is handed to the separator logic further down.
	preg_match( $suffixRE, $res, $trailingSep );
	$res = preg_replace( $suffixRE, '', $res, 1 );

	// Outside indent-pre, leading newlines and other whitespace
	// become part of the pending separator.
	if ( !$state->inIndentPre
		&& preg_match( self::$separatorREs['sepPrefixWithNlsRE'], $res, $leadingSep )
	) {
		$state->appendSep( $leadingSep[0] );
		$res = substr( $res, strlen( $leadingSep[0] ) );
	}

	$state->emitChunk(
		$state->needsEscaping ? Utils::escapeWtEntities( $res ) : $res,
		$node
	);

	// Move trailing newlines into the next separator, but only if no
	// separator source has accumulated yet.
	// SSS FIXME: what are we doing with the stripped NLs otherwise??
	if ( $trailingSep && !$state->sep->src ) {
		$state->appendSep( $trailingSep[0] );
	}
}
1064
/**
 * Serialize a text node's value with wikitext escaping switched on
 * for the duration of the call.
 *
 * @param Node $node
 * @return Node|null The node's next sibling
 */
private function serializeTextNode( Node $node ): ?Node {
	$state = $this->state;

	$state->needsEscaping = true;
	$this->serializeText( $node->nodeValue, $node );
	$state->needsEscaping = false;

	return $node->nextSibling;
}
1076
/**
 * Emit non-separator wikitext as-is; this method does not switch on
 * escaping. Separator handling is delegated to serializeText().
 *
 * @param string $res Wikitext to emit
 * @param Node $node Node the wikitext is emitted for
 */
public function emitWikitext( string $res, Node $node ): void {
	$this->serializeText( $res, $node );
}
1085
/**
 * DOM-based serialization of an element, reusing the original source
 * in selser mode when the node qualifies for that.
 *
 * To serialize a node from source, the node should satisfy these
 * conditions:
 *
 * 1. It should not have a diff marker or be in a modified subtree.
 *    WTS should not be in a subtree with a modification flag that
 *    applies to every node of a subtree (rather than an indication
 *    that some node in the subtree is modified).
 *
 * 2. It should continue to be valid in any surrounding edited context.
 *    For some nodes, modification of surrounding context can change
 *    serialized output of this node
 *    (ex: <td>s and whether you emit | or || for them)
 *
 * 3. It should have valid, usable DSR.
 *
 * 4. Either it has non-zero positive DSR width, or meets one of the
 *    following:
 *    4a. It is content like <p><br/><p> or an automatically-inserted
 *        wikitext <references/> (HTML <ol>) (will have dsr-width 0)
 *    4b. it is fostered content (will have dsr-width 0)
 *    4c. it is misnested content (will have dsr-width 0)
 *
 * SSS FIXME: Additionally, we can guard against buggy DSR with
 * some validity checks. We can test that non-sep src content
 * leading wikitext markup corresponds to the node type.
 * Ex: If node.nodeName is 'UL', then src[0] should be '*'
 * TO BE DONE
 *
 * @param Element $node
 * @param DOMHandler $domHandler
 * @return Node|null
 */
private function serializeNodeInternal( Element $node, DOMHandler $domHandler ) {
	$state = $this->state;
	$dp = DOMDataUtils::getDataParsoid( $node );
	$wrapperUnmodified = false;

	// Work out whether the original source is a candidate for reuse.
	// The checks short-circuit in this order so that the DSR is only
	// dereferenced once it is known to be valid.
	$reuseCandidate = false;
	if ( $state->selserMode
		&& !$state->inInsertedContent
		&& WTSUtils::origSrcValidInEditedContext( $state, $node )
		&& Utils::isValidDSR( $dp->dsr ?? null )
	) {
		if ( $dp->dsr->end > $dp->dsr->start ) {
			$reuseCandidate = true;
		} elseif (
			// FIXME: <p><br/></p>
			// nodes that have dsr width 0 because currently,
			// we emit newlines outside the p-nodes. So, this check
			// tries to handle that scenario.
			$dp->dsr->end === $dp->dsr->start && (
				in_array( DOMCompat::nodeName( $node ), [ 'p', 'br' ], true )
				|| !empty( DOMDataUtils::getDataMw( $node )->autoGenerated )
				// FIXME: This is only necessary while outputContentVersion
				// 2.1.2 - 2.2.0 are still valid
				|| DOMUtils::hasTypeOf( $node, 'mw:Placeholder/StrippedTag' )
			)
		) {
			$reuseCandidate = true;
		} else {
			$reuseCandidate = !empty( $dp->fostered ) || !empty( $dp->misnested );
		}
	}

	if ( $reuseCandidate ) {
		if ( !DiffUtils::hasDiffMarkers( $node ) ) {
			// If this HTML node will disappear in wikitext because of
			// zero width, then the separator constraints will carry over
			// to the node's children.
			//
			// Since we dont recurse into 'node' in selser mode, we update the
			// separator constraintInfo to apply to 'node' and its first child.
			//
			// We could clear constraintInfo altogether which would be
			// correct (but could normalize separators and introduce dirty
			// diffs unnecessarily).

			$state->currNodeUnmodified = true;

			if ( WTUtils::isZeroWidthWikitextElt( $node )
				&& $node->hasChildNodes()
				&& ( $state->sep->constraints['constraintInfo']['sepType'] ?? null ) === 'sibling'
			) {
				$state->sep->constraints['constraintInfo']['onSOL'] = $state->onSOL;
				$state->sep->constraints['constraintInfo']['sepType'] = 'parent-child';
				$state->sep->constraints['constraintInfo']['nodeA'] = $node;
				$state->sep->constraints['constraintInfo']['nodeB'] = $node->firstChild;
			}

			$origSrc = $state->getOrigSrc( $dp->dsr ) ?? '';

			$this->trace(
				'ORIG-src with DSR',
				static fn () => '[' . $dp->dsr->start . ',' . $dp->dsr->end . '] = '
					. PHPUtils::jsonEncode( $origSrc )
			);

			// When reusing source, we should only suppress serializing
			// to a single line for the cases we've allowed in normal serialization.
			// <a> tags might look surprising here, but, here is the rationale.
			// If some link syntax (wikilink, extlink, etc.) accepted a newline
			// originally, we can safely let it through here. There is no need to have
			// specific checks for wikilnks / extlinks / ... etc. The only concern is
			// if the surrounding context in which this link-syntax is embedded also
			// breaks the link syntax. There is no such syntax right now.
			// FIXME: Note the limitation here, that if these nodes are nested
			// in something as trivial as an i / b, the suppression won't happen
			// and we'll dirty the text.
			$nodeName = DOMCompat::nodeName( $node );
			$suspendSingleLine = WTUtils::isFirstEncapsulationWrapperNode( $node )
				|| DOMUtils::hasTypeOf( $node, 'mw:Nowiki' )
				|| in_array( $nodeName, [ 'dl', 'ul', 'ol', 'a' ], true )
				|| ( $nodeName === 'table'
					&& DOMCompat::nodeName( $node->parentNode ) === 'dd'
					&& DiffDOMUtils::previousNonSepSibling( $node ) === null );

			// Use selser to serialize this text! The original wikitext is
			// `$origSrc`. `ConstrainedText::fromSelSer` picks the right type
			// of ConstrainedText chunk(s) to represent it based on the node
			// type; since the wikitext may have to be broken into multiple
			// chunks, the result is iterated over.
			if ( $suspendSingleLine ) {
				$state->singleLineContext->disable();
			}
			foreach ( ConstrainedText::fromSelSer( $origSrc, $node, $dp, $this->env ) as $chunk ) {
				$state->emitChunk( $chunk, $chunk->node );
			}
			if ( $suspendSingleLine ) {
				$state->singleLineContext->pop();
			}

			// Encapsulated content has already been serialized as part of
			// the reused source, so skip over it.
			return WTUtils::isFirstEncapsulationWrapperNode( $node )
				? WTUtils::skipOverEncapsulatedContent( $node )
				: $node->nextSibling;
		}

		$wrapperUnmodified = DiffUtils::onlySubtreeChanged( $node )
			&& WTSUtils::hasValidTagWidths( $dp->dsr ?? null );
	}

	$state->currNodeUnmodified = false;

	// Flag inserted content while the handler runs, then restore the
	// flag to whatever it was on entry.
	$priorInsertedState = $state->inInsertedContent;
	$nodeIsInserted = $state->selserMode && DiffUtils::hasInsertedDiffMark( $node );

	if ( $nodeIsInserted ) {
		$state->inInsertedContent = true;
	}

	$next = $domHandler->handle( $node, $state, $wrapperUnmodified );

	if ( $nodeIsInserted ) {
		$state->inInsertedContent = $priorInsertedState;
	}

	return $next;
}
1247
/**
 * Internal worker. Serializes one DOM node and reports which node to
 * process next.
 *
 * Diff-marker elements, separator-only text and comments are folded
 * into the separator state and return early. Other elements and text
 * nodes have separator constraints updated before and after their
 * serialization handler runs.
 *
 * @private
 * @param Node $node
 * @return ?Node
 */
public function serializeNode( Node $node ): ?Node {
	$nodeName = DOMCompat::nodeName( $node );
	$handlerFactory = new DOMHandlerFactory();
	$state = $this->state;
	$state->currNode = $node;

	// Selser traces additionally report the previous node's state.
	$traceArgs = [ static fn () => WTSUtils::traceNodeName( $node ) ];
	if ( $state->selserMode ) {
		array_push( $traceArgs, '; prev-unmodified: ', $state->prevNodeUnmodified );
	}
	array_push( $traceArgs, '; SOL: ', $state->onSOL );
	$this->trace( ...$traceArgs );

	$domHandler = null;
	$isElement = false;

	switch ( $node->nodeType ) {
		case XML_ELEMENT_NODE:
			'@phan-var Element $node';/** @var Element $node */
			// Ignore DiffMarker metas, but clear unmodified node state
			if ( DiffUtils::isDiffMarker( $node ) ) {
				$state->updateModificationFlags( $node );
				// `state.sep.lastSourceNode` is cleared here so that removed
				// separators between otherwise unmodified nodes don't get
				// restored.
				$state->updateSep( $node );
				return $node->nextSibling;
			}
			$domHandler = $handlerFactory->getDOMHandler( $node );
			$isElement = true;
			break;

		case XML_TEXT_NODE:
			// This code assumes that the DOM is in normalized form with no
			// run of text nodes.
			// Accumulate whitespace from the text node into state.sep.src
			$text = $node->nodeValue;
			if ( !$state->inIndentPre
				// PORT-FIXME: original uses this->state->serializer->separatorREs
				// but that does not seem useful
				&& preg_match( self::$separatorREs['pureSepRE'], $text )
			) {
				$state->appendSep( $text );
				return $node->nextSibling;
			}
			if ( $state->selserMode ) {
				$prevSibling = $node->previousSibling;
				$state->currNodeUnmodified = !$state->inInsertedContent && (
					( !$prevSibling && DOMUtils::atTheTop( $node->parentNode ) ) ||
					( $prevSibling && !DiffUtils::isDiffMarker( $prevSibling ) )
				);
			}
			$domHandler = new DOMHandler( false );
			break;

		case XML_COMMENT_NODE:
			// Merge this into separators
			$state->appendSep( WTSUtils::commentWT( $node->nodeValue ) );
			return $node->nextSibling;

		default:
			throw new InternalException( 'Unhandled node type: ' . $node->nodeType );
	}

	$before = DiffDOMUtils::previousNonSepSibling( $node ) ?: $node->parentNode;
	$this->env->log( 'debug/wts', 'Before constraints for ' . $nodeName );
	$state->separators->updateSeparatorConstraints(
		$before, $handlerFactory->getDOMHandler( $before ),
		$node, $domHandler
	);

	$this->env->log( 'debug/wts', 'Calling serialization handler for ' . $nodeName );
	$nextNode = $isElement
		? $this->serializeNodeInternal( $node, $domHandler )
		: $this->serializeTextNode( $node );

	$after = DiffDOMUtils::nextNonSepSibling( $node ) ?: $node->parentNode;
	$this->env->log( 'debug/wts', 'After constraints for ' . $nodeName );
	$state->separators->updateSeparatorConstraints(
		$node, $domHandler,
		$after, $handlerFactory->getDOMHandler( $after )
	);

	// Update modification flags
	$state->updateModificationFlags( $node );

	return $nextNode;
}
1349
/**
 * Undo heading nowiki escapes that turned out to be spurious.
 *
 * When the line matches HEADING_NOWIKI_REGEXP and what follows the
 * escaped part is something other than comments/whitespace, the
 * trailing = is not in EOL position, so the first captured group is
 * run through the escaper again and the remainder re-attached.
 *
 * @param string $line
 * @return string
 */
private function stripUnnecessaryHeadingNowikis( string $line ): string {
	$state = $this->state;
	if ( !$state->hasHeadingEscapes ) {
		return $line;
	}

	if ( !preg_match( self::HEADING_NOWIKI_REGEXP, $line, $parts ) ) {
		// All is good.
		return $line;
	}

	if ( preg_match( self::COMMENT_OR_WS_REGEXP, $parts[2] ) ) {
		// All is good.
		return $line;
	}

	// The nowikiing was spurious since the trailing = is not in EOL position
	$reEscaped = $state->serializer->wteHandlers->escapedText(
		$state, false, $parts[1], false, true
	);

	return $reEscaped . $parts[2];
}
1370
/**
 * Post-pass over the whole output that removes or shrinks
 * `<nowiki>`-wrapped whitespace at the start of lines.
 *
 * The output is split on lines that begin with sol-transparent wikitext
 * followed by a whitespace-only nowiki. For each such line the nowiki is
 * either unwrapped (the rest of the line is only sol-transparent
 * wikitext, or contains a block tag and no non-HTML5 tag) or dropped /
 * replaced by `<nowiki/>` after leading whitespace is stripped from the
 * rest of the line.
 */
private function stripUnnecessaryIndentPreNowikis(): void {
	$siteConfig = $this->env->getSiteConfig();

	// FIXME: The solTransparentWikitextRegexp includes redirects, which really
	// only belong at the SOF and should be unique. See the "New redirect" test.
	$splitRE = '@^'
		. PHPUtils::reStrip( $siteConfig->solTransparentWikitextNoWsRegexp(), '@' )
		. '((?i:<nowiki>\s+</nowiki>))([^\n]*(?:\n|$))' . '@Dm';
	$pieces = preg_split( $splitRE, $this->state->out, -1, PREG_SPLIT_DELIM_CAPTURE );
	$numPieces = count( $pieces );

	// Each match contributes four pieces: leading sol-transparent text,
	// the nowiki, the rest of the line, and the text up to the next match.
	$result = $pieces[0];
	for ( $i = 1; $i < $numPieces; $i += 4 ) {
		$result .= $pieces[$i];
		$nowiki = $pieces[$i + 1];
		$rest = $pieces[$i + 2];

		// Not required if just sol transparent wt.
		$keepProtection = !preg_match( $siteConfig->solTransparentWikitextRegexp(), $rest );

		if ( $keepProtection ) {
			// Collect tags on the rest of the line, ignoring comments
			preg_match_all( '/<[^!][^<>]*>/', $rest, $tagMatches );
			foreach ( $tagMatches[0] as $rawTag ) {
				// Strip </, attributes, and > to get the tagname
				$tagName = preg_replace( '/<\/?|\s.*|>/', '', $rawTag );
				if ( !isset( Consts::$HTML['HTML5Tags'][$tagName] ) ) {
					// If we encounter any tag that is not a html5 tag,
					// it could be an extension tag. We could do a more complex
					// regexp or tokenize the string to determine if any block tags
					// show up outside the extension tag. But, for now, we just
					// conservatively bail and leave the nowiki as is.
					$keepProtection = true;
					break;
				}
				if ( TokenUtils::isWikitextBlockTag( $tagName ) ) {
					// FIXME: Extension tags shadowing html5 tags might not
					// have block semantics.
					// Block tags on a line suppress nowikis
					$keepProtection = false;
				}
			}
		}

		if ( $keepProtection ) {
			$noWsFragment = PHPUtils::reStrip(
				$siteConfig->solTransparentWikitextNoWsRegexp(), '/' );
			$leadingWsRE = '/^(' . $noWsFragment . ')\s+/';
			// Replace all leading whitespace
			do {
				$before = $rest;
				$rest = preg_replace( $leadingWsRE, '$1', $rest );
			} while ( $rest !== $before );

			// Protect against sol-sensitive wikitext characters
			$solCharsRE = '/^' . $noWsFragment . '[=*#:;]/';
			$nowiki = preg_replace( '#^<nowiki>(\s+)</nowiki>#',
				preg_match( $solCharsRE, $rest ) ? '<nowiki/>' : '', $nowiki, 1 );
		} else {
			$nowiki = preg_replace( '#^<nowiki>(\s+)</nowiki>#', '$1', $nowiki, 1 );
		}

		$result .= $nowiki . $rest . $pieces[$i + 3];
	}

	$this->state->out = $result;
}
1431
/**
 * This implements a heuristic to strip two common sources of <nowiki/>s.
 * When <i> and <b> tags are matched up properly,
 * - any single ' char before <i> or <b> does not need <nowiki/> protection.
 * - any single ' char before </i> or </b> does not need <nowiki/> protection.
 *
 * The line is returned unchanged whenever the quote / bracket / tag
 * pairing on it cannot be verified. At most one <nowiki/> (the last
 * strippable one found) is removed per call.
 *
 * @param string $line
 * @return string
 */
private function stripUnnecessaryQuoteNowikis( string $line ): string {
	if ( !$this->state->hasQuoteNowikis ) {
		return $line;
	}

	// Optimization: We are interested in <nowiki/>s before quote chars.
	// So, skip this if we don't have both.
	if ( !( preg_match( '#<nowiki\s*/>#', $line ) && preg_match( "/'/", $line ) ) ) {
		return $line;
	}

	// * Split out all the [[ ]] {{ }} '' ''' ''''' <..> </...>
	//   parens in the regexp mean that the split segments will
	//   be spliced into the result array as the odd elements.
	// * If we match up the tags properly and we see opening
	//   <i> / <b> / <i><b> tags preceded by a '<nowiki/>, we
	//   can remove all those nowikis.
	//   Ex: '<nowiki/>''foo'' bar '<nowiki/>'''baz'''
	// * If we match up the tags properly and we see closing
	//   <i> / <b> / <i><b> tags preceded by a '<nowiki/>, we
	//   can remove all those nowikis.
	//   Ex: ''foo'<nowiki/>'' bar '''baz'<nowiki/>'''
	//
	// The opening-tag alternative must accept an arbitrary attribute
	// string (`[^>]*?`) and arbitrary trailing whitespace (`\s*?`).
	// With single-character quantifiers, tags such as <span class="x">
	// are never split out and their close tags unbalance the stack below.
	// phpcs:ignore Generic.Files.LineLength.TooLong
	$p = preg_split( "#('''''|'''|''|\[\[|\]\]|\{\{|\}\}|<\w+(?:\s+[^>]*?|\s*?)/?>|</\w+\s*>)#", $line, -1, PREG_SPLIT_DELIM_CAPTURE );

	// Which nowiki do we strip out?
	$nowikiIndex = -1;

	// Verify that everything else is properly paired up.
	$stack = [];
	$quotesOnStack = 0;
	$n = count( $p );
	$nonHtmlTag = null;
	for ( $j = 1; $j < $n; $j += 2 ) {
		// For HTML tags, pull out just the tag name for clearer code below.
		preg_match( '#^<(/?\w+)#', $p[$j], $matches );
		$tag = mb_strtolower( $matches[1] ?? $p[$j] );
		$tagLen = strlen( $tag );
		$selfClose = false;
		if ( str_ends_with( $p[$j], '/>' ) ) {
			$tag .= '/';
			$selfClose = true;
		}

		// Ignore non-html-tag (<nowiki> OR extension tag) blocks
		if ( !$nonHtmlTag ) {
			if ( isset( $this->env->getSiteConfig()->getExtensionTagNameMap()[$tag] ) ) {
				$nonHtmlTag = $tag;
				continue;
			}
		} else {
			if ( $tagLen > 0 && $tag[0] === '/' && substr( $tag, 1 ) === $nonHtmlTag ) {
				$nonHtmlTag = null;
			}
			continue;
		}

		if ( $tag === ']]' ) {
			if ( array_pop( $stack ) !== '[[' ) {
				return $line;
			}
		} elseif ( $tag === '}}' ) {
			if ( array_pop( $stack ) !== '{{' ) {
				return $line;
			}
		} elseif ( $tagLen > 0 && $tag[0] === '/' ) { // closing html tag
			// match html/ext tags
			$openTag = array_pop( $stack );
			if ( $tag !== ( '/' . $openTag ) ) {
				return $line;
			}
		} elseif ( $tag === 'nowiki/' ) {
			// We only want to process:
			// - trailing single quotes (bar')
			// - or single quotes by themselves without a preceding '' sequence
			if ( substr( $p[$j - 1], -1 ) === "'"
				&& !( $p[$j - 1] === "'" && $j > 1 && substr( $p[$j - 2], -2 ) === "''" )
				// Consider <b>foo<i>bar'</i>baz</b> or <b>foo'<i>bar'</i>baz</b>.
				// The <nowiki/> before the <i> or </i> cannot be stripped
				// if the <i> is embedded inside another quote.
				&& ( $quotesOnStack === 0
					// The only strippable scenario with a single quote elt on stack
					// is: ''bar'<nowiki/>''
					// -> ["", "''", "bar'", "<nowiki/>", "", "''"]
					|| ( $quotesOnStack === 1
						&& $j + 2 < $n
						&& $p[$j + 1] === ''
						&& $p[$j + 2][0] === "'"
						&& $p[$j + 2] === PHPUtils::lastItem( $stack ) ) )
			) {
				$nowikiIndex = $j;
			}
			continue;
		} elseif ( $selfClose || $tag === 'br' ) {
			// Skip over self-closing tags or what should have been self-closed.
			// ( While we could do this for all void tags defined in
			// mediawiki.wikitext.constants.js, <br> is the most common
			// culprit. )
			continue;
		} elseif ( $tagLen > 0 && $tag[0] === "'" && PHPUtils::lastItem( $stack ) === $tag ) {
			array_pop( $stack );
			$quotesOnStack--;
		} else {
			$stack[] = $tag;
			if ( $tagLen > 0 && $tag[0] === "'" ) {
				$quotesOnStack++;
			}
		}
	}

	// Anything left open means the pairing could not be verified.
	if ( count( $stack ) ) {
		return $line;
	}

	if ( $nowikiIndex !== -1 ) {
		// We can only remove the final trailing nowiki.
		//
		// HTML : <i>'foo'</i>
		// line : ''<nowiki/>'foo'<nowiki/>''
		$p[$nowikiIndex] = '';
		return implode( '', $p );
	} else {
		return $line;
	}
}
1565
/**
 * Serialize an HTML DOM.
 *
 * WARNING: You probably want to use WikitextContentModelHandler::fromDOM instead.
 *
 * The DOM is normalized, serialized through the serializer state, and
 * then post-processed to strip nowikis that turned out to be
 * unnecessary. Buffered redirect text, if any, is prepended last.
 *
 * @param Document|DocumentFragment $node
 * @param bool $selserMode
 * @return string
 */
public function serializeDOM(
	Node $node, bool $selserMode = false
): string {
	Assert::parameterType(
		Document::class . '|' . DocumentFragment::class,
		$node, '$node' );

	if ( $node instanceof Document ) {
		$node = DOMCompat::getBody( $node );
	}

	$this->logType = $selserMode ? 'trace/selser' : 'trace/wts';

	$state = $this->state;
	$state->initMode( $selserMode );

	$domNormalizer = new DOMNormalizer( $state );
	$domNormalizer->normalize( $node );

	if ( $this->env->hasDumpFlag( 'dom:post-normal' ) ) {
		$options = [ 'storeDiffMark' => true ];
		$this->env->writeDump( ContentUtils::dumpDOM( $node, 'DOM: post-normal', $options ) );
	}

	$state->kickOffSerialize( $node );

	if ( $state->hasIndentPreNowikis ) {
		// FIXME: Perhaps this can be done on a per-line basis
		// rather than do one post-pass on the entire document.
		$this->stripUnnecessaryIndentPreNowikis();
	}

	$splitLines = $state->selserMode
		|| $state->hasQuoteNowikis
		|| $state->hasSelfClosingNowikis
		|| $state->hasHeadingEscapes;

	if ( $splitLines ) {
		// The quote and heading nowiki cleanups work on one line at a time.
		$state->out = implode( "\n", array_map( function ( $line ) {
			$line = $this->stripUnnecessaryQuoteNowikis( $line );

			return $this->stripUnnecessaryHeadingNowikis( $line );
		}, explode( "\n", $state->out ) ) );
	}

	if ( $state->redirectText && $state->redirectText !== 'unbuffered' ) {
		// A limit of 2 splits off just the first line; a limit of 1
		// would hand back the entire output as the only element.
		$firstLine = explode( "\n", $state->out, 2 )[0];
		// Separate the redirect from the content with a newline unless
		// the content is empty or already starts with whitespace.
		$nl = preg_match( '/^(\s|$)/D', $firstLine ) ? '' : "\n";
		$state->out = $state->redirectText . $nl . $state->out;
	}

	return $state->out;
}
1630
/**
 * Log the given arguments under the serializer's current log type.
 *
 * @note Porting note: this replaces the pattern $serializer->env->log( $serializer->logType, ... )
 * @param mixed ...$args Forwarded unchanged to the environment's logger
 */
public function trace( ...$args ) {
	$this->env->log( $this->logType, ...$args );
}
1638
1639	}