MediaWiki REL1_39
RemexCompatFormatter.php
Go to the documentation of this file.
1<?php
2
3namespace MediaWiki\Tidy;
4
5use Sanitizer;
6use Wikimedia\RemexHtml\HTMLData;
7use Wikimedia\RemexHtml\Serializer\HtmlFormatter;
8use Wikimedia\RemexHtml\Serializer\SerializerNode;
9
/**
 * HtmlFormatter subclass that customises how text nodes and elements are
 * serialized: extra characters are escaped, '&' is left alone, character
 * references are normalized through Sanitizer, and empty li/p/tr elements
 * are tagged with the "mw-empty-elt" class.
 */
class RemexCompatFormatter extends HtmlFormatter {
	/**
	 * Element names which receive class="mw-empty-elt" when they carry no
	 * attributes and contain nothing but whitespace.
	 */
	private static $markedEmptyElements = [
		'li' => true,
		'p' => true,
		'tr' => true,
	];

	/** @var callable|null Optional callback applied to text outside raw text elements */
	private $textProcessor;

	/**
	 * @param array $options Passed through to the parent constructor. The
	 *   'textProcessor' key, if present, is stored and later invoked with
	 *   each piece of serialized text (see characters()).
	 */
	public function __construct( $options = [] ) {
		parent::__construct( $options );

		// Non-breaking spaces are written as numeric references in both
		// attribute values and text.
		$nbsp = "\u{00A0}";
		$this->attributeEscapes[$nbsp] = '&#160;';
		$this->textEscapes[$nbsp] = '&#160;';

		// U+0338 is escaped in text only (T387130).
		$this->textEscapes["\u{0338}"] = '&#x338;';

		// '&' must not be escaped: entities are expected to appear in the
		// input as-is, due to 'ignoreCharRefs'.
		unset( $this->attributeEscapes["&"], $this->textEscapes["&"] );

		$this->textProcessor = $options['textProcessor'] ?? null;
	}

	/**
	 * Nothing is emitted at the start of the document.
	 *
	 * @param mixed $fragmentNamespace Unused
	 * @param mixed $fragmentName Unused
	 * @return string Always the empty string
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		return '';
	}

	/**
	 * Serialize a run of text.
	 *
	 * The parent formatter produces the initial string. Unless the parent
	 * node is an HTML-namespace raw text element, the configured text
	 * processor (if any) is then applied. Finally, character references are
	 * normalized in every case.
	 *
	 * @param SerializerNode $parent Node that contains the text
	 * @param string $text Forwarded to the parent implementation
	 * @param int $start Forwarded to the parent implementation
	 * @param int $length Forwarded to the parent implementation
	 * @return string
	 */
	public function characters( SerializerNode $parent, $text, $start, $length ) {
		$serialized = parent::characters( $parent, $text, $start, $length );

		$inRawTextElement = $parent->namespace === HTMLData::NS_HTML
			&& isset( $this->rawTextElements[$parent->name] );

		if ( !$inRawTextElement && $this->textProcessor !== null ) {
			$serialized = call_user_func( $this->textProcessor, $serialized );
		}

		// Ensure a consistent representation for all entities
		return Sanitizer::normalizeCharReferences( $serialized );
	}

	/**
	 * Serialize an element together with its already-serialized contents.
	 *
	 * @param SerializerNode $parent Parent of the element (not consulted here)
	 * @param SerializerNode $node The element being serialized
	 * @param string $contents Serialized children of the element
	 * @return string
	 */
	public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
		// Nodes flagged as p-wrappers turn into a real <p> only when they
		// hold at least one non-blank node; otherwise the wrapper vanishes.
		$data = $node->snData;
		if ( $data && $data->isPWrapper ) {
			return $data->nonblankNodeCount ? "<p>$contents</p>" : $contents;
		}

		$tagName = $node->name;
		$attributes = $node->attrs;

		// Attribute-less li/p/tr elements holding only whitespace get a marker class.
		$isMarkedEmpty = isset( self::$markedEmptyElements[$tagName] )
			&& $attributes->count() === 0
			&& strspn( $contents, "\t\n\f\r " ) === strlen( $contents );
		if ( $isMarkedEmpty ) {
			return '<' . $tagName . ' class="mw-empty-elt">' . $contents . '</' . $tagName . '>';
		}

		// Build the opening tag; every attribute value is escaped and then
		// has its character references normalized.
		$openTag = '<' . $tagName;
		foreach ( $attributes->getValues() as $attrName => $attrValue ) {
			$encoded = Sanitizer::normalizeCharReferences(
				strtr( $attrValue, $this->attributeEscapes )
			);
			$openTag .= ' ' . $attrName . '="' . $encoded . '"';
		}

		$isHtml = $node->namespace === HTMLData::NS_HTML;

		// HTML void elements are self-closed and never print their contents.
		if ( $isHtml && isset( $this->voidElements[$tagName] ) ) {
			return $openTag . ' />';
		}

		// For elements listed in prefixLfElements whose contents begin with
		// a newline, one extra newline is written after the start tag
		// (presumably so the original newline survives re-parsing).
		$needsExtraLf = $isHtml
			&& isset( $contents[0] ) && $contents[0] === "\n"
			&& isset( $this->prefixLfElements[$tagName] );

		return $openTag . '>' . ( $needsExtraLf ? "\n" : '' ) . $contents . '</' . $tagName . '>';
	}
}
startDocument( $fragmentNamespace, $fragmentName)
element(SerializerNode $parent, SerializerNode $node, $contents)
characters(SerializerNode $parent, $text, $start, $length)
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:41
foreach( $mmfl['setupFiles'] as $fileName) if($queue) if(empty( $mmfl['quiet'])) $s