MediaWiki REL1_40
RemexCompatFormatter.php
Go to the documentation of this file.
1<?php
2
3namespace MediaWiki\Tidy;
4
5use Sanitizer;
6use Wikimedia\RemexHtml\HTMLData;
7use Wikimedia\RemexHtml\Serializer\HtmlFormatter;
8use Wikimedia\RemexHtml\Serializer\SerializerNode;
9
/**
 * Serializer formatter that adjusts the output of the parent HtmlFormatter:
 * it changes the escape tables, optionally post-processes text nodes,
 * normalizes character references, unwraps or materialises p-wrappers and
 * tags certain empty elements with class="mw-empty-elt".
 */
class RemexCompatFormatter extends HtmlFormatter {
	/**
	 * Element names (used as keys) that receive class="mw-empty-elt" when
	 * they carry no attributes and only whitespace content.
	 *
	 * @var array<string,bool>
	 */
	private static $markedEmptyElements = [
		'li' => true,
		'p' => true,
		'tr' => true,
	];

	/**
	 * Optional callback applied to serialized text; null when not configured.
	 *
	 * @var callable|null
	 */
	private $textProcessor;

	/**
	 * @param array $options Passed through to the parent constructor. The
	 *   'textProcessor' key, if present, is stored as the text callback.
	 */
	public function __construct( $options = [] ) {
		parent::__construct( $options );

		$this->textProcessor = $options['textProcessor'] ?? null;

		// In both escape tables: stop escaping "&" and emit U+00A0 as a
		// numeric character reference instead.
		unset( $this->attributeEscapes["&"], $this->textEscapes["&"] );
		$this->attributeEscapes["\u{00A0}"] = '&#160;';
		$this->textEscapes["\u{00A0}"] = '&#160;';
	}

	/**
	 * Emit nothing at the start of the document.
	 *
	 * @param string|null $fragmentNamespace Unused
	 * @param string|null $fragmentName Unused
	 * @return string Always the empty string
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		return '';
	}

	/**
	 * Serialize a text node.
	 *
	 * The parent's output is passed through the configured text processor
	 * (unless the parent node is an HTML element listed in rawTextElements)
	 * and then through Sanitizer::normalizeCharReferences().
	 *
	 * @param SerializerNode $parent Node containing the text
	 * @param string $text
	 * @param int $start
	 * @param int $length
	 * @return string
	 */
	public function characters( SerializerNode $parent, $text, $start, $length ) {
		$serialized = parent::characters( $parent, $text, $start, $length );

		$insideRawTextElement = $parent->namespace === HTMLData::NS_HTML
			&& isset( $this->rawTextElements[$parent->name] );

		if ( !$insideRawTextElement && $this->textProcessor !== null ) {
			$serialized = call_user_func( $this->textProcessor, $serialized );
		}

		// Ensure a consistent representation for all entities
		return Sanitizer::normalizeCharReferences( $serialized );
	}

	/**
	 * Serialize an element together with its already-serialized contents.
	 *
	 * @param SerializerNode $parent Unused
	 * @param SerializerNode $node Element being serialized
	 * @param string|null $contents Serialized children of $node
	 * @return string|null
	 */
	public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
		$data = $node->snData;
		if ( $data && $data->isPWrapper ) {
			// A p-wrapper becomes a real <p> only when it has non-blank
			// nodes; otherwise its contents are passed through untouched.
			return $data->nonblankNodeCount ? "<p>$contents</p>" : $contents;
		}

		$name = $node->name;
		$attrs = $node->attrs;

		// Attribute-less li/p/tr whose contents consist solely of
		// tab, LF, FF, CR or space characters get a marker class.
		$isMarkable = isset( self::$markedEmptyElements[$name] ) && $attrs->count() === 0;
		if ( $isMarkable && strspn( $contents, "\t\n\f\r " ) === strlen( $contents ) ) {
			return "<{$name} class=\"mw-empty-elt\">$contents</{$name}>";
		}

		$tag = "<$name";
		foreach ( $attrs->getValues() as $attrName => $attrValue ) {
			$encValue = Sanitizer::normalizeCharReferences(
				strtr( $attrValue, $this->attributeEscapes )
			);
			$tag .= " $attrName=\"$encValue\"";
		}

		$isHtml = $node->namespace === HTMLData::NS_HTML;
		if ( $isHtml && isset( $this->voidElements[$name] ) ) {
			// Void elements are written in self-closing form, with no contents
			return $tag . ' />';
		}

		// For HTML elements listed in prefixLfElements, contents that start
		// with a newline get one additional newline in front of them.
		// NOTE(review): presumably because an HTML parser drops the first
		// newline inside such elements - confirm against HtmlFormatter.
		$needsExtraLf = $isHtml
			&& isset( $contents[0] ) && $contents[0] === "\n"
			&& isset( $this->prefixLfElements[$name] );

		return $tag . '>' . ( $needsExtraLf ? "\n" : '' ) . "$contents</$name>";
	}
}
startDocument( $fragmentNamespace, $fragmentName)
element(SerializerNode $parent, SerializerNode $node, $contents)
characters(SerializerNode $parent, $text, $start, $length)
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:41