Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
94.87% |
37 / 39 |
|
50.00% |
2 / 4 |
CRAP | |
0.00% |
0 / 1 |
RemexCompatFormatter | |
94.87% |
37 / 39 |
|
50.00% |
2 / 4 |
20.05 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
startDocument | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
characters | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
4.05 | |||
element | |
96.00% |
24 / 25 |
|
0.00% |
0 / 1 |
14 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Tidy; |
4 | |
5 | use MediaWiki\Parser\Sanitizer; |
6 | use Wikimedia\RemexHtml\HTMLData; |
7 | use Wikimedia\RemexHtml\Serializer\HtmlFormatter; |
8 | use Wikimedia\RemexHtml\Serializer\SerializerNode; |
9 | |
/**
 * Serializer formatter used by the Remex-based tidy code.
 *
 * @internal
 *
 * WATCH OUT! Unlike normal HtmlFormatter, this class requires the 'ignoreCharRefs' option
 * in Tokenizer to be used. If that option is not used, it will produce wrong results (T354361).
 */
class RemexCompatFormatter extends HtmlFormatter {
	/**
	 * Element names that get class="mw-empty-elt" when they are serialized
	 * with no attributes and only whitespace content. The names are the keys,
	 * so membership can be tested with isset().
	 */
	private const MARKED_EMPTY_ELEMENTS = [
		'li' => true,
		'p' => true,
		'tr' => true,
	];

	/** @var ?callable Optional callback applied to text outside raw text elements */
	private $textProcessor;

	/**
	 * @param array $options Passed through to the parent constructor. This class
	 *   additionally reads:
	 *   - textProcessor: (callable, optional) receives the escaped text of a
	 *     text node and returns the text to output.
	 */
	public function __construct( $options = [] ) {
		parent::__construct( $options );
		// Escape non-breaking space as a numeric character reference, so that
		// it stays visible and unambiguous in the serialized output.
		$this->attributeEscapes["\u{00A0}"] = '&#160;';
		$this->textEscapes["\u{00A0}"] = '&#160;';
		// Disable escaping of '&', because we expect to see entities, due to 'ignoreCharRefs'
		unset( $this->attributeEscapes["&"], $this->textEscapes["&"] );
		$this->textProcessor = $options['textProcessor'] ?? null;
	}

	/**
	 * Emit nothing at the start of the document.
	 *
	 * @inheritDoc
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		return '';
	}

	/**
	 * WATCH OUT! Unlike normal HtmlFormatter, this class expects that the $text argument contains
	 * unexpanded character references (entities), as a result of using the 'ignoreCharRefs' option
	 * in Tokenizer. If that option is not used, this method will produce wrong results (T354361).
	 *
	 * @inheritDoc
	 */
	public function characters( SerializerNode $parent, $text, $start, $length ) {
		$text = parent::characters( $parent, $text, $start, $length );

		// The text processor is skipped for the contents of HTML raw text
		// elements (as listed in $this->rawTextElements).
		$isRawText = $parent->namespace === HTMLData::NS_HTML
			&& isset( $this->rawTextElements[$parent->name] );
		if ( !$isRawText && $this->textProcessor !== null ) {
			$text = call_user_func( $this->textProcessor, $text );
		}

		// Ensure a consistent representation for all entities
		return Sanitizer::normalizeCharReferences( $text );
	}

	/**
	 * Serialize an element and its already-serialized contents.
	 *
	 * Differences from a plain "<name attrs>contents</name>" serialization:
	 *  - A node whose snData is flagged isPWrapper is output as <p>...</p>
	 *    when it has a non-zero nonblankNodeCount, and otherwise as its bare
	 *    contents.
	 *  - An attribute-less li, p or tr whose contents consist only of
	 *    whitespace gets class="mw-empty-elt".
	 *  - Attribute values are escaped with $this->attributeEscapes and then
	 *    passed through Sanitizer::normalizeCharReferences().
	 *  - HTML void elements are closed with " />" and have no end tag.
	 *
	 * @inheritDoc
	 */
	public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
		$data = $node->snData;
		if ( $data && $data->isPWrapper ) {
			if ( $data->nonblankNodeCount ) {
				return "<p>$contents</p>";
			} else {
				return $contents;
			}
		}

		$name = $node->name;
		$attrs = $node->attrs;
		// strspn() === strlen() is true for the empty string as well, so both
		// completely empty and whitespace-only elements are marked.
		if ( isset( self::MARKED_EMPTY_ELEMENTS[$name] ) && $attrs->count() === 0
			&& strspn( $contents, "\t\n\f\r " ) === strlen( $contents )
		) {
			return "<{$name} class=\"mw-empty-elt\">$contents</{$name}>";
		}

		$s = "<$name";
		foreach ( $attrs->getValues() as $attrName => $attrValue ) {
			$encValue = strtr( $attrValue, $this->attributeEscapes );
			$encValue = Sanitizer::normalizeCharReferences( $encValue );
			$s .= " $attrName=\"$encValue\"";
		}
		if ( $node->namespace === HTMLData::NS_HTML && isset( $this->voidElements[$name] ) ) {
			$s .= ' />';
			return $s;
		}

		$s .= '>';
		if ( $node->namespace === HTMLData::NS_HTML
			&& isset( $contents[0] ) && $contents[0] === "\n"
			&& isset( $this->prefixLfElements[$name] )
		) {
			// Emit an extra newline before contents that already start with
			// one, for the elements listed in $this->prefixLfElements.
			$s .= "\n$contents</$name>";
		} else {
			$s .= "$contents</$name>";
		}
		return $s;
	}
}