Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
94.87% covered (success)
94.87%
37 / 39
50.00% covered (danger)
50.00%
2 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
RemexCompatFormatter
94.87% covered (success)
94.87%
37 / 39
50.00% covered (danger)
50.00%
2 / 4
20.05
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 startDocument
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 characters
85.71% covered (warning)
85.71%
6 / 7
0.00% covered (danger)
0.00%
0 / 1
4.05
 element
96.00% covered (success)
96.00%
24 / 25
0.00% covered (danger)
0.00%
0 / 1
14
1<?php
2
3namespace MediaWiki\Tidy;
4
5use MediaWiki\Parser\Sanitizer;
6use Wikimedia\RemexHtml\HTMLData;
7use Wikimedia\RemexHtml\Serializer\HtmlFormatter;
8use Wikimedia\RemexHtml\Serializer\SerializerNode;
9
10/**
11 * @internal
12 *
13 * WATCH OUT! Unlike normal HtmlFormatter, this class requires the 'ignoreCharRefs' option
14 * in Tokenizer to be used. If that option is not used, it will produce wrong results (T354361).
15 */
class RemexCompatFormatter extends HtmlFormatter {
    /**
     * Element names that receive class="mw-empty-elt" when they are serialized
     * with no attributes and whitespace-only contents. Used as a set: only the
     * keys are consulted.
     */
    private const MARKED_EMPTY_ELEMENTS = [
        'li' => true,
        'p' => true,
        'tr' => true,
    ];

    /** @var ?callable Optional callback run on escaped text outside raw text elements */
    private $textProcessor;

    /**
     * @param array $options Forwarded to the parent constructor. If a
     *   'textProcessor' entry is present it is stored and later applied by
     *   characters(); otherwise no text processing takes place.
     */
    public function __construct( $options = [] ) {
        parent::__construct( $options );

        // Non-breaking spaces are always emitted as a numeric reference
        $nbsp = "\u{00A0}";
        $this->attributeEscapes[$nbsp] = '&#160;';
        $this->textEscapes[$nbsp] = '&#160;';

        // With 'ignoreCharRefs' the input still carries its entities, so '&' must
        // pass through unescaped in both attributes and text
        unset( $this->attributeEscapes["&"], $this->textEscapes["&"] );

        $this->textProcessor = $options['textProcessor'] ?? null;
    }

    /**
     * Emit nothing at the start of the document.
     *
     * @param mixed $fragmentNamespace Unused
     * @param mixed $fragmentName Unused
     * @return string Always the empty string
     */
    public function startDocument( $fragmentNamespace, $fragmentName ) {
        return '';
    }

    /**
     * Serialize a run of text.
     *
     * WATCH OUT! Unlike normal HtmlFormatter, this class expects that the $text argument contains
     * unexpanded character references (entities), as a result of using the 'ignoreCharRefs' option
     * in Tokenizer. If that option is not used, this method will produce wrong results (T354361).
     *
     * The parent's output is passed through the configured text processor, except
     * when the parent node is an HTML-namespace element listed in rawTextElements,
     * and is then run through Sanitizer::normalizeCharReferences() in every case.
     *
     * @inheritDoc
     */
    public function characters( SerializerNode $parent, $text, $start, $length ) {
        $serialized = parent::characters( $parent, $text, $start, $length );

        $inRawTextElement = $parent->namespace === HTMLData::NS_HTML
            && isset( $this->rawTextElements[$parent->name] );

        if ( !$inRawTextElement && $this->textProcessor !== null ) {
            $serialized = call_user_func( $this->textProcessor, $serialized );
        }

        // Give every entity one consistent representation
        return Sanitizer::normalizeCharReferences( $serialized );
    }

    /**
     * Serialize an element together with its already-serialized contents.
     *
     * @param SerializerNode $parent Parent of $node; not consulted here
     * @param SerializerNode $node Element being serialized
     * @param string $contents Serialized children of $node
     * @return string
     */
    public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
        $wrapperData = $node->snData;
        if ( $wrapperData && $wrapperData->isPWrapper ) {
            // A p-wrapper only turns into a real <p> when it has a truthy
            // nonblankNodeCount; otherwise its contents are emitted bare
            return $wrapperData->nonblankNodeCount ? "<p>$contents</p>" : $contents;
        }

        $name = $node->name;
        $attrs = $node->attrs;

        // Attribute-less li/p/tr holding nothing but whitespace get a marker class
        if ( isset( self::MARKED_EMPTY_ELEMENTS[$name] )
            && $attrs->count() === 0
            && strlen( $contents ) === strspn( $contents, "\t\n\f\r " )
        ) {
            return '<' . $name . ' class="mw-empty-elt">' . $contents . '</' . $name . '>';
        }

        $openTag = '<' . $name;
        foreach ( $attrs->getValues() as $attrName => $attrValue ) {
            // Apply the attribute escape table first, then normalize entities
            $encoded = Sanitizer::normalizeCharReferences(
                strtr( $attrValue, $this->attributeEscapes )
            );
            $openTag .= ' ' . $attrName . '="' . $encoded . '"';
        }

        $isHtml = $node->namespace === HTMLData::NS_HTML;

        // HTML void elements are written self-closed and never carry contents
        if ( $isHtml && isset( $this->voidElements[$name] ) ) {
            return $openTag . ' />';
        }

        // For HTML elements listed in prefixLfElements whose contents begin with
        // a newline, one extra newline is emitted in front of the contents
        // (presumably so the leading newline survives reparsing - confirm
        // against HtmlFormatter)
        $needsExtraLf = $isHtml
            && ( $contents[0] ?? null ) === "\n"
            && isset( $this->prefixLfElements[$name] );

        return $openTag . '>' . ( $needsExtraLf ? "\n" : '' ) . $contents . '</' . $name . '>';
    }
}