Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
52 / 52
100.00% covered (success)
100.00%
5 / 5
CRAP
100.00% covered (success)
100.00%
1 / 1
Initial
100.00% covered (success)
100.00%
52 / 52
100.00% covered (success)
100.00%
5 / 5
28
100.00% covered (success)
100.00%
1 / 1
 characters
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
3
 startTag
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 endTag
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 doctype
100.00% covered (success)
100.00%
27 / 27
100.00% covered (success)
100.00%
1 / 1
19
 endDocument
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3namespace Wikimedia\RemexHtml\TreeBuilder;
4
5use Wikimedia\RemexHtml\HTMLData;
6use Wikimedia\RemexHtml\Tokenizer\Attributes;
7
/**
 * The "initial" insertion mode.
 *
 * Handles the tokens seen before any doctype. A doctype token selects the
 * quirks mode passed to the builder; any other significant token is reported
 * as a missing doctype (except in an iframe srcdoc document) and is then
 * forwarded to the "before html" mode.
 */
class Initial extends InsertionMode {
    /**
     * The doctypes listed in the spec which are allowed without generating a
     * parse error. A 2-d array where each row gives the doctype name, the
     * public identifier and the system identifier.
     *
     * @var array
     */
    private static $allowedDoctypes = [
        [ 'html', '-//W3C//DTD HTML 4.0//EN', null ],
        [ 'html', '-//W3C//DTD HTML 4.0//EN', 'http://www.w3.org/TR/REC-html40/strict.dtd' ],
        [ 'html', '-//W3C//DTD HTML 4.01//EN', null ],
        [ 'html', '-//W3C//DTD HTML 4.01//EN', 'http://www.w3.org/TR/html4/strict.dtd' ],
        [ 'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' ],
        [ 'html', '-//W3C//DTD XHTML 1.1//EN', 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' ]
    ];

    /**
     * Compare an optional identifier with an expected value, ignoring ASCII
     * case. A missing (null) identifier never matches.
     *
     * @param string|null $value The identifier from the doctype token
     * @param string $expected The value to compare against
     * @return bool
     */
    private static function equalsIgnoreCase( $value, $expected ): bool {
        return $value !== null && strcasecmp( $value, $expected ) === 0;
    }

    /**
     * Shared fallback for every token other than a doctype: unless the
     * document is an iframe srcdoc document, raise a "missing doctype" error
     * and put the builder into quirks mode; then switch the dispatcher to
     * the "before html" mode.
     *
     * @param int $pos The source position to attach to the error
     * @return mixed Whatever Dispatcher::switchMode() returns; the callers in
     *   this class invoke the token method on it to reprocess the token.
     */
    private function missingDoctype( $pos ) {
        if ( !$this->builder->isIframeSrcdoc ) {
            $this->error( 'missing doctype', $pos );
            $this->builder->quirks = TreeBuilder::QUIRKS;
        }
        return $this->dispatcher->switchMode( Dispatcher::BEFORE_HTML );
    }

    /**
     * Handle a run of characters. If nothing is left after the leading
     * whitespace split, the call is a no-op; otherwise the remainder is
     * treated as a missing doctype and forwarded to the next mode.
     *
     * @param string $text
     * @param int $start
     * @param int $length
     * @param int $sourceStart
     * @param int $sourceLength
     */
    public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
        // Ignore whitespace: only the second part returned by
        // splitInitialMatch() is used (presumably the text following the
        // leading whitespace run -- confirm against InsertionMode).
        [ , $rest ] = $this->splitInitialMatch(
            true, "\t\n\f\r ", $text, $start, $length, $sourceStart, $sourceLength );
        [ $start, $length, $sourceStart, $sourceLength ] = $rest;
        if ( !$length ) {
            return;
        }
        $this->missingDoctype( $sourceStart )
            ->characters( $text, $start, $length, $sourceStart, $sourceLength );
    }

    /**
     * Handle a start tag: treat it as a missing doctype and forward it to
     * the "before html" mode.
     *
     * @param string $name
     * @param Attributes $attrs
     * @param bool $selfClose
     * @param int $sourceStart
     * @param int $sourceLength
     */
    public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
        $this->missingDoctype( $sourceStart )
            ->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
    }

    /**
     * Handle an end tag: treat it as a missing doctype and forward it to
     * the "before html" mode.
     *
     * @param string $name
     * @param int $sourceStart
     * @param int $sourceLength
     */
    public function endTag( $name, $sourceStart, $sourceLength ) {
        $this->missingDoctype( $sourceStart )
            ->endTag( $name, $sourceStart, $sourceLength );
    }

    /**
     * Handle a doctype token: report an error for doctypes which are neither
     * the modern form nor in the allowed list, work out the quirks mode,
     * pass the doctype to the builder and switch to the "before html" mode.
     *
     * @param string|null $name The doctype name, or null if missing
     * @param string|null $public The public identifier, or null if missing
     * @param string|null $system The system identifier, or null if missing
     * @param bool $quirks Truthy to start from quirks mode (presumably the
     *   tokenizer's force-quirks flag -- confirm against the tokenizer)
     * @param int $sourceStart
     * @param int $sourceLength
     */
    public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
        $isModernDoctype = $name === 'html'
            && $public === null
            && ( $system === null || $system === 'about:legacy-compat' );
        if ( !$isModernDoctype
            && !in_array( [ $name, $public, $system ], self::$allowedDoctypes, true )
        ) {
            $this->error( 'invalid doctype', $sourceStart );
        }

        $quirks = $quirks ? TreeBuilder::QUIRKS : TreeBuilder::NO_QUIRKS;

        // Anchored, case-insensitive prefix matches on the public identifier
        $quirksIfNoSystem = '~-//W3C//DTD HTML 4\.01 Frameset//|' .
            '-//W3C//DTD HTML 4\.01 Transitional//~Ai';
        $limitedQuirks = '~-//W3C//DTD XHTML 1\.0 Frameset//|' .
            '-//W3C//DTD XHTML 1\.0 Transitional//~Ai';
        $ibmSystemId = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd';

        $publicId = $public ?? '';
        // HTML 4.01 Frameset/Transitional gives full quirks without a system
        // identifier and limited quirks with one, so match it only once.
        $isLooseHtml401 = (bool)preg_match( $quirksIfNoSystem, $publicId );

        // The HTML spec requires the identifiers in these lists to be compared
        // ASCII case-insensitively, so the exact matches ignore case just as
        // the prefix regexes do.
        // NOTE(review): the spec also exempts iframe srcdoc documents from
        // full quirks mode; this condition does not check isIframeSrcdoc --
        // confirm whether that is intentional.
        if ( $name !== 'html'
            || self::equalsIgnoreCase( $public, '-//W3O//DTD W3 HTML Strict 3.0//EN//' )
            || self::equalsIgnoreCase( $public, '-/W3C/DTD HTML 4.0 Transitional/EN' )
            || self::equalsIgnoreCase( $public, 'HTML' )
            || self::equalsIgnoreCase( $system, $ibmSystemId )
            || ( $system === null && $isLooseHtml401 )
            || preg_match( HTMLData::$quirkyPrefixRegex, $publicId )
        ) {
            $quirks = TreeBuilder::QUIRKS;
        } elseif ( !$this->builder->isIframeSrcdoc
            && (
                preg_match( $limitedQuirks, $publicId )
                || ( $system !== null && $isLooseHtml401 )
            )
        ) {
            $quirks = TreeBuilder::LIMITED_QUIRKS;
        }

        // The builder is given empty strings in place of missing values
        $name ??= '';
        $public ??= '';
        $system ??= '';
        $this->builder->doctype( $name, $public, $system, $quirks,
            $sourceStart, $sourceLength );
        $this->dispatcher->switchMode( Dispatcher::BEFORE_HTML );
    }

    /**
     * Handle the end of the document: treat it as a missing doctype and
     * forward it to the "before html" mode.
     *
     * @param int $pos
     */
    public function endDocument( $pos ) {
        $this->missingDoctype( $pos )
            ->endDocument( $pos );
    }
}