Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
52 / 52 |
|
100.00% |
5 / 5 |
CRAP | |
100.00% |
1 / 1 |
Initial | |
100.00% |
52 / 52 |
|
100.00% |
5 / 5 |
28 | |
100.00% |
1 / 1 |
characters | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
startTag | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
endTag | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
doctype | |
100.00% |
27 / 27 |
|
100.00% |
1 / 1 |
19 | |||
endDocument | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace Wikimedia\RemexHtml\TreeBuilder; |
4 | |
5 | use Wikimedia\RemexHtml\HTMLData; |
6 | use Wikimedia\RemexHtml\Tokenizer\Attributes; |
7 | |
/**
 * The "initial" insertion mode.
 *
 * A doctype token is validated, used to choose the quirks mode, and passed to
 * the tree builder. The other token types handled here (non-whitespace
 * characters, start tags, end tags and end of document) are treated as a
 * missing doctype. In every case the dispatcher is switched to BEFORE_HTML.
 */
class Initial extends InsertionMode {
	/**
	 * The doctypes listed in the spec which are allowed without generating a
	 * parse error. A 2-d array where each row gives the doctype name, the
	 * public identifier and the system identifier.
	 *
	 * @var array
	 */
	private static $allowedDoctypes = [
		[ 'html', '-//W3C//DTD HTML 4.0//EN', null ],
		[ 'html', '-//W3C//DTD HTML 4.0//EN', 'http://www.w3.org/TR/REC-html40/strict.dtd' ],
		[ 'html', '-//W3C//DTD HTML 4.01//EN', null ],
		[ 'html', '-//W3C//DTD HTML 4.01//EN', 'http://www.w3.org/TR/html4/strict.dtd' ],
		[ 'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
			'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' ],
		[ 'html', '-//W3C//DTD XHTML 1.1//EN', 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' ]
	];

	/**
	 * Shared handling for a token which arrives where a doctype was expected.
	 *
	 * Unless the builder is flagged as an iframe srcdoc document, a
	 * "missing doctype" error is raised at the given position and the builder
	 * is put into quirks mode. The dispatcher is then switched to BEFORE_HTML.
	 *
	 * @param int $pos The source position to report the error at
	 * @return mixed The return value of Dispatcher::switchMode(); callers
	 *   forward the current token to it.
	 */
	private function handleMissingDoctype( $pos ) {
		if ( !$this->builder->isIframeSrcdoc ) {
			$this->error( 'missing doctype', $pos );
			$this->builder->quirks = TreeBuilder::QUIRKS;
		}
		return $this->dispatcher->switchMode( Dispatcher::BEFORE_HTML );
	}

	/**
	 * Handle a run of characters. Leading whitespace is ignored; if anything
	 * remains it is treated as a missing doctype and forwarded to the
	 * BEFORE_HTML mode.
	 *
	 * @param string $text
	 * @param int $start
	 * @param int $length
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		// Ignore whitespace: only the second part returned by splitInitialMatch()
		// is used. NOTE(review): presumably that is the remainder after the
		// leading run of "\t\n\f\r " characters -- confirm against the helper.
		[ , $remainder ] = $this->splitInitialMatch(
			true, "\t\n\f\r ", $text, $start, $length, $sourceStart, $sourceLength );
		[ $start, $length, $sourceStart, $sourceLength ] = $remainder;
		if ( !$length ) {
			return;
		}
		$this->handleMissingDoctype( $sourceStart )
			->characters( $text, $start, $length, $sourceStart, $sourceLength );
	}

	/**
	 * Handle a start tag: treat it as a missing doctype and forward it to the
	 * BEFORE_HTML mode.
	 *
	 * @param string $name
	 * @param Attributes $attrs
	 * @param bool $selfClose
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$this->handleMissingDoctype( $sourceStart )
			->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
	}

	/**
	 * Handle an end tag: treat it as a missing doctype and forward it to the
	 * BEFORE_HTML mode.
	 *
	 * @param string $name
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		$this->handleMissingDoctype( $sourceStart )
			->endTag( $name, $sourceStart, $sourceLength );
	}

	/**
	 * Handle a doctype token.
	 *
	 * Raises an "invalid doctype" error unless the doctype is the plain
	 * "html" doctype (optionally with the about:legacy-compat system
	 * identifier) or one of self::$allowedDoctypes. Then determines the
	 * quirks mode, passes the doctype to the builder with null fields
	 * replaced by empty strings, and switches to BEFORE_HTML.
	 *
	 * @param string|null $name
	 * @param string|null $public The public identifier, or null if missing
	 * @param string|null $system The system identifier, or null if missing
	 * @param bool $quirks If true, the mode starts out as QUIRKS
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		if ( ( $name !== 'html' || $public !== null
				|| ( $system !== null && $system !== 'about:legacy-compat' )
			)
			&& !in_array( [ $name, $public, $system ], self::$allowedDoctypes, true )
		) {
			$this->error( 'invalid doctype', $sourceStart );
		}

		$quirks = $quirks ? TreeBuilder::QUIRKS : TreeBuilder::NO_QUIRKS;

		// Anchored (A), case-insensitive (i) prefix matches on the public identifier
		$quirksIfNoSystem = '~-//W3C//DTD HTML 4\.01 Frameset//|' .
			'-//W3C//DTD HTML 4\.01 Transitional//~Ai';
		$limitedQuirks = '~-//W3C//DTD XHTML 1\.0 Frameset//|' .
			'-//W3C//DTD XHTML 1\.0 Transitional//~Ai';

		// A missing identifier can never equal one of the non-empty constants
		// below, so it is safe to compare it as an empty string.
		$publicId = $public ?? '';
		$systemId = $system ?? '';

		// The HTML Standard requires the public and system identifiers to be
		// compared with the quirks lists ASCII case-insensitively, so the
		// exact-value checks use strcasecmp() rather than ===, in line with
		// the "i" flag on the prefix patterns.
		//
		// NOTE(review): the standard applies these quirks conditions only when
		// the document is not an iframe srcdoc document, whereas here only the
		// limited-quirks branch checks isIframeSrcdoc -- confirm whether that
		// is intentional.
		if ( $name !== 'html'
			|| strcasecmp( $publicId, '-//W3O//DTD W3 HTML Strict 3.0//EN//' ) === 0
			|| strcasecmp( $publicId, '-/W3C/DTD HTML 4.0 Transitional/EN' ) === 0
			|| strcasecmp( $publicId, 'HTML' ) === 0
			|| strcasecmp( $systemId,
				'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd' ) === 0
			|| ( $system === null && preg_match( $quirksIfNoSystem, $publicId ) )
			|| preg_match( HTMLData::$quirkyPrefixRegex, $publicId )
		) {
			$quirks = TreeBuilder::QUIRKS;
		} elseif ( !$this->builder->isIframeSrcdoc
			&& (
				preg_match( $limitedQuirks, $publicId )
				|| ( $system !== null && preg_match( $quirksIfNoSystem, $publicId ) )
			)
		) {
			$quirks = TreeBuilder::LIMITED_QUIRKS;
		}

		// The builder is given empty strings in place of missing fields
		$name ??= '';
		$public ??= '';
		$system ??= '';
		$this->builder->doctype( $name, $public, $system, $quirks,
			$sourceStart, $sourceLength );
		$this->dispatcher->switchMode( Dispatcher::BEFORE_HTML );
	}

	/**
	 * Handle the end of the document: treat it as a missing doctype and
	 * forward it to the BEFORE_HTML mode.
	 *
	 * @param int $pos
	 */
	public function endDocument( $pos ) {
		$this->handleMissingDoctype( $pos )
			->endDocument( $pos );
	}
}