MediaWiki REL1_33
StringUtilsTest.php
Go to the documentation of this file.
1<?php
2
/**
 * Tests for StringUtils::isUtf8().
 */
class StringUtilsTest extends PHPUnit\Framework\TestCase {

	use MediaWikiCoversValidator;

	/**
	 * Check that StringUtils::isUtf8() gives the expected verdict for one byte string.
	 *
	 * @covers StringUtils::isUtf8
	 * @dataProvider provideStringsForIsUtf8Check
	 * @param bool $expected Expected return value of StringUtils::isUtf8()
	 * @param string $string Raw byte string handed to StringUtils::isUtf8()
	 */
	public function testIsUtf8( $expected, $string ) {
		// assertSame: the expectation is a boolean, so a merely truthy/falsy
		// return value must not be accepted as a match.
		$this->assertSame( $expected, StringUtils::isUtf8( $string ),
			'Testing string "' . $this->escaped( $string ) . '"' );
	}

	/**
	 * Print high range characters as a hexadecimal.
	 *
	 * Every byte above 0x7f is replaced by a literal backslash, "x" and its
	 * two lowercase hex digits; all other bytes are copied through unchanged.
	 * Used to keep assertion messages readable for invalid byte sequences.
	 *
	 * @param string $string Raw byte string
	 * @return string
	 */
	public function escaped( $string ) {
		$escaped = '';
		$length = strlen( $string );
		for ( $i = 0; $i < $length; $i++ ) {
			$val = ord( $string[$i] );
			// Single-quoted on purpose: '\x' is a literal backslash + "x" here.
			$escaped .= $val > 127 ? sprintf( '\x%02x', $val ) : $string[$i];
		}

		return $escaped;
	}

	/**
	 * Data sets for testIsUtf8(): [ expected result, byte string ], keyed by
	 * a descriptive name.
	 *
	 * See also "UTF-8 decoder capability and stress test" by
	 * Markus Kuhn:
	 * http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
	 *
	 * @return array[]
	 */
	public static function provideStringsForIsUtf8Check() {
		// Expected return values for StringUtils::isUtf8()
		$PASS = true;
		$FAIL = false;

		return [
			'some ASCII' => [ $PASS, 'Some ASCII' ],
			'euro sign' => [ $PASS, "Euro sign €" ],

			// Lowest value of each sequence length; 5- and 6-byte forms are
			// expected to be rejected.
			'first possible sequence 1 byte' => [ $PASS, "\x00" ],
			'first possible sequence 2 bytes' => [ $PASS, "\xc2\x80" ],
			'first possible sequence 3 bytes' => [ $PASS, "\xe0\xa0\x80" ],
			'first possible sequence 4 bytes' => [ $PASS, "\xf0\x90\x80\x80" ],
			'first possible sequence 5 bytes' => [ $FAIL, "\xf8\x88\x80\x80\x80" ],
			'first possible sequence 6 bytes' => [ $FAIL, "\xfc\x84\x80\x80\x80\x80" ],

			// Highest value of each sequence length
			'last possible sequence 1 byte' => [ $PASS, "\x7f" ],
			'last possible sequence 2 bytes' => [ $PASS, "\xdf\xbf" ],
			'last possible sequence 3 bytes' => [ $PASS, "\xef\xbf\xbf" ],
			'last possible sequence 4 bytes (U+1FFFFF)' => [ $FAIL, "\xf7\xbf\xbf\xbf" ],
			'last possible sequence 5 bytes' => [ $FAIL, "\xfb\xbf\xbf\xbf\xbf" ],
			'last possible sequence 6 bytes' => [ $FAIL, "\xfd\xbf\xbf\xbf\xbf\xbf" ],

			'boundary 1' => [ $PASS, "\xed\x9f\xbf" ],
			'boundary 2' => [ $PASS, "\xee\x80\x80" ],
			'boundary 3' => [ $PASS, "\xef\xbf\xbd" ],
			'boundary 4' => [ $PASS, "\xf2\x80\x80\x80" ],
			'boundary 5 (U+FFFFF)' => [ $PASS, "\xf3\xbf\xbf\xbf" ],
			'boundary 6 (U+100000)' => [ $PASS, "\xf4\x80\x80\x80" ],
			'boundary 7 (U+10FFFF)' => [ $PASS, "\xf4\x8f\xbf\xbf" ],
			'boundary 8 (U+110000)' => [ $FAIL, "\xf4\x90\x80\x80" ],

			// Continuation bytes without a lead byte
			'malformed 1' => [ $FAIL, "\x80" ],
			'malformed 2' => [ $FAIL, "\xbf" ],
			'malformed 3' => [ $FAIL, "\x80\xbf" ],
			'malformed 4' => [ $FAIL, "\x80\xbf\x80" ],
			'malformed 5' => [ $FAIL, "\x80\xbf\x80\xbf" ],
			'malformed 6' => [ $FAIL, "\x80\xbf\x80\xbf\x80" ],
			'malformed 7' => [ $FAIL, "\x80\xbf\x80\xbf\x80\xbf" ],
			'malformed 8' => [ $FAIL, "\x80\xbf\x80\xbf\x80\xbf\x80" ],

			'last byte missing 1' => [ $FAIL, "\xc0" ],
			'last byte missing 2' => [ $FAIL, "\xe0\x80" ],
			'last byte missing 3' => [ $FAIL, "\xf0\x80\x80" ],
			'last byte missing 4' => [ $FAIL, "\xf8\x80\x80\x80" ],
			'last byte missing 5' => [ $FAIL, "\xfc\x80\x80\x80\x80" ],
			'last byte missing 6' => [ $FAIL, "\xdf" ],
			'last byte missing 7' => [ $FAIL, "\xef\xbf" ],
			'last byte missing 8' => [ $FAIL, "\xf7\xbf\xbf" ],
			'last byte missing 9' => [ $FAIL, "\xfb\xbf\xbf\xbf" ],
			'last byte missing 10' => [ $FAIL, "\xfd\xbf\xbf\xbf\xbf" ],

			'extra continuation byte 1' => [ $FAIL, "e\xaf" ],
			'extra continuation byte 2' => [ $FAIL, "\xc3\x89\xaf" ],
			'extra continuation byte 3' => [ $FAIL, "\xef\xbc\xa5\xaf" ],
			'extra continuation byte 4' => [ $FAIL, "\xf0\x9d\x99\xb4\xaf" ],

			'impossible bytes 1' => [ $FAIL, "\xfe" ],
			'impossible bytes 2' => [ $FAIL, "\xff" ],
			'impossible bytes 3' => [ $FAIL, "\xfe\xfe\xff\xff" ],

			// "/" (U+002F) encoded with more bytes than necessary
			'overlong sequences 1' => [ $FAIL, "\xc0\xaf" ],
			'overlong sequences 2' => [ $FAIL, "\xc1\xaf" ],
			'overlong sequences 3' => [ $FAIL, "\xe0\x80\xaf" ],
			'overlong sequences 4' => [ $FAIL, "\xf0\x80\x80\xaf" ],
			'overlong sequences 5' => [ $FAIL, "\xf8\x80\x80\x80\xaf" ],
			'overlong sequences 6' => [ $FAIL, "\xfc\x80\x80\x80\x80\xaf" ],

			'maximum overlong sequences 1' => [ $FAIL, "\xc1\xbf" ],
			'maximum overlong sequences 2' => [ $FAIL, "\xe0\x9f\xbf" ],
			'maximum overlong sequences 3' => [ $FAIL, "\xf0\x8f\xbf\xbf" ],
			// A 0xf8 lead byte announces five bytes in total; with only three
			// continuation bytes this entry would be a truncated sequence
			// rather than an overlong one.
			'maximum overlong sequences 4' => [ $FAIL, "\xf8\x87\xbf\xbf\xbf" ],
			'maximum overlong sequences 5' => [ $FAIL, "\xfc\x83\xbf\xbf\xbf\xbf" ],

			// ed 9f bf decodes to U+D7FF, the last code point before the
			// surrogate range.
			'surrogates 1 (U+D7FF)' => [ $PASS, "\xed\x9f\xbf" ],
			'surrogates 2 (U+E000)' => [ $PASS, "\xee\x80\x80" ],
			'surrogates 3 (U+D800)' => [ $FAIL, "\xed\xa0\x80" ],
			'surrogates 4 (U+DBFF)' => [ $FAIL, "\xed\xaf\xbf" ],
			'surrogates 5 (U+DC00)' => [ $FAIL, "\xed\xb0\x80" ],
			'surrogates 6 (U+DFFF)' => [ $FAIL, "\xed\xbf\xbf" ],
			'surrogates 7 (U+D800 U+DC00)' => [ $FAIL, "\xed\xa0\x80\xed\xb0\x80" ],

			'noncharacters 1' => [ $PASS, "\xef\xbf\xbe" ],
			'noncharacters 2' => [ $PASS, "\xef\xbf\xbf" ],
		];
	}
}
and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy
testIsUtf8( $expected, $string)
Covers StringUtils::isUtf8; data provider: provideStringsForIsUtf8Check.
static provideStringsForIsUtf8Check()
See also "UTF-8 decoder capability and stress test" by Markus Kuhn: http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
escaped( $string)
Print high range characters as a hexadecimal.