MediaWiki  1.23.14
StringUtilsTest.php
Go to the documentation of this file.
1 <?php
2 
4 
/**
 * Checks StringUtils::isUtf8() on its default code path, which is only
 * meaningful when the mbstring extension provides mb_check_encoding().
 * The test is marked as skipped when that function does not exist.
 *
 * @dataProvider provideStringsForIsUtf8Check
 *
 * @param bool $expected Result isUtf8() is expected to return
 * @param string $string Byte string handed to isUtf8()
 */
public function testIsUtf8WithMbstring( $expected, $string ) {
	$hasMbstring = function_exists( 'mb_check_encoding' );
	if ( !$hasMbstring ) {
		$this->markTestSkipped( 'Test requires the mbstring PHP extension' );
	}

	$actual = StringUtils::isUtf8( $string );
	// High bytes are escaped so the failure message stays readable.
	$message = 'Testing string "' . $this->escaped( $string ) . '" with mb_check_encoding';

	$this->assertEquals( $expected, $actual, $message );
}
21 
/**
 * Checks StringUtils::isUtf8() with its second argument set to true,
 * i.e. with mbstring disabled so the pure PHP implementation is used.
 *
 * @dataProvider provideStringsForIsUtf8Check
 *
 * @param bool $expected Result isUtf8() is expected to return
 * @param string $string Byte string handed to isUtf8()
 */
public function testIsUtf8WithPhpFallbackImplementation( $expected, $string ) {
	$actual = StringUtils::isUtf8( $string, true );
	// High bytes are escaped so the failure message stays readable.
	$message = 'Testing string "' . $this->escaped( $string ) . '" with pure PHP implementation';

	$this->assertEquals( $expected, $actual, $message );
}
36 
/**
 * Render high range bytes as hexadecimal escapes.
 *
 * Every byte whose value is above 127 is replaced by a literal
 * backslash, an "x" and the lowercase hexadecimal value of the byte;
 * all other bytes are copied through unchanged.
 *
 * @param string $string Raw byte string
 * @return string Printable representation of $string
 */
function escaped( $string ) {
	$pieces = array();
	foreach ( str_split( $string ) as $byte ) {
		$code = ord( $byte );
		$pieces[] = $code > 127 ? '\x' . dechex( $code ) : $byte;
	}

	return implode( '', $pieces );
}
55 
/**
 * Data provider for the StringUtils::isUtf8() tests.
 *
 * Each data set is array( expected result, byte string ). The cases
 * follow the categories of the "UTF-8 decoder capability and stress
 * test" by Markus Kuhn: first/last possible sequences, boundary
 * conditions, malformed and truncated sequences, impossible bytes,
 * overlong encodings, surrogates and noncharacters.
 *
 * @return array Data sets keyed by a human readable description
 */
public static function provideStringsForIsUtf8Check() {
	// Expected return values for StringUtils::isUtf8()
	$PASS = true;
	$FAIL = false;

	return array(
		'some ASCII' => array( $PASS, 'Some ASCII' ),
		'euro sign' => array( $PASS, "Euro sign €" ),

		'first possible sequence 1 byte' => array( $PASS, "\x00" ),
		'first possible sequence 2 bytes' => array( $PASS, "\xc2\x80" ),
		'first possible sequence 3 bytes' => array( $PASS, "\xe0\xa0\x80" ),
		'first possible sequence 4 bytes' => array( $PASS, "\xf0\x90\x80\x80" ),
		'first possible sequence 5 bytes' => array( $FAIL, "\xf8\x88\x80\x80\x80" ),
		'first possible sequence 6 bytes' => array( $FAIL, "\xfc\x84\x80\x80\x80\x80" ),

		'last possible sequence 1 byte' => array( $PASS, "\x7f" ),
		'last possible sequence 2 bytes' => array( $PASS, "\xdf\xbf" ),
		'last possible sequence 3 bytes' => array( $PASS, "\xef\xbf\xbf" ),
		'last possible sequence 4 bytes (U+1FFFFF)' => array( $FAIL, "\xf7\xbf\xbf\xbf" ),
		'last possible sequence 5 bytes' => array( $FAIL, "\xfb\xbf\xbf\xbf\xbf" ),
		'last possible sequence 6 bytes' => array( $FAIL, "\xfd\xbf\xbf\xbf\xbf\xbf" ),

		'boundary 1' => array( $PASS, "\xed\x9f\xbf" ),
		'boundary 2' => array( $PASS, "\xee\x80\x80" ),
		'boundary 3' => array( $PASS, "\xef\xbf\xbd" ),
		'boundary 4' => array( $PASS, "\xf2\x80\x80\x80" ),
		'boundary 5 (U+FFFFF)' => array( $PASS, "\xf3\xbf\xbf\xbf" ),
		'boundary 6 (U+100000)' => array( $PASS, "\xf4\x80\x80\x80" ),
		'boundary 7 (U+10FFFF)' => array( $PASS, "\xf4\x8f\xbf\xbf" ),
		'boundary 8 (U+110000)' => array( $FAIL, "\xf4\x90\x80\x80" ),

		'malformed 1' => array( $FAIL, "\x80" ),
		'malformed 2' => array( $FAIL, "\xbf" ),
		'malformed 3' => array( $FAIL, "\x80\xbf" ),
		'malformed 4' => array( $FAIL, "\x80\xbf\x80" ),
		'malformed 5' => array( $FAIL, "\x80\xbf\x80\xbf" ),
		'malformed 6' => array( $FAIL, "\x80\xbf\x80\xbf\x80" ),
		'malformed 7' => array( $FAIL, "\x80\xbf\x80\xbf\x80\xbf" ),
		'malformed 8' => array( $FAIL, "\x80\xbf\x80\xbf\x80\xbf\x80" ),

		'last byte missing 1' => array( $FAIL, "\xc0" ),
		'last byte missing 2' => array( $FAIL, "\xe0\x80" ),
		'last byte missing 3' => array( $FAIL, "\xf0\x80\x80" ),
		'last byte missing 4' => array( $FAIL, "\xf8\x80\x80\x80" ),
		'last byte missing 5' => array( $FAIL, "\xfc\x80\x80\x80\x80" ),
		'last byte missing 6' => array( $FAIL, "\xdf" ),
		'last byte missing 7' => array( $FAIL, "\xef\xbf" ),
		'last byte missing 8' => array( $FAIL, "\xf7\xbf\xbf" ),
		'last byte missing 9' => array( $FAIL, "\xfb\xbf\xbf\xbf" ),
		'last byte missing 10' => array( $FAIL, "\xfd\xbf\xbf\xbf\xbf" ),

		'extra continuation byte 1' => array( $FAIL, "e\xaf" ),
		'extra continuation byte 2' => array( $FAIL, "\xc3\x89\xaf" ),
		'extra continuation byte 3' => array( $FAIL, "\xef\xbc\xa5\xaf" ),
		'extra continuation byte 4' => array( $FAIL, "\xf0\x9d\x99\xb4\xaf" ),

		'impossible bytes 1' => array( $FAIL, "\xfe" ),
		'impossible bytes 2' => array( $FAIL, "\xff" ),
		'impossible bytes 3' => array( $FAIL, "\xfe\xfe\xff\xff" ),

		'overlong sequences 1' => array( $FAIL, "\xc0\xaf" ),
		'overlong sequences 2' => array( $FAIL, "\xc1\xaf" ),
		'overlong sequences 3' => array( $FAIL, "\xe0\x80\xaf" ),
		'overlong sequences 4' => array( $FAIL, "\xf0\x80\x80\xaf" ),
		'overlong sequences 5' => array( $FAIL, "\xf8\x80\x80\x80\xaf" ),
		'overlong sequences 6' => array( $FAIL, "\xfc\x80\x80\x80\x80\xaf" ),

		'maximum overlong sequences 1' => array( $FAIL, "\xc1\xbf" ),
		'maximum overlong sequences 2' => array( $FAIL, "\xe0\x9f\xbf" ),
		'maximum overlong sequences 3' => array( $FAIL, "\xf0\x8f\xbf\xbf" ),
		// A 0xf8 lead byte announces a 5-byte sequence, so four continuation
		// bytes are needed; with only three this would be a truncated
		// sequence rather than a complete overlong one.
		'maximum overlong sequences 4' => array( $FAIL, "\xf8\x87\xbf\xbf\xbf" ),
		'maximum overlong sequences 5' => array( $FAIL, "\xfc\x83\xbf\xbf\xbf\xbf" ),

		// ed 9f bf decodes to U+D7FF, the last code point before the
		// surrogate range starts at U+D800.
		'surrogates 1 (U+D7FF)' => array( $PASS, "\xed\x9f\xbf" ),
		'surrogates 2 (U+E000)' => array( $PASS, "\xee\x80\x80" ),
		'surrogates 3 (U+D800)' => array( $FAIL, "\xed\xa0\x80" ),
		'surrogates 4 (U+DBFF)' => array( $FAIL, "\xed\xaf\xbf" ),
		'surrogates 5 (U+DC00)' => array( $FAIL, "\xed\xb0\x80" ),
		'surrogates 6 (U+DFFF)' => array( $FAIL, "\xed\xbf\xbf" ),
		'surrogates 7 (U+D800 U+DC00)' => array( $FAIL, "\xed\xa0\x80\xed\xb0\x80" ),

		'noncharacters 1' => array( $PASS, "\xef\xbf\xbe" ),
		'noncharacters 2' => array( $PASS, "\xef\xbf\xbf" ),
	);
}
147 }
StringUtilsTest\testIsUtf8WithPhpFallbackImplementation
testIsUtf8WithPhpFallbackImplementation( $expected, $string)
This tests StringUtils::isUtf8 making sure we use the pure PHP implementation used as a fallback when...
Definition: StringUtilsTest.php:30
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
StringUtilsTest\escaped
escaped( $string)
Print high-range characters as hexadecimal escapes.
Definition: StringUtilsTest.php:40
MediaWikiTestCase
Definition: MediaWikiTestCase.php:6
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
StringUtilsTest
Definition: StringUtilsTest.php:3
StringUtilsTest\testIsUtf8WithMbstring
testIsUtf8WithMbstring( $expected, $string)
This tests StringUtils::isUtf8 whenever we have the mbstring extension loaded.
Definition: StringUtilsTest.php:12
StringUtilsTest\provideStringsForIsUtf8Check
static provideStringsForIsUtf8Check()
See also "UTF-8 decoder capability and stress test" by Markus Kuhn: http://www.cl....
Definition: StringUtilsTest.php:61
StringUtils\isUtf8
static isUtf8( $value, $disableMbstring=false)
Test whether a string is valid UTF-8.
Definition: StringUtils.php:51