MediaWiki  1.23.0
UtfNormalTest2.php
Go to the documentation of this file.
1 #!/usr/bin/env php
2 <?php
// This is a command-line tool: bail out immediately under any other SAPI.
if ( 'cli' !== PHP_SAPI ) {
	exit( "Run me from the command line please.\n" );
}

// Test data, from http://unicode.org/Public/UNIDATA/NormalizationTest.txt
$file = "NormalizationTest.txt";

// Everything from this character to the end of a line is a comment
define( 'COMMENT', '#' );

// Columns of a data line are delimited by semicolons
define( 'SEPARATOR', ';' );

// Read handle on the test data; false when the file could not be opened
$f = fopen( $file, "r" );
39 
68 require_once './UtfNormal.php';
/**
 * Normal form C (canonical composition) via UtfNormal::toNFC().
 *
 * @param string $c UTF-8 string to normalize
 * @return string Whatever UtfNormal::toNFC() returns for $c
 */
function normalize_form_c( $c ) {
	$normalized = UtfNormal::toNFC( $c );
	return $normalized;
}

/**
 * Normal form D (canonical decomposition) via UtfNormal::toNFD().
 *
 * @param string $c UTF-8 string to normalize
 * @return string Whatever UtfNormal::toNFD() returns for $c
 */
function normalize_form_d( $c ) {
	$normalized = UtfNormal::toNFD( $c );
	return $normalized;
}

/**
 * Normal form KC (compatibility composition) via UtfNormal::toNFKC().
 *
 * @param string $c UTF-8 string to normalize
 * @return string Whatever UtfNormal::toNFKC() returns for $c
 */
function normalize_form_kc( $c ) {
	$normalized = UtfNormal::toNFKC( $c );
	return $normalized;
}

/**
 * Normal form KD (compatibility decomposition) via UtfNormal::toNFKD().
 *
 * @param string $c UTF-8 string to normalize
 * @return string Whatever UtfNormal::toNFKD() returns for $c
 */
function normalize_form_kd( $c ) {
	$normalized = UtfNormal::toNFKD( $c );
	return $normalized;
}
73 
/**
 * Normal form C via UtfNormal::toNFC(), passing "php" as an extra second
 * argument.
 *
 * NOTE(review): the extra argument presumably selects a pure-PHP code path
 * in a locally patched UtfNormal -- confirm against UtfNormal.php.
 *
 * @param string $c UTF-8 string to normalize
 * @return string Whatever UtfNormal::toNFC() returns for $c
 */
function normalize_form_c_php( $c ) {
	$normalized = UtfNormal::toNFC( $c, "php" );
	return $normalized;
}

/**
 * Normal form D via UtfNormal::toNFD(), passing "php" as an extra second
 * argument (see the review note on normalize_form_c_php()).
 *
 * @param string $c UTF-8 string to normalize
 * @return string Whatever UtfNormal::toNFD() returns for $c
 */
function normalize_form_d_php( $c ) {
	$normalized = UtfNormal::toNFD( $c, "php" );
	return $normalized;
}

/**
 * Normal form KC via UtfNormal::toNFKC(), passing "php" as an extra second
 * argument (see the review note on normalize_form_c_php()).
 *
 * @param string $c UTF-8 string to normalize
 * @return string Whatever UtfNormal::toNFKC() returns for $c
 */
function normalize_form_kc_php( $c ) {
	$normalized = UtfNormal::toNFKC( $c, "php" );
	return $normalized;
}

/**
 * Normal form KD via UtfNormal::toNFKD(), passing "php" as an extra second
 * argument (see the review note on normalize_form_c_php()).
 *
 * @param string $c UTF-8 string to normalize
 * @return string Whatever UtfNormal::toNFKD() returns for $c
 */
function normalize_form_kd_php( $c ) {
	$normalized = UtfNormal::toNFKD( $c, "php" );
	return $normalized;
}
85 
// Configure assert(): enable it, silence its built-in warning, and route
// failures to my_assert() so they are reported with test-data context.
assert_options( ASSERT_ACTIVE, 1 );
assert_options( ASSERT_WARNING, 0 );
// ASSERT_QUIET_EVAL was removed in PHP 8.0; referencing the missing constant
// there is a fatal error, so only set the option where it still exists.
if ( defined( 'ASSERT_QUIET_EVAL' ) ) {
	assert_options( ASSERT_QUIET_EVAL, 1 );
}
assert_options( ASSERT_CALLBACK, 'my_assert' );
90 
/**
 * Failure reporter installed as the assert() callback.
 *
 * Prints the failed expression together with the current data-line counter
 * and the sixth column of the current row, both read from the globals
 * $lineNo and $col.
 *
 * @param string $file Script in which the assertion failed (unused)
 * @param int $line Script line of the assertion (unused)
 * @param string $code The expression that failed
 */
function my_assert( $file, $line, $code ) {
	global $col, $lineNo;
	printf( "Assertion that '%s' failed on line %s (%s)\n", $code, $lineNo, $col[5] );
}
95 
$count = 0;
$lineNo = 0;

// Without the data file nothing can be verified; say so instead of
// reporting "done." after running zero tests.
if ( $f === false ) {
	fwrite( STDERR, "Could not open $file for reading; no tests were run.\n" );
	exit( 1 );
}

// Normalization form => suffix of the matching normalize_form_* wrappers,
// listed in the order the two implementations are cross-checked.
$wrapperSuffix = array(
	'NFC' => 'c',
	'NFD' => 'd',
	'NFKD' => 'kd',
	'NFKC' => 'kc',
);

// Conformance rules from the test file header. Each rule is
// ( form, index of the expected column, indexes of the input columns ):
//   c2 == NFC(c1) == NFC(c2) == NFC(c3)
//   c4 == NFC(c4) == NFC(c5)
//   c3 == NFD(c1) == NFD(c2) == NFD(c3)
//   c5 == NFD(c4) == NFD(c5)
//   c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
//   c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
$conformance = array(
	array( 'NFC', 1, array( 0, 1, 2 ) ),
	array( 'NFC', 3, array( 3, 4 ) ),
	array( 'NFD', 2, array( 0, 1, 2 ) ),
	array( 'NFD', 4, array( 3, 4 ) ),
	array( 'NFKC', 3, array( 0, 1, 2, 3, 4 ) ),
	array( 'NFKD', 4, array( 0, 1, 2, 3, 4 ) ),
);

while ( ( $col = getRow( $f ) ) !== false ) {
	$lineNo++;

	// Only rows with five data columns plus the description are test cases
	if ( count( $col ) !== 6 ) {
		continue;
	}

	$count++;
	if ( $count % 100 === 0 ) {
		echo "Count: $count\n";
	}

	// Comparisons are made directly and failures handed to my_assert()
	// with the same expression labels as before. String-form assert() is
	// not evaluated on PHP >= 8 and assert() is compiled out entirely when
	// zend.assertions is -1, either of which made every check pass silently.

	# verify that the pure PHP version is correct
	$normalized = array();
	foreach ( $wrapperSuffix as $form => $suffix ) {
		$defaultImpl = 'normalize_form_' . $suffix;
		$phpImpl = 'normalize_form_' . $suffix . '_php';
		for ( $i = 0; $i < 5; $i++ ) {
			$normalized[$form][$i] = $defaultImpl( $col[$i] );
			$phpResult = $phpImpl( $col[$i] );
			if ( $normalized[$form][$i] !== $phpResult ) {
				$name = '$' . $form . 'c' . ( $i + 1 );
				my_assert( __FILE__, __LINE__, $name . ' === ' . $name . 'p' );
			}
		}
	}

	# verify the results against the expected columns of the test file
	foreach ( $conformance as $rule ) {
		list( $form, $expected, $inputs ) = $rule;
		foreach ( $inputs as $i ) {
			if ( $col[$expected] !== $normalized[$form][$i] ) {
				my_assert(
					__FILE__,
					__LINE__,
					'$col[' . $expected . '] === $' . $form . 'c' . ( $i + 1 )
				);
			}
		}
	}
}

fclose( $f );
echo "done.\n";
208 
209 // Compare against http://en.wikipedia.org/wiki/UTF-8#Description
/**
 * Encode a single code point as a UTF-8 byte string.
 *
 * @param int $c Code point
 * @return string|bool One to four bytes, or false when $c is above 0x10FFFF
 */
function unichr( $c ) {
	// Single byte: the value is the byte
	if ( $c <= 0x7F ) {
		return chr( $c );
	}

	// Pick the lead-byte marker and the number of continuation bytes
	if ( $c <= 0x7FF ) {
		$marker = 0xC0;
		$trailing = 1;
	} elseif ( $c <= 0xFFFF ) {
		$marker = 0xE0;
		$trailing = 2;
	} elseif ( $c <= 0x10FFFF ) {
		$marker = 0xF0;
		$trailing = 3;
	} else {
		return false;
	}

	// Lead byte carries the topmost bits; each continuation byte carries
	// the next six bits, most significant group first.
	$bytes = chr( $marker | ( $c >> ( 6 * $trailing ) ) );
	for ( $shift = 6 * ( $trailing - 1 ); $shift >= 0; $shift -= 6 ) {
		$bytes .= chr( 0x80 | ( ( $c >> $shift ) & 0x3F ) );
	}

	return $bytes;
}
226 
/**
 * Convert a whitespace-separated list of hexadecimal code points
 * (e.g. "0044 0307") into the corresponding UTF-8 string.
 *
 * Splitting on runs of whitespace and dropping empty tokens keeps leading,
 * trailing or doubled spaces from turning into empty tokens, which hexdec()
 * would map to 0 and so inject NUL bytes into the result.
 *
 * @param string $c Hexadecimal code points separated by whitespace
 * @return string Concatenation of unichr() applied to each code point
 */
function unistr( $c ) {
	$codes = preg_split( '/\s+/', $c, -1, PREG_SPLIT_NO_EMPTY );
	return implode( "", array_map( "unichr", array_map( "hexdec", $codes ) ) );
}
230 
/**
 * Read and parse the next line of the test data.
 *
 * @param resource $f Open read handle on the test data file
 * @return array|bool False at end of file. For a line that starts with the
 *   comment character, a one-element array holding the raw line. Otherwise
 *   the non-empty columns decoded through unistr(), followed by one final
 *   element: the trailing comment text ("" when the line has no comment).
 */
function getRow( $f ) {
	$row = fgets( $f );
	if ( $row === false ) {
		return false;
	}
	$row = rtrim( $row );

	$pos = strpos( $row, COMMENT );
	if ( $pos === 0 ) {
		// Whole line is a comment
		return array( $row );
	}

	$c = "";
	if ( $pos !== false ) {
		$comment = substr( $row, $pos );
		// Data-line comments look like "# (samples; ...; ) CHARACTER NAME".
		// The sample text may itself contain ")", so the description starts
		// after the LAST closing parenthesis of the comment, not the first.
		$close = strrpos( $comment, ")" );
		if ( $close !== false ) {
			// Skip the parenthesis and the space that follows it
			$c = (string)substr( $comment, $close + 2 );
		} else {
			$c = $comment;
		}
		$row = substr( $row, 0, $pos );
	}

	$ret = array();
	foreach ( explode( SEPARATOR, $row ) as $ent ) {
		// Trim first so padding around a column is not decoded as code points
		$ent = trim( $ent );
		if ( $ent !== "" ) {
			$ret[] = unistr( $ent );
		}
	}
	$ret[] = $c;

	return $ret;
}
normalize_form_kd
normalize_form_kd($c)
Definition: UtfNormalTest2.php:72
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
SEPARATOR
const SEPARATOR
Definition: UtfNormalTest2.php:36
unichr
unichr($c)
Definition: UtfNormalTest2.php:210
$f
$f
Definition: UtfNormalTest2.php:38
unistr
unistr($c)
Definition: UtfNormalTest2.php:227
$ret
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1530
normalize_form_c
normalize_form_c($c)
The following section will be used for testing different normalization methods.
Definition: UtfNormalTest2.php:69
UtfNormal\toNFD
static toNFD( $string)
Convert a UTF-8 string to normal form D, canonical decomposition.
Definition: UtfNormal.php:138
normalize_form_d
normalize_form_d($c)
Definition: UtfNormalTest2.php:70
UtfNormal\toNFC
static toNFC( $string)
Convert a UTF-8 string to normal form C, canonical composition.
Definition: UtfNormal.php:120
normalize_form_kd_php
normalize_form_kd_php($c)
Definition: UtfNormalTest2.php:84
normalize_form_c_php
normalize_form_c_php($c)
This set of functions is only useful if you've added a param to the following functions to force pure ...
Definition: UtfNormalTest2.php:81
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
UtfNormal\toNFKD
static toNFKD( $string)
Convert a UTF-8 string to normal form KD, compatibility decomposition.
Definition: UtfNormal.php:176
normalize_form_kc_php
normalize_form_kc_php($c)
Definition: UtfNormalTest2.php:83
$line
$line
Definition: cdb.php:57
COMMENT
const COMMENT
Definition: UtfNormalTest2.php:33
$file
if(PHP_SAPI !='cli') $file
Definition: UtfNormalTest2.php:30
$count
$count
Definition: UtfNormalTest2.php:96
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
normalize_form_kc
normalize_form_kc($c)
Definition: UtfNormalTest2.php:71
$lineNo
$lineNo
Definition: UtfNormalTest2.php:97
getRow
getRow( $f)
Definition: UtfNormalTest2.php:231
UtfNormal\toNFKC
static toNFKC( $string)
Convert a UTF-8 string to normal form KC, compatibility composition.
Definition: UtfNormal.php:157
my_assert
my_assert( $file, $line, $code)
Definition: UtfNormalTest2.php:91
normalize_form_d_php
normalize_form_d_php($c)
Definition: UtfNormalTest2.php:82