MediaWiki  1.23.8
UtfNormalTest.php
Go to the documentation of this file.
1 <?php
// This is a command-line test script: refuse to run under any other SAPI.
if ( PHP_SAPI !== 'cli' ) {
	die( "Run me from the command line please.\n" );
}

// When true, a failing test case is re-run with per-comparison output.
$verbose = true;
// Uncomment to make pretty() dump raw bytes (bin2hex) instead of a
// code point sequence (utf8ToHexSequence).
#define( 'PRETTY_UTF8', true );
34 
if ( defined( 'PRETTY_UTF8' ) ) {
	/**
	 * Render a string as upper-case hexadecimal of its raw bytes.
	 *
	 * @param string $string
	 * @return string
	 */
	function pretty( $string ) {
		return strtoupper( bin2hex( $string ) );
	}
} else {
	/**
	 * Render a string as the upper-cased output of utf8ToHexSequence().
	 *
	 * utf8ToHexSequence() is not defined in this file; it must have been
	 * loaded by the time pretty() is first called.
	 *
	 * @param string $string
	 * @return string
	 */
	function pretty( $string ) {
		return strtoupper( utf8ToHexSequence( $string ) );
	}
}
49 
50 if( isset( $_SERVER['argv'] ) && in_array( '--icu', $_SERVER['argv'] ) ) {
51  dl( 'php_utfnormal.so' );
52 }
53 
54 require_once 'UtfNormalDefines.php';
55 require_once 'UtfNormalUtil.php';
56 require_once 'UtfNormal.php';
57 
// Open the conformance data file; without it no test can run.
$in = fopen( "NormalizationTest.txt", "rt" );
if ( !$in ) {
	print "Couldn't open NormalizationTest.txt -- can't run tests.\n";
	print "If necessary, manually download this file. It can be obtained at\n";
	// Terminate the final line too, so the shell prompt does not end up
	// glued to the URL.
	print "http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";
	exit( -1 );
}
65 
67 
68 $total = 0;
71 $ok = true;
73 while( false !== ( $line = fgets( $in ) ) ) {
74  list( $data, $comment ) = explode( '#', $line );
75  if( $data === '' ) continue;
76  $matches = array();
77  if( preg_match( '/@Part([\d])/', $data, $matches ) ) {
78  if( $matches[1] > 0 ) {
80  }
81  print "Part {$matches[1]}: $comment";
82  continue;
83  }
84 
85  $columns = array_map( "hexSequenceToUtf8", explode( ";", $data ) );
86  array_unshift( $columns, '' );
87 
88  $testedChars[$columns[1]] = true;
89  $total++;
91  $success++;
92  } else {
93  $failure++;
94  # print "FAILED: $comment";
95  }
96  if( $total % 100 == 0 ) print "$total ";
97 }
98 fclose( $in );
99 
101 
102 $in = fopen("UnicodeData.txt", "rt" );
103 if( !$in ) {
104  print "Can't open UnicodeData.txt for reading.\n";
105  print "If necessary, fetch this file from the internet:\n";
106  print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
107  exit(-1);
108 }
109 print "Now testing invariants...\n";
110 while( false !== ($line = fgets( $in ) ) ) {
111  $cols = explode( ';', $line );
112  $char = codepointToUtf8( hexdec( $cols[0] ) );
113  $desc = $cols[0] . ": " . $cols[1];
114  if( $char < "\x20" || $char >= UTF8_SURROGATE_FIRST && $char <= UTF8_SURROGATE_LAST ) {
115  # Can't check NULL with the ICU plugin, as null bytes fail in C land.
116  # Skip other control characters, as we strip them for XML safety.
117  # Surrogates are illegal on their own or in UTF-8, ignore.
118  continue;
119  }
120  if( empty( $testedChars[$char] ) ) {
121  $total++;
122  if( testInvariant( $normalizer, $char, $desc, $verbose ) ) {
123  $success++;
124  } else {
125  $failure++;
126  }
127  if( $total % 100 == 0 ) print "$total ";
128  }
129 }
130 fclose( $in );
131 
133 
134 if( $ok ) {
135  print "TEST SUCCEEDED!\n";
136  exit(0);
137 } else {
138  print "TEST FAILED!\n";
139  exit(-1);
140 }
141 
142 ## ------
143 
145  $percSucc = intval( $success * 100 / $total );
146  $percFail = intval( $failure * 100 / $total );
147  print "\n";
148  print "$success tests successful ($percSucc%)\n";
149  print "$failure tests failed ($percFail%)\n\n";
150  $ok = ($success > 0 && $failure == 0);
151  $total = 0;
152  $success = 0;
153  $failure = 0;
154  return $ok;
155 }
156 
/**
 * Run all five per-form checks (NFC, NFD, NFKC, NFKD, cleanUp) on one
 * row of test columns.
 *
 * @param object $u Normalizer object handed through to the per-form checks
 * @param array $c Test columns, handed through to the per-form checks
 * @param string $comment Text printed before the detailed report of a failing row
 * @param bool $verbose If true, a failing row is run a second time with
 *   per-comparison output enabled
 * @param bool $reportFailure Forwarded to the per-form checks as their
 *   verbosity flag; set on the second (reporting) pass
 * @return bool True only if every check passed
 */
function testNormals( &$u, $c, $comment, $verbose, $reportFailure = false ) {
	$checks = array( 'testNFC', 'testNFD', 'testNFKC', 'testNFKD', 'testCleanUp' );

	$result = true;
	foreach ( $checks as $check ) {
		// The check is the left operand so it always runs, even after an
		// earlier failure; the reporting pass must list every comparison.
		$result = $check( $u, $c, $comment, $reportFailure ) && $result;
	}

	if ( $verbose && !$result && !$reportFailure ) {
		// Second pass: same checks, this time printing each comparison.
		print $comment;
		testNormals( $u, $c, $comment, $verbose, true );
	}
	return $result;
}
170 
/**
 * Compare an expected string with an actual one and optionally print the
 * outcome of the comparison.
 *
 * @param string $a Expected value
 * @param string $b Actual value
 * @param int $col Column number shown in the report line
 * @param string $form Label of the operation shown in the report line
 * @param bool $verbose If true, print one line describing the comparison
 * @return bool True if both strings are equal
 */
function verbosify( $a, $b, $col, $form, $verbose ) {
	// NOTE(review): strcmp() is kept in preference to ($a === $b), which an
	// earlier revision had commented out -- presumably so that a normalizer
	// returning a non-string still compares by string value; confirm.
	$result = ( strcmp( $a, $b ) === 0 );
	if ( $verbose ) {
		$status = $result ? "succeed" : " failed";
		$operator = $result ? "==" : "!=";
		print " $status $form c$col '" . pretty( $a ) . "' $operator '" . pretty( $b ) . "'\n";
	}
	return $result;
}
183 
/**
 * Check toNFC() against one row of test columns.
 *
 * Columns 1-3 must all normalize to column 2, and columns 4-5 to column 4
 * (these are the NFC conformance relations of NormalizationTest.txt).
 *
 * @param object $u Normalizer providing toNFC()
 * @param array $c Test columns, indexed 1 to 5
 * @param string $comment Unused here; kept for a uniform check signature
 * @param bool $verbose Passed to verbosify() to print each comparison
 * @return bool True only if all five comparisons matched
 */
function testNFC( &$u, $c, $comment, $verbose ) {
	// input column => column holding the expected result
	$expectedFor = array( 1 => 2, 2 => 2, 3 => 2, 4 => 4, 5 => 4 );

	$result = true;
	foreach ( $expectedFor as $col => $expectedCol ) {
		// verbosify() is the left operand so every comparison runs (and is
		// printed when verbose) even after an earlier mismatch.
		$result = verbosify( $c[$expectedCol], $u->toNFC( $c[$col] ), $col, 'NFC', $verbose ) && $result;
	}
	return $result;
}
192 
/**
 * Check cleanUp() against one row of test columns.
 *
 * The expectations are the same as for testNFC(): columns 1-3 must yield
 * column 2, and columns 4-5 must yield column 4.
 *
 * @param object $u Normalizer providing cleanUp()
 * @param array $c Test columns, indexed 1 to 5
 * @param string $comment Unused here; kept for a uniform check signature
 * @param bool $verbose Passed to verbosify() to print each comparison
 * @return bool True only if all five comparisons matched
 */
function testCleanUp( &$u, $c, $comment, $verbose ) {
	// input column => column holding the expected result
	$expectedFor = array( 1 => 2, 2 => 2, 3 => 2, 4 => 4, 5 => 4 );

	$result = true;
	foreach ( $expectedFor as $col => $expectedCol ) {
		// Hand cleanUp() a scratch copy rather than the array element.
		// NOTE(review): presumably because cleanUp() may take its argument
		// by reference -- confirm against UtfNormal::cleanUp().
		$x = $c[$col];
		$result = verbosify( $c[$expectedCol], $u->cleanUp( $x ), $col, 'cleanUp', $verbose ) && $result;
	}
	return $result;
}
206 
/**
 * Check toNFD() against one row of test columns.
 *
 * Columns 1-3 must all normalize to column 3, and columns 4-5 to column 5
 * (these are the NFD conformance relations of NormalizationTest.txt).
 *
 * @param object $u Normalizer providing toNFD()
 * @param array $c Test columns, indexed 1 to 5
 * @param string $comment Unused here; kept for a uniform check signature
 * @param bool $verbose Passed to verbosify() to print each comparison
 * @return bool True only if all five comparisons matched
 */
function testNFD( &$u, $c, $comment, $verbose ) {
	// input column => column holding the expected result
	$expectedFor = array( 1 => 3, 2 => 3, 3 => 3, 4 => 5, 5 => 5 );

	$result = true;
	foreach ( $expectedFor as $col => $expectedCol ) {
		// verbosify() is the left operand so every comparison runs (and is
		// printed when verbose) even after an earlier mismatch.
		$result = verbosify( $c[$expectedCol], $u->toNFD( $c[$col] ), $col, 'NFD', $verbose ) && $result;
	}
	return $result;
}
215 
/**
 * Check toNFKC() against one row of test columns.
 *
 * Every one of columns 1-5 must normalize to column 4 (the NFKC
 * conformance relation of NormalizationTest.txt).
 *
 * @param object $u Normalizer providing toNFKC()
 * @param array $c Test columns, indexed 1 to 5
 * @param string $comment Unused here; kept for a uniform check signature
 * @param bool $verbose Passed to verbosify() to print each comparison
 * @return bool True only if all five comparisons matched
 */
function testNFKC( &$u, $c, $comment, $verbose ) {
	$result = true;
	for ( $col = 1; $col <= 5; $col++ ) {
		// verbosify() is the left operand so every comparison runs (and is
		// printed when verbose) even after an earlier mismatch.
		$result = verbosify( $c[4], $u->toNFKC( $c[$col] ), $col, 'NFKC', $verbose ) && $result;
	}
	return $result;
}
224 
/**
 * Check toNFKD() against one row of test columns.
 *
 * Every one of columns 1-5 must normalize to column 5 (the NFKD
 * conformance relation of NormalizationTest.txt).
 *
 * @param object $u Normalizer providing toNFKD()
 * @param array $c Test columns, indexed 1 to 5
 * @param string $comment Unused here; kept for a uniform check signature
 * @param bool $verbose Passed to verbosify() to print each comparison
 * @return bool True only if all five comparisons matched
 */
function testNFKD( &$u, $c, $comment, $verbose ) {
	$result = true;
	for ( $col = 1; $col <= 5; $col++ ) {
		// verbosify() is the left operand so every comparison runs (and is
		// printed when verbose) even after an earlier mismatch.
		$result = verbosify( $c[5], $u->toNFKD( $c[$col] ), $col, 'NFKD', $verbose ) && $result;
	}
	return $result;
}
233 
/**
 * Check that a string is left unchanged by toNFC(), toNFD(), toNFKC(),
 * toNFKD() and cleanUp().
 *
 * @param object $u Normalizer providing the five operations
 * @param string $char String expected to be invariant under all of them
 * @param string $desc Text printed before the detailed report of a failure
 * @param bool $verbose If true, a failing string is checked a second time
 *   with per-comparison output enabled
 * @param bool $reportFailure Passed to verbosify() as its verbosity flag;
 *   set on the second (reporting) pass
 * @return bool True only if all five operations returned $char unchanged
 */
function testInvariant( &$u, $char, $desc, $verbose, $reportFailure = false ) {
	// report label => normalizer method; cleanUp deliberately stays last,
	// matching the original call order.
	$operations = array(
		'NFC' => 'toNFC',
		'NFD' => 'toNFD',
		'NFKC' => 'toNFKC',
		'NFKD' => 'toNFKD',
		'cleanUp' => 'cleanUp',
	);

	$result = true;
	foreach ( $operations as $form => $method ) {
		// verbosify() is the left operand so every comparison runs even
		// after an earlier mismatch.
		$result = verbosify( $char, $u->$method( $char ), 1, $form, $reportFailure ) && $result;
	}

	if ( $verbose && !$result && !$reportFailure ) {
		// Second pass: same comparisons, this time printing each one.
		print $desc;
		testInvariant( $u, $char, $desc, $verbose, true );
	}
	return $result;
}
$result
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message. Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item. $reader:XMLReader object $logInfo:Array of information Return false to stop further processing of the tag 'ImportHandlePageXMLTag':When parsing a XML tag in a page. $reader:XMLReader object $pageInfo:Array of information Return false to stop further processing of the tag 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision. $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information Return false to stop further processing of the tag 'ImportHandleToplevelXMLTag':When parsing a top level XML tag. $reader:XMLReader object Return false to stop further processing of the tag 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload. $reader:XMLReader object $revisionInfo:Array of information Return false to stop further processing of the tag 'InfoAction':When building information to display on the action=info page. $context:IContextSource object & $pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect. $title:Title object for the current page $request:WebRequest $ignoreRedirect:boolean to skip redirect check $target:Title/string of redirect target $article:Article object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not. Return true without providing an interwiki to continue interwiki search. $prefix:interwiki prefix we are looking for. & $iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 'InternalParseBeforeSanitize':during Parser 's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings. 
Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InternalParseBeforeLinks':during Parser 's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InvalidateEmailComplete':Called after a user 's email has been invalidated successfully. $user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification. Callee may modify $url and $query, URL will be constructed as $url . $query & $url:URL to index.php & $query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) $article:article(object) being checked 'IsTrustedProxy':Override the result of wfIsTrustedProxy() $ip:IP being check $result:Change this value to override the result of wfIsTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from & $allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of User::isValidEmailAddr(), for instance to return false if the domain name doesn 't match your organization. 
$addr:The e-mail address entered by the user & $result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user & $result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we 're looking for a messages file for & $file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces. Do not use this hook to add namespaces. Use CanonicalNamespaces for that. & $namespaces:Array of namespaces indexed by their numbers 'LanguageGetMagic':DEPRECATED, use $magicWords in a file listed in $wgExtensionMessagesFiles instead. Use this to define synonyms of magic words depending of the language $magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetSpecialPageAliases':DEPRECATED, use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead. Use to define aliases of special pages names depending of the language $specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names. & $names:array of language code=> language name $code language of the preferred translations 'LanguageLinks':Manipulate a page 's language links. This is called in various places to allow extensions to define the effective language links for a page. $title:The page 's Title. & $links:Associative array mapping language codes to prefixed links of the form "language:title". & $linkFlags:Associative array mapping prefixed links to arrays of flags. Currently unused, but planned to provide support for marking individual language links in the UI, e.g. for featured articles. 
'LinkBegin':Used when generating internal and interwiki links in Linker::link(), before processing starts. Return false to skip default processing and return $ret. See documentation for Linker::link() for details on the expected meanings of parameters. $skin:the Skin object $target:the Title that the link is pointing to & $html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1528
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
$in
$in
Definition: UtfNormalTest.php:58
utf8ToHexSequence
utf8ToHexSequence( $str)
Take a UTF-8 string and return a space-separated series of hex numbers representing Unicode code points.
Definition: UtfNormalUtil.php:78
$form
usually copyright or history_copyright This message must be in HTML not wikitext $subpages will be ignored and the rest of subPageSubtitle() will run. 'SkinTemplateBuildNavUrlsNav_urlsAfterPermalink' whether MediaWiki currently thinks this is a CSS JS page Hooks may change this value to override the return value of Title::isCssOrJsPage(). 'TitleIsAlwaysKnown' whether MediaWiki currently thinks this page is known isMovable() always returns false. $title whether MediaWiki currently thinks this page is movable Hooks may change this value to override the return value of Title::isMovable(). 'TitleIsWikitextPage' whether MediaWiki currently thinks this is a wikitext page Hooks may change this value to override the return value of Title::isWikitextPage() 'TitleMove' use UploadVerification and UploadVerifyFile instead $form
Definition: hooks.txt:2573
$success
$success
Definition: UtfNormalTest.php:69
$verbose
if(PHP_SAPI !='cli') $verbose
Definition: UtfNormalTest.php:32
testNormals
testNormals(&$u, $c, $comment, $verbose, $reportFailure=false)
Definition: UtfNormalTest.php:157
codepointToUtf8
codepointToUtf8( $codepoint)
Return UTF-8 sequence for a given Unicode code point.
Definition: UtfNormalUtil.php:36
UtfNormal
Unicode normalization routines for working with UTF-8 strings.
Definition: UtfNormal.php:48
reportResults
reportResults(&$total, &$success, &$failure)
Definition: UtfNormalTest.php:144
UTF8_SURROGATE_FIRST
const UTF8_SURROGATE_FIRST
Definition: UtfNormalDefines.php:61
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
$comment
$comment
Definition: importImages.php:107
testNFD
testNFD(&$u, $c, $comment, $verbose)
Definition: UtfNormalTest.php:207
$columns
if(! $in) $columns
Definition: Utf8Test.php:50
$normalizer
if(! $in) $normalizer
Definition: UtfNormalTest.php:66
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
testNFKC
testNFKC(&$u, $c, $comment, $verbose)
Definition: UtfNormalTest.php:216
testNFKD
testNFKD(&$u, $c, $comment, $verbose)
Definition: UtfNormalTest.php:225
$ok
$ok
Definition: UtfNormalTest.php:71
$line
$line
Definition: cdb.php:57
$matches
if(!defined( 'MEDIAWIKI')) if(!isset( $wgVersion)) $matches
Definition: NoLocalSettings.php:33
testInvariant
testInvariant(&$u, $char, $desc, $verbose, $reportFailure=false)
Definition: UtfNormalTest.php:234
$testedChars
$testedChars
Definition: UtfNormalTest.php:72
verbosify
verbosify( $a, $b, $col, $form, $verbose)
Definition: UtfNormalTest.php:171
testNFC
testNFC(&$u, $c, $comment, $verbose)
Definition: UtfNormalTest.php:184
testCleanUp
testCleanUp(&$u, $c, $comment, $verbose)
Definition: UtfNormalTest.php:193
$total
$total
Definition: UtfNormalTest.php:68
$failure
$failure
Definition: UtfNormalTest.php:70
UTF8_SURROGATE_LAST
const UTF8_SURROGATE_LAST
Definition: UtfNormalDefines.php:62