MediaWiki  1.23.0
UtfNormalGenerate.php
Go to the documentation of this file.
1 <?php
# This generator is a maintenance script: stop immediately, with a message,
# under any SAPI other than the command-line one.
if( 'cli' !== PHP_SAPI ) {
	exit( "Run me from the command line please.\n" );
}
31 
32 require_once 'UtfNormalDefines.php';
33 require_once 'UtfNormalUtil.php';
34 
# Build the NFC quick-check table from DerivedNormalizationProps.txt.
# A record of interest starts with a hex code point, optionally followed by
# ".." and a second hex code point (an inclusive range), then "; NFC_QC ;"
# and a value of M or N. All other lines are ignored.
$in = fopen( "DerivedNormalizationProps.txt", "rt" );
if( !$in ) {
	print "Can't open DerivedNormalizationProps.txt for reading.\n";
	print "If necessary, fetch this file from the internet:\n";
	print "http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt\n";
	exit(-1);
}
print "Initializing normalization quick check tables...\n";

# Maps the UTF-8 encoding of a character to its NFC_QC value ('M' or 'N').
# Initialised explicitly so that it is a real (possibly empty) array when it
# is serialised later, even if the input contains no matching record.
$checkNFC = array();

while( false !== ( $line = fgets( $in ) ) ) {
	$matches = array();
	# The range separator is a literal "..", so both dots are escaped;
	# unescaped they would accept any two characters between the numbers.
	if( !preg_match( '/^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(NFC_QC)\s*;\s*([MN])/', $line, $matches ) ) {
		continue;
	}

	$first = hexdec( $matches[1] );
	# No range end given: the record covers a single code point.
	# The end is converted once here rather than on every loop iteration.
	$last = ( $matches[2] === '' ) ? $first : hexdec( $matches[2] );
	$value = $matches[4];

	for( $i = $first; $i <= $last; $i++ ) {
		$checkNFC[codepointToUtf8( $i )] = $value;
	}
}
fclose( $in );
57 
# Load CompositionExclusions.txt: the characters listed there are left out
# of the canonical composition table that is built from UnicodeData.txt.
$in = fopen( "CompositionExclusions.txt", "rt" );
if( !$in ) {
	print "Can't open CompositionExclusions.txt for reading.\n";
	print "If necessary, fetch this file from the internet:\n";
	print "http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt\n";
	exit(-1);
}

# Set of excluded characters, keyed by their UTF-8 encoding.
# Initialised explicitly so it is always a defined array.
$exclude = array();

while( false !== ( $line = fgets( $in ) ) ) {
	# Reset per line so a non-matching line never sees stale captures.
	$matches = array();
	# Only lines that begin with a hex code point are records; anything
	# else fails the anchored match and is skipped.
	if( preg_match( '/^([0-9A-F]+)/i', $line, $matches ) ) {
		$codepoint = $matches[1];
		$source = codepointToUtf8( hexdec( $codepoint ) );
		$exclude[$source] = true;
	}
}
fclose( $in );
74 
# Parse UnicodeData.txt (one semicolon-separated record per line) into the
# combining-class, decomposition and composition tables.
$in = fopen( "UnicodeData.txt", "rt" );
if( !$in ) {
	print "Can't open UnicodeData.txt for reading.\n";
	print "If necessary, fetch this file from the internet:\n";
	print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
	exit(-1);
}

# Output tables; every key and value below is a UTF-8 string unless noted.
# They are initialised explicitly so each one is a defined array when it is
# iterated and serialised later, even for an input with no matching records.
$compatibilityDecomp = array(); # char => decomposition (canonical and compatibility)
$canonicalDecomp = array();     # char => canonical decomposition only
$canonicalComp = array();       # canonical decomposition => char (minus exclusions)
$combiningClass = array();      # char => non-zero combining class (int)

# Counters reported once all mappings have been expanded.
$total = 0;
$compat = 0;
$canon = 0;

print "Reading character definitions...\n";
while( false !== ( $line = fgets( $in ) ) ) {
	$columns = explode( ';', $line );
	# Fields 0, 3 and 5 are read below. A blank or truncated line would
	# otherwise be treated as a record for code point 0 with a bogus
	# mapping, so skip anything that does not have them.
	if( count( $columns ) < 6 ) {
		continue;
	}

	$codepoint = $columns[0];
	$canonicalCombiningClass = intval( $columns[3] );
	$decompositionMapping = $columns[5];

	$source = codepointToUtf8( hexdec( $codepoint ) );

	# Class 0 is not stored. Comparing the converted integer avoids the
	# loose string-to-number comparison, whose result for '' differs
	# between PHP versions.
	if( $canonicalCombiningClass !== 0 ) {
		$combiningClass[$source] = $canonicalCombiningClass;
	}

	if( $decompositionMapping === '' ) {
		continue;
	}

	$matches = array();
	if( preg_match( '/^<(.+)> (.*)$/', $decompositionMapping, $matches ) ) {
		# A leading <tag> marks a compatibility decomposition; keep only
		# the code point sequence that follows the tag.
		$canonical = false;
		$decompositionMapping = $matches[2];
		$compat++;
	} else {
		$canonical = true;
		$canon++;
	}
	$total++;
	$dest = hexSequenceToUtf8( $decompositionMapping );

	# The compatibility table receives every mapping, canonical or not.
	$compatibilityDecomp[$source] = $dest;
	if( $canonical ) {
		$canonicalDecomp[$source] = $dest;
		# Characters on the exclusion list decompose but are never
		# produced by composition.
		if( empty( $exclude[$source] ) ) {
			$canonicalComp[$dest] = $source;
		}
	}
}
fclose( $in );
128 
# Expand the canonical table until a whole pass changes nothing: every
# multi-byte UTF-8 sequence in a decomposition (a lead byte 0xC0-0xFF plus
# its continuation bytes 0x80-0xBF) is handed to callbackCanonical, and any
# entry whose text changes is stored back and counted.
print "Recursively expanding canonical mappings...\n";
$pass = 1;
do {
	print "pass $pass\n";
	$changed = 0;
	foreach( $canonicalDecomp as $source => $dest ) {
		$expanded = preg_replace_callback( '/([\xc0-\xff][\x80-\xbf]+)/', 'callbackCanonical', $dest );
		if( $expanded !== $dest ) {
			$changed++;
			$canonicalDecomp[$source] = $expanded;
		}
	}
	$pass++;
} while( $changed > 0 );
146 
# Same fixed-point expansion for the compatibility table: keep running
# passes, substituting through callbackCompat, until a pass rewrites no
# entry at all.
print "Recursively expanding compatibility mappings...\n";
$pass = 1;
do {
	print "pass $pass\n";
	$changed = 0;
	foreach( $compatibilityDecomp as $source => $dest ) {
		$expanded = preg_replace_callback( '/([\xc0-\xff][\x80-\xbf]+)/', 'callbackCompat', $dest );
		if( $expanded !== $dest ) {
			$changed++;
			$compatibilityDecomp[$source] = $expanded;
		}
	}
	$pass++;
} while( $changed > 0 );

# Summary of the records counted while reading UnicodeData.txt.
print "$total decomposition mappings ($canon canonical, $compat compatibility)\n";
166 
# Write UtfNormalData.inc: a generated PHP file that restores the combining
# class, canonical composition, canonical decomposition and NFC quick-check
# tables into static properties of UtfNormal via unserialize().
167 $out = fopen("UtfNormalData.inc", "wt");
168 if( $out ) {
# Each table is serialised, then escaped so it can be embedded inside a
# single-quoted string literal in the generated code.
169  $serCombining = escapeSingleString( serialize( $combiningClass ) );
170  $serComp = escapeSingleString( serialize( $canonicalComp ) );
171  $serCanon = escapeSingleString( serialize( $canonicalDecomp ) );
172  $serCheckNFC = escapeSingleString( serialize( $checkNFC ) );
# The "\$" escapes keep the property names literal in the output, while the
# $ser* variables are interpolated. The open tag is assembled from two
# pieces -- presumably so it never appears verbatim in this script; confirm.
# NOTE(review): the listing numbering jumps from 173 to 181 inside this
# string literal, so part of the emitted header may not be visible here --
# check the real file before editing the literal.
173  $outdata = "<" . "?php
181 UtfNormal::\$utfCombiningClass = unserialize( '$serCombining' );
182 UtfNormal::\$utfCanonicalComp = unserialize( '$serComp' );
183 UtfNormal::\$utfCanonicalDecomp = unserialize( '$serCanon' );
184 UtfNormal::\$utfCheckNFC = unserialize( '$serCheckNFC' );
185 \n";
# The return value of fputs() is not checked; success is reported regardless.
186  fputs( $out, $outdata );
187  fclose( $out );
188  print "Wrote out UtfNormalData.inc\n";
189 } else {
# The output file could not be opened for writing: report and abort.
190  print "Can't create file UtfNormalData.inc\n";
191  exit(-1);
192 }
193 
194 
# Write UtfNormalDataK.inc: a generated PHP file that restores the
# compatibility decomposition table into UtfNormal::$utfCompatibilityDecomp.
195 $out = fopen("UtfNormalDataK.inc", "wt");
196 if( $out ) {
# Serialise the table and escape it for a single-quoted string literal.
197  $serCompat = escapeSingleString( serialize( $compatibilityDecomp ) );
# "\$" keeps the property name literal in the output; $serCompat is
# interpolated.
# NOTE(review): the listing numbering jumps from 198 to 206 inside this
# string literal, so part of the emitted header may not be visible here --
# check the real file before editing the literal.
198  $outdata = "<" . "?php
206 UtfNormal::\$utfCompatibilityDecomp = unserialize( '$serCompat' );
207 \n";
# The return value of fputs() is not checked; success is reported regardless.
208  fputs( $out, $outdata );
209  fclose( $out );
210  print "Wrote out UtfNormalDataK.inc\n";
# Successful end of the script: nothing below this point is executed.
211  exit(0);
212 } else {
# The output file could not be opened for writing: report and abort.
213  print "Can't create file UtfNormalDataK.inc\n";
214  exit(-1);
215 }
216 
217 # ---------------
218 
221  if( isset( $canonicalDecomp[$matches[1]] ) ) {
222  return $canonicalDecomp[$matches[1]];
223  }
224  return $matches[1];
225 }
226 
229  if( isset( $compatibilityDecomp[$matches[1]] ) ) {
230  return $compatibilityDecomp[$matches[1]];
231  }
232  return $matches[1];
233 }
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
$compatibilityDecomp
if(! $in) $compatibilityDecomp
Definition: UtfNormalGenerate.php:83
$last
$last
Definition: profileinfo.php:365
$compat
$compat
Definition: UtfNormalGenerate.php:88
$pass
$pass
Definition: UtfNormalGenerate.php:131
$total
$total
Definition: UtfNormalGenerate.php:87
$combiningClass
$combiningClass
Definition: UtfNormalGenerate.php:86
codepointToUtf8
codepointToUtf8( $codepoint)
Return UTF-8 sequence for a given Unicode code point.
Definition: UtfNormalUtil.php:36
$out
$out
Definition: UtfNormalGenerate.php:167
$canonicalComp
$canonicalComp
Definition: UtfNormalGenerate.php:85
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
$canonicalDecomp
$canonicalDecomp
Definition: UtfNormalGenerate.php:84
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
$exclude
if(! $in) $exclude
Definition: UtfNormalGenerate.php:65
$columns
if(! $in) $columns
Definition: Utf8Test.php:50
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
$checkNFC
$checkNFC
Definition: UtfNormalGenerate.php:43
$line
$line
Definition: cdb.php:57
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:336
$matches
if(!defined( 'MEDIAWIKI')) if(!isset( $wgVersion)) $matches
Definition: NoLocalSettings.php:33
$value
$value
Definition: styleTest.css.php:45
hexSequenceToUtf8
hexSequenceToUtf8( $sequence)
Take a series of space-separated hexadecimal numbers representing Unicode code points and return a UT...
Definition: UtfNormalUtil.php:61
escapeSingleString
escapeSingleString( $string)
Escape a string for inclusion in a PHP single-quoted string literal.
Definition: UtfNormalUtil.php:134
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It's targeted particularly at maintainers for Linux since it's been observed that distribution packages of MediaWiki often break We've consistently had to recommend that users seeking support use official tarballs instead of their distribution's and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
callbackCompat
callbackCompat( $matches)
Definition: UtfNormalGenerate.php:227
$canon
$canon
Definition: UtfNormalGenerate.php:89
$source
if(PHP_SAPI !='cli') $source
Definition: mwdoc-filter.php:18
callbackCanonical
callbackCanonical( $matches)
Definition: UtfNormalGenerate.php:219
$in
$in
Definition: UtfNormalGenerate.php:35
$changed
$changed
Definition: UtfNormalGenerate.php:130