55 if ( !$this->hasOption(
'unicode-data-file' ) ) {
56 $dataFile =
'UnicodeData.txt';
57 if ( !file_exists( $dataFile ) ) {
58 $this->fatalError(
"Unable to find UnicodeData.txt. Please specify " .
59 "its location with --unicode-data-file=<FILE>" );
62 $dataFile = $this->getOption(
'unicode-data-file' );
63 if ( !file_exists( $dataFile ) ) {
64 $this->fatalError(
'Unable to find the specified data file.' );
68 $file = fopen( $dataFile,
'r' );
70 $this->fatalError(
'Unable to open the data file.' );
78 'Canonical_Combining_Class',
80 'Decomposition_Type_Mapping',
81 'Numeric_Type_Value_6',
82 'Numeric_Type_Value_7',
83 'Numeric_Type_Value_8',
87 'Simple_Uppercase_Mapping',
88 'Simple_Lowercase_Mapping',
89 'Simple_Titlecase_Mapping'
96 while ( ( $line = fgets( $file ) ) !==
false ) {
100 $line = trim( substr( $line, 0, strcspn( $line,
'#' ) ) );
101 if ( $line ===
'' ) {
106 $numberedData = explode(
';', $line );
108 foreach ( $fieldNames as $number => $name ) {
109 $data[$name] = $numberedData[$number];
112 $code = base_convert( $data[
'Code'], 16, 10 );
113 if ( ( $code >= 0xFB50 && $code <= 0xFDFF ) # Arabic presentation forms A
114 || ( $code >= 0xFE70 && $code <= 0xFEFF ) # Arabic presentation forms B
116 if ( $data[
'Decomposition_Type_Mapping'] ===
'' ) {
120 if ( !preg_match(
'/^ *(<\w*>) +([0-9A-F ]*)$/',
121 $data[
'Decomposition_Type_Mapping'], $m )
123 $this->error(
"Can't parse Decomposition_Type/Mapping on line $lineNum" );
124 $this->error( $line );
128 $source = UtfNormal\Utils::hexSequenceToUtf8( $data[
'Code'] );
129 $dest = UtfNormal\Utils::hexSequenceToUtf8( $m[2] );
136 file_put_contents(
"$IP/includes/languages/data/NormalizeAr.php", $writer->writeClass(
139 'header' =>
'Generated by generateNormalizerDataAr.php. Do not modify!',
140 'namespace' =>
'MediaWiki\\Languages\\Data',
141 'class' =>
'NormalizeAr',
146 echo
"ar: " . count( $pairs ) .
" pairs written.\n";