MediaWiki  1.34.0
make-tables.php
Go to the documentation of this file.
1 #!/usr/bin/php
2 <?php
3 
// This generator is a maintenance tool: refuse to run under any SAPI other
// than the command-line ones.
if ( !in_array( PHP_SAPI, [ 'cli', 'phpdbg' ], true ) ) {
	die( "This script may only be executed from the command line.\n" );
}
7 
// Build a map of code point => UTF-8 encoded character covering U+0000
// through U+10FFFF. Keys stay in ascending order; the later loops rely on
// that to detect contiguous runs.
$chars = [];
for ( $cp = 0; $cp <= 0x10ffff; $cp++ ) {
	if ( $cp >= 0xd800 && $cp <= 0xdfff ) {
		// Skip UTF-16 surrogates
		continue;
	}
	// Encode the code point as big-endian UTF-32, then convert to UTF-8
	$utf32 = pack( 'N', $cp );
	$chars[$cp] = mb_convert_encoding( $utf32, 'UTF-8', 'UTF-32BE' );
}
14 
### Uppercase and Lowercase mappings
echo "Creating upper and lower tables...\n";

// Both output files live next to this script. Abort as soon as either one
// cannot be opened for writing.
$L = fopen( __DIR__ . '/lower.lua', 'w' );
if ( $L === false ) {
	die( "Failed to open lower.lua\n" );
}
$U = fopen( __DIR__ . '/upper.lua', 'w' );
if ( $U === false ) {
	die( "Failed to open upper.lua\n" );
}

// Each file is a Lua module returning a single table literal
$luaHeader = "-- This file is automatically generated by make-tables.php\n" .
	"return {\n";
fwrite( $L, $luaHeader );
fwrite( $U, $luaHeader );

// One table entry per character whose case mapping differs from itself:
// ["<character>"] = "<mapped character(s)>",
$entryFormat = "\t[\"%s\"] = \"%s\",\n";
foreach ( $chars as $char ) {
	$lower = mb_strtolower( $char, 'UTF-8' );
	if ( $lower !== $char ) {
		fprintf( $L, $entryFormat, $char, $lower );
	}

	$upper = mb_strtoupper( $char, 'UTF-8' );
	if ( $upper !== $char ) {
		fprintf( $U, $entryFormat, $char, $upper );
	}
}

// Close the table literals and release the file handles
fwrite( $L, "}\n" );
fwrite( $U, "}\n" );
fclose( $L );
fclose( $U );
43 
### Pattern code mappings
echo "Creating charsets table...\n";

// charsets.lua is written next to this script. The handle is kept in the
// global $fh because addRange() writes to it as well.
$fh = fopen( __DIR__ . '/charsets.lua', 'w' );
if ( $fh === false ) {
	die( "Failed to open charsets.lua\n" );
}
// Character classes to generate, keyed by their single-letter pattern code.
// Each value is a pair [ regex, delegates ]:
//  - regex: PCRE fragment matching one character of the class, or null when
//    the class has no characters of its own and is defined purely through
//    its delegates.
//  - delegates: string of other pattern codes (one letter each) whose
//    members also belong to this class. Those members are left out of this
//    class's own table and looked up through the delegate tables instead.
//    null when there are none.
$pats = [
	// These should match the expressions in UstringLibrary::patternToRegex()
	'a' => [ '\p{L}', 'lu' ],
	'c' => [ '\p{Cc}', null ],
	'd' => [ '\p{Nd}', null ],
	'l' => [ '\p{Ll}', null ],
	'p' => [ '\p{P}', null ],
	's' => [ '\p{Xps}', null ],
	'u' => [ '\p{Lu}', null ],
	'w' => [ null, 'da' ], # '[\p{L}\p{Nd}]' exactly matches 'a' + 'd'
	// NOTE(review): the ASCII hex ranges appear twice in this class. The
	// second copy may originally have been a different set of characters
	// (e.g. full-width forms) lost in transcription - confirm against
	// UstringLibrary::patternToRegex() before changing it.
	'x' => [ '[0-9A-Fa-f0-9A-Fa-f]', null ],
	'z' => [ '\0', null ],
];
63 
// Per pattern code: list of Lua range tests ("c >= X and c < Y") collected
// by addRange() for long runs of code points.
$ranges = [];

/**
 * Record the half-open run of code points [$start, $end) as belonging to
 * the character class $k.
 *
 * Long runs are stored in the global $ranges as a Lua comparison on "c".
 * Short runs, and any run that begins in printable ASCII (0x20-0x7e), are
 * written directly to the global output handle $fh as individual Lua table
 * entries.
 *
 * @param string $k Pattern code the run belongs to
 * @param int $start First code point of the run (inclusive)
 * @param int $end End of the run (exclusive)
 */
// @codingStandardsIgnoreLine MediaWiki.NamingConventions.PrefixedGlobalFunctions
function addRange( $k, $start, $end ) {
	// @codingStandardsIgnoreLine MediaWiki.NamingConventions.ValidGlobalName
	global $fh, $ranges;

	$startsInPrintableAscii = $start >= 0x20 && $start < 0x7f;
	$isLongRun = ( $end - $start ) >= 10;

	// Speed/memory tradeoff
	if ( $isLongRun && !$startsInPrintableAscii ) {
		$ranges[$k][] = sprintf( "c >= 0x%06x and c < 0x%06x", $start, $end );
		return;
	}

	$codePoint = $start;
	while ( $codePoint < $end ) {
		fprintf( $fh, "\t\t[0x%06x] = 1,\n", $codePoint );
		$codePoint++;
	}
}
78 
// Emit the "pats" table: one sub-table per pattern code, holding the
// individually listed code points of that class. Long runs are not written
// here; addRange() collects them in $ranges for the metatable functions
// generated further down.
fprintf( $fh, "-- This file is automatically generated by make-tables.php\n" );
fprintf( $fh, "local pats = {\n" );
foreach ( $pats as $k => $pp ) {
	$ranges[$k] = [];
	$re = $pp[0];
	if ( !$re ) {
		// No regex of its own: the class is defined only through its
		// delegates, so its table starts out empty.
		fprintf( $fh, "\t[0x%02x] = {},\n", ord( $k ) );
		continue;
	}

	// Build the regexes once per class rather than once per character.
	// The alternatives are wrapped in a group so that "^" and "$" anchor
	// the whole expression, not just the first and last alternative.
	$includeRegex = '/^(?:' . $re . ')$/u';
	$excludeRegex = null;
	if ( $pp[1] ) {
		// Characters already covered by a delegate class are left out of
		// this class's own table.
		$delegateRegexes = [];
		foreach ( str_split( $pp[1] ) as $p ) {
			$delegateRegexes[] = $pats[$p][0];
		}
		$excludeRegex = '/^(?:' . implode( '|', $delegateRegexes ) . ')$/u';
	}

	// preg_match() returns false for a pattern that fails to compile, which
	// the loop below would silently treat as "no match" and so generate an
	// empty class. Fail loudly instead.
	foreach ( [ $includeRegex, $excludeRegex ] as $regex ) {
		if ( $regex !== null && preg_match( $regex, '' ) === false ) {
			die( "Invalid regex for pattern code '$k': $regex\n" );
		}
	}

	fprintf( $fh, "\t[0x%02x] = {\n", ord( $k ) );

	// Walk all characters in code point order and hand each maximal run of
	// consecutive matches to addRange() as [start, end).
	$rstart = null;
	foreach ( $chars as $i => $c ) {
		$isMember = preg_match( $includeRegex, $c ) &&
			( $excludeRegex === null || !preg_match( $excludeRegex, $c ) );
		if ( $isMember ) {
			if ( $rstart === null ) {
				$rstart = $i;
			}
		} elseif ( $rstart !== null ) {
			addRange( $k, $rstart, $i );
			$rstart = null;
		}
	}
	if ( $rstart !== null ) {
		// The final run extends to the end of the code space
		addRange( $k, $rstart, 0x110000 );
	}
	fprintf( $fh, "\t},\n" );
}
// Every lowercase pattern code gets an uppercase counterpart whose table
// starts out empty; then the "pats" table literal is closed.
foreach ( array_keys( $pats ) as $code ) {
	fprintf( $fh, "\t[0x%02x] = {},\n", ord( strtoupper( $code ) ) );
}
fprintf( $fh, "}\n" );
// For each lowercase class, emit an __index metamethod that handles keys
// missing from its table: it consults the delegate classes first and then
// the code point range tests collected by addRange(). Classes with neither
// get no metatable here.
foreach ( $pats as $code => $spec ) {
	$conditions = [];

	// One lookup per delegate class
	$delegates = $spec[1] ? str_split( $spec[1] ) : [];
	foreach ( $delegates as $delegate ) {
		$conditions[] = sprintf( "pats[0x%02x][k]", ord( $delegate ) );
	}

	// The range tests compare against "c", so define it only when needed
	$prologue = '';
	if ( $ranges[$code] ) {
		$prologue = "\tlocal c = tonumber( k ) or 0/0;\n";
		foreach ( $ranges[$code] as $rangeTest ) {
			$conditions[] = $rangeTest;
		}
	}

	if ( !$conditions ) {
		continue;
	}

	$luaBody = $prologue . "\treturn " . implode( " or\n\t\t", $conditions );
	fprintf(
		$fh,
		"setmetatable( pats[0x%02x], { __index = function ( t, k )\n%s\nend } )\n",
		ord( $code ),
		$luaBody
	);
}
// Each uppercase class is the complement of its lowercase counterpart:
// its __index metamethod negates the lookup in the lowercase table.
foreach ( array_keys( $pats ) as $code ) {
	fprintf(
		$fh,
		"setmetatable( pats[0x%02x], { __index = function ( t, k ) return k and not pats[0x%02x][k] end } )\n",
		ord( strtoupper( $code ) ),
		ord( $code )
	);
}
// Trailer of the generated Lua module: a loop that pre-fills the entries
// for 0x20-0x7e in every table whose key is >= 0x61, followed by the
// module's return statement.
$footerLines = [
	'',
	'-- For speed, cache printable ASCII characters in main tables',
	'for k, t in pairs( pats ) do',
	"\tif k >= 0x61 then",
	"\t\tfor i = 0x20, 0x7e do",
	"\t\t\tt[i] = t[i] or false",
	"\t\tend",
	"\tend",
	'end',
	'',
	'return pats',
];
fwrite( $fh, implode( "\n", $footerLines ) . "\n" );
fclose( $fh );
$pats
if(! $fh) $pats
Definition: make-tables.php:50
$ranges
$ranges
Definition: make-tables.php:64
$L
$L
Definition: make-tables.php:17
$fh
$fh
Definition: make-tables.php:46
$chars
if(PHP_SAPI !=='cli' &&PHP_SAPI !=='phpdbg') $chars
Definition: make-tables.php:8
$U
if(! $L) $U
Definition: make-tables.php:21
addRange
addRange( $k, $start, $end)
Definition: make-tables.php:66