# Abort unless PHP is running under the 'cli' or 'phpdbg' SAPI.
# NOTE(review): the leading numbers (4, 5, ...) look like line numbers carried
# over from the original file, and the closing brace of this `if` is not
# visible in this view - confirm against the original script.
4 if ( PHP_SAPI !==
'cli' && PHP_SAPI !==
'phpdbg' ) {
5 die(
"This script may only be executed from the command line.\n" );
# Build $chars, keyed by code point: every value from 0 through 0x10ffff
# except 0xd800..0xdfff (the UTF-16 surrogate range) gets an entry holding
# that code point's UTF-8 encoding.
# NOTE(review): the closing braces of the loop and the `if` are not visible
# in this view.
9 for ( $i = 0; $i <= 0x10ffff; $i++ ) {
10 if ( $i < 0xd800 || $i > 0xdfff ) {
# pack( 'N', $i ) produces the code point as a 32-bit big-endian integer,
# i.e. one UTF-32BE code unit, which is then converted to UTF-8.
11 $chars[$i] = mb_convert_encoding( pack(
'N', $i ),
'UTF-8',
'UTF-32BE' );
15 ### Uppercase and Lowercase mappings
# NOTE(review): the leading numbers on statements look like line numbers
# carried over from the original file; gaps in them suggest that some lines
# (closing braces, the conditions guarding the die() calls) are not present
# in this view - confirm against the original before relying on control flow.
16 echo
"Creating upper and lower tables...\n";
# Open lower.lua in this script's directory for writing.
17 $L = fopen( __DIR__ .
'/lower.lua',
'w' );
19 die(
"Failed to open lower.lua\n" );
# Open upper.lua in this script's directory for writing.
21 $U = fopen( __DIR__ .
'/upper.lua',
'w' );
23 die(
"Failed to open upper.lua\n" );
# Both files start with a generated-file notice followed by the opening of
# the Lua table constructor that the file returns.
25 fprintf(
$L,
"-- This file is automatically generated by make-tables.php\n" );
26 fprintf(
$L,
"return {\n" );
27 fprintf(
$U,
"-- This file is automatically generated by make-tables.php\n" );
28 fprintf(
$U,
"return {\n" );
# For each entry of $chars ($i => $c), compute the lowercase and uppercase
# forms of $c with the mbstring case-conversion functions.
29 foreach (
$chars as $i => $c ) {
30 $l = mb_strtolower( $c,
'UTF-8' );
31 $u = mb_strtoupper( $c,
'UTF-8' );
# Write a ["<char>"] = "<lowercase>" entry to lower.lua.
# NOTE(review): whether a condition restricts which entries get written
# cannot be determined from the lines visible here.
33 fprintf(
$L,
"\t[\"%s\"] = \"%s\",\n", $c, $l );
# Write a ["<char>"] = "<uppercase>" entry to upper.lua (same caveat as above).
36 fprintf(
$U,
"\t[\"%s\"] = \"%s\",\n", $c, $u );
44 ### Pattern code mappings
45 echo
"Creating charsets table...\n";
# Open charsets.lua in this script's directory for writing.
# NOTE(review): the condition guarding the die() below is not visible here.
46 $fh = fopen( __DIR__ .
'/charsets.lua',
'w' );
48 die(
"Failed to open charsets.lua\n" );
# Table of character classes keyed by a single class letter. Each value is a
# two-element array:
#   [0] a PCRE fragment describing the class, or null;
#   [1] a string of other class letters, or null.
# Later code splits [1] into letters, uses those classes' [0] fragments as an
# exclusion regex, and emits lookups into those classes' Lua tables.
# NOTE(review): the line opening this array is not visible; presumably it is
# the $pats array iterated further down - confirm.
52 'a' => [
'\p{L}',
'lu' ],
53 'c' => [
'\p{Cc}', null ],
54 'd' => [
'\p{Nd}', null ],
55 'l' => [
'\p{Ll}', null ],
56 'p' => [
'\p{P}', null ],
57 's' => [
'\p{Xps}', null ],
58 'u' => [
'\p{Lu}', null ],
59 'w' => [
null,
'da' ], #
# '[\p{L}\p{Nd}]' exactly matches 'a' + 'd'
# NOTE(review): the 'x' class below lists the ranges 0-9, A-F and a-f twice.
# The second set may originally have been different characters (for example
# fullwidth forms) that were lost in transcoding - confirm against the
# original file.
60 'x' => [
'[0-9A-Fa-f0-9A-Fa-f]', null ],
61 'z' => [
'\0', null ],
# Emit the half-open run of code points [$start, $end).
# A run of at least 10 code points whose start lies outside 0x20..0x7e is
# recorded in $ranges[$k] as a Lua range test on `c` instead of being
# written out entry by entry.
# NOTE(review): the enclosing definition that supplies $start, $end, $k, $fh
# and $ranges is not visible in this view.
70 if ( !( $start >= 0x20 && $start < 0x7f ) && $end - $start >= 10 ) {
71 $ranges[$k][] = sprintf(
"c >= 0x%06x and c < 0x%06x", $start, $end );
# Writes one explicit `[0x......] = 1` entry per code point in the run.
# NOTE(review): presumably this loop is the else branch of the test above;
# the line joining them is not visible - confirm.
73 for ( $i = $start; $i < $end; $i++ ) {
74 fprintf(
$fh,
"\t\t[0x%06x] = 1,\n", $i );
# Emit the body of charsets.lua through $fh: a generated-file notice, then a
# local Lua table `pats` with one sub-table per class letter, then
# metatables, then a cache-priming loop and `return pats`.
# NOTE(review): the leading numbers on statements look like line numbers from
# the original file; gaps in them mean several conditions, assignments and
# closing braces are not visible in this view.
79 fprintf(
$fh,
"-- This file is automatically generated by make-tables.php\n" );
80 fprintf(
$fh,
"local pats = {\n" );
# First pass over $pats: emit the sub-table for each class letter, keyed by
# the letter's byte value (ord( $k )).
81 foreach (
$pats as $k => $pp ) {
# Emits an empty sub-table for this letter.
# NOTE(review): the condition selecting this case is not visible here.
85 fprintf(
$fh,
"\t[0x%02x] = {},\n", ord( $k ) );
# Gather the [0] regex fragments of every class letter named in $pp[1] and
# join them with '|' to form the exclusion pattern $re2.
92 foreach ( str_split( $pp[1] ) as $p ) {
93 $re2[] =
$pats[$p][0];
95 $re2 = implode(
'|', $re2 );
98 fprintf(
$fh,
"\t[0x%02x] = {\n", ord( $k ) );
# Test every character in $chars: it qualifies when the whole character
# matches $re and does not match $re2 (both anchored, UTF-8 mode via /u).
# NOTE(review): the assignment of $re is not visible; presumably it is
# $pp[0] - confirm. The $rstart checks below presumably track the start of
# the current run of qualifying code points; their bodies are not visible.
100 foreach (
$chars as $i => $c ) {
101 if ( preg_match(
"/^$re$/u", $c ) && !preg_match(
"/^$re2$/u", $c ) ) {
102 if ( $rstart ===
null ) {
106 if ( $rstart !==
null ) {
112 if ( $rstart !==
null ) {
115 fprintf(
$fh,
"\t},\n" );
# Emit an empty sub-table for the uppercase form of each class letter.
117 foreach (
$pats as $k => $pp ) {
118 $kk = strtoupper( $k );
119 fprintf(
$fh,
"\t[0x%02x] = {},\n", ord( $kk ) );
# Close the `pats` table constructor.
121 fprintf(
$fh,
"}\n" );
# Second pass: build an __index metamethod body for each class letter.
# $check collects Lua expressions that are OR-ed together: first lookups into
# the sub-tables of the letters named in $pp[1], then the range tests stored
# in $ranges[$k].
122 foreach (
$pats as $k => $pp ) {
126 foreach ( str_split( $pp[1] ) as $p ) {
127 $check[] = sprintf(
"pats[0x%02x][k]", ord( $p ) );
# In Lua, 0/0 evaluates to NaN, so a key that tonumber() cannot convert
# fails every `c >= ... and c < ...` range comparison.
131 $body =
"\tlocal c = tonumber( k ) or 0/0;\n";
132 $check = array_merge( $check,
$ranges[$k] );
135 $body .=
"\treturn " . implode(
" or\n\t\t", $check );
# NOTE(review): the remaining arguments of this fprintf() call (values for
# %02x and %s) are not visible in this view.
136 fprintf(
$fh,
"setmetatable( pats[0x%02x], { __index = function ( t, k )\n%s\nend } )\n",
# Third pass: give each uppercase-letter sub-table an __index that returns
# the negation of the lowercase letter's lookup (and passes through a
# nil/false key unchanged via `k and ...`).
140 foreach (
$pats as $k => $pp ) {
141 fprintf(
$fh,
"setmetatable( pats[0x%02x], { ", ord( strtoupper( $k ) ) );
142 fprintf(
$fh,
"__index = function ( t, k ) return k and not pats[0x%02x][k] end", ord( $k ) );
143 fprintf(
$fh,
" } )\n" );
# Append Lua code that, for every sub-table whose key is >= 0x61, stores
# `t[i] or false` for i in 0x20..0x7e, so those keys hold a concrete value in
# the table itself (the emitted comment describes this as a speed cache).
145 fprintf(
$fh,
"\n-- For speed, cache printable ASCII characters in main tables\n" );
146 fprintf(
$fh,
"for k, t in pairs( pats ) do\n" );
147 fprintf(
$fh,
"\tif k >= 0x61 then\n" );
148 fprintf(
$fh,
"\t\tfor i = 0x20, 0x7e do\n" );
149 fprintf(
$fh,
"\t\t\tt[i] = t[i] or false\n" );
150 fprintf(
$fh,
"\t\tend\n" );
151 fprintf(
$fh,
"\tend\n" );
152 fprintf(
$fh,
"end\n" );
# The generated Lua module returns the completed `pats` table.
153 fprintf(
$fh,
"\nreturn pats\n" );