Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 108 |
|
0.00% |
0 / 1 |
CRAP | n/a |
0 / 0 |
|
addRange | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 |
1 | #!/usr/bin/php |
2 | <?php |
3 | |
// Refuse to run under anything but a command-line SAPI.
if ( !in_array( PHP_SAPI, [ 'cli', 'phpdbg' ], true ) ) {
	die( "This script may only be executed from the command line.\n" );
}

// Build a map of code point => UTF-8 encoded character covering
// U+0000 through U+10FFFF. Later sections iterate over this table.
$chars = [];
for ( $i = 0; $i <= 0x10ffff; $i++ ) {
	// Skip UTF-16 surrogates
	if ( $i >= 0xd800 && $i <= 0xdfff ) {
		continue;
	}
	// Encode the code point as big-endian UTF-32, then transcode to UTF-8.
	$utf32 = pack( 'N', $i );
	$chars[$i] = mb_convert_encoding( $utf32, 'UTF-8', 'UTF-32BE' );
}
15 | |
### Uppercase and Lowercase mappings
// Writes two Lua modules next to this script, lower.lua and upper.lua.
// Each returns a table keyed by a UTF-8 character whose value is that
// character's lowercase (resp. uppercase) form as computed by mbstring.
// Characters that are unchanged by the mapping are omitted.
echo "Creating upper and lower tables...\n";
$L = fopen( __DIR__ . '/lower.lua', 'w' );
if ( !$L ) {
	// die() with a string message exits with status 0, which hides the
	// failure from callers; report on stderr and exit non-zero instead.
	fwrite( STDERR, "Failed to open lower.lua\n" );
	exit( 1 );
}
$U = fopen( __DIR__ . '/upper.lua', 'w' );
if ( !$U ) {
	fclose( $L );
	fwrite( STDERR, "Failed to open upper.lua\n" );
	exit( 1 );
}

// Both files share the same preamble.
foreach ( [ $L, $U ] as $out ) {
	fwrite( $out, "-- This file is automatically generated by make-tables.php\n" );
	fwrite( $out, "return {\n" );
}

foreach ( $chars as $i => $c ) {
	$l = mb_strtolower( $c, 'UTF-8' );
	$u = mb_strtoupper( $c, 'UTF-8' );
	// Only emit an entry when the mapping actually changes the character.
	if ( $c !== $l ) {
		fprintf( $L, "\t[\"%s\"] = \"%s\",\n", $c, $l );
	}
	if ( $c !== $u ) {
		fprintf( $U, "\t[\"%s\"] = \"%s\",\n", $c, $u );
	}
}

foreach ( [ $L, $U ] as $out ) {
	fwrite( $out, "}\n" );
	fclose( $out );
}
44 | |
### Pattern code mappings
echo "Creating charsets table...\n";
$fh = fopen( __DIR__ . '/charsets.lua', 'w' );
if ( !$fh ) {
	// die() with a string message exits with status 0, which hides the
	// failure from callers; report on stderr and exit non-zero instead.
	fwrite( STDERR, "Failed to open charsets.lua\n" );
	exit( 1 );
}

// Character-class definitions, keyed by the single-letter class code.
// Each value is a pair:
//   [0] a PCRE fragment matched against one character at a time, or null
//       when the class has no regex of its own;
//   [1] a string of other class codes, or null. When [0] is set, characters
//       already matched by those classes are left out of this class's own
//       table, and the generated Lua consults those classes' tables instead.
$pats = [
	// These should match the expressions in UstringLibrary::patternToRegex()
	'a' => [ '\p{L}', 'lu' ],
	'c' => [ '\p{Cc}', null ],
	'd' => [ '\p{Nd}', null ],
	'l' => [ '\p{Ll}', null ],
	'p' => [ '\p{P}', null ],
	's' => [ '\p{Xps}', null ],
	'u' => [ '\p{Lu}', null ],
	# '[\p{L}\p{Nd}]' exactly matches 'a' + 'd'
	'w' => [ null, 'da' ],
	// NOTE(review): the ranges below are listed twice. That looks like a
	// second set of non-ASCII (e.g. full-width) hex digits that was
	// normalised to ASCII somewhere along the way; confirm against
	// UstringLibrary::patternToRegex() before regenerating the tables.
	'x' => [ '[0-9A-Fa-f0-9A-Fa-f]', null ],
	'z' => [ '\0', null ],
];

// Per-class list of Lua range conditions, filled in by addRange().
$ranges = [];
67 | |
/**
 * Record the half-open code point range [$start, $end) for class $k.
 *
 * Ranges that span at least 10 code points and do not start inside
 * 0x20..0x7e are stored as a Lua comparison expression in the global
 * $ranges[$k]. Every other range is written out to the global $fh handle
 * as one explicit table entry per code point.
 *
 * @param string $k Key under which a range expression is stored in $ranges
 * @param int $start First code point of the range (inclusive)
 * @param int $end End of the range (exclusive)
 */
function addRange( $k, $start, $end ) { // phpcs:ignore MediaWiki.NamingConventions.PrefixedGlobalFunctions
	// phpcs:ignore MediaWiki.NamingConventions.ValidGlobalName
	global $fh, $ranges;

	// Speed/memory tradeoff
	$startsInPrintableAscii = $start >= 0x20 && $start < 0x7f;
	$isShort = ( $end - $start ) < 10;

	if ( $startsInPrintableAscii || $isShort ) {
		// Enumerate each code point individually.
		$codepoint = $start;
		while ( $codepoint < $end ) {
			fprintf( $fh, "\t\t[0x%06x] = 1,\n", $codepoint );
			$codepoint++;
		}
		return;
	}

	// Long range: keep it as a comparison evaluated at lookup time.
	$ranges[$k][] = sprintf( "c >= 0x%06x and c < 0x%06x", $start, $end );
}
85 | |
// ---- Emit the body of charsets.lua ----
// The generated module defines a Lua table `pats`, keyed by the byte value
// of each class letter, and returns it.
fprintf( $fh, "-- This file is automatically generated by make-tables.php\n" );
fprintf( $fh, "local pats = {\n" );

// Pass 1: for every class with its own regex, walk all code points and
// collect maximal runs of consecutive matches, handing each run to addRange().
foreach ( $pats as $k => $pp ) {
	$ranges[$k] = [];
	$re = $pp[0];
	if ( !$re ) {
		// No regex of its own: emit an empty table. Lookups are wired up
		// through the metatable written further down.
		fprintf( $fh, "\t[0x%02x] = {},\n", ord( $k ) );
		continue;
	}

	// Build both regexes once per class rather than once per code point.
	$includeRegex = "/^$re$/u";
	$excludeRegex = null;
	if ( $pp[1] ) {
		$excluded = [];
		foreach ( str_split( $pp[1] ) as $p ) {
			$excluded[] = $pats[$p][0];
		}
		// Group the alternation so that ^ and $ anchor every alternative,
		// not just the first and last one.
		$excludeRegex = '/^(?:' . implode( '|', $excluded ) . ')$/u';
	}

	fprintf( $fh, "\t[0x%02x] = {\n", ord( $k ) );
	$rstart = null;
	foreach ( $chars as $i => $c ) {
		$inClass = preg_match( $includeRegex, $c ) &&
			!( $excludeRegex !== null && preg_match( $excludeRegex, $c ) );
		if ( $inClass ) {
			// Open a run if one is not already in progress.
			$rstart ??= $i;
		} elseif ( $rstart !== null ) {
			// First non-matching code point closes the run (end exclusive).
			addRange( $k, $rstart, $i );
			$rstart = null;
		}
	}
	if ( $rstart !== null ) {
		// Run still open after the last code point.
		addRange( $k, $rstart, 0x110000 );
	}
	fprintf( $fh, "\t},\n" );
}

// Empty tables for the uppercase form of every class letter; they are
// given a negating metatable below.
foreach ( $pats as $k => $pp ) {
	fprintf( $fh, "\t[0x%02x] = {},\n", ord( strtoupper( $k ) ) );
}
fprintf( $fh, "}\n" );

// Pass 2: give each lowercase class an __index fallback that consults the
// classes named in $pp[1] and any range expressions recorded by addRange().
foreach ( $pats as $k => $pp ) {
	$body = '';
	$check = [];
	if ( $pp[1] ) {
		foreach ( str_split( $pp[1] ) as $p ) {
			$check[] = sprintf( "pats[0x%02x][k]", ord( $p ) );
		}
	}
	// @phan-suppress-next-line PhanImpossibleConditionInGlobalScope
	if ( $ranges[$k] ) {
		// Range expressions compare against `c`; 0/0 is the fallback when
		// the key is not numeric.
		$body = "\tlocal c = tonumber( k ) or 0/0;\n";
		$check = array_merge( $check, $ranges[$k] );
	}
	if ( $check ) {
		$body .= "\treturn " . implode( " or\n\t\t", $check );
		fprintf( $fh, "setmetatable( pats[0x%02x], { __index = function ( t, k )\n%s\nend } )\n",
			ord( $k ), $body );
	}
}

// Pass 3: each uppercase class is the negation of its lowercase counterpart.
foreach ( $pats as $k => $pp ) {
	fprintf(
		$fh,
		"setmetatable( pats[0x%02x], { " .
			"__index = function ( t, k ) return k and not pats[0x%02x][k] end" .
			" } )\n",
		ord( strtoupper( $k ) ),
		ord( $k )
	);
}

// Lua epilogue: pre-populate entries 0x20..0x7e of every table whose key is
// >= 0x61, then return the module table.
$epilogue = [
	"\n-- For speed, cache printable ASCII characters in main tables\n",
	"for k, t in pairs( pats ) do\n",
	"\tif k >= 0x61 then\n",
	"\t\tfor i = 0x20, 0x7e do\n",
	"\t\t\tt[i] = t[i] or false\n",
	"\t\tend\n",
	"\tend\n",
	"end\n",
	"\nreturn pats\n",
];
fwrite( $fh, implode( '', $epilogue ) );
fclose( $fh );