MediaWiki REL1_34
make-tables.php
Go to the documentation of this file.
1#!/usr/bin/php
2<?php
3
// Refuse to run under anything other than a command-line SAPI
if ( !in_array( PHP_SAPI, [ 'cli', 'phpdbg' ], true ) ) {
	die( "This script may only be executed from the command line.\n" );
}

// Map every code point from U+0000 to U+10FFFF (surrogates excluded) to its
// UTF-8 encoding, keyed by the code point number.
$chars = [];
for ( $i = 0; $i <= 0x10ffff; $i++ ) {
	if ( $i >= 0xd800 && $i <= 0xdfff ) {
		// UTF-16 surrogates are not characters; leave them out
		continue;
	}
	$chars[$i] = mb_convert_encoding( pack( 'N', $i ), 'UTF-8', 'UTF-32BE' );
}
14
### Uppercase and Lowercase mappings
echo "Creating upper and lower tables...\n";

// Open both output files next to this script; stop at the first failure
$L = fopen( __DIR__ . '/lower.lua', 'w' );
if ( !$L ) {
	die( "Failed to open lower.lua\n" );
}
$U = fopen( __DIR__ . '/upper.lua', 'w' );
if ( !$U ) {
	die( "Failed to open upper.lua\n" );
}

// Both files start with the same banner and open a Lua table
foreach ( [ $L, $U ] as $out ) {
	fprintf( $out, "-- This file is automatically generated by make-tables.php\n" );
	fprintf( $out, "return {\n" );
}

// One Lua entry per character whose case mapping differs from the character itself
$entryFormat = "\t[\"%s\"] = \"%s\",\n";
foreach ( $chars as $c ) {
	$lower = mb_strtolower( $c, 'UTF-8' );
	if ( $lower !== $c ) {
		fprintf( $L, $entryFormat, $c, $lower );
	}

	$upper = mb_strtoupper( $c, 'UTF-8' );
	if ( $upper !== $c ) {
		fprintf( $U, $entryFormat, $c, $upper );
	}
}

// Close the Lua tables, then the files
foreach ( [ $L, $U ] as $out ) {
	fprintf( $out, "}\n" );
}
fclose( $L );
fclose( $U );
43
### Pattern code mappings
echo "Creating charsets table...\n";
$fh = fopen( __DIR__ . '/charsets.lua', 'w' );
if ( !$fh ) {
	die( "Failed to open charsets.lua\n" );
}

// Lua pattern class letter => [ PCRE expression, class letters ].
//  - [0] is the PCRE expression for the class, or null when the class has no
//    expression of its own.
//  - [1] is a string of other class letters, or null. Characters matching any
//    of those classes are left out of this class's generated table; the
//    generated Lua looks them up in those classes' tables instead.
$pats = [
	// These should match the expressions in UstringLibrary::patternToRegex()
	'a' => [ '\p{L}', 'lu' ],
	'c' => [ '\p{Cc}', null ],
	'd' => [ '\p{Nd}', null ],
	'l' => [ '\p{Ll}', null ],
	'p' => [ '\p{P}', null ],
	's' => [ '\p{Xps}', null ],
	'u' => [ '\p{Lu}', null ],
	'w' => [ null, 'da' ], # '[\p{L}\p{Nd}]' exactly matches 'a' + 'd'
	// ASCII hex digits plus their fullwidth forms (U+FF10-FF19, U+FF21-FF26, U+FF41-FF46)
	'x' => [ '[0-9A-Fa-f０-９Ａ-Ｆａ-ｆ]', null ],
	'z' => [ '\0', null ],
];

// Per-class list of Lua range comparisons, filled in by addRange()
$ranges = [];

/**
 * Record that code points $start (inclusive) through $end (exclusive) belong
 * to pattern class $k.
 *
 * A run of ten or more code points that does not start in printable ASCII
 * (0x20-0x7e) is stored in the global $ranges[$k] as a Lua comparison on `c`.
 * Any other run is written straight to the global $fh, one table entry per
 * code point.
 *
 * @param string $k Pattern class letter, used as the key into $ranges
 * @param int $start First code point of the run
 * @param int $end Code point one past the end of the run
 */
// @codingStandardsIgnoreLine MediaWiki.NamingConventions.PrefixedGlobalFunctions
function addRange( $k, $start, $end ) {
	// @codingStandardsIgnoreLine MediaWiki.NamingConventions.ValidGlobalName
	global $fh, $ranges;

	$startsInPrintableAscii = $start >= 0x20 && $start < 0x7f;
	$isLongRun = ( $end - $start ) >= 10;

	// Speed/memory tradeoff: long runs become a comparison instead of many entries
	if ( $isLongRun && !$startsInPrintableAscii ) {
		$ranges[$k][] = sprintf( "c >= 0x%06x and c < 0x%06x", $start, $end );
		return;
	}

	$codePoint = $start;
	while ( $codePoint < $end ) {
		fprintf( $fh, "\t\t[0x%06x] = 1,\n", $codePoint );
		$codePoint++;
	}
}
78
// Banner, then open the Lua table that holds one sub-table per class letter
fprintf( $fh, "-- This file is automatically generated by make-tables.php\n" );
fprintf( $fh, "local pats = {\n" );

foreach ( $pats as $k => $pp ) {
	$ranges[$k] = [];
	$classRe = $pp[0];
	$keyCode = ord( $k );

	if ( !$classRe ) {
		// No expression of its own: emit an empty sub-table
		fprintf( $fh, "\t[0x%02x] = {},\n", $keyCode );
		continue;
	}

	// Characters matching any class named in $pp[1] are left out of this
	// sub-table. Without such classes the placeholder 'fail' is used, which
	// cannot match a single-character subject.
	$skipRe = 'fail';
	if ( $pp[1] ) {
		$skipParts = [];
		foreach ( str_split( $pp[1] ) as $p ) {
			$skipParts[] = $pats[$p][0];
		}
		$skipRe = implode( '|', $skipParts );
	}

	// Build both anchored expressions once per class rather than per character
	$wantedPattern = '/^' . $classRe . '$/u';
	$skipPattern = '/^' . $skipRe . '$/u';

	fprintf( $fh, "\t[0x%02x] = {\n", $keyCode );

	// Walk all characters in code point order, collapsing consecutive
	// members into runs that are handed to addRange().
	$runStart = null;
	foreach ( $chars as $codePoint => $c ) {
		$isMember = preg_match( $wantedPattern, $c ) && !preg_match( $skipPattern, $c );
		if ( $isMember ) {
			if ( $runStart === null ) {
				$runStart = $codePoint;
			}
			continue;
		}
		if ( $runStart !== null ) {
			addRange( $k, $runStart, $codePoint );
			$runStart = null;
		}
	}
	// A run still open after the last character extends to the end of Unicode
	if ( $runStart !== null ) {
		addRange( $k, $runStart, 0x110000 );
	}

	fprintf( $fh, "\t},\n" );
}

// Empty sub-table for the uppercase counterpart of every class letter
foreach ( array_keys( $pats ) as $k ) {
	fprintf( $fh, "\t[0x%02x] = {},\n", ord( strtoupper( $k ) ) );
}
fprintf( $fh, "}\n" );
// Lowercase classes: attach an __index fallback when the class delegates to
// other classes and/or has range comparisons collected in $ranges.
foreach ( $pats as $k => $pp ) {
	$conditions = [];

	// Lookups into the sub-tables of the classes named in $pp[1]
	$delegates = $pp[1] ? str_split( $pp[1] ) : [];
	foreach ( $delegates as $p ) {
		$conditions[] = sprintf( "pats[0x%02x][k]", ord( $p ) );
	}

	// Range comparisons need the key as a number; 0/0 is used when tonumber() fails
	$prelude = '';
	if ( $ranges[$k] ) {
		$prelude = "\tlocal c = tonumber( k ) or 0/0;\n";
		foreach ( $ranges[$k] as $rangeTest ) {
			$conditions[] = $rangeTest;
		}
	}

	if ( !$conditions ) {
		// Nothing to fall back on: the plain sub-table is complete
		continue;
	}

	$body = $prelude . "\treturn " . implode( " or\n\t\t", $conditions );
	fprintf(
		$fh,
		"setmetatable( pats[0x%02x], { __index = function ( t, k )\n%s\nend } )\n",
		ord( $k ),
		$body
	);
}

// Uppercase classes: the negation of the matching lowercase class
foreach ( array_keys( $pats ) as $k ) {
	fprintf(
		$fh,
		"setmetatable( pats[0x%02x], { "
			. "__index = function ( t, k ) return k and not pats[0x%02x][k] end"
			. " } )\n",
		ord( strtoupper( $k ) ),
		ord( $k )
	);
}
// Trailer of charsets.lua: Lua code that pre-fills entries 0x20-0x7e in every
// sub-table whose key is >= 0x61, followed by the module's return statement.
$trailerLines = [
	"\n-- For speed, cache printable ASCII characters in main tables\n",
	"for k, t in pairs( pats ) do\n",
	"\tif k >= 0x61 then\n",
	"\t\tfor i = 0x20, 0x7e do\n",
	"\t\t\tt[i] = t[i] or false\n",
	"\t\tend\n",
	"\tend\n",
	"end\n",
	"\nreturn pats\n",
];
foreach ( $trailerLines as $luaLine ) {
	fwrite( $fh, $luaLine );
}
fclose( $fh );
$fh
if(PHP_SAPI !=='cli' &&PHP_SAPI !=='phpdbg' $chars)
addRange( $k, $start, $end)
if(! $L) $U
$ranges
if(! $fh) $pats