Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
106 / 106 |
|
100.00% |
5 / 5 |
CRAP | |
100.00% |
1 / 1 |
GenerateEquivset | |
100.00% |
106 / 106 |
|
100.00% |
5 / 5 |
29 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
3 | |||
configure | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
execute | |
100.00% |
87 / 87 |
|
100.00% |
1 / 1 |
20 | |||
compareCodePoints | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
generatePHP | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | */ |
18 | |
19 | namespace Wikimedia\Equivset\Command; |
20 | |
21 | use RuntimeException; |
22 | use Symfony\Component\Console\Command\Command; |
23 | use Symfony\Component\Console\Input\InputInterface; |
24 | use Symfony\Component\Console\Output\OutputInterface; |
25 | |
/**
 * Generate Equivset Command.
 *
 * Reads `equivset.in` from the data directory, validates every mapping in it,
 * and writes the resulting character map to the distribution directory as
 * `equivset.json`, `equivset.php` and `equivset.txt`.
 */
class GenerateEquivset extends Command {

	/**
	 * @var string Directory that contains `equivset.in`
	 */
	protected string $dataDir;

	/**
	 * @var string Directory the generated files are written to
	 */
	protected string $distDir;

	/**
	 * Generate Equivset
	 *
	 * @param string $dataDir Data Directory; an empty string selects `../../data`
	 *  relative to this file
	 * @param string $distDir Distribution Directory; an empty string selects
	 *  `../../dist` relative to this file
	 */
	public function __construct( string $dataDir = '', string $distDir = '' ) {
		parent::__construct();

		// Compare against '' explicitly: a truthiness test would also discard
		// the (valid) directory name '0'.
		$this->dataDir = $dataDir !== '' ? $dataDir : __DIR__ . '/../../data';
		$this->distDir = $distDir !== '' ? $distDir : __DIR__ . '/../../dist';
	}

	/** {@inheritdoc} */
	protected function configure() {
		$this->setName( 'generate-equivset' );
		$this->setDescription(
			'Generate the JSON, PHP, and plain text versions of the equivset in `./dist`'
		);
	}

	/**
	 * Parse `equivset.in` and write the JSON, PHP and text outputs.
	 *
	 * Invalid lines are reported on $output and skipped; the output files are
	 * still written from the valid lines, but the returned status is 1.
	 *
	 * @param InputInterface $input Unused
	 * @param OutputInterface $output Receives error and progress messages
	 * @return int 0 on success, 1 if at least one input line was rejected
	 * @throws RuntimeException If the input file cannot be opened or an
	 *  output file cannot be written
	 */
	public function execute( InputInterface $input, OutputInterface $output ) {
		// phpcs:ignore Generic.PHP.NoSilencedErrors
		$fp = @fopen( $this->dataDir . '/equivset.in', 'rb' );
		if ( $fp === false ) {
			throw new RuntimeException( "Unable to open equivset.in" );
		}

		$lineNum = 0;
		// Map of character => name of the set it belongs to
		$setsByChar = [];
		// Map of set name => list of members (the set name itself is element 0)
		$sets = [];
		$exitStatus = 0;
		$lastChar = null;

		try {
			// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
			while ( ( $line = fgets( $fp ) ) !== false ) {
				$lineNum++;
				$line = trim( $line );

				# Filter blank lines and comments. Compare against '' explicitly so
				# that a line consisting of "0" is reported as invalid, not skipped.
				if ( $line === '' || $line[0] === '#' ) {
					continue;
				}

				# Process line
				if ( !preg_match(
						'/^(?P<hexleft>[0-9A-F]+) +(?P<charleft>.) +=> +' .
						'(?:(?P<hexright>[0-9A-F]+) +(?P<charright>.)|(?P<invisible>invisible)|(?P<space>space))$/u',
						$line, $m, PREG_UNMATCHED_AS_NULL
					)
				) {
					$output->writeln( "<error>Error: invalid entry at line $lineNum: $line</error>" );
					$exitStatus = 1;
					continue;
				}
				$error = false;

				if ( mb_chr( hexdec( $m['hexleft'] ), 'UTF-8' ) !== $m['charleft'] ) {
					$actual = strtoupper( dechex( mb_ord( $m['charleft'], 'UTF-8' ) ) );
					$output->writeln( "<error>Error: left number ({$m['hexleft']}) does not match left " .
						"character ($actual) at line $lineNum: $line</error>" );
					$error = true;
				}
				if ( isset( $m['invisible'] ) ) {
					$m['charright'] = '';
				} elseif ( isset( $m['space'] ) ) {
					$m['charright'] = ' ';
				} elseif ( mb_chr( hexdec( $m['hexright'] ), 'UTF-8' ) !== $m['charright'] ) {
					$actual = strtoupper( dechex( mb_ord( $m['charright'], 'UTF-8' ) ) );
					$output->writeln( "<error>Error: right number ({$m['hexright']}) does not match right " .
						"character ($actual) at line $lineNum: $line</error>" );
					$error = true;
				}
				if ( $m['charleft'] === $m['charright'] ) {
					// hexright is null for the "space" keyword, so fall back to hexleft
					$selfHex = $m['hexright'] ?? $m['hexleft'];
					$output->writeln( "<error>Error: $selfHex maps to itself " .
						"at line $lineNum: $line</error>" );
					$error = true;
				}
				if ( isset( $setsByChar[$m['charleft']] ) ) {
					$output->writeln( "<error>Error: Duplicate character ({$m['charleft']}) " .
						"at line $lineNum: $line</error>" );
					$error = true;
				}
				// Byte-wise comparison of UTF-8 strings orders them by code point;
				// strcmp() avoids the numeric-string rules of the `<` operator.
				if ( $lastChar !== null && strcmp( $m['charleft'], $lastChar ) < 0 ) {
					$output->writeln( "<error>Error: Characters not in order based on hex-value ({$m['charleft']}) " .
						"at line $lineNum: $line</error>" );
					$error = true;
				} else {
					$lastChar = $m['charleft'];
				}
				if ( $error ) {
					$exitStatus = 1;
					continue;
				}

				# Find the set for the right character, add a new one if necessary
				$setName = $setsByChar[$m['charright']] ?? $m['charright'];

				// Circle detected, one edge in every circle is redundant and can just be ignored
				if ( $setName === $m['charleft'] ) {
					continue;
				}

				$sets[$setName] ??= [ $setName ];

				// When a mapping between two chars exists before one of them gets the final set, a merge is needed
				if ( isset( $sets[$m['charleft']] ) ) {
					foreach ( $sets[$m['charleft']] as $char ) {
						$setsByChar[$char] = $setName;
						$sets[$setName][] = $char;
					}
					unset( $sets[$m['charleft']] );
				} else {
					$setsByChar[$m['charleft']] = $setName;
					$sets[$setName][] = $m['charleft'];
				}
			}
		} finally {
			// Always release the input handle, even if reporting an error throws
			fclose( $fp );
		}

		$header = [
			'This file is generated by `bin/console generate-equivset`',
			'It contains a map of characters, encoded in UTF-8, such that running',
			'strtr() on a string with this map will cause confusable characters to',
			'be reduced to a canonical representation.',
		];

		// JSON
		$data = json_encode(
			[ '_readme' => implode( ' ', $header ) ] + $setsByChar,
			JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR
		);
		// Replace the pretty-printer's leading spaces with a single tab
		$data = preg_replace( '/^ +/m', "\t", $data );
		self::writeFile( $this->distDir . '/equivset.json', $data );

		// PHP file
		self::writeFile( $this->distDir . '/equivset.php', self::generatePHP( $setsByChar, $header ) );

		// Text File: one set per line, the set name first, then its members
		uksort( $sets, [ self::class, 'compareCodePoints' ] );
		$text = '';
		foreach ( $sets as $members ) {
			$setName = array_shift( $members );
			usort( $members, [ self::class, 'compareCodePoints' ] );
			$text .= $setName . ' ' . implode( ' ', $members ) . "\n";
		}
		self::writeFile( $this->distDir . '/equivset.txt', $text );

		if ( $exitStatus > 0 ) {
			$output->writeln( '<error>Finished with errors</error>' );
		} else {
			$output->writeln( '<info>Finished</info>' );
		}

		return $exitStatus;
	}

	/**
	 * Write a generated file, failing loudly instead of reporting success
	 * when the file could not be written.
	 *
	 * @param string $path Destination path
	 * @param string $contents Complete file contents
	 * @throws RuntimeException If the file cannot be written
	 */
	private static function writeFile( string $path, string $contents ): void {
		// phpcs:ignore Generic.PHP.NoSilencedErrors
		if ( @file_put_contents( $path, $contents ) === false ) {
			throw new RuntimeException( 'Unable to write ' . basename( $path ) );
		}
	}

	/**
	 * Sort callback ordering strings by the code point of their first
	 * character; the empty string sorts before everything else.
	 *
	 * @param string $a
	 * @param string $b
	 * @return int Negative, zero or positive, as expected by usort()/uksort()
	 */
	private static function compareCodePoints( string $a, string $b ): int {
		if ( $a === '' ) {
			return -1;
		} elseif ( $b === '' ) {
			return 1;
		}
		return mb_ord( $a, 'UTF-8' ) <=> mb_ord( $b, 'UTF-8' );
	}

	/**
	 * Render the character map as the source of a PHP file that returns it
	 * as an array.
	 *
	 * @param string[] $data Map of character => replacement
	 * @param string[] $header Lines emitted as `//` comments at the top of the file
	 * @return string
	 */
	private static function generatePHP( array $data, array $header ): string {
		$s = "<?php\n"
			. "// " . implode( "\n// ", $header ) . "\n"
			. "return [\n";
		foreach ( $data as $key => $value ) {
			// Digit characters become integer array keys; cast back so they are exported as strings
			$s .= "\t" . var_export( (string)$key, true ) . ' => ' . var_export( $value, true ) . ",\n";
		}
		$s .= "];\n";
		return $s;
	}
}
221 | } |