Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 1076 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
MhchemStateMachines | |
0.00% |
0 / 1076 |
|
0.00% |
0 / 3 |
16002 | |
0.00% |
0 / 1 |
mhchemCreateTransitions | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
182 | |||
getGenericActions | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
__construct | |
0.00% |
0 / 1050 |
|
0.00% |
0 / 1 |
12656 |
1 | <?php |
2 | /** |
3 | * Copyright (c) 2023 Johannes Stegmüller |
4 | * |
5 | * This file is a port of mhchemParser originally authored by Martin Hensel in javascript/typescript. |
6 | * The original license for this software can be found in the accompanying LICENSE.mhchemParser-ts.txt file. |
7 | */ |
8 | |
9 | declare( strict_types = 1 ); |
10 | |
11 | namespace MediaWiki\Extension\Math\WikiTexVC\Mhchem; |
12 | |
13 | use Closure; |
14 | use MediaWiki\Extension\Math\WikiTexVC\MHChem\MhchemUtil as MU; |
15 | use RuntimeException; |
16 | |
17 | /** |
18 | * Contains all state machines (~l.506) and genericActions (~l.465) as well as the mhchemCreateTransitions (~l.47) |
19 | * function. |
20 | * These can be found in the mentioned lines in mhchemParser.js by Martin Hensel. |
21 | * |
22 | * Notes: |
23 | * PhanParamTooMany and PhanParamTooFew warnings are suppressed in some cases. |
24 | * These are known false positive warnings for closure call from array with constant keys. |
25 | * https://github.com/phan/phan/issues/4579 |
26 | * |
27 | * @author Johannes Stegmüller |
28 | * @license GPL-2.0-or-later |
29 | */ |
30 | class MhchemStateMachines { |
31 | |
32 | /** @var array|array[] */ |
33 | public array $stateMachines; |
34 | |
35 | /** |
36 | * @var array|Closure[] |
37 | */ |
38 | private array $genericActions; |
39 | |
40 | /** @var MhchemParser */ |
41 | private MhchemParser $mhchemParser; |
42 | |
/**
 * Expand a compact "pattern => state => task" specification into a per-state transition table.
 *
 * Both the pattern keys and the state keys may name several alternatives separated by "|".
 * The state name "*" means "every state that occurs anywhere in the specification".
 * Each task's "action_" entry is normalized to a list of arrays of the form
 * [ "type_" => ... ]; plain strings are wrapped, arrays are taken over unchanged.
 *
 * @param array $o specification: pattern(s) => [ state(s) => task array containing "action_" ]
 * @return array state name => list of [ "pattern" => string, "task" => array ]
 */
private static function mhchemCreateTransitions( $o ): array {
	$transitions = [];

	// Pass 1: register every state name and remember the split state list on its task.
	foreach ( $o as $pattern => $tasksByState ) {
		foreach ( $tasksByState as $state => $_ ) {
			$stateNames = preg_split( "/\|/", strval( $state ), -1, PREG_SPLIT_NO_EMPTY );
			$o[$pattern][$state]["stateArray"] = $stateNames;
			foreach ( $stateNames as $stateName ) {
				$transitions[$stateName] = [];
			}
		}
	}

	// Pass 2: attach the normalized task to each (state, pattern) combination.
	foreach ( $o as $pattern => $tasksByState ) {
		foreach ( $tasksByState as $task ) {
			$stateNames = $task["stateArray"] ?? [];
			if ( $stateNames === [] ) {
				// Nothing to insert for this entry; the task is left untouched.
				continue;
			}

			// Normalize actions into a list of arrays: 'text=' ==> [ [ "type_" => 'text=' ] ]
			$actions = $task["action_"];
			if ( is_string( $actions ) ) {
				$actions = [ $actions ];
			}
			$actions = array_merge( [], $actions );
			foreach ( $actions as $idx => $action ) {
				if ( is_string( $action ) ) {
					$actions[$idx] = [ "type_" => $action ];
				}
			}
			$task["action_"] = $actions;

			$patterns = preg_split( "/\|/", strval( $pattern ), -1, PREG_SPLIT_NO_EMPTY );

			foreach ( $stateNames as $stateName ) {
				// '*' fans out to every state registered in pass 1 (including '*' itself).
				$targets = ( $stateName === '*' ) ? array_keys( $transitions ) : [ $stateName ];
				foreach ( $patterns as $singlePattern ) {
					$entry = [ "pattern" => $singlePattern, "task" => $task ];
					foreach ( $targets as $target ) {
						$transitions[$target][] = $entry;
					}
				}
			}
		}
	}

	return $transitions;
}
93 | |
/**
 * Accessor for the generic action closures that the constructor stores in $genericActions.
 *
 * @return array|Closure[] action name => closure
 */
public function getGenericActions(): array {
	$actions = $this->genericActions;
	return $actions;
}
100 | |
101 | /** |
102 | * Initialize arrays for genericActions and StateMachines with mhchemCreateTransitions. |
103 | * @param-taint $mhchemParser none |
104 | */ |
105 | public function __construct( MhchemParser $mhchemParser ) { |
106 | $this->mhchemParser = $mhchemParser; |
107 | $this->genericActions = [ |
108 | 'a=' => static function ( &$buffer, $m ) { |
109 | $buffer["a"] = ( $buffer["a"] ?? "" ) . $m; |
110 | return null; |
111 | }, |
112 | 'b=' => static function ( &$buffer, $m ) { |
113 | $buffer["b"] = ( $buffer["b"] ?? "" ) . $m; |
114 | return null; |
115 | }, |
116 | 'p=' => static function ( &$buffer, $m ) { |
117 | $buffer["p"] = ( $buffer["p"] ?? "" ) . $m; |
118 | return null; |
119 | }, |
120 | 'o=' => static function ( &$buffer, $m ) { |
121 | $buffer["o"] = ( $buffer["o"] ?? "" ) . $m; |
122 | return null; |
123 | }, |
124 | 'o=+p1' => static function ( &$buffer, $m, $a ) { |
125 | $buffer["o"] = ( $buffer["o"] ?? "" ) . $a; |
126 | return null; |
127 | }, |
128 | 'q=' => static function ( &$buffer, $m ) { |
129 | $buffer["q"] = ( $buffer["q"] ?? "" ) . $m; |
130 | return null; |
131 | }, |
132 | 'd=' => static function ( &$buffer, $m ) { |
133 | $buffer["d"] = ( $buffer["d"] ?? "" ) . $m; |
134 | return null; |
135 | }, |
136 | 'rm=' => static function ( &$buffer, $m ) { |
137 | $buffer["rm"] = ( $buffer["rm"] ?? "" ) . $m; |
138 | return null; |
139 | }, |
140 | 'text=' => static function ( &$buffer, $m ) { |
141 | $buffer["text_"] = ( $buffer["text_"] ?? "" ) . $m; |
142 | return null; |
143 | }, |
144 | 'insert' => static function ( &$_buffer, $_m, string $a ) { |
145 | return [ "type_" => $a ]; |
146 | }, |
147 | 'insert+p1' => static function ( &$_buffer, $m, $a ) { |
148 | return [ "type_" => $a, "p1" => $m ]; |
149 | }, |
150 | 'insert+p1+p2' => static function ( &$_buffer, $m, $a ) { |
151 | return [ "type_" => $a, "p1" => $m[0], "p2" => $m[1] ]; |
152 | }, |
153 | 'copy' => static function ( &$_buffer, $m ) { |
154 | return $m; |
155 | }, |
156 | 'write' => static function ( &$_buffer, $_m, string $a ) { |
157 | return $a; |
158 | }, |
159 | 'rm' => static function ( &$_buffer, $m ) { |
160 | return [ "type_" => 'rm', "p1" => $m ]; |
161 | }, |
162 | 'text' => function ( &$_buffer, $m ) { |
163 | return $this->mhchemParser->go( $m, 'text' ); |
164 | }, |
165 | 'tex-math' => function ( &$_buffer, $m ) { |
166 | return $this->mhchemParser->go( $m, 'tex-math' ); |
167 | }, |
168 | 'tex-math tight' => function ( &$_buffer, $m ) { |
169 | return $this->mhchemParser->go( $m, 'tex-math tight' ); |
170 | }, |
171 | 'bond' => static function ( &$_buffer, $m, $k ) { |
172 | // ?? ok ? |
173 | return [ "type_" => 'bond', "kind_" => $k ?? $m ]; |
174 | }, |
175 | 'color0-output' => static function ( &$_buffer, $m ) { |
176 | return [ "type_" => 'color0', "color" => $m ]; |
177 | }, |
178 | 'ce' => function ( &$_buffer, $m ) { |
179 | return $this->mhchemParser->go( $m, 'ce' ); |
180 | }, |
181 | 'pu' => function ( &$_buffer, $m ) { |
182 | return $this->mhchemParser->go( $m, 'pu' ); |
183 | }, |
184 | '1/2' => static function ( &$_buffer, $m ) { |
185 | $ret = []; |
186 | if ( preg_match( "/^[+\-]/", $m ) ) { |
187 | $ret[] = substr( $m, 0, 1 ); |
188 | $m = substr( $m, 1 ); |
189 | } |
190 | $matches = []; |
191 | $n = preg_match( "/^([0-9]+|\\\$[a-z]\\\$|[a-z])\/([0-9]+)(\\\$[a-z]\\\$|[a-z])?$/", |
192 | $m, $matches ); |
193 | if ( !$n || count( $matches ) < 3 ) { |
194 | throw new RuntimeException( "No Result by regex in '1/2' genericAction" ); |
195 | } |
196 | $matches[1] = preg_replace( "/\\\$/", "", $matches[1] ); |
197 | |
198 | $ret[] = [ "type_" => 'frac', "p1" => $matches[1], "p2" => $matches[2] ]; |
199 | |
200 | if ( isset( $matches[3] ) ) { |
201 | $matches[3] = preg_replace( "/\\\$/", "", $matches[3] ); |
202 | $ret[] = [ "type_" => 'tex-math', "p1" => $matches[3] ]; |
203 | } |
204 | return $ret; |
205 | }, |
206 | '9,9' => function ( &$_buffer, $m ) { |
207 | return $this->mhchemParser->go( $m, '9,9' ); |
208 | } |
209 | ]; |
210 | |
211 | $this->stateMachines = [ |
212 | "tex" => [ |
213 | "transitions" => self::mhchemCreateTransitions( [ |
214 | "empty" => [ "0" => [ "action_" => [ "copy" ] ] ], |
215 | "\\ce{(...)}" => [ "0" => [ "action_" => [ [ "type_" => "write", "option" => "{" ], |
216 | "ce", [ "type_" => "write", "option" => "}" ] ] ] ], |
217 | "\\pu{(...)}" => [ "0" => [ "action_" => [ [ "type_" => "write", "option" => "{" ], |
218 | "pu", [ "type_" => "write", "option" => "}" ] ] ] ], |
219 | "else" => [ "0" => [ "action_" => [ "copy" ] ] ] |
220 | ] ), |
221 | "actions" => [] |
222 | ], |
223 | "ce" => [ |
224 | "transitions" => self::mhchemCreateTransitions( [ |
225 | "empty" => [ "*" => [ "action_" => "output" ] ], |
226 | "else" => [ "0|1|2" => [ "action_" => "beginsWithBond=false", |
227 | "revisit" => true, "toContinue" => true ] ], |
228 | "oxidation$" => [ "0" => [ "action_" => 'oxidation-output' ] ], |
229 | "CMT" => [ 'r' => [ "action_" => "rdt=", "nextState" => "rt" ], |
230 | "rd" => [ "action_" => "rqt=", "nextState" => "rdt" ] ], |
231 | "arrowUpDown" => [ '0|1|2|as' => |
232 | [ "action_" => [ 'sb=false', 'output', 'operator' ], "nextState" => '1' ] ], |
233 | "uprightEntities" => [ "0|1|2" => [ "action_" => [ 'o=', 'output' ], "nextState" => "1" ] ], |
234 | "orbital" => [ "0|1|2|3" => [ "action_" => "o=", "nextState" => "o" ] ], |
235 | "->" => [ "0|1|2|3" => [ "action_" => "r=", "nextState" => "r" ], |
236 | "a|as" => [ "action_" => [ 'output', 'r=' ], "nextState" => "r" ], |
237 | "*" => [ "action_" => [ 'output', 'r=' ], "nextState" => "r" ] ], |
238 | "+" => [ |
239 | "o" => [ "action_" => "d= kv", "nextState" => "d" ], |
240 | "d|D" => [ "action_" => 'd=', "nextState" => 'd' ], |
241 | "q" => [ "action_" => 'd=', "nextState" => 'qd' ], |
242 | "qd|qD" => [ "action_" => 'd=', "nextState" => 'qd' ], |
243 | "dq" => [ "action_" => [ 'output', 'd=' ], "nextState" => 'd' ], |
244 | "3" => [ "action_" => [ 'sb=false', 'output', 'operator' ], "nextState" => '0' ], |
245 | ], |
246 | "amount" => [ "0|2" => [ "action_" => "a=", "nextState" => "a" ] ], |
247 | "pm-operator" => [ "0|1|2|a|as" => [ "action_" => [ 'sb=false', 'output', |
248 | [ "type_" => 'operator', "option" => '\\pm' ] ], "nextState" => '0' ] ], |
249 | "operator" => [ "0|1|2|a|as" => |
250 | [ "action_" => [ 'sb=false', 'output', 'operator' ], "nextState" => '0' ] ], |
251 | "-$" => [ |
252 | "o|q" => [ "action_" => [ 'charge or bond', 'output' ], "nextState" => 'qd' ], |
253 | "d" => [ "action_" => 'd=', "nextState" => 'd' ], |
254 | "D" => [ "action_" => [ 'output', [ "type_" => "bond", |
255 | "option" => "-" ] ], "nextState" => '3' ], |
256 | "q" => [ "action_" => 'd=', "nextState" => 'qd' ], |
257 | "qd" => [ "action_" => 'd=', "nextState" => 'qd' ], |
258 | "qD|dq" => [ "action_" => [ 'output', |
259 | [ "type_" => "bond", "option" => "-" ] ], "nextState" => '3' ] |
260 | ], |
261 | "-9" => [ "3|o" => [ "action_" => |
262 | [ 'output', [ "type_" => "insert", "option" => "hyphen" ] ], "nextState" => '3' ] ], |
263 | '- orbital overlap' => [ |
264 | 'o' => [ "action_" => [ 'output', |
265 | [ "type_" => 'insert', "option" => 'hyphen' ] ], "nextState" => '2' ], |
266 | 'd' => [ "action_" => [ 'output', |
267 | [ "type_" => 'insert', "option" => 'hyphen' ] ], "nextState" => '2' ] ], |
268 | '-' => [ |
269 | '0|1|2' => [ "action_" => [ [ "type_" => 'output', "option" => 1 ], |
270 | 'beginsWithBond=true', [ "type_" => 'bond', "option" => "-" ] ], "nextState" => '3' ], |
271 | '3' => [ "action_" => [ [ "type_" => 'bond', "option" => "-" ] ] ], |
272 | 'a' => [ "action_" => [ 'output', |
273 | [ "type_" => 'insert', "option" => 'hyphen' ] ], "nextState" => '2' ], |
274 | 'as' => [ "action_" => [ [ "type_" => 'output', "option" => 2 ], |
275 | [ "type_" => 'bond', "option" => "-" ] ], "nextState" => '3' ], |
276 | 'b' => [ "action_" => 'b=' ], |
277 | 'o' => [ "action_" => [ [ "type_" => '- after o/d', "option" => false ] ], "nextState" => '2' ], |
278 | 'q' => [ "action_" => [ [ "type_" => '- after o/d', "option" => false ] ], "nextState" => '2' ], |
279 | 'd|qd|dq' => [ "action_" => |
280 | [ [ "type_" => '- after o/d', "option" => true ] ], "nextState" => '2' ], |
281 | 'D|qD|p' => [ "action_" => |
282 | [ 'output', [ "type_" => 'bond', "option" => "-" ] ], "nextState" => '3' ] ], |
283 | 'amount2' => [ |
284 | '1|3' => [ "action_" => 'a=', "nextState" => 'a' ] ], |
285 | 'letters' => [ |
286 | '0|1|2|3|a|as|b|p|bp|o' => [ "action_" => 'o=', "nextState" => 'o' ], |
287 | 'q|dq' => [ "action_" => [ 'output', 'o=' ], "nextState" => 'o' ], |
288 | 'd|D|qd|qD' => [ "action_" => 'o after d', "nextState" => 'o' ] ], |
289 | 'digits' => [ |
290 | 'o' => [ "action_" => 'q=', "nextState" => 'q' ], |
291 | 'd|D' => [ "action_" => 'q=', "nextState" => 'dq' ], |
292 | 'q' => [ "action_" => [ 'output', 'o=' ], "nextState" => 'o' ], |
293 | 'a' => [ "action_" => 'o=', "nextState" => 'o' ] ], |
294 | 'space A' => [ |
295 | 'b|p|bp' => [ "action_" => [] ] ], |
296 | 'space' => [ |
297 | 'a' => [ "action_" => [], "nextState" => 'as' ], |
298 | '0' => [ "action_" => 'sb=false' ], |
299 | '1|2' => [ "action_" => 'sb=true' ], |
300 | 'r|rt|rd|rdt|rdq' => [ "action_" => 'output', "nextState" => '0' ], |
301 | '*' => [ "action_" => [ 'output', 'sb=true' ], "nextState" => '1' ] ], |
302 | '1st-level escape' => [ |
303 | '1|2' => [ "action_" => [ 'output', |
304 | [ "type_" => 'insert+p1', "option" => '1st-level escape' ] ] ], |
305 | '*' => [ "action_" => [ 'output', |
306 | [ "type_" => 'insert+p1', "option" => '1st-level escape' ] ], "nextState" => '0' ] ], |
307 | '[(...)]' => [ |
308 | 'r|rt' => [ "action_" => 'rd=', "nextState" => 'rd' ], |
309 | 'rd|rdt' => [ "action_" => 'rq=', "nextState" => 'rdq' ] ], |
310 | '...' => [ |
311 | 'o|d|D|dq|qd|qD' => [ "action_" => |
312 | [ 'output', [ "type_" => 'bond', "option" => "..." ] ], "nextState" => '3' ], |
313 | '*' => [ "action_" => [ [ "type_" => 'output', "option" => 1 ], |
314 | [ "type_" => 'insert', "option" => 'ellipsis' ] ], "nextState" => '1' ] ], |
315 | '. __* ' => [ |
316 | '*' => [ "action_" => [ 'output', |
317 | [ "type_" => 'insert', "option" => 'addition compound' ] ], "nextState" => '1' ] ], |
318 | 'state of aggregation $' => [ |
319 | '*' => [ "action_" => [ 'output', 'state of aggregation' ], "nextState" => '1' ] ], |
320 | '{[(' => [ |
321 | 'a|as|o' => [ "action_" => [ 'o=', 'output', 'parenthesisLevel++' ], "nextState" => '2' ], |
322 | '0|1|2|3' => [ "action_" => [ 'o=', 'output', 'parenthesisLevel++' ], "nextState" => '2' ], |
323 | '*' => [ "action_" => |
324 | [ 'output', 'o=', 'output', 'parenthesisLevel++' ], "nextState" => '2' ] ], |
325 | ')]}' => [ |
326 | '0|1|2|3|b|p|bp|o' => [ "action_" => [ 'o=', 'parenthesisLevel--' ], "nextState" => 'o' ], |
327 | 'a|as|d|D|q|qd|qD|dq' => |
328 | [ "action_" => [ 'output', 'o=', 'parenthesisLevel--' ], "nextState" => 'o' ] ], |
329 | ', ' => [ |
330 | '*' => [ "action_" => [ 'output', 'comma' ], "nextState" => '0' ] ], |
331 | '^_' => [ |
332 | '*' => [ "action_" => [] ] ], |
333 | '^{(...)}|^($...$)' => [ |
334 | '0|1|2|as' => [ "action_" => 'b=', "nextState" => 'b' ], |
335 | 'p' => [ "action_" => 'b=', "nextState" => 'bp' ], |
336 | '3|o' => [ "action_" => 'd= kv', "nextState" => 'D' ], |
337 | 'q' => [ "action_" => 'd=', "nextState" => 'qD' ], |
338 | 'd|D|qd|qD|dq' => [ "action_" => [ 'output', 'd=' ], "nextState" => 'D' ] ], |
339 | '^a|^\\x{}{}|^\\x{}|^\\x|\'' => [ |
340 | '0|1|2|as' => [ "action_" => 'b=', "nextState" => 'b' ], |
341 | 'p' => [ "action_" => 'b=', "nextState" => 'bp' ], |
342 | '3|o' => [ "action_" => 'd= kv', "nextState" => 'd' ], |
343 | 'q' => [ "action_" => 'd=', "nextState" => 'qd' ], |
344 | 'd|qd|D|qD' => [ "action_" => 'd=' ], |
345 | 'dq' => [ "action_" => [ 'output', 'd=' ], "nextState" => 'd' ] ], |
346 | '_{(state of aggregation)}$' => [ |
347 | 'd|D|q|qd|qD|dq' => [ "action_" => [ 'output', 'q=' ], "nextState" => 'q' ] ], |
348 | '_{(...)}|_($...$)|_9|_\\x{}{}|_\\x{}|_\\x' => [ |
349 | '0|1|2|as' => [ "action_" => 'p=', "nextState" => 'p' ], |
350 | 'b' => [ "action_" => 'p=', "nextState" => 'bp' ], |
351 | '3|o' => [ "action_" => 'q=', "nextState" => 'q' ], |
352 | 'd|D' => [ "action_" => 'q=', "nextState" => 'dq' ], |
353 | 'q|qd|qD|dq' => [ "action_" => [ 'output', 'q=' ], "nextState" => 'q' ] ], |
354 | '=<>' => [ |
355 | '0|1|2|3|a|as|o|q|d|D|qd|qD|dq' => [ "action_" => |
356 | [ [ "type_" => 'output', "option" => 2 ], 'bond' ], "nextState" => '3' ] ], |
357 | '#' => [ |
358 | '0|1|2|3|a|as|o' => [ "action_" => [ [ "type_" => 'output', "option" => 2 ], |
359 | [ "type_" => 'bond', "option" => "#" ] ], "nextState" => '3' ] ], |
360 | '{}^' => [ |
361 | '*' => [ "action_" => [ [ "type_" => 'output', "option" => 1 ], |
362 | [ "type_" => 'insert', "option" => 'tinySkip' ] ], "nextState" => '1' ] ], |
363 | '{}' => [ |
364 | '*' => [ "action_" => [ [ "type_" => 'output', "option" => 1 ] ], "nextState" => '1' ] ], |
365 | '{...}' => [ |
366 | '0|1|2|3|a|as|b|p|bp' => [ "action_" => 'o=', "nextState" => 'o' ], |
367 | 'o|d|D|q|qd|qD|dq' => [ "action_" => [ 'output', 'o=' ], "nextState" => 'o' ] ], |
368 | '$...$' => [ |
369 | 'a' => [ "action_" => 'a=' ], |
370 | '0|1|2|3|as|b|p|bp|o' => [ "action_" => 'o=', "nextState" => 'o' ], |
371 | 'as|o' => [ "action_" => 'o=' ], |
372 | 'q|d|D|qd|qD|dq' => [ "action_" => [ 'output', 'o=' ], "nextState" => 'o' ] ], |
373 | '\\bond{(...)}' => [ |
374 | '*' => [ "action_" => [ |
375 | [ "type_" => 'output', "option" => 2 ], 'bond' ], "nextState" => "3" ] ], |
376 | '\\frac{(...)}' => [ |
377 | '*' => [ "action_" => [ |
378 | [ "type_" => 'output', "option" => 1 ], 'frac-output' ], "nextState" => '3' ] ], |
379 | '\\overset{(...)}' => [ |
380 | '*' => [ "action_" => [ |
381 | [ "type_" => 'output', "option" => 2 ], 'overset-output' ], "nextState" => '3' ] ], |
382 | '\\underset{(...)}' => [ |
383 | '*' => [ "action_" => [ |
384 | [ "type_" => 'output', "option" => 2 ], 'underset-output' ], "nextState" => '3' ] ], |
385 | '\\underbrace{(...)}' => [ |
386 | '*' => [ "action_" => [ |
387 | [ "type_" => 'output', "option" => 2 ], 'underbrace-output' ], "nextState" => '3' ] ], |
388 | '\\color{(...)}{(...)}' => [ |
389 | '*' => [ "action_" => [ |
390 | [ "type_" => 'output', "option" => 2 ], 'color-output' ], "nextState" => '3' ] ], |
391 | '\\color{(...)}' => [ |
392 | '*' => [ "action_" => [ [ "type_" => 'output', "option" => 2 ], 'color0-output' ] ] ], |
393 | '\\ce{(...)}' => [ |
394 | '*' => [ "action_" => [ [ "type_" => 'output', "option" => 2 ], 'ce' ], "nextState" => '3' ] ], |
395 | '\\,' => [ |
396 | '*' => [ "action_" => [ |
397 | [ "type_" => 'output', "option" => 1 ], 'copy' ], "nextState" => '1' ] ], |
398 | '\\pu{(...)}' => [ |
399 | '*' => [ "action_" => [ 'output', [ "type_" => 'write', "option" => "{" ], |
400 | 'pu', [ "type_" => 'write', "option" => "}" ] ], "nextState" => '3' ] ], |
401 | '\\x{}{}|\\x{}|\\x' => |
402 | [ '0|1|2|3|a|as|b|p|bp|o|c0' => [ "action_" => [ 'o=', 'output' ], "nextState" => '3' ], |
403 | '*' => [ "action_" => [ 'output', 'o=', 'output' ], "nextState" => '3' ] ], |
404 | 'others' => [ '*' => [ "action_" => |
405 | [ [ "type_" => 'output', "option" => 1 ], 'copy' ], "nextState" => '3' ] ], |
406 | 'else2' => [ 'a' => [ "action_" => 'a to o', "nextState" => 'o', "revisit" => true ], |
407 | 'as' => [ "action_" => [ 'output', 'sb=true' ], "nextState" => '1', "revisit" => true ], |
408 | 'r|rt|rd|rdt|rdq' => [ "action_" => [ 'output' ], "nextState" => '0', "revisit" => true ], |
409 | '*' => [ "action_" => [ 'output', 'copy' ], "nextState" => '3' ] ] |
410 | ] ), |
411 | "actions" => [ |
412 | 'o after d' => function ( &$buffer, $m ) { |
413 | if ( preg_match( "/^[1-9][0-9]*$/", $buffer['d'] ?? "" ) ) { |
414 | $tmp = $buffer["d"]; |
415 | $buffer["d"] = null; |
416 | $ret = $this->stateMachines["ce"]["actions"]["output"]( $buffer, null, null ); |
417 | $ret[] = [ "type_" => "tinySkip" ]; |
418 | $buffer["b"] = $tmp; |
419 | } else { |
420 | $ret = $this->stateMachines["ce"]["actions"]["output"]( $buffer, null, null ); |
421 | } |
422 | |
423 | /** @phan-suppress-next-line PhanParamTooFew */ |
424 | $this->genericActions['o=']( $buffer, $m ); |
425 | return $ret; |
426 | }, |
427 | 'd= kv' => static function ( &$buffer, $m ) { |
428 | $buffer["d"] = $m; |
429 | $buffer["dType"] = "kv"; |
430 | return null; |
431 | }, |
432 | 'charge or bond' => function ( &$buffer, $m ) { |
433 | if ( MhchemUtil::issetJS( $buffer['beginsWithBond'] ?? null ) ) { |
434 | $ret = []; |
435 | $im = $this->stateMachines["ce"]["actions"]["output"]( $buffer, null, null ); |
436 | MhchemUtil::concatArray( $ret, $im ); |
437 | |
438 | /** @phan-suppress-next-line PhanParamTooMany */ |
439 | MhchemUtil::concatArray( $ret, $this->genericActions['bond']( $buffer, $m, "-" ) ); |
440 | return $ret; |
441 | } else { |
442 | $buffer["d"] = $m; |
443 | return null; |
444 | } |
445 | }, |
446 | '- after o/d' => function ( &$buffer, $m, $isAfterD ) { |
447 | $c1 = $this->mhchemParser->getPatterns()->match( 'orbital', $buffer["o"] ?? "" ); |
448 | $c2 = $this->mhchemParser->getPatterns()->match( 'one lowercase greek letter $', |
449 | $buffer["o"] ?? "" ); |
450 | $c3 = $this->mhchemParser->getPatterns()->match( 'one lowercase latin letter $', |
451 | $buffer["o"] ?? "" ); |
452 | $c4 = $this->mhchemParser->getPatterns()->match( '$one lowercase latin letter$ $', |
453 | $buffer["o"] ?? "" ); |
454 | $hyphenFollows = $m === "-" && ( ( isset( $c1["remainder"] ) && $c1["remainder"] === "" ) |
455 | || $c2 !== null || $c3 !== null || $c4 !== null ); |
456 | if ( $hyphenFollows && !isset( $buffer["a"] ) && !isset( $buffer["b"] ) |
457 | && !isset( $buffer["p"] ) && !isset( $buffer["d"] ) |
458 | && !isset( $buffer["q"] ) && !$c1 && $c3 ) { |
459 | $buffer["o"] = '$' . $buffer["o"] . '$'; |
460 | } |
461 | $ret = []; |
462 | if ( $hyphenFollows ) { |
463 | $im = $this->stateMachines["ce"]["actions"]["output"]( $buffer, null, null ); |
464 | MhchemUtil::concatArray( $ret, $im ); |
465 | $ret[] = [ "type_" => 'hyphen' ]; |
466 | } else { |
467 | $c1 = $this->mhchemParser->getPatterns()->match( 'digits', $buffer["d"] ?? "" ); |
468 | if ( $isAfterD && isset( $c1["remainder"] ) && $c1["remainder"] === '' ) { |
469 | /** @phan-suppress-next-line PhanParamTooFew */ |
470 | MhchemUtil::concatArray( $ret, $this->genericActions['d=']( $buffer, $m ) ); |
471 | $im = $this->stateMachines["ce"]["actions"]["output"]( $buffer, null, null ); |
472 | MhchemUtil::concatArray( $ret, $im ); |
473 | } else { |
474 | $im = $this->stateMachines["ce"]["actions"]["output"]( $buffer, null, null ); |
475 | MhchemUtil::concatArray( $ret, $im ); |
476 | /** @phan-suppress-next-line PhanParamTooMany */ |
477 | MhchemUtil::concatArray( $ret, $this->genericActions['bond']( $buffer, $m, "-" ) ); |
478 | } |
479 | } |
480 | return $ret; |
481 | }, |
482 | 'a to o' => static function ( &$buffer ) { |
483 | $buffer["o"] = $buffer["a"]; |
484 | $buffer["a"] = null; |
485 | return null; |
486 | }, |
487 | 'sb=true' => static function ( &$buffer ) { |
488 | $buffer["sb"] = true; |
489 | return null; |
490 | }, |
491 | 'sb=false' => static function ( &$buffer ) { |
492 | $buffer["sb"] = false; |
493 | return null; |
494 | }, |
495 | 'beginsWithBond=true' => static function ( &$buffer ) { |
496 | $buffer['beginsWithBond'] = true; |
497 | return null; |
498 | }, |
499 | 'beginsWithBond=false' => static function ( &$buffer ) { |
500 | $buffer['beginsWithBond'] = false; |
501 | return null; |
502 | }, |
503 | 'parenthesisLevel++' => static function ( &$buffer ) { |
504 | $buffer['parenthesisLevel']++; |
505 | return null; |
506 | }, |
507 | 'parenthesisLevel--' => static function ( &$buffer ) { |
508 | $buffer['parenthesisLevel']--; |
509 | return null; |
510 | }, |
511 | 'state of aggregation' => function ( $_buffer, $m ) { |
512 | return [ "type_" => 'state of aggregation', |
513 | "p1" => $this->mhchemParser->go( $m, 'o' ) ]; |
514 | }, |
515 | 'comma' => static function ( $buffer, $m ) { |
516 | // $a = preg_replace('/\s*$/', '', $m); tbd: final check if using rtrim is ok |
517 | $a = rtrim( $m ); |
518 | $withSpace = ( $a !== $m ); |
519 | if ( $withSpace && $buffer['parenthesisLevel'] === 0 ) { |
520 | return [ "type_" => 'comma enumeration L', "p1" => $a ]; |
521 | } else { |
522 | return [ "type_" => 'comma enumeration M', "p1" => $a ]; |
523 | } |
524 | }, |
525 | 'output' => function ( &$buffer, $_m, $entityFollows ) { |
526 | if ( !isset( $buffer["r"] ) ) { |
527 | $ret = []; |
528 | if ( !isset( $buffer["a"] ) && !isset( $buffer["b"] ) && !isset( $buffer["p"] ) |
529 | && !isset( $buffer["o"] ) && !isset( $buffer["q"] ) |
530 | && !isset( $buffer["d"] ) && !$entityFollows ) { |
531 | // do nothing. |
532 | } else { |
533 | if ( MhchemUtil::issetJS( $buffer["sb"] ?? null ) ) { |
534 | $ret[] = [ "type_" => 'entitySkip' ]; |
535 | } |
536 | if ( !isset( $buffer["o"] ) && !isset( $buffer["q"] ) && !isset( $buffer["d"] ) |
537 | && !isset( $buffer["b"] ) && !isset( $buffer["p"] ) && $entityFollows !== 2 ) { |
538 | $buffer["o"] = $buffer["a"] ?? null; |
539 | $buffer["a"] = null; |
540 | } elseif ( !isset( $buffer["o"] ) && !isset( $buffer["q"] ) && !isset( $buffer["d"] ) |
541 | && ( isset( $buffer["b"] ) || isset( $buffer["p"] ) ) ) { |
542 | $buffer["o"] = $buffer["a"] ?? null; |
543 | $buffer["d"] = $buffer["b"] ?? null; |
544 | $buffer["q"] = $buffer["p"] ?? null; |
545 | $buffer["a"] = $buffer["b"] = $buffer["p"] = null; |
546 | } else { |
547 | if ( isset( $buffer["o"] ) && isset( $buffer["dType"] ) && $buffer["dType"] === 'kv' |
548 | && $this->mhchemParser->getPatterns()->match( 'd-oxidation$', |
549 | $buffer["d"] ?? "" ) ) { |
550 | $buffer["dType"] = 'oxidation'; |
551 | } elseif ( isset( $buffer["o"] ) && isset( $buffer["dType"] ) |
552 | && $buffer["dType"] === 'kv' && !isset( $buffer["q"] ) ) { |
553 | $buffer["dType"] = null; |
554 | } |
555 | } |
556 | |
557 | $retIm = [ |
558 | "type_" => 'chemfive', |
559 | "a" => $this->mhchemParser->go( $buffer["a"] ?? null, 'a' ), |
560 | "b" => $this->mhchemParser->go( $buffer["b"] ?? null, 'bd' ), |
561 | "p" => $this->mhchemParser->go( $buffer["p"] ?? null, 'pq' ), |
562 | "o" => $this->mhchemParser->go( $buffer["o"] ?? null, 'o' ), |
563 | "q" => $this->mhchemParser->go( $buffer["q"] ?? null, 'pq' ), |
564 | "d" => $this->mhchemParser->go( $buffer["d"] ?? null, |
565 | ( isset( $buffer["dType"] ) |
566 | && $buffer["dType"] === 'oxidation' ? 'oxidation' : 'bd' ) ), |
567 | ]; |
568 | |
569 | if ( isset( $buffer["dType"] ) ) { |
570 | $retIm["dType"] = $buffer["dType"]; |
571 | } |
572 | |
573 | $ret[] = $retIm; |
574 | |
575 | } |
576 | } else { |
577 | if ( isset( $buffer["rdt"] ) && $buffer["rdt"] === 'M' ) { |
578 | $rd = $this->mhchemParser->go( $buffer["rd"], 'tex-math' ); |
579 | } elseif ( isset( $buffer["rdt"] ) && $buffer["rdt"] === 'T' ) { |
580 | // tbd double array ok ? |
581 | $rd = [ [ "type_" => 'text', "p1" => $buffer["rd"] ?? "" ] ]; |
582 | } else { |
583 | $rd = $this->mhchemParser->go( $buffer["rd"] ?? null, 'ce' ); |
584 | } |
585 | |
586 | if ( isset( $buffer["rqt"] ) && $buffer["rqt"] === 'M' ) { |
587 | $rq = $this->mhchemParser->go( $buffer["rq"], 'tex-math' ); |
588 | } elseif ( isset( $buffer["rqt"] ) && $buffer["rqt"] === 'T' ) { |
589 | $rq = [ [ "type_" => 'text', "p1" => $buffer["rq"] ?? "" ] ]; |
590 | } else { |
591 | $rq = $this->mhchemParser->go( $buffer["rq"] ?? null, 'ce' ); |
592 | } |
593 | $ret = [ |
594 | "type_" => 'arrow', |
595 | "r" => $buffer["r"], |
596 | "rd" => $rd, |
597 | "rq" => $rq |
598 | ]; |
599 | } |
600 | foreach ( $buffer as $key => $value ) { |
601 | if ( $key !== 'parenthesisLevel' && $key !== 'beginsWithBond' ) { |
602 | unset( $buffer[$key] ); |
603 | } |
604 | } |
605 | return $ret; |
606 | }, |
607 | 'oxidation-output' => function ( $_buffer, $m ) { |
608 | $ret = [ "{" ]; |
609 | MhchemUtil::concatArray( $ret, $this->mhchemParser->go( $m, 'oxidation' ) ); |
610 | $ret[] = "}"; |
611 | return $ret; |
612 | }, |
613 | 'frac-output' => function ( $_buffer, $m ) { |
614 | return [ "type_" => 'frac-ce', |
615 | "p1" => $this->mhchemParser->go( $m[0] ?? null, 'ce' ), |
616 | "p2" => $this->mhchemParser->go( $m[1] ?? null, 'ce' ) ]; |
617 | }, |
618 | 'overset-output' => function ( $_buffer, $m ) { |
619 | return [ "type_" => 'overset', |
620 | "p1" => $this->mhchemParser->go( $m[0] ?? null, 'ce' ), |
621 | "p2" => $this->mhchemParser->go( $m[1] ?? null, 'ce' ) ]; |
622 | }, |
623 | 'underset-output' => function ( $_buffer, $m ) { |
624 | return [ "type_" => 'underset', |
625 | "p1" => $this->mhchemParser->go( $m[0] ?? null, 'ce' ), |
626 | "p2" => $this->mhchemParser->go( $m[1] ?? null, 'ce' ) ]; |
627 | }, |
628 | 'underbrace-output' => function ( $_buffer, $m ) { |
629 | return [ "type_" => 'underbrace', |
630 | "p1" => $this->mhchemParser->go( $m[0] ?? null, 'ce' ), |
631 | "p2" => $this->mhchemParser->go( $m[1] ?? null, 'ce' ) ]; |
632 | }, |
633 | 'color-output' => function ( $_buffer, $m ) { |
634 | return [ "type_" => 'color', "color1" => $m[0] ?? null, |
635 | "color2" => $this->mhchemParser->go( $m[1] ?? null, 'ce' ) ]; |
636 | }, |
637 | 'r=' => static function ( &$buffer, $m ) { |
638 | $buffer["r"] = $m; |
639 | return null; |
640 | }, |
641 | 'rdt=' => static function ( &$buffer, $m ) { |
642 | $buffer["rdt"] = $m; |
643 | return null; |
644 | }, |
645 | 'rd=' => static function ( &$buffer, $m ) { |
646 | $buffer["rd"] = $m; |
647 | return null; |
648 | }, |
649 | 'rqt=' => static function ( &$buffer, $m ) { |
650 | $buffer["rqt"] = $m; |
651 | return null; |
652 | }, |
653 | 'rq=' => static function ( &$buffer, $m ) { |
654 | $buffer["rq"] = $m; |
655 | return null; |
656 | }, |
657 | 'operator' => static function ( &$_buffer, $m, $p1 ) { |
658 | return [ "type_" => 'operator', "kind_" => ( $p1 ?: $m ) ]; |
659 | }, |
660 | ], |
661 | ], |
662 | 'a' => [ |
663 | "transitions" => static::mhchemCreateTransitions( [ |
664 | 'empty' => [ |
665 | '*' => [ "action_" => [] ] |
666 | ], |
667 | '1/2$' => [ |
668 | '0' => [ "action_" => '1/2' ] |
669 | ], |
670 | 'else' => [ |
671 | '0' => [ "action_" => [], "nextState" => '1', "revisit" => true ] |
672 | ], |
673 | '${(...)}$__$(...)$' => [ |
674 | '*' => [ "action_" => 'tex-math tight', "nextState" => '1' ] |
675 | ], |
676 | ',' => [ |
677 | '*' => [ "action_" => [ [ "type_" => 'insert', "option" => 'commaDecimal' ] ] ] |
678 | ], |
679 | 'else2' => [ |
680 | '*' => [ "action_" => 'copy' ] |
681 | ] |
682 | ] ), |
683 | "actions" => [] |
684 | ], |
685 | 'o' => [ |
686 | "transitions" => static::mhchemCreateTransitions( [ |
687 | 'empty' => [ |
688 | '*' => [ "action_" => [] ] |
689 | ], |
690 | '1/2$' => [ |
691 | '0' => [ "action_" => '1/2' ] |
692 | ], |
693 | 'else' => [ |
694 | '0' => [ "action_" => [], "nextState" => '1', "revisit" => true ] |
695 | ], |
696 | 'letters' => [ |
697 | '*' => [ "action_" => 'rm' ] |
698 | ], |
699 | '\\ca' => [ |
700 | '*' => [ "action_" => [ [ "type_" => 'insert', "option" => 'circa' ] ] ] |
701 | ], |
702 | '\\pu{(...)}' => [ |
703 | '*' => [ "action_" => [ [ "type_" => 'write', "option" => "{" ], |
704 | 'pu', [ "type_" => 'write', "option" => "}" ] ] ] |
705 | ], |
706 | '\\x{}{}|\\x{}|\\x' => [ |
707 | '*' => [ "action_" => 'copy' ] |
708 | ], |
709 | '${(...)}$__$(...)$' => [ |
710 | '*' => [ "action_" => 'tex-math' ] |
711 | ], |
712 | '{(...)}' => [ |
713 | '*' => [ "action_" => [ [ "type_" => 'write', "option" => "{" ], |
714 | 'text', [ "type_" => 'write', "option" => "}" ] ] ] |
715 | ], |
716 | 'else2' => [ |
717 | '*' => [ "action_" => 'copy' ] |
718 | ] |
719 | ] ), |
720 | "actions" => [] |
721 | ], |
722 | 'text' => [ |
723 | "transitions" => static::mhchemCreateTransitions( [ |
724 | 'empty' => [ |
725 | '*' => [ "action_" => 'output' ] |
726 | ], |
727 | '{...}' => [ |
728 | '*' => [ "action_" => 'text=' ] |
729 | ], |
730 | '${(...)}$__$(...)$' => [ |
731 | '*' => [ "action_" => 'tex-math' ] |
732 | ], |
733 | '\\greek' => [ |
734 | '*' => [ "action_" => [ 'output', 'rm' ] ] |
735 | ], |
736 | '\\pu{(...)}' => [ |
737 | '*' => [ "action_" => [ 'output', [ "type_" => 'write', "option" => "{" ], |
738 | 'pu', [ "type_" => 'write', "option" => "}" ] ] ] |
739 | ], |
740 | '\\,|\\x{}{}|\\x{}|\\x' => [ |
741 | '*' => [ "action_" => [ 'output', 'copy' ] ] |
742 | ], |
743 | 'else' => [ |
744 | '*' => [ "action_" => 'text=' ] |
745 | ], |
746 | ] ), |
747 | "actions" => [ |
// 'output' action of the 'text' machine: emits the collected "text_" entry as
// a 'text' token and empties the by-reference buffer. When no text has been
// collected, returns null and leaves the buffer as it is.
'output' => static function ( &$buffer ) {
    if ( !isset( $buffer["text_"] ) ) {
        return null;
    }
    $token = [ "type_" => 'text', "p1" => $buffer["text_"] ];
    // Drop every key individually so the caller's array is emptied in place.
    foreach ( array_keys( $buffer ) as $name ) {
        unset( $buffer[$name] );
    }
    return $token;
}
758 | ] |
759 | ], |
760 | 'pq' => [ |
761 | "transitions" => static::mhchemCreateTransitions( [ |
762 | 'empty' => [ |
763 | '*' => [ "action_" => [] ] |
764 | ], |
765 | 'state of aggregation $' => [ |
766 | '*' => [ "action_" => 'state of aggregation' ] |
767 | ], |
768 | 'i$' => [ |
769 | '0' => [ "action_" => [], "nextState" => '!f', "revisit" => true ] |
770 | ], |
771 | '(KV letters),' => [ |
772 | '0' => [ "action_" => 'rm', "nextState" => '0' ] |
773 | ], |
774 | 'formula$' => [ |
775 | '0' => [ "action_" => [], "nextState" => 'f', "revisit" => true ] |
776 | ], |
777 | '1/2$' => [ |
778 | '0' => [ "action_" => '1/2' ] |
779 | ], |
780 | 'else' => [ |
781 | '0' => [ "action_" => [], "nextState" => '!f', "revisit" => true ] |
782 | ], |
783 | '${(...)}$__$(...)$' => [ |
784 | '*' => [ "action_" => 'tex-math' ] |
785 | ], |
786 | '{(...)}' => [ |
787 | '*' => [ "action_" => 'text' ] |
788 | ], |
789 | 'a-z' => [ |
790 | 'f' => [ "action_" => 'tex-math' ] |
791 | ], |
792 | 'letters' => [ |
793 | '*' => [ "action_" => 'rm' ] |
794 | ], |
795 | '-9.,9' => [ |
796 | '*' => [ "action_" => '9,9' ] |
797 | ], |
798 | ',' => [ |
799 | '*' => [ "action_" => [ [ "type_" => 'insert+p1', "option" => 'comma enumeration S' ] ] ] |
800 | ], |
801 | '\\color{(...)}{(...)}' => [ |
802 | '*' => [ "action_" => 'color-output' ] |
803 | ], |
804 | '\\color{(...)}' => [ |
805 | '*' => [ "action_" => 'color0-output' ] |
806 | ], |
807 | '\\ce{(...)}' => [ |
808 | '*' => [ "action_" => 'ce' ] |
809 | ], |
810 | '\\pu{(...)}' => [ |
811 | '*' => [ "action_" => [ [ "type_" => 'write', "option" => "{" ], |
812 | 'pu', [ "type_" => 'write', "option" => "}" ] ] ] |
813 | ], |
814 | '\\,|\\x{}{}|\\x{}|\\x' => [ |
815 | '*' => [ "action_" => 'copy' ] |
816 | ], |
817 | 'else2' => [ |
818 | '*' => [ "action_" => 'copy' ] |
819 | ] |
820 | ] ), |
821 | "actions" => [ |
// Runs the matched text through the 'o' state machine and wraps the parsed
// result in a 'state of aggregation subscript' token.
'state of aggregation' => function ( $_buffer, $m ) {
    $parsed = $this->mhchemParser->go( $m, 'o' );
    return [
        "type_" => 'state of aggregation subscript',
        "p1" => $parsed
    ];
},
// Builds a 'color' token: $m[0] is passed through as "color1", $m[1] is
// parsed recursively with the 'pq' machine and stored as "color2".
// Missing match parts are treated as null.
'color-output' => function ( $_buffer, $m ) {
    $colorName = $m[0] ?? null;
    $colored = $this->mhchemParser->go( $m[1] ?? null, 'pq' );
    return [
        "type_" => 'color',
        "color1" => $colorName,
        "color2" => $colored
    ];
}
830 | ] |
831 | ], |
832 | 'bd' => [ |
833 | "transitions" => static::mhchemCreateTransitions( [ |
834 | 'empty' => [ |
835 | '*' => [ "action_" => [] ] |
836 | ], |
837 | 'x$' => [ |
838 | '0' => [ "action_" => [], "nextState" => '!f', "revisit" => true ] |
839 | ], |
840 | 'formula$' => [ |
841 | '0' => [ "action_" => [], "nextState" => 'f', "revisit" => true ] |
842 | ], |
843 | 'else' => [ |
844 | '0' => [ "action_" => [], "nextState" => '!f', "revisit" => true ] |
845 | ], |
846 | '-9.,9 no missing 0' => [ |
847 | '*' => [ "action_" => '9,9' ] |
848 | ], |
849 | '.' => [ |
850 | '*' => [ "action_" => [ [ "type_" => 'insert', "option" => 'electron dot' ] ] ] |
851 | ], |
852 | 'a-z' => [ |
853 | 'f' => [ "action_" => 'tex-math' ] |
854 | ], |
855 | 'x' => [ |
856 | '*' => [ "action_" => [ [ "type_" => 'insert', "option" => 'KV x' ] ] ] |
857 | ], |
858 | 'letters' => [ |
859 | '*' => [ "action_" => 'rm' ] |
860 | ], |
861 | '\'' => [ |
862 | '*' => [ "action_" => [ [ "type_" => 'insert', "option" => 'prime' ] ] ] |
863 | ], |
864 | '${(...)}$__$(...)$' => [ |
865 | '*' => [ "action_" => 'tex-math' ] |
866 | ], |
867 | '{(...)}' => [ |
868 | '*' => [ "action_" => 'text' ] |
869 | ], |
870 | '\\color{(...)}{(...)}' => [ |
871 | '*' => [ "action_" => 'color-output' ] |
872 | ], |
873 | '\\color{(...)}' => [ |
874 | '*' => [ "action_" => 'color0-output' ] |
875 | ], |
876 | '\\ce{(...)}' => [ |
877 | '*' => [ "action_" => 'ce' ] |
878 | ], |
879 | '\\pu{(...)}' => [ |
880 | '*' => [ "action_" => [ [ "type_" => 'write', "option" => "{" ], |
881 | 'pu', [ "type_" => 'write', "option" => "}" ] ] ] |
882 | ], |
883 | '\\,|\\x{}{}|\\x{}|\\x' => [ |
884 | '*' => [ "action_" => 'copy' ] |
885 | ], |
886 | 'else2' => [ |
887 | '*' => [ "action_" => 'copy' ] |
888 | ] |
889 | ] ), |
890 | "actions" => [ |
// Builds a 'color' token: $m[0] is passed through as "color1", $m[1] is
// parsed recursively with the 'bd' machine and stored as "color2".
// Missing match parts are treated as null.
'color-output' => function ( $_buffer, $m ) {
    $colorName = $m[0] ?? null;
    $colored = $this->mhchemParser->go( $m[1] ?? null, 'bd' );
    return [
        "type_" => 'color',
        "color1" => $colorName,
        "color2" => $colored
    ];
}
895 | ] |
896 | ], |
897 | 'oxidation' => [ |
898 | "transitions" => static::mhchemCreateTransitions( [ |
899 | 'empty' => [ |
900 | '*' => [ "action_" => 'roman-numeral' ] |
901 | ], |
902 | 'pm-operator' => [ |
903 | '*' => [ "action_" => [ [ "type_" => 'o=+p1', "option" => '\\pm' ] ] ] |
904 | ], |
905 | 'else' => [ |
906 | '*' => [ "action_" => 'o=' ] |
907 | ] |
908 | ] ), |
909 | "actions" => [ |
// Wraps the buffered "o" value (empty string when absent) in a
// 'roman numeral' token. The buffer itself is not modified.
'roman-numeral' => static function ( $buffer ) {
    $numeral = $buffer["o"] ?? "";
    return [ "type_" => 'roman numeral', "p1" => $numeral ];
}
913 | ] |
914 | ], |
915 | 'tex-math' => [ |
916 | "transitions" => static::mhchemCreateTransitions( [ |
917 | 'empty' => [ |
918 | '*' => [ "action_" => 'output' ] |
919 | ], |
920 | '\\ce{(...)}' => [ |
921 | '*' => [ "action_" => [ 'output', 'ce' ] ] |
922 | ], |
923 | '\\pu{(...)}' => [ |
924 | '*' => [ "action_" => [ 'output', [ "type_" => 'write', "option" => "{" ], |
925 | 'pu', [ "type_" => 'write', "option" => "}" ] ] ] |
926 | ], |
927 | '{...}|\\,|\\x{}{}|\\x{}|\\x' => [ |
928 | '*' => [ "action_" => 'o=' ] |
929 | ], |
930 | 'else' => [ |
931 | '*' => [ "action_" => 'o=' ] |
932 | ] |
933 | ] ), |
934 | "actions" => [ |
// 'output' action of the 'tex-math' machine: emits the collected "o" entry as
// a 'tex-math' token and empties the by-reference buffer. When nothing has
// been collected, returns null and leaves the buffer as it is.
'output' => static function ( &$buffer ) {
    if ( !isset( $buffer["o"] ) ) {
        return null;
    }
    $token = [ "type_" => 'tex-math', "p1" => $buffer["o"] ];
    // Drop every key individually so the caller's array is emptied in place.
    foreach ( array_keys( $buffer ) as $name ) {
        unset( $buffer[$name] );
    }
    return $token;
}
945 | ] |
946 | ], |
947 | 'tex-math tight' => [ |
948 | "transitions" => static::mhchemCreateTransitions( [ |
949 | 'empty' => [ |
950 | '*' => [ "action_" => 'output' ] |
951 | ], |
952 | '\\ce{(...)}' => [ |
953 | '*' => [ "action_" => [ 'output', 'ce' ] ] |
954 | ], |
955 | '\\pu{(...)}' => [ |
956 | '*' => [ "action_" => [ 'output', [ "type_" => 'write', "option" => "{" ], |
957 | 'pu', [ "type_" => 'write', "option" => "}" ] ] ] |
958 | ], |
959 | '{...}|\\,|\\x{}{}|\\x{}|\\x' => [ |
960 | '*' => [ "action_" => 'o=' ] |
961 | ], |
962 | '-|+' => [ |
963 | '*' => [ "action_" => 'tight operator' ] |
964 | ], |
965 | 'else' => [ |
966 | '*' => [ "action_" => 'o=' ] |
967 | ] |
968 | ] ), |
969 | "actions" => [ |
// Appends the matched operator, wrapped in braces, to the "o" buffer entry
// (starting from an empty string when "o" is not set yet). Emits no token.
'tight operator' => static function ( &$buffer, $m ) {
    $soFar = $buffer["o"] ?? "";
    $wrapped = "{" . $m . "}";
    $buffer["o"] = $soFar . $wrapped;
    return null;
},
// 'output' action of the 'tex-math tight' machine: emits the collected "o"
// entry as a 'tex-math' token and empties the by-reference buffer.
// Returns null (buffer untouched) when nothing has been collected.
//
// The emptiness test is explicit on purpose: a bare `if ( $buffer["o"] )`
// raises "Undefined array key" when the buffer is empty and, because PHP
// treats the string "0" as falsy, would silently drop the math content "0".
'output' => static function ( &$buffer ) {
    $math = $buffer["o"] ?? null;
    if ( $math === null || $math === '' ) {
        return null;
    }
    $ret = [ "type_" => 'tex-math', "p1" => $math ];
    foreach ( $buffer as $key => $value ) {
        unset( $buffer[$key] );
    }
    return $ret;
}
984 | ] |
985 | ], |
986 | '9,9' => [ |
987 | "transitions" => static::mhchemCreateTransitions( [ |
988 | 'empty' => [ |
989 | '*' => [ "action_" => [] ] |
990 | ], |
991 | ',' => [ |
992 | '*' => [ "action_" => 'comma' ] |
993 | ], |
994 | 'else' => [ |
995 | '*' => [ "action_" => 'copy' ] |
996 | ] |
997 | ] ), |
998 | "actions" => [ |
// Emits a 'commaDecimal' token for a comma inside a number.
'comma' => static function () {
    $token = [ "type_" => 'commaDecimal' ];
    return $token;
}
1002 | ] |
1003 | ], |
1004 | 'pu' => [ |
1005 | "transitions" => static::mhchemCreateTransitions( [ |
1006 | 'empty' => [ |
1007 | '*' => [ "action_" => 'output' ] |
1008 | ], |
1009 | 'space$' => [ |
1010 | '*' => [ "action_" => [ 'output', 'space' ] ] |
1011 | ], |
1012 | '{[(|)]}' => [ |
1013 | '0|a' => [ "action_" => 'copy' ] |
1014 | ], |
1015 | '(-)(9)^(-9)' => [ |
1016 | '0' => [ "action_" => 'number^', "nextState" => 'a' ] |
1017 | ], |
1018 | '(-)(9.,9)(e)(99)' => [ |
1019 | '0' => [ "action_" => 'enumber', "nextState" => 'a' ] |
1020 | ], |
1021 | 'space' => [ |
1022 | '0|a' => [ "action_" => [] ] |
1023 | ], |
1024 | 'pm-operator' => [ |
1025 | '0|a' => [ "action_" => [ [ "type_" => 'operator', "option" => '\\pm' ] ], "nextState" => '0' ] |
1026 | ], |
1027 | 'operator' => [ |
1028 | '0|a' => [ "action_" => 'copy', "nextState" => '0' ] |
1029 | ], |
1030 | '//' => [ |
1031 | 'd' => [ "action_" => 'o=', "nextState" => '/' ] |
1032 | ], |
1033 | '/' => [ |
1034 | 'd' => [ "action_" => 'o=', "nextState" => '/' ] |
1035 | ], |
1036 | '{...}|else' => [ |
1037 | '0|d' => [ "action_" => 'd=', "nextState" => 'd' ], |
1038 | 'a' => [ "action_" => [ 'space', 'd=' ], "nextState" => 'd' ], |
1039 | '/|q' => [ "action_" => 'q=', "nextState" => 'q' ] |
1040 | ] |
1041 | ] ), |
1042 | "actions" => [ |
// Converts a matched number with optional sign, mantissa and exponent into
// output parts. Match groups as used below:
//   $m[0] sign ("+-" and "+/-" become "\pm "),
//   $m[1] leading number, parsed with the 'pu-9,9' machine,
//   $m[2] further number part (parsed with 'pu-9,9' only if it contains , or .),
//   $m[3] / $m[4] multiplication marker ("e" or "*" => cdot, otherwise times),
//   $m[5] exponent, emitted as "10^{...}".
// Every group is optional, so all reads are guarded against missing keys.
'enumber' => function ( $_buffer, $m ) {
    $ret = [];
    $sign = $m[0] ?? null;
    if ( MU::issetJS( $sign ) && ( $sign === "+-" || $sign === "+/-" ) ) {
        $ret[] = "\\pm ";
    } elseif ( MU::issetJS( $sign ) ) {
        $ret[] = $sign;
    }
    if ( MU::issetJS( $m[1] ?? null ) ) {
        MhchemUtil::concatArray( $ret, $this->mhchemParser->go( $m[1], 'pu-9,9' ) );

        if ( MU::issetJS( $m[2] ?? null ) ) {
            if ( preg_match( "/[,.]/", $m[2] ) ) {
                MhchemUtil::concatArray( $ret,
                    $this->mhchemParser->go( $m[2], 'pu-9,9' ) );
            } else {
                $ret[] = $m[2];
            }
        }

        // Only one of the two marker groups needs to be present; read both
        // through ?? so the absent one does not raise "Undefined array key".
        $marker3 = $m[3] ?? null;
        $marker4 = $m[4] ?? null;
        if ( MU::issetJS( $marker3 ) || MU::issetJS( $marker4 ) ) {
            if ( $marker3 === "e" || $marker4 === "*" ) {
                $ret[] = [ "type_" => 'cdot' ];
            } else {
                $ret[] = [ "type_" => 'times' ];
            }
        }
    }

    if ( MU::issetJS( $m[5] ?? null ) ) {
        $ret[] = "10^{" . $m[5] . "}";
    }

    return $ret;
},
// Converts "<sign><number>^<exponent>" into output parts:
//   $m[0] optional sign ("+-" and "+/-" become "\pm "),
//   $m[1] number, parsed with the 'pu-9,9' machine,
//   $m[2] exponent, emitted as "^{...}" (empty when absent).
// The sign test uses MU::issetJS, like the sibling 'enumber' action, so an
// empty sign capture is skipped instead of being pushed as an "" element.
'number^' => function ( $_buffer, $m ) {
    $ret = [];
    $sign = $m[0] ?? null;
    if ( $sign === "+-" || $sign === "+/-" ) {
        $ret[] = "\\pm ";
    } elseif ( MU::issetJS( $sign ) ) {
        $ret[] = $sign;
    }
    MhchemUtil::concatArray( $ret,
        $this->mhchemParser->go( $m[1] ?? null, 'pu-9,9' ) );
    $ret[] = "^{" . ( $m[2] ?? "" ) . "}";
    return $ret;
},
// Emits an 'operator' token whose kind is the explicit option $p1 when one
// is given (non-null), otherwise the matched text $m.
'operator' => static function ( $_buffer, $m, $p1 ) {
    $kind = isset( $p1 ) ? $p1 : $m;
    return [ "type_" => 'operator', "kind_" => $kind ];
},
// Emits the 'pu-space-1' spacing token.
'space' => static function () {
    $token = [ "type_" => 'pu-space-1' ];
    return $token;
},
// 'output' action of the 'pu' machine. Works on the buffer entries
//   "d" (first unit part), "q" (second unit part, after a slash) and
//   "o" (the slash operator that was seen, '/' or '//').
// Steps:
//   1. If "d" / "q" consist of exactly one {...} group, unwrap it.
//   2. Rewrite degree notations (°C, ^oC, ^{o}C and the F variants) to
//      "{}^{\circ}C" / "{}^{\circ}F".
//   3. With a "q" part: parse both parts with 'pu' and return either a
//      'pu-frac' token (operator '//') or d, a slash token and q in sequence.
//      Without "q": parse "d" with 'pu-2'.
//   4. Empty the by-reference buffer and return the result.
'output' => function ( &$buffer ) {
    $md = $this->mhchemParser->getPatterns()->match( '{(...)}', $buffer["d"] ?? "" );
    if ( $md && $md["remainder"] === '' ) {
        $buffer["d"] = $md["match_"];
    }
    $mq = $this->mhchemParser->getPatterns()->match( '{(...)}', $buffer["q"] ?? "" );
    if ( $mq && $mq["remainder"] === '' ) {
        $buffer["q"] = $mq["match_"];
    }

    // One shared helper for both parts, so Celsius and Fahrenheit are each
    // mapped to their own letter. Previously the "d" part turned degrees
    // Fahrenheit into "{}^{\circ}C". preg_replace replaces all occurrences,
    // so no equivalent of the JS "g" modifier is needed.
    $normalizeDegrees = static function ( $unit ) {
        $unit = preg_replace( '/\x{00B0}C|\^oC|\^{o}C/u', "{}^{\\circ}C", $unit );
        return preg_replace( '/\x{00B0}F|\^oF|\^{o}F/u', "{}^{\\circ}F", $unit );
    };

    if ( isset( $buffer["d"] ) ) {
        $buffer["d"] = $normalizeDegrees( $buffer["d"] );
    }
    if ( isset( $buffer["q"] ) ) {
        $buffer["q"] = $normalizeDegrees( $buffer["q"] );
        $b5 = [
            "d" => $this->mhchemParser->go( $buffer["d"] ?? "", 'pu' ),
            "q" => $this->mhchemParser->go( $buffer["q"], 'pu' )
        ];
        if ( ( $buffer["o"] ?? null ) === '//' ) {
            $ret = [ "type_" => 'pu-frac', "p1" => $b5["d"], "p2" => $b5["q"] ];
        } else {
            $ret = $b5["d"];
            // Compound numerator or denominator: use the spaced slash token.
            if ( count( $b5["d"] ) > 1 || count( $b5["q"] ) > 1 ) {
                $ret[] = [ "type_" => ' / ' ];
            } else {
                $ret[] = [ "type_" => '/' ];
            }
            MhchemUtil::concatArray( $ret, $b5["q"] );
        }
    } else {
        $ret = $this->mhchemParser->go( $buffer["d"] ?? null, 'pu-2' );
    }

    foreach ( $buffer as $key => $value ) {
        unset( $buffer[$key] );
    }
    return $ret;
}
1140 | ], |
1141 | ], |
1142 | 'pu-2' => [ |
1143 | "transitions" => static::mhchemCreateTransitions( [ |
1144 | 'empty' => [ |
1145 | '*' => [ "action_" => 'output' ], |
1146 | ], |
1147 | '*' => [ |
1148 | '*' => [ "action_" => [ 'output', 'cdot' ], "nextState" => '0' ], |
1149 | ], |
1150 | '\\x' => [ '*' => [ "action_" => 'rm=' ] ], |
1151 | 'space' => [ '*' => [ "action_" => [ 'output', 'space' ], "nextState" => '0' ] ], |
1152 | |
1153 | '^{(...)}|^(-1)' => [ '1' => [ "action_" => '^(-1)' ] ], |
1154 | '-9.,9' => [ |
1155 | '0' => [ "action_" => 'rm=', "nextState" => '0' ], |
1156 | '1' => [ "action_" => '^(-1)', "nextState" => '0' ] |
1157 | ], |
1158 | '{...}|else' => [ '*' => [ "action_" => 'rm=', "nextState" => '1' ] ] |
1159 | ] ), |
1160 | "actions" => [ |
// Emits the 'tight cdot' token used between unit factors.
'cdot' => static function () {
    $token = [ "type_" => 'tight cdot' ];
    return $token;
},
// Appends the matched exponent, as "^{...}", to the "rm" buffer entry.
// Emits no token.
'^(-1)' => static function ( &$buffer, $m ) {
    $exponent = "^{" . $m . "}";
    $buffer["rm"] .= $exponent;
    return null;
},
// Emits the 'pu-space-2' spacing token.
'space' => static function () {
    $token = [ "type_" => 'pu-space-2' ];
    return $token;
},
1171 | |
// 'output' action of the 'pu-2' machine: flushes the "rm" buffer entry.
//   - "rm" is exactly one {...} group: its content is parsed with 'pu'.
//   - otherwise: a single 'rm' token carrying the raw text.
//   - nothing buffered: an empty array.
// The by-reference buffer is emptied in every case.
'output' => function ( &$buffer ) {
    $ret = [];
    // Read through ?? so that flushing an empty buffer (e.g. 'empty' right
    // after a 'space' or '*' flush) does not raise "Undefined array key".
    $rm = $buffer["rm"] ?? null;
    if ( MU::issetJS( $rm ) ) {
        $mrm = $this->mhchemParser->getPatterns()->match( '{(...)}', $rm );
        if ( isset( $mrm["remainder"] ) && $mrm["remainder"] === '' ) {
            $ret = $this->mhchemParser->go( $mrm["match_"], 'pu' );
        } else {
            $ret = [ "type_" => 'rm', "p1" => $rm ];
        }
    }
    foreach ( $buffer as $key => $value ) {
        unset( $buffer[$key] );
    }
    return $ret;
}
1187 | ] |
1188 | ], |
1189 | 'pu-9,9' => [ |
1190 | "transitions" => static::mhchemCreateTransitions( [ |
1191 | 'empty' => [ |
1192 | '0' => [ "action_" => 'output-0' ], |
1193 | 'o' => [ "action_" => 'output-o' ] |
1194 | ], |
1195 | ',' => [ |
1196 | '0' => [ "action_" => [ 'output-0', 'comma' ], "nextState" => 'o' ] |
1197 | ], |
1198 | '.' => [ |
1199 | '0' => [ "action_" => [ 'output-0', 'copy' ], "nextState" => 'o' ] |
1200 | ], |
1201 | 'else' => [ '*' => [ "action_" => 'text=' ] ] |
1202 | ] ), |
1203 | "actions" => [ |
// Emits a 'commaDecimal' token for the decimal comma.
'comma' => static function () {
    $token = [ "type_" => 'commaDecimal' ];
    return $token;
},
// Flushes the integer part of a number ("text_" buffer entry).
// Texts longer than four characters are cut into groups of three counted
// from the right, with a '1000 separator' token between groups; the leftmost
// group holds the remaining one to three characters. Shorter texts are
// returned as a single element. The by-reference buffer is emptied.
'output-0' => static function ( &$buffer ) {
    $buffer["text_"] ??= "";
    $digits = $buffer["text_"];
    $length = strlen( $digits );
    $parts = [];
    if ( $length > 4 ) {
        // Size of the leftmost group: the remainder, or a full 3 when the
        // length is a multiple of three.
        $headSize = $length % 3;
        if ( $headSize === 0 ) {
            $headSize = 3;
        }
        $parts[] = substr( $digits, 0, $headSize );
        for ( $pos = $headSize; $pos < $length; $pos += 3 ) {
            $parts[] = [ "type_" => '1000 separator' ];
            $parts[] = substr( $digits, $pos, 3 );
        }
    } else {
        $parts[] = $digits;
    }
    foreach ( array_keys( $buffer ) as $name ) {
        unset( $buffer[$name] );
    }
    return $parts;
},
// Flushes the part of a number collected in state 'o' (after the decimal
// mark; "text_" buffer entry).
// Texts longer than four characters are cut into groups of three counted
// from the left, each followed by a '1000 separator' token; the final element
// is whatever remains. Shorter texts are returned as a single element.
// The by-reference buffer is emptied.
'output-o' => static function ( &$buffer ) {
    $buffer["text_"] ??= "";
    $digits = $buffer["text_"];
    $parts = [];
    if ( strlen( $digits ) > 4 ) {
        $limit = strlen( $digits ) - 3;
        $pos = 0;
        while ( $pos < $limit ) {
            $parts[] = substr( $digits, $pos, 3 );
            $parts[] = [ "type_" => '1000 separator' ];
            $pos += 3;
        }
        // Remainder after the last full group that was split off.
        $parts[] = substr( $digits, $pos );
    } else {
        $parts[] = $digits;
    }
    foreach ( array_keys( $buffer ) as $name ) {
        unset( $buffer[$name] );
    }
    return $parts;
}
1247 | ] |
1248 | ] |
1249 | |
1250 | ]; |
1251 | } |
1252 | |
1253 | } |