Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 71 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 1 |
MhchemParser | |
0.00% |
0 / 71 |
|
0.00% |
0 / 4 |
600 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
getPatterns | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
toTex | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
go | |
0.00% |
0 / 62 |
|
0.00% |
0 / 1 |
420 |
1 | <?php |
2 | /** |
3 | * Copyright (c) 2023 Johannes Stegmüller |
4 | * |
5 | * This file is a port of mhchemParser originally authored by Martin Hensel in javascript/typescript. |
6 | * The original license for this software can be found in the accompanying LICENSE.mhchemParser-ts.txt file. |
7 | */ |
8 | |
9 | declare( strict_types = 1 ); |
10 | |
11 | namespace MediaWiki\Extension\Math\WikiTexVC\Mhchem; |
12 | |
13 | use MediaWiki\Logger\LoggerFactory; |
14 | use Psr\Log\LoggerInterface; |
15 | use RuntimeException; |
16 | |
17 | /** |
18 | * Port of mhchemParser v4.2.2 by Martin Hensel (https://github.com/mhchem/mhchemParser) |
19 | * from typescript/javascript to PHP. |
20 | * |
21 | * This class contains the go function (l.89 in mhchemParser.js) |
22 | * and the toTex function (l.39 of mhchemParser.js) |
23 | * |
24 | * For usage of mhchemParser in PHP instantiate this class and call toTex-Function. |
25 | * |
26 | * @author Johannes Stegmüller |
27 | * @license GPL-2.0-or-later |
28 | */ |
class MhchemParser {
	/** @var MhchemPatterns pattern matcher used to test transitions against the input */
	private MhchemPatterns $mhchemPatterns;

	/** @var MhchemStateMachines transition tables and actions, looked up by state machine name */
	private MhchemStateMachines $mhchemStateMachines;

	/** @var LoggerInterface|null debug logger; stays null unless logging was requested */
	private ?LoggerInterface $logger = null;

	/** @var int counter incremented once per executed action */
	private int $debugIndex;

	/**
	 * Instantiate MhchemParser, required for usage of "toTex" functionality
	 * @param bool $doLogging debug log internal state changes and input output for each state
	 */
	public function __construct( bool $doLogging = false ) {
		$this->mhchemPatterns = new MhchemPatterns();
		$this->mhchemStateMachines = new MhchemStateMachines( $this );
		$this->debugIndex = 0;
		if ( $doLogging ) {
			$this->logger = LoggerFactory::getInstance( 'Math' );
		}
	}

	/**
	 * @return MhchemPatterns the pattern matcher instance owned by this parser
	 */
	public function getPatterns(): MhchemPatterns {
		return $this->mhchemPatterns;
	}

	/**
	 * @param string $input input formula in tex eventually containing chemical environments or physical units
	 * @param string $type currently ce or pu (physical units)
	 * @param bool $optimizeMhchemForTexVC optimize the output of mhchem for usage in WikiTexVC, usually extra curlies
	 * surrounding parameters which specify dimensions
	 * @return string
	 */
	public function toTex( $input, $type, bool $optimizeMhchemForTexVC = false ): string {
		$parsed = $this->go( $input, $type );
		$mhchemTexify = new MhchemTexify( $optimizeMhchemForTexVC );
		return $mhchemTexify->go( $parsed, $type !== "tex" );
	}

	/**
	 * Run the given state machine over the input and collect the output of all executed actions.
	 *
	 * @param string|null $input text to parse; an empty array is returned when MhchemUtil::issetJS rejects it
	 * @param string|null $stateMachine name of the state machine to run; falls back to 'ce' when
	 * MhchemUtil::issetJS rejects it
	 * @return array concatenated outputs of all executed actions
	 * @throws RuntimeException "MhchemBugA" when a transition names an action that is neither defined by
	 * the state machine nor a generic action; "MhchemBugU" when the input stopped changing for
	 * too many consecutive iterations
	 */
	public function go( $input, $stateMachine ): array {
		if ( !MhchemUtil::issetJS( $input ) ) {
			return [];
		}

		if ( !MhchemUtil::issetJS( $stateMachine ) ) {
			$stateMachine = 'ce';
		}

		$state = '0';
		$buffer = [];
		$buffer['parenthesisLevel'] = 0;

		// Loose comparison kept on purpose: it also skips the empty string.
		if ( $input != null ) {
			// Normalize the input: drop newlines, map dash-like characters to "-" and
			// the horizontal ellipsis to three dots.
			$input = preg_replace( "/\n/", "", $input );
			$input = preg_replace( "/[\x{2212}\x{2013}\x{2014}\x{2010}]/u", "-", $input );
			$input = preg_replace( "/[\x{2026}]/u", "...", $input );
		}

		// The state machine name does not change while parsing, so look it up once.
		$machine = $this->mhchemStateMachines->stateMachines[$stateMachine];

		// Looks through _mhchemParser.transitions, to execute a matching action
		// (recursive)actions
		$lastInput = "";
		$watchdog = 10;
		$output = [];
		while ( true ) {
			// The watchdog is re-armed whenever the input changed since the previous iteration.
			if ( $lastInput !== $input ) {
				$watchdog = 10;
				$lastInput = $input;
			} else {
				$watchdog--;
			}

			// Find actions in transition table, falling back to the wildcard state
			$t = $machine["transitions"][$state] ?? $machine["transitions"]['*'];

			foreach ( $t as $i => $transition ) {
				$matches = $this->mhchemPatterns->match( $transition["pattern"], $input ?? "" );
				if ( !$matches ) {
					continue;
				}

				if ( $this->logger ) {
					$this->logger->debug( "\n Match at: " . $i . "\tPattern: " . $transition["pattern"] .
						"\t State-machine: " . $stateMachine );
				}

				// Execute actions
				$task = $transition["task"];
				foreach ( $task["action_"] as $action ) {
					$this->debugIndex++;

					$actionType = $action["type_"];
					$option = $action["option"] ?? null;

					// Actions of the current state machine take precedence over generic actions.
					if ( array_key_exists( $actionType, $machine["actions"] ) ) {
						$handler = $machine["actions"][$actionType];
					} else {
						$genericActions = $this->mhchemStateMachines->getGenericActions();
						if ( !array_key_exists( $actionType, $genericActions ) ) {
							// Unexpected character
							// $task is an array, so the action name must be read with array access.
							throw new RuntimeException( "MhchemBugA: mhchem bug A. Please report. ("
								. $actionType . ")" );
						}
						$handler = $genericActions[$actionType];
					}

					if ( $this->logger ) {
						$this->logger->debug( "\n action: \t" . $actionType );
					}
					$o = $handler( $buffer, $matches["match_"], $option );

					// Add output
					MhchemUtil::concatArray( $output, $o );

					if ( $this->logger ) {
						$this->logger->debug( "\n State: " . $state );
						$this->logger->debug( "\n Buffer: " . json_encode( $buffer ) );
						$this->logger->debug( "\n Input: " . $input );
						$this->logger->debug( "\n Output: " . json_encode( $output ) );
						$this->logger->debug( "\n" );
					}
				}

				// Set next state,
				// Shorten input,
				// Continue with next character
				// (= apply only one transition per position)
				$state = $task["nextState"] ?? $state;

				if ( $input != null && strlen( $input ) > 0 ) {
					if ( !array_key_exists( "revisit", $task ) ) {
						$input = $matches["remainder"];
					}
					if ( !array_key_exists( "toContinue", $task ) ) {
						// Leave the transition loop only; the outer while loop then
						// restarts the lookup with the new state and remaining input.
						break;
					}
				} else {
					// Nothing left to consume: parsing is finished.
					return $output;
				}
			}

			// Prevent infinite loop
			if ( $watchdog <= 0 ) {
				// Unexpected character
				throw new RuntimeException( "MhchemBugU: mhchem-PHP bug U. Please report." );
			}
		}
	}
}