Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
80.28% |
57 / 71 |
|
50.00% |
2 / 4 |
CRAP | |
0.00% |
0 / 1 |
| MhchemParser | |
80.28% |
57 / 71 |
|
50.00% |
2 / 4 |
28.42 | |
0.00% |
0 / 1 |
| __construct | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
| getPatterns | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| toTex | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| go | |
79.03% |
49 / 62 |
|
0.00% |
0 / 1 |
23.69 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Copyright (c) 2023 Johannes Stegmüller |
| 4 | * |
| 5 | * This file is a port of mhchemParser originally authored by Martin Hensel in javascript/typescript. |
| 6 | * The original license for this software can be found in the accompanying LICENSE.mhchemParser-ts.txt file. |
| 7 | */ |
| 8 | |
| 9 | declare( strict_types = 1 ); |
| 10 | |
| 11 | namespace MediaWiki\Extension\Math\WikiTexVC\Mhchem; |
| 12 | |
| 13 | use MediaWiki\Logger\LoggerFactory; |
| 14 | use Psr\Log\LoggerInterface; |
| 15 | use RuntimeException; |
| 16 | |
| 17 | /** |
| 18 | * Port of mhchemParser v4.2.2 by Martin Hensel (https://github.com/mhchem/mhchemParser) |
| 19 | * from typescript/javascript to PHP. |
| 20 | * |
| 21 | * This class contains the go function (l.89 in mhchemParser.js)
| 22 | * and the toTex function (l.39 of mhchemParser.js) |
| 23 | * |
| 24 | * For usage of mhchemParser in PHP instantiate this class and call toTex-Function. |
| 25 | * |
| 26 | * @author Johannes Stegmüller |
| 27 | * @license GPL-2.0-or-later |
| 28 | */ |
class MhchemParser {
	/** @var MhchemPatterns */
	private MhchemPatterns $mhchemPatterns;

	/** @var MhchemStateMachines */
	private MhchemStateMachines $mhchemStateMachines;

	/** @var LoggerInterface|null debug logger, only set when logging was requested in the constructor */
	private ?LoggerInterface $logger = null;

	/** @var int incremented once for every executed action */
	private int $debugIndex;

	/**
	 * Instantiate MhchemParser, required for usage of "toTex" functionality
	 * @param bool $doLogging debug log internal state changes and input output for each state
	 */
	public function __construct( bool $doLogging = false ) {
		$this->mhchemPatterns = new MhchemPatterns();
		$this->mhchemStateMachines = new MhchemStateMachines( $this );
		$this->debugIndex = 0;
		if ( $doLogging ) {
			$this->logger = LoggerFactory::getInstance( 'Math' );
		}
	}

	/**
	 * @return MhchemPatterns the pattern matcher instance used by this parser
	 */
	public function getPatterns(): MhchemPatterns {
		return $this->mhchemPatterns;
	}

	/**
	 * Parse the input with go() and convert the parse result to TeX with MhchemTexify.
	 *
	 * @param string $input input formula in tex eventually containing chemical environments or physical units
	 * @param string $type currently ce or pu (physical units)
	 * @param bool $optimizeMhchemForTexVC optimize the output of mhchem for usage in WikiTexVC, usually extra curlies
	 * surrounding parameters which specify dimensions
	 * @return string
	 */
	public function toTex( $input, $type, bool $optimizeMhchemForTexVC = false ): string {
		$parsed = $this->go( $input, $type );
		$mhchemTexify = new MhchemTexify( $optimizeMhchemForTexVC );
		// The second argument is false only when $type is "tex".
		return $mhchemTexify->go( $parsed, $type !== "tex" );
	}

	/**
	 * Run the state machine selected by $stateMachine over $input and collect the output
	 * produced by the executed actions.
	 *
	 * @param mixed|null $input input to parse; an empty array is returned when
	 *  MhchemUtil::issetJS() reports it as not set
	 * @param mixed|null $stateMachine key of the state machine to use; 'ce' is used when
	 *  MhchemUtil::issetJS() reports it as not set
	 * @return array concatenated output of all executed actions
	 * @throws RuntimeException when a task names an action that is neither defined by the state
	 *  machine nor a generic action ("bug A"), or when the input stopped changing for too many
	 *  consecutive iterations ("bug U")
	 */
	public function go( $input, $stateMachine ): array {
		if ( !MhchemUtil::issetJS( $input ) ) {
			return [];
		}

		if ( !MhchemUtil::issetJS( $stateMachine ) ) {
			$stateMachine = 'ce';
		}

		$state = '0';
		$buffer = [ 'parenthesisLevel' => 0 ];

		if ( $input != null ) {
			// Normalize the input: drop newlines, map dash-like characters to "-"
			// and the ellipsis character to three dots.
			$input = preg_replace( "/\n/", "", $input );
			$input = preg_replace( "/[\x{2212}\x{2013}\x{2014}\x{2010}]/u", "-", $input );
			$input = preg_replace( "/[\x{2026}]/u", "...", $input );
		}

		// Looks through _mhchemParser.transitions, to execute a matching action
		// (recursive)actions
		$lastInput = "";
		$watchdog = 10;
		$output = [];
		while ( true ) {
			// The watchdog is re-armed whenever the input changed since the last iteration.
			if ( $lastInput !== $input ) {
				$watchdog = 10;
				$lastInput = $input;
			} else {
				$watchdog--;
			}

			// Find actions in transition table
			$machine = $this->mhchemStateMachines->stateMachines[$stateMachine];
			$t = $machine["transitions"][$state] ?? $machine["transitions"]['*'];

			for ( $i = 0; $i < count( $t ); $i++ ) {
				$matches = $this->mhchemPatterns->match( $t[$i]["pattern"], $input ?? "" );
				if ( !$matches ) {
					continue;
				}

				if ( $this->logger !== null ) {
					$this->logger->debug( "\n Match at: " . $i . "\tPattern: " . $t[$i]["pattern"] .
						"\t State-machine: " . $stateMachine );
				}

				// Execute actions
				$task = $t[$i]["task"];
				for ( $iA = 0; $iA < count( $task["action_"] ); $iA++ ) {
					$this->debugIndex++;

					$action = $task["action_"][$iA];
					$actionType = $action["type_"];
					$option = $action["option"] ?? null;

					// Find the action: state-machine specific actions take precedence
					// over the generic ones.
					if ( array_key_exists( $actionType, $machine["actions"] ) ) {
						$handler = $machine["actions"][$actionType];
					} else {
						$genericActions = $this->mhchemStateMachines->getGenericActions();
						if ( !array_key_exists( $actionType, $genericActions ) ) {
							// Unknown action; $task is an array, so the type is read by key.
							throw new RuntimeException( "MhchemBugA: mhchem bug A. Please report. ("
								. $actionType . ")" );
						}
						$handler = $genericActions[$actionType];
					}

					if ( $this->logger !== null ) {
						$this->logger->debug( "\n action: \t" . $actionType );
					}
					$o = $handler( $buffer, $matches["match_"], $option );

					// Add output
					MhchemUtil::concatArray( $output, $o );

					if ( $this->logger !== null ) {
						$this->logger->debug( "\n State: " . $state );
						$this->logger->debug( "\n Buffer: " . json_encode( $buffer ) );
						$this->logger->debug( "\n Input: " . $input );
						$this->logger->debug( "\n Output: " . json_encode( $output ) );
						$this->logger->debug( "\n" );
					}
				}

				// Set next state,
				// Shorten input,
				// Continue with next character
				// (= apply only one transition per position)
				$state = $task["nextState"] ?? $state;

				if ( $input != null && strlen( $input ) > 0 ) {
					if ( !array_key_exists( "revisit", $task ) ) {
						$input = $matches["remainder"];
					}
					if ( !array_key_exists( "toContinue", $task ) ) {
						// Leave the transition loop only; the enclosing while loop then
						// starts matching again on the updated input and state.
						break;
					}
				} else {
					return $output;
				}
			}

			// Prevent infinite loop
			if ( $watchdog <= 0 ) {
				// Unexpected character
				throw new RuntimeException( "MhchemBugU: mhchem-PHP bug U. Please report." );
			}
		}
	}
}