Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
88.81% |
119 / 134 |
|
37.50% |
3 / 8 |
CRAP | |
0.00% |
0 / 1 |
TexVC | |
88.81% |
119 / 134 |
|
37.50% |
3 / 8 |
52.37 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
parse | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
check | |
93.85% |
61 / 65 |
|
0.00% |
0 / 1 |
24.13 | |||
checkTreeIntents | |
76.19% |
16 / 21 |
|
0.00% |
0 / 1 |
12.63 | |||
checkIntentArg | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
checkIntent | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
handleTexError | |
88.46% |
23 / 26 |
|
0.00% |
0 / 1 |
5.04 | |||
getLocationInfo | |
71.43% |
5 / 7 |
|
0.00% |
0 / 1 |
2.09 |
1 | <?php |
2 | |
3 | declare( strict_types = 1 ); |
4 | |
5 | namespace MediaWiki\Extension\Math\WikiTexVC; |
6 | |
7 | use Exception; |
8 | use MediaWiki\Extension\Math\WikiTexVC\Mhchem\MhchemParser; |
9 | use MediaWiki\Extension\Math\WikiTexVC\MMLmappings\Util\MMLParsingUtil; |
10 | use MediaWiki\Extension\Math\WikiTexVC\MMLmappings\Util\MMLutil; |
11 | use MediaWiki\Extension\Math\WikiTexVC\Nodes\Fun2; |
12 | use MediaWiki\Extension\Math\WikiTexVC\Nodes\TexArray; |
13 | use stdClass; |
14 | |
/**
 * A TeX/LaTeX validator and MathML converter.
 * WikiTexVC takes user input and validates it while replacing
 * MediaWiki-specific functions. The validator component is a PHP port of the JavaScript port of texvc,
 * which was originally written in OCaml for the Math extension.
 *
 * @author Johannes Stegmüller
 */
class TexVC {
	/** @var Parser */
	private $parser;
	/** @var TexUtil */
	private $tu;

	public function __construct() {
		$this->parser = new Parser();
		$this->tu = TexUtil::getInstance();
	}

	/**
	 * Run the grammar on a TeX string.
	 * Usually this step is done implicitly within the check-method.
	 * @param string $input tex-string as input for the grammar
	 * @param null|array $options array options for the grammar.
	 * @return mixed output of the grammar.
	 * @throws SyntaxError when SyntaxError in the input
	 */
	public function parse( $input, $options = null ) {
		return $this->parser->parse( $input, $options );
	}

	/**
	 * Validate TeX input and report the result.
	 *
	 * status is one character:
	 *  + : success! result is in 'output'
	 *  E : Lexer exception raised
	 *  F : TeX function not recognized
	 *  S : Parsing error
	 *  C : A package that is required by the input is not enabled in the options
	 *  - : Generic/Default failure code. Might be an invalid argument,
	 *      output file already exist, a problem with an external
	 *      command ...
	 * @param string|TexArray|stdClass $input tex to be checked as string,
	 * can also be the output of former parser call
	 * @param array $options array options for settings of the check
	 * @param array &$warnings reference on warnings occurring during the check
	 * @param bool $texifyMhchem create TeX for mhchem in input before checking further
	 * @return array|string[] output with information status (see above)
	 * @throws Exception in case of a major problem with the check and activated debug option.
	 */
	public function check( $input, $options = [], &$warnings = [], bool $texifyMhchem = false ) {
		try {
			if ( $texifyMhchem && !empty( $options['usemhchem'] ) ) {
				// Parse the chemical equations to TeX with the PHP mhchem parser as preprocessor
				$mhChemParser = new MhchemParser();
				$input = $mhChemParser->toTex( $input, "tex", true );
			}

			$options = ParserUtil::createOptions( $options );
			if ( is_string( $input ) ) {
				$input = $this->parser->parse( $input, $options );
			}
			$output = $input->render();

			$result = [
				'inputN' => $input,
				'status' => '+',
				'output' => $output,
				'warnings' => $warnings,
				'input' => $input,
				'success' => true,
			];

			if ( $options['report_required'] ) {
				$pkgs = [ 'ams', 'cancel', 'color', 'euro', 'teubner',
					'mhchem', 'mathoid', 'mhchemtexified', "intent" ];
				$baseElements = $this->tu->getBaseElements();

				foreach ( $pkgs as $pkg ) {
					$pkg .= '_required';
					$result[$pkg] = $input->containsFunc( $baseElements[$pkg] );
				}
			}

			if ( !$options['usemhchem'] && $this->isPackageRequired( $input, $result, 'mhchem_required' ) ) {
				return [
					'status' => 'C',
					'details' => 'mhchem package required.'
				];
			}
			if ( !$options['usemhchemtexified']
				&& $this->isPackageRequired( $input, $result, 'mhchemtexified_required' )
			) {
				return [
					'status' => 'C',
					'details' => 'virtual mhchemtexified package required.'
				];
			}

			if ( !$options['useintent'] ) {
				if ( $this->isPackageRequired( $input, $result, 'intent_required' ) ) {
					return [
						'status' => 'C',
						'details' => 'virtual intent package required.'
					];
				}
			} elseif ( $input->containsFunc( $this->tu->getBaseElements()['intent_required'] ) ) {
				// Preliminary post-checks of correct intent-syntax
				$intentCheck = $this->checkTreeIntents( $input );
				if ( self::isFailedCheck( $intentCheck ) ) {
					return $intentCheck;
				}
			}

			return $result;
		} catch ( Exception $ex ) {
			// $options may still be the raw caller-supplied value here if the failure
			// happened before ParserUtil::createOptions ran, hence the ?? defaults.
			if ( $ex instanceof SyntaxError ) {
				if ( !( $options['oldtexvc'] ?? false )
					&& str_starts_with( $ex->getMessage(), 'Deprecation' )
				) {
					$warnings[] = [
						'type' => 'texvc-deprecation',
						'details' => $this->handleTexError( $ex, $options )
					];
					// Retry once with the legacy texvc option switched on
					$options['oldtexvc'] = true;
					return $this->check( $input, $options, $warnings );
				}

				if ( ( $options['usemhchem'] ?? false ) && !( $options['oldmhchem'] ?? false ) ) {
					$warnings[] = [
						'type' => 'mhchem-deprecation',
						'details' => $this->handleTexError( $ex, $options )
					];
					// Retry once with the legacy mhchem option switched on
					$options['oldmhchem'] = true;
					return $this->check( $input, $options, $warnings );
				}
			}

			return $this->handleTexError( $ex, $options );
		}
	}

	/**
	 * Tell whether the input uses functions of the package behind $key.
	 * Reuses the value already stored in $result (filled when 'report_required'
	 * is set) and only queries the tree when no such value is present.
	 * @param mixed $input parsed tree offering containsFunc()
	 * @param array $result result array built so far by check()
	 * @param string $key name of the base-element group, e.g. 'mhchem_required'
	 * @return bool
	 */
	private function isPackageRequired( $input, array $result, string $key ) {
		return (bool)( $result[$key] ?? $input->containsFunc( $this->tu->getBaseElements()[$key] ) );
	}

	/**
	 * Tell whether the outcome of an intent check denotes a failure.
	 * A failure is either a falsy value or an array whose 'success' entry is falsy.
	 * @param mixed $checkResult return value of checkTreeIntents() or checkIntent()
	 * @return bool
	 */
	private static function isFailedCheck( $checkResult ) {
		if ( !$checkResult ) {
			return true;
		}
		return is_array( $checkResult ) && isset( $checkResult['success'] ) && !$checkResult['success'];
	}

	/**
	 * Recursively validate every \intent annotation found below the given node.
	 *
	 * All children are visited; traversal stops at the first failing annotation.
	 * @param mixed $inputTree node of the parsed tree; values that do not offer getArgs()
	 * (strings, null, ...) are treated as having nothing to check
	 * @return true|array true when all annotations are fine, otherwise an array with
	 * 'success' => false describing the first problem found
	 */
	private function checkTreeIntents( $inputTree ) {
		if ( !is_object( $inputTree ) || !is_callable( [ $inputTree, 'getArgs' ] ) ) {
			return true;
		}
		$children = $inputTree->getArgs();
		if ( !is_iterable( $children ) ) {
			return true;
		}

		foreach ( $children as $value ) {
			if ( $value instanceof Fun2 && $value->getFname() === "\\intent" ) {
				$intentStr = MMLutil::squashLitsToUnitIntent( $value->getArg2() );
				$intentContent = MMLParsingUtil::getIntentContent( $intentStr );
				$intentArg = MMLParsingUtil::getIntentArgs( $intentStr );
				if ( !self::checkIntentArg( $intentArg ) ) {
					return [
						'success' => false,
						'info' => 'malformatted intent argument',
					];
				}
				// The intent expression itself is only parsed when one is present
				if ( $intentContent ) {
					$contentCheck = $this->checkIntent( $intentContent );
					if ( self::isFailedCheck( $contentCheck ) ) {
						return $contentCheck;
					}
				}
				// The annotated expression may contain further intents
				$childCheck = $this->checkTreeIntents( $value->getArg1() );
			} else {
				$childCheck = $this->checkTreeIntents( $value );
			}

			// Only a failure ends the traversal; otherwise go on with the next sibling
			if ( self::isFailedCheck( $childCheck ) ) {
				return $childCheck;
			}
		}
		return true;
	}

	/**
	 * Check the name given in the arg part of an intent annotation.
	 * @param string|null $input argument name, empty values are accepted as "no argument"
	 * @return bool true if the whole name consists of letters, digits, '.', '_' and '-' only
	 */
	public static function checkIntentArg( $input ) {
		if ( !$input ) {
			return true;
		}
		// arg has roughly the same specs like NCName in parserintent.pegjs.
		// The pattern is anchored at both ends so that the entire name is validated.
		return preg_match( '/\A[a-zA-Z0-9._-]+\z/', $input ) === 1;
	}

	/**
	 * Very early intent syntax checker: runs the intent grammar on the given expression.
	 * @param string $input intent expression
	 * @return true|array true if the expression parses, otherwise the error report
	 * created by handleTexError()
	 */
	public function checkIntent( $input ) {
		try {
			$parserIntent = new ParserIntent();
			$parserIntent->parse( $input );
			return true;
		} catch ( Exception $exception ) {
			return $this->handleTexError( $exception, null );
		}
	}

	/**
	 * Convert an exception into the report array handed back by check().
	 * @param Exception $e the problem which occurred
	 * @param array|null $options check options; with a truthy 'debug' entry the
	 * exception is rethrown instead of being reported
	 * @return array report with 'success' => false, 'status', 'details' and 'error'
	 * @throws Exception the given exception, if the debug option is active
	 */
	private function handleTexError( Exception $e, $options = null ) {
		if ( !empty( $options['debug'] ) ) {
			throw $e;
		}
		$report = [ 'success' => false, 'warnings' => [] ];
		if ( $e instanceof SyntaxError ) {
			if ( $e->getMessage() === 'Illegal TeX function' ) {
				$report['status'] = 'F';
				$report['details'] = $e->found;
			} else {
				$report['status'] = 'S';
				$report['details'] = $e->getMessage();
			}
			$report += $this->getLocationInfo( $e );
			$report['error'] = [
				'message' => $e->getMessage(),
				'expected' => $e->expected,
				'found' => $e->found,
				'location' => [
					/** This currently only has the start location, since end is not noted in SyntaxError in PHP
					 * this issue is tracked in: https://phabricator.wikimedia.org/T321060
					 */
					'offset' => $e->grammarOffset,
					'line' => $e->grammarLine,
					'column' => $e->grammarColumn
				],
				'name' => $e->name
			];
		} else {
			$report['status'] = '-';
			$report['details'] = $e->getMessage();
			$report['error'] = $e;
		}
		return $report;
	}

	/**
	 * Gets the location information of an error object, or returns default error
	 * location if reading the location information raises an exception.
	 * @param SyntaxError $e error object
	 * @return array information on the error.
	 */
	private function getLocationInfo( SyntaxError $e ) {
		try {
			return [
				'offset' => $e->grammarOffset,
				'line' => $e->grammarLine,
				'column' => $e->grammarColumn
			];
		} catch ( Exception $err ) {
			return [ 'offset' => 0, 'line' => 0, 'column' => 0 ];
		}
	}

}