Code Coverage

| Item             | Lines % | Lines     | Functions and Methods % | Functions and Methods | CRAP  | Classes and Traits % | Classes and Traits |
|------------------|---------|-----------|-------------------------|-----------------------|-------|----------------------|--------------------|
| Total            | 88.89%  | 120 / 135 | 44.44%                  | 4 / 9                 |       | 0.00%                | 0 / 1              |
| TexVC            | 88.89%  | 120 / 135 | 44.44%                  | 4 / 9                 | 53.43 | 0.00%                | 0 / 1              |
| __construct      | 100.00% | 2 / 2     | 100.00%                 | 1 / 1                 | 1     |                      |                    |
| strStartsWith    | 100.00% | 1 / 1     | 100.00%                 | 1 / 1                 | 1     |                      |                    |
| parse            | 100.00% | 1 / 1     | 100.00%                 | 1 / 1                 | 1     |                      |                    |
| check            | 93.85%  | 61 / 65   | 0.00%                   | 0 / 1                 | 24.13 |                      |                    |
| checkTreeIntents | 76.19%  | 16 / 21   | 0.00%                   | 0 / 1                 | 12.63 |                      |                    |
| checkIntentArg   | 85.71%  | 6 / 7     | 0.00%                   | 0 / 1                 | 3.03  |                      |                    |
| checkIntent      | 100.00% | 5 / 5     | 100.00%                 | 1 / 1                 | 2     |                      |                    |
| handleTexError   | 88.46%  | 23 / 26   | 0.00%                   | 0 / 1                 | 5.04  |                      |                    |
| getLocationInfo  | 71.43%  | 5 / 7     | 0.00%                   | 0 / 1                 | 2.09  |                      |                    |
1 | <?php |
2 | |
3 | declare( strict_types = 1 ); |
4 | |
5 | namespace MediaWiki\Extension\Math\WikiTexVC; |
6 | |
7 | use Exception; |
8 | use MediaWiki\Extension\Math\WikiTexVC\Mhchem\MhchemParser; |
9 | use MediaWiki\Extension\Math\WikiTexVC\MMLmappings\Util\MMLParsingUtil; |
10 | use MediaWiki\Extension\Math\WikiTexVC\MMLmappings\Util\MMLutil; |
11 | use MediaWiki\Extension\Math\WikiTexVC\Nodes\Fun2; |
12 | use MediaWiki\Extension\Math\WikiTexVC\Nodes\TexArray; |
13 | use stdClass; |
14 | |
/**
 * A TeX/LaTeX validator and MathML converter.
 * WikiTexVC takes user input and validates it while replacing
 * MediaWiki-specific functions. The validator component is a PHP port of the JavaScript port of texvc,
 * which was originally written in OCaml for the Math extension.
 *
 * @author Johannes Stegmüller
 */
class TexVC {
	/** @var Parser parser used by parse() and check() to process TeX strings */
	private $parser;
	/** @var TexUtil instance obtained from TexUtil::getInstance(), queried for the "*_required" base elements */
	private $tu;

	public function __construct() {
		$this->parser = new Parser();
		$this->tu = TexUtil::getInstance();
	}

	/**
	 * Tells whether $haystack begins with $needle.
	 * @param string $haystack string to inspect
	 * @param string $needle expected prefix
	 * @return bool true if $needle is found at offset 0 of $haystack
	 */
	private function strStartsWith( $haystack, $needle ): bool {
		return strpos( $haystack, $needle ) === 0;
	}

	/**
	 * Usually this step is done implicitly within the check-method.
	 * @param string $input tex-string as input for the grammar
	 * @param null|array $options array options for the grammar.
	 * @return mixed output of the grammar.
	 * @throws SyntaxError when SyntaxError in the input
	 */
	public function parse( $input, $options = null ) {
		return $this->parser->parse( $input, $options );
	}

	/** status is one character:
	 *  + : success! result is in 'output'
	 *  E : Lexer exception raised
	 *  F : TeX function not recognized
	 *  S : Parsing error
	 *  C : A package required by the input (mhchem, mhchemtexified, intent) is not enabled
	 *      in the options
	 *  - : Generic/Default failure code. Might be an invalid argument,
	 *      output file already exist, a problem with an external
	 *      command ...
	 * @param string|TexArray|stdClass $input tex to be checked as string,
	 * can also be the output of former parser call
	 * @param array $options array options for settings of the check
	 * @param array &$warnings reference on warnings occurring during the check
	 * @param bool $texifyMhchem create TeX for mhchem in input before checking further
	 * @return array|string[] output with information status (see above)
	 * @throws Exception in case of a major problem with the check and activated debug option.
	 */
	public function check( $input, $options = [], &$warnings = [], bool $texifyMhchem = false ) {
		try {
			if ( $texifyMhchem && isset( $options["usemhchem"] ) && $options["usemhchem"] ) {
				// Parse the chemical equations to TeX with mhChemParser in PHP as preprocessor
				$mhChemParser = new MhchemParser();
				$input = $mhChemParser->toTex( $input, "tex", true );
			}

			$options = ParserUtil::createOptions( $options );
			if ( is_string( $input ) ) {
				$input = $this->parser->parse( $input, $options );
			}
			$output = $input->render();

			$result = [
				'inputN' => $input,
				'status' => '+',
				'output' => $output,
				'warnings' => $warnings,
				'input' => $input,
				'success' => true,
			];

			if ( $options['report_required'] ) {
				$pkgs = [ 'ams', 'cancel', 'color', 'euro', 'teubner',
					'mhchem', 'mathoid', 'mhchemtexified', "intent" ];

				foreach ( $pkgs as $pkg ) {
					$pkg .= '_required';
					$tuRef = $this->tu->getBaseElements()[$pkg];
					$result[$pkg] = $input->containsFunc( $tuRef );
				}
			}

			if ( !$options['usemhchem'] && $this->requiresPackage( $result, $input, 'mhchem_required' ) ) {
				return [
					'status' => 'C',
					'details' => 'mhchem package required.'
				];
			}

			if ( !$options['usemhchemtexified']
				&& $this->requiresPackage( $result, $input, 'mhchemtexified_required' )
			) {
				return [
					'status' => 'C',
					'details' => 'virtual mhchemtexified package required.'
				];
			}

			if ( !$options['useintent'] ) {
				if ( $this->requiresPackage( $result, $input, 'intent_required' ) ) {
					return [
						'status' => 'C',
						'details' => 'virtual intent package required.'
					];
				}
			} elseif ( $input->containsFunc( $this->tu->getBaseElements()['intent_required'] ) ) {
				// Preliminary post-checks of correct intent-syntax
				$intentCheck = $this->checkTreeIntents( $input );
				if ( !$intentCheck || ( isset( $intentCheck["success"] ) && !$intentCheck["success"] ) ) {
					return $intentCheck;
				}
			}

			return $result;
		} catch ( Exception $ex ) {
			// The exception may have been raised before ParserUtil::createOptions() ran, so the
			// option keys are read with empty() instead of being indexed directly.
			if ( $ex instanceof SyntaxError && empty( $options['oldtexvc'] )
				&& $this->strStartsWith( $ex->getMessage(), 'Deprecation' ) ) {
				$warnings[] = [
					'type' => 'texvc-deprecation',
					'details' => $this->handleTexError( $ex, $options )
				];
				// Retry once with the 'oldtexvc' option switched on.
				$options['oldtexvc'] = true;
				return $this->check( $input, $options, $warnings );
			}

			if ( $ex instanceof SyntaxError && !empty( $options['usemhchem'] )
				&& empty( $options['oldmhchem'] ) ) {
				$warnings[] = [
					'type' => 'mhchem-deprecation',
					'details' => $this->handleTexError( $ex, $options )
				];
				// Retry once with the 'oldmhchem' option switched on.
				$options['oldmhchem'] = true;
				return $this->check( $input, $options, $warnings );
			}

			return $this->handleTexError( $ex, $options );
		}
	}

	/**
	 * Looks up whether the input needs the package behind a "*_required" key, reusing the value
	 * already stored in $result (filled when 'report_required' is set) before asking the tree.
	 * @param array $result result array assembled by check()
	 * @param mixed $input parsed input offering containsFunc()
	 * @param string $key key such as 'mhchem_required'
	 * @return mixed stored value from $result, otherwise the return value of containsFunc()
	 */
	private function requiresPackage( array $result, $input, string $key ) {
		return $result[$key] ?? $input->containsFunc( $this->tu->getBaseElements()[$key] );
	}

	/**
	 * Walks the tree and validates every \intent node found among the arguments.
	 * All children are visited; the walk stops at the first failure.
	 * @param mixed $inputTree node offering getArgs(), a string, or an empty value
	 * @return bool|array true if nothing was rejected, otherwise a failure report
	 * whose 'success' entry is false
	 */
	private function checkTreeIntents( $inputTree ) {
		// Strings and empty values have no children to inspect.
		if ( is_string( $inputTree ) || !$inputTree ) {
			return true;
		}
		foreach ( $inputTree->getArgs() as $value ) {
			if ( $value instanceof Fun2 && $value->getFname() === "\\intent" ) {
				$intentStr = MMLutil::squashLitsToUnitIntent( $value->getArg2() );
				$intentContent = MMLParsingUtil::getIntentContent( $intentStr );
				$intentArg = MMLParsingUtil::getIntentArgs( $intentStr );
				if ( !self::checkIntentArg( $intentArg ) ) {
					return [
						'success' => false,
						'info' => 'malformatted intent argument',
						// NOTE(review): 'status'/'details' mirror the shape of the other failure
						// reports; the choice of 'S' should be confirmed against the callers.
						'status' => 'S',
						'details' => 'malformatted intent argument',
					];
				}
				// do check on the intent content, if there is any
				$ret = $intentContent ? $this->checkIntent( $intentContent ) : true;
				if ( $ret !== true ) {
					return $ret;
				}
				$ret = $this->checkTreeIntents( $value->getArg1() );
			} else {
				$ret = $this->checkTreeIntents( $value );
			}
			// Only a failure ends the walk; otherwise continue with the next sibling.
			if ( $ret !== true ) {
				return $ret;
			}
		}
		return true;
	}

	/**
	 * Checks that an intent argument consists only of the characters a-z, A-Z, 0-9, '.', '_' and '-'.
	 * @param string|null $input argument to check; empty values are accepted
	 * @return bool true if $input is empty or made up solely of allowed characters
	 */
	public static function checkIntentArg( $input ) {
		if ( !$input ) {
			return true;
		}
		// arg has roughly the same specs like NCName in parserintent.pegjs
		// Anchored on both ends (\z also rejects a trailing newline) so the whole argument
		// has to match, not just an empty prefix.
		return preg_match( '/^[a-zA-Z0-9._-]*\z/', $input ) === 1;
	}

	/**
	 * Very early intent syntax checker: runs the input through ParserIntent.
	 * @param string $input intent content to parse
	 * @return bool|array true if parsing succeeded, otherwise the report of handleTexError()
	 */
	public function checkIntent( $input ) {
		try {
			$parserIntent = new ParserIntent();
			$parserIntent->parse( $input );
			return true;
		} catch ( Exception $exception ) {
			return $this->handleTexError( $exception, null );
		}
	}

	/**
	 * Converts an exception into a failure report.
	 * @param Exception $e exception to report
	 * @param array|null $options check options; only 'debug' is read here
	 * @return array report with 'success' => false, 'warnings', 'status', 'details' and 'error';
	 * for a SyntaxError it also carries 'offset', 'line' and 'column'
	 * @throws Exception rethrows $e when the 'debug' option is set
	 */
	private function handleTexError( Exception $e, $options = null ) {
		if ( !empty( $options['debug'] ) ) {
			throw $e;
		}
		$report = [ 'success' => false, 'warnings' => [] ];

		if ( !( $e instanceof SyntaxError ) ) {
			$report['status'] = '-';
			$report['details'] = $e->getMessage();
			$report['error'] = $e;
			return $report;
		}

		if ( $e->getMessage() === 'Illegal TeX function' ) {
			$report['status'] = 'F';
			$report['details'] = $e->found;
		} else {
			$report['status'] = 'S';
			$report['details'] = $e->getMessage();
		}
		$report += $this->getLocationInfo( $e );
		$report['error'] = [
			'message' => $e->getMessage(),
			'expected' => $e->expected,
			'found' => $e->found,
			'location' => [
				/** This currently only has the start location, since end is not noted in SyntaxError in PHP
				 * this issue is tracked in: https://phabricator.wikimedia.org/T321060
				 */
				'offset' => $e->grammarOffset,
				'line' => $e->grammarLine,
				'column' => $e->grammarColumn
			],
			'name' => $e->name
		];
		return $report;
	}

	/**
	 * Gets the location information of an error object, or returns default error
	 * location if reading the location raises an exception.
	 * @param SyntaxError $e error object
	 * @return array information on the error.
	 */
	private function getLocationInfo( SyntaxError $e ) {
		try {
			return [
				'offset' => $e->grammarOffset,
				'line' => $e->grammarLine,
				'column' => $e->grammarColumn
			];
		} catch ( Exception $err ) {
			return [ 'offset' => 0, 'line' => 0, 'column' => 0 ];
		}
	}

}