Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 184 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
UpdateMath | |
0.00% |
0 / 179 |
|
0.00% |
0 / 9 |
1260 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
2 | |||
time | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
12 | |||
populateSearchIndex | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
42 | |||
doUpdate | |
0.00% |
0 / 63 |
|
0.00% |
0 / 1 |
240 | |||
getParserOptions | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getParser | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
execute | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
getMathMLForExport | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
exportMMLtoFile | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 |
1 | #!/usr/bin/env php |
2 | <?php |
3 | /** |
4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. |
8 | * |
9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU General Public License along |
15 | * with this program; if not, write to the Free Software Foundation, Inc., |
16 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
17 | * http://www.gnu.org/copyleft/gpl.html |
18 | * |
19 | * @ingroup Maintenance |
20 | */ |
21 | |
22 | use MediaWiki\Extension\Math\MathRenderer; |
23 | use MediaWiki\MediaWikiServices; |
24 | |
25 | require_once __DIR__ . '/../../../maintenance/Maintenance.php'; |
26 | |
27 | class UpdateMath extends Maintenance { |
28 | |
29 | /** @var bool */ |
30 | private $purge = false; |
31 | /** @var bool */ |
32 | private $verbose; |
33 | /** @var \Wikimedia\Rdbms\IDatabase */ |
34 | private $dbw; |
35 | /** @var \Wikimedia\Rdbms\IDatabase */ |
36 | private $db; |
37 | /** @var MathRenderer */ |
38 | private $current; |
39 | /** @var float */ |
40 | private $time = 0.0; // microtime( true ); |
41 | /** @var float[] */ |
42 | private $performance = []; |
43 | /** @var string */ |
44 | private $renderingMode = 'latexml'; |
45 | /** @var int */ |
46 | private $chunkSize = 1000; |
47 | private $parser; |
48 | private $parserOptions; |
49 | |
/**
 * Declares the script description, its command line options, the two
 * optional positional arguments (min, max) and the MathSearch requirement.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Updates the index of Mathematical formulae.' );

	$this->addOption(
		'purge',
		'If set all formulae are rendered again without using caches. (Very time consuming!)',
		false,
		false,
		'f'
	);

	// Positional arguments; the registration order (min, then max) defines their position.
	$positionalArgs = [
		'min' => 'If set processing is started at the page with rank(pageID)>min',
		'max' => 'If set processing is stopped at the page with rank(pageID)<=max',
	];
	foreach ( $positionalArgs as $argName => $argHelp ) {
		$this->addArg( $argName, $argHelp, false );
	}

	// Option name => [ help text, takes a value, short name (false for none) ].
	$optionalSwitches = [
		'verbose' => [ 'If set output for successful rendering will produced', false, 'v' ],
		'SVG' => [ 'If set SVG images will be produced', false, false ],
		'hooks' => [ 'If set hooks will be skipped, but index will be updated.', false, false ],
		'texvccheck' => [ 'If set texvccheck will be skipped', false, false ],
		'mode' => [ 'Rendering mode to be used (mathml, latexml)', true, 'm' ],
		'exportmml' => [ 'export LaTeX and generated MathML to the specified file', true, 'e' ],
		'chunk-size' => [ 'Determines how many pages are updated in one database transaction.', true, false ],
	];
	foreach ( $optionalSwitches as $optionName => [ $optionHelp, $takesValue, $shortName ] ) {
		$this->addOption( $optionName, $optionHelp, false, $takesValue, $shortName );
	}

	$this->requireExtension( 'MathSearch' );
}
74 | |
/**
 * Books the time elapsed since the previous measurement under a category.
 *
 * The elapsed milliseconds are added to the running total kept for $category
 * in $this->performance. When $wgMathDebug is truthy, one row describing the
 * measurement is additionally inserted into the 'mathperformance' table for
 * the renderer stored in $this->current. Finally the reference timestamp is
 * reset, so the next call measures from the end of this one.
 *
 * @param string $category label of the step that has just finished
 *
 * @return int elapsed time in ms, truncated to an integer
 */
private function time( $category = 'default' ) {
	global $wgMathDebug;

	$elapsedMs = 1000 * ( microtime( true ) - $this->time );
	$this->performance[$category] = ( $this->performance[$category] ?? 0 ) + $elapsedMs;

	if ( $wgMathDebug ) {
		$row = [
			'math_inputhash' => $this->current->getInputHash(),
			// Only the first 10 characters of the category are stored.
			'mathperformance_name' => substr( $category, 0, 10 ),
			'mathperformance_time' => $elapsedMs,
			'mathperformance_mode' => MathObject::MODE_2_USER_OPTION[ $this->renderingMode ]
		];
		$this->db->insert( 'mathperformance', $row );
	}

	// Restart the stopwatch only after the optional database write.
	$this->time = microtime( true );

	return (int)$elapsedMs;
}
102 | |
/**
 * Populates the search index with content from all pages.
 *
 * Revisions are walked in chunks of $this->chunkSize revision ids. For every
 * chunk the rows whose latest revision falls into the chunk are selected,
 * each page text is handed to doUpdate(), and all writes of the chunk are
 * committed in one transaction on $this->dbw.
 *
 * @param int $n revision id to start at
 * @param int $cMax highest revision id to process (inclusive); values <= 0
 *  mean "up to MAX(rev_id)"
 */
protected function populateSearchIndex( $n = 0, $cMax = -1 ) {
	// Both bounds may arrive as raw command line strings and are interpolated
	// into the SQL condition below, so force them to integers first.
	$n = (int)$n;
	$cMax = (int)$cMax;
	// A chunk size below 1 would never advance $n and loop forever.
	$chunkSize = max( 1, (int)$this->chunkSize );

	$s = $this->db->selectRow( 'revision', 'MAX(rev_id) AS count', '', __METHOD__ );
	// MAX() yields NULL on an empty revision table.
	$count = $s ? (int)$s->count : 0;
	if ( $cMax > 0 && $count > $cMax ) {
		$count = $cMax;
	}
	$this->output(
		"Rebuilding index fields for pages with revision < {$count} with option {$this->purge}...\n"
	);
	$fCount = 0;
	// The upper bound is inclusive: with "<" the highest revision was skipped
	// whenever $count happened to be an exact multiple of the chunk size.
	while ( $n <= $count ) {
		if ( $n ) {
			$this->output( $n . " of $count \n" );
		}
		$end = min( $n + $chunkSize - 1, $count );

		# For filtering page by namespace add condition 'page_namespace = 4'
		$res = $this->db->select( [ 'page', 'slots', 'content', 'text', 'revision' ],
			[ 'page_id', 'page_namespace', 'page_title', 'page_latest',
				'content_address', 'old_text', 'old_flags', 'rev_id' ],
			[ "rev_id BETWEEN $n AND $end" ],
			__METHOD__,
			[],
			[
				'slots' => [ 'INNER JOIN', [ 'slot_origin = page_latest' ] ],
				'content' => [ 'INNER JOIN', [ 'content_id = slot_content_id' ] ],
				'text' => [ 'INNER JOIN', [ 'old_id = substr(content_address,4)' ] ],
				'revision' => [ 'INNER JOIN', [ 'page_latest = rev_id' ] ] ]
		);

		$this->dbw->begin( __METHOD__ );
		foreach ( $res as $s ) {
			$this->output( "\nr{$s->rev_id} namespace: {$s->page_namespace} page title: {$s->page_title}" );
			$fCount += $this->doUpdate( $s->page_id, $s->old_text, $s->page_title, $s->rev_id );
		}
		// Report how long the commit of this chunk took.
		$start = microtime( true );
		$this->dbw->commit( __METHOD__ );
		echo " committed in " . ( microtime( true ) - $start ) . "s\n\n";
		// Dump the per-category timings accumulated by time() so far.
		var_dump( $this->performance );
		$n += $chunkSize;
	}
	$this->output( "Updated {$fCount} formulae!\n" );
}
157 | |
/**
 * Renders and indexes every math tag found in the text of one page.
 *
 * For each formula the renderer is created, the TeX is checked (unless
 * --texvccheck is set), the formula is rendered if it is not in the database
 * or --purge is set, the cache is written, and either the
 * MathFormulaPostRender hook is run or (with --hooks) the math index is
 * written directly. With --exportmml the collected MathML of the page is
 * written to a file afterwards.
 *
 * @param int $pid page id (not used by this method)
 * @param string $pText wikitext of the page
 * @param string $pTitle page title, used for log output and the export file name
 * @param int $revId revision id the formulae belong to
 *
 * @return int number of math tags found in $pText
 */
private function doUpdate( $pid, $pText, $pTitle = "", $revId = 0 ) {
	// MathSearchHooks::setNextID($eId);
	$math = MathObject::extractMathTagsFromWikiText( $pText );
	$matches = count( $math );
	if ( !$matches ) {
		return 0;
	}

	$allFormula = [];
	$notused = '';
	// Loop-invariant lookups.
	$services = MediaWikiServices::getInstance();
	$mmlPath = $this->getOption( "exportmml", false );

	echo ( "\t processing $matches math fields for {$pTitle} page\n" );
	foreach ( $math as $formula ) {
		// Reset per formula: the failure report below prints it, but it only
		// receives a value on the --hooks path.
		$eId = null;
		$this->time = microtime( true );
		/** @var MathRenderer $renderer */
		$renderer = $services->get( 'Math.RendererFactory' )
			->getRenderer( $formula[1], $formula[2], $this->renderingMode );
		$this->current = $renderer;
		$this->time( "loadClass" );
		if ( $this->getOption( "texvccheck", false ) ) {
			// --texvccheck means "skip the check", so treat the input as valid.
			$checked = true;
		} else {
			$checked = $renderer->checkTeX();
			$this->time( "checkTex" );
		}
		if ( !$checked ) {
			$this->time( "checkTex-Fail" );
			echo "\nF:\t\t" . $renderer->getInputHash() . " texvccheck error:" .
				$renderer->getLastError();
			continue;
		}
		if ( !$renderer->isInDatabase() || $this->purge ) {
			$renderer->render( $this->purge );
			if ( $renderer->getMathml() ) {
				$this->time( "render" );
			} else {
				$this->time( "Failing" );
			}
			if ( $this->getOption( "SVG", false ) ) {
				$svg = $renderer->getSvg();
				if ( $svg ) {
					$this->time( "SVG-Rendering" );
				} else {
					$this->time( "SVG-Fail" );
				}
			}
		} else {
			$this->time( 'checkInDB' );
		}
		$renderer->writeCache();
		$this->time( "write Cache" );
		if ( !$this->getOption( "hooks", false ) ) {
			$services->getHookContainer()->run(
				'MathFormulaPostRender',
				[
					$this->getParser( $revId ),
					&$renderer,
					&$notused
				]
			);
			$this->time( "hooks" );
		} else {
			// --hooks means "skip the hooks" and write the index directly.
			MathSearchHooks::setMathId( $eId, $renderer, $revId );
			MathSearchHooks::writeMathIndex( $revId, $eId, $renderer->getInputHash(), '' );
			$this->time( "index" );
		}
		if ( $renderer->getLastError() ) {
			echo "\n\t\t" . $renderer->getLastError();
			// $formula is the match array; element 1 is the TeX source that was
			// passed to the renderer above. The MathML may be empty after a
			// failed rendering, hence the string cast.
			echo "\nF:\t\t" . $renderer->getInputHash() . " equation " . ( $eId ) .
				"-failed beginning with\n\t\t'" . substr( $formula[1], 0, 100 )
				. "'\n\t\tmathml:" . substr( (string)$renderer->getMathml(), 0, 10 ) . "\n ";
		} elseif ( $this->verbose ) {
			echo "\nS:\t\t" . $renderer->getInputHash();
		}
		if ( $mmlPath ) {
			$allFormula = $this->getMathMLForExport( $formula[1], $renderer, $allFormula );
		}
	}
	if ( $mmlPath ) {
		$this->exportMMLtoFile( $mmlPath, $allFormula, $pTitle );
	}

	return $matches;
}
256 | |
/**
 * Returns the parser options shared by all getParser() calls, creating the
 * anonymous-user options on first use.
 *
 * @return ParserOptions
 */
private function getParserOptions(): ParserOptions {
	if ( $this->parserOptions ) {
		return $this->parserOptions;
	}
	$this->parserOptions = ParserOptions::newFromAnon();

	return $this->parserOptions;
}
263 | |
/**
 * Returns the lazily created parser, primed with the given revision id.
 *
 * @param int $revId revision id to hand to the parser
 *
 * @return Parser
 */
private function getParser( $revId ): Parser {
	$parser = $this->parser;
	if ( !$parser ) {
		$parser = MediaWikiServices::getInstance()->getParserFactory()->create();
		$this->parser = $parser;
	}
	// hack to set private field mRevisionId id: preprocessing an empty text
	// is used only to pass $revId into the parser.
	$parser->preprocess( '', null, $this->getParserOptions(), $revId );

	return $parser;
}
276 | |
/**
 * Entry point of the maintenance script.
 *
 * Reads the command line options into the instance state, obtains the
 * primary database connection, registers the chosen rendering mode in
 * $wgMathValidModes and starts populateSearchIndex() with the optional
 * positional min/max arguments.
 */
public function execute() {
	global $wgMathValidModes;

	$connectionProvider = MediaWikiServices::getInstance()->getConnectionProvider();
	$this->dbw = $connectionProvider->getPrimaryDatabase();
	$this->db = $connectionProvider->getPrimaryDatabase();

	$this->purge = $this->getOption( "purge", false );
	$this->verbose = $this->getOption( "verbose", false );
	$this->renderingMode = $this->getOption( "mode", 'latexml' );

	// The option arrives as a command line string. Anything below 1 (including
	// non-numeric input, which casts to 0) would keep populateSearchIndex()
	// from ever advancing, so refuse it up front.
	$chunkSize = (int)$this->getOption( 'chunk-size', $this->chunkSize );
	if ( $chunkSize < 1 ) {
		$this->fatalError( "The chunk-size option must be a positive integer.\n" );
	}
	$this->chunkSize = $chunkSize;

	$wgMathValidModes[] = $this->renderingMode;
	$this->output( "Loaded.\n" );
	$this->time = microtime( true );
	// Positional arguments are strings as well; pass them on as integers.
	$this->populateSearchIndex( (int)$this->getArg( 0, 0 ), (int)$this->getArg( 1, -1 ) );
}
294 | |
/**
 * Appends one export entry (TeX, input type, MathML) for a formula.
 *
 * In verbose mode the formula, its input type and the first 50 characters
 * of the MathML are echoed as well.
 *
 * @param string $formula formula in tex to save
 * @param MathRenderer $renderer renderer that provides the MathML and the input type
 * @param array $allFormula entries collected so far
 * @return array $allFormula with the new entry appended
 */
public function getMathMLForExport( string $formula, MathRenderer $renderer, array $allFormula ): array {
	if ( $this->verbose ) {
		echo "\n Fetching MML for formula: {$formula}\n";
	}

	$mml = $renderer->getMathml();

	if ( $this->verbose ) {
		$inputTypeLine = "\n Input-type is: " . $renderer->getInputType();
		echo $inputTypeLine;
		$mmlPreviewLine = "\n MathML is" . substr( $mml, 0, 50 );
		echo $mmlPreviewLine;
	}

	$entry = [
		'tex' => $formula,
		'type' => $renderer->getInputType(),
		'mml' => $mml,
	];
	$allFormula[] = $entry;

	return $allFormula;
}
318 | |
/**
 * Writes the MathML content in allFormula to a file named
 * '<mmlPath>/mmlRes-<mode>-<pTitle>.json'.
 *
 * Directory separators inside the page title are percent-encoded so that the
 * file always ends up directly inside $mmlPath. Encoding and write failures
 * are reported through error() and do not abort the run.
 *
 * @param string $mmlPath path for saving the mathml (without filename)
 * @param array $allFormula all formula array with mathml for the current page
 * @param string $pTitle title of page
 * @return void
 * @throws InvalidArgumentException when the filepath defined by cli-arg is not a correct folder
 */
public function exportMMLtoFile( string $mmlPath, array $allFormula, string $pTitle ): void {
	if ( !is_dir( $mmlPath ) ) {
		throw new InvalidArgumentException( "Filepath for exportmml at not valid at: " . $mmlPath );
	}

	$jsonData = json_encode( $allFormula, JSON_PRETTY_PRINT );
	if ( $jsonData === false ) {
		// json_encode() fails e.g. on malformed UTF-8; without this check an
		// empty file would be written silently.
		$this->error( "Could not encode the formulae of page '$pTitle' as JSON: " .
			json_last_error_msg() );
		return;
	}

	// Subpage titles contain "/", which would point into a (usually missing)
	// sub-directory or outside the export folder.
	$fileTitle = strtr( $pTitle, [ '/' => '%2F', '\\' => '%5C' ] );
	$fullPath = realpath( $mmlPath ) . DIRECTORY_SEPARATOR . 'mmlRes-' . $this->renderingMode .
		"-" . $fileTitle . ".json";

	if ( file_put_contents( $fullPath, $jsonData ) === false ) {
		$this->error( "Could not write the MathML export to '$fullPath'" );
	}
}
336 | } |
337 | |
// Name of the script class defined above; presumably picked up by the file
// that RUN_MAINTENANCE_IF_MAIN points to — confirm against Maintenance.php.
$maintClass = UpdateMath::class;
/** @noinspection PhpIncludeInspection */
require_once RUN_MAINTENANCE_IF_MAIN;