Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 183 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
UpdateMath | |
0.00% |
0 / 178 |
|
0.00% |
0 / 9 |
1190 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
2 | |||
time | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
populateSearchIndex | |
0.00% |
0 / 36 |
|
0.00% |
0 / 1 |
42 | |||
doUpdate | |
0.00% |
0 / 63 |
|
0.00% |
0 / 1 |
240 | |||
getParserOptions | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getParser | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
execute | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
getMathMLForExport | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
exportMMLtoFile | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 |
1 | #!/usr/bin/env php |
2 | <?php |
3 | /** |
4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. |
8 | * |
9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU General Public License along |
15 | * with this program; if not, write to the Free Software Foundation, Inc., |
16 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
17 | * http://www.gnu.org/copyleft/gpl.html |
18 | * |
19 | * @ingroup Maintenance |
20 | */ |
21 | |
22 | use MediaWiki\Extension\Math\MathRenderer; |
23 | use MediaWiki\Parser\Parser; |
24 | |
25 | require_once __DIR__ . '/../../../maintenance/Maintenance.php'; |
26 | |
/**
 * Maintenance script that walks over the latest revision of pages, extracts
 * the math tags found in their wikitext, renders them and updates the math
 * index (either through the MathFormulaPostRender hook or by writing the
 * index directly when --hooks is given).
 */
class UpdateMath extends Maintenance {

	/** @var bool Re-render every formula even if it is already in the database */
	private $purge = false;
	/** @var bool Print a line for every successfully processed formula */
	private $verbose;
	/** @var \Wikimedia\Rdbms\IDatabase Primary connection used for the per-chunk transaction */
	private $dbw;
	/** @var \Wikimedia\Rdbms\IDatabase Primary connection used for reads and performance logging */
	private $db;
	/** @var MathRenderer Renderer of the formula that is currently being processed */
	private $current;
	/** @var float Timestamp (microtime) of the last measurement taken by time() */
	private $time = 0.0;
	/** @var float[] Accumulated time in ms, keyed by measurement category */
	private $performance = [];
	/** @var string */
	private $renderingMode = 'latexml';
	/** @var int Width of the rev_id window that is processed per transaction */
	private $chunkSize = 1000;
	/** @var Parser */
	private $parser;
	/** @var ParserOptions */
	private $parserOptions;

	/**
	 * Registers the command line options and arguments of the script.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Updates the index of Mathematical formulae.' );
		$this->addOption( 'purge',
			"If set all formulae are rendered again without using caches. (Very time consuming!)",
			false, false, "f" );
		$this->addArg( 'min', "If set processing is started at the page with rank(pageID)>min",
			false );
		$this->addArg( 'max', "If set processing is stopped at the page with rank(pageID)<=max",
			false );
		$this->addOption( 'verbose', "If set output for successful rendering will produced", false,
			false, 'v' );
		$this->addOption( 'SVG', "If set SVG images will be produced", false, false );
		$this->addOption( 'hooks', "If set hooks will be skipped, but index will be updated.",
			false, false );
		$this->addOption( 'texvccheck', "If set texvccheck will be skipped", false, false );
		$this->addOption( 'mode', 'Rendering mode to be used (mathml, latexml)', false, true,
			'm' );
		$this->addOption( 'exportmml', 'export LaTeX and generated MathML to the specified file', false, true,
			'e' );
		$this->addOption( 'chunk-size',
			'Determines how many pages are updated in one database transaction.', false, true );
		$this->requireExtension( 'MathSearch' );
	}

	/**
	 * Measures the time in ms that passed since the previous measurement and
	 * adds it to the given category.
	 *
	 * In order to have a formula-centric evaluation, we cannot just use the
	 * built-in profiler. When $wgMathDebug is set, every measurement is also
	 * written to the mathperformance table for the current formula.
	 *
	 * @param string $category name of the step that just finished
	 *
	 * @return int elapsed time in ms (truncated)
	 */
	private function time( $category = 'default' ) {
		global $wgMathDebug;
		$delta = ( microtime( true ) - $this->time ) * 1000;
		$this->performance[$category] ??= 0;
		$this->performance[$category] += $delta;
		if ( $wgMathDebug ) {
			$this->db->insert( 'mathperformance', [
				'math_inputhash' => $this->current->getInputHash(),
				// the category is cut to 10 characters before it is stored
				'mathperformance_name' => substr( $category, 0, 10 ),
				'mathperformance_time' => $delta,
				'mathperformance_mode' => MathObject::MODE_2_USER_OPTION[ $this->renderingMode ]
			] );
		}
		// restart the stopwatch for the next step
		$this->time = microtime( true );

		return (int)$delta;
	}

	/**
	 * Populates the search index with content from all pages.
	 *
	 * Revisions are walked in windows of chunkSize rev_ids; each window is
	 * processed inside one transaction on the primary database.
	 *
	 * @param int $n first rev_id to process
	 * @param int $cMax upper bound for the rev_id; values <= 0 mean "up to MAX(rev_id)"
	 */
	protected function populateSearchIndex( $n = 0, $cMax = -1 ) {
		// Both values arrive as strings from the command line and are interpolated
		// into a SQL condition below, so force them to integers first.
		$n = (int)$n;
		$cMax = (int)$cMax;
		// A chunk size below 1 would never advance $n and loop forever.
		$chunkSize = max( 1, (int)$this->chunkSize );

		$s = $this->db->selectRow( 'revision', 'MAX(rev_id) AS count', '', __METHOD__ );
		$count = (int)$s->count;
		if ( $cMax > 0 && $count > $cMax ) {
			$count = $cMax;
		}
		$this->output(
			"Rebuilding index fields for pages with revision < {$count} with option {$this->purge}...\n"
		);
		$fCount = 0;
		while ( $n < $count ) {
			if ( $n ) {
				$this->output( $n . " of $count \n" );
			}
			$end = min( $n + $chunkSize - 1, $count );

			# For filtering page by namespace add condition 'page_namespace = 4'
			$res = $this->db->select( [ 'page', 'slots', 'content', 'text', 'revision' ],
				[ 'page_id', 'page_namespace', 'page_title', 'page_latest',
					'content_address', 'old_text', 'old_flags', 'rev_id' ],
				[ "rev_id BETWEEN $n AND $end" ],
				__METHOD__,
				[],
				[
					'slots' => [ 'INNER JOIN', [ 'slot_origin = page_latest' ] ],
					'content' => [ 'INNER JOIN', [ 'content_id = slot_content_id' ] ],
					// content_address is joined on everything after its first three characters
					'text' => [ 'INNER JOIN', [ 'old_id = substr(content_address,4)' ] ],
					'revision' => [ 'INNER JOIN', [ 'page_latest = rev_id' ] ] ]
			);

			$this->beginTransaction( $this->dbw, __METHOD__ );
			foreach ( $res as $s ) {
				$this->output( "\nr{$s->rev_id} namespace: {$s->page_namespace} page title: {$s->page_title}" );
				$fCount += $this->doUpdate( $s->page_id, $s->old_text, $s->page_title, $s->rev_id );
			}
			$start = microtime( true );
			$this->commitTransaction( $this->dbw, __METHOD__ );
			echo " committed in " . ( microtime( true ) - $start ) . "s\n\n";
			var_dump( $this->performance );
			$n += $chunkSize;
		}
		$this->output( "Updated {$fCount} formulae!\n" );
	}

	/**
	 * Renders and indexes all math tags found in the text of one page.
	 *
	 * @param int $pid page id (currently not used by the method body)
	 * @param string $pText wikitext of the page
	 * @param string $pTitle page title, used for log output and the export file name
	 * @param int $revId revision the formulae belong to
	 *
	 * @return int number of math tags found in the text
	 */
	private function doUpdate( $pid, $pText, $pTitle = "", $revId = 0 ) {
		$math = MathObject::extractMathTagsFromWikiText( $pText );
		$matches = count( $math );
		if ( !$matches ) {
			return 0;
		}

		$allFormula = [];
		$notused = '';
		$services = $this->getServiceContainer();
		$rendererFactory = $services->get( 'Math.RendererFactory' );
		$mmlPath = $this->getOption( "exportmml", false );

		echo ( "\t processing $matches math fields for {$pTitle} page\n" );
		foreach ( $math as $formula ) {
			// $formula is an array: index 1 and 2 are handed to the renderer factory below
			$this->time = microtime( true );
			/** @var MathRenderer $renderer */
			$renderer = $rendererFactory->getRenderer( $formula[1], $formula[2], $this->renderingMode );
			$this->current = $renderer;
			$this->time( "loadClass" );
			if ( $this->getOption( "texvccheck", false ) ) {
				// --texvccheck skips the check, the input is treated as valid
				$checked = true;
			} else {
				$checked = $renderer->checkTeX();
				$this->time( "checkTex" );
			}
			if ( !$checked ) {
				$this->time( "checkTex-Fail" );
				echo "\nF:\t\t" . $renderer->getInputHash() . " texvccheck error:" .
					$renderer->getLastError();
				continue;
			}
			if ( !$renderer->isInDatabase() || $this->purge ) {
				$renderer->render( $this->purge );
				if ( $renderer->getMathml() ) {
					$this->time( "render" );
				} else {
					$this->time( "Failing" );
				}
				if ( $this->getOption( "SVG", false ) ) {
					$svg = $renderer->getSvg();
					if ( $svg ) {
						$this->time( "SVG-Rendering" );
					} else {
						$this->time( "SVG-Fail" );
					}
				}
			} else {
				$this->time( 'checkInDB' );
			}
			$renderer->writeCache();
			$this->time( "write Cache" );

			// Only the --hooks branch assigns an id; initialise it so the failure
			// message below never reads an undefined variable.
			$eId = null;
			if ( !$this->getOption( "hooks", false ) ) {
				$services->getHookContainer()->run(
					'MathFormulaPostRender',
					[
						$this->getParser( $revId ),
						&$renderer,
						&$notused
					]
				);
				$this->time( "hooks" );
			} else {
				MathSearchHooks::setMathId( $eId, $renderer, $revId );
				MathSearchHooks::writeMathIndex( $revId, $eId, $renderer->getInputHash(), '' );
				$this->time( "index" );
			}
			if ( $renderer->getLastError() ) {
				echo "\n\t\t" . $renderer->getLastError();
				// $formula is an array, so print the start of its element 1 (the value
				// that was passed to the renderer) instead of calling substr() on the array
				echo "\nF:\t\t" . $renderer->getInputHash() . " equation " . ( $eId ) .
					"-failed beginning with\n\t\t'" . substr( (string)$formula[1], 0, 100 )
					. "'\n\t\tmathml:" . substr( (string)$renderer->getMathml(), 0, 10 ) . "\n ";
			} elseif ( $this->verbose ) {
				echo "\nS:\t\t" . $renderer->getInputHash();
			}
			if ( $mmlPath ) {
				$allFormula = $this->getMathMLForExport( $formula[1], $renderer, $allFormula );
			}
		}
		if ( $mmlPath ) {
			$this->exportMMLtoFile( $mmlPath, $allFormula, $pTitle );
		}

		return $matches;
	}

	/**
	 * Returns the lazily created parser options that are shared by all getParser() calls.
	 *
	 * @return ParserOptions
	 */
	private function getParserOptions(): ParserOptions {
		if ( !$this->parserOptions ) {
			$this->parserOptions = ParserOptions::newFromAnon();
		}
		return $this->parserOptions;
	}

	/**
	 * Returns the shared parser instance after preprocessing an empty text for the given revision.
	 *
	 * @param int $revId revision id handed to the parser
	 * @return Parser
	 */
	private function getParser( $revId ): Parser {
		if ( !$this->parser ) {
			$this->parser = $this->getServiceContainer()->getParserFactory()->create();
		}
		// hack to set private field mRevisionId id
		$this->parser->preprocess(
			'',
			null,
			$this->getParserOptions(),
			$revId );
		return $this->parser;
	}

	/**
	 * Reads the command line options, sets up the database handles and starts the update.
	 */
	public function execute() {
		global $wgMathValidModes;
		$this->dbw = $this->getServiceContainer()
			->getConnectionProvider()
			->getPrimaryDatabase();
		$this->purge = (bool)$this->getOption( "purge", false );
		$this->verbose = (bool)$this->getOption( "verbose", false );
		$this->renderingMode = $this->getOption( "mode", 'latexml' );
		// option values are strings; keep the property an int as documented
		$this->chunkSize = (int)$this->getOption( 'chunk-size', $this->chunkSize );
		$this->db = $this->getServiceContainer()
			->getConnectionProvider()
			->getPrimaryDatabase();
		// the requested mode is appended to the list of valid modes for this run
		$wgMathValidModes[] = $this->renderingMode;
		$this->output( "Loaded.\n" );
		$this->time = microtime( true );
		$this->populateSearchIndex( $this->getArg( 0, 0 ), $this->getArg( 1, -1 ) );
	}

	/**
	 * Fetches a MathML entry for exporting formulas from renderer and forms an entry for json export.
	 * @param string $formula formula in tex to save
	 * @param MathRenderer $renderer mathrenderer object which contains mathml
	 * @param array $allFormula array which is filled with formula entries
	 * @return array modified allFormula array
	 */
	public function getMathMLForExport( string $formula, MathRenderer $renderer, array $allFormula ): array {
		if ( $this->verbose ) {
			echo "\n Fetching MML for formula: " . $formula . "\n";
		}
		$mathML = $renderer->getMathml();
		if ( $this->verbose ) {
			echo "\n Input-type is: " . $renderer->getInputType();
			echo "\n MathML is" . substr( $mathML, 0, 50 );
		}
		$allFormula[] = [
			'tex' => $formula,
			'type' => $renderer->getInputType(),
			'mml' => $mathML,
		];
		return $allFormula;
	}

	/**
	 * Writes the MathML content in allFormula to a file named '<mmlPath>/mmlRes-<mode>-<pTitle>.json'.
	 *
	 * Directory separators inside the page title are replaced by '_', so that
	 * titles of subpages do not point into a non-existing sub directory.
	 * Encoding and write failures are reported as errors; they do not abort the run.
	 *
	 * @param string $mmlPath path for saving the mathml (without filename)
	 * @param array $allFormula all formula array with mathml for the current page
	 * @param string $pTitle title of page
	 * @return void
	 * @throws InvalidArgumentException when the filepath defined by cli-arg is not a correct folder
	 */
	public function exportMMLtoFile( string $mmlPath, array $allFormula, string $pTitle ): void {
		if ( !is_dir( $mmlPath ) ) {
			throw new InvalidArgumentException( "Filepath for exportmml at not valid at: " . $mmlPath );
		}
		$jsonData = json_encode( $allFormula, JSON_PRETTY_PRINT );
		if ( $jsonData === false ) {
			$this->error( "Could not encode formulae of page '$pTitle' as JSON: " . json_last_error_msg() );
			return;
		}
		$safeTitle = strtr( $pTitle, [ '/' => '_', '\\' => '_' ] );
		$fullPath = realpath( $mmlPath ) . DIRECTORY_SEPARATOR . 'mmlRes-' . $this->renderingMode .
			"-" . $safeTitle . ".json";
		if ( file_put_contents( $fullPath, $jsonData ) === false ) {
			$this->error( "Could not write MathML export to " . $fullPath );
		}
	}
}
336 | |
// Name the maintenance class defined in this file, then include the file the
// RUN_MAINTENANCE_IF_MAIN constant points to. NOTE(review): the constant is not
// defined in this file; presumably Maintenance.php defines it and the included
// file runs $maintClass when the script is the entry point - confirm.
$maintClass = UpdateMath::class;
/** @noinspection PhpIncludeInspection */
require_once RUN_MAINTENANCE_IF_MAIN;