Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 57 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
IndexBase | |
0.00% |
0 / 56 |
|
0.00% |
0 / 5 |
56 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
2 | |||
generateIndexString | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
wFile | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
execute | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
6 | |||
getHead | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFooter | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * Generates harvest files for the MathWebSearch Daemon. |
4 | * Example: php CreateMathIndex.php ~/mws_harvest_files |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by |
8 | * the Free Software Foundation; either version 2 of the License, or |
9 | * (at your option) any later version. |
10 | * |
11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU General Public License along |
17 | * with this program; if not, write to the Free Software Foundation, Inc., |
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
19 | * http://www.gnu.org/copyleft/gpl.html |
20 | * |
21 | * @ingroup Maintenance |
22 | */ |
23 | |
24 | require_once __DIR__ . '/../../../maintenance/Maintenance.php'; |
25 | |
26 | use MediaWiki\MediaWikiServices; |
27 | use Wikimedia\Rdbms\IResultWrapper; |
28 | |
/**
 * Shared driver for maintenance scripts that export the formulae obtained by
 * joining the mathindex and mathlog tables into a numbered series of files.
 *
 * Subclasses provide the per-row serialisation (generateIndexString()) and may
 * override getHead() / getFooter() to wrap the content of every file.
 *
 * @author Moritz Schubotz
 */
abstract class IndexBase extends Maintenance {
	/** @var IResultWrapper Rows selected by execute(); read by wFile(). */
	protected $res;

	/**
	 * Registers the positional arguments (dir, ffmax, min, max) and the
	 * --limit option. execute() reads the positional arguments by index
	 * in exactly this order.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Exports data' );
		$this->addArg( 'dir', 'The directory where the harvest files go to.' );
		$this->addArg( 'ffmax', 'The maximal number of formula per file.', false );
		$this->addArg( 'min', 'If set processing is started at the page with rank(pageID)>min',
			false );
		$this->addArg( 'max', 'If set processing is stopped at the page with rank(pageID)<=max',
			false );
		$this->addOption( 'limit', 'The maximal number of database entries to be considered', false,
			true, "L" );
		$this->requireExtension( 'MathSearch' );
	}

	/**
	 * Serialises one result row into the text that is appended to the
	 * current output file.
	 *
	 * @param stdClass $row
	 *
	 * @return string
	 */
	abstract protected function generateIndexString( $row );

	/**
	 * Writes the rows with result-set offsets $min .. $min + $inc - 1 (clipped
	 * to the size of the result set) to the file $fn, wrapped in getHead()
	 * and getFooter().
	 *
	 * @param string $fn Path of the file to (over)write.
	 * @param int $min Offset of the first row within $this->res.
	 * @param int $inc Maximal number of rows to put into the file.
	 *
	 * @return bool True if rows beyond this file remain to be written.
	 */
	protected function wFile( $fn, $min, $inc ) {
		$total = $this->res->numRows();
		$max = min( $min + $inc, $total );
		$out = $this->getHead();
		for ( $i = $min; $i < $max; $i++ ) {
			$this->res->seek( $i );
			$out .= $this->generateIndexString( $this->res->fetchObject() );
			// NOTE(review): presumably undoes an error handler installed while the
			// row is serialised; kept because the subclasses are not visible here.
			restore_error_handler();
		}
		$out .= "\n" . $this->getFooter();
		if ( file_put_contents( $fn, $out ) === false ) {
			$this->fatalError( "could not write file $fn" );
		}
		$this->output( "written file $fn with entries($min ... $max)\n" );

		// $max is an exclusive upper bound, so rows remain exactly when it is
		// still below the row count. Comparing against numRows() - 1 silently
		// dropped the last row whenever a single row was left over.
		return $max < $total;
	}

	/**
	 * Selects the formulae from the replica database and writes them to
	 * <dir>/math<offset>.xml files holding at most ffmax entries each.
	 *
	 * The min and max arguments are applied as inclusive bounds on
	 * mathindex_revision_id (the help texts registered in the constructor
	 * describe them in terms of page rank).
	 */
	public function execute() {
		libxml_use_internal_errors( true );
		$inc = (int)$this->getArg( 1, 100 );
		if ( $inc < 1 ) {
			// A step of zero (or less) would make the loop below rewrite the
			// same range forever.
			$this->fatalError( 'ffmax must be a positive integer.' );
		}
		// Cast the command line values before they become part of the SQL conditions.
		$minRevision = (int)$this->getArg( 2, 0 );
		$maxRevision = (int)$this->getArg( 3, PHP_INT_MAX );
		$db = MediaWikiServices::getInstance()
			->getConnectionProvider()
			->getReplicaDatabase();
		$this->output( "getting list of all equations from the database\n" );
		$this->res =
			$db->select( [ 'mathindex', 'mathlog' ], [
				'mathindex_revision_id',
				'mathindex_anchor',
				'math_mathml',
				'math_inputhash',
				'mathindex_inputhash'
			], [
				'math_inputhash = mathindex_inputhash',
				'mathindex_revision_id >= ' . $minRevision,
				'mathindex_revision_id <= ' . $maxRevision
			], __METHOD__, [
				'LIMIT' => $this->getOption( 'limit', PHP_INT_MAX ),
				'ORDER BY' => 'mathindex_revision_id'
			] );
		$this->output( "write " . $this->res->numRows() . " results to index\n" );
		$dir = $this->getArg( 0 );
		// The mode has to be an octal integer literal: the string '0755' is
		// coerced to decimal 755, which yields unrelated permission bits.
		// The second is_dir() covers a directory created concurrently.
		if ( !is_dir( $dir ) && !mkdir( $dir, 0755, true ) && !is_dir( $dir ) ) {
			$this->fatalError( "could not create directory $dir" );
		}
		$offset = 0;
		do {
			$fn = $dir . '/math' . sprintf( '%012d', $offset ) . '.xml';
			$more = $this->wFile( $fn, $offset, $inc );
			$offset += $inc;
		} while ( $more );
		$this->output( 'done' );
	}

	/**
	 * Text placed at the beginning of every output file; empty by default.
	 *
	 * @return string
	 */
	protected function getHead() {
		return '';
	}

	/**
	 * Text placed at the end of every output file, after a newline; empty by
	 * default.
	 *
	 * @return string
	 */
	protected function getFooter() {
		return '';
	}
}