Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 88 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 1 |
ExtractFeatures | |
0.00% |
0 / 85 |
|
0.00% |
0 / 4 |
132 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
populateSearchIndex | |
0.00% |
0 / 55 |
|
0.00% |
0 / 1 |
42 | |||
doUpdate | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
execute | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @ingroup Maintenance |
19 | */ |
20 | |
21 | use MediaWiki\MediaWikiServices; |
22 | |
23 | require_once __DIR__ . '/../../../maintenance/Maintenance.php'; |
24 | |
/**
 * Maintenance script that walks over the pages of the wiki in chunks of page IDs,
 * extracts the math tags from the current wikitext of each page, updates the
 * observations of every formula found, and finally rebuilds the `mathvarstat`
 * and `mathrevisionstat` tables from `mathobservation` and `mathindex`.
 */
class ExtractFeatures extends Maintenance {

	/** Number of consecutive page IDs handled per transaction. */
	private const RTI_CHUNK_SIZE = 100;

	/** @var bool Whether the --purge option was given (set in execute()) */
	private $purge = false;
	/** @var \Wikimedia\Rdbms\IDatabase|null Connection used for all writes (set in execute()) */
	private $dbw = null;

	/**
	 * @var \Wikimedia\Rdbms\IDatabase|null Connection used to enumerate pages (set in execute())
	 */
	private $db;

	public function __construct() {
		parent::__construct();
		$this->addDescription(
			'Extracts the math features of all pages and rebuilds the feature statistics tables'
		);
		$this->addOption( 'purge',
			'If set all formulae are rendered again from scratch. (Very time consuming!)', false,
			false, 'f' );
		$this->addArg( 'min', 'If set processing is started at the page with rank(pageID)>min',
			false );
		$this->addArg( 'max', 'If set processing is stopped at the page with rank(pageID)<=max',
			false );
		$this->requireExtension( 'MathSearch' );
	}

	/**
	 * Populates the search index with content from all pages
	 *
	 * Pages are processed in ranges of RTI_CHUNK_SIZE page IDs; the updates of each
	 * range are wrapped in one transaction on the write connection. Afterwards the
	 * statistics tables are regenerated.
	 *
	 * @param int|string $n First page ID to process (command line values arrive as strings)
	 * @param int|string $cmax Last page ID to process; values <= 0 mean "up to MAX(page_id)"
	 */
	protected function populateSearchIndex( $n = 0, $cmax = -1 ) {
		// Normalise the arguments before they are used in arithmetic and in SQL.
		$n = (int)$n;
		$cmax = (int)$cmax;

		// $conds is a required parameter of selectRow(); an empty list selects all rows.
		$maxRow = $this->db->selectRow( 'page', 'MAX(page_id) AS count', [], __METHOD__ );
		// MAX() yields NULL on an empty page table.
		$count = $maxRow ? (int)$maxRow->count : 0;
		if ( $cmax > 0 && $count > $cmax ) {
			$count = $cmax;
		}
		$this->output( "Rebuilding index fields for {$count} pages with option {$this->purge}...\n" );
		$fcount = 0;
		$revisionStore = MediaWikiServices::getInstance()->getRevisionStore();

		// "<=" so that the page whose ID equals the upper bound is not skipped when
		// it happens to be the first ID of a chunk.
		while ( $n <= $count ) {
			if ( $n ) {
				$this->output( $n . " of $count \n" );
			}
			// Never read past the requested upper bound.
			$end = min( $n + self::RTI_CHUNK_SIZE - 1, $count );

			$res = $this->db->select(
				'page',
				[ 'page_id', 'page_title' ],
				[ "page_id BETWEEN $n AND $end" ],
				__METHOD__
			);
			$this->dbw->begin( __METHOD__ );
			foreach ( $res as $row ) {
				$wikitext = $this->getCurrentWikitext( $revisionStore, (int)$row->page_id );
				if ( $wikitext === null ) {
					// No revision or no textual main slot: nothing to extract.
					continue;
				}
				$fcount += self::doUpdate( $wikitext, $row->page_title, $this->purge,
					$this->dbw );
			}
			$start = microtime( true );
			$this->dbw->commit( __METHOD__ );
			$this->output( " committed in " . ( microtime( true ) - $start ) . "s\n\n" );
			$n += self::RTI_CHUNK_SIZE;
		}

		$this->rebuildStatistics();
		$this->output( "Updated {$fcount} formulae!\n" );
	}

	/**
	 * Fetches the text of the main slot of the current revision of a page.
	 *
	 * @param \MediaWiki\Revision\RevisionStore $revisionStore
	 * @param int $pageId
	 *
	 * @return string|null The text, or null if the page has no loadable revision or
	 *  its main slot does not hold text content
	 */
	private function getCurrentWikitext( $revisionStore, $pageId ) {
		$revision = $revisionStore->getRevisionByPageId( $pageId );
		if ( !$revision ) {
			return null;
		}
		// RAW audience: a maintenance script reads content regardless of visibility.
		$content = $revision->getContent(
			\MediaWiki\Revision\SlotRecord::MAIN,
			\MediaWiki\Revision\RevisionRecord::RAW
		);

		return $content instanceof \TextContent ? $content->getText() : null;
	}

	/**
	 * Empties and regenerates the `mathvarstat` and `mathrevisionstat` tables.
	 *
	 * `mathvarstat` must be rebuilt first because the second INSERT joins against it.
	 */
	private function rebuildStatistics() {
		$this->output( "Clear mathvarstat\n" );
		$sql = 'DELETE FROM `mathvarstat`';
		$this->dbw->query( $sql, __METHOD__ );
		$this->output( "Generate mathvarstat\n" );
		$sql =
			'INSERT INTO `mathvarstat` (`varstat_featurename` , `varstat_featuretype`, `varstat_featurecount`)' .
			'SELECT `mathobservation_featurename` , `mathobservation_featuretype` , count( * ) AS CNT ' .
			'FROM `mathobservation` ' .
			'JOIN mathindex ON `mathobservation_inputhash` = mathindex_inputhash ' .
			'GROUP BY `mathobservation_featurename` , `mathobservation_featuretype` ' .
			'ORDER BY CNT DESC';
		$this->dbw->query( $sql, __METHOD__ );
		$this->output( "Clear mathrevisionstat\n" );
		$sql = 'DELETE FROM `mathrevisionstat`';
		$this->dbw->query( $sql, __METHOD__ );
		$this->output( "Generate mathrevisionstat\n" );
		$sql =
			'INSERT INTO `mathrevisionstat`(`revstat_featureid`,`revstat_revid`,`revstat_featurecount`) ' .
			'SELECT varstat_id, mathindex_revision_id, count(*) AS CNT FROM `mathobservation` '
			. 'JOIN mathindex ON `mathobservation_inputhash` =mathindex_inputhash ' .
			'JOIN mathvarstat ON varstat_featurename = `mathobservation_featurename` '
			. 'AND varstat_featuretype = `mathobservation_featuretype` ' .
			' GROUP BY `mathobservation_featurename`, `mathobservation_featuretype`, '
			. 'mathindex_revision_id ORDER BY CNT DESC';
		$this->dbw->query( $sql, __METHOD__ );
	}

	/**
	 * Updates the observations of every math tag found in the given wikitext.
	 *
	 * @param string $pText Wikitext of the page
	 * @param string $pTitle Page title, only used for the progress message
	 * @param bool|string $purge Currently unused; kept so the signature stays stable
	 * @param \Wikimedia\Rdbms\IDatabase $dbw Write connection handed to updateObservations()
	 *
	 * @return int Number of math tags found in the text
	 */
	private static function doUpdate( $pText, $pTitle, $purge, $dbw ) {
		// TODO: fix link id problem
		$math = MathObject::extractMathTagsFromWikiText( $pText );
		$matches = count( $math );
		if ( !$matches ) {
			return 0;
		}

		echo ( "\t processing $matches math fields for {$pTitle} page\n" );
		foreach ( $math as $formula ) {
			// Index 1 of each extracted entry is passed to MathObject as its input.
			$mo = new MathObject( $formula[1] );
			$mo->updateObservations( $dbw );
		}

		return $matches;
	}

	/**
	 * Entry point: sets up the database connections, reads the options and
	 * arguments, and runs the extraction.
	 */
	public function execute() {
		$connectionProvider = MediaWikiServices::getInstance()->getConnectionProvider();
		$this->dbw = $connectionProvider->getPrimaryDatabase();
		// Pages are enumerated on the primary connection as well.
		$this->db = $connectionProvider->getPrimaryDatabase();
		$this->purge = (bool)$this->getOption( 'purge', false );
		$this->populateSearchIndex( $this->getArg( 0, 0 ), $this->getArg( 1, -1 ) );
		// Report completion only once the work has actually been done.
		$this->output( "Done.\n" );
	}
}
162 | |
// Name of the maintenance class defined in this file; presumably picked up by the
// file required below, which is expected to run the script when this file is the
// one invoked from the command line (constant provided by Maintenance.php - confirm).
$maintClass = ExtractFeatures::class;
/** @noinspection PhpIncludeInspection */
require_once RUN_MAINTENANCE_IF_MAIN;