Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 59 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
CalculateDistances | |
0.00% |
0 / 56 |
|
0.00% |
0 / 3 |
110 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
20 | |||
populateSearchIndex | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @ingroup Maintenance |
19 | */ |
20 | |
21 | use MediaWiki\MediaWikiServices; |
22 | |
23 | require_once __DIR__ . '/../../../maintenance/Maintenance.php'; |
24 | |
/**
 * Maintenance script that fills the mathpagesimilarity table.
 *
 * For every selected revision id from mathrevisionstat it inserts one row per
 * smaller revision id, with the value computed by the SQL function CosProd().
 */
class CalculateDistances extends Maintenance {

	/** Number of pages whose similarity rows are written per transaction. */
	private const RTI_CHUNK_SIZE = 100;

	/** @var \Wikimedia\Rdbms\IDatabase|null Connection used for writes; assigned in execute() */
	private $dbw = null;

	/**
	 * @var \Wikimedia\Rdbms\IDatabase Connection used to read the list of ids; assigned in execute()
	 */
	private $db;

	/** @var int[] List of revision ids */
	private $pagelist = [];

	public function __construct() {
		parent::__construct();
		$this->addDescription(
			'Calculates the pairwise page similarity and stores it in the mathpagesimilarity table'
		);
		$this->addOption( 'page9', 'Ignore pages with only 9 equations or less.', false, false,
			'9' );
		// The help text mirrors the inclusive bounds used by the query in execute().
		$this->addArg( 'min', 'If set processing is started at the page with curid>=min', false );
		$this->addArg( 'max', 'If set processing is stopped at the page with curid<=max', false );
		$this->requireExtension( 'MathSearch' );
	}

	/**
	 * Collects the revision ids to process (optionally restricted by the
	 * min/max arguments and the page9 option) and writes their similarity rows.
	 */
	public function execute() {
		$this->dbw = MediaWikiServices::getInstance()
			->getConnectionProvider()
			->getPrimaryDatabase();
		// Reads use the primary connection as well.
		$this->db = $this->dbw;
		$this->pagelist = [];

		// Cast to int so that command line input can never inject SQL fragments.
		$min = (int)$this->getArg( 0, 0 );
		$max = (int)$this->getArg( 1, PHP_INT_MAX );

		$conds = [ 'revstat_revid >= ' . $min ];
		if ( $max < PHP_INT_MAX ) {
			$conds[] = 'revstat_revid <= ' . $max;
		}

		$tables = [ 'mathrevisionstat' ];
		if ( $this->getOption( 'page9', false ) ) {
			// Restrict the list to ids that also appear in mathpage9.
			$tables = [ 'mathpage9', 'mathrevisionstat' ];
			$conds[] = 'revstat_revid = page_id';
		}

		$res = $this->db->select( $tables, 'revstat_revid', $conds, __METHOD__, [ 'DISTINCT' ] );
		foreach ( $res as $row ) {
			// Stored as int because the ids are interpolated into SQL later on.
			$this->pagelist[] = (int)$row->revstat_revid;
		}

		$this->populateSearchIndex();
		$this->output( "Done.\n" );
	}

	/**
	 * Inserts the similarity rows for every id in $this->pagelist.
	 *
	 * The ids are processed in chunks of RTI_CHUNK_SIZE; each chunk is written
	 * inside its own transaction. The last chunk may be shorter than
	 * RTI_CHUNK_SIZE, so the loop never reads past the end of the list.
	 */
	protected function populateSearchIndex() {
		$count = count( $this->pagelist );
		$this->output( "Rebuilding index fields for $count pages...\n" );

		// The option cannot change while the script runs, so read it once.
		$onlyPage9 = (bool)$this->getOption( 'page9', false );
		$n = 0;

		foreach ( array_chunk( $this->pagelist, self::RTI_CHUNK_SIZE ) as $chunk ) {
			if ( $n ) {
				$this->output( $n . " of $count \n" );
			}
			$this->dbw->begin( __METHOD__ );
			foreach ( $chunk as $pid ) {
				// $pid is always an int (see execute()), so interpolation is safe here.
				$sql =
					"INSERT IGNORE INTO mathpagesimilarity(pagesimilarity_A,pagesimilarity_B,pagesimilarity_Value) " .
					"SELECT DISTINCT $pid,`revstat_revid`, " .
					"CosProd( $pid,`revstat_revid`) FROM `mathrevisionstat` m ";
				if ( $onlyPage9 ) {
					$sql .= " JOIN (SELECT page_id from mathpage9) as r WHERE m.revstat_revid=r.page_id AND ";
				} else {
					$sql .= " WHERE ";
				}
				// Only pair with smaller ids so that each pair is written once.
				$sql .= "m.revstat_revid < $pid ";
				$this->output( "writing entries for page $pid..." );
				$start = microtime( true );
				$this->dbw->query( $sql, __METHOD__ );
				$this->output( 'done in ' . ( microtime( true ) - $start ) . "\n" );
				$n++;
			}
			$start = microtime( true );
			$this->dbw->commit( __METHOD__ );
			$this->output( ' committed in ' . ( microtime( true ) - $start ) . "s\n\n" );
		}
	}
}
120 | |
// Name the class implemented by this script, then include the runner hook.
// RUN_MAINTENANCE_IF_MAIN is defined outside this file (presumably by Maintenance.php).
$maintClass = CalculateDistances::class;
/** @noinspection PhpIncludeInspection */
require_once RUN_MAINTENANCE_IF_MAIN;