Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 56 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
CalculateDistances | |
0.00% |
0 / 53 |
|
0.00% |
0 / 3 |
110 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
20 | |||
populateSearchIndex | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @ingroup Maintenance |
19 | */ |
20 | |
21 | require_once __DIR__ . '/../../../maintenance/Maintenance.php'; |
22 | |
/**
 * Maintenance script that fills the mathpagesimilarity table.
 *
 * For every id selected from mathrevisionstat, one INSERT ... SELECT statement is
 * issued that stores the CosProd() value between that id and every smaller id.
 * CosProd() is not defined in this file; it is presumably a stored SQL function
 * shipped with the extension's schema -- TODO confirm.
 */
class CalculateDistances extends Maintenance {

	/** Number of ids handled inside a single database transaction. */
	private const RTI_CHUNK_SIZE = 100;

	/** @var \Wikimedia\Rdbms\IDatabase Connection used for the INSERT statements */
	private $dbw = null;

	/**
	 * @var \Wikimedia\Rdbms\IDatabase Connection used to read the list of ids
	 */
	private $db;

	/** @var int[] List of revision ids */
	private $pagelist = [];

	/**
	 * Declares the command line interface: the page9 switch and the optional
	 * min/max positional arguments.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription(
			'Calculates the similarity between pages and stores it in the mathpagesimilarity table'
		);
		$this->addOption( 'page9', 'Ignore pages with only 9 equations or less.', false, false,
			'9' );
		$this->addArg( 'min', 'If set processing is started at the page with curid>=min', false );
		$this->addArg( 'max', 'If set processing is stopped at the page with curid<=max', false );
		$this->requireExtension( 'MathSearch' );
	}

	/**
	 * Collects the ids to process (optionally limited by min/max and by the
	 * mathpage9 table) and then writes the similarity rows for them.
	 */
	public function execute() {
		$connectionProvider = $this->getServiceContainer()->getConnectionProvider();
		$this->dbw = $connectionProvider->getPrimaryDatabase();
		$this->db = $connectionProvider->getPrimaryDatabase();

		// Command line arguments arrive as strings; cast them so that only integers
		// are ever interpolated into the SQL conditions below.
		$min = (int)$this->getArg( 0, 0 );
		$max = (int)$this->getArg( 1, PHP_INT_MAX );

		$conds = [ "revstat_revid >= $min" ];
		if ( $max < PHP_INT_MAX ) {
			$conds[] = "revstat_revid <= $max";
		}

		if ( $this->getOption( 'page9', false ) ) {
			// Restrict the ids to those that also appear in mathpage9.
			$tables = [ 'mathpage9', 'mathrevisionstat' ];
			$conds[] = 'revstat_revid = page_id';
		} else {
			$tables = 'mathrevisionstat';
		}

		$ids = $this->db->selectFieldValues(
			$tables,
			'revstat_revid',
			$conds,
			__METHOD__,
			[ 'DISTINCT' ]
		);
		// The database hands back strings; keep the list as int[] as documented.
		$this->pagelist = array_map( 'intval', $ids );

		$this->populateSearchIndex();
		$this->output( "Done.\n" );
	}

	/**
	 * Writes the similarity rows for every id in $this->pagelist.
	 *
	 * The ids are processed in chunks of RTI_CHUNK_SIZE, each chunk inside its own
	 * transaction. The last chunk may be shorter than RTI_CHUNK_SIZE; iterating over
	 * the chunk itself guarantees that no index beyond the end of the list is read.
	 */
	protected function populateSearchIndex() {
		$count = count( $this->pagelist );
		$this->output( "Rebuilding index fields for $count pages...\n" );

		// The option cannot change while the script runs, so read it only once.
		$restrictToPage9 = (bool)$this->getOption( 'page9', false );

		$processed = 0;
		foreach ( array_chunk( $this->pagelist, self::RTI_CHUNK_SIZE ) as $chunk ) {
			if ( $processed ) {
				$this->output( $processed . " of $count \n" );
			}
			$this->beginTransaction( $this->dbw, __METHOD__ );
			foreach ( $chunk as $pid ) {
				$sql = $this->buildSimilaritySql( (int)$pid, $restrictToPage9 );
				$this->output( "writing entries for page $pid..." );
				$start = microtime( true );
				$this->dbw->query( $sql, __METHOD__ );
				$this->output( 'done in ' . ( microtime( true ) - $start ) . "\n" );
				$processed++;
			}
			$start = microtime( true );
			$this->commitTransaction( $this->dbw, __METHOD__ );
			$this->output( ' committed in ' . ( microtime( true ) - $start ) . "s\n\n" );
		}
	}

	/**
	 * Builds the INSERT ... SELECT statement that stores the similarity between
	 * $pid and every id in mathrevisionstat that is smaller than $pid.
	 *
	 * @param int $pid Id to compare against; being an int it is safe to interpolate
	 * @param bool $restrictToPage9 Whether to only consider ids listed in mathpage9
	 * @return string Raw SQL statement
	 */
	private function buildSimilaritySql( int $pid, bool $restrictToPage9 ): string {
		$sql =
			"INSERT IGNORE INTO mathpagesimilarity(pagesimilarity_A,pagesimilarity_B,pagesimilarity_Value) " .
			"SELECT DISTINCT $pid,`revstat_revid`, " .
			"CosProd( $pid,`revstat_revid`) FROM `mathrevisionstat` m ";
		if ( $restrictToPage9 ) {
			$sql .= " JOIN (SELECT page_id from mathpage9) as r WHERE m.revstat_revid=r.page_id AND ";
		} else {
			$sql .= " WHERE ";
		}
		$sql .= "m.revstat_revid < $pid ";

		return $sql;
	}
}
115 | |
// Name the maintenance class defined in this file, then include the file behind
// RUN_MAINTENANCE_IF_MAIN (presumably the MediaWiki runner that instantiates
// $maintClass when this script is the entry point -- confirm against core).
$maintClass = CalculateDistances::class;
/** @noinspection PhpIncludeInspection */
require_once RUN_MAINTENANCE_IF_MAIN;