Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 72 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
Saneitize | |
0.00% |
0 / 65 |
|
0.00% |
0 / 6 |
272 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
30 | |||
check | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
checkChunk | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
setFromAndTo | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
buildChecker | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Maintenance; |
4 | |
5 | use CirrusSearch\Sanity\Checker; |
6 | use CirrusSearch\Sanity\NoopRemediator; |
7 | use CirrusSearch\Sanity\PrintingRemediator; |
8 | use CirrusSearch\Sanity\QueueingRemediator; |
9 | use CirrusSearch\Searcher; |
10 | use MediaWiki\WikiMap\WikiMap; |
11 | |
12 | /** |
13 | * Make sure the index for the wiki is sane. |
14 | * |
15 | * This program is free software; you can redistribute it and/or modify |
16 | * it under the terms of the GNU General Public License as published by |
17 | * the Free Software Foundation; either version 2 of the License, or |
18 | * (at your option) any later version. |
19 | * |
20 | * This program is distributed in the hope that it will be useful, |
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | * GNU General Public License for more details. |
24 | * |
25 | * You should have received a copy of the GNU General Public License along |
26 | * with this program; if not, write to the Free Software Foundation, Inc., |
27 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
28 | * http://www.gnu.org/copyleft/gpl.html |
29 | */ |
30 | |
// Locate the MediaWiki installation: use MW_INSTALL_PATH when the environment
// provides it, otherwise fall back to three levels above this script's directory.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;

// Core maintenance framework first, then the CirrusSearch maintenance base class.
require_once "$IP/maintenance/Maintenance.php";
require_once __DIR__ . '/../includes/Maintenance/Maintenance.php';
37 | |
/**
 * Maintenance script that walks a range of page ids in batches and hands each
 * batch to a Sanity\Checker, which reports problems to a Remediator.
 */
class Saneitize extends Maintenance {
	/**
	 * @var int first mediawiki page id to check (inclusive)
	 */
	private $fromPageId;

	/**
	 * @var int last mediawiki page id to check (inclusive)
	 */
	private $toPageId;

	/**
	 * @var bool true to enable fast but inconsistent redirect checks
	 */
	private $fastCheck;

	/**
	 * @var Checker Checks if the index is insane, and calls on a Remediator
	 *  instance to do something about it. The remediator may fix the issue,
	 *  log about it, or do a combination.
	 */
	private $checker;

	/**
	 * Registers the script description, the default batch size (10) and all
	 * command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->setBatchSize( 10 );
		$this->addDescription( 'Make the index sane. Always operates on a single cluster.' );
		$this->addOption( 'fromId', 'Start sanitizing at a specific page_id. Default to 0.', false, true );
		$this->addOption( 'toId', 'Stop sanitizing at a specific page_id. Default to the maximum id in the db + 100.', false, true );
		$this->addOption( 'noop', 'Rather then queue remediation actions do nothing.' );
		$this->addOption( 'logSane', 'Print all sane pages.' );
		$this->addOption( 'fastCheck', 'Do not load page content to check if a page is a redirect, faster but inconsistent.' );
		$this->addOption( 'buildChunks', 'Instead of running the script spit out commands that can be farmed out to ' .
			'different processes or machines to check the index. If specified as a number then chunks no larger than ' .
			'that size are spat out. If specified as a number followed by the word "total" without a space between them ' .
			'then that many chunks will be spat out sized to cover the entire wiki.', false, true );
	}

	/**
	 * Validates the options, resolves the page id range and then either emits
	 * chunk commands (--buildChunks) or runs the check over the whole range.
	 *
	 * @return bool|null null after delegating to the chunk builder, true after
	 *  a check run
	 */
	public function execute() {
		$this->disablePoolCountersAndLogging();

		if ( $this->hasOption( 'batch-size' ) ) {
			$this->setBatchSize( $this->getOption( 'batch-size' ) );
			if ( $this->getBatchSize() > 5000 ) {
				$this->fatalError( "--batch-size too high!" );
			} elseif ( $this->getBatchSize() <= 0 ) {
				$this->fatalError( "--batch-size must be > 0!" );
			}
		}

		// Flag options come back as a truthy value or the default; keep the
		// property a real boolean as documented.
		$this->fastCheck = (bool)$this->getOption( 'fastCheck', false );

		$this->setFromAndTo();
		$buildChunks = $this->getOption( 'buildChunks' );
		if ( $buildChunks ) {
			$builder = new \CirrusSearch\Maintenance\ChunkBuilder();
			$builder->build( $this->mSelf, $this->mOptions, $buildChunks, $this->fromPageId, $this->toPageId );
			return null;
		}
		$this->buildChecker();
		$updated = $this->check();

		// Both bounds are inclusive, so the range holds (to - from + 1) ids;
		// an inverted range checks nothing rather than a negative amount.
		$checked = max( 0, $this->toPageId - $this->fromPageId + 1 );
		$this->output( "Fixed $updated page(s) ($checked checked)\n" );

		return true;
	}

	/**
	 * Walks the inclusive range [fromPageId, toPageId] in steps of the batch
	 * size and checks each batch.
	 *
	 * @return int the number of pages corrected
	 */
	private function check() {
		$batchSize = $this->getBatchSize();
		$updated = 0;
		for ( $pageId = $this->fromPageId; $pageId <= $this->toPageId; $pageId += $batchSize ) {
			// The last batch is clamped so it never runs past toPageId.
			$max = min( $this->toPageId, $pageId + $batchSize - 1 );
			$updated += $this->checkChunk( range( $pageId, $max ) );
		}
		return $updated;
	}

	/**
	 * Checks one batch of page ids and prints a progress line showing the
	 * last id of the batch against the upper bound of the run.
	 *
	 * @param int[] $pageIds mediawiki page ids
	 * @return int number of pages corrected
	 */
	private function checkChunk( array $pageIds ) {
		$updated = $this->checker->check( $pageIds );
		$this->output( sprintf( "[%20s]%10d/%d\n", WikiMap::getCurrentWikiId(), end( $pageIds ),
			$this->toPageId ) );
		return $updated;
	}

	/**
	 * Resolves the inclusive page id range from --fromId / --toId.
	 *
	 * fromId defaults to 0. toId defaults to the largest page_id in the page
	 * table plus a margin of 100, or to 0 when the query yields no value.
	 */
	private function setFromAndTo() {
		$fromId = $this->getOption( 'fromId' );
		$this->fromPageId = $fromId === null ? 0 : $this->parsePageId( 'fromId', $fromId );

		$toId = $this->getOption( 'toId' );
		if ( $toId !== null ) {
			$this->toPageId = $this->parsePageId( 'toId', $toId );
			return;
		}

		// The replica is only needed when the upper bound has to be looked up.
		$dbr = $this->getDB( DB_REPLICA );
		$maxId = $dbr->selectField( 'page', 'MAX(page_id)', [], __METHOD__ );
		if ( $maxId === false ) {
			$this->toPageId = 0;
		} else {
			// Its technically possible for there to be pages in the index with ids greater
			// than the maximum id in the database. That isn't super likely, but we'll
			// check a bit ahead just in case. This isn't scientific or super accurate,
			// but its cheap.
			$this->toPageId = (int)$maxId + 100;
		}
	}

	/**
	 * Converts a page id given on the command line to an integer.
	 *
	 * Option values arrive as strings; a non-numeric one would otherwise skip
	 * the check loop silently and break the arithmetic in the summary line.
	 *
	 * @param string $name option name, used in the error message
	 * @param mixed $value raw option value
	 * @return int
	 */
	private function parsePageId( $name, $value ) {
		if ( !is_numeric( $value ) ) {
			// NOTE(review): relies on fatalError() terminating the script, as
			// the batch-size validation in execute() also does - confirm.
			$this->fatalError( "--$name must be a number!" );
		}
		return (int)$value;
	}

	/**
	 * Builds the Checker used by check(): picks a no-op or queueing
	 * remediator (--noop), wraps it in a printing remediator unless the
	 * script runs quietly, and wires in a Searcher for the current connection.
	 */
	private function buildChecker() {
		if ( $this->getOption( 'noop' ) ) {
			$remediator = new NoopRemediator();
		} else {
			$remediator = new QueueingRemediator( $this->getConnection()->getClusterName() );
		}
		if ( !$this->isQuiet() ) {
			$remediator = new PrintingRemediator( $remediator );
		}
		// This searcher searches all indexes for the current wiki.
		$searcher = new Searcher( $this->getConnection(), 0, 0, $this->getSearchConfig(), [], null );
		$this->checker = new Checker(
			$this->getSearchConfig(),
			$this->getConnection(),
			$remediator,
			$searcher,
			(bool)$this->getOption( 'logSane', false ),
			$this->fastCheck
		);
	}
}
172 | |
// Name the script class (fully qualified) in $maintClass before loading the
// file behind RUN_MAINTENANCE_IF_MAIN - presumably that file reads it to run
// the script; confirm against MediaWiki core.
$maintClass = \CirrusSearch\Maintenance\Saneitize::class;
require_once RUN_MAINTENANCE_IF_MAIN;