Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 178 |
|
0.00% |
0 / 20 |
CRAP | |
0.00% |
0 / 1 |
MetaStoreIndex | |
0.00% |
0 / 178 |
|
0.00% |
0 / 20 |
2256 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
versionStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
namespaceStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
saneitizeJobStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
stores | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
createIfNecessary | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
createOrUpgradeIfNecessary | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
buildIndexConfiguration | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
6 | |||
createNewIndex | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
buildMapping | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
switchAliasTo | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
30 | |||
getAliasedIndexName | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
upgradeIndexVersion | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
12 | |||
runtimeVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
storeMetastoreVersion | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
log | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
elasticaIndex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
cirrusReady | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
metastoreVersion | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
56 | |||
getMasterTimeout | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\MetaStore; |
4 | |
5 | use CirrusSearch\Connection; |
6 | use CirrusSearch\Maintenance\AnalysisConfigBuilder; |
7 | use CirrusSearch\Maintenance\AnalysisFilter; |
8 | use CirrusSearch\Maintenance\ConfigUtils; |
9 | use CirrusSearch\Maintenance\Printer; |
10 | use CirrusSearch\SearchConfig; |
11 | use MediaWiki\Status\Status; |
12 | |
13 | /** |
14 | * This program is free software; you can redistribute it and/or modify |
15 | * it under the terms of the GNU General Public License as published by |
16 | * the Free Software Foundation; either version 2 of the License, or |
17 | * (at your option) any later version. |
18 | * |
19 | * This program is distributed in the hope that it will be useful, |
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22 | * GNU General Public License for more details. |
23 | * |
24 | * You should have received a copy of the GNU General Public License along |
25 | * with this program; if not, write to the Free Software Foundation, Inc., |
26 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
27 | * http://www.gnu.org/copyleft/gpl.html |
28 | */ |
29 | |
30 | /** |
31 | * Utility class to manage a multipurpose metadata storage index for cirrus. |
32 | * This store is used to store persistent states related to administrative |
33 | * tasks (index settings upgrade, wiki namespace names, ...). |
34 | */ |
class MetaStoreIndex {
	/**
	 * @const int version of the index, increment when mappings change
	 */
	private const METASTORE_VERSION = 4;

	/**
	 * @const string the doc id used to store version information related
	 * to the meta store itself. This value is not supposed to be changed.
	 */
	private const METASTORE_VERSION_DOCID = 'metastore_version';

	/**
	 * @const string index name, also used as the alias pointing at the
	 * concrete (suffixed) metastore index
	 */
	public const INDEX_NAME = 'mw_cirrus_metastore';

	/**
	 * @const string type for storing internal data
	 */
	private const INTERNAL_TYPE = 'internal';

	/**
	 * @var Connection
	 */
	private $connection;

	/**
	 * @var \Elastica\Client
	 */
	private $client;

	/**
	 * @var Printer|null output handler
	 */
	private $out;

	/**
	 * @var SearchConfig
	 */
	private $config;

	/**
	 * @var ConfigUtils
	 */
	private $configUtils;

	/**
	 * @param Connection $connection connection providing the client and indices
	 * @param Printer $out output handler, also handed to ConfigUtils
	 * @param SearchConfig $config
	 */
	public function __construct(
		Connection $connection, Printer $out, SearchConfig $config
	) {
		$this->connection = $connection;
		$this->client = $connection->getClient();
		$this->configUtils = new ConfigUtils( $this->client, $out );
		$this->out = $out;
		$this->config = $config;
	}

	/**
	 * @return MetaVersionStore
	 */
	public function versionStore() {
		return new MetaVersionStore( $this->elasticaIndex(), $this->connection );
	}

	/**
	 * @return MetaNamespaceStore
	 */
	public function namespaceStore() {
		return new MetaNamespaceStore( $this->elasticaIndex(), $this->config->getWikiId() );
	}

	/**
	 * @return MetaSaneitizeJobStore
	 */
	public function saneitizeJobStore() {
		return new MetaSaneitizeJobStore( $this->elasticaIndex() );
	}

	/**
	 * @return MetaStore[] all stores, keyed by a short store name
	 */
	public function stores() {
		return [
			'version' => $this->versionStore(),
			'namespace' => $this->namespaceStore(),
			'saneitize' => $this->saneitizeJobStore(),
		];
	}

	/**
	 * Create the metastore index and its alias when they do not exist yet.
	 *
	 * @return Status On success the value is the newly created \Elastica\Index,
	 *  or null if the index already exists. A non-OK status coming from the
	 *  Elasticsearch version check is returned unchanged.
	 */
	public function createIfNecessary(): Status {
		// If the mw_cirrus_metastore alias does not exist it
		// means we need to create everything from scratch.
		if ( $this->cirrusReady() ) {
			return Status::newGood();
		}
		$status = $this->configUtils->checkElasticsearchVersion();
		if ( !$status->isOK() ) {
			return $status;
		}
		$this->log( self::INDEX_NAME . " missing, creating new metastore index.\n" );
		$newIndex = $this->createNewIndex();
		$this->switchAliasTo( $newIndex );
		return Status::newGood( $newIndex );
	}

	/**
	 * Create the metastore if it is missing, otherwise upgrade it when the
	 * stored version is older than the version expected by this code.
	 *
	 * @return Status good on success; the failing status of createIfNecessary()
	 *  when creation could not proceed; fatal when the stored version is newer
	 *  than METASTORE_VERSION.
	 */
	public function createOrUpgradeIfNecessary(): Status {
		$newIndexStatus = $this->createIfNecessary();
		if ( !$newIndexStatus->isOK() ) {
			// Do not hide a failed creation behind a good status.
			return $newIndexStatus;
		}
		if ( $newIndexStatus->getValue() !== null ) {
			// Freshly created at the current version, nothing to upgrade.
			return Status::newGood();
		}

		$version = $this->metastoreVersion();
		if ( $version > self::METASTORE_VERSION ) {
			return Status::newFatal( "Metastore version $version found, cannot upgrade to a lower version: " .
				self::METASTORE_VERSION
			);
		}
		if ( $version < self::METASTORE_VERSION ) {
			$this->log( self::INDEX_NAME . " version mismatch, upgrading.\n" );
			$this->upgradeIndexVersion();
		}
		return Status::newGood();
	}

	/**
	 * Build the request body (settings and mappings) used to create the
	 * metastore index.
	 *
	 * @return array
	 * @throws \RuntimeException when the plugin scan does not report a good status
	 */
	private function buildIndexConfiguration() {
		$pluginsStatus = $this->configUtils->scanAvailablePlugins(
			$this->config->get( 'CirrusSearchBannedPlugins' ) );
		if ( !$pluginsStatus->isGood() ) {
			throw new \RuntimeException( (string)$pluginsStatus );
		}
		$filter = new AnalysisFilter();
		[ $analysis, $mappings ] = $filter->filterAnalysis(
			// Why 'aa'? It comes first? Hoping it receives generic language treatment.
			( new AnalysisConfigBuilder( 'aa', $pluginsStatus->getValue() ) )->buildConfig(),
			$this->buildMapping()
		);

		return [
			// Don't forget to update METASTORE_VERSION when changing something
			// in the settings.
			'settings' => [
				'index' => [
					'number_of_shards' => 1,
					'auto_expand_replicas' => '0-2',
					'analysis' => $analysis,
				]
			],
			'mappings' => $mappings,
		];
	}

	/**
	 * Create a new metastore index, wait for it to become green and store
	 * the current metastore version in it.
	 *
	 * @param string $suffix index suffix
	 * @return \Elastica\Index the newly created index
	 */
	private function createNewIndex( $suffix = 'first' ) {
		$name = self::INDEX_NAME . '_' . $suffix;
		$this->log( "Creating metastore index... $name" );
		// @todo utilize $this->getIndex()->create(...) once it supports setting
		// the master_timeout parameter.
		$index = $this->client->getIndex( $name );
		$index->request(
			'',
			\Elastica\Request::PUT,
			$this->buildIndexConfiguration(),
			[
				'master_timeout' => $this->getMasterTimeout(),
			]
		);
		$this->log( " ok\n" );
		$this->configUtils->waitForGreen( $index->getName(), 3600 );
		$this->storeMetastoreVersion( $index );
		return $index;
	}

	/**
	 * Don't forget to update METASTORE_VERSION when changing something
	 * in the settings.
	 *
	 * @return array the mapping
	 * @throws \RuntimeException when two stores declare the same property name
	 */
	private function buildMapping() {
		$properties = [
			'type' => [ 'type' => 'keyword' ],
			'wiki' => [ 'type' => 'keyword' ],
		];

		foreach ( $this->stores() as $store ) {
			// TODO: Reuse field definition implementations from page indices?
			$storeProperties = $store->buildIndexProperties();
			if ( !$storeProperties ) {
				continue;
			}
			$overlap = array_intersect_key( $properties, $storeProperties );
			if ( $overlap ) {
				throw new \RuntimeException( 'Metastore property overlap on: ' . implode( ', ', array_keys( $overlap ) ) );
			}
			$properties += $storeProperties;
		}

		return [
			'dynamic' => false,
			'properties' => $properties,
		];
	}

	/**
	 * Switch the mw_cirrus_metastore alias to this new index name and
	 * delete the index previously behind the alias, if any.
	 *
	 * @param \Elastica\Index $index
	 * @throws \RuntimeException when the alias already points at $index
	 */
	private function switchAliasTo( $index ) {
		$name = $index->getName();
		$oldIndexName = $this->getAliasedIndexName();

		// Refuse before logging or touching anything: the old index is
		// deleted at the end, which would destroy the new one as well.
		if ( $oldIndexName === $name ) {
			throw new \RuntimeException(
				"Cannot switch aliases old and new index names are identical: $name"
			);
		}

		if ( $oldIndexName !== null ) {
			$this->log( "Switching " . self::INDEX_NAME . " alias from $oldIndexName to $name.\n" );
		} else {
			$this->log( "Creating " . self::INDEX_NAME . " alias to $name.\n" );
		}

		// The add and (optional) remove actions are sent in a single request.
		$actions = [
			[
				'add' => [
					'index' => $name,
					'alias' => self::INDEX_NAME,
				]
			],
		];
		if ( $oldIndexName !== null ) {
			$actions[] = [
				'remove' => [
					'index' => $oldIndexName,
					'alias' => self::INDEX_NAME,
				]
			];
		}
		$this->client->request( '_aliases', \Elastica\Request::POST, [ 'actions' => $actions ],
			[ 'master_timeout' => $this->getMasterTimeout() ] );

		if ( $oldIndexName !== null ) {
			$this->log( "Deleting old index $oldIndexName\n" );
			$this->connection->getIndex( $oldIndexName )->delete();
		}
	}

	/**
	 * @return string|null the current index behind the self::INDEX_NAME
	 * alias or null if the alias does not exist
	 * @throws \RuntimeException when more than one index carries the alias
	 */
	private function getAliasedIndexName() {
		// FIXME: Elastica seems to have trouble parsing the error reason
		// for this endpoint. Running a simple HEAD first to check if it
		// exists
		$resp = $this->client->request( '_alias/' . self::INDEX_NAME, \Elastica\Request::HEAD, [] );
		if ( $resp->getStatus() === 404 ) {
			return null;
		}
		$resp = $this->client->request( '_alias/' . self::INDEX_NAME, \Elastica\Request::GET, [] );
		$indexName = null;
		foreach ( $resp->getData() as $index => $aliases ) {
			if ( !isset( $aliases['aliases'][self::INDEX_NAME] ) ) {
				continue;
			}
			if ( $indexName !== null ) {
				throw new \RuntimeException( "Multiple indices are aliased with " . self::INDEX_NAME .
					", please fix manually." );
			}
			$indexName = $index;
		}
		return $indexName;
	}

	/**
	 * Create a new index at the current version, copy over every document
	 * except the internal ones, then move the alias to the new index.
	 *
	 * @throws \RuntimeException when the module scan fails or the reindex
	 *  module is not available
	 */
	private function upgradeIndexVersion() {
		$modulesStatus = $this->configUtils->scanAvailableModules();
		if ( !$modulesStatus->isGood() ) {
			throw new \RuntimeException( (string)$modulesStatus );
		}
		// in_array rather than array_search: the latter returns the matching
		// key, which is 0 (falsy) when 'reindex' is the first module listed.
		if ( !in_array( 'reindex', $modulesStatus->getValue(), true ) ) {
			throw new \RuntimeException( "The reindex module is mandatory to upgrade the metastore" );
		}
		$index = $this->createNewIndex( (string)time() );
		// Reindex everything except the internal type, it's not clear
		// yet if we just need to filter the metastore version info or
		// the whole internal type. Currently we only use the internal
		// type for storing the metastore version.
		$reindex = [
			'source' => [
				'index' => self::INDEX_NAME,
				'query' => [
					'bool' => [
						'must_not' => [
							[ 'term' => [ 'type' => self::INTERNAL_TYPE ] ]
						],
					]
				],
			],
			'dest' => [ 'index' => $index->getName() ],
		];
		// reindex is extremely fast so we can wait for it
		// we might consider using the task manager if this process
		// becomes longer and/or prone to curl timeouts
		$this->client->request( '_reindex',
			\Elastica\Request::POST,
			$reindex,
			[ 'wait_for_completion' => 'true' ]
		);
		$index->refresh();
		$this->switchAliasTo( $index );
	}

	/**
	 * @return int version of metastore index expected by runtime
	 */
	public function runtimeVersion() {
		return self::METASTORE_VERSION;
	}

	/**
	 * Write the version document into the given index.
	 *
	 * @param \Elastica\Index $index new index
	 */
	private function storeMetastoreVersion( $index ) {
		$versionDoc = new \Elastica\Document(
			self::METASTORE_VERSION_DOCID,
			[
				'type' => self::INTERNAL_TYPE,
				'metastore_major_version' => self::METASTORE_VERSION,
			]
		);
		$index->addDocument( $versionDoc );
	}

	/**
	 * Send a message to the output handler, if one is set.
	 *
	 * @param string $msg log message
	 */
	private function log( $msg ) {
		if ( $this->out ) {
			$this->out->output( $msg );
		}
	}

	/**
	 * @return \Elastica\Index the index object for self::INDEX_NAME
	 */
	public function elasticaIndex(): \Elastica\Index {
		return $this->connection->getIndex( self::INDEX_NAME );
	}

	/**
	 * Check if cirrus is ready by checking if the index has been created on this cluster
	 * @return bool
	 */
	public function cirrusReady() {
		return $this->elasticaIndex()->exists();
	}

	/**
	 * @return int the version of the meta store. 0 means that
	 *  the metastore has never been created.
	 * @throws \Elastica\Exception\ResponseException for any response error
	 *  other than the metastore index itself being missing
	 */
	public function metastoreVersion() {
		try {
			$doc = $this->elasticaIndex()->getDocument( self::METASTORE_VERSION_DOCID );
		} catch ( \Elastica\Exception\NotFoundException $e ) {
			return 0;
		} catch ( \Elastica\Exception\ResponseException $e ) {
			// BC code in case the metastore alias does not exist yet
			$fullError = $e->getResponse()->getFullError();
			if ( isset( $fullError['type'] )
				&& $fullError['type'] === 'index_not_found_exception'
				&& isset( $fullError['index'] )
				&& $fullError['index'] === self::INDEX_NAME
			) {
				return 0;
			}
			throw $e;
		}
		return (int)$doc->get( 'metastore_major_version' );
	}

	/**
	 * @return mixed value of the CirrusSearchMasterTimeout setting, passed as
	 *  the master_timeout request parameter
	 */
	private function getMasterTimeout() {
		return $this->config->get( 'CirrusSearchMasterTimeout' );
	}
}