Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 175 |
|
0.00% |
0 / 20 |
CRAP | |
0.00% |
0 / 1 |
MetaStoreIndex | |
0.00% |
0 / 175 |
|
0.00% |
0 / 20 |
2070 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
versionStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
namespaceStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
saneitizeJobStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
stores | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
createIfNecessary | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
createOrUpgradeIfNecessary | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
20 | |||
buildIndexConfiguration | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
6 | |||
createNewIndex | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
buildMapping | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
switchAliasTo | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
30 | |||
getAliasedIndexName | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
upgradeIndexVersion | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
12 | |||
runtimeVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
storeMetastoreVersion | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
log | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
elasticaIndex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
cirrusReady | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
metastoreVersion | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
56 | |||
getMasterTimeout | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\MetaStore; |
4 | |
5 | use CirrusSearch\Connection; |
6 | use CirrusSearch\Maintenance\AnalysisConfigBuilder; |
7 | use CirrusSearch\Maintenance\AnalysisFilter; |
8 | use CirrusSearch\Maintenance\ConfigUtils; |
9 | use CirrusSearch\Maintenance\Printer; |
10 | use CirrusSearch\SearchConfig; |
11 | |
12 | /** |
13 | * This program is free software; you can redistribute it and/or modify |
14 | * it under the terms of the GNU General Public License as published by |
15 | * the Free Software Foundation; either version 2 of the License, or |
16 | * (at your option) any later version. |
17 | * |
18 | * This program is distributed in the hope that it will be useful, |
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 | * GNU General Public License for more details. |
22 | * |
23 | * You should have received a copy of the GNU General Public License along |
24 | * with this program; if not, write to the Free Software Foundation, Inc., |
25 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
26 | * http://www.gnu.org/copyleft/gpl.html |
27 | */ |
28 | |
/**
 * Utility class to manage a multipurpose metadata storage index for cirrus.
 * This store is used to store persistent states related to administrative
 * tasks (index settings upgrade, wiki namespace names, ...).
 */
class MetaStoreIndex {
	/**
	 * @const int version of the index, increment when mappings change
	 */
	private const METASTORE_VERSION = 4;

	/**
	 * @const string the doc id used to store version information related
	 * to the meta store itself. This value is not supposed to be changed.
	 */
	private const METASTORE_VERSION_DOCID = 'metastore_version';

	/**
	 * @const string index name
	 */
	public const INDEX_NAME = 'mw_cirrus_metastore';

	/**
	 * @const string type for storing internal data
	 */
	private const INTERNAL_TYPE = 'internal';

	/**
	 * @var Connection
	 */
	private $connection;

	/**
	 * @var \Elastica\Client
	 */
	private $client;

	/**
	 * @var Printer|null output handler
	 */
	private $out;

	/**
	 * @var SearchConfig
	 */
	private $config;

	/**
	 * @var ConfigUtils
	 */
	private $configUtils;

	/**
	 * @param Connection $connection connection the client and indices are obtained from
	 * @param Printer $out output handler, also handed to ConfigUtils
	 * @param SearchConfig $config
	 */
	public function __construct(
		Connection $connection, Printer $out, SearchConfig $config
	) {
		$this->connection = $connection;
		$this->client = $connection->getClient();
		$this->configUtils = new ConfigUtils( $this->client, $out );
		$this->out = $out;
		$this->config = $config;
	}

	/**
	 * @return MetaVersionStore a new store bound to the metastore index
	 */
	public function versionStore() {
		return new MetaVersionStore( $this->elasticaIndex(), $this->connection );
	}

	/**
	 * @return MetaNamespaceStore a new store bound to the metastore index
	 *  and to the wiki id of the current configuration
	 */
	public function namespaceStore() {
		return new MetaNamespaceStore( $this->elasticaIndex(), $this->config->getWikiId() );
	}

	/**
	 * @return MetaSaneitizeJobStore a new store bound to the metastore index
	 */
	public function saneitizeJobStore() {
		return new MetaSaneitizeJobStore( $this->elasticaIndex() );
	}

	/**
	 * @return MetaStore[] every known store, keyed by a short name
	 */
	public function stores() {
		return [
			'version' => $this->versionStore(),
			'namespace' => $this->namespaceStore(),
			'saneitize' => $this->saneitizeJobStore(),
		];
	}

	/**
	 * @return \Elastica\Index|null Index on creation, or null if the index
	 *  already exists.
	 */
	public function createIfNecessary() {
		// If the mw_cirrus_metastore alias does not exist it
		// means we need to create everything from scratch.
		if ( $this->cirrusReady() ) {
			return null;
		}
		$this->log( self::INDEX_NAME . " missing, creating new metastore index.\n" );
		$newIndex = $this->createNewIndex();
		$this->switchAliasTo( $newIndex );
		return $newIndex;
	}

	/**
	 * Create the metastore when it is missing. When it already exists,
	 * compare the stored version with METASTORE_VERSION and upgrade the
	 * index if the stored version is older.
	 *
	 * @throws \Exception when the stored version is greater than
	 *  METASTORE_VERSION
	 */
	public function createOrUpgradeIfNecessary() {
		if ( $this->createIfNecessary() !== null ) {
			// Freshly created at the current version, nothing to upgrade.
			return;
		}

		$version = $this->metastoreVersion();
		if ( $version < self::METASTORE_VERSION ) {
			$this->log( self::INDEX_NAME . " version mismatch, upgrading.\n" );
			$this->upgradeIndexVersion();
		} elseif ( $version > self::METASTORE_VERSION ) {
			throw new \Exception(
				"Metastore version $version found, cannot upgrade to a lower version: " .
					self::METASTORE_VERSION
			);
		}
	}

	/**
	 * Build the body of the index creation request.
	 *
	 * @return array with the keys 'settings' and 'mappings'
	 * @throws \RuntimeException when the plugin scan status is not good
	 */
	private function buildIndexConfiguration() {
		$pluginsStatus = $this->configUtils->scanAvailablePlugins(
			$this->config->get( 'CirrusSearchBannedPlugins' ) );
		if ( !$pluginsStatus->isGood() ) {
			throw new \RuntimeException( (string)$pluginsStatus );
		}
		$filter = new AnalysisFilter();
		[ $analysis, $mappings ] = $filter->filterAnalysis(
			// Why 'aa'? It comes first? Hoping it receives generic language treatment.
			( new AnalysisConfigBuilder( 'aa', $pluginsStatus->getValue() ) )->buildConfig(),
			$this->buildMapping()
		);

		return [
			// Don't forget to update METASTORE_VERSION when changing something
			// in the settings.
			'settings' => [
				'index' => [
					'number_of_shards' => 1,
					'auto_expand_replicas' => '0-2',
					'analysis' => $analysis,
				]
			],
			'mappings' => $mappings,
		];
	}

	/**
	 * Create a new metastore index.
	 * @param string $suffix index suffix
	 * @return \Elastica\Index the newly created index
	 */
	private function createNewIndex( $suffix = 'first' ) {
		$name = self::INDEX_NAME . '_' . $suffix;
		$this->log( "Creating metastore index... $name" );
		// @todo utilize $this->getIndex()->create(...) once it supports setting
		// the master_timeout parameter.
		$index = $this->client->getIndex( $name );
		$index->request(
			'',
			\Elastica\Request::PUT,
			$this->buildIndexConfiguration(),
			[
				'master_timeout' => $this->getMasterTimeout(),
			]
		);
		$this->log( " ok\n" );
		// NOTE(review): 3600 is presumably a timeout in seconds - confirm
		// against ConfigUtils::waitForGreen().
		$this->configUtils->waitForGreen( $index->getName(), 3600 );
		// Record the version so metastoreVersion() can read it back later.
		$this->storeMetastoreVersion( $index );
		return $index;
	}

	/**
	 * Don't forget to update METASTORE_VERSION when changing something
	 * in the settings.
	 *
	 * @return array the mapping
	 * @throws \Exception when a store declares a property that is already defined
	 */
	private function buildMapping() {
		$properties = [
			'type' => [ 'type' => 'keyword' ],
			'wiki' => [ 'type' => 'keyword' ],
		];

		foreach ( $this->stores() as $store ) {
			// TODO: Reuse field definition implementations from page indices?
			$storeProperties = $store->buildIndexProperties();
			if ( !$storeProperties ) {
				continue;
			}
			// Two stores must never declare the same field name.
			$overlap = array_intersect_key( $properties, $storeProperties );
			if ( $overlap ) {
				throw new \Exception(
					'Metastore property overlap on: ' . implode( ', ', array_keys( $overlap ) )
				);
			}
			$properties += $storeProperties;
		}

		return [
			'dynamic' => false,
			'properties' => $properties,
		];
	}

	/**
	 * Switch the mw_cirrus_metastore alias to this new index name.
	 * The index previously behind the alias, if any, is deleted afterwards.
	 *
	 * @param \Elastica\Index $index
	 * @throws \Exception when the alias already points to $index
	 */
	private function switchAliasTo( $index ) {
		$name = $index->getName();
		$oldIndexName = $this->getAliasedIndexName();
		if ( $oldIndexName !== null ) {
			$this->log( "Switching " . self::INDEX_NAME . " alias from $oldIndexName to $name.\n" );
		} else {
			$this->log( "Creating " . self::INDEX_NAME . " alias to $name.\n" );
		}

		if ( $oldIndexName == $name ) {
			throw new \Exception(
				"Cannot switch aliases old and new index names are identical: $name"
			);
		}
		// Create the alias
		$path = '_aliases';
		$data = [ 'actions' => [
			[
				'add' => [
					'index' => $name,
					'alias' => self::INDEX_NAME,
				]
			],
		] ];
		if ( $oldIndexName !== null ) {
			// Add and remove are sent in the same _aliases request.
			$data['actions'][] = [
				'remove' => [
					'index' => $oldIndexName,
					'alias' => self::INDEX_NAME,
				]
			];
		}
		$this->client->request( $path, \Elastica\Request::POST, $data,
			[ 'master_timeout' => $this->getMasterTimeout() ] );
		if ( $oldIndexName !== null ) {
			$this->log( "Deleting old index $oldIndexName\n" );
			$this->connection->getIndex( $oldIndexName )->delete();
		}
	}

	/**
	 * @return string|null the current index behind the self::INDEX_NAME
	 *  alias or null if the alias does not exist
	 * @throws \Exception when more than one index carries the alias
	 */
	private function getAliasedIndexName() {
		// FIXME: Elastica seems to have trouble parsing the error reason
		// for this endpoint. Running a simple HEAD first to check if it
		// exists
		$resp = $this->client->request( '_alias/' . self::INDEX_NAME, \Elastica\Request::HEAD, [] );
		if ( $resp->getStatus() === 404 ) {
			return null;
		}
		$resp = $this->client->request( '_alias/' . self::INDEX_NAME, \Elastica\Request::GET, [] );
		$indexName = null;
		foreach ( $resp->getData() as $index => $aliases ) {
			if ( isset( $aliases['aliases'][self::INDEX_NAME] ) ) {
				if ( $indexName !== null ) {
					throw new \Exception( "Multiple indices are aliased with " . self::INDEX_NAME .
						", please fix manually." );
				}
				$indexName = $index;
			}
		}
		return $indexName;
	}

	/**
	 * Create a new index at the current version, copy every document
	 * except those of the internal type into it, then point the alias at it.
	 *
	 * @throws \RuntimeException when the module scan status is not good
	 * @throws \Exception when 'reindex' is not among the scanned modules
	 */
	private function upgradeIndexVersion() {
		$pluginsStatus = $this->configUtils->scanAvailableModules();
		if ( !$pluginsStatus->isGood() ) {
			throw new \RuntimeException( (string)$pluginsStatus );
		}
		// in_array() rather than array_search(): array_search() returns the
		// matching key, which is 0 (falsy) when 'reindex' is the first entry,
		// and that made a valid cluster fail this check.
		if ( !in_array( 'reindex', $pluginsStatus->getValue(), true ) ) {
			throw new \Exception( "The reindex module is mandatory to upgrade the metastore" );
		}
		$index = $this->createNewIndex( (string)time() );
		// Reindex everything except the internal type, it's not clear
		// yet if we just need to filter the metastore version info or
		// the whole internal type. Currently we only use the internal
		// type for storing the metastore version.
		$reindex = [
			'source' => [
				'index' => self::INDEX_NAME,
				'query' => [
					'bool' => [
						'must_not' => [
							[ 'term' => [ 'type' => self::INTERNAL_TYPE ] ]
						],
					]
				],
			],
			'dest' => [ 'index' => $index->getName() ],
		];
		// reindex is extremely fast so we can wait for it
		// we might consider using the task manager if this process
		// becomes longer and/or prone to curl timeouts
		$this->client->request( '_reindex',
			\Elastica\Request::POST,
			$reindex,
			[ 'wait_for_completion' => 'true' ]
		);
		$index->refresh();
		$this->switchAliasTo( $index );
	}

	/**
	 * @return int version of metastore index expected by runtime
	 */
	public function runtimeVersion() {
		return self::METASTORE_VERSION;
	}

	/**
	 * Write the version document (id METASTORE_VERSION_DOCID) into $index.
	 *
	 * @param \Elastica\Index $index new index
	 */
	private function storeMetastoreVersion( $index ) {
		$index->addDocument(
			new \Elastica\Document(
				self::METASTORE_VERSION_DOCID,
				[
					'type' => self::INTERNAL_TYPE,
					'metastore_major_version' => self::METASTORE_VERSION,
				]
			)
		);
	}

	/**
	 * Send a message to the output handler, if one is set.
	 *
	 * @param string $msg log message
	 */
	private function log( $msg ) {
		if ( $this->out ) {
			$this->out->output( $msg );
		}
	}

	/**
	 * @return \Elastica\Index the index obtained from the connection under
	 *  the name self::INDEX_NAME
	 */
	public function elasticaIndex(): \Elastica\Index {
		return $this->connection->getIndex( self::INDEX_NAME );
	}

	/**
	 * Check if cirrus is ready by checking if the index has been created on this cluster
	 * @return bool
	 */
	public function cirrusReady() {
		return $this->elasticaIndex()->exists();
	}

	/**
	 * @return int the version of the meta store. 0 means that
	 *  the metastore has never been created.
	 * @throws \Elastica\Exception\ResponseException for any response error
	 *  other than the metastore index not being found
	 */
	public function metastoreVersion() {
		try {
			$doc = $this->elasticaIndex()->getDocument( self::METASTORE_VERSION_DOCID );
		} catch ( \Elastica\Exception\NotFoundException $e ) {
			return 0;
		} catch ( \Elastica\Exception\ResponseException $e ) {
			// BC code in case the metastore alias does not exist yet
			$fullError = $e->getResponse()->getFullError();
			if ( isset( $fullError['type'] )
				&& $fullError['type'] === 'index_not_found_exception'
				&& isset( $fullError['index'] )
				&& $fullError['index'] === self::INDEX_NAME
			) {
				return 0;
			}
			throw $e;
		}
		return (int)$doc->get( 'metastore_major_version' );
	}

	/**
	 * @return mixed value of the CirrusSearchMasterTimeout setting, sent as
	 *  the master_timeout request parameter
	 */
	private function getMasterTimeout() {
		return $this->config->get( 'CirrusSearchMasterTimeout' );
	}
}