Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 178 |
|
0.00% |
0 / 20 |
CRAP | |
0.00% |
0 / 1 |
| MetaStoreIndex | |
0.00% |
0 / 178 |
|
0.00% |
0 / 20 |
2256 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
| versionStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| namespaceStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| saneitizeJobStore | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| stores | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
| createIfNecessary | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
| createOrUpgradeIfNecessary | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
| buildIndexConfiguration | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
6 | |||
| createNewIndex | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
| buildMapping | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
20 | |||
| switchAliasTo | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
30 | |||
| getAliasedIndexName | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
| upgradeIndexVersion | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
12 | |||
| runtimeVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| storeMetastoreVersion | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
| log | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| elasticaIndex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| cirrusReady | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| metastoreVersion | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
56 | |||
| getMasterTimeout | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\MetaStore; |
| 4 | |
| 5 | use CirrusSearch\Connection; |
| 6 | use CirrusSearch\Maintenance\AnalysisConfigBuilder; |
| 7 | use CirrusSearch\Maintenance\AnalysisFilter; |
| 8 | use CirrusSearch\Maintenance\ConfigUtils; |
| 9 | use CirrusSearch\Maintenance\Plugins; |
| 10 | use CirrusSearch\Maintenance\Printer; |
| 11 | use CirrusSearch\SearchConfig; |
| 12 | use MediaWiki\Status\Status; |
| 13 | |
| 14 | /** |
| 15 | * @license GPL-2.0-or-later |
| 16 | */ |
| 17 | |
| 18 | /** |
| 19 | * Utility class to manage a multipurpose metadata storage index for cirrus. |
| 20 | * This store is used to store persistent states related to administrative |
| 21 | * tasks (index settings upgrade, wiki namespace names, ...). |
| 22 | */ |
| 23 | class MetaStoreIndex { |
| 24 | /** |
| 25 | * @const int version of the index, increment when mappings change |
| 26 | */ |
| 27 | private const METASTORE_VERSION = 4; |
| 28 | |
| 29 | /** |
| 30 | * @const string the doc id used to store version information related |
| 31 | * to the meta store itself. This value is not supposed to be changed. |
| 32 | */ |
| 33 | private const METASTORE_VERSION_DOCID = 'metastore_version'; |
| 34 | |
| 35 | /** |
| 36 | * @const string index name |
| 37 | */ |
| 38 | public const INDEX_NAME = 'mw_cirrus_metastore'; |
| 39 | |
| 40 | /** |
| 41 | * @const string type for storing internal data |
| 42 | */ |
| 43 | private const INTERNAL_TYPE = 'internal'; |
| 44 | |
| 45 | /** |
| 46 | * @var Connection |
| 47 | */ |
| 48 | private $connection; |
| 49 | |
| 50 | /** |
| 51 | * @var \Elastica\Client |
| 52 | */ |
| 53 | private $client; |
| 54 | |
| 55 | /** |
| 56 | * @var Printer|null output handler |
| 57 | */ |
| 58 | private $out; |
| 59 | |
| 60 | /** |
| 61 | * @var SearchConfig |
| 62 | */ |
| 63 | private $config; |
| 64 | |
| 65 | /** |
| 66 | * @var ConfigUtils |
| 67 | */ |
| 68 | private $configUtils; |
| 69 | |
/**
 * Bind the metastore helper to a connection, an output handler and a configuration.
 *
 * The Elastica client is taken from the connection and shared with the
 * ConfigUtils instance created here.
 *
 * @param Connection $connection connection providing the client and index handles
 * @param Printer $out output handler, used for log messages and passed to ConfigUtils
 * @param SearchConfig $config configuration the settings are read from
 */
public function __construct(
	Connection $connection, Printer $out, SearchConfig $config
) {
	$client = $connection->getClient();

	$this->connection = $connection;
	$this->client = $client;
	$this->out = $out;
	$this->config = $config;
	$this->configUtils = new ConfigUtils( $client, $out );
}
| 84 | |
/**
 * Create a MetaVersionStore backed by the metastore index.
 *
 * @return MetaVersionStore new store built from the metastore index and
 *  the connection held by this object
 */
public function versionStore() {
	$index = $this->elasticaIndex();

	return new MetaVersionStore( $index, $this->connection );
}
| 91 | |
/**
 * Create a MetaNamespaceStore backed by the metastore index.
 *
 * @return MetaNamespaceStore new store built from the metastore index and
 *  the wiki id reported by the configuration
 */
public function namespaceStore() {
	$index = $this->elasticaIndex();
	$wikiId = $this->config->getWikiId();

	return new MetaNamespaceStore( $index, $wikiId );
}
| 98 | |
/**
 * Create a MetaSaneitizeJobStore backed by the metastore index.
 *
 * @return MetaSaneitizeJobStore new store built from the metastore index
 */
public function saneitizeJobStore() {
	$index = $this->elasticaIndex();

	return new MetaSaneitizeJobStore( $index );
}
| 105 | |
/**
 * Instantiate every store kept in the metastore index.
 *
 * @return MetaStore[] freshly created stores, keyed by 'version',
 *  'namespace' and 'saneitize'
 */
public function stores() {
	$stores = [];
	$stores['version'] = $this->versionStore();
	$stores['namespace'] = $this->namespaceStore();
	$stores['saneitize'] = $this->saneitizeJobStore();

	return $stores;
}
| 116 | |
/**
 * Create the metastore index and its alias when they are not there yet.
 *
 * @return Status good status whose value is the newly created
 *  \Elastica\Index, or a good status without value (null) when the
 *  index already exists. When the Elasticsearch version check fails its
 *  status is returned unchanged and nothing is created.
 */
public function createIfNecessary(): Status {
	// If the mw_cirrus_metastore alias does not exist it
	// means we need to create everything from scratch.
	if ( !$this->cirrusReady() ) {
		$versionCheck = $this->configUtils->checkElasticsearchVersion();
		if ( $versionCheck->isOK() ) {
			$this->log( self::INDEX_NAME . " missing, creating new metastore index.\n" );
			$created = $this->createNewIndex();
			$this->switchAliasTo( $created );

			return Status::newGood( $created );
		}

		return $versionCheck;
	}

	return Status::newGood();
}
| 136 | |
/**
 * Make sure the metastore index exists and is at the version expected
 * by this code.
 *
 * The index is created when missing. When it already exists, the stored
 * version is compared with METASTORE_VERSION and the index is upgraded
 * if it is older.
 *
 * @return Status good on success; the failed status returned by
 *  createIfNecessary() when the index could not be created; fatal when
 *  the stored version is higher than METASTORE_VERSION.
 */
public function createOrUpgradeIfNecessary(): Status {
	$newIndexStatus = $this->createIfNecessary();
	if ( !$newIndexStatus->isOK() ) {
		// Report the creation failure to the caller instead of
		// silently claiming success.
		return $newIndexStatus;
	}
	if ( $newIndexStatus->getValue() !== null ) {
		// A brand new index was just created by createIfNecessary(),
		// there is nothing to upgrade.
		return Status::newGood();
	}

	$version = $this->metastoreVersion();
	if ( $version > self::METASTORE_VERSION ) {
		return Status::newFatal( "Metastore version $version found, cannot upgrade to a lower version: " .
			self::METASTORE_VERSION
		);
	}
	if ( $version < self::METASTORE_VERSION ) {
		$this->log( self::INDEX_NAME . " version mismatch, upgrading.\n" );
		$this->upgradeIndexVersion();
	}

	return Status::newGood();
}
| 152 | |
/**
 * Assemble the body of the index creation request.
 *
 * The analysis configuration and the mapping are passed through
 * AnalysisFilter::filterAnalysis() before being placed in the result.
 *
 * @return array array with the 'settings' and 'mappings' keys
 * @throws \RuntimeException when scanning the available plugins does not
 *  return a good status
 */
private function buildIndexConfiguration(): array {
	$bannedPlugins = $this->config->get( 'CirrusSearchBannedPlugins' );
	$pluginsStatus = $this->configUtils->scanAvailablePlugins( $bannedPlugins );
	if ( !$pluginsStatus->isGood() ) {
		throw new \RuntimeException( (string)$pluginsStatus );
	}

	$analysisFilter = new AnalysisFilter();
	// Why 'aa'? It comes first? Hoping it receives generic language treatment.
	$analysisBuilder = new AnalysisConfigBuilder( 'aa', $pluginsStatus->getValue() );
	[ $analysis, $mappings ] = $analysisFilter->filterAnalysis(
		$analysisBuilder->buildConfig(),
		$this->buildMapping()
	);

	// Don't forget to update METASTORE_VERSION when changing something
	// in the settings.
	$indexSettings = [
		'number_of_shards' => 1,
		'auto_expand_replicas' => '0-2',
		'analysis' => $analysis,
	];

	return [
		'settings' => [ 'index' => $indexSettings ],
		'mappings' => $mappings,
	];
}
| 179 | |
/**
 * Create a new metastore index named INDEX_NAME . '_' . $suffix, wait
 * for it to become green and store the metastore version document in it.
 *
 * @param string $suffix index suffix
 * @return \Elastica\Index the newly created index
 */
private function createNewIndex( $suffix = 'first' ) {
	$name = self::INDEX_NAME . '_' . $suffix;
	$this->log( "Creating metastore index... $name" );

	// @todo utilize $this->getIndex()->create(...) once it supports setting
	// the master_timeout parameter.
	$index = $this->client->getIndex( $name );
	$body = $this->buildIndexConfiguration();
	$query = [
		'master_timeout' => $this->getMasterTimeout(),
	];
	$index->request( '', \Elastica\Request::PUT, $body, $query );
	$this->log( " ok\n" );

	$this->configUtils->waitForGreen( $index->getName(), 3600 );
	$this->storeMetastoreVersion( $index );

	return $index;
}
| 204 | |
/**
 * Build the mapping of the metastore index: the shared 'type' and 'wiki'
 * keyword fields plus the properties declared by each store.
 *
 * Don't forget to update METASTORE_VERSION when changing something
 * in the settings.
 *
 * @return array the mapping
 * @throws \RuntimeException when a store declares a property name that is
 *  already taken
 */
private function buildMapping() {
	$fields = [
		'type' => [ 'type' => 'keyword' ],
		'wiki' => [ 'type' => 'keyword' ],
	];

	foreach ( $this->stores() as $store ) {
		// TODO: Reuse field definition implementations from page indices?
		$storeFields = $store->buildIndexProperties();
		if ( $storeFields ) {
			$clashes = array_intersect_key( $fields, $storeFields );
			if ( $clashes ) {
				$clashingNames = implode( ', ', array_keys( $clashes ) );
				throw new \RuntimeException( 'Metastore property overlap on: ' . $clashingNames );
			}
			$fields += $storeFields;
		}
	}

	$mapping = [];
	$mapping['dynamic'] = false;
	$mapping['properties'] = $fields;

	return $mapping;
}
| 235 | |
/**
 * Point the mw_cirrus_metastore alias to the given index.
 *
 * When the alias currently targets another index, that index is removed
 * from the alias in the same _aliases request and deleted afterwards.
 *
 * @param \Elastica\Index $index index the alias must point to
 * @throws \RuntimeException when the alias already points to $index
 */
private function switchAliasTo( $index ) {
	$name = $index->getName();
	$previous = $this->getAliasedIndexName();
	$hasPrevious = $previous !== null;

	$this->log( $hasPrevious
		? "Switching " . self::INDEX_NAME . " alias from $previous to $name.\n"
		: "Creating " . self::INDEX_NAME . " alias to $name.\n"
	);

	if ( $previous == $name ) {
		throw new \RuntimeException(
			"Cannot switch aliases old and new index names are identical: $name"
		);
	}

	// Add the new index and, when needed, remove the old one within a
	// single request.
	$actions = [
		[ 'add' => [ 'index' => $name, 'alias' => self::INDEX_NAME ] ],
	];
	if ( $hasPrevious ) {
		$actions[] = [
			'remove' => [ 'index' => $previous, 'alias' => self::INDEX_NAME ],
		];
	}
	$this->client->request(
		'_aliases',
		\Elastica\Request::POST,
		[ 'actions' => $actions ],
		[ 'master_timeout' => $this->getMasterTimeout() ]
	);

	if ( $hasPrevious ) {
		$this->log( "Deleting old index $previous\n" );
		$this->connection->getIndex( $previous )->delete();
	}
}
| 279 | |
/**
 * Find the index currently behind the self::INDEX_NAME alias.
 *
 * @return string|null name of the aliased index, or null if the alias
 *  does not exist
 * @throws \RuntimeException when more than one index carries the alias
 */
private function getAliasedIndexName() {
	$aliasPath = '_alias/' . self::INDEX_NAME;

	// FIXME: Elastica seems to have trouble parsing the error reason
	// for this endpoint. Running a simple HEAD first to check if it
	// exists
	$head = $this->client->request( $aliasPath, \Elastica\Request::HEAD, [] );
	if ( $head->getStatus() === 404 ) {
		return null;
	}

	$found = null;
	$response = $this->client->request( $aliasPath, \Elastica\Request::GET, [] );
	foreach ( $response->getData() as $candidate => $info ) {
		if ( !isset( $info['aliases'][self::INDEX_NAME] ) ) {
			continue;
		}
		if ( $found !== null ) {
			throw new \RuntimeException( "Multiple indices are aliased with " . self::INDEX_NAME .
				", please fix manually." );
		}
		$found = $candidate;
	}

	return $found;
}
| 305 | |
/**
 * Upgrade the metastore by reindexing its content into a freshly created
 * index and switching the alias to it.
 *
 * The new index is suffixed with the current timestamp. Documents whose
 * 'type' is INTERNAL_TYPE are not copied: createNewIndex() already wrote
 * an up to date version document into the new index.
 *
 * @throws \RuntimeException when the available modules cannot be listed,
 *  when the reindex module is missing, or when the reindex request
 *  reports failures (the alias is then left untouched so that the old
 *  index is not deleted).
 */
private function upgradeIndexVersion() {
	$pluginsStatus = $this->configUtils->scanAvailableModules();
	if ( !$pluginsStatus->isGood() ) {
		throw new \RuntimeException( (string)$pluginsStatus );
	}
	if ( !Plugins::contains( 'reindex', $pluginsStatus->getValue() ) ) {
		throw new \RuntimeException( "The reindex module is mandatory to upgrade the metastore" );
	}
	$index = $this->createNewIndex( (string)time() );
	// Reindex everything except the internal type, it's not clear
	// yet if we just need to filter the metastore version info or
	// the whole internal type. Currently we only use the internal
	// type for storing the metastore version.
	$reindex = [
		'source' => [
			'index' => self::INDEX_NAME,
			'query' => [
				'bool' => [
					'must_not' => [
						[ 'term' => [ 'type' => self::INTERNAL_TYPE ] ]
					],
				]
			],
		],
		'dest' => [ 'index' => $index->getName() ],
	];
	// reindex is extremely fast so we can wait for it
	// we might consider using the task manager if this process
	// becomes longer and/or prone to curl timeouts
	$response = $this->client->request( '_reindex',
		\Elastica\Request::POST,
		$reindex,
		[ 'wait_for_completion' => 'true' ]
	);
	// The reindex API lists unrecoverable errors under 'failures'.
	// switchAliasTo() deletes the old index, so bail out before switching
	// to avoid losing the documents that were not copied.
	$responseData = $response->getData();
	if ( is_array( $responseData ) && !empty( $responseData['failures'] ) ) {
		throw new \RuntimeException(
			"Reindexing " . self::INDEX_NAME . " into " . $index->getName() .
			" reported failures, the alias was not switched: " .
			json_encode( $responseData['failures'] )
		);
	}
	$index->refresh();
	$this->switchAliasTo( $index );
}
| 343 | |
/**
 * Expose the METASTORE_VERSION constant of this class.
 *
 * @return int version of metastore index expected by runtime
 */
public function runtimeVersion() {
	$expectedVersion = self::METASTORE_VERSION;

	return $expectedVersion;
}
| 350 | |
/**
 * Write the metastore version document into the given index.
 *
 * The document uses the METASTORE_VERSION_DOCID id, the INTERNAL_TYPE
 * type and records METASTORE_VERSION as 'metastore_major_version'.
 *
 * @param \Elastica\Index $index new index
 */
private function storeMetastoreVersion( $index ) {
	$fields = [
		'type' => self::INTERNAL_TYPE,
		'metastore_major_version' => self::METASTORE_VERSION,
	];
	$versionDoc = new \Elastica\Document( self::METASTORE_VERSION_DOCID, $fields );

	$index->addDocument( $versionDoc );
}
| 365 | |
/**
 * Send a message to the output handler, if there is one.
 *
 * @param string $msg log message
 */
private function log( $msg ) {
	if ( !$this->out ) {
		// No output handler: nothing to report to.
		return;
	}

	$this->out->output( $msg );
}
| 374 | |
/**
 * Get the index handle for INDEX_NAME from the connection.
 *
 * @return \Elastica\Index
 */
public function elasticaIndex(): \Elastica\Index {
	$index = $this->connection->getIndex( self::INDEX_NAME );

	return $index;
}
| 378 | |
/**
 * Tell whether cirrus is ready on this cluster, i.e. whether the
 * metastore index has been created there.
 *
 * @return bool result of the existence check on the metastore index
 */
public function cirrusReady() {
	$metastore = $this->elasticaIndex();

	return $metastore->exists();
}
| 386 | |
/**
 * Read the version recorded in the metastore version document.
 *
 * @return int the version of the meta store. 0 means that
 *  the metastore has never been created.
 * @throws \Elastica\Exception\ResponseException for any response error
 *  other than the metastore index being reported as not found
 */
public function metastoreVersion() {
	try {
		$versionDoc = $this->elasticaIndex()->getDocument( self::METASTORE_VERSION_DOCID );
	} catch ( \Elastica\Exception\NotFoundException ) {
		return 0;
	} catch ( \Elastica\Exception\ResponseException $e ) {
		// BC code in case the metastore alias does not exist yet
		$fullError = $e->getResponse()->getFullError();
		$aliasMissing = isset( $fullError['type'], $fullError['index'] )
			&& $fullError['type'] === 'index_not_found_exception'
			&& $fullError['index'] === self::INDEX_NAME;
		if ( !$aliasMissing ) {
			throw $e;
		}

		return 0;
	}

	return (int)$versionDoc->get( 'metastore_major_version' );
}
| 410 | |
/**
 * Read the CirrusSearchMasterTimeout setting, sent as the master_timeout
 * parameter of the index creation and alias requests.
 *
 * @return string
 */
private function getMasterTimeout(): string {
	$masterTimeout = $this->config->get( 'CirrusSearchMasterTimeout' );

	return $masterTimeout;
}
| 414 | } |