Code Coverage |
||||||||||
Classes and Traits |
Functions and Methods |
Lines |
||||||||
Total | |
0.00% |
0 / 1 |
|
0.00% |
0 / 20 |
CRAP | |
0.00% |
0 / 134 |
MetaStoreIndex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 20 |
1892 | |
0.00% |
0 / 134 |
__construct | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 6 |
|||
versionStore | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 1 |
|||
namespaceStore | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 1 |
|||
saneitizeJobStore | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 1 |
|||
stores | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 3 |
|||
createIfNecessary | |
0.00% |
0 / 1 |
6 | |
0.00% |
0 / 6 |
|||
createOrUpgradeIfNecessary | |
0.00% |
0 / 1 |
20 | |
0.00% |
0 / 11 |
|||
buildIndexConfiguration | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 10 |
|||
createNewIndex | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 13 |
|||
buildMapping | |
0.00% |
0 / 1 |
20 | |
0.00% |
0 / 11 |
|||
switchAliasTo | |
0.00% |
0 / 1 |
30 | |
0.00% |
0 / 21 |
|||
getAliasedIndexName | |
0.00% |
0 / 1 |
30 | |
0.00% |
0 / 12 |
|||
upgradeIndexVersion | |
0.00% |
0 / 1 |
6 | |
0.00% |
0 / 13 |
|||
runtimeVersion | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 1 |
|||
storeMetastoreVersion | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 6 |
|||
log | |
0.00% |
0 / 1 |
6 | |
0.00% |
0 / 3 |
|||
elasticaIndex | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 1 |
|||
cirrusReady | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 1 |
|||
metastoreVersion | |
0.00% |
0 / 1 |
56 | |
0.00% |
0 / 12 |
|||
getMasterTimeout | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 1 |
<?php | |
namespace CirrusSearch\MetaStore; | |
use CirrusSearch\Connection; | |
use CirrusSearch\Maintenance\AnalysisConfigBuilder; | |
use CirrusSearch\Maintenance\AnalysisFilter; | |
use CirrusSearch\Maintenance\ConfigUtils; | |
use CirrusSearch\Maintenance\Printer; | |
use CirrusSearch\SearchConfig; | |
/** | |
* This program is free software; you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License as published by | |
* the Free Software Foundation; either version 2 of the License, or | |
* (at your option) any later version. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License along | |
* with this program; if not, write to the Free Software Foundation, Inc., | |
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
* http://www.gnu.org/copyleft/gpl.html | |
*/ | |
/**
 * Utility class to manage a multipurpose metadata storage index for cirrus.
 * This store is used to store persistent states related to administrative
 * tasks (index settings upgrade, wiki namespace names, ...).
 *
 * The data lives in a concrete index named INDEX_NAME . '_' . <suffix>; the
 * INDEX_NAME alias points at whichever concrete index is current. Upgrades
 * create a new concrete index, reindex into it and then move the alias.
 */
class MetaStoreIndex {
	/**
	 * @const int version of the index, increment when mappings change
	 */
	private const METASTORE_VERSION = 4;

	/**
	 * @const string the doc id used to store version information related
	 * to the meta store itself. This value is not supposed to be changed.
	 */
	private const METASTORE_VERSION_DOCID = 'metastore_version';

	/**
	 * @const string index name
	 */
	public const INDEX_NAME = 'mw_cirrus_metastore';

	/**
	 * @const string type for storing internal data
	 */
	private const INTERNAL_TYPE = 'internal';

	/**
	 * @var Connection
	 */
	private $connection;

	/**
	 * @var \Elastica\Client
	 */
	private $client;

	/**
	 * @var Printer|null output handler
	 */
	private $out;

	/**
	 * @var SearchConfig
	 */
	private $config;

	/**
	 * @var ConfigUtils
	 */
	private $configUtils;

	/**
	 * @param Connection $connection connection used to reach the cluster
	 * @param Printer $out output handler receiving progress messages
	 * @param SearchConfig $config configuration (wiki id, banned plugins, master timeout)
	 */
	public function __construct(
		Connection $connection, Printer $out, SearchConfig $config
	) {
		$this->connection = $connection;
		$this->client = $connection->getClient();
		$this->configUtils = new ConfigUtils( $this->client, $out );
		$this->out = $out;
		$this->config = $config;
	}

	/**
	 * @return MetaVersionStore
	 */
	public function versionStore() {
		return new MetaVersionStore( $this->elasticaIndex(), $this->connection );
	}

	/**
	 * @return MetaNamespaceStore
	 */
	public function namespaceStore() {
		return new MetaNamespaceStore( $this->elasticaIndex(), $this->config->getWikiId() );
	}

	/**
	 * @return MetaSaneitizeJobStore
	 */
	public function saneitizeJobStore() {
		return new MetaSaneitizeJobStore( $this->elasticaIndex() );
	}

	/**
	 * @return MetaStore[] all the stores hosted by the metastore index, keyed
	 *  by a short name ('version', 'namespace', 'saneitize')
	 */
	public function stores() {
		return [
			'version' => $this->versionStore(),
			'namespace' => $this->namespaceStore(),
			'saneitize' => $this->saneitizeJobStore(),
		];
	}

	/**
	 * Create the metastore index and its alias when they are missing.
	 *
	 * @return \Elastica\Index|null Index on creation, or null if the index
	 *  already exists.
	 */
	public function createIfNecessary() {
		// If the mw_cirrus_metastore alias does not exist it
		// means we need to create everything from scratch.
		if ( $this->cirrusReady() ) {
			return null;
		}
		$this->log( self::INDEX_NAME . " missing, creating new metastore index.\n" );
		$newIndex = $this->createNewIndex();
		$this->switchAliasTo( $newIndex );
		return $newIndex;
	}

	/**
	 * Create the metastore if it is missing, otherwise upgrade it when the
	 * stored version is older than the version expected by this code.
	 *
	 * @throws \Exception when the stored version is newer than
	 *  METASTORE_VERSION (downgrades are not supported)
	 */
	public function createOrUpgradeIfNecessary() {
		if ( $this->createIfNecessary() !== null ) {
			// Freshly created: already at the runtime version.
			return;
		}

		$version = $this->metastoreVersion();
		if ( $version < self::METASTORE_VERSION ) {
			$this->log( self::INDEX_NAME . " version mismatch, upgrading.\n" );
			$this->upgradeIndexVersion();
		} elseif ( $version > self::METASTORE_VERSION ) {
			throw new \Exception(
				"Metastore version $version found, cannot upgrade to a lower version: " .
				self::METASTORE_VERSION
			);
		}
	}

	/**
	 * Build the body of the index creation request (settings + mappings).
	 *
	 * @return array index configuration with 'settings' and 'mappings' keys
	 */
	private function buildIndexConfiguration(): array {
		$plugins = $this->configUtils->scanAvailablePlugins(
			$this->config->get( 'CirrusSearchBannedPlugins' ) );
		$filter = new AnalysisFilter();
		[ $analysis, $mappings ] = $filter->filterAnalysis(
			// Why 'aa'? It comes first? Hoping it receives generic language treatment.
			( new AnalysisConfigBuilder( 'aa', $plugins ) )->buildConfig(),
			$this->buildMapping()
		);

		return [
			// Don't forget to update METASTORE_VERSION when changing something
			// in the settings.
			'settings' => [
				'index' => [
					'number_of_shards' => 1,
					'auto_expand_replicas' => '0-2',
					'analysis' => $analysis,
				]
			],
			'mappings' => $mappings,
		];
	}

	/**
	 * Create a new metastore index, wait for it to become green and store
	 * the metastore version document in it.
	 *
	 * @param string $suffix index suffix
	 * @return \Elastica\Index the newly created index
	 */
	private function createNewIndex( $suffix = 'first' ) {
		$name = self::INDEX_NAME . '_' . $suffix;
		$this->log( "Creating metastore index... $name" );
		// @todo utilize $this->getIndex()->create(...) once it supports setting
		// the master_timeout parameter.
		$index = $this->client->getIndex( $name );
		$index->request(
			'',
			\Elastica\Request::PUT,
			$this->buildIndexConfiguration(),
			[
				'master_timeout' => $this->getMasterTimeout(),
				'include_type_name' => 'false'
			]
		);
		$this->log( " ok\n" );
		$this->configUtils->waitForGreen( $index->getName(), 3600 );
		$this->storeMetastoreVersion( $index );
		return $index;
	}

	/**
	 * Build the mapping by merging the properties declared by every store
	 * on top of the common 'type' and 'wiki' fields.
	 *
	 * Don't forget to update METASTORE_VERSION when changing something
	 * in the settings.
	 *
	 * @return array the mapping
	 * @throws \Exception when two stores (or a store and the common fields)
	 *  declare the same property name
	 */
	private function buildMapping(): array {
		$properties = [
			'type' => [ 'type' => 'keyword' ],
			'wiki' => [ 'type' => 'keyword' ],
		];

		foreach ( $this->stores() as $store ) {
			// TODO: Reuse field definition implementations from page indices?
			$storeProperties = $store->buildIndexProperties();
			if ( !$storeProperties ) {
				continue;
			}
			$overlap = array_intersect_key( $properties, $storeProperties );
			if ( $overlap ) {
				throw new \Exception( 'Metastore property overlap on: ' . implode( ', ', array_keys( $overlap ) ) );
			}
			$properties += $storeProperties;
		}

		return [
			'dynamic' => false,
			'properties' => $properties,
		];
	}

	/**
	 * Switch the mw_cirrus_metastore alias to this new index name, then
	 * delete the index the alias was previously pointing at (if any).
	 *
	 * @param \Elastica\Index $index
	 * @throws \Exception when the alias already points at $index
	 */
	private function switchAliasTo( $index ): void {
		$name = $index->getName();
		$oldIndexName = $this->getAliasedIndexName();

		// Checked before anything is logged or sent: swapping an index with
		// itself would end with the "old" (i.e. the only) index being deleted.
		if ( $oldIndexName === $name ) {
			throw new \Exception(
				"Cannot switch aliases old and new index names are identical: $name"
			);
		}

		$actions = [
			[
				'add' => [
					'index' => $name,
					'alias' => self::INDEX_NAME,
				]
			],
		];
		if ( $oldIndexName !== null ) {
			$this->log( "Switching " . self::INDEX_NAME . " alias from $oldIndexName to $name.\n" );
			$actions[] = [
				'remove' => [
					'index' => $oldIndexName,
					'alias' => self::INDEX_NAME,
				]
			];
		} else {
			$this->log( "Creating " . self::INDEX_NAME . " alias to $name.\n" );
		}

		// Add and remove are sent in a single _aliases request.
		$this->client->request(
			'_aliases',
			\Elastica\Request::POST,
			[ 'actions' => $actions ],
			[ 'master_timeout' => $this->getMasterTimeout() ]
		);

		if ( $oldIndexName !== null ) {
			$this->log( "Deleting old index $oldIndexName\n" );
			$this->connection->getIndex( $oldIndexName )->delete();
		}
	}

	/**
	 * @return string|null the current index behind the self::INDEX_NAME
	 *  alias or null if the alias does not exist
	 * @throws \Exception when more than one index carries the alias
	 */
	private function getAliasedIndexName() {
		// FIXME: Elastica seems to have trouble parsing the error reason
		// for this endpoint. Running a simple HEAD first to check if it
		// exists
		$path = '_alias/' . self::INDEX_NAME;
		$resp = $this->client->request( $path, \Elastica\Request::HEAD, [] );
		if ( $resp->getStatus() === 404 ) {
			return null;
		}

		$resp = $this->client->request( $path, \Elastica\Request::GET, [] );
		$indexName = null;
		foreach ( $resp->getData() as $index => $aliases ) {
			if ( !isset( $aliases['aliases'][self::INDEX_NAME] ) ) {
				continue;
			}
			if ( $indexName !== null ) {
				throw new \Exception( "Multiple indices are aliased with " . self::INDEX_NAME .
					", please fix manually." );
			}
			// PHP converts numeric-string array keys to int; cast back so the
			// documented string|null contract holds for strict comparisons.
			$indexName = (string)$index;
		}
		return $indexName;
	}

	/**
	 * Make sure the reindex module is available on the cluster.
	 *
	 * @throws \Exception when the module list does not contain 'reindex'
	 */
	private function assertReindexModuleAvailable(): void {
		$modules = $this->configUtils->scanAvailableModules();
		// in_array() rather than array_search(): the latter returns the key of
		// the match, which is 0 (falsy) when 'reindex' is the first module.
		if ( !in_array( 'reindex', $modules, true ) ) {
			throw new \Exception( "The reindex module is mandatory to upgrade the metastore" );
		}
	}

	/**
	 * Upgrade the metastore: create a new index suffixed with the current
	 * timestamp, reindex the existing content into it and switch the alias.
	 *
	 * @throws \Exception when the reindex module is not available
	 */
	private function upgradeIndexVersion(): void {
		$this->assertReindexModuleAvailable();
		$index = $this->createNewIndex( (string)time() );
		// Reindex everything except the internal type, it's not clear
		// yet if we just need to filter the metastore version info or
		// the whole internal type. Currently we only use the internal
		// type for storing the metastore version.
		$reindex = [
			'source' => [
				'index' => self::INDEX_NAME,
				'query' => [
					'bool' => [
						'must_not' => [
							[ 'term' => [ 'type' => self::INTERNAL_TYPE ] ]
						],
					]
				],
			],
			'dest' => [ 'index' => $index->getName() ],
		];
		// reindex is extremely fast so we can wait for it
		// we might consider using the task manager if this process
		// becomes longer and/or prone to curl timeouts
		$this->client->request( '_reindex',
			\Elastica\Request::POST,
			$reindex,
			[ 'wait_for_completion' => 'true' ]
		);
		$index->refresh();
		$this->switchAliasTo( $index );
	}

	/**
	 * @return int version of metastore index expected by runtime
	 */
	public function runtimeVersion() {
		return self::METASTORE_VERSION;
	}

	/**
	 * Write the document recording METASTORE_VERSION into the given index.
	 *
	 * @param \Elastica\Index $index new index
	 */
	private function storeMetastoreVersion( $index ): void {
		$versionDoc = new \Elastica\Document(
			self::METASTORE_VERSION_DOCID,
			[
				'type' => self::INTERNAL_TYPE,
				'metastore_major_version' => self::METASTORE_VERSION,
			]
		);
		$index->getType( '_doc' )->addDocument( $versionDoc );
	}

	/**
	 * Forward a message to the output handler, if one is set.
	 *
	 * @param string $msg log message
	 */
	private function log( $msg ): void {
		if ( $this->out ) {
			$this->out->output( $msg );
		}
	}

	/**
	 * @return \Elastica\Index handle on INDEX_NAME (the metastore alias)
	 */
	public function elasticaIndex(): \Elastica\Index {
		return $this->connection->getIndex( self::INDEX_NAME );
	}

	/**
	 * Check if cirrus is ready by checking if the index has been created on this cluster
	 * @return bool
	 */
	public function cirrusReady() {
		return $this->elasticaIndex()->exists();
	}

	/**
	 * @return int the version of the meta store. 0 means that
	 *  the metastore has never been created.
	 * @throws \Elastica\Exception\ResponseException on any error response
	 *  other than "index not found" for INDEX_NAME
	 */
	public function metastoreVersion() {
		try {
			// TODO: remove references to type (T308044)
			$doc = $this->elasticaIndex()->getType( '_doc' )->getDocument( self::METASTORE_VERSION_DOCID );
		} catch ( \Elastica\Exception\NotFoundException $e ) {
			return 0;
		} catch ( \Elastica\Exception\ResponseException $e ) {
			// BC code in case the metastore alias does not exist yet
			$fullError = $e->getResponse()->getFullError();
			$indexMissing = isset( $fullError['type'] )
				&& $fullError['type'] === 'index_not_found_exception'
				&& isset( $fullError['index'] )
				&& $fullError['index'] === self::INDEX_NAME;
			if ( $indexMissing ) {
				return 0;
			}
			throw $e;
		}
		return (int)$doc->get( 'metastore_major_version' );
	}

	/**
	 * @return mixed value of the CirrusSearchMasterTimeout setting, sent as
	 *  the master_timeout request parameter
	 */
	private function getMasterTimeout() {
		return $this->config->get( 'CirrusSearchMasterTimeout' );
	}
}