11use Elastica\Aggregation\Terms;
14use Elastica\Exception\ExceptionInterface;
16use Elastica\Query\BoolQuery;
17use Elastica\Query\FunctionScore;
18use Elastica\Query\MatchQuery;
19use Elastica\Query\Term;
20use MediaWiki\Extension\Elastica\MWElasticUtils;
22use MediaWiki\Logger\LoggerFactory;
23use MediaWiki\MediaWikiServices;
39 private const BULK_INDEX_RETRY_ATTEMPTS = 5;
46 private const WAIT_UNTIL_READY_TIMEOUT = 3600;
60 return $suggestion[
'wiki'] === WikiMap::getCurrentWikiId();
64 return $suggestion[
'uri'];
67 public function query( $sourceLanguage, $targetLanguage, $text ) {
69 return $this->doQuery( $sourceLanguage, $targetLanguage, $text );
70 }
catch ( Exception $e ) {
75 protected function doQuery( $sourceLanguage, $targetLanguage, $text ) {
79 throw new RuntimeException(
'The wikimedia extra plugin is mandatory.' );
85 $connection = $this->getClient()->getConnection();
86 $oldTimeout = $connection->getTimeout();
87 $connection->setTimeout( 10 );
90 $fuzzyQuery->setLikeText( $text );
91 $fuzzyQuery->addFields( [
'content' ] );
93 $boostQuery =
new FunctionScore();
94 $boostQuery->addFunction(
95 'levenshtein_distance_score',
101 $boostQuery->setBoostMode( FunctionScore::BOOST_MODE_REPLACE );
105 $bool =
new BoolQuery();
106 $bool->addFilter( $fuzzyQuery );
107 $bool->addMust( $boostQuery );
109 $languageFilter =
new Term();
110 $languageFilter->setTerm(
'language', $sourceLanguage );
111 $bool->addFilter( $languageFilter );
114 $query =
new Query();
115 $query->setQuery( $bool );
126 $sizeSecond = $sizeFirst * 5;
128 $query->setFrom( 0 );
129 $query->setSize( $sizeFirst );
130 $query->setParam(
'_source', [
'content' ] );
131 $cutoff = $this->config[
'cutoff'] ?? 0.65;
132 $query->setParam(
'min_score', $cutoff );
133 $query->setSort( [
'_score',
'wiki',
'localid' ] );
140 $contents = $scores = $terms = [];
142 $resultset = $this->getIndex()->search( $query );
144 if ( count( $resultset ) === 0 ) {
148 foreach ( $resultset->getResults() as $result ) {
149 $data = $result->getData();
150 $score = $result->getScore();
152 $sourceId = preg_replace(
'~/[^/]+$~',
'', $result->getId() );
153 $contents[$sourceId] = $data[
'content'];
154 $scores[$sourceId] = $score;
155 $terms[] =
"$sourceId/$targetLanguage";
162 if ( count( array_unique( $scores ) ) > 5 ) {
169 if ( count( $resultset ) === $sizeSecond ) {
175 $query->setParam(
'min_score', $score );
176 $query->setFrom( $query->getParam(
'size' ) + $query->getParam(
'from' ) );
177 $query->setSize( $sizeSecond );
180 }
while ( $resultset->getTotalHits() > count( $contents ) );
186 if ( $terms !== [] ) {
187 $idQuery =
new Query\Terms(
'_id', $terms );
189 $query =
new Query( $idQuery );
190 $query->setSize( 25 );
191 $query->setParam(
'_source', [
'wiki',
'uri',
'content',
'localid' ] );
192 $resultset = $this->getIndex()->search( $query );
194 foreach ( $resultset->getResults() as $result ) {
195 $data = $result->getData();
198 $sourceId = preg_replace(
'~/[^/]+$~',
'', $result->getId() );
201 'source' => $contents[$sourceId],
202 'target' => $data[
'content'],
203 'context' => $data[
'localid'],
204 'quality' => $scores[$sourceId],
205 'wiki' => $data[
'wiki'],
206 'location' => $data[
'localid'] .
'/' . $targetLanguage,
207 'uri' => $data[
'uri'],
212 uasort( $suggestions,
static function ( $a, $b ) {
213 if ( $a[
'quality'] === $b[
'quality'] ) {
217 return ( $a[
'quality'] < $b[
'quality'] ) ? 1 : -1;
221 $connection->setTimeout( $oldTimeout );
251 $sourceLanguage = $handle->
getGroup()->getSourceLanguage();
254 if ( $handle->
getCode() !== $sourceLanguage ) {
256 $this->deleteByQuery( $this->getIndex(), Query::create(
258 ->addFilter(
new Term( [
'wiki' => WikiMap::getCurrentWikiId() ] ) )
259 ->addFilter(
new Term( [
'language' => $handle->
getCode() ] ) )
260 ->addFilter(
new Term( [
'localid' => $localid ] ) ) ) );
264 if ( $targetText ===
null ) {
269 if ( $sourceLanguage ===
null ) {
277 MWElasticUtils::withRetry( self::BULK_INDEX_RETRY_ATTEMPTS,
278 function () use ( $doc ) {
279 $this->getIndex()->addDocuments( [ $doc ] );
281 static function ( $e, $errors ) use ( $fname ) {
282 $c = get_class( $e );
283 $msg = $e->getMessage();
284 error_log( $fname .
": update failed ($c: $msg); retrying." );
299 $language = $handle->
getCode();
302 $wiki = WikiMap::getCurrentWikiId();
303 $globalid =
"$wiki-$localid-$revId/$language";
307 'uri' => $handle->
getTitle()->getCanonicalURL(),
308 'localid' => $localid,
309 'language' => $language,
314 return new Document( $globalid, $data,
'_doc' );
325 'number_of_shards' => $this->getShardCount(),
329 'type' =>
'edge_ngram',
337 'tokenizer' =>
'standard',
338 'filter' => [
'lowercase',
'prefix_filter' ]
341 'tokenizer' =>
'standard'
348 $replicas = $this->getReplicaCount();
349 if ( strpos( $replicas,
'-' ) ===
false ) {
350 $indexSettings[
'settings'][
'index'][
'number_of_replicas'] = $replicas;
352 $indexSettings[
'settings'][
'index'][
'auto_expand_replicas'] = $replicas;
355 $this->getIndex()->create( $indexSettings, $rebuild );
364 $this->checkElasticsearchVersion();
365 $index = $this->getIndex();
366 if ( $this->updateMapping ) {
367 $this->logOutput(
'Updating the index mappings...' );
369 } elseif ( !$index->exists() ) {
373 $settings = $index->getSettings();
374 $settings->setRefreshInterval(
'-1' );
376 $this->deleteByQuery( $this->getIndex(), Query::create(
377 (
new Term() )->setTerm(
'wiki', WikiMap::getCurrentWikiId() ) ) );
380 'wiki' => [
'type' =>
'keyword' ],
381 'localid' => [
'type' =>
'keyword' ],
382 'uri' => [
'type' =>
'keyword' ],
383 'language' => [
'type' =>
'keyword' ],
384 'group' => [
'type' =>
'keyword' ],
390 'term_vector' =>
'yes'
392 'prefix_complete' => [
394 'analyzer' =>
'prefix',
395 'search_analyzer' =>
'standard',
396 'term_vector' =>
'yes'
398 'case_sensitive' => [
400 'analyzer' =>
'casesensitive',
401 'term_vector' =>
'yes'
406 if ( $this->useElastica6() ) {
409 $mapping = new \Elastica\Type\Mapping();
411 $mapping->setType( $index->getType(
'_doc' ) );
413 $mapping->setProperties( $properties );
415 $mapping->send( [
'include_type_name' =>
'true' ] );
418 $mapping = new \Elastica\Mapping( $properties );
419 $mapping->send( $index, [
'include_type_name' =>
'false' ] );
422 $this->waitUntilReady();
434 $lb = MediaWikiServices::getInstance()->getLinkBatchFactory()->newLinkBatch();
435 foreach ( $batch as $data ) {
436 $lb->addObj( $data[0]->getTitle() );
445 foreach ( $batch as $data ) {
446 [ $handle, $sourceLanguage, $text ] = $data;
447 $revId = $handle->getTitleForLanguage( $sourceLanguage )->getLatestRevID();
451 MWElasticUtils::withRetry( self::BULK_INDEX_RETRY_ATTEMPTS,
452 function () use ( $docs ) {
453 $this->getIndex()->addDocuments( $docs );
455 function ( $e, $errors ) {
456 $c = get_class( $e );
457 $msg = $e->getMessage();
458 $this->logOutput(
"Batch failed ($c: $msg), trying again in 10 seconds" );
469 $index = $this->getIndex();
471 $index->forcemerge();
472 $index->getSettings()->setRefreshInterval(
'5s' );
/**
 * Return the Elastica client, creating it on first use.
 *
 * The client is built from $this->config['config'] when that key is set,
 * otherwise with no explicit configuration, and is then kept on the instance
 * so later calls reuse it.
 *
 * @return Client
 */
public function getClient() {
	if ( $this->client ) {
		return $this->client;
	}

	$this->client = isset( $this->config['config'] )
		? new Client( $this->config['config'] )
		: new Client();

	return $this->client;
}
488 return isset( $this->config[
'use_wikimedia_extra'] ) && $this->config[
'use_wikimedia_extra'];
/**
 * Name of the Elasticsearch index used by this server.
 *
 * @return string The configured 'index' value, or 'ttmserver' when unset.
 */
private function getIndexName() {
	if ( isset( $this->config['index'] ) ) {
		return $this->config['index'];
	}

	return 'ttmserver';
}
/**
 * Return the index handle for this server, obtained from the client
 * under the name given by getIndexName().
 *
 * @return \Elastica\Index
 */
public function getIndex() {
	$client = $this->getClient();

	return $client->getIndex( $this->getIndexName() );
}
/**
 * Number of shards to request when the index is created.
 *
 * @return int|string The configured 'shards' value, or 1 when unset.
 */
protected function getShardCount() {
	return isset( $this->config['shards'] ) ? $this->config['shards'] : 1;
}
/**
 * Replica setting to apply when the index is created.
 *
 * @return int|string The configured 'replicas' value, or the range '0-2'
 *  when unset.
 */
protected function getReplicaCount() {
	return isset( $this->config['replicas'] ) ? $this->config['replicas'] : '0-2';
}
518 $path =
"_cluster/health/$indexName";
519 $response = $this->getClient()->request( $path );
520 if ( $response->hasError() ) {
521 throw new Exception(
"Error while fetching index health status: " . $response->getError() );
523 return $response->getData();
542 while ( ( $startTime + $timeout ) > time() ) {
545 $status = $response[
'status'] ??
'unknown';
546 if ( $status ===
'green' ) {
547 $this->logOutput(
"\tGreen!" );
550 $this->logOutput(
"\tIndex is $status retrying..." );
552 }
catch ( Exception $e ) {
553 $this->logOutput(
"Error while waiting for green ({$e->getMessage()}), retrying..." );
559 protected function waitUntilReady() {
560 $statuses = MWElasticUtils::waitForGreen(
562 $this->getIndexName(),
563 self::WAIT_UNTIL_READY_TIMEOUT );
564 $this->logOutput(
"Waiting for the index to go green..." );
565 foreach ( $statuses as $message ) {
566 $this->logOutput( $message );
569 if ( !$statuses->getReturn() ) {
570 die(
"Timeout! Please check server logs for {$this->getIndexName()}." );
574 public function setLogger(
$logger ) {
/**
 * Relay a line of status output to the attached logger, if there is one.
 *
 * @param string $text Message to emit; a newline is appended.
 */
protected function logOutput( $text ) {
	if ( !$this->logger ) {
		// Nothing is listening, so the message is dropped.
		return;
	}

	$this->logger->statusLine( $text . "\n" );
}
590 $this->updateMapping =
true;
600 $fields = $highlights = [];
601 $terms = preg_split(
'/\s+/', $queryString );
602 $match = $opts[
'match'];
603 $case = $opts[
'case'];
606 foreach ( $terms as $term ) {
607 $prefix = strstr( $term,
'*',
true );
610 $fields[
'content.prefix_complete'][] = $prefix;
611 } elseif ( $case ===
'1' ) {
613 $fields[
'content.case_sensitive'][] = $term;
615 $fields[
'content'][] = $term;
621 $searchQuery =
new BoolQuery();
622 foreach ( $fields as $analyzer => $words ) {
623 foreach ( $words as $word ) {
624 $boolQuery =
new BoolQuery();
625 $contentQuery =
new MatchQuery();
626 $contentQuery->setFieldQuery( $analyzer, $word );
627 $boolQuery->addShould( $contentQuery );
628 $messageQuery =
new Term();
629 $messageQuery->setTerm(
'localid', $word );
630 $boolQuery->addShould( $messageQuery );
632 if ( $match ===
'all' ) {
633 $searchQuery->addMust( $boolQuery );
635 $searchQuery->addShould( $boolQuery );
639 $highlights[$analyzer] = [
640 'number_of_fragments' => 0
645 $title = Title::newFromText( $word );
650 if ( $handle->isValid() && $handle->getCode() !==
'' ) {
651 $localid = $handle->getTitleForBase()->getPrefixedText();
652 $boolQuery =
new BoolQuery();
653 $messageId =
new Term();
654 $messageId->setTerm(
'localid', $localid );
655 $boolQuery->addMust( $messageId );
656 $searchQuery->addShould( $boolQuery );
661 return [ $searchQuery, $highlights ];
672 $query =
new Query();
674 [ $searchQuery, $highlights ] = $this->
parseQueryString( $queryString, $opts );
675 $query->setQuery( $searchQuery );
677 $language =
new Terms(
'language' );
678 $language->setField(
'language' );
679 $language->setSize( 500 );
680 $query->addAggregation( $language );
682 $group =
new Terms(
'group' );
683 $group->setField(
'group' );
686 $group->setSize( 500 );
687 $query->addAggregation( $group );
689 $query->setSize( $opts[
'limit'] );
690 $query->setFrom( $opts[
'offset'] );
696 $filters =
new BoolQuery();
698 $language = $opts[
'language'];
699 if ( $language !==
'' ) {
700 $languageFilter =
new Term();
701 $languageFilter->setTerm(
'language', $language );
702 $filters->addFilter( $languageFilter );
705 $group = $opts[
'group'];
706 if ( $group !==
'' ) {
707 $groupFilter =
new Term();
708 $groupFilter->setTerm(
'group', $group );
709 $filters->addFilter( $groupFilter );
713 if ( $language !==
'' || $group !==
'' ) {
717 $query->setPostFilter( $filters );
720 [ $pre, $post ] = $highlight;
721 $query->setHighlight( [
723 'pre_tags' => [ $pre ],
724 'post_tags' => [ $post ],
725 'fields' => $highlights,
728 return $this->getIndex()->createSearch( $query );
739 public function search( $queryString, $opts, $highlight ) {
740 $search = $this->
createSearch( $queryString, $opts, $highlight );
743 return $search->search();
744 }
catch ( ExceptionInterface $e ) {
754 $aggs = $resultset->getAggregations();
755 '@phan-var array[][][] $aggs';
762 foreach ( $aggs as $type => $info ) {
763 foreach ( $info[
'buckets'] as $row ) {
764 $ret[$type][$row[
'key']] = $row[
'doc_count'];
776 return $resultset->getTotalHits();
785 foreach ( $resultset->getResults() as $document ) {
786 $data = $document->getData();
787 $hl = $document->getHighlights();
788 if ( isset( $hl[
'content.prefix_complete'][0] ) ) {
789 $data[
'content'] = $hl[
'content.prefix_complete'][0];
790 } elseif ( isset( $hl[
'content.case_sensitive'][0] ) ) {
791 $data[
'content'] = $hl[
'content.case_sensitive'][0];
792 } elseif ( isset( $hl[
'content'][0] ) ) {
793 $data[
'content'] = $hl[
'content'][0];
/**
 * Delete every document of $index that matches $query.
 *
 * Any failure is logged to the 'ElasticSearchTTMServer' channel and then
 * rethrown as a RuntimeException that carries the original exception as
 * its previous exception, so the caller keeps the full chain.
 *
 * @param \Elastica\Index $index Index to delete from.
 * @param Query $query Selects the documents to delete.
 * @throws RuntimeException When the deletion fails.
 */
private function deleteByQuery( \Elastica\Index $index, Query $query ) {
	try {
		MWElasticUtils::deleteByQuery( $index, $query, true );
	} catch ( Exception $e ) {
		LoggerFactory::getInstance( 'ElasticSearchTTMServer' )->error(
			'Problem encountered during deletion.',
			[ 'exception' => $e ]
		);

		// The message is unchanged; the third argument chains the cause.
		throw new RuntimeException( "Problem encountered during deletion.\n" . $e, 0, $e );
	}
}
/**
 * Ask the server for its Elasticsearch version.
 *
 * Issues a request against the empty path and reads version.number from
 * the response data.
 *
 * @return string
 * @throws \RuntimeException If the request fails or the response carries
 *  no version number.
 */
private function getElasticsearchVersion(): string {
	$response = $this->getClient()->request( '' );
	if ( !$response->isOK() ) {
		throw new \RuntimeException(
			"Cannot fetch elasticsearch version: " . $response->getError()
		);
	}

	$info = $response->getData();
	$number = $info['version']['number'] ?? null;
	if ( $number === null ) {
		throw new \RuntimeException(
			'Unable to determine elasticsearch version, aborting.'
		);
	}

	return $number;
}
/**
 * Refuse to continue unless the server reports a supported version.
 *
 * A version is accepted when its string starts with '6.8' or with '7.'.
 *
 * @throws \RuntimeException For any other version.
 */
private function checkElasticsearchVersion() {
	$version = $this->getElasticsearchVersion();

	$isSupported = strpos( $version, '6.8' ) === 0
		|| strpos( $version, '7.' ) === 0;
	if ( $isSupported ) {
		return;
	}

	throw new \RuntimeException(
		"Only Elasticsearch 6.8.x and 7.x are supported. Your version: $version."
	);
}
/**
 * Whether the installed Elastica library exposes the \Elastica\Type class.
 *
 * The presence of that class is used as the marker for the Elastica 6 API.
 *
 * @return bool
 */
private function useElastica6(): bool {
	$legacyTypeClass = '\Elastica\Type';

	return class_exists( $legacyTypeClass );
}
// Service wiring for the Translate extension: maps each service name to a
// static factory closure. Factories that need other services receive the
// MediaWikiServices container; entries are kept in alphabetical key order.
return [
	'Translate:ConfigHelper' => static function (): ConfigHelper {
		return new ConfigHelper();
	},

	'Translate:CsvTranslationImporter' => static function ( MediaWikiServices $services ): CsvTranslationImporter {
		return new CsvTranslationImporter( $services->getWikiPageFactory() );
	},

	'Translate:EntitySearch' => static function ( MediaWikiServices $services ): EntitySearch {
		return new EntitySearch(
			$services->getMainWANObjectCache(),
			$services->getCollationFactory()->makeCollation( 'uca-default-u-kn' ),
			MessageGroups::singleton(),
			$services->getNamespaceInfo(),
			$services->get( 'Translate:MessageIndex' ),
			$services->getTitleParser(),
			$services->getTitleFormatter()
		);
	},

	'Translate:ExternalMessageSourceStateImporter' => static function (
		MediaWikiServices $services
	): ExternalMessageSourceStateImporter {
		return new ExternalMessageSourceStateImporter(
			$services->getMainConfig(),
			$services->get( 'Translate:GroupSynchronizationCache' ),
			$services->getJobQueueGroup(),
			LoggerFactory::getInstance( 'Translate.GroupSynchronization' ),
			$services->get( 'Translate:MessageIndex' )
		);
	},

	'Translate:GroupSynchronizationCache' => static function (
		MediaWikiServices $services
	): GroupSynchronizationCache {
		return new GroupSynchronizationCache( $services->get( 'Translate:PersistentCache' ) );
	},

	'Translate:MessageBundleStore' => static function ( MediaWikiServices $services ): MessageBundleStore {
		return new MessageBundleStore(
			new RevTagStore(),
			$services->getJobQueueGroup(),
			$services->getLanguageNameUtils(),
			$services->get( 'Translate:MessageIndex' )
		);
	},

	'Translate:MessageGroupReview' => static function ( MediaWikiServices $services ): MessageGroupReview {
		return new MessageGroupReview(
			$services->getDBLoadBalancer(),
			$services->getHookContainer()
		);
	},

	'Translate:MessageGroupStatsTableFactory' => static function (
		MediaWikiServices $services
	): MessageGroupStatsTableFactory {
		return new MessageGroupStatsTableFactory(
			$services->get( 'Translate:ProgressStatsTableFactory' ),
			$services->getDBLoadBalancer(),
			$services->getLinkRenderer(),
			// Boolean flag: true unless TranslateWorkflowStates is exactly false.
			$services->getMainConfig()->get( 'TranslateWorkflowStates' ) !== false
		);
	},

	'Translate:MessageIndex' => static function ( MediaWikiServices $services ): MessageIndex {
		$params = $services->getMainConfig()->get( 'TranslateMessageIndex' );
		// A bare string is treated as a one-element list holding the class name.
		if ( is_string( $params ) ) {
			$params = (array)$params;
		}
		// The first element names the class; the rest is handed to its constructor.
		$class = array_shift( $params );
		return new $class( $params );
	},

	'Translate:MessagePrefixStats' => static function ( MediaWikiServices $services ): MessagePrefixStats {
		return new MessagePrefixStats( $services->getTitleParser() );
	},

	'Translate:ParsingPlaceholderFactory' => static function (): ParsingPlaceholderFactory {
		return new ParsingPlaceholderFactory();
	},

	'Translate:PersistentCache' => static function ( MediaWikiServices $services ): PersistentCache {
		return new PersistentDatabaseCache(
			$services->getDBLoadBalancer(),
			$services->getJsonCodec()
		);
	},

	'Translate:ProgressStatsTableFactory' => static function (
		MediaWikiServices $services
	): ProgressStatsTableFactory {
		return new ProgressStatsTableFactory(
			$services->getLinkRenderer(),
			$services->get( 'Translate:ConfigHelper' )
		);
	},

	'Translate:SubpageListBuilder' => static function ( MediaWikiServices $services ): SubpageListBuilder {
		return new SubpageListBuilder(
			$services->get( 'Translate:TranslatableBundleFactory' ),
			$services->getLinkBatchFactory()
		);
	},

	'Translate:TranslatableBundleFactory' => static function (
		MediaWikiServices $services
	): TranslatableBundleFactory {
		return new TranslatableBundleFactory(
			$services->get( 'Translate:TranslatablePageStore' ),
			$services->get( 'Translate:MessageBundleStore' )
		);
	},

	'Translate:TranslatableBundleMover' => static function (
		MediaWikiServices $services
	): TranslatableBundleMover {
		return new TranslatableBundleMover(
			$services->getMovePageFactory(),
			$services->getJobQueueGroup(),
			$services->getLinkBatchFactory(),
			$services->get( 'Translate:TranslatableBundleFactory' ),
			$services->get( 'Translate:SubpageListBuilder' ),
			$services->getMainConfig()->get( 'TranslatePageMoveLimit' )
		);
	},

	'Translate:TranslatableBundleStatusStore' => static function (
		MediaWikiServices $services
	): TranslatableBundleStatusStore {
		return new TranslatableBundleStatusStore(
			$services->getDBLoadBalancer()->getConnection( DB_PRIMARY ),
			$services->getCollationFactory()->makeCollation( 'uca-default-u-kn' ),
			$services->getDBLoadBalancer()->getMaintenanceConnectionRef( DB_PRIMARY )
		);
	},

	'Translate:TranslatablePageParser' => static function (
		MediaWikiServices $services
	): TranslatablePageParser {
		return new TranslatablePageParser(
			$services->get( 'Translate:ParsingPlaceholderFactory' )
		);
	},

	'Translate:TranslatablePageStore' => static function (
		MediaWikiServices $services
	): TranslatablePageStore {
		return new TranslatablePageStore(
			$services->get( 'Translate:MessageIndex' ),
			$services->getJobQueueGroup(),
			new RevTagStore(),
			$services->getDBLoadBalancer(),
			$services->get( 'Translate:TranslatableBundleStatusStore' )
		);
	},

	'Translate:TranslationStashReader' => static function (
		MediaWikiServices $services
	): TranslationStashReader {
		$db = $services->getDBLoadBalancer()->getConnectionRef( DB_REPLICA );
		return new TranslationStashStorage( $db );
	},

	'Translate:TranslationStatsDataProvider' => static function (
		MediaWikiServices $services
	): TranslationStatsDataProvider {
		return new TranslationStatsDataProvider(
			new ServiceOptions(
				TranslationStatsDataProvider::CONSTRUCTOR_OPTIONS,
				$services->getMainConfig()
			),
			$services->getObjectFactory()
		);
	},

	'Translate:TranslationUnitStoreFactory' => static function (
		MediaWikiServices $services
	): TranslationUnitStoreFactory {
		return new TranslationUnitStoreFactory( $services->getDBLoadBalancer() );
	},

	'Translate:TranslatorActivity' => static function ( MediaWikiServices $services ): TranslatorActivity {
		$query = new TranslatorActivityQuery(
			$services->getMainConfig(),
			$services->getDBLoadBalancer()
		);

		return new TranslatorActivity(
			$services->getMainObjectStash(),
			$query,
			$services->getJobQueueGroup()
		);
	},

	'Translate:TtmServerFactory' => static function ( MediaWikiServices $services ): TtmServerFactory {
		$config = $services->getMainConfig();

		$default = $config->get( 'TranslateTranslationDefaultService' );
		// A configured value of false is passed on as null.
		if ( $default === false ) {
			$default = null;
		}

		return new TtmServerFactory(
			$config->get( 'TranslateTranslationServices' ),
			$default
		);
	},
];
@phpcs-require-sorted-array
TTMServer backend based on ElasticSearch.
createDocument(MessageHandle $handle, $text, $revId)
update(MessageHandle $handle, $targetText)
Add / update translations.
batchInsertTranslations(array $batch)
Called multiple times per batch if necessary.
$logger
Reference to the maintenance script to relay logging output.
query( $sourceLanguage, $targetLanguage, $text)
Fetches all relevant suggestions for given text.
batchInsertDefinitions(array $batch)
endBatch()
Called after every batch (MessageGroup).
endBootstrap()
Do any cleanup, optimizing etc.
waitForGreen( $indexName, $timeout)
Wait for the index to go green.
search( $queryString, $opts, $highlight)
Search interface.
beginBootstrap()
Begin the bootstrap process.
createIndex( $rebuild)
Create index.
getTotalHits( $resultset)
getDocuments( $resultset)
useWikimediaExtraPlugin()
getIndexHealth( $indexName)
Get index health. TODO: Remove this code in the future as we drop support for older versions of the El...
beginBatch()
Called before every batch (MessageGroup).
createSearch( $queryString, $opts, $highlight)
Search interface.
setDoReIndex()
Force the update of index mappings. @inheritDoc
parseQueryString( $queryString, array $opts)
Parse query string and build the search query.
isLocalSuggestion(array $suggestion)
Determines if the suggestion returned by this TTMServer comes from this wiki or any other wiki.
expandLocation(array $suggestion)
Given suggestion returned by this TTMServer, constructs fully qualified URL to the location of the tr...
$updateMapping
Used for Reindex.
Class for pointing to messages, like Title class is for titles.
getGroup()
Get the primary MessageGroup this message belongs to.
getTitleForLanguage( $code)
Get the title of this message in the given language code.
isValid()
Checks if the handle corresponds to a known message.
getGroupIds()
Returns all message group ids this message belongs to.
getTitle()
Get the original title.
getCode()
Returns the language code.
getTitleForBase()
Get the title for the page base.
Some general static methods for instantiating TTMServer and helpers.
Interface for TTMServer that can be queried (=all of them).
Interface for TTMServer that can act as backend for translation search.
Interface for TTMServer that can be updated.