11use Elastica\Aggregation\Terms;
14use Elastica\Exception\ExceptionInterface;
16use Elastica\Query\BoolQuery;
17use Elastica\Query\FunctionScore;
18use Elastica\Query\MatchQuery;
19use Elastica\Query\Term;
20use MediaWiki\Extension\Elastica\MWElasticUtils;
22use MediaWiki\Logger\LoggerFactory;
38 private const BULK_INDEX_RETRY_ATTEMPTS = 5;
45 private const WAIT_UNTIL_READY_TIMEOUT = 3600;
59 return $suggestion[
'wiki'] === WikiMap::getCurrentWikiId();
63 return $suggestion[
'uri'];
66 public function query( $sourceLanguage, $targetLanguage, $text ) {
68 return $this->doQuery( $sourceLanguage, $targetLanguage, $text );
69 }
catch ( Exception $e ) {
74 protected function doQuery( $sourceLanguage, $targetLanguage, $text ) {
78 throw new RuntimeException(
'The wikimedia extra plugin is mandatory.' );
84 $connection = $this->getClient()->getConnection();
85 $oldTimeout = $connection->getTimeout();
86 $connection->setTimeout( 10 );
89 $fuzzyQuery->setLikeText( $text );
90 $fuzzyQuery->addFields( [
'content' ] );
92 $boostQuery =
new FunctionScore();
93 $boostQuery->addFunction(
94 'levenshtein_distance_score',
100 $boostQuery->setBoostMode( FunctionScore::BOOST_MODE_REPLACE );
104 $bool =
new BoolQuery();
105 $bool->addFilter( $fuzzyQuery );
106 $bool->addMust( $boostQuery );
108 $languageFilter =
new Term();
109 $languageFilter->setTerm(
'language', $sourceLanguage );
110 $bool->addFilter( $languageFilter );
113 $query =
new Query();
114 $query->setQuery( $bool );
125 $sizeSecond = $sizeFirst * 5;
127 $query->setFrom( 0 );
128 $query->setSize( $sizeFirst );
129 $query->setParam(
'_source', [
'content' ] );
130 $cutoff = $this->config[
'cutoff'] ?? 0.65;
131 $query->setParam(
'min_score', $cutoff );
132 $query->setSort( [
'_score',
'wiki',
'localid' ] );
139 $contents = $scores = $terms = [];
141 $resultset = $this->getIndex()->search( $query );
143 if ( count( $resultset ) === 0 ) {
147 foreach ( $resultset->getResults() as $result ) {
148 $data = $result->getData();
149 $score = $result->getScore();
151 $sourceId = preg_replace(
'~/[^/]+$~',
'', $result->getId() );
152 $contents[$sourceId] = $data[
'content'];
153 $scores[$sourceId] = $score;
154 $terms[] =
"$sourceId/$targetLanguage";
161 if ( count( array_unique( $scores ) ) > 5 ) {
168 if ( count( $resultset ) === $sizeSecond ) {
174 $query->setParam(
'min_score', $score );
175 $query->setFrom( $query->getParam(
'size' ) + $query->getParam(
'from' ) );
176 $query->setSize( $sizeSecond );
179 }
while ( $resultset->getTotalHits() > count( $contents ) );
185 if ( $terms !== [] ) {
186 $idQuery =
new Query\Terms(
'_id', $terms );
188 $query =
new Query( $idQuery );
189 $query->setSize( 25 );
190 $query->setParam(
'_source', [
'wiki',
'uri',
'content',
'localid' ] );
191 $resultset = $this->getIndex()->search( $query );
193 foreach ( $resultset->getResults() as $result ) {
194 $data = $result->getData();
197 $sourceId = preg_replace(
'~/[^/]+$~',
'', $result->getId() );
200 'source' => $contents[$sourceId],
201 'target' => $data[
'content'],
202 'context' => $data[
'localid'],
203 'quality' => $scores[$sourceId],
204 'wiki' => $data[
'wiki'],
205 'location' => $data[
'localid'] .
'/' . $targetLanguage,
206 'uri' => $data[
'uri'],
211 uasort( $suggestions,
static function ( $a, $b ) {
212 if ( $a[
'quality'] === $b[
'quality'] ) {
216 return ( $a[
'quality'] < $b[
'quality'] ) ? 1 : -1;
220 $connection->setTimeout( $oldTimeout );
250 $sourceLanguage = $handle->
getGroup()->getSourceLanguage();
253 if ( $handle->
getCode() !== $sourceLanguage ) {
255 $this->deleteByQuery( $this->getIndex(), Query::create(
257 ->addFilter(
new Term( [
'wiki' => WikiMap::getCurrentWikiId() ] ) )
258 ->addFilter(
new Term( [
'language' => $handle->
getCode() ] ) )
259 ->addFilter(
new Term( [
'localid' => $localid ] ) ) ) );
263 if ( $targetText ===
null ) {
268 if ( $sourceLanguage ===
null ) {
276 $mwElasticUtilsClass = $this->getMWElasticUtilsClass();
277 $mwElasticUtilsClass::withRetry( self::BULK_INDEX_RETRY_ATTEMPTS,
278 function () use ( $doc ) {
279 $this->getIndex()->addDocuments( [ $doc ] );
281 static function ( $e, $errors ) use ( $fname ) {
282 $c = get_class( $e );
283 $msg = $e->getMessage();
284 error_log( $fname .
": update failed ($c: $msg); retrying." );
299 $language = $handle->
getCode();
302 $wiki = WikiMap::getCurrentWikiId();
303 $globalid =
"$wiki-$localid-$revId/$language";
307 'uri' => $handle->
getTitle()->getCanonicalURL(),
308 'localid' => $localid,
309 'language' => $language,
314 return new Document( $globalid, $data,
'_doc' );
325 'number_of_shards' => $this->getShardCount(),
329 'type' =>
'edge_ngram',
337 'tokenizer' =>
'standard',
338 'filter' => [
'lowercase',
'prefix_filter' ]
341 'tokenizer' =>
'standard'
348 $replicas = $this->getReplicaCount();
349 if ( strpos( $replicas,
'-' ) ===
false ) {
350 $indexSettings[
'settings'][
'index'][
'number_of_replicas'] = $replicas;
352 $indexSettings[
'settings'][
'index'][
'auto_expand_replicas'] = $replicas;
355 $this->getIndex()->create( $indexSettings, $rebuild );
364 $this->checkElasticsearchVersion();
365 $index = $this->getIndex();
366 if ( $this->updateMapping ) {
367 $this->logOutput(
'Updating the index mappings...' );
369 } elseif ( !$index->exists() ) {
373 $settings = $index->getSettings();
374 $settings->setRefreshInterval(
'-1' );
376 $this->deleteByQuery( $this->getIndex(), Query::create(
377 (
new Term() )->setTerm(
'wiki', WikiMap::getCurrentWikiId() ) ) );
380 'wiki' => [
'type' =>
'keyword' ],
381 'localid' => [
'type' =>
'keyword' ],
382 'uri' => [
'type' =>
'keyword' ],
383 'language' => [
'type' =>
'keyword' ],
384 'group' => [
'type' =>
'keyword' ],
390 'term_vector' =>
'yes'
392 'prefix_complete' => [
394 'analyzer' =>
'prefix',
395 'search_analyzer' =>
'standard',
396 'term_vector' =>
'yes'
398 'case_sensitive' => [
400 'analyzer' =>
'casesensitive',
401 'term_vector' =>
'yes'
406 if ( $this->useElastica6() ) {
409 $mapping = new \Elastica\Type\Mapping();
411 $mapping->setType( $index->getType(
'_doc' ) );
413 $mapping->setProperties( $properties );
415 $mapping->send( [
'include_type_name' =>
'true' ] );
418 $mapping = new \Elastica\Mapping( $properties );
419 $mapping->send( $index, [
'include_type_name' =>
'false' ] );
422 $this->waitUntilReady();
434 $lb =
new LinkBatch();
435 foreach ( $batch as $data ) {
436 $lb->addObj( $data[0]->getTitle() );
445 foreach ( $batch as $data ) {
446 [ $handle, $sourceLanguage, $text ] = $data;
447 $revId = $handle->getTitleForLanguage( $sourceLanguage )->getLatestRevID();
451 $mwElasticUtilsClass = $this->getMWElasticUtilsClass();
452 $mwElasticUtilsClass::withRetry( self::BULK_INDEX_RETRY_ATTEMPTS,
453 function () use ( $docs ) {
454 $this->getIndex()->addDocuments( $docs );
456 function ( $e, $errors ) {
457 $c = get_class( $e );
458 $msg = $e->getMessage();
459 $this->logOutput(
"Batch failed ($c: $msg), trying again in 10 seconds" );
470 $index = $this->getIndex();
472 $index->forcemerge();
473 $index->getSettings()->setRefreshInterval(
'5s' );
476 public function getClient() {
477 if ( !$this->client ) {
478 if ( isset( $this->config[
'config'] ) ) {
479 $this->client =
new Client( $this->config[
'config'] );
481 $this->client =
new Client();
484 return $this->client;
489 return isset( $this->config[
'use_wikimedia_extra'] ) && $this->config[
'use_wikimedia_extra'];
493 private function getIndexName() {
494 return $this->config[
'index'] ??
'ttmserver';
497 public function getIndex() {
498 return $this->getClient()
499 ->getIndex( $this->getIndexName() );
502 protected function getShardCount() {
503 return $this->config[
'shards'] ?? 1;
506 protected function getReplicaCount() {
507 return $this->config[
'replicas'] ??
'0-2';
519 $path =
"_cluster/health/$indexName";
520 $response = $this->getClient()->request( $path );
521 if ( $response->hasError() ) {
522 throw new Exception(
"Error while fetching index health status: " . $response->getError() );
524 return $response->getData();
543 while ( ( $startTime + $timeout ) > time() ) {
546 $status = $response[
'status'] ??
'unknown';
547 if ( $status ===
'green' ) {
548 $this->logOutput(
"\tGreen!" );
551 $this->logOutput(
"\tIndex is $status retrying..." );
553 }
catch ( Exception $e ) {
554 $this->logOutput(
"Error while waiting for green ({$e->getMessage()}), retrying..." );
560 protected function waitUntilReady() {
561 $mwElasticUtilsClass = $this->getMWElasticUtilsClass();
562 $statuses = $mwElasticUtilsClass::waitForGreen(
564 $this->getIndexName(),
565 self::WAIT_UNTIL_READY_TIMEOUT );
566 $this->logOutput(
"Waiting for the index to go green..." );
567 foreach ( $statuses as $message ) {
568 $this->logOutput( $message );
571 if ( !$statuses->getReturn() ) {
572 die(
"Timeout! Please check server logs for {$this->getIndexName()}." );
576 public function setLogger(
$logger ) {
581 protected function logOutput( $text ) {
582 if ( $this->logger ) {
583 $this->logger->statusLine(
"$text\n" );
592 $this->updateMapping =
true;
602 $fields = $highlights = [];
603 $terms = preg_split(
'/\s+/', $queryString );
604 $match = $opts[
'match'];
605 $case = $opts[
'case'];
608 foreach ( $terms as $term ) {
609 $prefix = strstr( $term,
'*',
true );
612 $fields[
'content.prefix_complete'][] = $prefix;
613 } elseif ( $case ===
'1' ) {
615 $fields[
'content.case_sensitive'][] = $term;
617 $fields[
'content'][] = $term;
623 $searchQuery =
new BoolQuery();
624 foreach ( $fields as $analyzer => $words ) {
625 foreach ( $words as $word ) {
626 $boolQuery =
new BoolQuery();
627 $contentQuery =
new MatchQuery();
628 $contentQuery->setFieldQuery( $analyzer, $word );
629 $boolQuery->addShould( $contentQuery );
630 $messageQuery =
new Term();
631 $messageQuery->setTerm(
'localid', $word );
632 $boolQuery->addShould( $messageQuery );
634 if ( $match ===
'all' ) {
635 $searchQuery->addMust( $boolQuery );
637 $searchQuery->addShould( $boolQuery );
641 $highlights[$analyzer] = [
642 'number_of_fragments' => 0
647 $title = Title::newFromText( $word );
652 if ( $handle->isValid() && $handle->getCode() !==
'' ) {
653 $localid = $handle->getTitleForBase()->getPrefixedText();
654 $boolQuery =
new BoolQuery();
655 $messageId =
new Term();
656 $messageId->setTerm(
'localid', $localid );
657 $boolQuery->addMust( $messageId );
658 $searchQuery->addShould( $boolQuery );
663 return [ $searchQuery, $highlights ];
674 $query =
new Query();
676 [ $searchQuery, $highlights ] = $this->
parseQueryString( $queryString, $opts );
677 $query->setQuery( $searchQuery );
679 $language =
new Terms(
'language' );
680 $language->setField(
'language' );
681 $language->setSize( 500 );
682 $query->addAggregation( $language );
684 $group =
new Terms(
'group' );
685 $group->setField(
'group' );
688 $group->setSize( 500 );
689 $query->addAggregation( $group );
691 $query->setSize( $opts[
'limit'] );
692 $query->setFrom( $opts[
'offset'] );
698 $filters =
new BoolQuery();
700 $language = $opts[
'language'];
701 if ( $language !==
'' ) {
702 $languageFilter =
new Term();
703 $languageFilter->setTerm(
'language', $language );
704 $filters->addFilter( $languageFilter );
707 $group = $opts[
'group'];
708 if ( $group !==
'' ) {
709 $groupFilter =
new Term();
710 $groupFilter->setTerm(
'group', $group );
711 $filters->addFilter( $groupFilter );
715 if ( $language !==
'' || $group !==
'' ) {
719 $query->setPostFilter( $filters );
722 [ $pre, $post ] = $highlight;
723 $query->setHighlight( [
725 'pre_tags' => [ $pre ],
726 'post_tags' => [ $post ],
727 'fields' => $highlights,
730 return $this->getIndex()->createSearch( $query );
741 public function search( $queryString, $opts, $highlight ) {
742 $search = $this->
createSearch( $queryString, $opts, $highlight );
745 return $search->search();
746 }
catch ( ExceptionInterface $e ) {
756 $aggs = $resultset->getAggregations();
757 '@phan-var array[][][] $aggs';
764 foreach ( $aggs as $type => $info ) {
765 foreach ( $info[
'buckets'] as $row ) {
766 $ret[$type][$row[
'key']] = $row[
'doc_count'];
778 return $resultset->getTotalHits();
787 foreach ( $resultset->getResults() as $document ) {
788 $data = $document->getData();
789 $hl = $document->getHighlights();
790 if ( isset( $hl[
'content.prefix_complete'][0] ) ) {
791 $data[
'content'] = $hl[
'content.prefix_complete'][0];
792 } elseif ( isset( $hl[
'content.case_sensitive'][0] ) ) {
793 $data[
'content'] = $hl[
'content.case_sensitive'][0];
794 } elseif ( isset( $hl[
'content'][0] ) ) {
795 $data[
'content'] = $hl[
'content'][0];
812 private function deleteByQuery( \Elastica\Index $index, Query $query ) {
814 $mwElasticUtilsClass = $this->getMWElasticUtilsClass();
815 $mwElasticUtilsClass::deleteByQuery( $index, $query,
true );
816 }
catch ( Exception $e ) {
817 LoggerFactory::getInstance(
'ElasticSearchTTMServer' )->error(
818 'Problem encountered during deletion.',
819 [
'exception' => $e ]
822 throw new RuntimeException(
"Problem encountered during deletion.\n" . $e );
831 private function getMWElasticUtilsClass(): string {
832 if ( class_exists( MWElasticUtils::class ) ) {
833 return MWElasticUtils::class;
835 return '\MWElasticUtils';
840 private function getElasticsearchVersion(): string {
841 $response = $this->getClient()->request(
'' );
842 if ( !$response->isOK() ) {
843 throw new \RuntimeException(
"Cannot fetch elasticsearch version: " . $response->getError() );
846 $result = $response->getData();
847 if ( !isset( $result[
'version'][
'number'] ) ) {
848 throw new \RuntimeException(
'Unable to determine elasticsearch version, aborting.' );
851 return $result[
'version' ][
'number' ];
854 private function checkElasticsearchVersion() {
855 $version = $this->getElasticsearchVersion();
856 if ( strpos( $version,
'6.8' ) !== 0 && strpos( $version,
'7.' ) !== 0 ) {
857 throw new \RuntimeException(
"Only Elasticsearch 6.8.x and 7.x are supported. Your version: $version." );
861 private function useElastica6(): bool {
862 return class_exists(
'\Elastica\Type' );
/**
 * Service wiring for the Translate extension.
 *
 * Maps each service name to a static factory closure. Factories that need
 * other services receive the MediaWikiServices container and pull their
 * dependencies from it; sibling Translate services are fetched by name
 * through $services->get().
 *
 * Keys are kept in alphabetical order.
 *
 * @phpcs-require-sorted-array
 */
return [
	'Translate:ConfigHelper' => static function (): ConfigHelper {
		return new ConfigHelper();
	},

	'Translate:CsvTranslationImporter' => static function (
		MediaWikiServices $services
	): CsvTranslationImporter {
		return new CsvTranslationImporter( $services->getWikiPageFactory() );
	},

	'Translate:EntitySearch' => static function ( MediaWikiServices $services ): EntitySearch {
		return new EntitySearch(
			$services->getMainWANObjectCache(),
			$services->getCollationFactory()->makeCollation( 'uca-default-u-kn' ),
			MessageGroups::singleton(),
			$services->getNamespaceInfo(),
			$services->get( 'Translate:MessageIndex' ),
			$services->getTitleParser(),
			$services->getTitleFormatter()
		);
	},

	'Translate:ExternalMessageSourceStateImporter' => static function (
		MediaWikiServices $services
	): ExternalMessageSourceStateImporter {
		return new ExternalMessageSourceStateImporter(
			$services->getMainConfig(),
			$services->get( 'Translate:GroupSynchronizationCache' ),
			$services->getJobQueueGroup(),
			LoggerFactory::getInstance( 'Translate.GroupSynchronization' ),
			MessageIndex::singleton()
		);
	},

	'Translate:GroupSynchronizationCache' => static function (
		MediaWikiServices $services
	): GroupSynchronizationCache {
		return new GroupSynchronizationCache( $services->get( 'Translate:PersistentCache' ) );
	},

	'Translate:MessageBundleStore' => static function ( MediaWikiServices $services ): MessageBundleStore {
		return new MessageBundleStore(
			new RevTagStore(),
			$services->getJobQueueGroup(),
			$services->getLanguageNameUtils(),
			$services->get( 'Translate:MessageIndex' )
		);
	},

	'Translate:MessageGroupReview' => static function ( MediaWikiServices $services ): MessageGroupReview {
		return new MessageGroupReview(
			$services->getDBLoadBalancer(),
			$services->getHookContainer()
		);
	},

	'Translate:MessageIndex' => static function ( MediaWikiServices $services ): MessageIndex {
		$params = $services->getMainConfig()->get( 'TranslateMessageIndex' );
		// A plain string is wrapped into a one-element array so that it can be
		// handled like the array form below.
		if ( is_string( $params ) ) {
			$params = (array)$params;
		}

		// The first element is the class to instantiate; whatever remains is
		// handed to its constructor as a single array argument.
		$class = array_shift( $params );
		return new $class( $params );
	},

	'Translate:ParsingPlaceholderFactory' => static function (): ParsingPlaceholderFactory {
		return new ParsingPlaceholderFactory();
	},

	'Translate:PersistentCache' => static function ( MediaWikiServices $services ): PersistentCache {
		return new PersistentDatabaseCache(
			$services->getDBLoadBalancer(),
			$services->getJsonCodec()
		);
	},

	'Translate:ProgressStatsTableFactory' => static function (
		MediaWikiServices $services
	): ProgressStatsTableFactory {
		return new ProgressStatsTableFactory(
			$services->getLinkRenderer(),
			$services->get( 'Translate:ConfigHelper' )
		);
	},

	'Translate:SubpageListBuilder' => static function ( MediaWikiServices $services ): SubpageListBuilder {
		return new SubpageListBuilder(
			$services->get( 'Translate:TranslatableBundleFactory' ),
			$services->getLinkBatchFactory()
		);
	},

	'Translate:TranslatableBundleFactory' => static function (
		MediaWikiServices $services
	): TranslatableBundleFactory {
		return new TranslatableBundleFactory(
			$services->get( 'Translate:TranslatablePageStore' ),
			$services->get( 'Translate:MessageBundleStore' )
		);
	},

	'Translate:TranslatableBundleMover' => static function (
		MediaWikiServices $services
	): TranslatableBundleMover {
		return new TranslatableBundleMover(
			$services->getMovePageFactory(),
			$services->getJobQueueGroup(),
			$services->getLinkBatchFactory(),
			$services->get( 'Translate:TranslatableBundleFactory' ),
			$services->get( 'Translate:SubpageListBuilder' ),
			$services->getMainConfig()->get( 'TranslatePageMoveLimit' )
		);
	},

	'Translate:TranslatablePageParser' => static function (
		MediaWikiServices $services
	): TranslatablePageParser {
		return new TranslatablePageParser(
			$services->get( 'Translate:ParsingPlaceholderFactory' )
		);
	},

	'Translate:TranslatablePageStore' => static function (
		MediaWikiServices $services
	): TranslatablePageStore {
		return new TranslatablePageStore(
			$services->get( 'Translate:MessageIndex' ),
			$services->getJobQueueGroup(),
			new RevTagStore(),
			$services->getDBLoadBalancer()
		);
	},

	'Translate:TranslationStashReader' => static function (
		MediaWikiServices $services
	): TranslationStashReader {
		$db = $services->getDBLoadBalancer()->getConnectionRef( DB_REPLICA );
		return new TranslationStashStorage( $db );
	},

	'Translate:TranslationStatsDataProvider' => static function (
		MediaWikiServices $services
	): TranslationStatsDataProvider {
		return new TranslationStatsDataProvider(
			new ServiceOptions(
				TranslationStatsDataProvider::CONSTRUCTOR_OPTIONS,
				$services->getMainConfig()
			),
			$services->getObjectFactory()
		);
	},

	'Translate:TranslationUnitStoreFactory' => static function (
		MediaWikiServices $services
	): TranslationUnitStoreFactory {
		return new TranslationUnitStoreFactory( $services->getDBLoadBalancer() );
	},

	'Translate:TranslatorActivity' => static function ( MediaWikiServices $services ): TranslatorActivity {
		$query = new TranslatorActivityQuery(
			$services->getMainConfig(),
			$services->getDBLoadBalancer()
		);

		return new TranslatorActivity(
			$services->getMainObjectStash(),
			$query,
			$services->getJobQueueGroup()
		);
	},

	'Translate:TtmServerFactory' => static function ( MediaWikiServices $services ): TtmServerFactory {
		$config = $services->getMainConfig();

		// A configured value of false is passed on to the factory as null.
		$default = $config->get( 'TranslateTranslationDefaultService' );
		if ( $default === false ) {
			$default = null;
		}

		return new TtmServerFactory( $config->get( 'TranslateTranslationServices' ), $default );
	},
];
@phpcs-require-sorted-array
TTMServer backend based on ElasticSearch.
createDocument(MessageHandle $handle, $text, $revId)
update(MessageHandle $handle, $targetText)
Add / update translations.
batchInsertTranslations(array $batch)
Called multiple times per batch if necessary.
$logger
Reference to the maintenance script to relay logging output.
query( $sourceLanguage, $targetLanguage, $text)
Fetches all relevant suggestions for given text.
batchInsertDefinitions(array $batch)
endBatch()
Called after every batch (MessageGroup).
endBootstrap()
Do any cleanup, optimizing etc.
waitForGreen( $indexName, $timeout)
Wait for the index to go green.
search( $queryString, $opts, $highlight)
Search interface.
beginBootstrap()
Begin the bootstrap process.
createIndex( $rebuild)
Create index.
getTotalHits( $resultset)
getDocuments( $resultset)
useWikimediaExtraPlugin()
getIndexHealth( $indexName)
Get index health. TODO: Remove this code in the future as we drop support for older versions of the El...
beginBatch()
Called before every batch (MessageGroup).
createSearch( $queryString, $opts, $highlight)
Search interface.
setDoReIndex()
Force the update of index mappings.
parseQueryString( $queryString, array $opts)
Parse query string and build the search query.
isLocalSuggestion(array $suggestion)
Determines if the suggestion returned by this TTMServer comes from this wiki or any other wiki.
expandLocation(array $suggestion)
Given suggestion returned by this TTMServer, constructs fully qualified URL to the location of the tr...
$updateMapping
Used for Reindex.
Class for pointing to messages, like Title class is for titles.
getGroup()
Get the primary MessageGroup this message belongs to.
getTitleForLanguage( $code)
Get the title for the given language code.
isValid()
Checks if the handle corresponds to a known message.
getGroupIds()
Returns all message group ids this message belongs to.
getTitle()
Get the original title.
getCode()
Returns the language code.
getTitleForBase()
Get the title for the page base.
Some general static methods for instantiating TTMServer and helpers.
Interface for TTMServer that can be queried (=all of them).
Interface for TTMServer that can act as backend for translation search.
Interface for TTMServer that can be updated.