Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
DatabaseTtmServer.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\TtmServer;
5
6use MediaWiki\MediaWikiServices;
8use Title;
9use TTMServer;
10use WikiMap;
11use Wikimedia\Rdbms\DBQueryError;
12use Wikimedia\Rdbms\IDatabase;
13use Wikimedia\Rdbms\IResultWrapper;
14
23 private array $sids;
24
/**
 * Fetch a database connection for the translation memory tables.
 *
 * The connection is requested from the load balancer with the
 * 'ttmserver' group and the database named in this server's config.
 *
 * @param int $mode DB_REPLICA (default) or DB_PRIMARY
 * @return IDatabase
 */
private function getDB( int $mode = DB_REPLICA ): IDatabase {
	$loadBalancer = MediaWikiServices::getInstance()->getDBLoadBalancer();

	return $loadBalancer->getConnection( $mode, 'ttmserver', $this->config['database'] );
}
30
/**
 * Store (or remove) one translation in the translation memory.
 *
 * @param MessageHandle $handle Message whose translation changed
 * @param string|null $targetText New translation, or null to only remove
 *  the stored translation for this message and language
 * @return bool False when the message was skipped, true once the memory
 *  has been updated
 */
public function update( MessageHandle $handle, ?string $targetText ): bool {
	if ( !( $handle->isValid() && $handle->getCode() !== '' ) ) {
		return false;
	}

	$messageKey = $handle->getKey();
	$messageGroup = $handle->getGroup();
	$targetLanguage = $handle->getCode();
	$sourceLanguage = $messageGroup->getSourceLanguage();

	// Definitions are not stored on their own so that mass imports stay
	// fast; they get added together with the first translation.
	if ( $targetLanguage === $sourceLanguage ) {
		return false;
	}

	$definition = $messageGroup->getMessage( $messageKey, $sourceLanguage );
	if ( !is_string( $definition ) || trim( $definition ) === '' ) {
		return false;
	}

	$context = Title::makeTitle( $handle->getTitle()->getNamespace(), $messageKey );
	$dbw = $this->getDB( DB_PRIMARY );

	// Look up the source row by context AND text. A changed definition
	// therefore gets a fresh source row instead of reusing the old one,
	// which keeps suggestions consistent with the definition they were
	// made for. Stale rows stay until the bootstrap script is rerun.
	$sid = $dbw->selectField(
		'translate_tms',
		'tms_sid',
		[
			'tms_context' => $context->getPrefixedText(),
			'tms_text' => $definition,
		],
		__METHOD__
	);
	if ( $sid === false ) {
		$sid = $this->insertSource( $context, $sourceLanguage, $definition );
	}

	// Drop any previous translation of this source in the target language.
	// (A REPLACE would work as well.)
	$dbw->delete(
		'translate_tmt',
		[
			'tmt_sid' => $sid,
			'tmt_lang' => $targetLanguage,
		],
		__METHOD__
	);

	if ( $targetText === null ) {
		return true;
	}

	// Store the new translation
	$dbw->insert(
		'translate_tmt',
		[
			'tmt_sid' => $sid,
			'tmt_lang' => $targetLanguage,
			'tmt_text' => $targetText,
		],
		__METHOD__
	);

	return true;
}
88
/**
 * Insert a source text (definition) row and, when the text yields any
 * fulltext tokens, the matching fulltext row.
 *
 * @param Title $context Title identifying the message
 * @param string $sourceLanguage Language code of the source text
 * @param string $text The source text
 * @return int Insert id of the new translate_tms row
 */
private function insertSource( Title $context, string $sourceLanguage, string $text ): int {
	$sourceRow = [
		'tms_lang' => $sourceLanguage,
		'tms_len' => mb_strlen( $text ),
		'tms_text' => $text,
		'tms_context' => $context->getPrefixedText(),
	];

	$dbw = $this->getDB( DB_PRIMARY );
	$dbw->insert( 'translate_tms', $sourceRow, __METHOD__ );
	$sid = $dbw->insertId();

	$tokens = $this->filterForFulltext( $sourceLanguage, $text );
	if ( $tokens !== [] ) {
		$dbw->insert(
			'translate_tmf',
			[
				'tmf_sid' => $sid,
				'tmf_text' => implode( ' ', $tokens ),
			],
			__METHOD__
		);
	}

	return $sid;
}
112
/**
 * Tokenize text for the fulltext search.
 *
 * Non-alphanumeric characters are replaced with spaces, the text is
 * word-segmented and lowercased by the language object, and then split
 * on whitespace. Texts with fewer than four words produce no tokens.
 * Otherwise, words of 4 to 15 bytes are kept, de-duplicated and capped
 * at ten.
 *
 * @param string $languageCode Language code used for segmentation and lowercasing
 * @param string $input Text to tokenize
 * @return string[] At most ten unique tokens, possibly empty
 */
protected function filterForFulltext( string $languageCode, string $input ): array {
	$language = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( $languageCode );

	$normalized = $language->lc(
		$language->segmentByWord( preg_replace( '/[^[:alnum:]]/u', ' ', $input ) )
	);
	$words = preg_split( '/\s+/', $normalized, -1, PREG_SPLIT_NO_EMPTY );
	if ( count( $words ) < 4 ) {
		return [];
	}

	$usable = array_filter(
		$words,
		static function ( string $word ): bool {
			// Byte length on purpose, not character length
			$bytes = strlen( $word );

			return $bytes >= 4 && $bytes <= 15;
		}
	);

	return array_slice( array_unique( $usable ), 0, 10 );
}
135
/**
 * Prepare for refilling the translation memory: empty all three tables
 * and drop the fulltext index on translate_tmf.
 */
public function beginBootstrap(): void {
	$dbw = $this->getDB( DB_PRIMARY );

	foreach ( [ 'translate_tms', 'translate_tmt', 'translate_tmf' ] as $tableToEmpty ) {
		$dbw->delete( $tableToEmpty, '*', __METHOD__ );
	}

	$tmfTable = $dbw->tableName( 'translate_tmf' );
	try {
		$dbw->query( "DROP INDEX tmf_text ON $tmfTable", __METHOD__ );
	} catch ( DBQueryError $e ) {
		// Deliberately ignored: the index may already be missing if an
		// earlier run was aborted before it could add the index back.
	}
}
149
/**
 * Start a new batch by forgetting the source ids recorded for the
 * previous one.
 */
public function beginBatch(): void {
	// Refilled by batchInsertDefinitions(), read by batchInsertTranslations()
	$this->sids = [];
}
153
/**
 * Insert the source texts of a batch and remember the id assigned to
 * each one under its batch key, then wait for replication.
 *
 * @param array $batch Map of key => [ message handle, language code, text ]
 */
public function batchInsertDefinitions( array $batch ): void {
	$services = MediaWikiServices::getInstance();
	$titleFactory = $services->getTitleFactory();

	foreach ( $batch as $key => [ $handle, $language, $text ] ) {
		$namespace = $handle->getTitle()->getNamespace();
		$context = $titleFactory->makeTitle( $namespace, $handle->getKey() );
		$this->sids[$key] = $this->insertSource( $context, $language, $text );
	}

	$services->getDBLoadBalancerFactory()->waitForReplication( [ 'ifWritesSince' => 10 ] );
}
165
/**
 * Insert the translations of a batch in a single query, linking each
 * one to the source id recorded under the same batch key, then wait
 * for replication.
 *
 * @param array $batch Map of key => [ (unused), language code, text ]
 */
public function batchInsertTranslations( array $batch ): void {
	$translationRows = [];
	foreach ( $batch as $key => [ , $language, $text ] ) {
		$translationRows[] = [
			'tmt_sid' => $this->sids[$key],
			'tmt_lang' => $language,
			'tmt_text' => $text,
		];
	}

	$this->getDB( DB_PRIMARY )->insert( 'translate_tmt', $translationRows, __METHOD__ );

	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication( [ 'ifWritesSince' => 10 ] );
}
184
/**
 * End-of-batch hook; this backend has nothing to do here.
 */
public function endBatch(): void {
	// Intentionally empty
}
187
/**
 * Finish refilling the translation memory by (re)creating the fulltext
 * index on translate_tmf.tmf_text.
 */
public function endBootstrap(): void {
	$dbw = $this->getDB( DB_PRIMARY );
	$tmfTable = $dbw->tableName( 'translate_tmf' );
	$createIndexSql = "CREATE FULLTEXT INDEX tmf_text ON $tmfTable (tmf_text)";

	$dbw->query( $createIndexSql, __METHOD__ );
}
193
194 /* Reading interface */
195
/**
 * Tell whether a suggestion comes from this wiki.
 *
 * @param array $suggestion Suggestion returned by this server (not inspected)
 * @return bool Always true for this backend
 */
public function isLocalSuggestion( array $suggestion ): bool {
	// The argument is not consulted; every suggestion is reported as local
	return true;
}
199
/**
 * Build the canonical URL for the location of a suggestion.
 *
 * @param array $suggestion Suggestion whose 'location' entry holds a page title
 * @return string Canonical URL of that page
 * @throws \InvalidArgumentException If 'location' is missing or is not a valid title
 */
public function expandLocation( array $suggestion ): string {
	$location = $suggestion['location'] ?? null;
	// Title::newFromText() yields null for text that is not a valid title;
	// check for that instead of failing with a call on null.
	$title = $location !== null ? Title::newFromText( $location ) : null;
	if ( $title === null ) {
		throw new \InvalidArgumentException(
			"Suggestion does not have a valid 'location' to expand"
		);
	}

	return $title->getCanonicalURL();
}
203
/**
 * Fetch translation memory suggestions for a text.
 *
 * Candidate sources are pre-filtered in SQL by language, by a length
 * window derived from the configured cutoff and, when the text yields
 * fulltext tokens, by a fulltext match. The candidates are then scored
 * by processQueryResults().
 *
 * @param string $sourceLanguage Language code of $text
 * @param string $targetLanguage Language code of the wanted translations
 * @param string $text Text to find suggestions for
 * @return array Suggestions as returned by processQueryResults()
 */
public function query( string $sourceLanguage, string $targetLanguage, string $text ): array {
	// Calculate the bounds of the string length which are able
	// to satisfy the cutoff percentage in edit distance.
	// ceil()/floor() return floats; cast so that plain integers end up in the SQL.
	$len = mb_strlen( $text );
	$min = (int)ceil( max( $len * $this->config['cutoff'], 2 ) );
	$max = (int)floor( $len / $this->config['cutoff'] );

	$dbr = $this->getDB();
	$tables = [ 'translate_tmt', 'translate_tms' ];
	$fields = [ 'tms_context', 'tms_text', 'tmt_lang', 'tmt_text' ];

	$conditions = [
		'tms_lang' => $sourceLanguage,
		'tmt_lang' => $targetLanguage,
		"tms_len BETWEEN $min AND $max",
		'tms_sid = tmt_sid',
	];

	// Narrow the candidates further with the fulltext index when possible
	$fulltext = $this->filterForFulltext( $sourceLanguage, $text );
	if ( $fulltext ) {
		$tables[] = 'translate_tmf';
		$list = implode( ' ', $fulltext );
		$conditions[] = 'tmf_sid = tmt_sid';
		// Quote through the database layer rather than by hand, so the
		// literal stays safe regardless of what the tokenizer lets through.
		$conditions[] = 'MATCH(tmf_text) AGAINST( ' . $dbr->addQuotes( $list ) . ' )';
	}

	$res = $dbr->newSelectQueryBuilder()
		->tables( $tables )
		->select( $fields )
		->where( $conditions )
		->caller( __METHOD__ )
		->fetchResultSet();

	return $this->processQueryResults( $res, $text, $targetLanguage );
}
240
/**
 * Score candidate rows against the queried text and keep those whose
 * quality reaches the configured cutoff.
 *
 * Scoring stops after five seconds so that a slow lookup cannot time
 * out the whole request.
 *
 * @param IResultWrapper $res Rows with tms_text, tmt_text and tms_context
 * @param string $text The text that was queried
 * @param string $targetLanguage Language code appended to each location
 * @return array Suggestions ordered by TTMServer::sortSuggestions()
 */
private function processQueryResults( IResultWrapper $res, string $text, string $targetLanguage ): array {
	$timeLimit = microtime( true ) + 5;
	$cutoff = $this->config['cutoff'];

	$lenA = mb_strlen( $text );
	$results = [];
	foreach ( $res as $row ) {
		if ( microtime( true ) > $timeLimit ) {
			// Having no suggestions is better than preventing translation
			// altogether by timing out the request :(
			break;
		}

		$lenB = mb_strlen( $row->tms_text );
		$len = min( $lenA, $lenB );
		if ( $len === 0 ) {
			// An empty string on either side cannot be scored: the
			// quality ratio below would divide by zero.
			continue;
		}

		if ( $len > 600 ) {
			// Too expensive to compare; treat as maximally distant.
			// two strings of length 1500 ~ 10s
			// two strings of length 2250 ~ 30s
			$dist = $len;
		} else {
			$dist = self::levenshtein( $text, $row->tms_text, $lenA, $lenB );
		}
		$quality = 1 - ( $dist * 0.9 / $len );

		if ( $quality >= $cutoff ) {
			$results[] = [
				'source' => $row->tms_text,
				'target' => $row->tmt_text,
				'context' => $row->tms_context,
				'location' => $row->tms_context . '/' . $targetLanguage,
				'quality' => $quality,
				// Fall back to the current wiki when the row has no tms_wiki value
				'wiki' => $row->tms_wiki ?? WikiMap::getCurrentWikiId(),
			];
		}
	}

	return TTMServer::sortSuggestions( $results );
}
280
/**
 * Request a full wipe and rebuild of the index; this backend takes no
 * action here.
 */
public function setDoReIndex(): void {
	// Intentionally empty
}
283}
return[ 'Translate:ConfigHelper'=> static function():ConfigHelper { return new ConfigHelper();}, 'Translate:CsvTranslationImporter'=> static function(MediaWikiServices $services):CsvTranslationImporter { return new CsvTranslationImporter( $services->getWikiPageFactory());}, 'Translate:EntitySearch'=> static function(MediaWikiServices $services):EntitySearch { return new EntitySearch($services->getMainWANObjectCache(), $services->getCollationFactory() ->makeCollation( 'uca-default-u-kn'), MessageGroups::singleton(), $services->getNamespaceInfo(), $services->get( 'Translate:MessageIndex'), $services->getTitleParser(), $services->getTitleFormatter());}, 'Translate:ExternalMessageSourceStateImporter'=> static function(MediaWikiServices $services):ExternalMessageSourceStateImporter { return new ExternalMessageSourceStateImporter($services->getMainConfig(), $services->get( 'Translate:GroupSynchronizationCache'), $services->getJobQueueGroup(), LoggerFactory::getInstance( 'Translate.GroupSynchronization'), $services->get( 'Translate:MessageIndex'));}, 'Translate:FileFormatFactory'=> static function(MediaWikiServices $services):FileFormatFactory { return new FileFormatFactory( $services->getObjectFactory());}, 'Translate:GroupSynchronizationCache'=> static function(MediaWikiServices $services):GroupSynchronizationCache { return new GroupSynchronizationCache( $services->get( 'Translate:PersistentCache'));}, 'Translate:HookRunner'=> static function(MediaWikiServices $services):HookRunner { return new HookRunner( $services->getHookContainer());}, 'Translate:MessageBundleStore'=> static function(MediaWikiServices $services):MessageBundleStore { return new MessageBundleStore($services->get( 'Translate:RevTagStore'), $services->getJobQueueGroup(), $services->getLanguageNameUtils(), $services->get( 'Translate:MessageIndex'));}, 'Translate:MessageGroupReviewStore'=> static function(MediaWikiServices $services):MessageGroupReviewStore { return new 
MessageGroupReviewStore($services->getDBLoadBalancer(), $services->get( 'Translate:HookRunner'));}, 'Translate:MessageGroupStatsTableFactory'=> static function(MediaWikiServices $services):MessageGroupStatsTableFactory { return new MessageGroupStatsTableFactory($services->get( 'Translate:ProgressStatsTableFactory'), $services->getDBLoadBalancer(), $services->getLinkRenderer(), $services->get( 'Translate:MessageGroupReviewStore'), $services->getMainConfig() ->get( 'TranslateWorkflowStates') !==false);}, 'Translate:MessageIndex'=> static function(MediaWikiServices $services):MessageIndex { $params=$services->getMainConfig() ->get( 'TranslateMessageIndex');if(is_string( $params)) { $params=(array) $params;} $class=array_shift( $params);return new $class( $params);}, 'Translate:MessagePrefixStats'=> static function(MediaWikiServices $services):MessagePrefixStats { return new MessagePrefixStats( $services->getTitleParser());}, 'Translate:ParsingPlaceholderFactory'=> static function():ParsingPlaceholderFactory { return new ParsingPlaceholderFactory();}, 'Translate:PersistentCache'=> static function(MediaWikiServices $services):PersistentCache { return new PersistentDatabaseCache($services->getDBLoadBalancer(), $services->getJsonCodec());}, 'Translate:ProgressStatsTableFactory'=> static function(MediaWikiServices $services):ProgressStatsTableFactory { return new ProgressStatsTableFactory($services->getLinkRenderer(), $services->get( 'Translate:ConfigHelper'));}, 'Translate:RevTagStore'=> static function(MediaWikiServices $services):RevTagStore { return new RevTagStore($services->getDBLoadBalancerFactory());}, 'Translate:SubpageListBuilder'=> static function(MediaWikiServices $services):SubpageListBuilder { return new SubpageListBuilder($services->get( 'Translate:TranslatableBundleFactory'), $services->getLinkBatchFactory());}, 'Translate:TranslatableBundleExporter'=> static function(MediaWikiServices $services):TranslatableBundleExporter { return new 
TranslatableBundleExporter($services->get( 'Translate:SubpageListBuilder'), $services->getWikiExporterFactory(), $services->getDBLoadBalancer());}, 'Translate:TranslatableBundleFactory'=> static function(MediaWikiServices $services):TranslatableBundleFactory { return new TranslatableBundleFactory($services->get( 'Translate:TranslatablePageStore'), $services->get( 'Translate:MessageBundleStore'));}, 'Translate:TranslatableBundleImporter'=> static function(MediaWikiServices $services):TranslatableBundleImporter { return new TranslatableBundleImporter($services->getWikiImporterFactory(), $services->get( 'Translate:TranslatablePageParser'), $services->getRevisionLookup());}, 'Translate:TranslatableBundleMover'=> static function(MediaWikiServices $services):TranslatableBundleMover { return new TranslatableBundleMover($services->getMovePageFactory(), $services->getJobQueueGroup(), $services->getLinkBatchFactory(), $services->get( 'Translate:TranslatableBundleFactory'), $services->get( 'Translate:SubpageListBuilder'), $services->getMainConfig() ->get( 'TranslatePageMoveLimit'));}, 'Translate:TranslatableBundleStatusStore'=> static function(MediaWikiServices $services):TranslatableBundleStatusStore { return new TranslatableBundleStatusStore($services->getDBLoadBalancer() ->getConnection(DB_PRIMARY), $services->getCollationFactory() ->makeCollation( 'uca-default-u-kn'), $services->getDBLoadBalancer() ->getMaintenanceConnectionRef(DB_PRIMARY));}, 'Translate:TranslatablePageParser'=> static function(MediaWikiServices $services):TranslatablePageParser { return new TranslatablePageParser($services->get( 'Translate:ParsingPlaceholderFactory'));}, 'Translate:TranslatablePageStore'=> static function(MediaWikiServices $services):TranslatablePageStore { return new TranslatablePageStore($services->get( 'Translate:MessageIndex'), $services->getJobQueueGroup(), $services->get( 'Translate:RevTagStore'), $services->getDBLoadBalancer(), $services->get( 
'Translate:TranslatableBundleStatusStore'), $services->get( 'Translate:TranslatablePageParser'),);}, 'Translate:TranslationStashReader'=> static function(MediaWikiServices $services):TranslationStashReader { $db=$services->getDBLoadBalancer() ->getConnection(DB_REPLICA);return new TranslationStashStorage( $db);}, 'Translate:TranslationStatsDataProvider'=> static function(MediaWikiServices $services):TranslationStatsDataProvider { return new TranslationStatsDataProvider(new ServiceOptions(TranslationStatsDataProvider::CONSTRUCTOR_OPTIONS, $services->getMainConfig()), $services->getObjectFactory(), $services->getDBLoadBalancer());}, 'Translate:TranslationUnitStoreFactory'=> static function(MediaWikiServices $services):TranslationUnitStoreFactory { return new TranslationUnitStoreFactory( $services->getDBLoadBalancer());}, 'Translate:TranslatorActivity'=> static function(MediaWikiServices $services):TranslatorActivity { $query=new TranslatorActivityQuery($services->getMainConfig(), $services->getDBLoadBalancer());return new TranslatorActivity($services->getMainObjectStash(), $query, $services->getJobQueueGroup());}, 'Translate:TtmServerFactory'=> static function(MediaWikiServices $services):TtmServerFactory { $config=$services->getMainConfig();$default=$config->get( 'TranslateTranslationDefaultService');if( $default===false) { $default=null;} return new TtmServerFactory( $config->get( 'TranslateTranslationServices'), $default);}]
@phpcs-require-sorted-array
MySQL/MariaDB-based backend for translation memory.
setDoReIndex()
Instruct the service to fully wipe the index and start from scratch.
expandLocation(array $suggestion)
Given suggestion returned by this TTMServer, constructs fully qualified URL to the location of the tr...
batchInsertDefinitions(array $batch)
Called multiple times per batch if necessary.
update(MessageHandle $handle, ?string $targetText)
Shovels the new translation into translation memory.
endBatch()
Called after every batch (MessageGroup).
query(string $sourceLanguage, string $targetLanguage, string $text)
Fetches all relevant suggestions for given text.
filterForFulltext(string $languageCode, string $input)
Tokenizes the text for fulltext search.
beginBatch()
Called before every batch (MessageGroup).
isLocalSuggestion(array $suggestion)
Determines if the suggestion returned by this TTMServer comes from this wiki or any other wiki.
beginBootstrap()
Called when starting to fill the translation memory.
batchInsertTranslations(array $batch)
Called multiple times per batch if necessary.
Class for pointing to messages, like Title class is for titles.
Some general static methods for instantiating TTMServer and helpers.
Definition TTMServer.php:19
static sortSuggestions(array $suggestions)
Definition TTMServer.php:32
Interface for TTMServer that can be queried (=all of them).
Interface for TTMServer that can be updated.