Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
DatabaseTTMServer.php
Go to the documentation of this file.
1<?php
12use MediaWiki\MediaWikiServices;
13use Wikimedia\Rdbms\DBQueryError;
14
/**
 * Source ids (as returned by insertSource()) collected during the current batch,
 * indexed by the key of the batch item. Reset in beginBatch(), filled in
 * batchInsertDefinitions() and read in batchInsertTranslations().
 * @var array
 */
21 protected $sids;
22
/**
 * Returns the database handle used for the translation memory tables.
 *
 * @param int $mode Connection type passed through to wfGetDB(); defaults to DB_REPLICA.
 * @return mixed Whatever wfGetDB() returns for the 'ttmserver' group and the
 *  database named by the 'database' configuration entry.
 */
protected function getDB( $mode = DB_REPLICA ) {
	$queryGroup = 'ttmserver';
	$database = $this->config['database'];

	return wfGetDB( $mode, $queryGroup, $database );
}
30
/**
 * Records a translation in the translation memory, replacing any translation
 * previously stored for the same source text and target language.
 *
 * The update is skipped (and false returned) when the handle is not valid or has
 * an empty language code, when the target language is the group's source
 * language, or when the message definition is not a non-blank string.
 *
 * @param MessageHandle $handle Handle of the message that was translated.
 * @param string|null $targetText New translation; when null the old translation
 *  is deleted and no new row is inserted.
 * @return bool True when the memory was updated, false when the update was skipped.
 */
public function update( MessageHandle $handle, $targetText ) {
	if ( !( $handle->isValid() && $handle->getCode() !== '' ) ) {
		return false;
	}

	$messageKey = $handle->getKey();
	$messageGroup = $handle->getGroup();
	$targetLanguage = $handle->getCode();
	$sourceLanguage = $messageGroup->getSourceLanguage();

	// Definitions are not stored on their own so that mass imports stay fast;
	// they get added together with the first translation.
	if ( $targetLanguage === $sourceLanguage ) {
		return false;
	}

	$definition = $messageGroup->getMessage( $messageKey, $sourceLanguage );
	if ( !is_string( $definition ) || trim( $definition ) === '' ) {
		return false;
	}

	$namespace = $handle->getTitle()->getNamespace();
	$context = Title::makeTitle( $namespace, $messageKey );
	$dbw = $this->getDB( DB_PRIMARY );

	// Look up the source id by context AND text. A changed definition therefore
	// gets a fresh source entry, so suggestions never pair a translation with a
	// definition it was not made for. Entries for old definitions stay around
	// until the bootstrap script is run again.
	$sourceConds = [
		'tms_context' => $context->getPrefixedText(),
		'tms_text' => $definition,
	];
	$sid = $dbw->selectField( 'translate_tms', 'tms_sid', $sourceConds, __METHOD__ );
	if ( $sid === false ) {
		$sid = $this->insertSource( $context, $sourceLanguage, $definition );
	}

	// Remove the previous translation for this source and language, if any.
	$dbw->delete(
		'translate_tmt',
		[
			'tmt_sid' => $sid,
			'tmt_lang' => $targetLanguage,
		],
		__METHOD__
	);

	// Store the new translation unless the caller only asked for removal.
	if ( $targetText !== null ) {
		$translationRow = [
			'tmt_sid' => $sid,
			'tmt_lang' => $targetLanguage,
			'tmt_text' => $targetText,
		];
		$dbw->insert( 'translate_tmt', $translationRow, __METHOD__ );
	}

	return true;
}
88
/**
 * Inserts a source text into translate_tms and, when the text yields any
 * fulltext tokens, a matching row into translate_tmf.
 *
 * @param Title $context Title whose prefixed text is stored as the context.
 * @param string $sourceLanguage Language code of the source text.
 * @param string $text The source text.
 * @return mixed The insert id reported by the database for the new translate_tms row.
 */
protected function insertSource( Title $context, $sourceLanguage, $text ) {
	$sourceRow = [
		'tms_lang' => $sourceLanguage,
		'tms_len' => mb_strlen( $text ),
		'tms_text' => $text,
		'tms_context' => $context->getPrefixedText(),
	];

	$dbw = $this->getDB( DB_PRIMARY );
	$dbw->insert( 'translate_tms', $sourceRow, __METHOD__ );
	$sid = $dbw->insertId();

	$tokens = $this->filterForFulltext( $sourceLanguage, $text );
	if ( count( $tokens ) > 0 ) {
		$dbw->insert(
			'translate_tmf',
			[
				'tmf_sid' => $sid,
				'tmf_text' => implode( ' ', $tokens ),
			],
			__METHOD__
		);
	}

	return $sid;
}
112
/**
 * Tokenizes the text for fulltext search.
 *
 * Non-alphanumeric characters are replaced with spaces, the text is segmented
 * by word and lowercased using the language object, and then split on
 * whitespace. Texts with fewer than four words produce no tokens at all.
 * Otherwise words whose byte length is outside 4..15 are dropped, duplicates
 * are removed and at most ten words are returned.
 *
 * @param string $language Language code used to obtain the language object.
 * @param string $input Text to tokenize.
 * @return string[] Up to ten distinct tokens, possibly none.
 */
protected function filterForFulltext( $language, $input ) {
	$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( $language );

	$normalized = $lang->lc(
		$lang->segmentByWord( preg_replace( '/[^[:alnum:]]/u', ' ', $input ) )
	);
	$words = preg_split( '/\s+/', $normalized, -1, PREG_SPLIT_NO_EMPTY );
	if ( count( $words ) < 4 ) {
		return [];
	}

	// The length limits are deliberately measured in bytes (strlen), not characters.
	$words = array_filter( $words, static function ( $word ) {
		$byteLength = strlen( $word );
		return $byteLength >= 4 && $byteLength <= 15;
	} );

	return array_slice( array_unique( $words ), 0, 10 );
}
145
/**
 * Called when starting to fill the translation memory.
 *
 * Empties the three translation memory tables and tries to drop the fulltext
 * index on translate_tmf; endBootstrap() creates that index again.
 */
public function beginBootstrap() {
	$dbw = $this->getDB( DB_PRIMARY );
	foreach ( [ 'translate_tms', 'translate_tmt', 'translate_tmf' ] as $tableToClear ) {
		$dbw->delete( $tableToClear, '*', __METHOD__ );
	}

	$fulltextTable = $dbw->tableName( 'translate_tmf' );
	try {
		$dbw->query( 'DROP INDEX tmf_text ON ' . $fulltextTable, __METHOD__ );
	} catch ( DBQueryError $e ) {
		// Deliberately ignored: the index may already be missing if an earlier
		// run was aborted before it had the chance to add the index back.
	}
}
159
/**
 * Called before every batch (MessageGroup).
 *
 * Clears the map of source ids collected for the previous batch.
 */
public function beginBatch() {
	$this->sids = [];
}
163
/**
 * Inserts the definitions of a batch; called multiple times per batch if necessary.
 *
 * The source id of every inserted definition is remembered under the item's
 * batch key so that batchInsertTranslations() can refer to it.
 *
 * @param array $batch Items of the form [ title, language code, text ], indexed by key.
 */
public function batchInsertDefinitions( array $batch ) {
	foreach ( $batch as $key => $item ) {
		[ $title, $language, $text ] = $item;
		$messageHandle = new MessageHandle( $title );
		$namespace = $messageHandle->getTitle()->getNamespace();
		$context = Title::makeTitle( $namespace, $messageHandle->getKey() );
		$this->sids[$key] = $this->insertSource( $context, $language, $text );
	}

	MediaWikiServices::getInstance()
		->getDBLoadBalancerFactory()
		->waitForReplication( [ 'ifWritesSince' => 10 ] );
}
174
/**
 * Inserts the translations of a batch; called multiple times per batch if necessary.
 *
 * Each item is linked to the source id stored under the same key by
 * batchInsertDefinitions().
 *
 * @param array $batch Items of the form [ (unused), language code, text ], indexed by key.
 */
public function batchInsertTranslations( array $batch ) {
	$translationRows = [];
	foreach ( $batch as $key => $item ) {
		[ , $language, $text ] = $item;
		$translationRows[] = [
			'tmt_sid' => $this->sids[$key],
			'tmt_lang' => $language,
			'tmt_text' => $text,
		];
	}

	$this->getDB( DB_PRIMARY )->insert( 'translate_tmt', $translationRows, __METHOD__ );

	MediaWikiServices::getInstance()
		->getDBLoadBalancerFactory()
		->waitForReplication( [ 'ifWritesSince' => 10 ] );
}
191
/**
 * Counterpart of beginBatch(); this backend does nothing here.
 */
public function endBatch() {
	// Intentionally empty.
}
194
/**
 * Finishes the bootstrap by creating the fulltext index on translate_tmf
 * that beginBootstrap() attempts to drop.
 */
public function endBootstrap() {
	$dbw = $this->getDB( DB_PRIMARY );
	$fulltextTable = $dbw->tableName( 'translate_tmf' );
	$sql = "CREATE FULLTEXT INDEX tmf_text ON $fulltextTable (tmf_text)";
	$dbw->query( $sql, __METHOD__ );
}
200
201 /* Reading interface */
202
/**
 * Determines if the suggestion returned by this TTMServer comes from this
 * wiki or any other wiki.
 *
 * @param array $suggestion Suggestion to inspect; not consulted by this backend.
 * @return bool Always true.
 */
public function isLocalSuggestion( array $suggestion ) {
	// This backend reports every suggestion as local.
	return true;
}
206
/**
 * Given a suggestion returned by this TTMServer, constructs the canonical URL
 * of the page named by the suggestion's 'location' entry.
 *
 * @param array $suggestion Suggestion with a 'location' entry holding a page title.
 * @return string Canonical URL of that page.
 * @throws \InvalidArgumentException When the location cannot be parsed as a title.
 */
public function expandLocation( array $suggestion ) {
	$location = $suggestion['location'];
	$title = Title::newFromText( $location );
	if ( $title === null ) {
		// Title::newFromText() returns null for unparsable titles; fail with a
		// clear message instead of a "call to a member function on null" error.
		throw new \InvalidArgumentException(
			"Suggestion location is not a valid title: $location"
		);
	}

	return $title->getCanonicalURL();
}
212
/**
 * Fetches all relevant suggestions for given text.
 *
 * Candidates are pre-filtered in the database by source language, target
 * language and source text length, and additionally by a fulltext match when
 * the text yields fulltext tokens. The final scoring is done by
 * processQueryResults().
 *
 * @param string $sourceLanguage Language code of $text.
 * @param string $targetLanguage Language code the suggestions should be in.
 * @param string $text Source text to find suggestions for.
 * @return array Suggestions as returned by processQueryResults().
 */
public function query( $sourceLanguage, $targetLanguage, $text ) {
	// Calculate the bounds of the string length which are able
	// to satisfy the cutoff percentage in edit distance.
	$len = mb_strlen( $text );
	$min = ceil( max( $len * $this->config['cutoff'], 2 ) );
	$max = floor( $len / $this->config['cutoff'] );

	$dbr = $this->getDB( DB_REPLICA );
	$tables = [ 'translate_tmt', 'translate_tms' ];
	$fields = [ 'tms_context', 'tms_text', 'tmt_lang', 'tmt_text' ];

	$conds = [
		'tms_lang' => $sourceLanguage,
		'tmt_lang' => $targetLanguage,
		"tms_len BETWEEN $min AND $max",
		'tms_sid = tmt_sid',
	];

	// Narrow the candidates with the fulltext index when tokens are available.
	$fulltext = $this->filterForFulltext( $sourceLanguage, $text );
	if ( $fulltext ) {
		$tables[] = 'translate_tmf';
		// Let the database layer quote the literal rather than wrapping it in
		// hand-written quotes, so the query stays safe even if an overridden
		// filterForFulltext() returns tokens containing quote characters.
		$list = $dbr->addQuotes( implode( ' ', $fulltext ) );
		$conds[] = 'tmf_sid = tmt_sid';
		$conds[] = "MATCH(tmf_text) AGAINST( $list )";
	}

	$res = $dbr->select( $tables, $fields, $conds, __METHOD__ );

	return $this->processQueryResults( $res, $text, $targetLanguage );
}
244
/**
 * Scores candidate rows against the queried text and returns those whose
 * quality reaches the configured cutoff, sorted by TTMServer::sortSuggestions().
 *
 * Quality is 1 - ( distance * 0.9 / length ), where length is the shorter of
 * the two string lengths. Processing stops after five seconds; the suggestions
 * collected up to that point are still returned.
 *
 * @param iterable $res Rows with tms_text, tmt_text and tms_context properties,
 *  and optionally tms_wiki.
 * @param string $text The text suggestions are wanted for.
 * @param string $targetLanguage Language code appended to the suggestion location.
 * @return array[] Suggestions with the keys source, target, context, location,
 *  quality and wiki.
 */
protected function processQueryResults( $res, $text, $targetLanguage ) {
	$timeLimit = microtime( true ) + 5;

	$lenA = mb_strlen( $text );
	$results = [];
	foreach ( $res as $row ) {
		if ( microtime( true ) > $timeLimit ) {
			// Having no suggestions is better than preventing translation
			// altogether by timing out the request :(
			break;
		}

		$a = $text;
		$b = $row->tms_text;
		$lenB = mb_strlen( $b );
		$len = min( $lenA, $lenB );
		if ( $len === 0 ) {
			// An empty string cannot be scored: the quality formula below
			// would divide by zero. Skip the row instead of failing.
			continue;
		}

		if ( $len > 600 ) {
			// The edit distance is too expensive for long strings:
			// two strings of length 1500 ~ 10s
			// two strings of length 2250 ~ 30s
			// Using the length as the distance gives the row a quality of 0.1.
			$dist = $len;
		} else {
			$dist = self::levenshtein( $a, $b, $lenA, $lenB );
		}
		$quality = 1 - ( $dist * 0.9 / $len );

		if ( $quality >= $this->config['cutoff'] ) {
			$results[] = [
				'source' => $row->tms_text,
				'target' => $row->tmt_text,
				'context' => $row->tms_context,
				'location' => $row->tms_context . '/' . $targetLanguage,
				'quality' => $quality,
				// Fall back to the current wiki when the row has no tms_wiki value.
				'wiki' => $row->tms_wiki ?? WikiMap::getCurrentWikiId(),
			];
		}
	}
	$results = TTMServer::sortSuggestions( $results );

	return $results;
}
285
/**
 * Instruct the service to fully wipe the index and start from scratch.
 *
 * This backend takes no action here.
 */
public function setDoReIndex() {
	// Intentionally empty.
}
288}
Mysql based backend.
getDB( $mode=DB_REPLICA)
expandLocation(array $suggestion)
Given a suggestion returned by this TTMServer, constructs a fully qualified URL to the location of the translation.
setDoReIndex()
Instruct the service to fully wipe the index and start from scratch.
endBootstrap()
Do any cleanup, optimizing etc.
query( $sourceLanguage, $targetLanguage, $text)
Fetches all relevant suggestions for given text.
batchInsertTranslations(array $batch)
Called multiple times per batch if necessary.
beginBatch()
Called before every batch (MessageGroup).
endBatch()
Called after every batch (MessageGroup).
batchInsertDefinitions(array $batch)
Called multiple times per batch if necessary.
beginBootstrap()
Called when starting to fill the translation memory.
filterForFulltext( $language, $input)
Tokenizes the text for fulltext search.
isLocalSuggestion(array $suggestion)
Determines if the suggestion returned by this TTMServer comes from this wiki or any other wiki.
update(MessageHandle $handle, $targetText)
Shovels the new translation into translation memory.
Class for pointing to messages, like Title class is for titles.
getGroup()
Get the primary MessageGroup this message belongs to.
isValid()
Checks if the handle corresponds to a known message.
getTitle()
Get the original title.
getCode()
Returns the language code.
getKey()
Returns the identified or guessed message key.
Some general static methods for instantiating TTMServer and helpers.
Definition TTMServer.php:20
static sortSuggestions(array $suggestions)
Definition TTMServer.php:71
Interface for TTMServer that can be queried (=all of them).
Interface for TTMServer that can be updated.