12use MediaWiki\MediaWikiServices;
13use Wikimedia\Rdbms\DBQueryError;
27 protected function getDB( $mode = DB_REPLICA ) {
28 return wfGetDB( $mode,
'ttmserver', $this->config[
'database'] );
38 $targetLanguage = $handle->
getCode();
39 $sourceLanguage = $group->getSourceLanguage();
43 if ( $targetLanguage === $sourceLanguage ) {
47 $definition = $group->getMessage( $mkey, $sourceLanguage );
48 if ( !is_string( $definition ) || !strlen( trim( $definition ) ) ) {
52 $context = Title::makeTitle( $handle->
getTitle()->getNamespace(), $mkey );
53 $dbw = $this->
getDB( DB_PRIMARY );
61 'tms_context' => $context->getPrefixedText(),
62 'tms_text' => $definition,
65 $sid = $dbw->selectField(
'translate_tms',
'tms_sid', $conds, __METHOD__ );
66 if ( $sid ===
false ) {
67 $sid = $this->insertSource( $context, $sourceLanguage, $definition );
73 'tmt_lang' => $targetLanguage,
75 $dbw->delete(
'translate_tmt', $deleteConds, __METHOD__ );
78 if ( $targetText !==
null ) {
79 $row = $deleteConds + [
80 'tmt_text' => $targetText,
83 $dbw->insert(
'translate_tmt', $row, __METHOD__ );
89 protected function insertSource( Title $context, $sourceLanguage, $text ) {
91 'tms_lang' => $sourceLanguage,
92 'tms_len' => mb_strlen( $text ),
94 'tms_context' => $context->getPrefixedText(),
97 $dbw = $this->getDB( DB_PRIMARY );
98 $dbw->insert(
'translate_tms', $row, __METHOD__ );
99 $sid = $dbw->insertId();
101 $fulltext = $this->filterForFulltext( $sourceLanguage, $text );
102 if ( count( $fulltext ) ) {
105 'tmf_text' => implode(
' ', $fulltext ),
107 $dbw->insert(
'translate_tmf', $row, __METHOD__ );
122 $lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( $language );
124 $text = preg_replace(
'/[^[:alnum:]]/u',
' ', $input );
125 $text = $lang->segmentByWord( $text );
126 $text = $lang->lc( $text );
127 $segments = preg_split(
'/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY );
128 if ( count( $segments ) < 4 ) {
132 foreach ( $segments as $i => $segment ) {
134 $len = strlen( $segment );
135 if ( $len < 4 || $len > 15 ) {
136 unset( $segments[$i] );
140 $segments = array_unique( $segments );
141 $segments = array_slice( $segments, 0, 10 );
147 $dbw = $this->
getDB( DB_PRIMARY );
148 $dbw->delete(
'translate_tms',
'*', __METHOD__ );
149 $dbw->delete(
'translate_tmt',
'*', __METHOD__ );
150 $dbw->delete(
'translate_tmf',
'*', __METHOD__ );
151 $table = $dbw->tableName(
'translate_tmf' );
153 $dbw->query(
"DROP INDEX tmf_text ON $table", __METHOD__ );
154 }
catch ( DBQueryError $e ) {
165 foreach ( $batch as $key => $item ) {
166 list( $title, $language, $text ) = $item;
168 $context = Title::makeTitle( $handle->getTitle()->getNamespace(), $handle->getKey() );
169 $this->sids[$key] = $this->insertSource( $context, $language, $text );
171 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
172 $lbFactory->waitForReplication( [
'ifWritesSince' => 10 ] );
177 foreach ( $batch as $key => $data ) {
178 list( , $language, $text ) = $data;
180 'tmt_sid' => $this->sids[$key],
181 'tmt_lang' => $language,
186 $dbw = $this->
getDB( DB_PRIMARY );
187 $dbw->insert(
'translate_tmt', $rows, __METHOD__ );
188 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
189 $lbFactory->waitForReplication( [
'ifWritesSince' => 10 ] );
196 $dbw = $this->
getDB( DB_PRIMARY );
197 $table = $dbw->tableName(
'translate_tmf' );
198 $dbw->query(
"CREATE FULLTEXT INDEX tmf_text ON $table (tmf_text)", __METHOD__ );
208 $title = Title::newFromText( $suggestion[
'location'] );
210 return $title->getCanonicalURL();
213 public function query( $sourceLanguage, $targetLanguage, $text ) {
216 $len = mb_strlen( $text );
217 $min = ceil( max( $len * $this->config[
'cutoff'], 2 ) );
218 $max = floor( $len / $this->config[
'cutoff'] );
221 $dbr = $this->
getDB( DB_REPLICA );
222 $tables = [
'translate_tmt',
'translate_tms' ];
223 $fields = [
'tms_context',
'tms_text',
'tmt_lang',
'tmt_text' ];
226 'tms_lang' => $sourceLanguage,
227 'tmt_lang' => $targetLanguage,
228 "tms_len BETWEEN $min AND $max",
234 $tables[] =
'translate_tmf';
235 $list = implode(
' ', $fulltext );
236 $conds[] =
'tmf_sid = tmt_sid';
237 $conds[] =
"MATCH(tmf_text) AGAINST( '$list' )";
240 $res = $dbr->select( $tables, $fields, $conds, __METHOD__ );
242 return $this->processQueryResults( $res, $text, $targetLanguage );
245 protected function processQueryResults( $res, $text, $targetLanguage ) {
246 $timeLimit = microtime(
true ) + 5;
248 $lenA = mb_strlen( $text );
250 foreach ( $res as $row ) {
251 if ( microtime(
true ) > $timeLimit ) {
259 $lenB = mb_strlen( $b );
260 $len = min( $lenA, $lenB );
266 $dist = self::levenshtein( $a, $b, $lenA, $lenB );
268 $quality = 1 - ( $dist * 0.9 / $len );
270 if ( $quality >= $this->config[
'cutoff'] ) {
272 'source' => $row->tms_text,
273 'target' => $row->tmt_text,
274 'context' => $row->tms_context,
275 'location' => $row->tms_context .
'/' . $targetLanguage,
276 'quality' => $quality,
277 'wiki' => $row->tms_wiki ?? WikiMap::getCurrentWikiId(),
expandLocation(array $suggestion)
Given a suggestion returned by this TTMServer, constructs a fully qualified URL to the location of the translation.
setDoReIndex()
Instruct the service to fully wipe the index and start from scratch.
endBootstrap()
Do any cleanup, optimizing, etc.
query( $sourceLanguage, $targetLanguage, $text)
Fetches all relevant suggestions for given text.
batchInsertTranslations(array $batch)
Called multiple times per batch if necessary.
beginBatch()
Called before every batch (MessageGroup).
endBatch()
Called after every batch (MessageGroup).
batchInsertDefinitions(array $batch)
Called multiple times per batch if necessary.
beginBootstrap()
Called when starting to fill the translation memory.
filterForFulltext( $language, $input)
Tokenizes the text for fulltext search.
isLocalSuggestion(array $suggestion)
Determines if the suggestion returned by this TTMServer comes from this wiki or any other wiki.
update(MessageHandle $handle, $targetText)
Shovels the new translation into translation memory.
Class for pointing to messages, like Title class is for titles.
getGroup()
Get the primary MessageGroup this message belongs to.
isValid()
Checks if the handle corresponds to a known message.
getTitle()
Get the original title.
getCode()
Returns the language code.
getKey()
Returns the identified or guessed message key.
Some general static methods for instantiating TTMServer and helpers.
static sortSuggestions(array $suggestions)
Interface for TTMServer that can be queried (=all of them).
Interface for TTMServer that can be updated.