Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ExternalMessageSourceStateComparator.php
1<?php
2
13
/** Process all languages supported by the message group. */
public const ALL_LANGUAGES = 'all languages';

/** @var StringComparator Comparator injected through the constructor. */
protected $stringComparator;
20
/**
 * @param StringComparator $stringComparator Comparator stored on the instance;
 *  its getSimilarity() results are used when pairing additions with deletions.
 */
public function __construct( StringComparator $stringComparator ) {
	$this->stringComparator = $stringComparator;
}
25
/**
 * Finds modifications in external sources compared to wiki state.
 *
 * The source language is processed before any other language. When
 * self::ALL_LANGUAGES is given, the source language is processed even if it is
 * not part of the group's translatable languages.
 *
 * @param FileBasedMessageGroup $group Group whose files are compared.
 * @param string[]|string $languages List of language codes, or self::ALL_LANGUAGES.
 * @return MessageSourceChange The collected modifications for all processed languages.
 * @throws InvalidArgumentException If $languages is neither an array nor self::ALL_LANGUAGES.
 */
public function processGroup( FileBasedMessageGroup $group, $languages ) {
	$changes = new MessageSourceChange();
	$processAll = false;

	if ( $languages === self::ALL_LANGUAGES ) {
		$processAll = true;
		$languages = $group->getTranslatableLanguages();

		// This means all languages
		if ( $languages === null ) {
			$languages = TranslateUtils::getLanguageNames( 'en' );
		}

		$languages = array_keys( $languages );
	} elseif ( !is_array( $languages ) ) {
		throw new InvalidArgumentException( 'Invalid input given for $languages' );
	}

	// Process the source language before others. Source language might not
	// be included in $group->getTranslatableLanguages(). The expected
	// behavior is that source language is always processed when given
	// self::ALL_LANGUAGES.
	$sourceLanguage = $group->getSourceLanguage();
	$index = array_search( $sourceLanguage, $languages, true );

	// Only drop the entry when it was actually found: a boolean false used as an
	// array key is cast to 0, which would silently remove the first language.
	if ( $index !== false ) {
		unset( $languages[$index] );
	}

	if ( $processAll || $index !== false ) {
		$this->processLanguage( $group, $sourceLanguage, $changes );
	}

	foreach ( $languages as $language ) {
		$this->processLanguage( $group, $language, $changes );
	}

	return $changes;
}
84
/**
 * Collects the modifications for one language of the group, unless the
 * message group cache for that language is still valid.
 *
 * @param FileBasedMessageGroup $group
 * @param string $language Language code to process.
 * @param MessageSourceChange $changes Collector that receives the modifications.
 */
protected function processLanguage(
	FileBasedMessageGroup $group, $language, MessageSourceChange $changes
) {
	$cache = $group->getMessageGroupCache( $language );

	// NOTE(review): $reason is presumably filled in by isValid() through a
	// by-reference parameter; confirm against the cache class.
	$reason = 0;
	if ( $cache->isValid( $reason ) ) {
		return;
	}

	$this->addMessageUpdateChanges( $group, $language, $changes, $reason, $cache );

	// When file and wiki state agree, refresh the cache right away. A cache that
	// lags behind the file state would report false positive conflicts later.
	$pendingModifications = $changes->getModificationsForLanguage( $language );
	if ( $pendingModifications === [] ) {
		$cache->create();
	}
}
101
/**
 * Compares the external file of one language against the wiki state and records
 * the differences in $changes.
 *
 * - Keys present on both sides with differing content are recorded as changes,
 *   unless an existing cache shows that the file still matches the cache while
 *   the wiki does not (i.e. only the wiki was edited since the last export).
 * - Keys present only in the file are recorded as additions.
 * - Keys present only in the wiki are recorded as deletions, but only when a
 *   cache exists and the cache knows the key.
 * - Afterwards the recorded additions and deletions are inspected for renames.
 *
 * @param FileBasedMessageGroup $group
 * @param string $language Language code being compared.
 * @param MessageSourceChange $changes Collector that receives the modifications.
 * @param int $reason Cache state; compared against MessageGroupCache::NO_CACHE.
 * @param MessageGroupCache $cache Cache to consult (the parameter is untyped;
 *  presumably a MessageGroupCache, as passed by processLanguage()).
 * @throws RuntimeException If $language is the source language and its file does not exist.
 */
protected function addMessageUpdateChanges(
	FileBasedMessageGroup $group, $language, MessageSourceChange $changes, $reason, $cache
) {
	// The collection is an empty list before the first import.
	$wikiCollection = $group->initCollection( $language );
	$wikiCollection->filter( 'hastranslation', false );
	$wikiCollection->loadTranslations();
	$wikiKeys = $wikiCollection->getMessageKeys();

	$sourceLanguage = $group->getSourceLanguage();
	$isSourceLanguage = $language === $sourceLanguage;

	// Go through the FFS directly to by-pass cached message definitions.
	$ffs = $group->getFFS();
	if ( $isSourceLanguage && !$ffs->exists( $language ) ) {
		$path = $group->getSourceFilePath( $language );
		throw new RuntimeException( "Source message file for {$group->getId()} does not exist: $path" );
	}

	$file = $ffs->read( $language );
	if ( $file === false ) {
		// The file does not exist.
		return;
	}

	if ( !isset( $file['MESSAGES'] ) ) {
		// Something went wrong while reading; log it and stop.
		$id = $group->getId();
		$ffsClass = get_class( $ffs );

		error_log( "$id has an FFS ($ffsClass) - it didn't return cake for $language" );

		return;
	}

	$fileMessages = $file['MESSAGES'];
	$fileKeys = array_keys( $fileMessages );
	$sharedKeys = array_intersect( $fileKeys, $wikiKeys );

	$supportsFuzzy = $ffs->supportsFuzzy();
	$hasCache = $reason !== MessageGroupCache::NO_CACHE;
	$changesToRemove = [];

	foreach ( $sharedKeys as $key ) {
		$sourceContent = $fileMessages[$key];
		$wikiMessage = $wikiCollection[$key];

		// @todo: Fuzzy checking can also be moved to $ffs->isContentEqual();
		// Strip every fuzzy marker first, so that fuzziness alone is not a difference
		// when the FFS does not support it ...
		$wikiContent = str_replace( TRANSLATE_FUZZY, '', $wikiMessage->translation() );

		// ... and when it does, put back exactly one marker as a prefix.
		if ( $supportsFuzzy === 'yes' && $wikiMessage->hasTag( 'fuzzy' ) ) {
			$wikiContent = TRANSLATE_FUZZY . $wikiContent;
		}

		if ( $ffs->isContentEqual( $sourceContent, $wikiContent ) ) {
			// File and wiki state agree, nothing to do.
			continue;
		}

		// Use the interim cache to tell whether the change happened in the wiki,
		// in the file, or in both.
		if ( $hasCache ) {
			$cacheContent = $cache->get( $key );

			// Ignore the common situation where only the wiki string changed since
			// the last export: the file still equals the cache, the wiki does not.
			$wikiDiffersFromCache = !$ffs->isContentEqual( $wikiContent, $cacheContent );
			if ( $wikiDiffersFromCache && $ffs->isContentEqual( $sourceContent, $cacheContent ) ) {
				continue;
			}
		}

		// For non-source languages, treat $key as a possibly old key and look for a
		// matching rename in the source language. The key of the matched message is
		// the new, renamed key.
		$renameMsg = $isSourceLanguage
			? null
			: $changes->getMatchedMessage( $sourceLanguage, $key );

		if ( $renameMsg !== null ) {
			// The source language renamed this key, and this language has a content
			// change under the OLD key. Record it as a rename rather than a change,
			// so the key is renamed first and the content updated afterwards.
			$this->addNonSourceRenames(
				$changes, $key, $renameMsg['key'], $sourceContent, $wikiContent, $language
			);
			$changesToRemove[] = $key;
			continue;
		}

		$changes->addChange( $language, $key, $sourceContent );
	}

	$changes->removeChanges( $language, $changesToRemove );

	// Keys that only exist in the file are additions.
	foreach ( array_diff( $fileKeys, $wikiKeys ) as $key ) {
		$changes->addAddition( $language, $key, $fileMessages[$key] );
	}

	// Without a cache, messages missing from the file are not treated as deleted:
	// they probably have not been exported yet, e.g. translations of a new language
	// that are exported for the first time.
	if ( $hasCache ) {
		foreach ( array_diff( $wikiKeys, $fileKeys ) as $key ) {
			// A key that never existed in the cache must have been newly made in
			// the wiki, so only keys known to the cache count as deletions.
			if ( $cache->get( $key ) !== false ) {
				$changes->addDeletion( $language, $key, $wikiCollection[$key]->translation() );
			}
		}
	}

	if ( $isSourceLanguage ) {
		$this->findAndMarkSourceRenames( $changes, $language );
		return;
	}

	// Non source language
	$this->checkNonSourceAdditionsForRename(
		$changes, $sourceLanguage, $language, $wikiCollection, $wikiKeys
	);
}
255
/**
 * Converts additions of a non-source language into renames when the added key
 * is recorded as a rename in the source language.
 *
 * @param MessageSourceChange $changes Collector that is read and updated.
 * @param string $sourceLanguage Language code of the source language.
 * @param string $targetLanguage Language code whose additions are inspected.
 * @param MessageCollection $wiki Wiki messages of the target language.
 * @param string[] $wikiKeys Message keys present in $wiki.
 */
private function checkNonSourceAdditionsForRename(
	MessageSourceChange $changes, $sourceLanguage, $targetLanguage, MessageCollection $wiki, $wikiKeys
) {
	$additions = $changes->getAdditions( $targetLanguage );
	if ( $additions === [] ) {
		return;
	}

	$handledAdditionKeys = [];
	$handledDeletionKeys = [];

	foreach ( $additions as $addition ) {
		$newKey = $addition['key'];

		// Is this key part of a rename in the source language?
		$sourceRename = $changes->findMessage(
			$sourceLanguage, $newKey, [ MessageSourceChange::RENAME ]
		);
		if ( $sourceRename === null ) {
			continue;
		}

		// The key is new here and renamed in the source language, so it is recorded
		// as a rename. The matched source message carries the old key.
		$sourceCounterpart = $changes->getMatchedMessage( $sourceLanguage, $sourceRename['key'] );
		$oldKey = $sourceCounterpart['key'];

		$deletion = $changes->findMessage(
			$targetLanguage, $oldKey, [ MessageSourceChange::DELETION ]
		);

		// The deletion may be missing: when the cache does not have the translations
		// the deleted message is not recorded, and the old key may also simply not
		// have been removed for this language. Fall back to the wiki content, if any.
		if ( $deletion === null ) {
			$oldContent = in_array( $oldKey, $wikiKeys )
				? $wiki[ $oldKey ]->translation()
				: '';

			$deletion = [
				'key' => $oldKey,
				'content' => $oldContent
			];
		}

		$similarityPercent = $this->stringComparator->getSimilarity(
			$addition['content'], $deletion['content']
		);

		$renamedTo = [
			'key' => $newKey,
			'content' => $addition['content']
		];
		$renamedFrom = [
			'key' => $oldKey,
			'content' => $deletion['content']
		];
		$changes->addRename( $targetLanguage, $renamedTo, $renamedFrom, $similarityPercent );

		$handledDeletionKeys[] = $oldKey;
		$handledAdditionKeys[] = $newKey;
	}

	$changes->removeAdditions( $targetLanguage, $handledAdditionKeys );
	$changes->removeDeletions( $targetLanguage, $handledDeletionKeys );
}
328
/**
 * Looks for renames in the source language by comparing the content of every
 * added message with the content of every deleted message, then hands the
 * sufficiently similar pairs to matchRenames().
 *
 * @param MessageSourceChange $changes Collector that is read and updated.
 * @param string $sourceLanguage Language code of the source language.
 */
private function findAndMarkSourceRenames( MessageSourceChange $changes, $sourceLanguage ) {
	$deletions = $changes->getDeletions( $sourceLanguage );
	$additions = $changes->getAdditions( $sourceLanguage );

	// A rename needs at least one candidate on each side.
	if ( $deletions === [] || $additions === [] ) {
		return;
	}

	// Maps "<added key>|<deleted key>" to the similarity of the two contents, e.g.
	// [ A1|D1 => 1.0, A1|D2 => 0.95, A2|D1 => 0.95 ]
	$candidates = [];
	foreach ( $additions as $added ) {
		foreach ( $deletions as $deleted ) {
			$similarity = $this->stringComparator->getSimilarity(
				$added['content'], $deleted['content']
			);

			if ( !$changes->areStringsSimilar( $similarity ) ) {
				continue;
			}

			$pairKey = $added['key'] . '|' . $deleted['key'];
			$candidates[ $pairKey ] = $similarity;
		}
	}

	$this->matchRenames( $changes, $candidates, $sourceLanguage );
}
365
/**
 * Records a rename for a non-source language: the file content under the new
 * key paired with the wiki content under the old key.
 *
 * @param MessageSourceChange $changes Collector that receives the rename.
 * @param string $key Old key, as currently present in the wiki.
 * @param string $renameKey New key taken from the source language rename.
 * @param string $sourceContent Content read from the file.
 * @param string $wikiContent Content read from the wiki.
 * @param string $language Language code the rename is recorded for.
 */
private function addNonSourceRenames(
	MessageSourceChange $changes, $key, $renameKey, $sourceContent, $wikiContent, $language
) {
	$similarityPercent = $this->stringComparator->getSimilarity(
		$sourceContent, $wikiContent
	);

	$changes->addRename(
		$language,
		[ 'key' => $renameKey, 'content' => $sourceContent ],
		[ 'key' => $key, 'content' => $wikiContent ],
		$similarityPercent
	);
}
393
/**
 * Turns candidate pairs into renames, most similar pair first. A key that has
 * already been used in a rename is not used again.
 *
 * @param MessageSourceChange $changes Collector that is read and updated.
 * @param array $trackRename Map of "<added key>|<deleted key>" to similarity.
 * @param string $language Language code the renames are recorded for.
 */
private function matchRenames( MessageSourceChange $changes, array $trackRename, $language ) {
	// Highest similarity first, so the best candidates claim their keys first.
	arsort( $trackRename, SORT_NUMERIC );

	// Keys used so far, stored as array keys so that the lookup is an isset().
	$alreadyRenamed = [];
	$additionsToRemove = [];
	$deletionsToRemove = [];

	foreach ( $trackRename as $pairKey => $similarityPercent ) {
		[ $addKey, $deleteKey ] = explode( '|', $pairKey, 2 );

		$isTaken = isset( $alreadyRenamed[ $addKey ] ) || isset( $alreadyRenamed[ $deleteKey ] );
		if ( $isTaken ) {
			// Already mapped with another name.
			continue;
		}

		$alreadyRenamed[ $addKey ] = 1;
		$alreadyRenamed[ $deleteKey ] = 1;

		$addMsg = $changes->findMessage( $language, $addKey, [ MessageSourceChange::ADDITION ] );
		$deleteMsg = $changes->findMessage( $language, $deleteKey, [ MessageSourceChange::DELETION ] );

		$changes->addRename( $language, $addMsg, $deleteMsg, $similarityPercent );

		// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
		$additionsToRemove[] = $addMsg['key'];
		// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
		$deletionsToRemove[] = $deleteMsg['key'];
	}

	$changes->removeAdditions( $language, $additionsToRemove );
	$changes->removeDeletions( $language, $deletionsToRemove );
}
433}
const ALL_LANGUAGES
Process all languages supported by the message group.
addMessageUpdateChanges(FileBasedMessageGroup $group, $language, MessageSourceChange $changes, $reason, $cache)
This is the detective novel.
processGroup(FileBasedMessageGroup $group, $languages)
Finds modifications in external sources compared to wiki state.
This class implements default behavior for file based message groups.
initCollection( $code)
@inheritDoc
Class is used to track the changes made when importing messages from the remote sources using processM...
addRename( $language, $addedMessage, $deletedMessage, $similarity=0)
Adds a rename under a message group for a specific language.
removeAdditions( $language, $keysToRemove)
Remove additions for a language under the group.
addAddition( $language, $key, $content)
Add an addition under a message group for a specific language.
findMessage( $language, $key, $possibleStates=[], &$modificationType=null)
Finds a message with the given key across different types of modifications.
getMatchedMessage( $languageCode, $key)
Get matched rename message for a given key.
addChange( $language, $key, $content)
Add a change under a message group for a specific language.
getModificationsForLanguage( $language)
Get all modifications for a language under the group.
removeDeletions( $language, $keysToRemove)
Remove deletions for a language under the group.
getAdditions( $language)
Fetch additions for a message group under a language.
removeChanges( $language, $keysToRemove)
Remove changes for a language under the group.
getDeletions( $language)
Fetch deletions for a message group under a language.
addDeletion( $language, $key, $content)
Adds a deletion under a message group for a specific language.
areStringsSimilar( $similarity)
Checks if the similarity percent passed passes the min threshold.
Core message collection class.
getTranslatableLanguages()
@inheritDoc
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.
An interface to be implemented by comparators that will compare percentage of similarity between stri...