Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ExternalMessageSourceStateComparator.php
1<?php
2declare( strict_types = 1 );
3
5
13use MediaWiki\Logger\LoggerFactory;
14use MediaWiki\Page\PageStore;
15use MediaWiki\Revision\RevisionLookup;
16use MediaWiki\Utils\MWTimestamp;
17use RuntimeException;
18
26 private StringComparator $stringComparator;
27 private RevisionLookup $revisionLookup;
28 private PageStore $pageStore;
29
/**
 * Store the collaborators this comparator relies on.
 *
 * @param StringComparator $stringComparator Used to score how similar two message contents are
 * @param RevisionLookup $revisionLookup Used to look up the first and latest revision of a message page
 * @param PageStore $pageStore Used to resolve a message's title into a page identity
 */
public function __construct(
	StringComparator $stringComparator,
	RevisionLookup $revisionLookup,
	PageStore $pageStore
) {
	$this->pageStore = $pageStore;
	$this->revisionLookup = $revisionLookup;
	$this->stringComparator = $stringComparator;
}
39
59 $changes = new MessageSourceChange();
60 $languages = $group->getTranslatableLanguages() ?? Utilities::getLanguageNames( 'en' );
61
62 // Process the source language before others. Source language might not
63 // be included in $group->getTranslatableLanguages(). The expected
64 // behavior is that source language is always processed.
65 $sourceLanguage = $group->getSourceLanguage();
66 $this->processLanguage( $group, $sourceLanguage, $changes );
67 unset( $languages[ $sourceLanguage] );
68
69 foreach ( array_keys( $languages ) as $language ) {
70 $this->processLanguage( $group, $language, $changes );
71 }
72
73 return $changes;
74 }
75
// Collects external changes for one language of the group, but only when the
// group's message cache for that language is reported as not valid.
// If no modifications end up recorded for the language, the cache is
// recreated right away.
// NOTE(review): this listing omits original line 77; presumably it declares
// the $group parameter used below (FileBasedMessageGroup) - confirm against the file.
76 private function processLanguage(
78 string $language,
79 MessageSourceChange $changes
80 ): void {
81 $cache = $group->getMessageGroupCache( $language );
// Presumably filled in by isValid() by reference with the reason the cache
// is invalid; it is forwarded as-is to addMessageUpdateChanges() - TODO confirm.
82 $reason = 0;
83 if ( !$cache->isValid( $reason ) ) {
84 $this->addMessageUpdateChanges( $group, $language, $changes, $reason, $cache );
85
86 if ( $changes->getModificationsForLanguage( $language ) === [] ) {
87 /* Update the cache immediately if file and wiki state match.
88 * Otherwise the cache will get outdated compared to file state
89 * and will give false positive conflicts later. */
90 $cache->create();
91 }
92 }
93 }
94
// Compares the messages read from the group's file (via its FFS) with the
// translations loaded from the wiki for one language, and records the
// differences in $changes as changes, additions, deletions and renames.
//
// - Throws RuntimeException when $language is the source language and the
//   FFS reports that its file does not exist.
// - Returns without recording anything when the file cannot be read or the
//   read result has no 'MESSAGES' entry.
// - When $reason equals MessageGroupCache::NO_CACHE, the cache is not
//   consulted and no deletions are recorded.
//
// NOTE(review): this listing omits original lines 112 and 158; line 112
// presumably declares the $group parameter used below - confirm against the file.
111 protected function addMessageUpdateChanges(
113 string $language,
114 MessageSourceChange $changes,
115 int $reason,
116 MessageGroupCache $cache
117 ): void {
118 // initCollection returns empty list before first import
119 $wiki = $group->initCollection( $language );
120 $wiki->filter( 'hastranslation', false );
121 $wiki->loadTranslations();
122 $wikiKeys = $wiki->getMessageKeys();
123
124 $sourceLanguage = $group->getSourceLanguage();
125 // By-pass cached message definitions
126 $ffs = $group->getFFS();
127 if ( $language === $sourceLanguage && !$ffs->exists( $language ) ) {
128 $path = $group->getSourceFilePath( $language );
129 throw new RuntimeException( "Source message file for {$group->getId()} does not exist: $path" );
130 }
131
132 $file = $ffs->read( $language );
133
134 // Does not exist
135 if ( $file === false ) {
136 return;
137 }
138
139 // Something went wrong
140 if ( !isset( $file['MESSAGES'] ) ) {
141 $id = $group->getId();
142 $ffsClass = get_class( $ffs );
143
144 error_log( "$id has an FFS ($ffsClass) - it didn't return cake for $language" );
145
146 return;
147 }
148
149 $fileKeys = array_keys( $file['MESSAGES'] );
150
// Keys present both in the file and in the wiki: candidates for content changes.
151 $common = array_intersect( $fileKeys, $wikiKeys );
152
153 $supportsFuzzy = $ffs->supportsFuzzy();
// Keys that get recorded as renames inside the loop; they are passed to
// removeChanges() once the loop is done.
154 $changesToRemove = [];
155
156 foreach ( $common as $key ) {
157 $sourceContent = $file['MESSAGES'][$key];
159 $wikiMessage = $wiki[$key];
160 $wikiContent = $wikiMessage->translation();
161
162 // @todo: Fuzzy checking can also be moved to $ffs->isContentEqual();
163 // If FFS doesn't support it, ignore fuzziness as difference
164 $wikiContent = str_replace( TRANSLATE_FUZZY, '', $wikiContent );
165
166 // But if it does, ensure we have exactly one fuzzy marker prefixed
167 if ( $supportsFuzzy === 'yes' && $wikiMessage->hasTag( 'fuzzy' ) ) {
168 $wikiContent = TRANSLATE_FUZZY . $wikiContent;
169 }
170
171 if ( $ffs->isContentEqual( $sourceContent, $wikiContent ) ) {
172 // File and wiki state agree, nothing to do
173 continue;
174 }
175
176 // Check against interim cache to see whether we have changes
177 // in the wiki, in the file or both.
178
179 if ( $reason !== MessageGroupCache::NO_CACHE ) {
180 $cacheContent = $cache->get( $key );
181
182 /* We want to ignore the following situations:
183 * 1. The string in the wiki has been changed since the last export.
184 * Hence we check that source === cache && cache !== wiki
185 * 2. Missing cache entry due to the string being translated on translatewiki.net,
186 * exported and then being updated on translatewiki.net again.
187 */
188 if (
189 $this->hasCacheEntry( $cache, $wiki, $key ) &&
190 !$ffs->isContentEqual( $wikiContent, $cacheContent ) &&
191 $ffs->isContentEqual( $sourceContent, $cacheContent )
192 ) {
193 continue;
194 }
195 }
196
197 if ( $language !== $sourceLanguage ) {
198 // Assuming that this is the old key, let's check if it has a corresponding
199 // rename in the source language. The key of the matching message will be
200 // the new renamed key.
201 $renameMsg = $changes->getMatchedMessage( $sourceLanguage, $key );
202 if ( $renameMsg !== null ) {
203 // Rename present in source language but this message has a content change
204 // with the OLD key in a non-source language. We will not process this
205 // here but add it as a rename instead. This way, the key will be renamed
206 // and then the content updated.
207 $this->addNonSourceRenames(
208 $changes, $key, $renameMsg['key'], $sourceContent, $wikiContent, $language
209 );
210 $changesToRemove[] = $key;
211 continue;
212 }
213 }
214 $changes->addChange( $language, $key, $sourceContent );
215 }
216
217 $changes->removeChanges( $language, $changesToRemove );
218
// Keys only present in the file are recorded as additions.
219 $added = array_diff( $fileKeys, $wikiKeys );
220 foreach ( $added as $key ) {
221 $sourceContent = $file['MESSAGES'][$key];
222 $changes->addAddition( $language, $key, $sourceContent );
223 }
224
225 /* Should the cache not exist, don't consider the messages
226 * missing from the file as deleted - they probably aren't
227 * yet exported. For example new language translations are
228 * exported the first time. */
229 if ( $reason !== MessageGroupCache::NO_CACHE ) {
230 $deleted = array_diff( $wikiKeys, $fileKeys );
231 foreach ( $deleted as $key ) {
232 if ( $cache->get( $key ) === false ) {
233 /* This message has never existed in the cache, so it
234 * must be newly made in the wiki. */
235 continue;
236 }
237 $changes->addDeletion( $language, $key, $wiki[$key]->translation() );
238 }
239 }
240
// Rename detection runs last, once additions and deletions for this
// language have been recorded above.
241 if ( $language === $sourceLanguage ) {
242 $this->findAndMarkSourceRenames( $changes, $language );
243 } else {
244 // Non source language
245 $this->checkNonSourceAdditionsForRename(
246 $changes, $sourceLanguage, $language, $wiki, $wikiKeys
247 );
248 }
249 }
250
/**
 * Re-classify additions of a non-source language as renames when the added
 * key is recorded as a rename in the source language.
 *
 * Each such addition is paired with the old key taken from the source
 * language rename and recorded as a rename for the target language. The
 * addition, and the deletion of the old key, are then dropped from the
 * target language.
 *
 * @param MessageSourceChange $changes Change set that is read and updated
 * @param string $sourceLanguage
 * @param string $targetLanguage
 * @param MessageCollection $wiki Wiki-side messages for the target language
 * @param array $wikiKeys Message keys available in $wiki
 */
private function checkNonSourceAdditionsForRename(
	MessageSourceChange $changes,
	string $sourceLanguage,
	string $targetLanguage,
	MessageCollection $wiki,
	array $wikiKeys
): void {
	$targetAdditions = $changes->getAdditions( $targetLanguage );
	if ( $targetAdditions === [] ) {
		return;
	}

	$renamedNewKeys = [];
	$renamedOldKeys = [];

	foreach ( $targetAdditions as $addition ) {
		$newKey = $addition['key'];

		// Only additions whose key is a rename in the source language matter.
		$sourceRename = $changes->findMessage(
			$sourceLanguage,
			$newKey,
			[ MessageSourceChange::RENAME ]
		);
		if ( $sourceRename === null ) {
			continue;
		}

		// The message matched to the rename carries the old key.
		$sourceCounterpart = $changes->getMatchedMessage( $sourceLanguage, $sourceRename['key'] );
		if ( $sourceCounterpart === null ) {
			continue;
		}

		$oldKey = $sourceCounterpart['key'];
		$targetDeletion = $changes->findMessage(
			$targetLanguage,
			$oldKey,
			[ MessageSourceChange::DELETION ]
		);

		// The deletion may not have been recorded for this language (for example
		// when the cache lacks the translation), or the key may not have been
		// removed here at all. Fall back to the wiki content, or to an empty string.
		if ( $targetDeletion !== null ) {
			$oldContent = $targetDeletion['content'];
		} elseif ( in_array( $oldKey, $wikiKeys ) ) {
			$oldContent = $wiki[ $oldKey ]->translation();
		} else {
			$oldContent = '';
		}

		$newContent = $addition['content'];
		$similarity = $this->stringComparator->getSimilarity( $newContent, $oldContent );

		$changes->addRename(
			$targetLanguage,
			[ 'key' => $newKey, 'content' => $newContent ],
			[ 'key' => $oldKey, 'content' => $oldContent ],
			$similarity
		);

		$renamedOldKeys[] = $oldKey;
		$renamedNewKeys[] = $newKey;
	}

	$changes->removeAdditions( $targetLanguage, $renamedNewKeys );
	$changes->removeDeletions( $targetLanguage, $renamedOldKeys );
}
330
/**
 * Detect renames in the source language by comparing the content of every
 * added message with the content of every deleted message.
 *
 * Pairs considered similar by $changes are collected together with their
 * similarity score and handed to matchRenames().
 *
 * @param MessageSourceChange $changes
 * @param string $sourceLanguage
 */
private function findAndMarkSourceRenames( MessageSourceChange $changes, string $sourceLanguage ): void {
	$removed = $changes->getDeletions( $sourceLanguage );
	$introduced = $changes->getAdditions( $sourceLanguage );
	if ( $removed === [] || $introduced === [] ) {
		// A rename needs at least one addition and one deletion.
		return;
	}

	// Maps "<added key>|<deleted key>" to the similarity of the pair, e.g.
	// [ A1|D1 => 1.0, A1|D2 => 0.95, A2|D1 => 0.95 ]
	$candidates = [];
	foreach ( $introduced as $addition ) {
		$newKey = $addition['key'];

		foreach ( $removed as $deletion ) {
			$score = $this->stringComparator->getSimilarity(
				$addition['content'],
				$deletion['content']
			);

			if ( !$changes->areStringsSimilar( $score ) ) {
				continue;
			}

			$candidates[ "{$newKey}|{$deletion['key']}" ] = $score;
		}
	}

	$this->matchRenames( $changes, $candidates, $sourceLanguage );
}
365
/**
 * Record a rename for a non-source language: the message stored under $key
 * with the wiki content becomes $renameKey with the file content.
 *
 * @param MessageSourceChange $changes
 * @param string $key Old key, paired with $wikiContent
 * @param string $renameKey New key, paired with $sourceContent
 * @param string $sourceContent
 * @param string $wikiContent
 * @param string $language
 */
private function addNonSourceRenames(
	MessageSourceChange $changes,
	string $key,
	string $renameKey,
	string $sourceContent,
	string $wikiContent,
	string $language
): void {
	$similarity = $this->stringComparator->getSimilarity( $sourceContent, $wikiContent );

	$changes->addRename(
		$language,
		[ 'key' => $renameKey, 'content' => $sourceContent ],
		[ 'key' => $key, 'content' => $wikiContent ],
		$similarity
	);
}
390
/**
 * Turn candidate addition/deletion pairs into renames, best match first.
 *
 * Candidates are visited in descending order of similarity. A pair is
 * skipped as soon as either of its keys already takes part in a rename.
 * Matched messages are removed from the additions and deletions.
 *
 * @param MessageSourceChange $changes
 * @param array $trackRename Map of "<added key>|<deleted key>" to similarity
 * @param string $language
 */
private function matchRenames( MessageSourceChange $changes, array $trackRename, string $language ): void {
	arsort( $trackRename, SORT_NUMERIC );

	// Keys are used as a set here: an isset() lookup avoids searching values.
	$claimedKeys = [];
	$matchedAdditions = [];
	$matchedDeletions = [];

	foreach ( $trackRename as $pair => $similarityPercent ) {
		[ $addKey, $deleteKey ] = explode( '|', $pair, 2 );

		$bothFree = !isset( $claimedKeys[ $addKey ] ) && !isset( $claimedKeys[ $deleteKey ] );
		if ( !$bothFree ) {
			// One side is already part of another rename.
			continue;
		}

		$claimedKeys[ $addKey ] = 1;
		$claimedKeys[ $deleteKey ] = 1;

		$addMsg = $changes->findMessage( $language, $addKey, [ MessageSourceChange::ADDITION ] );
		$deleteMsg = $changes->findMessage( $language, $deleteKey, [ MessageSourceChange::DELETION ] );

		$changes->addRename( $language, $addMsg, $deleteMsg, $similarityPercent );

		// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
		$matchedAdditions[] = $addMsg['key'];
		// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
		$matchedDeletions[] = $deleteMsg['key'];
	}

	$changes->removeAdditions( $language, $matchedAdditions );
	$changes->removeDeletions( $language, $matchedDeletions );
}
427
/**
 * Tell whether the cache holds an entry for the given message key.
 *
 * On a cache miss this always returns false. Before returning, it logs
 * whether the miss was expected (info) or unexpected (warning). A miss is
 * expected when the cache update time lies strictly between the timestamp
 * of the first and of the latest revision of the message page, i.e.:
 *  1. a new translation was added for a message on translatewiki.net,
 *  2. the translation was exported,
 *  3. the translation was updated on translatewiki.net.
 *
 * @param MessageGroupCache $cache
 * @param MessageCollection $collection Collection containing $messageKey
 * @param string $messageKey
 * @return bool
 */
private function hasCacheEntry(
	MessageGroupCache $cache,
	MessageCollection $collection,
	string $messageKey
): bool {
	if ( $cache->get( $messageKey ) !== false ) {
		return true;
	}

	$rawUpdateTime = $cache->getUpdateTimestamp();
	$cacheUpdateTime = $rawUpdateTime === false
		? false
		: MWTimestamp::convert( TS_MW, $rawUpdateTime );

	$page = $this->pageStore->getPageForLink( $collection->keys()[ $messageKey ] );
	$firstRevision = $this->revisionLookup->getFirstRevision( $page );
	$currentRevision = $this->revisionLookup->getRevisionByTitle( $page );

	$logger = LoggerFactory::getInstance( 'Translate' );

	$missIsExpected = $cacheUpdateTime !== false
		&& $firstRevision && $firstRevision->getTimestamp() < $cacheUpdateTime
		&& $currentRevision && $cacheUpdateTime < $currentRevision->getTimestamp();

	$context = [
		'messageKey' => $messageKey,
		'language' => $collection->getLanguage(),
		'cacheUpdateTime' => $cacheUpdateTime,
		'oldestRevisionTs' => $firstRevision ? $firstRevision->getTimestamp() : 'N/A',
		'latestRevisionTs' => $currentRevision ? $currentRevision->getTimestamp() : 'N/A'
	];

	if ( $missIsExpected ) {
		$logger->info(
			'Expected cache miss for {messageKey} in language: {language}. Cache update time: {cacheUpdateTime}',
			$context
		);
	} else {
		$logger->warning(
			'Unexpected cache miss for {messageKey} in language: {language}. Cache update time: {cacheUpdateTime}',
			$context
		);
	}

	return false;
}
486
487}
This class implements default behavior for file based message groups.
Caches messages of file based message group source file.
This file contains the class for core message collections implementation.
Interface for message objects used by MessageCollection.
Definition Message.php:13
Class is used to track the changes made when importing messages from the remote sources using importE...
addAddition( $language, $key, $content)
Add an addition under a message group for a specific language.
getMatchedMessage( $languageCode, $key)
Get matched rename message for a given key.
addChange( $language, $key, $content)
Add a change under a message group for a specific language.
removeChanges( $language, $keysToRemove)
Remove changes for a language under the group.
addDeletion( $language, $key, $content)
Adds a deletion under a message group for a specific language.
processGroup(FileBasedMessageGroup $group)
Finds modifications in external sources compared to wiki state.
addMessageUpdateChanges(FileBasedMessageGroup $group, string $language, MessageSourceChange $changes, int $reason, MessageGroupCache $cache)
This is the detective novel.
Essentially random collection of helper functions, similar to GlobalFunctions.php.
Definition Utilities.php:31
getTranslatableLanguages()
@inheritDoc
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.
An interface to be implemented by comparators that compute the percentage of similarity between strings.
Finds external changes for file based message groups.