Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ExternalMessageSourceStateComparator.php
1<?php
2
16
/** Value to pass as $languages to processGroup() to process every language of the group. */
public const ALL_LANGUAGES = 'all languages';

/** @var StringComparator Comparator used to score how similar two message contents are */
protected $stringComparator;

/**
 * @param StringComparator $stringComparator Comparator whose getSimilarity() is used
 *  when looking for renamed messages.
 */
public function __construct( StringComparator $stringComparator ) {
	$this->stringComparator = $stringComparator;
}
28
/**
 * Finds modifications in external sources compared to wiki state.
 *
 * The source language is always processed before any other language, so that
 * renames detected in the source language are already recorded in the result
 * when the remaining languages are compared.
 *
 * @param FileBasedMessageGroup $group Group whose files are compared.
 * @param string[]|string $languages List of language codes, or
 *  self::ALL_LANGUAGES to process every translatable language of the group.
 * @return MessageSourceChange Changes collected for all processed languages.
 * @throws InvalidArgumentException If $languages is neither an array nor
 *  self::ALL_LANGUAGES.
 */
public function processGroup( FileBasedMessageGroup $group, $languages ) {
	$changes = new MessageSourceChange();
	$processAll = $languages === self::ALL_LANGUAGES;

	if ( $processAll ) {
		$languages = $group->getTranslatableLanguages() ??
			// This means all languages
			Utilities::getLanguageNames( 'en' );

		$languages = array_keys( $languages );
	} elseif ( !is_array( $languages ) ) {
		throw new InvalidArgumentException( 'Invalid input given for $languages' );
	}

	// Process the source language before others. Source language might not
	// be included in $group->getTranslatableLanguages(). The expected
	// behavior is that source language is always processed when given
	// self::ALL_LANGUAGES.
	$sourceLanguage = $group->getSourceLanguage();
	$index = array_search( $sourceLanguage, $languages );

	// Only drop the entry when it was actually found. array_search() returns
	// false on a miss, and using false as an array offset is cast to 0, which
	// would silently remove (and therefore skip) the first language.
	if ( $index !== false ) {
		unset( $languages[$index] );
	}

	if ( $processAll || $index !== false ) {
		$this->processLanguage( $group, $sourceLanguage, $changes );
	}

	foreach ( $languages as $language ) {
		$this->processLanguage( $group, $language, $changes );
	}

	return $changes;
}
83
/**
 * Compares one language of the group against the wiki, but only when the
 * message group cache for that language reports itself as not valid.
 *
 * @param FileBasedMessageGroup $group
 * @param string $language Language code to process.
 * @param MessageSourceChange $changes Collector that receives the differences.
 */
protected function processLanguage(
	FileBasedMessageGroup $group, $language, MessageSourceChange $changes
) {
	$cache = $group->getMessageGroupCache( $language );

	// NOTE(review): $reason is presumably filled in by reference by isValid();
	// it is forwarded as-is to addMessageUpdateChanges().
	$reason = 0;
	if ( $cache->isValid( $reason ) ) {
		return;
	}

	$this->addMessageUpdateChanges( $group, $language, $changes, $reason, $cache );

	$hasModifications = $changes->getModificationsForLanguage( $language ) !== [];
	if ( !$hasModifications ) {
		// File and wiki state match, so refresh the cache right away.
		// Otherwise the cache would lag behind the file state and produce
		// false positive conflicts later.
		$cache->create();
	}
}
100
/**
 * Works out what changed for one language by comparing three states: the
 * message file, the translations in the wiki, and the message group cache.
 *
 * Keys present in both file and wiki are recorded as changes (or as renames
 * for non-source languages when the source language has a matching rename),
 * keys only in the file as additions, and keys only in the wiki as deletions.
 * Deletions are only recorded when a cache exists. Rename detection runs last.
 *
 * @param FileBasedMessageGroup $group
 * @param string $language Language code being compared.
 * @param MessageSourceChange $changes Collector that receives the differences.
 * @param mixed $reason Cache state; compared against MessageGroupCache::NO_CACHE.
 * @param MessageGroupCache $cache Cache queried with get() per message key
 *  (type presumed from the NO_CACHE constant — confirm).
 * @throws RuntimeException If the file of the source language does not exist.
 */
protected function addMessageUpdateChanges(
	FileBasedMessageGroup $group, $language, MessageSourceChange $changes, $reason, $cache
) {
	// initCollection returns empty list before first import
	$collection = $group->initCollection( $language );
	$collection->filter( 'hastranslation', false );
	$collection->loadTranslations();
	$wikiKeys = $collection->getMessageKeys();

	$sourceLanguage = $group->getSourceLanguage();
	$isSourceLanguage = $language === $sourceLanguage;

	// By-pass cached message definitions
	$ffs = $group->getFFS();
	if ( $isSourceLanguage && !$ffs->exists( $language ) ) {
		$path = $group->getSourceFilePath( $language );
		throw new RuntimeException( "Source message file for {$group->getId()} does not exist: $path" );
	}

	$file = $ffs->read( $language );
	if ( $file === false ) {
		// The file does not exist, nothing to compare
		return;
	}

	if ( !isset( $file['MESSAGES'] ) ) {
		// Something went wrong while reading the file
		$id = $group->getId();
		$ffsClass = get_class( $ffs );
		error_log( "$id has an FFS ($ffsClass) - it didn't return cake for $language" );

		return;
	}

	$fileMessages = $file['MESSAGES'];
	$fileKeys = array_keys( $fileMessages );
	$hasCache = $reason !== MessageGroupCache::NO_CACHE;
	$supportsFuzzy = $ffs->supportsFuzzy();
	$changesToRemove = [];

	foreach ( array_intersect( $fileKeys, $wikiKeys ) as $key ) {
		$sourceContent = $fileMessages[$key];
		$wikiMessage = $collection[$key];

		// @todo: Fuzzy checking can also be moved to $ffs->isContentEqual();
		// If FFS doesn't support it, ignore fuzziness as difference
		$wikiContent = str_replace( TRANSLATE_FUZZY, '', $wikiMessage->translation() );

		// But if it does, ensure we have exactly one fuzzy marker prefixed
		if ( $supportsFuzzy === 'yes' && $wikiMessage->hasTag( 'fuzzy' ) ) {
			$wikiContent = TRANSLATE_FUZZY . $wikiContent;
		}

		if ( $ffs->isContentEqual( $sourceContent, $wikiContent ) ) {
			// File and wiki state agree, nothing to do
			continue;
		}

		// Consult the interim cache to tell apart changes made in the wiki,
		// in the file, or in both.
		if ( $hasCache ) {
			$cacheContent = $cache->get( $key );

			// The common case: the string was changed in the wiki since the
			// last export, i.e. source === cache && cache !== wiki. Skip it.
			if (
				!$ffs->isContentEqual( $wikiContent, $cacheContent ) &&
				$ffs->isContentEqual( $sourceContent, $cacheContent )
			) {
				continue;
			}
		}

		// Treat $key as a potentially old key: for non-source languages look
		// for a corresponding rename in the source language. The key of the
		// matched message is the new, renamed key.
		$renameMsg = $isSourceLanguage
			? null
			: $changes->getMatchedMessage( $sourceLanguage, $key );

		if ( $renameMsg === null ) {
			$changes->addChange( $language, $key, $sourceContent );
			continue;
		}

		// The source language renamed this key, while this language has a
		// content change under the OLD key. Record it as a rename so that the
		// key gets renamed first and the content updated afterwards.
		$this->addNonSourceRenames(
			$changes, $key, $renameMsg['key'], $sourceContent, $wikiContent, $language
		);
		$changesToRemove[] = $key;
	}

	$changes->removeChanges( $language, $changesToRemove );

	foreach ( array_diff( $fileKeys, $wikiKeys ) as $key ) {
		$changes->addAddition( $language, $key, $fileMessages[$key] );
	}

	// Without a cache, messages missing from the file are not considered
	// deleted - they probably are not exported yet. For example translations
	// of a new language are exported the first time.
	if ( $hasCache ) {
		foreach ( array_diff( $wikiKeys, $fileKeys ) as $key ) {
			// A key that never existed in the cache must have been newly
			// created in the wiki, so it is not a deletion.
			if ( $cache->get( $key ) !== false ) {
				$changes->addDeletion( $language, $key, $collection[$key]->translation() );
			}
		}
	}

	if ( $isSourceLanguage ) {
		$this->findAndMarkSourceRenames( $changes, $language );
	} else {
		// Non source language
		$this->checkNonSourceAdditionsForRename(
			$changes, $sourceLanguage, $language, $collection, $wikiKeys
		);
	}
}
254
/**
 * Converts additions of a non-source language into renames when the same key
 * is recorded as a rename in the source language.
 *
 * @param MessageSourceChange $changes Collector that is read and updated.
 * @param string $sourceLanguage Source language code of the group.
 * @param string $targetLanguage Language whose additions are inspected.
 * @param MessageCollection $wiki Messages of the target language in the wiki.
 * @param string[] $wikiKeys Message keys available in $wiki.
 */
private function checkNonSourceAdditionsForRename(
	MessageSourceChange $changes, $sourceLanguage, $targetLanguage, MessageCollection $wiki, $wikiKeys
) {
	$additions = $changes->getAdditions( $targetLanguage );
	if ( $additions === [] ) {
		return;
	}

	$renamedAdditionKeys = [];
	$renamedDeletionKeys = [];

	foreach ( $additions as $addition ) {
		$newKey = $addition['key'];

		// Is this key part of a rename in the source language?
		$sourceRename = $changes->findMessage(
			$sourceLanguage, $newKey, [ MessageSourceChange::RENAME ]
		);
		if ( $sourceRename === null ) {
			continue;
		}

		// The key is new here and renamed in the source language, so look up
		// the other half of that rename to learn the old key.
		$sourceCounterpart = $changes->getMatchedMessage( $sourceLanguage, $sourceRename['key'] );
		if ( $sourceCounterpart === null ) {
			continue;
		}
		$oldKey = $sourceCounterpart['key'];

		$deletion = $changes->findMessage(
			$targetLanguage, $oldKey, [ MessageSourceChange::DELETION ]
		);

		// The deletion may be absent: either the cache lacked the translation
		// so no deletion was recorded, or the key has not been removed for
		// this language. Fall back to the wiki content, or to an empty string.
		if ( $deletion !== null ) {
			$oldContent = $deletion['content'];
		} elseif ( in_array( $oldKey, $wikiKeys ) ) {
			$oldContent = $wiki[ $oldKey ]->translation();
		} else {
			$oldContent = '';
		}

		$similarity = $this->stringComparator->getSimilarity(
			$addition['content'], $oldContent
		);

		$changes->addRename(
			$targetLanguage,
			[ 'key' => $newKey, 'content' => $addition['content'] ],
			[ 'key' => $oldKey, 'content' => $oldContent ],
			$similarity
		);

		$renamedDeletionKeys[] = $oldKey;
		$renamedAdditionKeys[] = $newKey;
	}

	$changes->removeAdditions( $targetLanguage, $renamedAdditionKeys );
	$changes->removeDeletions( $targetLanguage, $renamedDeletionKeys );
}
330
/**
 * Detects renames in the source language by comparing the content of every
 * added message with the content of every deleted message.
 *
 * @param MessageSourceChange $changes Collector that is read and updated.
 * @param string $sourceLanguage Source language code of the group.
 */
private function findAndMarkSourceRenames( MessageSourceChange $changes, $sourceLanguage ) {
	$deletions = $changes->getDeletions( $sourceLanguage );
	$additions = $changes->getAdditions( $sourceLanguage );
	if ( $deletions === [] || $additions === [] ) {
		// A rename needs both an added and a deleted message
		return;
	}

	// Candidate pairs, keyed by "<added key>|<deleted key>" with the
	// similarity as value, for example:
	// [ A1|D1 => 1.0, A1|D2 => 0.95, A2|D1 => 0.95 ]
	$candidates = [];
	foreach ( $additions as $added ) {
		foreach ( $deletions as $deleted ) {
			$similarity = $this->stringComparator->getSimilarity(
				$added['content'], $deleted['content']
			);

			if ( !$changes->areStringsSimilar( $similarity ) ) {
				continue;
			}

			$candidates[ $added['key'] . '|' . $deleted['key'] ] = $similarity;
		}
	}

	$this->matchRenames( $changes, $candidates, $sourceLanguage );
}
367
/**
 * Records a rename for a non-source language: the file content under the new
 * key is the added side, the wiki content under the old key the removed side.
 *
 * @param MessageSourceChange $changes Collector that receives the rename.
 * @param string $key Old message key.
 * @param string $renameKey New message key.
 * @param string $sourceContent Content read from the file.
 * @param string $wikiContent Content currently in the wiki.
 * @param string $language Language code the rename is recorded for.
 */
private function addNonSourceRenames(
	MessageSourceChange $changes, $key, $renameKey, $sourceContent, $wikiContent, $language
) {
	$similarity = $this->stringComparator->getSimilarity( $sourceContent, $wikiContent );

	$changes->addRename(
		$language,
		[ 'key' => $renameKey, 'content' => $sourceContent ],
		[ 'key' => $key, 'content' => $wikiContent ],
		$similarity
	);
}
395
/**
 * Turns candidate pairs into renames, highest similarity first, using every
 * added key and every deleted key at most once.
 *
 * @param MessageSourceChange $changes Collector that is read and updated.
 * @param array $trackRename Map of "<added key>|<deleted key>" to similarity.
 * @param string $language Language code the renames are recorded for.
 */
private function matchRenames( MessageSourceChange $changes, array $trackRename, $language ) {
	// Best matches first, so that they claim their keys before weaker ones
	arsort( $trackRename, SORT_NUMERIC );

	// Keys that are already part of a rename; a key lookup is cheaper than
	// storing values and searching the array for them.
	$claimedKeys = [];
	$additionsToRemove = [];
	$deletionsToRemove = [];

	foreach ( $trackRename as $pair => $similarityPercent ) {
		[ $addKey, $deleteKey ] = explode( '|', $pair, 2 );

		if ( !isset( $claimedKeys[ $addKey ] ) && !isset( $claimedKeys[ $deleteKey ] ) ) {
			$claimedKeys[ $addKey ] = true;
			$claimedKeys[ $deleteKey ] = true;

			$addMsg = $changes->findMessage( $language, $addKey, [ MessageSourceChange::ADDITION ] );
			$deleteMsg = $changes->findMessage( $language, $deleteKey, [ MessageSourceChange::DELETION ] );

			$changes->addRename( $language, $addMsg, $deleteMsg, $similarityPercent );

			// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
			$additionsToRemove[] = $addMsg['key'];
			// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
			$deletionsToRemove[] = $deleteMsg['key'];
		}
		// Otherwise one of the keys is already mapped to another name.
	}

	$changes->removeAdditions( $language, $additionsToRemove );
	$changes->removeDeletions( $language, $deletionsToRemove );
}
435}
const ALL_LANGUAGES
Process all languages supported by the message group.
addMessageUpdateChanges(FileBasedMessageGroup $group, $language, MessageSourceChange $changes, $reason, $cache)
This is the detective novel.
processGroup(FileBasedMessageGroup $group, $languages)
Finds modifications in external sources compared to wiki state.
This class implements default behavior for file based message groups.
initCollection( $code)
@inheritDoc
This file contains the class for core message collections implementation.
Interface for message objects used by MessageCollection.
Definition Message.php:13
Class is used to track the changes made when importing messages from the remote sources using importE...
addRename( $language, $addedMessage, $deletedMessage, $similarity=0)
Adds a rename under a message group for a specific language.
removeAdditions( $language, $keysToRemove)
Remove additions for a language under the group.
addAddition( $language, $key, $content)
Add an addition under a message group for a specific language.
findMessage( $language, $key, $possibleStates=[], &$modificationType=null)
Finds a message with the given key across different types of modifications.
getMatchedMessage( $languageCode, $key)
Get matched rename message for a given key.
addChange( $language, $key, $content)
Add a change under a message group for a specific language.
getModificationsForLanguage( $language)
Get all modifications for a language under the group.
removeDeletions( $language, $keysToRemove)
Remove deletions for a language under the group.
getAdditions( $language)
Fetch additions for a message group under a language.
removeChanges( $language, $keysToRemove)
Remove changes for a language under the group.
getDeletions( $language)
Fetch deletions for a message group under a language.
addDeletion( $language, $key, $content)
Adds a deletion under a message group for a specific language.
areStringsSimilar( $similarity)
Checks whether the given similarity percent meets the minimum threshold.
Essentially random collection of helper functions, similar to GlobalFunctions.php.
Definition Utilities.php:31
getTranslatableLanguages()
@inheritDoc
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.
An interface to be implemented by comparators that will compare percentage of similarity between strings.