Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ExternalMessageSourceStateComparator.php
1<?php
2
15
/** Sentinel accepted by processGroup() meaning "process every language of the group". */
public const ALL_LANGUAGES = 'all languages';

/** @var StringComparator Scores how similar two message contents are; used for rename detection. */
protected $stringComparator;

/**
 * @param StringComparator $stringComparator Comparator used when pairing
 *  added and deleted messages as renames.
 */
public function __construct( StringComparator $stringComparator ) {
	$this->stringComparator = $stringComparator;
}
27
/**
 * Finds modifications in external sources compared to wiki state.
 *
 * The source language is processed before every other language. When
 * self::ALL_LANGUAGES is given it is processed even if the group does not
 * list it among its translatable languages.
 *
 * @param FileBasedMessageGroup $group Group whose files are compared.
 * @param string[]|string $languages List of language codes, or self::ALL_LANGUAGES.
 * @return MessageSourceChange Collected changes for all processed languages.
 * @throws InvalidArgumentException If $languages is neither an array nor self::ALL_LANGUAGES.
 */
public function processGroup( FileBasedMessageGroup $group, $languages ) {
	$changes = new MessageSourceChange();
	$processAll = $languages === self::ALL_LANGUAGES;

	if ( $processAll ) {
		// null means the group does not restrict its languages: use all known ones
		$languages = $group->getTranslatableLanguages() ?? Utilities::getLanguageNames( 'en' );
		$languages = array_keys( $languages );
	} elseif ( !is_array( $languages ) ) {
		throw new InvalidArgumentException( 'Invalid input given for $languages' );
	}

	// Process the source language before others. Source language might not
	// be included in $group->getTranslatableLanguages(). The expected
	// behavior is that source language is always processed when given
	// self::ALL_LANGUAGES.
	$sourceLanguage = $group->getSourceLanguage();
	$index = array_search( $sourceLanguage, $languages, true );

	// Only drop the entry when it was really found: unset( $languages[false] )
	// would be treated as unset( $languages[0] ) and silently skip an
	// unrelated language.
	if ( $index !== false ) {
		unset( $languages[$index] );
	}

	if ( $processAll || $index !== false ) {
		$this->processLanguage( $group, $sourceLanguage, $changes );
	}

	foreach ( $languages as $language ) {
		$this->processLanguage( $group, $language, $changes );
	}

	return $changes;
}
86
/**
 * Collects the changes of a single language, but only when the group's
 * message cache for that language does not report itself as valid.
 *
 * @param FileBasedMessageGroup $group
 * @param string $language Language code
 * @param MessageSourceChange $changes Collector the detected changes are added to
 */
protected function processLanguage(
	FileBasedMessageGroup $group, $language, MessageSourceChange $changes
) {
	$cache = $group->getMessageGroupCache( $language );

	// Presumably filled in by isValid() with the reason the cache is stale;
	// it is forwarded as-is to addMessageUpdateChanges().
	$invalidReason = 0;
	if ( $cache->isValid( $invalidReason ) ) {
		return;
	}

	$this->addMessageUpdateChanges( $group, $language, $changes, $invalidReason, $cache );

	if ( $changes->getModificationsForLanguage( $language ) !== [] ) {
		return;
	}

	// File and wiki state match, so refresh the cache right away. Otherwise
	// the cache would lag behind the file state and produce false positive
	// conflicts later.
	$cache->create();
}
103
/**
 * Compares the file, wiki and cached state of one language and records every
 * difference in $changes: content changes, additions, deletions and renames.
 *
 * Returns without recording anything when the file cannot be read or when the
 * file format support does not return a MESSAGES section.
 *
 * @param FileBasedMessageGroup $group
 * @param string $language Language code being compared
 * @param MessageSourceChange $changes Collector the differences are added to
 * @param int $reason Cache invalidity reason; compared against MessageGroupCache::NO_CACHE
 * @param MessageGroupCache $cache Cache of the last known file state
 *  (type presumed from the getMessageGroupCache() caller)
 * @throws RuntimeException If the file of the source language does not exist
 */
protected function addMessageUpdateChanges(
	FileBasedMessageGroup $group, $language, MessageSourceChange $changes, $reason, $cache
) {
	// initCollection returns empty list before first import
	$collection = $group->initCollection( $language );
	$collection->filter( 'hastranslation', false );
	$collection->loadTranslations();
	$collectionKeys = $collection->getMessageKeys();

	$sourceLanguage = $group->getSourceLanguage();
	$isSourceLanguage = $language === $sourceLanguage;

	// Read through the FFS directly to by-pass cached message definitions
	$ffs = $group->getFFS();
	if ( $isSourceLanguage && !$ffs->exists( $language ) ) {
		$path = $group->getSourceFilePath( $language );
		throw new RuntimeException( "Source message file for {$group->getId()} does not exist: $path" );
	}

	$parsed = $ffs->read( $language );
	if ( $parsed === false ) {
		// The file does not exist
		return;
	}

	if ( !isset( $parsed['MESSAGES'] ) ) {
		// Something went wrong while reading
		$id = $group->getId();
		$ffsClass = get_class( $ffs );

		error_log( "$id has an FFS ($ffsClass) - it didn't return cake for $language" );

		return;
	}

	$fileMessages = $parsed['MESSAGES'];
	$fileKeys = array_keys( $fileMessages );
	$sharedKeys = array_intersect( $fileKeys, $collectionKeys );

	$supportsFuzzy = $ffs->supportsFuzzy();
	$hasCache = $reason !== MessageGroupCache::NO_CACHE;
	$supersededChanges = [];

	foreach ( $sharedKeys as $key ) {
		$sourceContent = $fileMessages[$key];
		$wikiMessage = $collection[$key];

		// @todo: Fuzzy checking can also be moved to $ffs->isContentEqual();
		// Strip every fuzzy marker first, so that fuzziness is not seen as a
		// difference when the FFS does not support it...
		$wikiContent = str_replace( TRANSLATE_FUZZY, '', $wikiMessage->translation() );

		// ...and when it does, put back exactly one marker as prefix.
		if ( $supportsFuzzy === 'yes' && $wikiMessage->hasTag( 'fuzzy' ) ) {
			$wikiContent = TRANSLATE_FUZZY . $wikiContent;
		}

		if ( $ffs->isContentEqual( $sourceContent, $wikiContent ) ) {
			// File and wiki state agree, nothing to do
			continue;
		}

		// Consult the interim cache to tell whether the change happened in
		// the wiki, in the file or in both.
		if ( $hasCache ) {
			$cacheContent = $cache->get( $key );

			// The common case is that only the wiki changed since the last
			// export (source === cache && cache !== wiki): skip those.
			$wikiDiffersFromCache = !$ffs->isContentEqual( $wikiContent, $cacheContent );
			if ( $wikiDiffersFromCache && $ffs->isContentEqual( $sourceContent, $cacheContent ) ) {
				continue;
			}
		}

		// Treat $key as a possibly old key and look for a matching rename in
		// the source language; the matched message carries the new key.
		$renameMsg = $isSourceLanguage ? null : $changes->getMatchedMessage( $sourceLanguage, $key );
		if ( $renameMsg === null ) {
			$changes->addChange( $language, $key, $sourceContent );
			continue;
		}

		// The key was renamed in the source language while this language has a
		// content change under the OLD key. Record it as a rename instead of a
		// change, so the key gets renamed first and the content updated after.
		$this->addNonSourceRenames(
			$changes, $key, $renameMsg['key'], $sourceContent, $wikiContent, $language
		);
		$supersededChanges[] = $key;
	}

	$changes->removeChanges( $language, $supersededChanges );

	// Keys present only in the file are additions
	foreach ( array_diff( $fileKeys, $collectionKeys ) as $key ) {
		$changes->addAddition( $language, $key, $fileMessages[$key] );
	}

	// Without a cache, messages missing from the file are not treated as
	// deleted: they probably just have not been exported yet, for example
	// the translations of a new language before their first export.
	if ( $hasCache ) {
		foreach ( array_diff( $collectionKeys, $fileKeys ) as $key ) {
			// A message the cache has never seen must have been newly made
			// in the wiki, so it is not a deletion.
			if ( $cache->get( $key ) !== false ) {
				$changes->addDeletion( $language, $key, $collection[$key]->translation() );
			}
		}
	}

	if ( $isSourceLanguage ) {
		$this->findAndMarkSourceRenames( $changes, $language );
	} else {
		$this->checkNonSourceAdditionsForRename(
			$changes, $sourceLanguage, $language, $collection, $collectionKeys
		);
	}
}
257
/**
 * Converts additions of a non-source language into renames whenever the added
 * key is the new key of a rename recorded for the source language.
 *
 * @param MessageSourceChange $changes Collector that is read and updated
 * @param string $sourceLanguage Language code of the source language
 * @param string $targetLanguage Language code of the language being processed
 * @param MessageCollection $wiki Wiki state of $targetLanguage
 * @param string[] $wikiKeys Message keys present in $wiki
 */
private function checkNonSourceAdditionsForRename(
	MessageSourceChange $changes, $sourceLanguage, $targetLanguage, MessageCollection $wiki, $wikiKeys
) {
	$additions = $changes->getAdditions( $targetLanguage );
	if ( $additions === [] ) {
		return;
	}

	// Built once so that membership checks inside the loop are O(1) and exact;
	// a loose in_array() scan could match unrelated keys through type juggling.
	$wikiKeyLookup = array_flip( $wikiKeys );

	$additionsToRemove = [];
	$deletionsToRemove = [];
	foreach ( $additions as $addedMsg ) {
		$addedMsgKey = $addedMsg['key'];

		// Check if this key is renamed in source.
		$renamedSourceMsg = $changes->findMessage(
			$sourceLanguage, $addedMsgKey, [ MessageSourceChange::RENAME ]
		);

		if ( $renamedSourceMsg === null ) {
			continue;
		}

		// Since this key is new, and is present in the renames for the source language,
		// we will add it as a rename.
		$deletedSource = $changes->getMatchedMessage( $sourceLanguage, $renamedSourceMsg['key'] );
		if ( $deletedSource === null ) {
			// No counterpart recorded for the rename: keep this as a plain
			// addition rather than recording a rename without an old key.
			continue;
		}

		$deletedMsgKey = $deletedSource['key'];
		$deletedMsg = $changes->findMessage(
			$targetLanguage, $deletedMsgKey, [ MessageSourceChange::DELETION ]
		);

		// Sometimes when the cache does not have the translations, the deleted message
		// is not added in the translations. It is also possible that for this non-source
		// language the key has not been removed.
		if ( $deletedMsg === null ) {
			$content = '';
			if ( isset( $wikiKeyLookup[$deletedMsgKey] ) ) {
				$content = $wiki[$deletedMsgKey]->translation();
			}
			$deletedMsg = [
				'key' => $deletedMsgKey,
				'content' => $content
			];
		}

		$similarityPercent = $this->stringComparator->getSimilarity(
			$addedMsg['content'], $deletedMsg['content']
		);

		$changes->addRename( $targetLanguage, [
			'key' => $addedMsgKey,
			'content' => $addedMsg['content']
		], [
			'key' => $deletedMsgKey,
			'content' => $deletedMsg['content']
		], $similarityPercent );

		$deletionsToRemove[] = $deletedMsgKey;
		$additionsToRemove[] = $addedMsgKey;
	}

	// The pairs recorded as renames must no longer show up as separate
	// additions and deletions.
	$changes->removeAdditions( $targetLanguage, $additionsToRemove );
	$changes->removeDeletions( $targetLanguage, $deletionsToRemove );
}
330
/**
 * Looks for renames in the source language by comparing the content of every
 * added message with the content of every deleted message, then hands the
 * sufficiently similar pairs to matchRenames().
 *
 * @param MessageSourceChange $changes Collector that is read and updated
 * @param string $sourceLanguage Language code of the source language
 */
private function findAndMarkSourceRenames( MessageSourceChange $changes, $sourceLanguage ) {
	$deletions = $changes->getDeletions( $sourceLanguage );
	$additions = $changes->getAdditions( $sourceLanguage );

	// A rename needs one message on each side
	if ( $deletions === [] || $additions === [] ) {
		return;
	}

	// Maps "<added key>|<deleted key>" to the similarity of the pair, e.g.
	// [ A1|D1 => 1.0, A1|D2 => 0.95, A2|D1 => 0.95 ]
	$candidates = [];
	foreach ( $additions as $added ) {
		foreach ( $deletions as $deleted ) {
			$score = $this->stringComparator->getSimilarity(
				$added['content'], $deleted['content']
			);

			if ( !$changes->areStringsSimilar( $score ) ) {
				continue;
			}

			$candidates["{$added['key']}|{$deleted['key']}"] = $score;
		}
	}

	$this->matchRenames( $changes, $candidates, $sourceLanguage );
}
367
/**
 * Records a rename for a non-source language: the old key with its wiki
 * content is paired with the new key carrying the file content.
 *
 * @param MessageSourceChange $changes Collector the rename is added to
 * @param string $key Old message key
 * @param string $renameKey New message key
 * @param string $sourceContent Content found in the file
 * @param string $wikiContent Content found in the wiki
 * @param string $language Language code
 */
private function addNonSourceRenames(
	MessageSourceChange $changes, $key, $renameKey, $sourceContent, $wikiContent, $language
) {
	$similarityPercent = $this->stringComparator->getSimilarity( $sourceContent, $wikiContent );

	$changes->addRename(
		$language,
		[ 'key' => $renameKey, 'content' => $sourceContent ],
		[ 'key' => $key, 'content' => $wikiContent ],
		$similarityPercent
	);
}
395
/**
 * Turns candidate pairs into renames, best match first, using every added and
 * every deleted key at most once.
 *
 * @param MessageSourceChange $changes Collector that is read and updated
 * @param array $trackRename Map of "<added key>|<deleted key>" to similarity
 * @param string $language Language code
 */
private function matchRenames( MessageSourceChange $changes, array $trackRename, $language ) {
	// Highest similarity first, so the best pairs claim their keys first
	arsort( $trackRename, SORT_NUMERIC );

	$alreadyRenamed = $additionsToRemove = $deletionsToRemove = [];
	foreach ( $trackRename as $key => $similarityPercent ) {
		[ $addKey, $deleteKey ] = explode( '|', $key, 2 );
		if ( isset( $alreadyRenamed[ $addKey ] ) || isset( $alreadyRenamed[ $deleteKey ] ) ) {
			// Already mapped with another name.
			continue;
		}

		$addMsg = $changes->findMessage( $language, $addKey, [ MessageSourceChange::ADDITION ] );
		$deleteMsg = $changes->findMessage( $language, $deleteKey, [ MessageSourceChange::DELETION ] );
		if ( $addMsg === null || $deleteMsg === null ) {
			// Either side is unknown to the collector. Skip the pair without
			// consuming its keys, instead of recording a rename built from null.
			continue;
		}

		// Using key should be faster than saving values and searching for them in the array.
		$alreadyRenamed[ $addKey ] = 1;
		$alreadyRenamed[ $deleteKey ] = 1;

		$changes->addRename( $language, $addMsg, $deleteMsg, $similarityPercent );

		$additionsToRemove[] = $addMsg['key'];
		$deletionsToRemove[] = $deleteMsg['key'];
	}

	// Renamed messages must no longer be listed as additions and deletions
	$changes->removeAdditions( $language, $additionsToRemove );
	$changes->removeDeletions( $language, $deletionsToRemove );
}
435}
const ALL_LANGUAGES
Process all languages supported by the message group.
addMessageUpdateChanges(FileBasedMessageGroup $group, $language, MessageSourceChange $changes, $reason, $cache)
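This is the detective novel: it compares the file, wiki and cached state of a language to work out which messages were changed, added, deleted or renamed.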
processGroup(FileBasedMessageGroup $group, $languages)
Finds modifications in external sources compared to wiki state.
This class implements default behavior for file based message groups.
initCollection( $code)
@inheritDoc
This file contains the class for core message collections implementation.
Class is used to track the changes made when importing messages from the remote sources using process...
addRename( $language, $addedMessage, $deletedMessage, $similarity=0)
Adds a rename under a message group for a specific language.
removeAdditions( $language, $keysToRemove)
Remove additions for a language under the group.
addAddition( $language, $key, $content)
Add an addition under a message group for a specific language.
findMessage( $language, $key, $possibleStates=[], &$modificationType=null)
Finds a message with the given key across different types of modifications.
getMatchedMessage( $languageCode, $key)
Get matched rename message for a given key.
addChange( $language, $key, $content)
Add a change under a message group for a specific language.
getModificationsForLanguage( $language)
Get all modifications for a language under the group.
removeDeletions( $language, $keysToRemove)
Remove deletions for a language under the group.
getAdditions( $language)
Fetch additions for a message group under a language.
removeChanges( $language, $keysToRemove)
Remove changes for a language under the group.
getDeletions( $language)
Fetch deletions for a message group under a language.
addDeletion( $language, $key, $content)
Adds a deletion under a message group for a specific language.
areStringsSimilar( $similarity)
Checks if the similarity percent passed passes the min threshold.
Essentially random collection of helper functions, similar to GlobalFunctions.php.
Definition Utilities.php:30
getTranslatableLanguages()
@inheritDoc
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.
An interface to be implemented by comparators that will compare percentage of similarity between strings.