Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ExportTranslationsMaintenanceScript.php
1<?php
2
4
13use MediaWiki\Logger\LoggerFactory;
14use MediaWiki\MediaWikiServices;
15use MessageGroup;
16
/** Export action: remove the already exported file of the language. */
27 private const ACTION_DELETE = 'delete';
/** Export action: export the language, whether or not its file already exists. */
29 private const ACTION_CREATE = 'create';
/** Export action: export the language only when its file already exists in the target. */
31 private const ACTION_UPDATE = 'update';
32
/**
 * Set the script description, register every supported command line
 * option and declare the dependency on the Translate extension.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Export translations to files.' );

	// One entry per option, in registration order. Each entry is the exact
	// argument list handed to addOption(); entries with only two elements
	// rely on addOption()'s own defaults for the remaining parameters.
	$optionDefinitions = [
		[
			'group',
			'Comma separated list of message group IDs (supports * wildcard) to export',
			self::REQUIRED,
			self::HAS_ARG
		],
		[
			'lang',
			'Comma separated list of language codes to export or * for all languages',
			self::REQUIRED,
			self::HAS_ARG
		],
		[
			'always-export-languages',
			'(optional) Comma separated list of languages to export ignoring export threshold',
			self::OPTIONAL,
			self::HAS_ARG
		],
		[
			'never-export-languages',
			'(optional) Comma separated list of languages to never export (overrides everything else)',
			self::OPTIONAL,
			self::HAS_ARG
		],
		[
			'skip-source-language',
			'(optional) Do not export the source language of each message group',
			self::OPTIONAL,
			self::NO_ARG
		],
		[
			'target',
			'Target directory for exported files',
			self::REQUIRED,
			self::HAS_ARG
		],
		[
			'skip',
			'(deprecated) See --never-export-languages',
			self::OPTIONAL,
			self::HAS_ARG
		],
		[
			'skipgroup',
			'(optional) Comma separated list of message group IDs (supports * wildcard) to not export',
			self::OPTIONAL,
			self::HAS_ARG
		],
		[
			'threshold',
			'(optional) Threshold for translation completion percentage that must be exceeded for initial export',
			self::OPTIONAL,
			self::HAS_ARG
		],
		[
			'removal-threshold',
			'(optional) Threshold for translation completion percentage that must be exceeded to keep the file',
			self::OPTIONAL,
			self::HAS_ARG
		],
		[
			'no-fuzzy',
			'(optional) Do not include any messages marked as fuzzy/outdated'
		],
		[
			'offline-gettext-format',
			'(optional) Export languages in offline Gettext format. Give a file pattern with '
				. '%GROUPID% and %CODE%. Empty pattern defaults to %GROUPID%/%CODE%.po.',
			self::OPTIONAL,
			self::HAS_ARG
		],
		[
			'skip-group-sync-check',
			'(optional) Skip exporting group if synchronization is still in progress or if there ' .
				'was an error during synchronization. See: ' .
				'https://www.mediawiki.org/wiki/Help:Extension:Translate/Group_management#Strong_synchronization'
		],
	];

	foreach ( $optionDefinitions as $definition ) {
		$this->addOption( ...$definition );
	}

	$this->requireExtension( 'Translate' );
}
117
/**
 * Export the translations of every selected message group into the target directory.
 *
 * Resolves the message groups and languages requested on the command line,
 * decides for each language whether its file is written, written only when
 * it already exists, or deleted, and writes the files through the file
 * format of the group. Progress and timings are sent to the group
 * synchronization log channel.
 */
public function execute() {
	// Only read inside the per-language loop below.
	global $wgTranslateDocumentationLanguageCode;

	$log = LoggerFactory::getInstance( LogNames::GROUP_SYNCHRONIZATION );
	$groupPattern = $this->getOption( 'group' ) ?? '';
	$excludedGroupPattern = $this->getOption( 'skipgroup' ) ?? '';
	$ignoreSyncState = $this->hasOption( 'skip-group-sync-check' );

	$log->info( 'Starting exports for groups {groups}', [ 'groups' => $groupPattern ] );
	$scriptStartedAt = microtime( true );

	$target = $this->getOption( 'target' );
	if ( !is_writable( $target ) ) {
		$this->fatalError( "Target directory is not writable ($target)." );
	}

	$exportThreshold = $this->getOption( 'threshold' );
	$removalThreshold = $this->getOption( 'removal-threshold' );
	// Statistics are only consulted when at least one threshold was given.
	$thresholdsInUse = $exportThreshold !== null || $removalThreshold !== null;
	$excludeFuzzy = $this->hasOption( 'no-fuzzy' );
	$requestedLanguages = $this->parseLanguageCodes( $this->getOption( 'lang' ) );
	$alwaysExportLanguages = $this->csv2array( $this->getOption( 'always-export-languages' ) ?? '' );
	// --skip is the deprecated spelling of --never-export-languages.
	$neverExportLanguages = $this->csv2array(
		$this->getOption( 'never-export-languages' ) ?? $this->getOption( 'skip' ) ?? ''
	);
	$skipSourceLanguage = $this->hasOption( 'skip-source-language' );

	$forOffline = $this->hasOption( 'offline-gettext-format' );
	$offlineTargetPattern = $this->getOption( 'offline-gettext-format' ) ?: "%GROUPID%/%CODE%.po";

	$groups = $this->getMessageGroups( $groupPattern, $excludedGroupPattern, $forOffline );
	if ( $groups === [] ) {
		$this->fatalError( 'EE1: No valid message groups identified.' );
	}

	$syncCacheEnabled = MediaWikiServices::getInstance()
		->getMainConfig()
		->get( 'TranslateGroupSynchronizationCache' );
	$syncCache = Services::getInstance()->getGroupSynchronizationCache();
	$checkSyncState = $syncCacheEnabled && !$ignoreSyncState;

	foreach ( $groups as $groupId => $group ) {
		if ( $checkSyncState && !$this->canGroupBeExported( $syncCache, $groupId ) ) {
			continue;
		}

		if ( $thresholdsInUse ) {
			$log->info( 'Calculating stats for group {groupId}', [ 'groupId' => $groupId ] );
			$statsStartedAt = microtime( true );

			$languageExportActions = $this->getLanguageExportActions(
				$groupId,
				$requestedLanguages,
				$alwaysExportLanguages,
				(int)$exportThreshold,
				(int)$removalThreshold
			);

			$statsFinishedAt = microtime( true );
			$log->info(
				'Finished calculating stats for group {groupId}. Time: {duration} secs',
				[
					'groupId' => $groupId,
					'duration' => round( $statsFinishedAt - $statsStartedAt, 3 ),
				]
			);
		} else {
			// Without thresholds every requested and every always-export language is
			// written; the union keeps the requested languages first.
			$languageExportActions = array_fill_keys( $requestedLanguages, self::ACTION_CREATE )
				+ array_fill_keys( $alwaysExportLanguages, self::ACTION_CREATE );
		}

		foreach ( $neverExportLanguages as $neverExported ) {
			unset( $languageExportActions[ $neverExported ] );
		}

		if ( $skipSourceLanguage ) {
			unset( $languageExportActions[ $group->getSourceLanguage() ] );
		}

		// Nothing left to do for this group.
		if ( $languageExportActions === [] ) {
			continue;
		}

		$this->output( "Exporting group $groupId\n" );
		$log->info( 'Exporting group {groupId}', [ 'groupId' => $groupId ] );

		if ( $forOffline ) {
			$fileBasedGroup = FileBasedMessageGroup::newFromMessageGroup( $group, $offlineTargetPattern );
			$fileFormat = new GettextFormat( $fileBasedGroup );
			$fileFormat->setOfflineMode( true );
		} else {
			// getMessageGroups() already dropped groups that are not file based, so
			// this check mostly documents the type for linting tools and IDEs.
			$fileBasedGroup = $group;
			if ( !$fileBasedGroup instanceof FileBasedMessageGroup ) {
				$this->fatalError( "EE2: Unexportable message group $groupId" );
			}
			$fileFormat = $fileBasedGroup->getFFS();
		}

		$fileFormat->setWritePath( $target );
		$sourceLanguage = $group->getSourceLanguage();
		$collection = $group->initCollection( $sourceLanguage );
		$inclusionList = $group->getTranslatableLanguages();

		// Seconds accumulated per phase over all languages of this group.
		$langExportTimes = [
			'collection' => 0,
			'ffs' => 0,
		];
		$languagesExportedCount = 0;

		$groupExportStartedAt = microtime( true );
		foreach ( $languageExportActions as $lang => $action ) {
			// A non-array inclusion list means no restriction; otherwise only
			// languages present in the list may be exported (or deleted).
			if ( is_array( $inclusionList ) && !isset( $inclusionList[$lang] ) ) {
				continue;
			}

			$targetFilePath = $target . '/' . $fileBasedGroup->getTargetFilename( $lang );

			if ( $action === self::ACTION_DELETE ) {
				// Best effort: failures (for example a file that never existed) are ignored.
				// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
				if ( @unlink( $targetFilePath ) ) {
					$log->info( "Removed $targetFilePath due to removal threshold" );
				}
				continue;
			}

			if ( $action === self::ACTION_UPDATE && !file_exists( $targetFilePath ) ) {
				// Language is under export threshold, do not export yet
				$log->info( "Not creating $targetFilePath due to export threshold" );
				continue;
			}

			$collectionStartedAt = microtime( true );
			$collection->resetForNewLanguage( $lang );
			$collection->loadTranslations();

			// Ignored messages are kept only for the source language and
			// for the message documentation language.
			$keepIgnored = $lang === $wgTranslateDocumentationLanguageCode
				|| $lang === $sourceLanguage;
			if ( !$keepIgnored ) {
				$collection->filter( MessageCollection::FILTER_IGNORED, MessageCollection::EXCLUDE_MATCHING );
			}

			if ( $excludeFuzzy ) {
				$collection->filter( MessageCollection::FILTER_FUZZY, MessageCollection::EXCLUDE_MATCHING );
			}

			$languagesExportedCount++;

			$collectionFinishedAt = microtime( true );
			$langExportTimes['collection'] += ( $collectionFinishedAt - $collectionStartedAt );

			$writeStartedAt = microtime( true );
			$fileFormat->write( $collection );
			$writeFinishedAt = microtime( true );
			$langExportTimes['ffs'] += ( $writeFinishedAt - $writeStartedAt );
		}
		$groupExportFinishedAt = microtime( true );

		$log->info(
			'Done exporting {count} languages for group {groupId}. Time taken {duration} secs.',
			[
				'count' => $languagesExportedCount,
				'groupId' => $groupId,
				'duration' => round( $groupExportFinishedAt - $groupExportStartedAt, 3 ),
			]
		);

		foreach ( $langExportTimes as $phase => $seconds ) {
			$log->info(
				'Time taken by "{type}" for group {groupId} – {duration} secs.',
				[
					'groupId' => $groupId,
					'type' => $phase,
					'duration' => round( $seconds, 3 ),
				]
			);
		}
	}

	$scriptFinishedAt = microtime( true );
	$log->info(
		'Finished export process for groups {groups}. Time: {duration} secs.',
		[
			'groups' => $groupPattern,
			'duration' => round( $scriptFinishedAt - $scriptStartedAt, 3 ),
		]
	);
}
320
/**
 * Resolve the message groups to export.
 *
 * @param string $groupPattern Comma separated group IDs, wildcards allowed
 * @param string $excludePattern Comma separated group IDs to leave out, wildcards allowed
 * @param bool $forOffline When false, meta groups and groups that are not
 *  file based are dropped with a notice on the script output
 * @return array Remaining message groups keyed by group ID
 */
private function getMessageGroups(
	string $groupPattern,
	string $excludePattern,
	bool $forOffline
): array {
	$requestedIds = MessageGroups::expandWildcards( explode( ',', trim( $groupPattern ) ) );
	$groups = MessageGroups::getGroupsById( $requestedIds );

	if ( !$forOffline ) {
		foreach ( $groups as $groupId => $group ) {
			if ( $group->isMeta() ) {
				$notice = "Skipping meta message group $groupId.\n";
			} elseif ( !$group instanceof FileBasedMessageGroup ) {
				$notice = "EE2: Unexportable message group $groupId.\n";
			} else {
				// Exportable group, keep it.
				continue;
			}

			$this->output( $notice );
			unset( $groups[$groupId] );
		}
	}

	$excludedIds = MessageGroups::expandWildcards( explode( ',', trim( $excludePattern ) ) );
	foreach ( $excludedIds as $groupId ) {
		// Only report exclusions that actually remove a selected group.
		if ( !isset( $groups[$groupId] ) ) {
			continue;
		}

		unset( $groups[$groupId] );
		$this->output( "Group $groupId is in skipgroup.\n" );
	}

	return $groups;
}
354
/**
 * Decide, from the translation statistics of a group, what to do with
 * the exported file of each language.
 *
 * @param string $groupId Message group whose statistics are consulted
 * @param array $requestedLanguages Language codes selected for export
 * @param array $alwaysExportLanguages Language codes exported regardless of thresholds
 * @param int $exportThreshold Completion percentage that must be exceeded to create a file
 * @param int $removalThreshold Completion percentage below which the file is deleted
 * @return array Map of language code to one of the ACTION_* constants
 */
private function getLanguageExportActions(
	string $groupId,
	array $requestedLanguages,
	array $alwaysExportLanguages,
	int $exportThreshold = 0,
	int $removalThreshold = 0
): array {
	$groupStats = MessageGroupStats::forGroup( $groupId );
	$actions = [];

	foreach ( $requestedLanguages as $code ) {
		$languageStats = $groupStats[$code] ?? null;
		if ( $languageStats === null ) {
			// Statistics unavailable. This should only happen if unknown language code requested.
			continue;
		}

		$total = $languageStats[MessageGroupStats::TOTAL];
		$translated = $languageStats[MessageGroupStats::TRANSLATED];

		// An empty group counts as 0% to avoid dividing by zero.
		$percentage = 0;
		if ( $total ) {
			$percentage = $translated / $total * 100;
		}

		if ( $percentage === 0 || $percentage < $removalThreshold ) {
			$action = self::ACTION_DELETE;
		} elseif ( $percentage > $exportThreshold ) {
			$action = self::ACTION_CREATE;
		} else {
			$action = self::ACTION_UPDATE;
		}

		$actions[$code] = $action;
	}

	foreach ( $alwaysExportLanguages as $code ) {
		$translated = $groupStats[$code][MessageGroupStats::TRANSLATED] ?? null;
		// DWIM: Do not export languages with zero translations, even if requested
		$actions[$code] = $translated === 0 ? self::ACTION_DELETE : self::ACTION_CREATE;
	}

	return $actions;
}
396
/**
 * Check the synchronization state of a group and report why it cannot be
 * exported, if that is the case.
 *
 * The checks run in order (being processed, has errors, in review) and
 * stop at the first one that applies.
 *
 * @param GroupSynchronizationCache $groupSyncCache
 * @param string $groupId
 * @return bool True when none of the checks applies
 */
private function canGroupBeExported( GroupSynchronizationCache $groupSyncCache, string $groupId ): bool {
	$refusal = null;

	if ( $groupSyncCache->isGroupBeingProcessed( $groupId ) ) {
		$refusal = "Group $groupId is currently being synchronized; skipping exports\n";
	} elseif ( $groupSyncCache->groupHasErrors( $groupId ) ) {
		$refusal = "Skipping $groupId due to synchronization error\n";
	} elseif ( $groupSyncCache->isGroupInReview( $groupId ) ) {
		$refusal = "Group $groupId is currently in review. Review changes on Special:ManageMessageGroups\n";
	}

	if ( $refusal === null ) {
		return true;
	}

	$this->error( $refusal );
	return false;
}
414
/**
 * Split a comma separated string into a list of trimmed, non-empty values.
 *
 * The result is always a contiguous, zero-based list: empty segments
 * (as in "a,,b" or a trailing comma) are dropped without leaving gaps
 * in the keys.
 *
 * @param string $input Comma separated values
 * @return string[] Values in input order; empty when nothing usable was given
 */
private function csv2array( string $input ): array {
	$values = [];

	foreach ( explode( ',', $input ) as $segment ) {
		$segment = trim( $segment );
		if ( $segment !== '' ) {
			$values[] = $segment;
		}
	}

	return $values;
}
424
/**
 * Turn the value of the language option into a list of language codes.
 *
 * @param string $input Either "*" or comma separated language codes
 * @return array For "*", the codes of all languages reported by
 *  LanguageNameUtils::getLanguageNames(), sorted by code; otherwise the
 *  codes parsed from the comma separated input
 */
private function parseLanguageCodes( string $input ): array {
	if ( $input !== '*' ) {
		return $this->csv2array( $input );
	}

	$languageNames = MediaWikiServices::getInstance()
		->getLanguageNameUtils()
		->getLanguageNames();
	ksort( $languageNames );

	return array_keys( $languageNames );
}
436}
This class implements default behavior for file based message groups.
FileFormat class that implements support for gettext file format.
Constants for log channel names used in this extension.
Definition LogNames.php:13
const GROUP_SYNCHRONIZATION
Channel for message group synchronization.
Definition LogNames.php:18
Factory class for accessing message groups individually by id or all of them as a list.
This file contains the class for core message collections implementation.
Minimal service container.
Definition Services.php:59
This class aims to provide efficient mechanism for fetching translation completion stats.
isGroupBeingProcessed(string $groupId)
Check if the group is in synchronization.
Base maintenance script containing constants and methods used in multiple scripts. Hopefully the const...
Interface for message groups.
Finds external changes for file based message groups.