Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ExportTtmServerDumpMaintenanceScript.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\TtmServer;
5
13use MediaWiki\Json\FormatJson;
14use MediaWiki\Language\Language;
15use MediaWiki\MediaWikiServices;
16use MediaWiki\WikiMap\WikiMap;
17use MessageGroup;
18
/** Content language of the wiki; assigned at the start of execute() before any other method reads it. */
private Language $contentLanguage;
27
/**
 * Registers the script description and its command line options.
 *
 * Three options are added, each taking a value: "output-directory" (-o, registered
 * with self::REQUIRED), "threads" (-n) and "compress" (-c), the latter two with
 * self::OPTIONAL. The help text of "compress" lists the names returned by
 * getAvailableCompressionWrappers(), or NONE when that list is empty.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Creates a dump file that can be imported to a TtmServer' );

	$this->addOption( 'output-directory', 'Which directory to output files to', self::REQUIRED, self::HAS_ARG, 'o' );
	$this->addOption( 'threads', 'How many threads to use', self::OPTIONAL, self::HAS_ARG, 'n' );

	// Build the list of accepted values for the help text of --compress.
	$wrapperNames = array_keys( $this->getAvailableCompressionWrappers() );
	$possibleValues = $wrapperNames === [] ? 'NONE' : implode( ', ', $wrapperNames );
	$this->addOption(
		'compress',
		'Use a compression filter. Possible values: ' . $possibleValues,
		self::OPTIONAL,
		self::HAS_ARG,
		'c'
	);

	$this->requireExtension( 'Translate' );
}
59
/**
 * Lists the compression stream wrappers supported by this PHP installation.
 *
 * Names of the form "compress.<something>" (for example "compress.zlib") are
 * registered as stream wrappers rather than stream filters, and the returned
 * values are used as "name://" path prefixes, so the lookup has to go through
 * stream_get_wrappers().
 *
 * @return string[] Map of wrapper name to its path prefix (the name followed by "://")
 */
private function getAvailableCompressionWrappers(): array {
	$out = [];
	foreach ( stream_get_wrappers() as $name ) {
		// Require at least one character after the "compress." prefix.
		if ( preg_match( '/^compress\..+$/', $name ) ) {
			$out[$name] = $name . '://';
		}
	}
	return $out;
}
71
/**
 * Writes one JSON file per message group into the output directory.
 *
 * Each group returned by getGroupsInPerformanceOrder() is handed to a
 * ParallelExecutor created with the requested number of threads. The file is
 * named "<group id>.json", with ".<wrapper>" appended and the wrapper prefix
 * prepended to the path when --compress is given. An unsupported --compress
 * value is reported through fatalError().
 *
 * Encoding and write failures are reported with error() instead of being
 * silently ignored; a group whose data cannot be encoded is not written.
 */
public function execute() {
	$this->contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	$threads = (int)$this->getOption( 'threads', 1 );
	$outputDir = $this->getOption( 'output-directory' );
	$requestedWrapper = $this->getOption( 'compress' );

	// Without --compress the path gets neither a wrapper prefix nor a suffix.
	$wrapper = '';
	$suffix = '';
	if ( $requestedWrapper ) {
		$availableWrappers = $this->getAvailableCompressionWrappers();
		if ( !isset( $availableWrappers[$requestedWrapper] ) ) {
			$this->fatalError(
				"Compression wrapper '$requestedWrapper' is not supported"
			);
		}
		$wrapper = $availableWrappers[$requestedWrapper] ?? '';
		$suffix = ".$requestedWrapper";
	}

	$executor = new ParallelExecutor( $threads );

	$groups = $this->getGroupsInPerformanceOrder();
	foreach ( $groups as $groupId => $group ) {
		$path = $wrapper . rtrim( $outputDir, '/' ) . '/' . $groupId . '.json' . $suffix;

		$executor->runInParallel(
			function ( int $pid ) use ( $groupId ) {
				$this->output( "Forked process $pid to process $groupId\n" );
			},
			function () use ( $group, $path ) {
				$json = FormatJson::encode(
					$this->getOutput( $group ),
					true,
					FormatJson::ALL_OK
				);
				// FormatJson::encode() returns false on failure; do not write an empty file for it.
				if ( $json === false ) {
					$this->error( "Failed to encode data for $path as JSON" );
					return;
				}

				if ( file_put_contents( $path, $json ) === false ) {
					$this->error( "Failed to write $path" );
				}
			}
		);
	}

	$this->output( "Done.\n" );
}
110
/**
 * Returns the non-meta message groups ordered from largest to smallest.
 *
 * Sizes come from the statistics of the content language, requested with
 * MessageGroupStats::FLAG_CACHE_ONLY. Presumably the largest groups are placed
 * first so the longest exports start earliest when running in parallel —
 * confirm against ParallelExecutor.
 *
 * Ids for which MessageGroups::getGroup() returns no group are skipped.
 *
 * @return MessageGroup[] Map of group id to group
 */
private function getGroupsInPerformanceOrder(): array {
	$groupStats = MessageGroupStats::forLanguage(
		$this->contentLanguage->getCode(),
		MessageGroupStats::FLAG_CACHE_ONLY
	);

	// Descending order: swapping the operands reverses the ascending comparator.
	uasort(
		$groupStats,
		function ( array $a, array $b ): int {
			return $this->sortGroupsBySize( $b, $a );
		}
	);

	$groups = [];
	foreach ( array_keys( $groupStats ) as $groupId ) {
		// PHP converts numeric string keys to integers, hence the cast.
		$group = MessageGroups::getGroup( (string)$groupId );
		if ( $group === null || $group->isMeta() ) {
			continue;
		}

		$groups[$group->getId()] = $group;
	}

	return $groups;
}
146
/**
 * Comparator that orders two statistics rows ascending by their
 * MessageGroupStats::TOTAL entry.
 *
 * @param array $a
 * @param array $b
 * @return int Result of the spaceship comparison of the two totals
 */
private function sortGroupsBySize( array $a, array $b ): int {
	$totalA = $a[MessageGroupStats::TOTAL];
	$totalB = $b[MessageGroupStats::TOTAL];

	return $totalA <=> $totalB;
}
150
/**
 * Collects the translations of a message group into the dump structure.
 *
 * For every language whose MessageGroupStats::TRANSLATED count is not 0, the
 * collection is reset to that language, ignored messages are excluded, only
 * translated messages are kept, and each translation is appended to the entry
 * of its message key.
 *
 * @param MessageGroup $group
 * @return array[] List of entries with the keys wikiId, title, sourceLanguage,
 *  primaryGroup and values; each item of values has language, value and revision
 */
private function getOutput( MessageGroup $group ): array {
	$out = [];

	$groupId = $group->getId();
	$sourceLanguage = $group->getSourceLanguage();
	// Identical for every entry, so it is looked up once.
	$wikiId = WikiMap::getCurrentWikiId();

	$stats = MessageGroupStats::forGroup( $groupId );
	$collection = $group->initCollection( $sourceLanguage );
	foreach ( $stats as $language => $numbers ) {
		if ( $numbers[MessageGroupStats::TRANSLATED] === 0 ) {
			continue;
		}

		$collection->resetForNewLanguage( $language );
		$collection->filter( MessageCollection::FILTER_IGNORED, MessageCollection::EXCLUDE_MATCHING );
		$collection->filter( MessageCollection::FILTER_TRANSLATED, MessageCollection::INCLUDE_MATCHING );
		$collection->loadTranslations();

		foreach ( $collection->keys() as $mkey => $titleValue ) {
			$message = $collection[$mkey];

			if ( !isset( $out[$mkey] ) ) {
				// The handle is only needed for the title of a new entry.
				$handle = new MessageHandle( $titleValue );
				$out[$mkey] = [
					'wikiId' => $wikiId,
					'title' => $handle->getTitleForBase()->getPrefixedText(),
					'sourceLanguage' => $sourceLanguage,
					'primaryGroup' => $groupId,
					'values' => [],
				];
			}

			$out[$mkey]['values'][] = [
				'language' => $language,
				'value' => $message->translation(),
				'revision' => $message->getProperty( 'revision' ),
			];
		}
	}

	// Drop the message keys; consumers get a plain list.
	return array_values( $out );
}
194}
Factory class for accessing message groups individually by id or all of them as a list.
This file contains the class for core message collections implementation.
Class for pointing to messages, like Title class is for titles.
Interface for message objects used by MessageCollection.
Definition Message.php:13
This class aims to provide efficient mechanism for fetching translation completion stats.
Base maintenance script containing constants and methods used in multiple scripts. Hopefully the const...
Helper class for maintenance scripts to run things in parallel.
Interface for message groups.
initCollection( $code)
Initialises a message collection with the given language code, message definitions and message tags.
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.