Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 108 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
ExportTtmServerDumpMaintenanceScript | |
0.00% |
0 / 108 |
|
0.00% |
0 / 6 |
380 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
6 | |||
getAvailableCompressionWrappers | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
execute | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
30 | |||
getGroupsInPerformanceOrder | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
12 | |||
sortGroupsBySize | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getOutput | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\TtmServer; |
5 | |
6 | use FormatJson; |
7 | use Language; |
8 | use MediaWiki\Extension\Translate\MessageGroupProcessing\MessageGroups; |
9 | use MediaWiki\Extension\Translate\MessageLoading\Message; |
10 | use MediaWiki\Extension\Translate\MessageLoading\MessageHandle; |
11 | use MediaWiki\Extension\Translate\Utilities\BaseMaintenanceScript; |
12 | use MediaWiki\Extension\Translate\Utilities\ParallelExecutor; |
13 | use MediaWiki\MediaWikiServices; |
14 | use MediaWiki\WikiMap\WikiMap; |
15 | use MessageGroup; |
16 | use MessageGroupStats; |
17 | |
/**
 * Maintenance script that writes one JSON dump file per non-meta message group into a
 * directory, so that the files can later be imported to a TTMServer.
 *
 * @since 2020.11
 * @license GPL-2.0-or-later
 * @author Niklas Laxström
 */
class ExportTtmServerDumpMaintenanceScript extends BaseMaintenanceScript {
	/** @var Language Content language of the wiki; set at the start of execute() */
	private $contentLanguage;

	/** Register the command line options accepted by the script. */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Creates a dump file that can be imported to a TTMServer' );

		$this->addOption(
			'output-directory',
			'Which directory to output files to',
			self::REQUIRED,
			self::HAS_ARG,
			'o'
		);
		$this->addOption(
			'threads',
			'How many threads to use',
			self::OPTIONAL,
			self::HAS_ARG,
			'n'
		);

		// Advertise only the compression wrappers this PHP installation actually provides
		$availableMethods = array_keys( $this->getAvailableCompressionWrappers() );
		$values = $availableMethods ? implode( ', ', $availableMethods ) : 'NONE';
		$this->addOption(
			'compress',
			"Use a compression filter. Possible values: $values",
			self::OPTIONAL,
			self::HAS_ARG,
			'c'
		);

		$this->requireExtension( 'Translate' );
	}

	/**
	 * List the compression stream wrappers registered in this PHP installation.
	 *
	 * PHP registers compress.zlib and compress.bzip2 as stream wrappers, not as stream
	 * filters, so the lookup must go through stream_get_wrappers(). The list of stream
	 * filters contains no "compress.*" entries.
	 *
	 * @return string[] Map of wrapper name (e.g. "compress.zlib") to the path prefix that
	 *  activates it (e.g. "compress.zlib://")
	 */
	private function getAvailableCompressionWrappers(): array {
		$out = [];
		foreach ( stream_get_wrappers() as $wrapper ) {
			if ( preg_match( '/^compress\..+$/', $wrapper ) ) {
				$out[$wrapper] = $wrapper . '://';
			}
		}
		return $out;
	}

	/**
	 * Export every non-meta message group to "<output-directory>/<group id>.json", optionally
	 * through a compression wrapper, handing each group to the parallel executor.
	 */
	public function execute() {
		$this->contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

		$threads = (int)$this->getOption( 'threads', 1 );
		// Normalized once here instead of on every loop iteration
		$outputDir = rtrim( $this->getOption( 'output-directory' ), '/' );

		$wrapper = '';
		$suffix = '';
		$requestedWrapper = $this->getOption( 'compress' );
		if ( $requestedWrapper ) {
			$availableWrappers = $this->getAvailableCompressionWrappers();
			if ( !isset( $availableWrappers[$requestedWrapper] ) ) {
				$this->fatalError(
					"Compression wrapper '$requestedWrapper' is not supported"
				);
			}
			$wrapper = $availableWrappers[$requestedWrapper];
			$suffix = ".$requestedWrapper";
		}

		$executor = new ParallelExecutor( $threads );

		$groups = $this->getGroupsInPerformanceOrder();
		foreach ( $groups as $groupId => $group ) {
			$path = $wrapper . $outputDir . '/' . $groupId . '.json' . $suffix;

			$executor->runInParallel(
				function ( int $pid ) use ( $groupId ) {
					$this->output( "Forked process $pid to process $groupId\n" );
				},
				function () use ( $group, $groupId, $path ) {
					$json = FormatJson::encode(
						$this->getOutput( $group ),
						true,
						FormatJson::ALL_OK
					);
					// Do not leave behind an empty file that looks like a valid dump
					if ( $json === false ) {
						$this->error( "Failed to encode the dump of group $groupId" );
						return;
					}

					// Report failed writes (e.g. unwritable directory) instead of ignoring them
					if ( file_put_contents( $path, $json ) === false ) {
						$this->error( "Failed to write $path" );
					}
				}
			);
		}

		$this->output( "Done.\n" );
	}

	/**
	 * Return groups sorted by number of messages.
	 *
	 * For parallel processing, it makes sense to process large groups first so that smaller
	 * ones can execute in parallel threads, rather than waiting for large group(s) to process
	 * while other threads have nothing to do. Do not spend time on gathering statistics in case
	 * they are not present.
	 *
	 * @return MessageGroup[] Non-meta groups keyed by group id, largest first
	 */
	private function getGroupsInPerformanceOrder(): array {
		$groupStats = MessageGroupStats::forLanguage(
			$this->contentLanguage->getCode(),
			MessageGroupStats::FLAG_CACHE_ONLY
		);

		// Swapping the arguments reverses the comparison: descending order by size
		uasort(
			$groupStats,
			function ( array $a, array $b ): int {
				return $this->sortGroupsBySize( $b, $a );
			}
		);

		$groups = [];
		foreach ( array_keys( $groupStats ) as $groupId ) {
			$group = MessageGroups::getGroup( $groupId );
			// Skip ids that no longer resolve to a group, as well as meta groups
			if ( !$group || $group->isMeta() ) {
				continue;
			}

			$groups[$group->getId()] = $group;
		}

		return $groups;
	}

	/**
	 * Comparator that orders two statistics rows by their total message count, ascending.
	 *
	 * @param array $a Statistics row containing MessageGroupStats::TOTAL
	 * @param array $b Statistics row containing MessageGroupStats::TOTAL
	 * @return int Negative, zero or positive, as expected by the sort functions
	 */
	private function sortGroupsBySize( array $a, array $b ): int {
		return $a[MessageGroupStats::TOTAL] <=> $b[MessageGroupStats::TOTAL];
	}

	/**
	 * Build the dump structure for one message group.
	 *
	 * @param MessageGroup $group
	 * @return array[] List with one entry per message key. Each entry has the keys wikiId,
	 *  title, sourceLanguage, primaryGroup and values, where values is a list of
	 *  [ language, value, revision ] entries.
	 */
	private function getOutput( MessageGroup $group ): array {
		$out = [];

		$groupId = $group->getId();
		$sourceLanguage = $group->getSourceLanguage();
		$wikiId = WikiMap::getCurrentWikiId();

		$stats = MessageGroupStats::forGroup( $groupId );
		$collection = $group->initCollection( $sourceLanguage );
		foreach ( $stats as $language => $numbers ) {
			// Nothing to export for languages that have no translations at all
			if ( $numbers[MessageGroupStats::TRANSLATED] === 0 ) {
				continue;
			}

			$collection->resetForNewLanguage( $language );
			// NOTE(review): presumably these drop ignored messages and keep only the
			// translated ones; confirm against the MessageCollection filter semantics
			$collection->filter( 'ignored' );
			$collection->filter( 'translated', false );
			$collection->loadTranslations();

			foreach ( $collection->keys() as $mkey => $titleValue ) {
				/** @var Message $message */
				$message = $collection[$mkey];

				// The per-message metadata is created once and shared by all languages
				if ( !isset( $out[$mkey] ) ) {
					$handle = new MessageHandle( $titleValue );
					$out[$mkey] = [
						'wikiId' => $wikiId,
						'title' => $handle->getTitleForBase()->getPrefixedText(),
						'sourceLanguage' => $sourceLanguage,
						'primaryGroup' => $groupId,
						'values' => [],
					];
				}

				$out[$mkey]['values'][] = [
					'language' => $language,
					'value' => $message->translation(),
					'revision' => $message->getProperty( 'revision' ),
				];
			}
		}

		// Message keys are only needed for grouping; the dump is a plain list
		return array_values( $out );
	}
}