Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ExportTtmServerDumpMaintenanceScript.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\TtmServer;
5
6use FormatJson;
7use Language;
12use MediaWiki\MediaWikiServices;
13use MessageGroup;
16use WikiMap;
17
25 private $contentLanguage;
26
/**
 * Sets up the script description and its command line options: the required
 * output directory, the optional thread count and the optional compression
 * method, whose help text lists the values usable on this installation.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Creates a dump file that can be imported to a TTMServer' );

	$this->addOption( 'output-directory', 'Which directory to output files to', self::REQUIRED, self::HAS_ARG, 'o' );
	$this->addOption( 'threads', 'How many threads to use', self::OPTIONAL, self::HAS_ARG, 'n' );

	// Build the list of accepted values for --compress; "NONE" when nothing is available.
	$compressionNames = array_keys( $this->getAvailableCompressionWrappers() );
	if ( count( $compressionNames ) ) {
		$compressionList = implode( ', ', $compressionNames );
	} else {
		$compressionList = 'NONE';
	}
	$this->addOption(
		'compress',
		"Use a compression filter. Possible values: $compressionList",
		self::OPTIONAL,
		self::HAS_ARG,
		'c'
	);

	$this->requireExtension( 'Translate' );
}
58
/**
 * Lists the compression stream wrappers registered in this PHP installation.
 *
 * Compression wrappers such as compress.zlib:// are stream wrappers, so they are
 * reported by stream_get_wrappers(). stream_get_filters() only reports filter
 * names (for example "zlib.*"), none of which start with "compress.", so
 * scanning the filter list would never find anything.
 *
 * @return string[] Map of wrapper name (e.g. "compress.zlib") to the prefix
 *  (e.g. "compress.zlib://") that is prepended to a file path.
 */
private function getAvailableCompressionWrappers(): array {
	$out = [];
	foreach ( stream_get_wrappers() as $wrapperName ) {
		if ( preg_match( '/^compress\..+$/', $wrapperName ) ) {
			$out[$wrapperName] = $wrapperName . '://';
		}
	}
	return $out;
}
70
/**
 * Exports every group returned by getGroupsInPerformanceOrder() into its own
 * JSON file inside the requested output directory.
 *
 * The file name is the group id followed by ".json" and, when a compression
 * wrapper was requested, a dot and the wrapper name. Each export is handed to
 * a ParallelExecutor created with the requested number of threads.
 */
public function execute() {
	$this->contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	$threads = (int)$this->getOption( 'threads', 1 );
	$outputDir = $this->getOption( 'output-directory' );
	$requestedWrapper = $this->getOption( 'compress' );
	$availableWrappers = $this->getAvailableCompressionWrappers();
	if ( $requestedWrapper && !isset( $availableWrappers[$requestedWrapper] ) ) {
		$this->fatalError(
			"Compression wrapper '$requestedWrapper' is not supported"
		);
	}
	// Only index the wrapper map when a wrapper was actually requested, so that
	// a missing option never ends up being used as an array offset.
	$wrapper = $requestedWrapper ? $availableWrappers[$requestedWrapper] : '';
	$suffix = $requestedWrapper ? ".$requestedWrapper" : '';

	$executor = new ParallelExecutor( $threads );

	$groups = $this->getGroupsInPerformanceOrder();
	foreach ( $groups as $groupId => $group ) {
		$path = $wrapper . rtrim( $outputDir, '/' ) . '/' . $groupId . '.json' . $suffix;

		$executor->runInParallel(
			function ( int $pid ) use ( $groupId ) {
				$this->output( "Forked process $pid to process $groupId\n" );
			},
			function () use ( $group, $groupId, $path ) {
				$output = FormatJson::encode(
					$this->getOutput( $group ),
					true,
					FormatJson::ALL_OK
				);
				// Report failures instead of silently leaving a missing or empty dump behind.
				if ( $output === false ) {
					$this->error( "Failed to encode the dump of group $groupId as JSON" );
					return;
				}
				if ( file_put_contents( $path, $output ) === false ) {
					$this->error( "Failed to write the dump of group $groupId to $path" );
				}
			}
		);
	}

	$this->output( "Done.\n" );
}
109
/**
 * Returns the non-meta message groups, ordered by the cached statistics of the
 * content language so that groups with the largest MessageGroupStats::TOTAL
 * value come first.
 *
 * @return MessageGroup[] Message groups keyed by their group id.
 */
private function getGroupsInPerformanceOrder(): array {
	$groupStats = MessageGroupStats::forLanguage(
		$this->contentLanguage->getCode(),
		MessageGroupStats::FLAG_CACHE_ONLY
	);

	// Descending order: compare with the arguments swapped.
	uasort(
		$groupStats,
		function ( array $a, array $b ): int {
			return $this->sortGroupsBySize( $b, $a );
		}
	);

	$groups = [];
	foreach ( array_keys( $groupStats ) as $groupId ) {
		$group = MessageGroups::getGroup( $groupId );
		// The statistics may mention a group that can no longer be loaded;
		// skip it instead of failing on a null value.
		if ( $group === null || $group->isMeta() ) {
			continue;
		}

		$groups[$group->getId()] = $group;
	}

	return $groups;
}
145
/**
 * Comparison callback that orders two statistics rows by their
 * MessageGroupStats::TOTAL entry in ascending order.
 *
 * @param array $a First statistics row.
 * @param array $b Second statistics row.
 * @return int Negative, zero or positive when $a is smaller than, equal to or
 *  larger than $b.
 */
private function sortGroupsBySize( array $a, array $b ): int {
	$totalOfA = $a[MessageGroupStats::TOTAL];
	$totalOfB = $b[MessageGroupStats::TOTAL];

	return $totalOfA <=> $totalOfB;
}
149
/**
 * Collects the translations of one message group into a list of entries.
 *
 * Every entry describes one message (wiki id, title, source language and
 * primary group) and carries a 'values' list with one item per language that
 * has a translation loaded for it (language code, text and revision).
 *
 * @param MessageGroup $group Group to export.
 * @return array[] List of entries, re-indexed from zero.
 */
private function getOutput( MessageGroup $group ): array {
	$out = [];

	$groupId = $group->getId();
	$sourceLanguage = $group->getSourceLanguage();
	// Identical for every entry, so look it up once instead of once per message.
	$wikiId = WikiMap::getCurrentWikiId();

	$stats = MessageGroupStats::forGroup( $groupId );
	$collection = $group->initCollection( $sourceLanguage );
	foreach ( $stats as $language => $numbers ) {
		// Nothing to export for languages without any translations.
		if ( $numbers[MessageGroupStats::TRANSLATED] === 0 ) {
			continue;
		}

		$collection->resetForNewLanguage( $language );
		$collection->filter( 'ignored' );
		$collection->filter( 'translated', false );
		$collection->loadTranslations();

		foreach ( $collection->keys() as $mkey => $titleValue ) {
			if ( !isset( $out[$mkey] ) ) {
				// The handle is only needed for the title of a new entry, so it is
				// not constructed again for every further language of the same message.
				$handle = new MessageHandle( $titleValue );
				$out[$mkey] = [
					'wikiId' => $wikiId,
					'title' => $handle->getTitleForBase()->getPrefixedText(),
					'sourceLanguage' => $sourceLanguage,
					'primaryGroup' => $groupId,
					'values' => [],
				];
			}

			$message = $collection[$mkey];
			$out[$mkey]['values'][] = [
				'language' => $language,
				'value' => $message->translation(),
				'revision' => $message->getProperty( 'revision' ),
			];
		}
	}

	return array_values( $out );
}
193}
Factory class for accessing message groups individually by id or all of them as a list.
Interface for message objects used by MessageCollection.
Definition Message.php:13
Constants for making code for maintenance scripts more readable.
Helper class for maintenance scripts to run things in parallel.
This class abstracts MessageGroup statistics calculation and storage.
Class for pointing to messages, like Title class is for titles.
Interface for message groups.
initCollection( $code)
Initialises a message collection with the given language code, message definitions and message tags.
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.