Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ttmserver-export.php
Go to the documentation of this file.
1<?php
// Standard boilerplate to define $IP, the MediaWiki installation path: the
// MW_INSTALL_PATH environment variable wins when it is set; otherwise the
// path three directories above this file is used.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$dir = __DIR__;
	$IP = "$dir/../../..";
}
17require_once "$IP/maintenance/Maintenance.php";
18
/**
 * Script to bootstrap TTMServer translation memory.
 *
 * The script resets the configured translation memory, exports the
 * definitions and translations of every non-meta message group into it
 * (one forked child process per group where forking is possible), and
 * finally asks the server to finish the bootstrap.
 */
class TTMServerBootstrap extends Maintenance {
	/** @var float Value of microtime( true ) taken when the script object was constructed */
	private $start;

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Script to bootstrap TTMServer.' );
		$this->addOption(
			'threads',
			'(optional) Number of threads',
			/*required*/false,
			/*has arg*/true
		);
		$this->addOption(
			'ttmserver',
			'(optional) Server configuration identifier',
			/*required*/false,
			/*has arg*/true
		);
		// This option erases all data, empties the index and rebuilds it.
		$this->addOption(
			'reindex',
			'Update the index mapping. Warning: Clears all existing data in the index.'
		);
		$this->addOption(
			'dry-run',
			'Do not make any changes to the index.'
		);
		$this->addOption(
			'verbose',
			'Output more status information.'
		);
		$this->setBatchSize( 500 );
		$this->requireExtension( 'Translate' );
		$this->start = microtime( true );
	}

	/**
	 * Output a message prefixed with the current process id, the number of
	 * seconds elapsed since this object was constructed and the memory usage
	 * in mebibytes.
	 *
	 * @param string $text Message to output; the caller supplies any trailing newline.
	 * @param string|null $channel Passed through unchanged to output().
	 */
	public function statusLine( $text, $channel = null ) {
		$pid = sprintf( '%5s', getmypid() );
		$prefix = sprintf( '%6.2f', microtime( true ) - $this->start );
		$mem = sprintf( '%5.1fM', memory_get_usage( true ) / ( 1024 * 1024 ) );
		$this->output( "$pid $prefix $mem $text", $channel );
	}

	/**
	 * Run the whole bootstrap: begin, export every non-meta group, end.
	 *
	 * Exits through fatalError() when the selected service is not configured,
	 * when the forked begin-bootstrap step fails, or (after everything else
	 * has run) when any export child failed.
	 */
	public function execute() {
		global $wgTranslateTranslationServices,
			$wgTranslateTranslationDefaultService;

		$configKey = $this->getOption( 'ttmserver', $wgTranslateTranslationDefaultService );
		if ( !isset( $wgTranslateTranslationServices[$configKey] ) ) {
			$this->fatalError( 'Translation memory is not configured properly' );
		}

		$dryRun = $this->getOption( 'dry-run' );
		if ( $dryRun ) {
			$config = [ 'class' => FakeTTMServer::class ];
		} else {
			$config = $wgTranslateTranslationServices[$configKey];
		}

		$server = $this->getServer( $config );
		$this->logInfo( "Implementation: " . get_class( $server ) . "\n" );

		// Do as little as possible in the main thread, to not clobber forked processes.
		// See also #resetStateForFork.
		$pid = $this->tryFork();
		if ( $pid === 0 ) {
			// Child: build a fresh server object rather than reusing the parent's.
			$this->resetStateForFork();
			$server = $this->getServer( $config );
			$this->beginBootstrap( $server );
			exit();
		} elseif ( $pid === -1 ) {
			// Fork failed or is unavailable, do it serialized
			$this->beginBootstrap( $server );
		} else {
			// Main thread
			$this->statusLine( "Forked thread $pid to handle bootstrapping\n" );
			$status = 0;
			pcntl_waitpid( $pid, $status );
			// beginBootstrap probably failed, give up.
			if ( !$this->verifyChildStatus( $pid, $status ) ) {
				$this->fatalError( 'Bootstrap failed.' );
			}
		}

		$hasErrors = false;
		// The option arrives as a string. Normalise it to an integer >= 1: under
		// PHP 8 comparison rules a non-numeric value would never satisfy the
		// limit check below, letting the loop fork one child per group unbounded.
		$threads = max( 1, (int)$this->getOption( 'threads', 1 ) );
		$pids = [];

		$groups = MessageGroups::singleton()->getGroups();
		foreach ( $groups as $id => $group ) {
			if ( $group->isMeta() ) {
				continue;
			}

			// Fork to increase speed with parallelism. Also helps with memory usage if there are leaks.
			$pid = $this->tryFork();

			if ( $pid === 0 ) {
				$this->resetStateForFork();
				$server = $this->getServer( $config );
				$this->exportGroup( $group, $server );
				exit();
			} elseif ( $pid === -1 ) {
				// Fork failed or is unavailable, do it serialized
				$this->exportGroup( $group, $server );
			} else {
				// Main thread
				$this->statusLine( "Forked thread $pid to handle $id\n" );
				$pids[$pid] = true;

				// If we hit the thread limit, wait for any child to finish.
				if ( count( $pids ) >= $threads ) {
					$status = 0;
					$pid = pcntl_wait( $status );
					$hasErrors = $hasErrors || !$this->verifyChildStatus( $pid, $status );
					unset( $pids[$pid] );
				}
			}
		}

		// Return control after all threads have finished.
		foreach ( array_keys( $pids ) as $pid ) {
			$status = 0;
			pcntl_waitpid( $pid, $status );
			$hasErrors = $hasErrors || !$this->verifyChildStatus( $pid, $status );
		}

		// It's okay to do this in the main thread as it is the last thing
		$this->endBootstrap( $server );

		if ( $hasErrors ) {
			$this->fatalError( '!!! Some threads failed. Review the script output !!!' );
		}
	}

	/**
	 * Fork the current process when the pcntl extension provides pcntl_fork().
	 *
	 * Reporting a missing extension as -1 lets callers reuse their existing
	 * "fork failed, do it serialized" branch instead of dying on an undefined
	 * function. The pcntl wait functions are only reached after a successful
	 * fork, so they need no separate guard.
	 *
	 * @return int 0 in the child, the child's process id in the parent, or -1
	 *  when forking failed or is not available.
	 */
	private function tryFork(): int {
		if ( !function_exists( 'pcntl_fork' ) ) {
			return -1;
		}

		return pcntl_fork();
	}

	/**
	 * Create the server described by $config and prepare it for this script:
	 * this object is handed over as logger when the server has a callable
	 * setLogger(), and a reindex is requested when --reindex was given.
	 *
	 * Exits through fatalError() when the created service does not implement
	 * WritableTTMServer.
	 *
	 * @param array $config Service configuration passed to TTMServer::factory().
	 * @return WritableTTMServer
	 */
	private function getServer( array $config ): WritableTTMServer {
		$server = TTMServer::factory( $config );
		if ( !$server instanceof WritableTTMServer ) {
			$this->fatalError( "Service must implement WritableTTMServer" );
		}

		if ( is_callable( [ $server, 'setLogger' ] ) ) {
			// Phan, why you so strict?
			// @phan-suppress-next-line PhanUndeclaredMethod
			$server->setLogger( $this );
		}

		if ( $this->getOption( 'reindex', false ) ) {
			// This doesn't do the update, just sets a flag to do it
			$server->setDoReIndex();
		}

		return $server;
	}

	/**
	 * Print a status line and tell the server that filling the translation
	 * memory is about to start.
	 *
	 * @param WritableTTMServer $server
	 */
	protected function beginBootstrap( WritableTTMServer $server ) {
		$this->statusLine( "Cleaning up old entries...\n" );
		$server->beginBootstrap();
	}

	/**
	 * Print a status line and let the server do its final cleanup and
	 * optimizing.
	 *
	 * @param WritableTTMServer $server
	 */
	protected function endBootstrap( WritableTTMServer $server ) {
		$this->statusLine( "Optimizing...\n" );
		$server->endBootstrap();
	}

	/**
	 * Export one message group to the server: first all message definitions
	 * in the group's source language, then the translations of every other
	 * language for which the group statistics report translated messages.
	 * Inserts are sent in chunks of at most the configured batch size.
	 *
	 * With --verbose, a timing summary is printed when at least one item was
	 * exported.
	 *
	 * @param MessageGroup $group
	 * @param WritableTTMServer $server
	 */
	protected function exportGroup( MessageGroup $group, WritableTTMServer $server ) {
		// Each timer starts at minus the start time and has the end time added
		// later, so it ends up holding the elapsed seconds of its phase.
		$times = [
			'total' => -microtime( true ),
			'stats' => 0,
			'init' => 0,
			'trans' => 0,
		];
		$countItems = 0;

		$id = $group->getId();
		$sourceLanguage = $group->getSourceLanguage();

		$times[ 'stats' ] -= microtime( true );
		$stats = MessageGroupStats::forGroup( $id );
		$times[ 'stats' ] += microtime( true );

		$times[ 'init' ] -= microtime( true );
		$collection = $group->initCollection( $sourceLanguage );
		$collection->filter( 'ignored' );
		$collection->initMessages();

		$server->beginBatch();
		$inserts = [];
		foreach ( $collection->keys() as $mkey => $titleValue ) {
			$title = Title::newFromLinkTarget( $titleValue );
			$handle = new MessageHandle( $title );
			$inserts[] = [ $handle, $sourceLanguage, $collection[$mkey]->definition() ];
			$countItems++;
		}

		// Send all definitions; the loop leaves $inserts empty for the next phase.
		while ( $inserts !== [] ) {
			$batch = array_splice( $inserts, 0, $this->mBatchSize );
			$server->batchInsertDefinitions( $batch );
		}
		$times[ 'init' ] += microtime( true );

		$times[ 'trans' ] -= microtime( true );
		foreach ( $stats as $targetLanguage => $numbers ) {
			if ( $targetLanguage === $sourceLanguage ) {
				continue;
			}
			if ( $numbers[MessageGroupStats::TRANSLATED] === 0 ) {
				continue;
			}

			$collection->resetForNewLanguage( $targetLanguage );
			$collection->filter( 'ignored' );
			$collection->filter( 'translated', false );
			$collection->loadTranslations();

			foreach ( $collection->keys() as $mkey => $titleValue ) {
				$title = Title::newFromLinkTarget( $titleValue );
				$handle = new MessageHandle( $title );
				$inserts[] = [ $handle, $sourceLanguage, $collection[$mkey]->translation() ];
				$countItems++;
			}

			// Only full batches are sent here; a partial remainder is carried
			// over to the next language so batches stay full across languages.
			while ( count( $inserts ) >= $this->mBatchSize ) {
				$batch = array_splice( $inserts, 0, $this->mBatchSize );
				$server->batchInsertTranslations( $batch );
			}
		}

		// Send whatever is left after the last language.
		while ( $inserts !== [] ) {
			$batch = array_splice( $inserts, 0, $this->mBatchSize );
			$server->batchInsertTranslations( $batch );
		}

		$server->endBatch();
		$times[ 'trans' ] += microtime( true );
		$times[ 'total' ] += microtime( true );

		if ( $countItems !== 0 ) {
			$debug = sprintf(
				"Total %.1f s for %d items >> stats/init/trans %%: %d/%d/%d >> %.1f ms/item",
				$times[ 'total' ],
				$countItems,
				$times[ 'stats'] / $times[ 'total' ] * 100,
				$times[ 'init'] / $times[ 'total' ] * 100,
				$times[ 'trans'] / $times[ 'total' ] * 100,
				$times[ 'total' ] / $countItems * 1000
			);
			$this->logInfo( "Finished exporting $id. $debug\n" );
		}
	}

	/**
	 * Print a status line, but only when --verbose was given.
	 *
	 * @param string $text
	 */
	private function logInfo( string $text ) {
		if ( $this->getOption( 'verbose', false ) ) {
			$this->statusLine( $text );
		}
	}

	/**
	 * Prepare a freshly forked child process before it does any work.
	 */
	protected function resetStateForFork() {
		// Make sure all existing connections are dead,
		// we can't use them in forked children.
		MediaWiki\MediaWikiServices::resetChildProcessServices();
		// Temporary workaround for https://phabricator.wikimedia.org/T258860.
		// This script just moves data around, so skipping the message cache should not
		// cause any major issues. Things like message documentation language name and
		// main page name were being checked from the message cache and sometimes failing.
		MediaWiki\MediaWikiServices::getInstance()->getMessageCache()->disable();
	}

	/**
	 * Check how a child process ended and report abnormal endings.
	 *
	 * @param int $pid Process id, used only in the message.
	 * @param int $status Status value filled in by pcntl_wait() or pcntl_waitpid().
	 * @return bool False when the child exited with a non-zero code or was
	 *  terminated by a signal; true otherwise.
	 */
	private function verifyChildStatus( int $pid, int $status ): bool {
		if ( pcntl_wifexited( $status ) ) {
			$code = pcntl_wexitstatus( $status );
			if ( $code ) {
				$this->output( "Pid $pid exited with status $code !!\n" );
				return false;
			}
		} elseif ( pcntl_wifsignaled( $status ) ) {
			$signum = pcntl_wtermsig( $status );
			$this->output( "Pid $pid terminated by signal $signum !!\n" );
			return false;
		}

		return true;
	}
}
310
// Store this script's class name in $maintClass, then include the file named by
// the RUN_MAINTENANCE_IF_MAIN constant. The constant is defined outside this
// file; presumably it runs the class when this script is the entry point.
311$maintClass = TTMServerBootstrap::class;
312require_once RUN_MAINTENANCE_IF_MAIN;
Class for pointing to messages, like Title class is for titles.
Script to bootstrap TTMServer translation memory.
Interface for message groups.
initCollection( $code)
Initialises a message collection with the given language code, message definitions and message tags.
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.
Interface for TTMServer that can be updated.
endBatch()
Called after every batch (MessageGroup).
batchInsertDefinitions(array $batch)
Called multiple times per batch if necessary.
batchInsertTranslations(array $batch)
Called multiple times per batch if necessary.
beginBatch()
Called before every batch (MessageGroup).
beginBootstrap()
Called when starting to fill the translation memory.
endBootstrap()
Do any cleanup, optimizing etc.