Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ttmserver-export.php
Go to the documentation of this file.
<?php

// Work out where MediaWiki is installed: fall back to three directories above
// this file unless the MW_INSTALL_PATH environment variable is set.
if ( getenv( 'MW_INSTALL_PATH' ) === false ) {
	$dir = __DIR__;
	$IP = $dir . '/../../..';
} else {
	$IP = getenv( 'MW_INSTALL_PATH' );
}
require_once "$IP/maintenance/Maintenance.php";
20
/**
 * Script to bootstrap TTMServer translation memory.
 *
 * Clears the translation memory, exports the definitions and translations of
 * every non-meta message group into it (one forked child process per group,
 * limited by the `threads` option) and finally lets the server finish up.
 */
class TTMServerBootstrap extends Maintenance {
	/** @var float Value of microtime( true ) at construction; base for the elapsed time in statusLine() */
	private $start;

	/**
	 * Registers the command line options, the default batch size and the
	 * required extension, and records the start time.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Script to bootstrap TTMServer.' );
		$this->addOption(
			'threads',
			'(optional) Number of threads',
			/*required*/false,
			/*has arg*/true
		);
		$this->addOption(
			'ttmserver',
			'(optional) Server configuration identifier',
			/*required*/false,
			/*has arg*/true
		);
		// This option erases all data, empties the index and rebuilds it.
		$this->addOption(
			'reindex',
			'Update the index mapping. Warning: Clears all existing data in the index.'
		);
		$this->addOption(
			'dry-run',
			'Do not make any changes to the index.'
		);
		$this->addOption(
			'verbose',
			'Output more status information.'
		);
		$this->addOption(
			'clean',
			'Only run setup and cleanup. Skip inserting content.'
		);
		$this->setBatchSize( 500 );
		$this->requireExtension( 'Translate' );
		$this->start = microtime( true );
	}

	/**
	 * Outputs a status line prefixed with the process id, the number of seconds
	 * elapsed since construction and the current memory usage in MiB.
	 *
	 * @param string $text Text to output; callers in this class include the trailing newline
	 * @param mixed|null $channel Passed through unchanged to output()
	 */
	public function statusLine( $text, $channel = null ) {
		$pid = sprintf( '%5s', getmypid() );
		$prefix = sprintf( '%6.2f', microtime( true ) - $this->start );
		$mem = sprintf( '%5.1fM', memory_get_usage( true ) / ( 1024 * 1024 ) );
		$this->output( "$pid $prefix $mem $text", $channel );
	}

	/**
	 * Runs the whole bootstrap: begin, export every group, end.
	 *
	 * Reports a fatal error when the selected service is not configured, when
	 * the bootstrap child fails, or (after everything has finished) when any
	 * export child exited abnormally.
	 */
	public function execute() {
		global $wgTranslateTranslationServices,
			$wgTranslateTranslationDefaultService;

		$configKey = $this->getOption( 'ttmserver', $wgTranslateTranslationDefaultService );
		if ( !isset( $wgTranslateTranslationServices[$configKey] ) ) {
			$this->fatalError( 'Translation memory is not configured properly' );
		}

		$dryRun = $this->getOption( 'dry-run' );
		if ( $dryRun ) {
			$config = [ 'class' => FakeTTMServer::class ];
		} else {
			$config = $wgTranslateTranslationServices[$configKey];
		}

		$server = $this->getServer( $config );
		$this->logInfo( "Implementation: " . get_class( $server ) . "\n" );

		// Do as little as possible in the main thread, to not clobber forked processes.
		// See also #resetStateForFork.
		$pid = pcntl_fork();
		if ( $pid === 0 ) {
			// Child: build a fresh server object, the parent's must not be shared.
			$this->resetStateForFork();
			$server = $this->getServer( $config );
			$this->beginBootstrap( $server );
			exit();
		} elseif ( $pid === -1 ) {
			// Fork failed do it serialized
			$this->beginBootstrap( $server );
		} else {
			// Main thread
			$this->statusLine( "Forked thread $pid to handle bootstrapping\n" );
			$status = 0;
			pcntl_waitpid( $pid, $status );
			// beginBootstrap probably failed, give up.
			if ( !$this->verifyChildStatus( $pid, $status ) ) {
				$this->fatalError( 'Bootstrap failed.' );
			}
		}

		$hasErrors = false;
		// The option arrives as a string. Normalise it to a positive integer so
		// that a zero, negative or non-numeric value cannot disable the limit
		// check below and cause an unbounded number of forks.
		$threads = max( 1, (int)$this->getOption( 'threads', 1 ) );
		$pids = [];

		if ( $this->hasOption( 'clean' ) ) {
			// Only setup and cleanup were requested: nothing to export.
			$groups = [];
		} else {
			$groups = MessageGroups::singleton()->getGroups();
		}
		foreach ( $groups as $id => $group ) {
			/** @var MessageGroup $group */
			if ( $group->isMeta() ) {
				continue;
			}

			// Fork to increase speed with parallelism. Also helps with memory usage if there are leaks.
			$pid = pcntl_fork();

			if ( $pid === 0 ) {
				$this->resetStateForFork();
				$server = $this->getServer( $config );
				$this->exportGroup( $group, $server );
				exit();
			} elseif ( $pid === -1 ) {
				// Fork failed do it serialized
				$this->exportGroup( $group, $server );
			} else {
				// Main thread
				$this->statusLine( "Forked thread $pid to handle $id\n" );
				$pids[$pid] = true;

				// If we hit the thread limit, wait for any child to finish.
				if ( count( $pids ) >= $threads ) {
					$status = 0;
					$pid = pcntl_wait( $status );
					$hasErrors = $hasErrors || !$this->verifyChildStatus( $pid, $status );
					unset( $pids[$pid] );
				}
			}
		}

		// Return control after all threads have finished.
		foreach ( array_keys( $pids ) as $pid ) {
			$status = 0;
			pcntl_waitpid( $pid, $status );
			$hasErrors = $hasErrors || !$this->verifyChildStatus( $pid, $status );
		}

		// It's okay to do this in the main thread as it is the last thing
		$this->endBootstrap( $server );

		if ( $hasErrors ) {
			$this->fatalError( '!!! Some threads failed. Review the script output !!!' );
		}
	}

	/**
	 * Creates the translation memory server for the given configuration.
	 *
	 * Reports a fatal error when the created service is not writable. If the
	 * server has a setLogger() method, this script is registered as its logger,
	 * and the `reindex` option is forwarded as a flag on the server.
	 *
	 * @param array $config Service configuration passed to TTMServer::factory()
	 * @return WritableTTMServer
	 */
	private function getServer( array $config ): WritableTTMServer {
		$server = TTMServer::factory( $config );
		if ( !$server instanceof WritableTTMServer ) {
			$this->fatalError( "Service must implement WritableTTMServer" );
		}

		if ( method_exists( $server, 'setLogger' ) ) {
			// @phan-suppress-next-line PhanUndeclaredMethod
			$server->setLogger( $this );
		}

		if ( $this->getOption( 'reindex', false ) ) {
			// This doesn't do the update, just sets a flag to do it
			$server->setDoReIndex();
		}

		return $server;
	}

	/**
	 * Announces and starts the bootstrap on the server.
	 *
	 * @param WritableTTMServer $server
	 */
	protected function beginBootstrap( WritableTTMServer $server ) {
		$this->statusLine( "Cleaning up old entries...\n" );
		$server->beginBootstrap();
	}

	/**
	 * Announces and finishes the bootstrap on the server.
	 *
	 * @param WritableTTMServer $server
	 */
	protected function endBootstrap( WritableTTMServer $server ) {
		$this->statusLine( "Optimizing...\n" );
		$server->endBootstrap();
	}

	/**
	 * Exports one message group into the translation memory.
	 *
	 * First inserts the definitions in the group's source language, then the
	 * translations of every other language that has at least one translated
	 * message according to the group statistics. Inserts are sent to the server
	 * in chunks of at most the configured batch size. With the `verbose` option
	 * a timing summary is printed when at least one item was exported.
	 *
	 * @param MessageGroup $group Group to export
	 * @param WritableTTMServer $server Server receiving the data
	 */
	protected function exportGroup( MessageGroup $group, WritableTTMServer $server ) {
		// Each timer is started by subtracting the current time and stopped by
		// adding it again, which leaves the elapsed seconds in the slot.
		$times = [
			'total' => -microtime( true ),
			'stats' => 0,
			'init' => 0,
			'trans' => 0,
		];
		$countItems = 0;

		// A batch size below one would make array_splice() remove nothing and
		// the flush loops below would never terminate.
		$batchSize = max( 1, (int)$this->mBatchSize );

		$id = $group->getId();
		$sourceLanguage = $group->getSourceLanguage();

		$times['stats'] -= microtime( true );
		$stats = MessageGroupStats::forGroup( $id );
		$times['stats'] += microtime( true );

		$times['init'] -= microtime( true );
		$collection = $group->initCollection( $sourceLanguage );
		$collection->filter( 'ignored' );
		$collection->initMessages();

		$server->beginBatch();
		$inserts = [];
		foreach ( $collection->keys() as $mkey => $titleValue ) {
			$title = Title::newFromLinkTarget( $titleValue );
			$handle = new MessageHandle( $title );
			$inserts[] = [ $handle, $sourceLanguage, $collection[$mkey]->definition() ];
			$countItems++;
		}

		// Flush all definitions; $inserts is empty again once this loop ends.
		while ( $inserts !== [] ) {
			$batch = array_splice( $inserts, 0, $batchSize );
			$server->batchInsertDefinitions( $batch );
		}
		$times['init'] += microtime( true );

		$times['trans'] -= microtime( true );
		foreach ( $stats as $targetLanguage => $numbers ) {
			if ( $targetLanguage === $sourceLanguage ) {
				continue;
			}
			if ( $numbers[MessageGroupStats::TRANSLATED] === 0 ) {
				continue;
			}

			$collection->resetForNewLanguage( $targetLanguage );
			$collection->filter( 'ignored' );
			$collection->filter( 'translated', false );
			$collection->loadTranslations();

			foreach ( $collection->keys() as $mkey => $titleValue ) {
				$title = Title::newFromLinkTarget( $titleValue );
				$handle = new MessageHandle( $title );
				$inserts[] = [ $handle, $sourceLanguage, $collection[$mkey]->translation() ];
				$countItems++;
			}

			// Only send full batches here; a remainder is carried over to the
			// next language so that batches stay as large as possible.
			while ( count( $inserts ) >= $batchSize ) {
				$batch = array_splice( $inserts, 0, $batchSize );
				$server->batchInsertTranslations( $batch );
			}
		}

		// Send whatever is left after the last language.
		while ( $inserts !== [] ) {
			$batch = array_splice( $inserts, 0, $batchSize );
			$server->batchInsertTranslations( $batch );
		}

		$server->endBatch();
		$times['trans'] += microtime( true );
		$times['total'] += microtime( true );

		if ( $countItems !== 0 ) {
			$debug = sprintf(
				"Total %.1f s for %d items >> stats/init/trans %%: %d/%d/%d >> %.1f ms/item",
				$times['total'],
				$countItems,
				$times['stats'] / $times['total'] * 100,
				$times['init'] / $times['total'] * 100,
				$times['trans'] / $times['total'] * 100,
				$times['total'] / $countItems * 1000
			);
			$this->logInfo( "Finished exporting $id. $debug\n" );
		}
	}

	/**
	 * Outputs a status line, but only when the `verbose` option is set.
	 *
	 * @param string $text Text to output, including the trailing newline
	 */
	private function logInfo( string $text ) {
		if ( $this->getOption( 'verbose', false ) ) {
			$this->statusLine( $text );
		}
	}

	/**
	 * Prepares a freshly forked child process before it does any work.
	 */
	protected function resetStateForFork() {
		// Make sure all existing connections are dead,
		// we can't use them in forked children.
		MediaWiki\MediaWikiServices::resetChildProcessServices();
		// Temporary workaround for https://phabricator.wikimedia.org/T258860.
		// This script just moves data around, so skipping the message cache should not
		// cause any major issues. Things like message documentation language name and
		// main page name were being checked from the message cache and sometimes failing.
		MediaWiki\MediaWikiServices::getInstance()->getMessageCache()->disable();
	}

	/**
	 * Checks the wait status of a child process and reports failures.
	 *
	 * @param int $pid Process id, used only in the output message
	 * @param int $status Status as filled in by pcntl_wait() or pcntl_waitpid()
	 * @return bool False if the child exited with a non-zero code or was
	 *  terminated by a signal, true otherwise
	 */
	private function verifyChildStatus( int $pid, int $status ): bool {
		if ( pcntl_wifexited( $status ) ) {
			$code = pcntl_wexitstatus( $status );
			if ( $code ) {
				$this->output( "Pid $pid exited with status $code !!\n" );
				return false;
			}
		} elseif ( pcntl_wifsignaled( $status ) ) {
			$signum = pcntl_wtermsig( $status );
			$this->output( "Pid $pid terminated by signal $signum !!\n" );
			return false;
		}

		return true;
	}
}
319
// Publish this script's class name in $maintClass, then include the file that
// the RUN_MAINTENANCE_IF_MAIN constant points to.
$maintClass = TTMServerBootstrap::class;
require_once RUN_MAINTENANCE_IF_MAIN;
Factory class for accessing message groups individually by id or all of them as a list.
Class for pointing to messages, like Title class is for titles.
Script to bootstrap TTMServer translation memory.
Interface for message groups.
initCollection( $code)
Initialises a message collection with the given language code, message definitions and message tags.
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.
Interface for TTMServer that can be updated.
endBatch()
Called after every batch (MessageGroup).
batchInsertDefinitions(array $batch)
Called multiple times per batch if necessary.
batchInsertTranslations(array $batch)
Called multiple times per batch if necessary.
beginBatch()
Called before every batch (MessageGroup).
beginBootstrap()
Called when starting to fill the translation memory.
endBootstrap()
Do any cleanup, optimizing etc.