Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ttmserver-export.php
Go to the documentation of this file.
1<?php
18use MediaWiki\Title\Title;
19use Wikimedia\Assert\Assert;
20
// Locate the MediaWiki installation ($IP) so that the maintenance framework can be loaded.
// The MW_INSTALL_PATH environment variable wins; otherwise the core directory is
// taken to be three levels above this file.
if ( getenv( 'MW_INSTALL_PATH' ) === false ) {
	$dir = __DIR__;
	$IP = "$dir/../../..";
} else {
	$IP = getenv( 'MW_INSTALL_PATH' );
}
require_once "$IP/maintenance/Maintenance.php";
29
34class TTMServerBootstrap extends Maintenance {
35 private float $start;
36 private const FAKE_TTM = 'dry-run';
37
/**
 * Register the script description and command-line options, set the default
 * batch size and record the start time used by statusLine().
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Script to bootstrap TTMServer.' );
	$this->addOption(
		'threads',
		'(optional) Number of threads',
		/*required*/false,
		/*has arg*/true
	);
	$this->addOption(
		'ttmserver',
		'(optional) Server configuration identifier',
		/*required*/false,
		/*has arg*/true
	);
	// This option erases all data, empties the index and rebuilds it.
	$this->addOption(
		'reindex',
		'Update the index mapping. Warning: Clears all existing data in the index.'
	);
	$this->addOption(
		'dry-run',
		'Do not make any changes to the index.'
	);
	$this->addOption(
		'verbose',
		'Output more status information.'
	);
	$this->addOption(
		'clean',
		'Only run setup and cleanup. Skip inserting content.'
	);
	$this->setBatchSize( 500 );
	$this->requireExtension( 'Translate' );
	// Reference point for the elapsed-time column printed by statusLine().
	$this->start = microtime( true );
}
74
/**
 * Output a message prefixed with the process id, the seconds elapsed since
 * this object was constructed and the current memory usage in MiB.
 *
 * @param string $text Message to print; callers supply their own trailing newline.
 * @param mixed $channel Passed through unchanged to output().
 */
public function statusLine( $text, $channel = null ) {
	$processId = getmypid();
	$elapsedSeconds = microtime( true ) - $this->start;
	$memoryMiB = memory_get_usage( true ) / ( 1024 * 1024 );

	$header = sprintf( '%5s %6.2f %5.1fM', $processId, $elapsedSeconds, $memoryMiB );
	$this->output( $header . ' ' . $text, $channel );
}
81
/**
 * Script entry point.
 *
 * Runs beginBootstrap() for every target server (each in a forked child when
 * forking works), exports every non-meta message group using forked workers
 * limited by the "threads" option, and finally runs endBootstrap() in the
 * main process. Ends with a fatal error if any child reported a failure.
 */
public function execute() {
	$dryRun = $this->hasOption( 'dry-run' );
	$ttmServerId = $this->getOption( 'ttmserver' );
	// 'reindex' is declared without an argument, so only its presence matters.
	// hasOption() yields a real boolean for the bool parameter of getServers().
	$shouldReindex = $this->hasOption( 'reindex' );

	if ( $this->mBatchSize !== null && $this->mBatchSize < 1 ) {
		$this->fatalError( 'Invalid value for option: "batch-size"' );
	}

	// Validate the thread limit before anything is forked or cleaned up.
	// The option arrives as a string; a non-numeric value would otherwise make
	// the "count( $pids ) >= $threads" check below meaningless and remove the
	// limit on the number of concurrently forked children.
	$threads = $this->getOption( 'threads', 1 );
	if ( !is_numeric( $threads ) || (int)$threads < 1 ) {
		$this->fatalError( 'Invalid value for option: "threads"' );
	}
	$threads = (int)$threads;

	$servers = $this->getServers( $dryRun, $shouldReindex, $ttmServerId );

	// Do as little as possible in the main thread, to not clobber forked processes.
	// See also #resetStateForFork.
	foreach ( array_keys( $servers ) as $serverId ) {
		$pid = pcntl_fork();

		if ( $pid === 0 ) {
			// Child: create its own server instance, then drop inherited state.
			$server = $this->getWritableServer( $serverId );
			$this->resetStateForFork();
			$this->beginBootstrap( $server, $serverId );
			exit();
		} elseif ( $pid === -1 ) {
			// Fork failed do it serialized
			$server = $this->getWritableServer( $serverId );
			$this->beginBootstrap( $server, $serverId );
		} else {
			// Main thread
			$this->statusLine( "Forked thread $pid to handle bootstrapping for '$serverId'\n" );
			$status = 0;
			pcntl_waitpid( $pid, $status );
			// beginBootstrap probably failed, give up.
			if ( !$this->verifyChildStatus( $pid, $status ) ) {
				$this->fatalError( "Bootstrap failed for '$serverId'." );
			}
		}
	}

	$hasErrors = false;
	$pids = [];

	if ( $this->hasOption( 'clean' ) ) {
		// Only setup and cleanup were requested: nothing to export.
		$groups = [];
	} else {
		$groups = MessageGroups::singleton()->getGroups();
	}

	foreach ( $groups as $id => $group ) {
		/** @var MessageGroup $group */
		if ( $group->isMeta() ) {
			continue;
		}

		// Fork to increase speed with parallelism. Also helps with memory usage if there are leaks.
		$pid = pcntl_fork();
		if ( $pid === 0 ) {
			$this->resetStateForFork();
			$this->exportGroup( $group, $servers );
			exit();
		} elseif ( $pid === -1 ) {
			// Fork failed: export this group in the main process instead.
			$this->exportGroup( $group, $servers );
		} else {
			// Main thread
			$this->statusLine( "Forked thread $pid to handle $id\n" );
			$pids[$pid] = true;

			// If we hit the thread limit, wait for any child to finish.
			if ( count( $pids ) >= $threads ) {
				$status = 0;
				$pid = pcntl_wait( $status );
				$hasErrors = $hasErrors || !$this->verifyChildStatus( $pid, $status );
				unset( $pids[$pid] );
			}
		}
	}

	// Return control after all threads have finished.
	foreach ( array_keys( $pids ) as $pid ) {
		$status = 0;
		pcntl_waitpid( $pid, $status );
		$hasErrors = $hasErrors || !$this->verifyChildStatus( $pid, $status );
	}

	// It's okay to do this in the main thread as it is the last thing
	$this->endBootstrap( $servers );

	if ( $hasErrors ) {
		$this->fatalError( '!!! Some threads failed. Review the script output !!!' );
	}
}
172
/**
 * Build the list of servers to write to.
 *
 * A dry run yields a single FakeTtmServer under the FAKE_TTM key. Otherwise
 * either the server named by $ttmServerId is created, or the factory's
 * writable servers are used. Ends with a fatal error when creation fails or
 * the resulting list is empty.
 *
 * @param bool $isDryRun Whether to use the fake server only.
 * @param bool $shouldReindex Whether to flag every server for reindexing.
 * @param string|null $ttmServerId Specific server to use, or null.
 * @return array Servers; execute() treats the array keys as server ids.
 */
private function getServers(
	bool $isDryRun,
	bool $shouldReindex,
	?string $ttmServerId = null
): array {
	$factory = Services::getInstance()->getTtmServerFactory();

	if ( $isDryRun ) {
		$servers = [ self::FAKE_TTM => new FakeTtmServer() ];
	} elseif ( $ttmServerId === null ) {
		$servers = $factory->getWritable();
	} else {
		$servers = [];
		try {
			$servers[ $ttmServerId ] = $factory->create( $ttmServerId );
		} catch ( ServiceCreationFailure $e ) {
			$this->fatalError( "Error while creating TtmServer $ttmServerId: " . $e->getMessage() );
		}
	}

	if ( !$servers ) {
		$this->fatalError( "No writable TtmServers found." );
	}

	foreach ( $servers as $ttmServer ) {
		Assert::parameterType( WritableTtmServer::class, $ttmServer, '$server' );

		// Not every implementation has setLogger(); use it only where present.
		if ( method_exists( $ttmServer, 'setLogger' ) ) {
			// @phan-suppress-next-line PhanUndeclaredMethod
			$ttmServer->setLogger( $this );
		}

		if ( $shouldReindex ) {
			// This doesn't do the update, just sets a flag to do it
			$ttmServer->setDoReIndex();
		}
	}

	return $servers;
}
220
/**
 * Announce the cleanup of one server and call its beginBootstrap().
 *
 * @param WritableTtmServer $server Server to prepare.
 * @param string $serverId Identifier shown in the status line.
 */
protected function beginBootstrap( WritableTtmServer $server, string $serverId ) {
	$announcement = "Cleaning up old entries in '$serverId'...\n";
	$this->statusLine( $announcement );

	$server->beginBootstrap();
}
225
/**
 * Call endBootstrap() on every server, printing a status line before each.
 *
 * @param array $servers Servers keyed by server id.
 */
protected function endBootstrap( array $servers ) {
	foreach ( $servers as $serverId => $ttmServer ) {
		$notice = "Optimizing '$serverId'...\n";
		$this->statusLine( $notice );
		$ttmServer->endBootstrap();
	}
}
232
/**
 * Export the definitions and translations of one message group to all servers.
 *
 * Definitions are inserted first, then the translations of every language
 * whose statistics report at least one translated message. Timings are
 * collected along the way and reported through logInfo() when at least one
 * item was exported.
 *
 * @param MessageGroup $group Group to export.
 * @param array $servers Servers that receive the batches.
 */
private function exportGroup( MessageGroup $group, array $servers ): void {
	// Each timer accumulates "-start +end" pairs, so it ends up holding a duration.
	$timings = [
		'total' => -microtime( true ),
		'stats' => 0,
		'init' => 0,
		'trans' => 0,
		'writes' => 0
	];
	$translationWriteTime = 0;
	$definitionCount = 0;
	$translationCount = 0;

	$sourceLanguage = $group->getSourceLanguage();

	$timings['init'] -= microtime( true );
	$collection = $this->getCollection( $group, $sourceLanguage );
	$timings['init'] += microtime( true );

	$timings['stats'] -= microtime( true );
	$languageStats = MessageGroupStats::forGroup( $group->getId() );
	$timings['stats'] += microtime( true );
	// The source language is covered by the definitions, not the translations.
	unset( $languageStats[$sourceLanguage] );

	foreach ( $servers as $ttmServer ) {
		$ttmServer->beginBatch();
	}

	foreach ( $this->getDefinitions( $collection, $sourceLanguage ) as $definitionBatch ) {
		$definitionCount += count( $definitionBatch );
		foreach ( $servers as $ttmServer ) {
			$timings['writes'] -= microtime( true );
			$ttmServer->batchInsertDefinitions( $definitionBatch );
			$timings['writes'] += microtime( true );
		}
	}

	$timings['trans'] -= microtime( true );
	foreach ( $languageStats as $targetLanguage => $numbers ) {
		// Only languages with at least one translated message are exported.
		if ( $numbers[MessageGroupStats::TRANSLATED] !== 0 ) {
			foreach ( $this->getTranslations( $collection, $targetLanguage ) as $translationBatch ) {
				$translationCount += count( $translationBatch );
				foreach ( $servers as $ttmServer ) {
					$translationWriteTime -= microtime( true );
					$ttmServer->batchInsertTranslations( $translationBatch );
					$translationWriteTime += microtime( true );
				}
			}
		}
	}

	// Time spent writing translations counts as 'writes', not as 'trans'.
	$timings['trans'] += ( microtime( true ) - $translationWriteTime );
	$timings['writes'] += $translationWriteTime;

	foreach ( $servers as $ttmServer ) {
		$ttmServer->endBatch();
	}

	$timings['total'] += microtime( true );
	$itemCount = $translationCount + $definitionCount;

	if ( $itemCount === 0 ) {
		return;
	}

	$totalSeconds = $timings['total'];
	$debug = sprintf(
		"Total %.1f s for %d items on %d server(s) >> stats/init/trans/writes %%: %d/%d/%d/%d >> %.1f ms/item",
		$totalSeconds,
		$itemCount,
		count( $servers ),
		$timings['stats'] / $totalSeconds * 100,
		$timings['init'] / $totalSeconds * 100,
		$timings['trans'] / $totalSeconds * 100,
		$timings['writes'] / $totalSeconds * 100,
		$totalSeconds / $itemCount * 1000
	);
	$this->logInfo( "Finished exporting {$group->getId()}. $debug\n" );
}
315
/**
 * Yield the message definitions of a collection in batches.
 *
 * Every entry is a triple of MessageHandle, source language code and the
 * definition text. A batch is yielded whenever it reaches the configured
 * batch size; any remaining entries are yielded as a last, smaller batch.
 *
 * @param MessageCollection $collection Collection to read from.
 * @param string $sourceLanguage Language code stored with every definition.
 * @return Generator Yields arrays of [ MessageHandle, string, definition ].
 */
private function getDefinitions( MessageCollection $collection, string $sourceLanguage ): Generator {
	$pending = [];

	foreach ( $collection->keys() as $messageKey => $linkTarget ) {
		$pending[] = [
			new MessageHandle( Title::newFromLinkTarget( $linkTarget ) ),
			$sourceLanguage,
			$collection[$messageKey]->definition()
		];

		$batchIsFull = $this->mBatchSize && count( $pending ) === $this->mBatchSize;
		if ( $batchIsFull ) {
			yield $pending;
			$pending = [];
		}
	}

	// Flush whatever did not fill a complete batch.
	if ( $pending ) {
		yield $pending;
	}
}
333
/**
 * Yield the translations of a collection for one language in batches.
 *
 * The collection is reset for the target language, filtered and its
 * translations loaded before iteration. Every entry is a triple of
 * MessageHandle, target language code and the translation text.
 *
 * @param MessageCollection $collection Collection to read from; it is modified.
 * @param string $targetLanguage Language code to load translations for.
 * @return Generator Yields arrays of [ MessageHandle, string, translation ].
 */
private function getTranslations( MessageCollection $collection, string $targetLanguage ): Generator {
	$collection->resetForNewLanguage( $targetLanguage );
	$collection->filter( 'ignored' );
	$collection->filter( 'translated', false );
	$collection->loadTranslations();

	$pending = [];

	foreach ( $collection->keys() as $messageKey => $linkTarget ) {
		$pending[] = [
			new MessageHandle( Title::newFromLinkTarget( $linkTarget ) ),
			$targetLanguage,
			$collection[$messageKey]->translation()
		];

		$batchIsFull = $this->mBatchSize && count( $pending ) === $this->mBatchSize;
		if ( $batchIsFull ) {
			yield $pending;
			$pending = [];
		}
	}

	// Flush whatever did not fill a complete batch.
	if ( $pending ) {
		yield $pending;
	}
}
355
/**
 * Print a status line, but only when the "verbose" option was given.
 *
 * @param string $text Message to print.
 */
private function logInfo( string $text ) {
	if ( !$this->hasOption( 'verbose' ) ) {
		return;
	}

	$this->statusLine( $text );
}
361
/**
 * Prepare a freshly forked child process: reset the service container and
 * disable the message cache.
 */
protected function resetStateForFork() {
	// Connections opened before the fork cannot be used by the child,
	// so make sure all of them are dead.
	MediaWiki\MediaWikiServices::resetChildProcessServices();

	// Temporary workaround for https://phabricator.wikimedia.org/T258860.
	// Skipping the message cache should not cause any major issues because this
	// script just moves data around. Lookups such as the message documentation
	// language name and the main page name went through the message cache and
	// sometimes failed.
	$services = MediaWiki\MediaWikiServices::getInstance();
	$services->getMessageCache()->disable();
}
372
/**
 * Check the wait status of a child process and report any failure.
 *
 * @param int $pid Process id, used only in the printed message.
 * @param int $status Status value filled in by pcntl_wait() or pcntl_waitpid().
 * @return bool False when the child exited with a non-zero code or was
 *  terminated by a signal, true otherwise.
 */
private function verifyChildStatus( int $pid, int $status ): bool {
	if ( pcntl_wifexited( $status ) ) {
		$code = pcntl_wexitstatus( $status );
		if ( !$code ) {
			return true;
		}

		$this->output( "Pid $pid exited with status $code !!\n" );
		return false;
	}

	if ( pcntl_wifsignaled( $status ) ) {
		$signum = pcntl_wtermsig( $status );
		$this->output( "Pid $pid terminated by signal $signum !!\n" );
		return false;
	}

	return true;
}
388
/**
 * Create a fresh writable server instance for the given id.
 *
 * @param string $serverId Server id; FAKE_TTM yields a FakeTtmServer.
 * @return WritableTtmServer
 * @throws InvalidArgumentException When the created server is not writable.
 */
private function getWritableServer( string $serverId ): WritableTtmServer {
	if ( $serverId === self::FAKE_TTM ) {
		return new FakeTtmServer();
	}

	$factory = Services::getInstance()->getTtmServerFactory();
	$candidate = $factory->create( $serverId );

	if ( $candidate instanceof WritableTtmServer ) {
		return $candidate;
	}

	throw new InvalidArgumentException(
		"$serverId TTM server does not implement WritableTtmServer interface "
	);
}
403
/**
 * Create the message collection of a group for its source language, with the
 * 'ignored' filter applied and the messages initialised.
 *
 * @param MessageGroup $group Group to create the collection from.
 * @param string $sourceLanguage Language code passed to initCollection().
 * @return MessageCollection
 */
private function getCollection( MessageGroup $group, string $sourceLanguage ): MessageCollection {
	$messages = $group->initCollection( $sourceLanguage );

	$messages->filter( 'ignored' );
	$messages->initMessages();

	return $messages;
}
410}
411
// Name the maintenance class defined in this file, then include the file
// referenced by the RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = TTMServerBootstrap::class;
require_once RUN_MAINTENANCE_IF_MAIN;
Factory class for accessing message groups individually by id or all of them as a list.
This file contains the class for core message collections implementation.
resetForNewLanguage(string $code)
Some statistics scripts for example loop the same collection over every language.
filter(string $type, bool $condition=true, ?int $value=null)
Filters messages based on some condition.
initMessages()
Constructs all Messages (ThinMessage) from the data accumulated so far.
Class for pointing to messages, like Title class is for titles.
Minimal service container.
Definition Services.php:58
This class aims to provide efficient mechanism for fetching translation completion stats.
NO-OP version of TTMServer when it is disabled.
Script to bootstrap TTMServer translation memory.
Interface for TTMServer that can be updated.
beginBootstrap()
Called when starting to fill the translation memory.
Interface for message groups.
initCollection( $code)
Initialises a message collection with the given language code, message definitions and message tags.
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.