Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ttmserver-export.php
Go to the documentation of this file.
1<?php
15use Wikimedia\Assert\Assert;
16
// Standard boilerplate to define $IP: prefer the MW_INSTALL_PATH environment
// variable and otherwise fall back to a path three directories above this file.
if ( getenv( 'MW_INSTALL_PATH' ) === false ) {
	$dir = __DIR__;
	$IP = "$dir/../../..";
} else {
	$IP = getenv( 'MW_INSTALL_PATH' );
}
require_once "$IP/maintenance/Maintenance.php";
25
30class TTMServerBootstrap extends Maintenance {
31 private float $start;
32 private const FAKE_TTM = 'dry-run';
33
/**
 * Registers the script description, its command line options and the
 * default batch size, and records the start time used by statusLine().
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Script to bootstrap TTMServer.' );
	$this->addOption(
		'threads',
		'(optional) Number of threads',
		/*required*/false,
		/*has arg*/true
	);
	$this->addOption(
		'ttmserver',
		'(optional) Server configuration identifier',
		/*required*/false,
		/*has arg*/true
	);
	// This option erases all data, empties the index and rebuilds it.
	$this->addOption(
		'reindex',
		'Update the index mapping. Warning: Clears all existing data in the index.'
	);
	$this->addOption(
		'dry-run',
		'Do not make any changes to the index.'
	);
	$this->addOption(
		'verbose',
		'Output more status information.'
	);
	$this->addOption(
		'clean',
		'Only run setup and cleanup. Skip inserting content.'
	);
	$this->setBatchSize( 500 );
	$this->requireExtension( 'Translate' );
	// Reference point for the elapsed-seconds column printed by statusLine().
	$this->start = microtime( true );
}
70
/**
 * Outputs a status message prefixed with the current process id, the number
 * of seconds elapsed since this object was constructed and the memory usage
 * in mebibytes.
 *
 * @param string $text Message to print; callers supply their own trailing newline.
 * @param mixed $channel Passed through unchanged to output().
 */
public function statusLine( $text, $channel = null ) {
	$columns = [
		sprintf( '%5s', getmypid() ),
		sprintf( '%6.2f', microtime( true ) - $this->start ),
		sprintf( '%5.1fM', memory_get_usage( true ) / ( 1024 * 1024 ) ),
		$text,
	];

	$this->output( implode( ' ', $columns ), $channel );
}
77
/**
 * Script entry point.
 *
 * Runs beginBootstrap() for every target server, exports each non-meta message
 * group (in forked child processes where forking succeeds), waits for all
 * children and finally runs endBootstrap(). Terminates with a fatal error if a
 * bootstrap child or any export child reported a failure.
 */
public function execute() {
	$dryRun = $this->hasOption( 'dry-run' );
	$ttmServerId = $this->getOption( 'ttmserver' );
	// 'reindex' is registered as a flag without an argument, so test for its
	// presence instead of relying on the option value being coerced to bool.
	$shouldReindex = $this->hasOption( 'reindex' );

	if ( $this->mBatchSize !== null && $this->mBatchSize < 1 ) {
		$this->fatalError( 'Invalid value for option: "batch-size"' );
	}

	// The option arrives as a string from the command line. Comparing an int
	// against a non-numeric string falls back to string comparison on PHP 8,
	// which would make the thread limit below never trigger. Normalize to an
	// integer and allow at least one child at a time.
	$threads = max( 1, (int)$this->getOption( 'threads', 1 ) );

	$servers = $this->getServers( $dryRun, $shouldReindex, $ttmServerId );

	// Do as little as possible in the main thread, to not clobber forked processes.
	// See also #resetStateForFork.
	foreach ( array_keys( $servers ) as $serverId ) {
		$pid = pcntl_fork();

		if ( $pid === 0 ) {
			// Child process
			$server = $this->getWritableServer( $serverId );
			$this->resetStateForFork();
			$this->beginBootstrap( $server, $serverId );
			exit();
		} elseif ( $pid === -1 ) {
			// Fork failed, do it serialized in this process
			$server = $this->getWritableServer( $serverId );
			$this->beginBootstrap( $server, $serverId );
		} else {
			// Main thread
			$this->statusLine( "Forked thread $pid to handle bootstrapping for '$serverId'\n" );
			$status = 0;
			pcntl_waitpid( $pid, $status );
			// beginBootstrap probably failed, give up.
			if ( !$this->verifyChildStatus( $pid, $status ) ) {
				$this->fatalError( "Bootstrap failed for '$serverId'." );
			}
		}
	}

	$hasErrors = false;
	// Child pids that have been started and not yet waited for, stored as keys.
	$pids = [];

	// With --clean only the setup and cleanup steps run; nothing is exported.
	$groups = $this->hasOption( 'clean' ) ? [] : MessageGroups::singleton()->getGroups();

	foreach ( $groups as $id => $group ) {
		if ( $group->isMeta() ) {
			continue;
		}

		// Fork to increase speed with parallelism. Also helps with memory usage if there are leaks.
		$pid = pcntl_fork();
		if ( $pid === 0 ) {
			$this->resetStateForFork();
			$this->exportGroup( $group, $servers );
			exit();
		} elseif ( $pid === -1 ) {
			// Fork failed, export the group in this process instead.
			$this->exportGroup( $group, $servers );
		} else {
			// Main thread
			$this->statusLine( "Forked thread $pid to handle $id\n" );
			$pids[$pid] = true;

			// If we hit the thread limit, wait for any child to finish.
			if ( count( $pids ) >= $threads ) {
				$status = 0;
				$finishedPid = pcntl_wait( $status );
				$hasErrors = $hasErrors || !$this->verifyChildStatus( $finishedPid, $status );
				unset( $pids[$finishedPid] );
			}
		}
	}

	// Return control after all threads have finished.
	foreach ( array_keys( $pids ) as $pid ) {
		$status = 0;
		pcntl_waitpid( $pid, $status );
		$hasErrors = $hasErrors || !$this->verifyChildStatus( $pid, $status );
	}

	// It's okay to do this in the main thread as it is the last thing
	$this->endBootstrap( $servers );

	if ( $hasErrors ) {
		$this->fatalError( '!!! Some threads failed. Review the script output !!!' );
	}
}
168
/**
 * Collects the servers to export to, keyed by server id.
 *
 * A dry run yields a single FakeTTMServer under the FAKE_TTM key. Otherwise
 * either the explicitly requested server or every writable server known to
 * the factory is returned. Exits with a fatal error when a requested server
 * cannot be created or when the resulting list is empty.
 *
 * @param bool $isDryRun Whether to use the fake server instead of real ones.
 * @param bool $shouldReindex Whether to flag every server for reindexing.
 * @param string|null $ttmServerId Specific server id, or null for all writable servers.
 * @return array Servers keyed by id; each value is asserted to be a WritableTtmServer.
 */
private function getServers(
	bool $isDryRun,
	bool $shouldReindex,
	?string $ttmServerId = null
): array {
	$factory = Services::getInstance()->getTtmServerFactory();

	if ( $isDryRun ) {
		$servers = [ self::FAKE_TTM => new FakeTTMServer() ];
	} elseif ( $ttmServerId === null ) {
		$servers = $factory->getWritable();
	} else {
		$servers = [];
		try {
			$servers[ $ttmServerId ] = $factory->create( $ttmServerId );
		} catch ( ServiceCreationFailure $e ) {
			$this->fatalError( "Error while creating TtmServer $ttmServerId: " . $e->getMessage() );
		}
	}

	if ( !$servers ) {
		$this->fatalError( "No writable TtmServers found." );
	}

	foreach ( $servers as $server ) {
		Assert::parameterType( WritableTtmServer::class, $server, '$server' );

		// Not every implementation accepts a logger, hence the runtime check.
		if ( method_exists( $server, 'setLogger' ) ) {
			// @phan-suppress-next-line PhanUndeclaredMethod
			$server->setLogger( $this );
		}

		if ( $shouldReindex ) {
			// This doesn't do the update, just sets a flag to do it
			$server->setDoReIndex();
		}
	}

	return $servers;
}
216
/**
 * Announces the start of the bootstrap for one server and delegates to the
 * server's own beginBootstrap().
 *
 * @param WritableTtmServer $server Server to prepare.
 * @param string $serverId Identifier used in the status message.
 */
protected function beginBootstrap( WritableTtmServer $server, string $serverId ) {
	$message = "Cleaning up old entries in '$serverId'...\n";
	$this->statusLine( $message );

	$server->beginBootstrap();
}
221
/**
 * Finalizes every server in turn by calling its endBootstrap(), printing a
 * status line before each one.
 *
 * @param array $servers Servers keyed by server id.
 */
protected function endBootstrap( array $servers ) {
	foreach ( $servers as $serverId => $ttmServer ) {
		$message = "Optimizing '$serverId'...\n";
		$this->statusLine( $message );

		$ttmServer->endBootstrap();
	}
}
228
/**
 * Exports the definitions and translations of one message group to all given
 * servers in batches, and logs timing information when anything was exported.
 *
 * @param MessageGroup $group Group to export.
 * @param array $servers Servers receiving the batches.
 */
private function exportGroup( MessageGroup $group, array $servers ): void {
	// Each timer is accumulated as "subtract the start, add the end".
	$totalTime = -microtime( true );
	$statsTime = 0;
	$initTime = 0;
	$translationTime = 0;
	$writeTime = 0;
	// Time spent writing translations; tracked separately so that it can be
	// excluded from the translation timer and added to the write timer.
	$translationWriteTime = 0;

	$sourceLanguage = $group->getSourceLanguage();

	$initTime -= microtime( true );
	$collection = $this->getCollection( $group, $sourceLanguage );
	$initTime += microtime( true );

	$statsTime -= microtime( true );
	$stats = MessageGroupStats::forGroup( $group->getId() );
	$statsTime += microtime( true );
	// The source language is exported as definitions, not as translations.
	unset( $stats[ $sourceLanguage ] );

	$definitionCount = 0;
	$translationCount = 0;

	foreach ( $servers as $server ) {
		$server->beginBatch();
	}

	foreach ( $this->getDefinitions( $collection, $sourceLanguage ) as $definitionBatch ) {
		$definitionCount += count( $definitionBatch );
		foreach ( $servers as $server ) {
			$writeTime -= microtime( true );
			$server->batchInsertDefinitions( $definitionBatch );
			$writeTime += microtime( true );
		}
	}

	$translationTime -= microtime( true );
	foreach ( $stats as $targetLanguage => $numbers ) {
		// Skip languages that have nothing translated.
		if ( $numbers[MessageGroupStats::TRANSLATED] === 0 ) {
			continue;
		}

		foreach ( $this->getTranslations( $collection, $targetLanguage ) as $translationBatch ) {
			$translationCount += count( $translationBatch );
			foreach ( $servers as $server ) {
				$translationWriteTime -= microtime( true );
				$server->batchInsertTranslations( $translationBatch );
				$translationWriteTime += microtime( true );
			}
		}
	}

	$translationTime += ( microtime( true ) - $translationWriteTime );
	$writeTime += $translationWriteTime;

	foreach ( $servers as $server ) {
		$server->endBatch();
	}

	$totalTime += microtime( true );
	$itemCount = $translationCount + $definitionCount;

	if ( $itemCount === 0 ) {
		return;
	}

	// Share of the total run time, as a percentage.
	$percentOfTotal = static fn ( $part ) => $part / $totalTime * 100;

	$debug = sprintf(
		"Total %.1f s for %d items on %d server(s) >> stats/init/trans/writes %%: %d/%d/%d/%d >> %.1f ms/item",
		$totalTime,
		$itemCount,
		count( $servers ),
		$percentOfTotal( $statsTime ),
		$percentOfTotal( $initTime ),
		$percentOfTotal( $translationTime ),
		$percentOfTotal( $writeTime ),
		$totalTime / $itemCount * 1000
	);
	$this->logInfo( "Finished exporting {$group->getId()}. $debug\n" );
}
311
/**
 * Lazily yields the message definitions of a collection in batches.
 *
 * Every entry is a list of [ MessageHandle, source language code, definition ].
 * A batch is emitted whenever it reaches the configured batch size; leftover
 * entries are emitted as a final, smaller batch.
 *
 * @param MessageCollection $collection Collection to read definitions from.
 * @param string $sourceLanguage Language code stored with every definition.
 * @return Generator Yields arrays of definition entries.
 */
private function getDefinitions( MessageCollection $collection, string $sourceLanguage ): Generator {
	$batch = [];

	foreach ( $collection->keys() as $messageKey => $linkTarget ) {
		$batch[] = [
			new MessageHandle( Title::newFromLinkTarget( $linkTarget ) ),
			$sourceLanguage,
			$collection[$messageKey]->definition(),
		];

		// Keep collecting until a full batch is available (never, if no batch size is set).
		if ( !$this->mBatchSize || count( $batch ) !== $this->mBatchSize ) {
			continue;
		}

		yield $batch;
		$batch = [];
	}

	if ( $batch !== [] ) {
		yield $batch;
	}
}
329
/**
 * Lazily yields the translations of a collection for one language in batches.
 *
 * The collection is reset for the target language, filtered and its
 * translations loaded before iteration. Every entry is a list of
 * [ MessageHandle, target language code, translation ].
 *
 * @param MessageCollection $collection Collection to reuse; it is modified by this call.
 * @param string $targetLanguage Language code whose translations are exported.
 * @return Generator Yields arrays of translation entries.
 */
private function getTranslations( MessageCollection $collection, string $targetLanguage ): Generator {
	$collection->resetForNewLanguage( $targetLanguage );
	$collection->filter( 'ignored' );
	$collection->filter( 'translated', false );
	$collection->loadTranslations();

	$batch = [];

	foreach ( $collection->keys() as $messageKey => $linkTarget ) {
		$batch[] = [
			new MessageHandle( Title::newFromLinkTarget( $linkTarget ) ),
			$targetLanguage,
			$collection[$messageKey]->translation(),
		];

		// Keep collecting until a full batch is available (never, if no batch size is set).
		if ( !$this->mBatchSize || count( $batch ) !== $this->mBatchSize ) {
			continue;
		}

		yield $batch;
		$batch = [];
	}

	if ( $batch !== [] ) {
		yield $batch;
	}
}
351
/**
 * Prints a status line only when the 'verbose' option was given.
 *
 * @param string $text Message to print.
 */
private function logInfo( string $text ) {
	if ( !$this->hasOption( 'verbose' ) ) {
		return;
	}

	$this->statusLine( $text );
}
357
/**
 * Resets shared state after forking so the child process does not reuse
 * anything inherited from the parent. The order of the calls matters.
 */
protected function resetStateForFork() {
	// Existing connections cannot be used in forked children, so make sure
	// all of them are dead.
	MediaWiki\MediaWikiServices::resetChildProcessServices();

	// Temporary workaround for https://phabricator.wikimedia.org/T258860.
	// This script only moves data around, so skipping the message cache should
	// not cause any major issues. Things like the message documentation language
	// name and the main page name were being checked from the message cache and
	// sometimes failing.
	$messageCache = MediaWiki\MediaWikiServices::getInstance()->getMessageCache();
	$messageCache->disable();

	// Address issues with ObjectCache holding a reference to the
	// MediaWikiServices instance. MW <= 1.40
	ObjectCache::clear();
}
371
/**
 * Inspects the wait status of a child process and reports failures.
 *
 * @param int $pid Process id, used only in the message.
 * @param int $status Status value filled in by pcntl_wait() or pcntl_waitpid().
 * @return bool False if the child exited with a non-zero code or was
 *  terminated by a signal, true otherwise.
 */
private function verifyChildStatus( int $pid, int $status ): bool {
	if ( pcntl_wifexited( $status ) ) {
		$exitCode = pcntl_wexitstatus( $status );
		if ( !$exitCode ) {
			return true;
		}

		$this->output( "Pid $pid exited with status $exitCode !!\n" );
		return false;
	}

	if ( !pcntl_wifsignaled( $status ) ) {
		return true;
	}

	$signal = pcntl_wtermsig( $status );
	$this->output( "Pid $pid terminated by signal $signal !!\n" );
	return false;
}
387
/**
 * Creates a fresh writable server instance for the given id.
 *
 * @param string $serverId Server id, or FAKE_TTM for the dry-run server.
 * @return WritableTtmServer
 * @throws InvalidArgumentException If the created server is not a WritableTtmServer.
 */
private function getWritableServer( string $serverId ): WritableTtmServer {
	if ( $serverId === self::FAKE_TTM ) {
		return new FakeTTMServer();
	}

	$factory = Services::getInstance()->getTtmServerFactory();
	$server = $factory->create( $serverId );

	if ( $server instanceof WritableTtmServer ) {
		return $server;
	}

	throw new InvalidArgumentException(
		"$serverId TTM server does not implement WritableTtmServer interface "
	);
}
402
/**
 * Builds the message collection of a group in its source language, with the
 * 'ignored' filter applied and the messages initialized.
 *
 * @param MessageGroup $group Group to load messages from.
 * @param string $sourceLanguage Language code used to initialize the collection.
 * @return MessageCollection
 */
private function getCollection( MessageGroup $group, string $sourceLanguage ): MessageCollection {
	$messages = $group->initCollection( $sourceLanguage );

	// Apply the filter before the messages are constructed.
	$messages->filter( 'ignored' );
	$messages->initMessages();

	return $messages;
}
409}
410
// Name the maintenance class for the runner included below.
$maintClass = TTMServerBootstrap::class;
require_once RUN_MAINTENANCE_IF_MAIN;
NO-OP version of TTMServer when it is disabled.
Factory class for accessing message groups individually by id or all of them as a list.
This file contains the class for core message collections implementation.
resetForNewLanguage(string $code)
Some statistics scripts for example loop the same collection over every language.
filter(string $type, bool $condition=true, ?int $value=null)
Filters messages based on some condition.
initMessages()
Constructs all Messages (ThinMessage) from the data accumulated so far.
Minimal service container.
Definition Services.php:44
Class for pointing to messages, like Title class is for titles.
Script to bootstrap TTMServer translation memory.
Interface for TTMServer that can be updated.
beginBootstrap()
Called when starting to fill the translation memory.
Interface for message groups.
initCollection( $code)
Initialises a message collection with the given language code, message definitions and message tags.
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.