Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
16.67% |
58 / 348 |
|
11.54% |
3 / 26 |
CRAP | |
0.00% |
0 / 1 |
GettextFormat | |
16.71% |
58 / 347 |
|
11.54% |
3 / 26 |
5532.61 | |
0.00% |
0 / 1 |
supportsFuzzy | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFileExtensions | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setOfflineMode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
read | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
readFromVariable | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
parseGettext | |
0.00% |
0 / 52 |
|
0.00% |
0 / 1 |
182 | |||
parseGettextSection | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
240 | |||
processGettextPluralMessage | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
parseFlags | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
expectKeyword | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
generateKeyFromItem | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
3 | |||
processData | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
handleWhitespace | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
formatForWiki | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
parseHeaderTags | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
writeReal | |
0.00% |
0 / 34 |
|
0.00% |
0 / 1 |
30 | |||
doGettextHeader | |
0.00% |
0 / 47 |
|
0.00% |
0 / 1 |
42 | |||
doAuthors | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
formatMessageBlock | |
54.05% |
20 / 37 |
|
0.00% |
0 / 1 |
25.97 | |||
formatTime | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getPotTime | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
formatDocumentation | |
22.22% |
2 / 9 |
|
0.00% |
0 / 1 |
11.53 | |||
escape | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
shouldOverwrite | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getExtraSchema | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
2 | |||
isContentEqual | |
80.00% |
12 / 15 |
|
0.00% |
0 / 1 |
7.39 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\FileFormatSupport; |
5 | |
6 | use InvalidArgumentException; |
7 | use LanguageCode; |
8 | use MediaWiki\Extension\Translate\MessageGroupConfiguration\MetaYamlSchemaExtender; |
9 | use MediaWiki\Extension\Translate\MessageLoading\Message; |
10 | use MediaWiki\Extension\Translate\MessageLoading\MessageCollection; |
11 | use MediaWiki\Extension\Translate\Services; |
12 | use MediaWiki\Extension\Translate\Utilities\GettextPlural; |
13 | use MediaWiki\Extension\Translate\Utilities\Utilities; |
14 | use MediaWiki\Logger\LoggerFactory; |
15 | use MediaWiki\MediaWikiServices; |
16 | use MediaWiki\Title\Title; |
17 | use RuntimeException; |
18 | use SpecialVersion; |
19 | |
20 | /** |
21 | * FileFormat class that implements support for gettext file format. |
22 | * |
23 | * @author Niklas Laxström |
24 | * @author Siebrand Mazeland |
25 | * @copyright Copyright © 2008-2010, Niklas Laxström, Siebrand Mazeland |
26 | * @license GPL-2.0-or-later |
27 | * @ingroup FileFormatSupport |
28 | */ |
29 | class GettextFormat extends SimpleFormat implements MetaYamlSchemaExtender { |
30 | private bool $allowPotMode = false; |
31 | private bool $offlineMode = false; |
32 | |
/**
 * Tell callers whether this format can store the fuzzy state of a translation.
 *
 * @return string Always 'yes'; both the parser and the writer of this class
 *  handle the "fuzzy" flag.
 */
public function supportsFuzzy(): string {
	$support = 'yes';

	return $support;
}
36 | |
/**
 * List the file name extensions associated with this format.
 *
 * @return string[] The '.pot' and '.po' extensions, in that order.
 */
public function getFileExtensions(): array {
	$extensions = [];
	$extensions[] = '.pot';
	$extensions[] = '.po';

	return $extensions;
}
40 | |
/**
 * Switch offline mode on or off.
 *
 * While enabled, the writer uses the message key as msgctxt, emits the
 * POT-Creation-Date, X-Language-Code and X-Message-Group headers and
 * includes message documentation as "#. [Wiki]" comments.
 *
 * @param bool $value True to enable offline mode, false to disable it.
 */
public function setOfflineMode( bool $value ): void {
	$this->offlineMode = $value;
}
44 | |
/**
 * Reads via the parent implementation, allowing pot mode only while the
 * source language of the group is being read.
 *
 * @inheritDoc
 */
public function read( $languageCode ) {
	// This is somewhat hacky, but pot mode should only ever be used for the source language.
	// See https://phabricator.wikimedia.org/T230361
	$isSourceLanguage = $this->getGroup()->getSourceLanguage() === $languageCode;
	$this->allowPotMode = $isSourceLanguage;

	try {
		$result = parent::read( $languageCode );
	} finally {
		// Always reset the flag, even when the parent throws
		$this->allowPotMode = false;
	}

	return $result;
}
57 | |
/**
 * Parse Gettext file contents and attach the authors listed in its comments.
 *
 * @param string $data Full file contents.
 * @return array The result of parseGettext() with an added 'AUTHORS' list and
 *  with messages whose text is the empty string removed.
 */
public function readFromVariable( string $data ): array {
	// Authors are listed in "# Author: name" comment lines
	$authorMatches = [];
	preg_match_all( '/^#\s*Author:\s*(.*)$/m', $data, $authorMatches );

	// Then messages and everything else
	$parsedData = $this->parseGettext( $data );
	$parsedData['AUTHORS'] = $authorMatches[1];

	// array_filter keeps the message keys intact
	$parsedData['MESSAGES'] = array_filter(
		$parsedData['MESSAGES'],
		static fn ( $text ): bool => $text !== ''
	);

	return $parsedData;
}
76 | |
/**
 * Parse the contents of a Gettext file into messages, per-message templates
 * and metadata.
 *
 * @param string $data Full file contents.
 * @return array Array with the keys 'MESSAGES' (mangled key => text) and
 *  'EXTRA' (holding 'TEMPLATE', 'METADATA' and 'HEADERS').
 * @throws GettextParseException When the first section has no msgstr.
 */
private function parseGettext( string $data ): array {
	$mangler = $this->group->getMangler();
	$useCtxtAsKey = $this->extra['CtxtAsKey'] ?? false;
	$keyAlgorithm = $this->extra['keyAlgorithm'] ?? 'simple';

	// Normalise newlines, to make processing easier
	$data = str_replace( "\r\n", "\n", $data );

	/* Delimit the file into sections, which are separated by two newlines.
	 * We are permissive and accept more than two. This parsing method isn't
	 * efficient wrt memory, but was easy to implement */
	$sections = preg_split( '/\n{2,}/', $data );

	/* First one isn't an actual message: it is the header section. We are
	 * only interested in its tags; its msgid is empty. */
	$headerSection = array_shift( $sections );
	$match = $this->expectKeyword( 'msgstr', $headerSection );
	if ( $match === null ) {
		$message = "Gettext file header was not found:\n\n$data";
		throw new GettextParseException( $message );
	}

	$headerBlock = $this->formatForWiki( $match, 'trim' );
	$headers = $this->parseHeaderTags( $headerBlock );

	// A fuzzy header indicates pot mode, which is honoured only when allowed
	$headerIsFuzzy = in_array( 'fuzzy', $this->parseFlags( $headerSection ), true );
	$potmode = $headerIsFuzzy ? $this->allowPotMode : false;

	// Extract some metadata from headers for easier use
	$metadata = [];
	if ( isset( $headers['X-Language-Code'] ) ) {
		$metadata['code'] = $headers['X-Language-Code'];
	}

	if ( isset( $headers['X-Message-Group'] ) ) {
		$metadata['group'] = $headers['X-Message-Group'];
	}

	/* At this stage we are only interested how many plurals forms we should
	 * be expecting when parsing the rest of this file. */
	$pluralCount = null;
	if ( $potmode ) {
		$pluralCount = 2;
	} elseif ( isset( $headers['Plural-Forms'] ) ) {
		$pluralCount = GettextPlural::getPluralCount( $headers['Plural-Forms'] );
	}
	$metadata['plural'] = $pluralCount;

	$template = [];
	$messages = [];

	// Then parse the messages
	foreach ( $sections as $section ) {
		$item = $this->parseGettextSection( $section, $pluralCount );
		if ( $item === null ) {
			continue;
		}

		if ( $useCtxtAsKey ) {
			// parseGettextSection() always sets 'ctxt' and uses false to mean
			// "no msgctxt", so isset() cannot detect a missing context.
			$ctxt = $item['ctxt'] ?? false;
			if ( $ctxt === false ) {
				error_log( "ctxt missing for: $section" );
				continue;
			}
			$key = $ctxt;
		} else {
			$key = $this->generateKeyFromItem( $item, $keyAlgorithm );
		}

		$key = $mangler->mangle( $key );
		// In pot mode the definition itself is the message text
		$messages[$key] = $potmode ? $item['id'] : $item['str'];
		$template[$key] = $item;
	}

	return [
		'MESSAGES' => $messages,
		'EXTRA' => [
			'TEMPLATE' => $template,
			'METADATA' => $metadata,
			'HEADERS' => $headers,
		],
	];
}
170 | |
/**
 * Parse one message section of a Gettext file.
 *
 * @param string $section Text of a single section.
 * @param int|null $pluralCount Number of plural forms to look for in plural messages.
 * @return array|null Array with the keys 'ctxt' (string, or false when the
 *  section has no msgctxt), 'id', 'str', 'flags' and 'comments'; null for
 *  blank sections and sections containing "#~" lines.
 * @throws RuntimeException When msgid, or msgstr of a non-plural message, cannot be parsed.
 */
private function parseGettextSection( string $section, ?int $pluralCount ): ?array {
	if ( trim( $section ) === '' ) {
		return null;
	}

	/* These inactive sections are of no interest to us. Multiline mode
	 * is needed because there may be flags or other annoying stuff
	 * before the commented out sections.
	 */
	if ( preg_match( '/^#~/m', $section ) ) {
		return null;
	}

	$item = [
		'ctxt' => false,
		'id' => '',
		'str' => '',
		'flags' => [],
		'comments' => [],
	];

	$match = $this->expectKeyword( 'msgid', $section );
	if ( $match === null ) {
		throw new RuntimeException( "Unable to parse msgid:\n\n$section" );
	}
	$item['id'] = $this->formatForWiki( $match );

	$match = $this->expectKeyword( 'msgctxt', $section );
	if ( $match !== null ) {
		$item['ctxt'] = $this->formatForWiki( $match );
	}

	$match = $this->expectKeyword( 'msgid_plural', $section );
	if ( $match !== null ) {
		$plural = $this->formatForWiki( $match );
		$item['id'] = GettextPlural::flatten( [ $item['id'], $plural ] );

		// The translation stays empty if no form has a translation
		$item['str'] = $this->processGettextPluralMessage( $pluralCount, $section );
	} else {
		$match = $this->expectKeyword( 'msgstr', $section );
		if ( $match === null ) {
			throw new RuntimeException( "Unable to parse msgstr:\n\n$section" );
		}
		$item['str'] = $this->formatForWiki( $match );
	}

	// Parse flags; fuzzy is carried in the translation text instead of the flag list
	$flags = $this->parseFlags( $section );
	foreach ( $flags as $key => $flag ) {
		if ( $flag === 'fuzzy' ) {
			$item['str'] = TRANSLATE_FUZZY . $item['str'];
			unset( $flags[$key] );
		}
	}
	$item['flags'] = $flags;

	// Rest of the comments, grouped by the character following '#'
	$matches = [];
	if ( preg_match_all( '/^#(.?) (.*)$/m', $section, $matches, PREG_SET_ORDER ) ) {
		foreach ( $matches as $match ) {
			[ , $type, $text ] = $match;

			// Flags were handled above
			if ( $type === ',' ) {
				continue;
			}

			// Skip the "#. [Wiki] ..." documentation lines written by
			// formatDocumentation(). The marker is in the comment text; the
			// type is a single character and can never contain it.
			if ( $type === '.' && str_starts_with( $text, '[Wiki]' ) ) {
				continue;
			}

			$item['comments'][$type][] = $text;
		}
	}

	return $item;
}
250 | |
/**
 * Collect the msgstr[N] forms of a plural message and flatten them into one string.
 *
 * @param int|null $pluralCount Number of forms to look for; no forms are read when null.
 * @param string $section Text of the section being parsed.
 * @return string Flattened plural text, or '' when every form is empty.
 */
private function processGettextPluralMessage( ?int $pluralCount, string $section ): string {
	$actualForms = [];

	for ( $i = 0; $i < $pluralCount; $i++ ) {
		$match = $this->expectKeyword( "msgstr\\[$i\\]", $section );

		if ( $match === null ) {
			// Missing forms are kept as empty strings so that positions stay aligned
			error_log( "Plural $i not found, expecting total of $pluralCount for $section" );
			$actualForms[] = '';
			continue;
		}

		$actualForms[] = $this->formatForWiki( $match );
	}

	// Nothing to flatten if no form carries any text
	if ( implode( '', $actualForms ) === '' ) {
		return '';
	}

	return GettextPlural::flatten( $actualForms );
}
271 | |
/**
 * Extract the flags from the first "#," line of a section.
 *
 * @param string $section Text of a single section.
 * @return string[] Trimmed flag names; empty when the section has no "#," line.
 */
private function parseFlags( string $section ): array {
	$matches = [];
	if ( !preg_match( '/^#,(.*)$/mu', $section, $matches ) ) {
		return [];
	}

	$flags = [];
	foreach ( explode( ',', $matches[1] ) as $flag ) {
		$flags[] = trim( $flag );
	}

	return $flags;
}
280 | |
/**
 * Find the quoted text block that follows a keyword such as msgid, msgstr,
 * msgid_plural or msgctxt.
 *
 * @param string $name Keyword to look for; inserted into the regular expression as is.
 * @param string $section Text of a single section.
 * @return string|null The raw, still quoted text block, or null when the keyword is absent.
 */
private function expectKeyword( string $name, string $section ): ?string {
	// A quoted string on the keyword line, optionally continued on following quoted lines
	$poformat = '".*"\n?(^".*"$\n?)*';

	$matches = [];
	$found = preg_match( "/^$name\s($poformat)/mx", $section, $matches );

	return $found ? $matches[1] : null;
}
294 | |
/**
 * Generates unique key for each message. Changing this WILL BREAK ALL
 * existing pages!
 *
 * The key is a SHA-1 hash of the context and definition, followed by a dash
 * and a snippet of the definition of at most 30 bytes.
 *
 * @param array $item As returned by parseGettextSection
 * @param string $algorithm Algorithm used to generate message keys: simple or legacy
 * @return string Key in the form "hash-snippet".
 */
public function generateKeyFromItem( array $item, string $algorithm = 'simple' ): string {
	$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' );

	/* Messages with msgctxt as empty string should be different
	 * from messages without any msgctxt. To avoid BC break make
	 * the empty ctxt a special case */
	$hashInput = $item['ctxt'] === ''
		? $item['id'] . 'MSGEMPTYCTXT'
		: $item['ctxt'] . $item['id'];
	$hash = sha1( $hashInput );

	$snippet = $item['id'];
	if ( $algorithm === 'simple' ) {
		// Only the simple algorithm shortens the hash
		$hash = substr( $hash, 0, 6 );
	} else {
		// legacy: replace characters outside Title::legalChars() and some
		// further punctuation with single spaces
		$legalChars = Title::legalChars();
		$snippet = preg_replace( "/[^$legalChars]/", ' ', $snippet );
		$snippet = preg_replace( "/[:&%\/_]/", ' ', $snippet );
		$snippet = preg_replace( '/ {2,}/', ' ', $snippet );
	}

	// Both algorithms finish the snippet the same way
	$snippet = $lang->truncateForDatabase( $snippet, 30, '' );
	$snippet = str_replace( ' ', '_', trim( $snippet ) );

	return "$hash-$snippet";
}
329 | |
/**
 * Decode a gettext text block: strip the quotes that wrap every line,
 * together with the newline after a closing quote, then resolve C-style
 * backslash escapes.
 *
 * @param string $data Raw quoted text block.
 * @return string Decoded text.
 */
private function processData( string $data ): string {
	$unquoted = preg_replace( '/(^"|"$\n?)/m', '', $data );

	return stripcslashes( $unquoted );
}
338 | |
/**
 * This method handles the whitespace at the end of the data.
 *
 * @param string $data Text to check.
 * @param string $whitespace 'mark' appends a backslash after trailing
 *  whitespace, 'trim' removes the trailing whitespace.
 * @return string Unchanged text when it does not end in whitespace.
 * @throws InvalidArgumentException On any other action, but only when the
 *  text actually ends in whitespace.
 */
private function handleWhitespace( string $data, string $whitespace ): string {
	if ( !preg_match( '/\s$/', $data ) ) {
		return $data;
	}

	return match ( $whitespace ) {
		'mark' => $data . '\\',
		'trim' => rtrim( $data ),
		// This condition will never happen as long as $whitespace is 'mark' or 'trim'
		default => throw new InvalidArgumentException( "Unknown action for whitespace: $whitespace" ),
	};
}
357 | |
/**
 * This parses the Gettext text block format. Since trailing whitespace is
 * not allowed in MediaWiki pages, the default action is to append
 * \-character at the end of the message. You can also choose to ignore it
 * and use the trim action instead.
 *
 * @param string $data Raw quoted text block.
 * @param string $whitespace Action for trailing whitespace: 'mark' or 'trim'.
 * @return string Decoded text with its trailing whitespace handled.
 */
private function formatForWiki( string $data, string $whitespace = 'mark' ): string {
	return $this->handleWhitespace( $this->processData( $data ), $whitespace );
}
368 | |
/**
 * Parse the "Name: value" lines of the Gettext header into an array.
 *
 * Lines without a colon are logged and skipped. Destructuring such a line
 * would leave the value undefined and make trim() fail.
 *
 * @param string $headers Decoded header block, one tag per line.
 * @return string[] Map of trimmed tag name to trimmed value.
 */
private function parseHeaderTags( string $headers ): array {
	$tags = [];
	foreach ( explode( "\n", $headers ) as $line ) {
		if ( !str_contains( $line, ':' ) ) {
			error_log( __METHOD__ . ": $line" );
			continue;
		}

		// Split on the first colon only: values may contain colons themselves
		[ $key, $value ] = explode( ':', $line, 2 );
		$tags[trim( $key )] = trim( $value );
	}

	return $tags;
}
381 | |
/**
 * Produce the Gettext file contents for a message collection.
 *
 * The existing files for 'en' and for the language of the collection are read
 * first, so that comments, flags and contexts stored in them can be reused.
 *
 * @param MessageCollection $collection Messages to export.
 * @return string File header followed by one block per message.
 */
protected function writeReal( MessageCollection $collection ): string {
	// FIXME: this should be the source language
	$pot = $this->read( 'en' ) ?? [];
	$code = $collection->code;
	$template = $this->read( $code ) ?? [];
	$output = $this->doGettextHeader( $collection, $template['EXTRA'] ?? [] );

	$pluralRule = GettextPlural::getPluralRule( $code );
	if ( !$pluralRule ) {
		// Fall back to the English rule, but leave a trace in the logs
		$pluralRule = GettextPlural::getPluralRule( 'en' );
		LoggerFactory::getInstance( 'Translate' )->warning(
			"T235180: Missing Gettext plural rule for '{languagecode}'",
			[ 'languagecode' => $code ]
		);
	}
	$pluralCount = GettextPlural::getPluralCount( $pluralRule );

	// Message documentation is loaded only when a documentation language is configured
	$documentationLanguageCode = MediaWikiServices::getInstance()
		->getMainConfig()
		->get( 'TranslateDocumentationLanguageCode' );
	$documentationCollection = null;
	if ( is_string( $documentationLanguageCode ) ) {
		$documentationCollection = clone $collection;
		$documentationCollection->resetForNewLanguage( $documentationLanguageCode );
		$documentationCollection->loadTranslations();
	}

	$translationTemplates = $template['EXTRA']['TEMPLATE'] ?? [];
	$potTemplates = $pot['EXTRA']['TEMPLATE'] ?? [];

	/** @var Message $m */
	foreach ( $collection as $key => $m ) {
		$documentation = null;
		if ( isset( $documentationCollection[$key] ) ) {
			$documentation = $documentationCollection[$key]->translation();
		}

		$output .= $this->formatMessageBlock(
			$key,
			$m,
			$translationTemplates[$key] ?? [],
			$potTemplates[$key] ?? [],
			$pluralCount,
			$documentation
		);
	}

	return $output;
}
428 | |
/**
 * Build the leading comment block and the header entry of the exported file.
 *
 * @param MessageCollection $collection Collection being exported.
 * @param array $template 'EXTRA' data of the previously read file; its
 *  'HEADERS' are used as the starting point for the header fields.
 * @return string Comment lines, the empty msgid entry carrying the header
 *  fields, and a terminating blank line.
 */
private function doGettextHeader( MessageCollection $collection, array $template ): string {
	global $wgSitename;

	$code = $collection->code;
	$name = Utilities::getLanguageName( $code );
	$native = Utilities::getLanguageName( $code, $code );
	$authors = $this->doAuthors( $collection );
	$extra = isset( $this->extra['header'] ) ? "# --\n" . $this->extra['header'] : '';
	$group = $this->getGroup();

	$comment = "# Translation of {$group->getLabel()} to $name ($native)\n"
		. "# Exported from $wgSitename\n"
		. "#\n"
		. $authors . $extra;

	// Make sure there is no empty line before msgid
	$output = trim( $comment ) . "\n";

	// Start from the headers of the existing file; insertion order is output order
	$specs = $template['HEADERS'] ?? [];

	$timestamp = wfTimestampNow();
	$specs['PO-Revision-Date'] = $this->formatTime( $timestamp );
	if ( $this->offlineMode ) {
		$specs['POT-Creation-Date'] = $this->formatTime( $timestamp );
	} else {
		$potTimestamp = wfTimestamp( TS_MW, $this->getPotTime() );
		$specs['X-POT-Import-Date'] = $this->formatTime( $potTimestamp );
	}
	$specs['Content-Type'] = 'text/plain; charset=UTF-8';
	$specs['Content-Transfer-Encoding'] = '8bit';

	$specs['Language'] = LanguageCode::bcp47( $this->group->mapCode( $code ) );

	// @deprecated Use Translate:GettextFormat:headerFields instead.
	// This hook is deprecated and will be removed in a future release.
	// TODO: Remove after 2024.01
	$hookContainer = MediaWikiServices::getInstance()->getHookContainer();
	$hookContainer->run( 'Translate:GettextFFS:headerFields', [ &$specs, $this->group, $code ] );

	$hookRunner = Services::getInstance()->getHookRunner();
	$hookRunner->onTranslate_GettextFormat_headerFields( $specs, $this->group, $code );

	// The fields below are set after the hooks have run
	$specs['X-Generator'] = 'MediaWiki ' . SpecialVersion::getVersion()
		. '; Translate ' . Utilities::getVersion();

	if ( $this->offlineMode ) {
		$specs['X-Language-Code'] = $code;
		$specs['X-Message-Group'] = $group->getId();
	}

	$specs['Plural-Forms'] = GettextPlural::getPluralRule( $code )
		?: GettextPlural::getPluralRule( 'en' );

	// The header is the translation of the empty msgid, one quoted line per field
	$output .= "msgid \"\"\n" . "msgstr \"\"\n" . "\"\"\n";
	foreach ( $specs as $k => $v ) {
		$output .= $this->escape( "$k: $v\n" ) . "\n";
	}

	return $output . "\n";
}
505 | |
/**
 * Format the filtered authors of a collection as comment lines.
 *
 * @param MessageCollection $collection Collection being exported.
 * @return string One "# Author: name" line per author; '' when there are none.
 */
private function doAuthors( MessageCollection $collection ): string {
	$authors = $this->filterAuthors( $collection->getAuthors(), $collection->code );

	$output = '';
	foreach ( $authors as $author ) {
		$output .= "# Author: $author\n";
	}

	return $output;
}
517 | |
/**
 * Format a single message as a Gettext entry.
 *
 * @param string $key Message key; written as msgctxt in offline mode.
 * @param Message $message Message providing definition, translation and tags.
 * @param array $trans Parsed item of this message from the translation file, if any.
 * @param array $pot Parsed item of this message from the 'en' file, if any;
 *  takes precedence over $trans for comments, flags and context.
 * @param int $pluralCount Number of plural forms to write for plural messages.
 * @param string|null $documentation Message documentation, if available.
 * @return string Comment and flag lines followed by the entry and a blank line.
 */
private function formatMessageBlock(
	string $key,
	Message $message,
	array $trans,
	array $pot,
	int $pluralCount,
	?string $documentation
): string {
	$header = $this->formatDocumentation( $documentation );

	foreach ( $pot['comments'] ?? $trans['comments'] ?? [] as $type => $typecomments ) {
		foreach ( $typecomments as $comment ) {
			$header .= "#$type $comment\n";
		}
	}

	$flags = array_merge( $message->getTags(), $pot['flags'] ?? $trans['flags'] ?? [] );

	// Entry lines are collected here and joined at the end
	$lines = [];

	if ( $this->offlineMode ) {
		$lines[] = 'msgctxt ' . $this->escape( $key );
	} else {
		$ctxt = $pot['ctxt'] ?? $trans['ctxt'] ?? false;
		if ( $ctxt !== false ) {
			$lines[] = 'msgctxt ' . $this->escape( $ctxt );
		}
	}

	$msgid = $message->definition();
	$msgstr = $message->translation() ?? '';
	if ( str_contains( $msgstr, TRANSLATE_FUZZY ) ) {
		// Might be fuzzy infile
		$msgstr = str_replace( TRANSLATE_FUZZY, '', $msgstr );
		$flags[] = 'fuzzy';
	}

	if ( !GettextPlural::hasPlural( $msgid ) ) {
		$lines[] = 'msgid ' . $this->escape( $msgid );
		$lines[] = 'msgstr ' . $this->escape( $msgstr );
	} else {
		[ $singular, $plural ] = GettextPlural::unflatten( $msgid, 2 );
		$lines[] = 'msgid ' . $this->escape( $singular );
		$lines[] = 'msgid_plural ' . $this->escape( $plural );

		try {
			$forms = GettextPlural::unflatten( $msgstr, $pluralCount );
			foreach ( $forms as $index => $form ) {
				$lines[] = "msgstr[$index] " . $this->escape( $form );
			}
		} catch ( GettextPluralException $e ) {
			// Write empty forms and flag the entry instead of failing the export
			$flags[] = 'invalid-plural';
			for ( $i = 0; $i < $pluralCount; $i++ ) {
				$lines[] = "msgstr[$i] \"\"";
			}
		}
	}

	if ( $flags !== [] ) {
		sort( $flags );
		$header .= '#, ' . implode( ', ', array_unique( $flags ) ) . "\n";
	}

	// An entry without any comment or flag line gets a bare "#" line
	$content = implode( "\n", $lines ) . "\n";

	return ( $header ?: "#\n" ) . $content . "\n";
}
587 | |
/**
 * Format a timestamp for use in a header field, with a fixed +0000 offset.
 *
 * @param string $time Timestamp in a form accepted by Language::sprintfDate().
 * @return string Date and time as "YYYY-MM-DD HH:MM:SS+0000".
 */
private function formatTime( string $time ): string {
	return MediaWikiServices::getInstance()
		->getLanguageFactory()
		->getLanguage( 'en' )
		->sprintfDate( 'xnY-xnm-xnd xnH:xni:xns+0000', $time );
}
593 | |
/**
 * Get the timestamp of the message group cache for the source language.
 *
 * @return string Timestamp of the cache, or the current time when the cache
 *  does not exist.
 */
private function getPotTime(): string {
	$sourceLanguage = $this->group->getSourceLanguage();
	$cache = $this->group->getMessageGroupCache( $sourceLanguage );

	if ( $cache->exists() ) {
		return $cache->getTimestamp();
	}

	return wfTimestampNow();
}
599 | |
/**
 * Format message documentation as "#. [Wiki]" comment lines.
 *
 * @param string|null $documentation Documentation text, possibly spanning several lines.
 * @return string One comment line per line of documentation; '' when there is
 *  no documentation or offline mode is disabled.
 */
private function formatDocumentation( ?string $documentation ): string {
	if ( !is_string( $documentation ) || !$this->offlineMode ) {
		return '';
	}

	$commentLines = array_map(
		static fn ( string $line ): string => "#. [Wiki] $line\n",
		explode( "\n", $documentation )
	);

	return implode( '', $commentLines );
}
617 | |
/**
 * Quote a string for output as a Gettext string.
 *
 * @param string $line Text to quote.
 * @return string Text wrapped in double quotes, with backslashes and double
 *  quotes escaped and newlines written as \n.
 */
private function escape( string $line ): string {
	// There may be \ as a last character, for keeping trailing whitespace
	$withoutMarker = preg_replace( '/(\s)\\\\$/', '\1', $line );
	$escaped = addcslashes( $withoutMarker, '\\"' );

	return '"' . str_replace( "\n", '\n', $escaped ) . '"';
}
625 | |
/**
 * Decide whether two versions of a file differ in more than their date headers.
 *
 * @param string $a First file contents.
 * @param string $b Second file contents.
 * @return bool True when the contents still differ after the "...-Date" header
 *  lines have been blanked out.
 */
public function shouldOverwrite( string $a, string $b ): bool {
	$regex = '/^"(.+)-Date: \d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\+\d\d\d\d\\\\n"$/m';

	[ $withoutDatesA, $withoutDatesB ] = preg_replace( $regex, '', [ $a, $b ] );

	return $withoutDatesA !== $withoutDatesB;
}
634 | |
/**
 * Describe the additional keys this format accepts under FILES.
 *
 * @return array Schema declaring 'header' (text), 'keyAlgorithm' (enum:
 *  simple or legacy) and 'CtxtAsKey' (boolean).
 */
public static function getExtraSchema(): array {
	$filesChildren = [
		'header' => [
			'_type' => 'text',
		],
		'keyAlgorithm' => [
			'_type' => 'enum',
			'_values' => [ 'simple', 'legacy' ],
		],
		'CtxtAsKey' => [
			'_type' => 'boolean',
		],
	];

	$files = [
		'_type' => 'array',
		'_children' => $filesChildren,
	];

	return [
		'root' => [
			'_type' => 'array',
			'_children' => [ 'FILES' => $files ],
		],
	];
}
659 | |
/**
 * Compare two message texts, treating texts with identical plural forms as equal.
 *
 * @param string|null $a First text.
 * @param string|null $b Second text.
 * @return bool True when the texts are identical or unflatten to the same
 *  list of plural forms.
 */
public function isContentEqual( ?string $a, ?string $b ): bool {
	if ( $a === $b ) {
		return true;
	}

	if ( $a === null || $b === null ) {
		return false;
	}

	try {
		$formsOfA = GettextPlural::parsePluralForms( $a )[1];
		$formsOfB = GettextPlural::parsePluralForms( $b )[1];
	} catch ( GettextPluralException $e ) {
		// Something failed, invalid syntax?
		return false;
	}

	$expectedPluralCount = count( $formsOfA );

	// if they have the different number of plural forms, just fail
	if ( $expectedPluralCount !== count( $formsOfB ) ) {
		return false;
	}

	// GettextPlural::unflatten() will return an empty array when $expectedPluralCount is 0
	// So if they do not have translations and are different strings, they are not equal
	if ( $expectedPluralCount === 0 ) {
		return false;
	}

	$unflattenedA = GettextPlural::unflatten( $a, $expectedPluralCount );
	$unflattenedB = GettextPlural::unflatten( $b, $expectedPluralCount );

	return $unflattenedA === $unflattenedB;
}
694 | } |
695 | |
// Make the class available under the name 'GettextFFS' as well
// (presumably kept for backward compatibility; confirm before removing).
\class_alias( GettextFormat::class, 'GettextFFS' );