Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
16.76% |
58 / 346 |
|
11.54% |
3 / 26 |
CRAP | |
0.00% |
0 / 1 |
GettextFormat | |
16.81% |
58 / 345 |
|
11.54% |
3 / 26 |
5513.66 | |
0.00% |
0 / 1 |
supportsFuzzy | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFileExtensions | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setOfflineMode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
read | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
readFromVariable | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
parseGettext | |
0.00% |
0 / 52 |
|
0.00% |
0 / 1 |
182 | |||
parseGettextSection | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
240 | |||
processGettextPluralMessage | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
parseFlags | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
expectKeyword | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
generateKeyFromItem | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
3 | |||
processData | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
handleWhitespace | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
formatForWiki | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
parseHeaderTags | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
writeReal | |
0.00% |
0 / 34 |
|
0.00% |
0 / 1 |
30 | |||
doGettextHeader | |
0.00% |
0 / 45 |
|
0.00% |
0 / 1 |
42 | |||
doAuthors | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
formatMessageBlock | |
54.05% |
20 / 37 |
|
0.00% |
0 / 1 |
25.97 | |||
formatTime | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getPotTime | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
formatDocumentation | |
22.22% |
2 / 9 |
|
0.00% |
0 / 1 |
11.53 | |||
escape | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
shouldOverwrite | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getExtraSchema | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
2 | |||
isContentEqual | |
80.00% |
12 / 15 |
|
0.00% |
0 / 1 |
7.39 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\FileFormatSupport; |
5 | |
6 | use InvalidArgumentException; |
7 | use MediaWiki\Extension\Translate\LogNames; |
8 | use MediaWiki\Extension\Translate\MessageGroupConfiguration\MetaYamlSchemaExtender; |
9 | use MediaWiki\Extension\Translate\MessageLoading\Message; |
10 | use MediaWiki\Extension\Translate\MessageLoading\MessageCollection; |
11 | use MediaWiki\Extension\Translate\Services; |
12 | use MediaWiki\Extension\Translate\Utilities\GettextPlural; |
13 | use MediaWiki\Extension\Translate\Utilities\Utilities; |
14 | use MediaWiki\Language\LanguageCode; |
15 | use MediaWiki\Logger\LoggerFactory; |
16 | use MediaWiki\MediaWikiServices; |
17 | use MediaWiki\Specials\SpecialVersion; |
18 | use MediaWiki\Title\Title; |
19 | use RuntimeException; |
20 | |
21 | /** |
22 | * FileFormat class that implements support for gettext file format. |
23 | * |
24 | * @author Niklas Laxström |
25 | * @author Siebrand Mazeland |
26 | * @copyright Copyright © 2008-2010, Niklas Laxström, Siebrand Mazeland |
27 | * @license GPL-2.0-or-later |
28 | * @ingroup FileFormatSupport |
29 | */ |
30 | class GettextFormat extends SimpleFormat implements MetaYamlSchemaExtender { |
31 | private bool $allowPotMode = false; |
32 | private bool $offlineMode = false; |
33 | |
34 | public function supportsFuzzy(): string { |
35 | return 'yes'; |
36 | } |
37 | |
38 | public function getFileExtensions(): array { |
39 | return [ '.pot', '.po' ]; |
40 | } |
41 | |
42 | public function setOfflineMode( bool $value ): void { |
43 | $this->offlineMode = $value; |
44 | } |
45 | |
46 | /** @inheritDoc */ |
47 | public function read( $languageCode ) { |
48 | // This is somewhat hacky, but pot mode should only ever be used for the source language. |
49 | // See https://phabricator.wikimedia.org/T230361 |
50 | $this->allowPotMode = $this->getGroup()->getSourceLanguage() === $languageCode; |
51 | |
52 | try { |
53 | return parent::read( $languageCode ); |
54 | } finally { |
55 | $this->allowPotMode = false; |
56 | } |
57 | } |
58 | |
59 | public function readFromVariable( string $data ): array { |
60 | # Authors first |
61 | $matches = []; |
62 | preg_match_all( '/^#\s*Author:\s*(.*)$/m', $data, $matches ); |
63 | $authors = $matches[1]; |
64 | |
65 | # Then messages and everything else |
66 | $parsedData = $this->parseGettext( $data ); |
67 | $parsedData['AUTHORS'] = $authors; |
68 | |
69 | foreach ( $parsedData['MESSAGES'] as $key => $value ) { |
70 | if ( $value === '' ) { |
71 | unset( $parsedData['MESSAGES'][$key] ); |
72 | } |
73 | } |
74 | |
75 | return $parsedData; |
76 | } |
77 | |
78 | private function parseGettext( string $data ): array { |
79 | $mangler = $this->group->getMangler(); |
80 | $useCtxtAsKey = $this->extra['CtxtAsKey'] ?? false; |
81 | $keyAlgorithm = 'simple'; |
82 | if ( isset( $this->extra['keyAlgorithm'] ) ) { |
83 | $keyAlgorithm = $this->extra['keyAlgorithm']; |
84 | } |
85 | |
86 | $potmode = false; |
87 | |
88 | // Normalise newlines, to make processing easier |
89 | $data = str_replace( "\r\n", "\n", $data ); |
90 | |
91 | /* Delimit the file into sections, which are separated by two newlines. |
92 | * We are permissive and accept more than two. This parsing method isn't |
93 | * efficient wrt memory, but was easy to implement */ |
94 | $sections = preg_split( '/\n{2,}/', $data ); |
95 | |
96 | /* First one isn't an actual message. We'll handle it specially below */ |
97 | $headerSection = array_shift( $sections ); |
98 | /* Since this is the header section, we are only interested in the tags |
99 | * and msgid is empty. Somewhere we should extract the header comments |
100 | * too */ |
101 | $match = $this->expectKeyword( 'msgstr', $headerSection ); |
102 | if ( $match !== null ) { |
103 | $headerBlock = $this->formatForWiki( $match, 'trim' ); |
104 | $headers = $this->parseHeaderTags( $headerBlock ); |
105 | |
106 | // Check for pot-mode by checking if the header is fuzzy |
107 | $flags = $this->parseFlags( $headerSection ); |
108 | if ( in_array( 'fuzzy', $flags, true ) ) { |
109 | $potmode = $this->allowPotMode; |
110 | } |
111 | } else { |
112 | $message = "Gettext file header was not found:\n\n$data"; |
113 | throw new GettextParseException( $message ); |
114 | } |
115 | |
116 | $template = []; |
117 | $messages = []; |
118 | |
119 | // Extract some metadata from headers for easier use |
120 | $metadata = []; |
121 | if ( isset( $headers['X-Language-Code'] ) ) { |
122 | $metadata['code'] = $headers['X-Language-Code']; |
123 | } |
124 | |
125 | if ( isset( $headers['X-Message-Group'] ) ) { |
126 | $metadata['group'] = $headers['X-Message-Group']; |
127 | } |
128 | |
129 | /* At this stage we are only interested how many plurals forms we should |
130 | * be expecting when parsing the rest of this file. */ |
131 | $pluralCount = null; |
132 | if ( $potmode ) { |
133 | $pluralCount = 2; |
134 | } elseif ( isset( $headers['Plural-Forms'] ) ) { |
135 | $pluralCount = $metadata['plural'] = GettextPlural::getPluralCount( $headers['Plural-Forms'] ); |
136 | } |
137 | |
138 | $metadata['plural'] = $pluralCount; |
139 | |
140 | // Then parse the messages |
141 | foreach ( $sections as $section ) { |
142 | $item = $this->parseGettextSection( $section, $pluralCount ); |
143 | if ( $item === null ) { |
144 | continue; |
145 | } |
146 | |
147 | if ( $useCtxtAsKey ) { |
148 | if ( !isset( $item['ctxt'] ) ) { |
149 | error_log( "ctxt missing for: $section" ); |
150 | continue; |
151 | } |
152 | $key = $item['ctxt']; |
153 | } else { |
154 | $key = $this->generateKeyFromItem( $item, $keyAlgorithm ); |
155 | } |
156 | |
157 | $key = $mangler->mangle( $key ); |
158 | $messages[$key] = $potmode ? $item['id'] : $item['str']; |
159 | $template[$key] = $item; |
160 | } |
161 | |
162 | return [ |
163 | 'MESSAGES' => $messages, |
164 | 'EXTRA' => [ |
165 | 'TEMPLATE' => $template, |
166 | 'METADATA' => $metadata, |
167 | 'HEADERS' => $headers, |
168 | ], |
169 | ]; |
170 | } |
171 | |
172 | private function parseGettextSection( string $section, ?int $pluralCount ): ?array { |
173 | if ( trim( $section ) === '' ) { |
174 | return null; |
175 | } |
176 | |
177 | /* These inactive sections are of no interest to us. Multiline mode |
178 | * is needed because there may be flags or other annoying stuff |
179 | * before the commented out sections. |
180 | */ |
181 | if ( preg_match( '/^#~/m', $section ) ) { |
182 | return null; |
183 | } |
184 | |
185 | $item = [ |
186 | 'ctxt' => false, |
187 | 'id' => '', |
188 | 'str' => '', |
189 | 'flags' => [], |
190 | 'comments' => [], |
191 | ]; |
192 | |
193 | $match = $this->expectKeyword( 'msgid', $section ); |
194 | if ( $match !== null ) { |
195 | $item['id'] = $this->formatForWiki( $match ); |
196 | } else { |
197 | throw new RuntimeException( "Unable to parse msgid:\n\n$section" ); |
198 | } |
199 | |
200 | $match = $this->expectKeyword( 'msgctxt', $section ); |
201 | if ( $match !== null ) { |
202 | $item['ctxt'] = $this->formatForWiki( $match ); |
203 | } |
204 | |
205 | $pluralMessage = false; |
206 | $match = $this->expectKeyword( 'msgid_plural', $section ); |
207 | if ( $match !== null ) { |
208 | $pluralMessage = true; |
209 | $plural = $this->formatForWiki( $match ); |
210 | $item['id'] = GettextPlural::flatten( [ $item['id'], $plural ] ); |
211 | } |
212 | |
213 | if ( $pluralMessage ) { |
214 | $pluralMessageText = $this->processGettextPluralMessage( $pluralCount, $section ); |
215 | |
216 | // Keep the translation empty if no form has translation |
217 | if ( $pluralMessageText !== '' ) { |
218 | $item['str'] = $pluralMessageText; |
219 | } |
220 | } else { |
221 | $match = $this->expectKeyword( 'msgstr', $section ); |
222 | if ( $match !== null ) { |
223 | $item['str'] = $this->formatForWiki( $match ); |
224 | } else { |
225 | throw new RuntimeException( "Unable to parse msgstr:\n\n$section" ); |
226 | } |
227 | } |
228 | |
229 | // Parse flags |
230 | $flags = $this->parseFlags( $section ); |
231 | foreach ( $flags as $key => $flag ) { |
232 | if ( $flag === 'fuzzy' ) { |
233 | $item['str'] = TRANSLATE_FUZZY . $item['str']; |
234 | unset( $flags[$key] ); |
235 | } |
236 | } |
237 | $item['flags'] = $flags; |
238 | |
239 | // Rest of the comments |
240 | $matches = []; |
241 | if ( preg_match_all( '/^#(.?) (.*)$/m', $section, $matches, PREG_SET_ORDER ) ) { |
242 | foreach ( $matches as $match ) { |
243 | if ( $match[1] !== ',' && !str_starts_with( $match[1], '[Wiki]' ) ) { |
244 | $item['comments'][$match[1]][] = $match[2]; |
245 | } |
246 | } |
247 | } |
248 | |
249 | return $item; |
250 | } |
251 | |
252 | private function processGettextPluralMessage( ?int $pluralCount, string $section ): string { |
253 | $actualForms = []; |
254 | |
255 | for ( $i = 0; $i < $pluralCount; $i++ ) { |
256 | $match = $this->expectKeyword( "msgstr\\[$i\\]", $section ); |
257 | |
258 | if ( $match !== null ) { |
259 | $actualForms[] = $this->formatForWiki( $match ); |
260 | } else { |
261 | $actualForms[] = ''; |
262 | error_log( "Plural $i not found, expecting total of $pluralCount for $section" ); |
263 | } |
264 | } |
265 | |
266 | if ( array_sum( array_map( 'strlen', $actualForms ) ) > 0 ) { |
267 | return GettextPlural::flatten( $actualForms ); |
268 | } else { |
269 | return ''; |
270 | } |
271 | } |
272 | |
273 | private function parseFlags( string $section ): array { |
274 | $matches = []; |
275 | if ( preg_match( '/^#,(.*)$/mu', $section, $matches ) ) { |
276 | return array_map( 'trim', explode( ',', $matches[1] ) ); |
277 | } else { |
278 | return []; |
279 | } |
280 | } |
281 | |
282 | private function expectKeyword( string $name, string $section ): ?string { |
283 | /* Catches the multiline textblock that comes after keywords msgid, |
284 | * msgstr, msgid_plural, msgctxt. |
285 | */ |
286 | $poformat = '".*"\n?(^".*"$\n?)*'; |
287 | |
288 | $matches = []; |
289 | if ( preg_match( "/^$name\s($poformat)/mx", $section, $matches ) ) { |
290 | return $matches[1]; |
291 | } else { |
292 | return null; |
293 | } |
294 | } |
295 | |
296 | /** |
297 | * Generates unique key for each message. Changing this WILL BREAK ALL |
298 | * existing pages! |
299 | * @param array $item As returned by parseGettextSection |
300 | * @param string $algorithm Algorithm used to generate message keys: simple or legacy |
301 | */ |
302 | public function generateKeyFromItem( array $item, string $algorithm = 'simple' ): string { |
303 | $lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' ); |
304 | |
305 | if ( $item['ctxt'] === '' ) { |
306 | /* Messages with msgctxt as empty string should be different |
307 | * from messages without any msgctxt. To avoid BC break make |
308 | * the empty ctxt a special case */ |
309 | $hash = sha1( $item['id'] . 'MSGEMPTYCTXT' ); |
310 | } else { |
311 | $hash = sha1( $item['ctxt'] . $item['id'] ); |
312 | } |
313 | |
314 | if ( $algorithm === 'simple' ) { |
315 | $hash = substr( $hash, 0, 6 ); |
316 | $snippet = $lang->truncateForDatabase( $item['id'], 30, '' ); |
317 | $snippet = str_replace( ' ', '_', trim( $snippet ) ); |
318 | } else { // legacy |
319 | $legalChars = Title::legalChars(); |
320 | $snippet = $item['id']; |
321 | $snippet = preg_replace( "/[^$legalChars]/", ' ', $snippet ); |
322 | $snippet = preg_replace( '/[:&%\/_]/', ' ', $snippet ); |
323 | $snippet = preg_replace( '/ {2,}/', ' ', $snippet ); |
324 | $snippet = $lang->truncateForDatabase( $snippet, 30, '' ); |
325 | $snippet = str_replace( ' ', '_', trim( $snippet ) ); |
326 | } |
327 | |
328 | return "$hash-$snippet"; |
329 | } |
330 | |
331 | /** |
332 | * This method processes the gettext text block format. |
333 | */ |
334 | private function processData( string $data ): string { |
335 | $quotePattern = '/(^"|"$\n?)/m'; |
336 | $data = preg_replace( $quotePattern, '', $data ); |
337 | return stripcslashes( $data ); |
338 | } |
339 | |
340 | /** |
341 | * This method handles the whitespace at the end of the data. |
342 | * @throws InvalidArgumentException |
343 | */ |
344 | private function handleWhitespace( string $data, string $whitespace ): string { |
345 | if ( preg_match( '/\s$/', $data ) ) { |
346 | if ( $whitespace === 'mark' ) { |
347 | $data .= '\\'; |
348 | } elseif ( $whitespace === 'trim' ) { |
349 | $data = rtrim( $data ); |
350 | } else { |
351 | // This condition will never happen as long as $whitespace is 'mark' or 'trim' |
352 | throw new InvalidArgumentException( "Unknown action for whitespace: $whitespace" ); |
353 | } |
354 | } |
355 | |
356 | return $data; |
357 | } |
358 | |
359 | /** |
360 | * This parses the Gettext text block format. Since trailing whitespace is |
361 | * not allowed in MediaWiki pages, the default action is to append |
362 | * \-character at the end of the message. You can also choose to ignore it |
363 | * and use the trim action instead. |
364 | */ |
365 | private function formatForWiki( string $data, string $whitespace = 'mark' ): string { |
366 | $data = $this->processData( $data ); |
367 | return $this->handleWhitespace( $data, $whitespace ); |
368 | } |
369 | |
370 | private function parseHeaderTags( string $headers ): array { |
371 | $tags = []; |
372 | foreach ( explode( "\n", $headers ) as $line ) { |
373 | if ( !str_contains( $line, ':' ) ) { |
374 | error_log( __METHOD__ . ": $line" ); |
375 | } |
376 | [ $key, $value ] = explode( ':', $line, 2 ); |
377 | $tags[trim( $key )] = trim( $value ); |
378 | } |
379 | |
380 | return $tags; |
381 | } |
382 | |
383 | protected function writeReal( MessageCollection $collection ): string { |
384 | // FIXME: this should be the source language |
385 | $pot = $this->read( 'en' ) ?? []; |
386 | $code = $collection->code; |
387 | $template = $this->read( $code ) ?? []; |
388 | $output = $this->doGettextHeader( $collection, $template['EXTRA'] ?? [] ); |
389 | |
390 | $pluralRule = GettextPlural::getPluralRule( $code ); |
391 | if ( !$pluralRule ) { |
392 | $pluralRule = GettextPlural::getPluralRule( 'en' ); |
393 | LoggerFactory::getInstance( LogNames::MAIN )->warning( |
394 | "T235180: Missing Gettext plural rule for '{languagecode}'", |
395 | [ 'languagecode' => $code ] |
396 | ); |
397 | } |
398 | $pluralCount = GettextPlural::getPluralCount( $pluralRule ); |
399 | |
400 | $documentationLanguageCode = MediaWikiServices::getInstance() |
401 | ->getMainConfig() |
402 | ->get( 'TranslateDocumentationLanguageCode' ); |
403 | $documentationCollection = null; |
404 | if ( is_string( $documentationLanguageCode ) ) { |
405 | $documentationCollection = clone $collection; |
406 | $documentationCollection->resetForNewLanguage( $documentationLanguageCode ); |
407 | $documentationCollection->loadTranslations(); |
408 | } |
409 | |
410 | /** @var Message $m */ |
411 | foreach ( $collection as $key => $m ) { |
412 | $transTemplate = $template['EXTRA']['TEMPLATE'][$key] ?? []; |
413 | $potTemplate = $pot['EXTRA']['TEMPLATE'][$key] ?? []; |
414 | $documentation = isset( $documentationCollection[$key] ) ? |
415 | $documentationCollection[$key]->translation() : null; |
416 | |
417 | $output .= $this->formatMessageBlock( |
418 | $key, |
419 | $m, |
420 | $transTemplate, |
421 | $potTemplate, |
422 | $pluralCount, |
423 | $documentation |
424 | ); |
425 | } |
426 | |
427 | return $output; |
428 | } |
429 | |
430 | private function doGettextHeader( MessageCollection $collection, array $template ): string { |
431 | global $wgSitename; |
432 | |
433 | $code = $collection->code; |
434 | $name = Utilities::getLanguageName( $code ); |
435 | $native = Utilities::getLanguageName( $code, $code ); |
436 | $authors = $this->doAuthors( $collection ); |
437 | if ( isset( $this->extra['header'] ) ) { |
438 | $extra = "# --\n" . $this->extra['header']; |
439 | } else { |
440 | $extra = ''; |
441 | } |
442 | |
443 | $group = $this->getGroup(); |
444 | $output = |
445 | <<<EOT |
446 | # Translation of {$group->getLabel()} to $name ($native) |
447 | # Exported from $wgSitename |
448 | # |
449 | $authors$extra |
450 | EOT; |
451 | |
452 | // Make sure there is no empty line before msgid |
453 | $output = trim( $output ) . "\n"; |
454 | |
455 | $specs = $template['HEADERS'] ?? []; |
456 | |
457 | $timestamp = wfTimestampNow(); |
458 | $specs['PO-Revision-Date'] = $this->formatTime( $timestamp ); |
459 | if ( $this->offlineMode ) { |
460 | $specs['POT-Creation-Date'] = $this->formatTime( $timestamp ); |
461 | } else { |
462 | $specs['X-POT-Import-Date'] = $this->formatTime( wfTimestamp( TS_MW, $this->getPotTime() ) ); |
463 | } |
464 | $specs['Content-Type'] = 'text/plain; charset=UTF-8'; |
465 | $specs['Content-Transfer-Encoding'] = '8bit'; |
466 | |
467 | $specs['Language'] = LanguageCode::bcp47( $this->group->mapCode( $code ) ); |
468 | |
469 | Services::getInstance()->getHookRunner()->onTranslate_GettextFormat_headerFields( |
470 | $specs, |
471 | $this->group, |
472 | $code |
473 | ); |
474 | |
475 | $specs['X-Generator'] = 'MediaWiki ' |
476 | . SpecialVersion::getVersion() |
477 | . '; Translate ' |
478 | . Utilities::getVersion(); |
479 | |
480 | if ( $this->offlineMode ) { |
481 | $specs['X-Language-Code'] = $code; |
482 | $specs['X-Message-Group'] = $group->getId(); |
483 | } |
484 | |
485 | $specs['Plural-Forms'] = GettextPlural::getPluralRule( $code ) |
486 | ?: GettextPlural::getPluralRule( 'en' ); |
487 | |
488 | $output .= 'msgid ""' . "\n"; |
489 | $output .= 'msgstr ""' . "\n"; |
490 | $output .= '""' . "\n"; |
491 | |
492 | foreach ( $specs as $k => $v ) { |
493 | $output .= $this->escape( "$k: $v\n" ) . "\n"; |
494 | } |
495 | |
496 | $output .= "\n"; |
497 | |
498 | return $output; |
499 | } |
500 | |
501 | private function doAuthors( MessageCollection $collection ): string { |
502 | $output = ''; |
503 | $authors = $collection->getAuthors(); |
504 | $authors = $this->filterAuthors( $authors, $collection->code ); |
505 | |
506 | foreach ( $authors as $author ) { |
507 | $output .= "# Author: $author\n"; |
508 | } |
509 | |
510 | return $output; |
511 | } |
512 | |
513 | private function formatMessageBlock( |
514 | string $key, |
515 | Message $message, |
516 | array $trans, |
517 | array $pot, |
518 | int $pluralCount, |
519 | ?string $documentation |
520 | ): string { |
521 | $header = $this->formatDocumentation( $documentation ); |
522 | $content = ''; |
523 | |
524 | $comments = $pot['comments'] ?? $trans['comments'] ?? []; |
525 | foreach ( $comments as $type => $typecomments ) { |
526 | foreach ( $typecomments as $comment ) { |
527 | $header .= "#$type $comment\n"; |
528 | } |
529 | } |
530 | |
531 | $flags = $pot['flags'] ?? $trans['flags'] ?? []; |
532 | $flags = array_merge( $message->getTags(), $flags ); |
533 | |
534 | if ( $this->offlineMode ) { |
535 | $content .= 'msgctxt ' . $this->escape( $key ) . "\n"; |
536 | } else { |
537 | $ctxt = $pot['ctxt'] ?? $trans['ctxt'] ?? false; |
538 | if ( $ctxt !== false ) { |
539 | $content .= 'msgctxt ' . $this->escape( $ctxt ) . "\n"; |
540 | } |
541 | } |
542 | |
543 | $msgid = $message->definition(); |
544 | $msgstr = $message->translation() ?? ''; |
545 | if ( strpos( $msgstr, TRANSLATE_FUZZY ) !== false ) { |
546 | $msgstr = str_replace( TRANSLATE_FUZZY, '', $msgstr ); |
547 | // Might be fuzzy infile |
548 | $flags[] = 'fuzzy'; |
549 | } |
550 | |
551 | if ( GettextPlural::hasPlural( $msgid ) ) { |
552 | $forms = GettextPlural::unflatten( $msgid, 2 ); |
553 | $content .= 'msgid ' . $this->escape( $forms[0] ) . "\n"; |
554 | $content .= 'msgid_plural ' . $this->escape( $forms[1] ) . "\n"; |
555 | |
556 | try { |
557 | $forms = GettextPlural::unflatten( $msgstr, $pluralCount ); |
558 | foreach ( $forms as $index => $form ) { |
559 | $content .= "msgstr[$index] " . $this->escape( $form ) . "\n"; |
560 | } |
561 | } catch ( GettextPluralException $e ) { |
562 | $flags[] = 'invalid-plural'; |
563 | for ( $i = 0; $i < $pluralCount; $i++ ) { |
564 | $content .= "msgstr[$i] \"\"\n"; |
565 | } |
566 | } |
567 | } else { |
568 | $content .= 'msgid ' . $this->escape( $msgid ) . "\n"; |
569 | $content .= 'msgstr ' . $this->escape( $msgstr ) . "\n"; |
570 | } |
571 | |
572 | if ( $flags ) { |
573 | sort( $flags ); |
574 | $header .= '#, ' . implode( ', ', array_unique( $flags ) ) . "\n"; |
575 | } |
576 | |
577 | $output = $header ?: "#\n"; |
578 | $output .= $content . "\n"; |
579 | |
580 | return $output; |
581 | } |
582 | |
583 | private function formatTime( string $time ): string { |
584 | $lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( 'en' ); |
585 | |
586 | return $lang->sprintfDate( 'xnY-xnm-xnd xnH:xni:xns+0000', $time ); |
587 | } |
588 | |
589 | private function getPotTime(): string { |
590 | $cache = $this->group->getMessageGroupCache( $this->group->getSourceLanguage() ); |
591 | |
592 | return $cache->exists() ? $cache->getTimestamp() : wfTimestampNow(); |
593 | } |
594 | |
595 | private function formatDocumentation( ?string $documentation ): string { |
596 | if ( !is_string( $documentation ) ) { |
597 | return ''; |
598 | } |
599 | |
600 | if ( !$this->offlineMode ) { |
601 | return ''; |
602 | } |
603 | |
604 | $lines = explode( "\n", $documentation ); |
605 | $out = ''; |
606 | foreach ( $lines as $line ) { |
607 | $out .= "#. [Wiki] $line\n"; |
608 | } |
609 | |
610 | return $out; |
611 | } |
612 | |
613 | private function escape( string $line ): string { |
614 | // There may be \ as a last character, for keeping trailing whitespace |
615 | $line = preg_replace( '/(\s)\\\\$/', '\1', $line ); |
616 | $line = addcslashes( $line, '\\"' ); |
617 | $line = str_replace( "\n", '\n', $line ); |
618 | return '"' . $line . '"'; |
619 | } |
620 | |
621 | public function shouldOverwrite( string $a, string $b ): bool { |
622 | $regex = '/^"(.+)-Date: \d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\+\d\d\d\d\\\\n"$/m'; |
623 | |
624 | $a = preg_replace( $regex, '', $a ); |
625 | $b = preg_replace( $regex, '', $b ); |
626 | |
627 | return $a !== $b; |
628 | } |
629 | |
630 | public static function getExtraSchema(): array { |
631 | return [ |
632 | 'root' => [ |
633 | '_type' => 'array', |
634 | '_children' => [ |
635 | 'FILES' => [ |
636 | '_type' => 'array', |
637 | '_children' => [ |
638 | 'header' => [ |
639 | '_type' => 'text', |
640 | ], |
641 | 'keyAlgorithm' => [ |
642 | '_type' => 'enum', |
643 | '_values' => [ 'simple', 'legacy' ], |
644 | ], |
645 | 'CtxtAsKey' => [ |
646 | '_type' => 'boolean', |
647 | ], |
648 | ] |
649 | ] |
650 | ] |
651 | ] |
652 | ]; |
653 | } |
654 | |
655 | public function isContentEqual( ?string $a, ?string $b ): bool { |
656 | if ( $a === $b ) { |
657 | return true; |
658 | } |
659 | |
660 | if ( $a === null || $b === null ) { |
661 | return false; |
662 | } |
663 | |
664 | try { |
665 | $parsedA = GettextPlural::parsePluralForms( $a ); |
666 | $parsedB = GettextPlural::parsePluralForms( $b ); |
667 | |
668 | // if they have the different number of plural forms, just fail |
669 | if ( count( $parsedA[1] ) !== count( $parsedB[1] ) ) { |
670 | return false; |
671 | } |
672 | |
673 | } catch ( GettextPluralException $e ) { |
674 | // Something failed, invalid syntax? |
675 | return false; |
676 | } |
677 | |
678 | $expectedPluralCount = count( $parsedA[1] ); |
679 | |
680 | // GettextPlural::unflatten() will return an empty array when $expectedPluralCount is 0 |
681 | // So if they do not have translations and are different strings, they are not equal |
682 | if ( $expectedPluralCount === 0 ) { |
683 | return false; |
684 | } |
685 | |
686 | return GettextPlural::unflatten( $a, $expectedPluralCount ) |
687 | === GettextPlural::unflatten( $b, $expectedPluralCount ); |
688 | } |
689 | } |
690 | |
// Make the class available under the global name GettextFFS as well
// (presumably its former, non-namespaced name; confirm before removing).
class_alias( GettextFormat::class, 'GettextFFS', true );