25 private $allowPotMode =
false;
26 protected $offlineMode =
false;
33 return [
'.pot',
'.po' ];
38 $this->offlineMode = $value;
42 public function read( $code ) {
45 $this->allowPotMode = $this->
getGroup()->getSourceLanguage() === $code;
48 return parent::read( $code );
50 $this->allowPotMode =
false;
61 preg_match_all(
'/^#\s*Author:\s*(.*)$/m', $data, $matches );
62 $authors = $matches[1];
64 # Then messages and everything else
65 $parsedData = $this->parseGettext( $data );
66 $parsedData[
'AUTHORS'] = $authors;
68 foreach ( $parsedData[
'MESSAGES'] as $key => $value ) {
69 if ( $value ===
'' ) {
70 unset( $parsedData[
'MESSAGES'][$key] );
/**
 * Parses Gettext file contents using the settings of this file format instance.
 *
 * Reads the optional 'CtxtAsKey' and 'keyAlgorithm' entries from $this->extra
 * (defaulting to false and 'simple' respectively) and delegates the actual
 * parsing to self::parseGettextData() together with the group's mangler and
 * the current allowPotMode flag.
 *
 * @param string $data Raw contents of the Gettext file.
 * @return array Whatever self::parseGettextData() returns for these inputs.
 */
public function parseGettext( $data ) {
	$groupMangler = $this->group->getMangler();
	$contextIsKey = $this->extra['CtxtAsKey'] ?? false;
	// Falls back to the 'simple' algorithm when none is configured.
	$algorithm = $this->extra['keyAlgorithm'] ?? 'simple';

	return self::parseGettextData(
		$data,
		$contextIsKey,
		$groupMangler,
		$algorithm,
		$this->allowPotMode
	);
}
109 $data = str_replace(
"\r\n",
"\n", $data );
114 $sections = preg_split(
'/\n{2,}/', $data );
117 $headerSection = array_shift( $sections );
121 $match = self::expectKeyword(
'msgstr', $headerSection );
122 if ( $match !==
null ) {
123 $headerBlock = self::formatForWiki( $match,
'trim' );
124 $headers = self::parseHeaderTags( $headerBlock );
127 $flags = self::parseFlags( $headerSection );
128 if ( in_array(
'fuzzy', $flags,
true ) ) {
129 $potmode = $allowPotMode;
132 $message =
"Gettext file header was not found:\n\n$data";
141 if ( isset( $headers[
'X-Language-Code'] ) ) {
142 $metadata[
'code'] = $headers[
'X-Language-Code'];
145 if ( isset( $headers[
'X-Message-Group'] ) ) {
146 $metadata[
'group'] = $headers[
'X-Message-Group'];
151 $pluralCount =
false;
154 } elseif ( isset( $headers[
'Plural-Forms'] ) ) {
155 $pluralCount = $metadata[
'plural'] = GettextPlural::getPluralCount( $headers[
'Plural-Forms'] );
158 $metadata[
'plural'] = $pluralCount;
161 foreach ( $sections as $section ) {
162 $item = self::parseGettextSection( $section, $pluralCount );
163 if ( $item ===
false ) {
167 if ( $useCtxtAsKey ) {
168 if ( !isset( $item[
'ctxt'] ) ) {
169 error_log(
"ctxt missing for: $section" );
172 $key = $item[
'ctxt'];
174 $key = self::generateKeyFromItem( $item, $keyAlgorithm );
177 $key = $mangler->
mangle( $key );
178 $messages[$key] = $potmode ? $item[
'id'] : $item[
'str'];
179 $template[$key] = $item;
183 'MESSAGES' => $messages,
185 'TEMPLATE' => $template,
186 'METADATA' => $metadata,
187 'HEADERS' => $headers,
192 public static function parseGettextSection( $section, $pluralCount ) {
193 if ( trim( $section ) ===
'' ) {
201 if ( preg_match(
'/^#~/m', $section ) ) {
213 $match = self::expectKeyword(
'msgid', $section );
214 if ( $match !==
null ) {
215 $item[
'id'] = self::formatForWiki( $match );
217 throw new MWException(
"Unable to parse msgid:\n\n$section" );
220 $match = self::expectKeyword(
'msgctxt', $section );
221 if ( $match !==
null ) {
222 $item[
'ctxt'] = self::formatForWiki( $match );
225 $pluralMessage =
false;
226 $match = self::expectKeyword(
'msgid_plural', $section );
227 if ( $match !==
null ) {
228 $pluralMessage =
true;
229 $plural = self::formatForWiki( $match );
230 $item[
'id'] = GettextPlural::flatten( [ $item[
'id'], $plural ] );
233 if ( $pluralMessage ) {
234 $pluralMessageText = self::processGettextPluralMessage( $pluralCount, $section );
237 if ( $pluralMessageText !==
'' ) {
238 $item[
'str'] = $pluralMessageText;
241 $match = self::expectKeyword(
'msgstr', $section );
242 if ( $match !==
null ) {
243 $item[
'str'] = self::formatForWiki( $match );
245 throw new MWException(
"Unable to parse msgstr:\n\n$section" );
250 $flags = self::parseFlags( $section );
251 foreach ( $flags as $key => $flag ) {
252 if ( $flag ===
'fuzzy' ) {
253 $item[
'str'] = TRANSLATE_FUZZY . $item[
'str'];
254 unset( $flags[$key] );
257 $item[
'flags'] = $flags;
261 if ( preg_match_all(
'/^#(.?) (.*)$/m', $section, $matches, PREG_SET_ORDER ) ) {
262 foreach ( $matches as $match ) {
263 if ( $match[1] !==
',' && strpos( $match[1],
'[Wiki]' ) !== 0 ) {
264 $item[
'comments'][$match[1]][] = $match[2];
272 public static function processGettextPluralMessage( $pluralCount, $section ) {
275 for ( $i = 0; $i < $pluralCount; $i++ ) {
276 $match = self::expectKeyword(
"msgstr\\[$i\\]", $section );
278 if ( $match !==
null ) {
279 $actualForms[] = self::formatForWiki( $match );
282 error_log(
"Plural $i not found, expecting total of $pluralCount for $section" );
286 if ( array_sum( array_map(
'strlen', $actualForms ) ) > 0 ) {
287 return GettextPlural::flatten( $actualForms );
/**
 * Extracts the flags from the first "#," comment line of a Gettext section.
 *
 * @param string $section One section (entry) of a Gettext file.
 * @return string[] The comma-separated flags with surrounding whitespace
 *  trimmed, or an empty array when the section has no "#," line.
 */
public static function parseFlags( $section ) {
	$found = [];
	if ( !preg_match( '/^#,(.*)$/mu', $section, $found ) ) {
		return [];
	}

	$flags = [];
	foreach ( explode( ',', $found[1] ) as $rawFlag ) {
		$flags[] = trim( $rawFlag );
	}

	return $flags;
}
302 public static function expectKeyword( $name, $section ) {
306 $poformat =
'".*"\n?(^".*"$\n?)*';
309 if ( preg_match(
"/^$name\s($poformat)/mx", $section, $matches ) ) {
324 $lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage(
'en' );
326 if ( $item[
'ctxt'] ===
'' ) {
330 $hash = sha1( $item[
'id'] .
'MSGEMPTYCTXT' );
332 $hash = sha1( $item[
'ctxt'] . $item[
'id'] );
335 if ( $algorithm ===
'simple' ) {
336 $hash = substr( $hash, 0, 6 );
337 $snippet = $lang->truncateForDatabase( $item[
'id'], 30,
'' );
338 $snippet = str_replace(
' ',
'_', trim( $snippet ) );
340 $legalChars = Title::legalChars();
341 $snippet = $item[
'id'];
342 $snippet = preg_replace(
"/[^$legalChars]/",
' ', $snippet );
343 $snippet = preg_replace(
"/[:&%\/_]/",
' ', $snippet );
344 $snippet = preg_replace(
'/ {2,}/',
' ', $snippet );
345 $snippet = $lang->truncateForDatabase( $snippet, 30,
'' );
346 $snippet = str_replace(
' ',
'_', trim( $snippet ) );
349 return "$hash-$snippet";
363 $quotePattern =
'/(^"|"$\n?)/m';
364 $data = preg_replace( $quotePattern,
'', $data );
365 $data = stripcslashes( $data );
367 if ( preg_match(
'/\s$/', $data ) ) {
368 if ( $whitespace ===
'mark' ) {
370 } elseif ( $whitespace ===
'trim' ) {
371 $data = rtrim( $data );
374 throw new MWException(
'Unknown action for whitespace' );
/**
 * Splits a Gettext header block into a map of header names to values.
 *
 * Each line is expected to look like "Name: value". The line is split on the
 * first colon only, so values may themselves contain colons. Names and values
 * are trimmed.
 *
 * Lines without any colon are reported through error_log() and skipped.
 * Destructuring such a line would read a missing array offset and store a
 * bogus "whole line => empty string" header.
 *
 * @param string $headers Header block, one header per line.
 * @return string[] Map of header name to header value.
 */
public static function parseHeaderTags( $headers ) {
	$tags = [];

	foreach ( explode( "\n", $headers ) as $line ) {
		if ( strpos( $line, ':' ) === false ) {
			error_log( __METHOD__ . ": $line" );
			// Nothing to split: do not fall through to the destructuring below.
			continue;
		}

		[ $key, $value ] = explode( ':', $line, 2 );
		$tags[trim( $key )] = trim( $value );
	}

	return $tags;
}
396 $pot = $this->
read(
'en' ) ?? [];
397 $code = $collection->code;
398 $template = $this->
read( $code ) ?? [];
399 $output = $this->doGettextHeader( $collection, $template[
'EXTRA'] ?? [] );
401 $pluralRule = GettextPlural::getPluralRule( $code );
402 if ( !$pluralRule ) {
403 $pluralRule = GettextPlural::getPluralRule(
'en' );
404 LoggerFactory::getInstance(
'Translate' )->warning(
405 "T235180: Missing Gettext plural rule for '{languagecode}'",
406 [
'languagecode' => $code ]
409 $pluralCount = GettextPlural::getPluralCount( $pluralRule );
412 foreach ( $collection as $key => $m ) {
413 $transTemplate = $template[
'EXTRA'][
'TEMPLATE'][$key] ?? [];
414 $potTemplate = $pot[
'EXTRA'][
'TEMPLATE'][$key] ?? [];
416 $output .= $this->formatMessageBlock( $key, $m, $transTemplate, $potTemplate, $pluralCount );
425 $code = $collection->code;
426 $name = Utilities::getLanguageName( $code );
427 $native = Utilities::getLanguageName( $code, $code );
428 $authors = $this->doAuthors( $collection );
429 if ( isset( $this->extra[
'header'] ) ) {
430 $extra =
"# --\n" . $this->extra[
'header'];
437 # Translation of {$this->group->getLabel()} to $name ($native)
438 # Exported from $wgSitename
444 $output = trim( $output ) .
"\n";
446 $specs = $template[
'HEADERS'] ?? [];
448 $timestamp = wfTimestampNow();
449 $specs[
'PO-Revision-Date'] = self::formatTime( $timestamp );
450 if ( $this->offlineMode ) {
451 $specs[
'POT-Creation-Date'] = self::formatTime( $timestamp );
453 $specs[
'X-POT-Import-Date'] = self::formatTime( wfTimestamp( TS_MW, $this->getPotTime() ) );
455 $specs[
'Content-Type'] =
'text/plain; charset=UTF-8';
456 $specs[
'Content-Transfer-Encoding'] =
'8bit';
457 $specs[
'Language'] = LanguageCode::bcp47( $this->group->mapCode( $code ) );
458 Hooks::run(
'Translate:GettextFFS:headerFields', [ &$specs, $this->group, $code ] );
459 $specs[
'X-Generator'] = $this->getGenerator();
461 if ( $this->offlineMode ) {
462 $specs[
'X-Language-Code'] = $code;
463 $specs[
'X-Message-Group'] = $this->group->getId();
466 $specs[
'Plural-Forms'] = GettextPlural::getPluralRule( $code )
467 ?: GettextPlural::getPluralRule(
'en' );
469 $output .=
'msgid ""' .
"\n";
470 $output .=
'msgstr ""' .
"\n";
471 $output .=
'""' .
"\n";
473 foreach ( $specs as $k => $v ) {
474 $output .= self::escape(
"$k: $v\n" ) .
"\n";
485 $authors = $this->
filterAuthors( $authors, $collection->code );
487 foreach ( $authors as $author ) {
488 $output .=
"# Author: $author\n";
502 protected function formatMessageBlock( $key, $m, $trans, $pot, $pluralCount ) {
503 $header = $this->formatDocumentation( $key );
506 $comments = self::chainGetter(
'comments', $pot, $trans, [] );
507 foreach ( $comments as $type => $typecomments ) {
508 foreach ( $typecomments as $comment ) {
509 $header .=
"#$type $comment\n";
513 $flags = self::chainGetter(
'flags', $pot, $trans, [] );
514 $flags = array_merge( $m->getTags(), $flags );
516 if ( $this->offlineMode ) {
517 $content .=
'msgctxt ' . self::escape( $key ) .
"\n";
519 $ctxt = self::chainGetter(
'ctxt', $pot, $trans,
false );
520 if ( $ctxt !==
false ) {
521 $content .=
'msgctxt ' . self::escape( $ctxt ) .
"\n";
525 $msgid = $m->definition();
526 $msgstr = $m->translation();
527 if ( strpos( $msgstr, TRANSLATE_FUZZY ) !==
false ) {
528 $msgstr = str_replace( TRANSLATE_FUZZY,
'', $msgstr );
533 if ( GettextPlural::hasPlural( $msgid ) ) {
534 $forms = GettextPlural::unflatten( $msgid, 2 );
535 $content .=
'msgid ' . self::escape( $forms[0] ) .
"\n";
536 $content .=
'msgid_plural ' . self::escape( $forms[1] ) .
"\n";
539 $forms = GettextPlural::unflatten( $msgstr, $pluralCount );
540 foreach ( $forms as $index => $form ) {
541 $content .=
"msgstr[$index] " . self::escape( $form ) .
"\n";
544 $flags[] =
'invalid-plural';
545 for ( $i = 0; $i < $pluralCount; $i++ ) {
546 $content .=
"msgstr[$i] \"\"\n";
550 $content .=
'msgid ' . self::escape( $msgid ) .
"\n";
551 $content .=
'msgstr ' . self::escape( $msgstr ) .
"\n";
556 $header .=
'#, ' . implode(
', ', array_unique( $flags ) ) .
"\n";
559 $output = $header ?:
"#\n";
560 $output .= $content .
"\n";
/**
 * Looks up a key in two arrays in order of preference.
 *
 * @param string $key Key to look up.
 * @param array $a Preferred source.
 * @param array $b Fallback source.
 * @param mixed $default Value to use when neither source has a non-null entry.
 * @return mixed $a[$key] when it is set and not null, otherwise $b[$key] when
 *  it is set and not null, otherwise $default.
 */
protected static function chainGetter( $key, $a, $b, $default ) {
	if ( isset( $a[$key] ) ) {
		return $a[$key];
	}

	if ( isset( $b[$key] ) ) {
		return $b[$key];
	}

	return $default;
}
/**
 * Formats a timestamp for use in Gettext date headers.
 *
 * The formatting is done by the English language object's sprintfDate(),
 * with a fixed "+0000" offset appended by the format string itself.
 *
 * @param string $time Timestamp as accepted by Language::sprintfDate()
 *  (callers in this class pass MediaWiki timestamps).
 * @return string The formatted date.
 */
protected static function formatTime( $time ) {
	$languageFactory = MediaWikiServices::getInstance()->getLanguageFactory();
	$english = $languageFactory->getLanguage( 'en' );
	// NOTE(review): the "xn" prefixes presumably request plain, non-localised
	// digits; confirm against Language::sprintfDate().
	$format = 'xnY-xnm-xnd xnH:xni:xns+0000';

	return $english->sprintfDate( $format, $time );
}
/**
 * Returns the timestamp of the message group cache for the group's source
 * language, or the current time when that cache does not exist.
 *
 * @return string Timestamp from the cache or from wfTimestampNow().
 */
protected function getPotTime() {
	$sourceLanguage = $this->group->getSourceLanguage();
	$groupCache = $this->group->getMessageGroupCache( $sourceLanguage );

	if ( !$groupCache->exists() ) {
		return wfTimestampNow();
	}

	return $groupCache->getTimestamp();
}
/**
 * Builds the generator string (used for the X-Generator header) from the
 * MediaWiki and Translate version numbers.
 *
 * @return string "MediaWiki <version>; Translate <version>"
 */
protected function getGenerator() {
	$coreVersion = SpecialVersion::getVersion();
	$translateVersion = Utilities::getVersion();

	return 'MediaWiki ' . $coreVersion . '; Translate ' . $translateVersion;
}
593 protected function formatDocumentation( $key ) {
594 global $wgTranslateDocumentationLanguageCode;
596 if ( !$this->offlineMode ) {
600 $code = $wgTranslateDocumentationLanguageCode;
605 $documentation = Utilities::getMessageContent( $key, $code, $this->group->getNamespace() );
606 if ( !is_string( $documentation ) ) {
610 $lines = explode(
"\n", $documentation );
612 foreach ( $lines as $line ) {
613 $out .=
"#. [Wiki] $line\n";
/**
 * Turns a string into a double-quoted Gettext string literal.
 *
 * Steps, in order:
 *  - a backslash at the very end that directly follows a whitespace
 *    character is dropped, keeping the whitespace (NOTE(review): presumably
 *    this undoes a trailing-whitespace marker added on import; confirm
 *    against formatForWiki());
 *  - backslashes and double quotes are backslash-escaped;
 *  - real newlines become the two-character sequence \n;
 *  - the result is wrapped in double quotes.
 *
 * @param string $line Text to escape.
 * @return string The quoted and escaped text.
 */
protected static function escape( $line ) {
	$withoutMarker = preg_replace( '/(\s)\\\\$/', '\1', $line );
	$slashed = addcslashes( $withoutMarker, '\\"' );
	$singleLine = str_replace( "\n", '\n', $slashed );

	return '"' . $singleLine . '"';
}
630 $regex =
'/^"(.+)-Date: \d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\+\d\d\d\d\\\\n"$/m';
632 $a = preg_replace( $regex,
'', $a );
633 $b = preg_replace( $regex,
'', $b );
651 '_values' => [
'simple',
'legacy' ],
654 '_type' =>
'boolean',
665 public function isContentEqual( $a, $b ) {
671 $parsedA = GettextPlural::parsePluralForms( $a );
672 $parsedB = GettextPlural::parsePluralForms( $b );
675 if ( count( $parsedA[1] ) !== count( $parsedB[1] ) ) {
684 $expectedPluralCount = count( $parsedA[1] );
688 if ( $expectedPluralCount === 0 ) {
692 return GettextPlural::unflatten( $a, $expectedPluralCount )
693 === GettextPlural::unflatten( $b, $expectedPluralCount );
static parseGettextData( $data, $useCtxtAsKey, StringMangler $mangler, $keyAlgorithm, bool $allowPotMode)
Parses the contents of a gettext file, given as a string, into the internal representation.