21 private $allowPotMode =
false;
22 protected $offlineMode =
false;
29 return [
'.pot',
'.po' ];
34 $this->offlineMode = $value;
38 public function read( $code ) {
41 $this->allowPotMode = $this->
getGroup()->getSourceLanguage() === $code;
44 return parent::read( $code );
46 $this->allowPotMode =
false;
57 preg_match_all(
'/^#\s*Author:\s*(.*)$/m', $data, $matches );
58 $authors = $matches[1];
60 # Then messages and everything else
61 $parsedData = $this->parseGettext( $data );
62 $parsedData[
'AUTHORS'] = $authors;
64 foreach ( $parsedData[
'MESSAGES'] as $key => $value ) {
65 if ( $value ===
'' ) {
66 unset( $parsedData[
'MESSAGES'][$key] );
73 public function parseGettext( $data ) {
74 $mangler = $this->group->getMangler();
75 $useCtxtAsKey = $this->extra[
'CtxtAsKey'] ??
false;
76 $keyAlgorithm =
'simple';
77 if ( isset( $this->extra[
'keyAlgorithm'] ) ) {
78 $keyAlgorithm = $this->extra[
'keyAlgorithm'];
81 return self::parseGettextData( $data, $useCtxtAsKey, $mangler, $keyAlgorithm, $this->allowPotMode );
105 $data = str_replace(
"\r\n",
"\n", $data );
110 $sections = preg_split(
'/\n{2,}/', $data );
113 $headerSection = array_shift( $sections );
117 $match = self::expectKeyword(
'msgstr', $headerSection );
118 if ( $match !==
null ) {
119 $headerBlock = self::formatForWiki( $match,
'trim' );
120 $headers = self::parseHeaderTags( $headerBlock );
123 $flags = self::parseFlags( $headerSection );
124 if ( in_array(
'fuzzy', $flags,
true ) ) {
125 $potmode = $allowPotMode;
128 $message =
"Gettext file header was not found:\n\n$data";
137 if ( isset( $headers[
'X-Language-Code'] ) ) {
138 $metadata[
'code'] = $headers[
'X-Language-Code'];
141 if ( isset( $headers[
'X-Message-Group'] ) ) {
142 $metadata[
'group'] = $headers[
'X-Message-Group'];
147 $pluralCount =
false;
150 } elseif ( isset( $headers[
'Plural-Forms'] ) ) {
151 $pluralCount = $metadata[
'plural'] = GettextPlural::getPluralCount( $headers[
'Plural-Forms'] );
154 $metadata[
'plural'] = $pluralCount;
157 foreach ( $sections as $section ) {
158 $item = self::parseGettextSection( $section, $pluralCount );
159 if ( $item ===
false ) {
163 if ( $useCtxtAsKey ) {
164 if ( !isset( $item[
'ctxt'] ) ) {
165 error_log(
"ctxt missing for: $section" );
168 $key = $item[
'ctxt'];
170 $key = self::generateKeyFromItem( $item, $keyAlgorithm );
173 $key = $mangler->
mangle( $key );
174 $messages[$key] = $potmode ? $item[
'id'] : $item[
'str'];
175 $template[$key] = $item;
179 'MESSAGES' => $messages,
181 'TEMPLATE' => $template,
182 'METADATA' => $metadata,
183 'HEADERS' => $headers,
188 public static function parseGettextSection( $section, $pluralCount ) {
189 if ( trim( $section ) ===
'' ) {
197 if ( preg_match(
'/^#~/m', $section ) ) {
209 $match = self::expectKeyword(
'msgid', $section );
210 if ( $match !==
null ) {
211 $item[
'id'] = self::formatForWiki( $match );
213 throw new MWException(
"Unable to parse msgid:\n\n$section" );
216 $match = self::expectKeyword(
'msgctxt', $section );
217 if ( $match !==
null ) {
218 $item[
'ctxt'] = self::formatForWiki( $match );
221 $pluralMessage =
false;
222 $match = self::expectKeyword(
'msgid_plural', $section );
223 if ( $match !==
null ) {
224 $pluralMessage =
true;
225 $plural = self::formatForWiki( $match );
226 $item[
'id'] = GettextPlural::flatten( [ $item[
'id'], $plural ] );
229 if ( $pluralMessage ) {
230 $pluralMessageText = self::processGettextPluralMessage( $pluralCount, $section );
233 if ( $pluralMessageText !==
'' ) {
234 $item[
'str'] = $pluralMessageText;
237 $match = self::expectKeyword(
'msgstr', $section );
238 if ( $match !==
null ) {
239 $item[
'str'] = self::formatForWiki( $match );
241 throw new MWException(
"Unable to parse msgstr:\n\n$section" );
246 $flags = self::parseFlags( $section );
247 foreach ( $flags as $key => $flag ) {
248 if ( $flag ===
'fuzzy' ) {
249 $item[
'str'] = TRANSLATE_FUZZY . $item[
'str'];
250 unset( $flags[$key] );
253 $item[
'flags'] = $flags;
257 if ( preg_match_all(
'/^#(.?) (.*)$/m', $section, $matches, PREG_SET_ORDER ) ) {
258 foreach ( $matches as $match ) {
259 if ( $match[1] !==
',' && strpos( $match[1],
'[Wiki]' ) !== 0 ) {
260 $item[
'comments'][$match[1]][] = $match[2];
268 public static function processGettextPluralMessage( $pluralCount, $section ) {
271 for ( $i = 0; $i < $pluralCount; $i++ ) {
272 $match = self::expectKeyword(
"msgstr\\[$i\\]", $section );
274 if ( $match !==
null ) {
275 $actualForms[] = self::formatForWiki( $match );
278 error_log(
"Plural $i not found, expecting total of $pluralCount for $section" );
282 if ( array_sum( array_map(
'strlen', $actualForms ) ) > 0 ) {
283 return GettextPlural::flatten( $actualForms );
289 public static function parseFlags( $section ) {
291 if ( preg_match(
'/^#,(.*)$/mu', $section, $matches ) ) {
292 return array_map(
'trim', explode(
',', $matches[1] ) );
298 public static function expectKeyword( $name, $section ) {
302 $poformat =
'".*"\n?(^".*"$\n?)*';
305 if ( preg_match(
"/^$name\s($poformat)/mx", $section, $matches ) ) {
320 $lang = Language::factory(
'en' );
322 if ( $item[
'ctxt'] ===
'' ) {
326 $hash = sha1( $item[
'id'] .
'MSGEMPTYCTXT' );
328 $hash = sha1( $item[
'ctxt'] . $item[
'id'] );
331 if ( $algorithm ===
'simple' ) {
332 $hash = substr( $hash, 0, 6 );
333 $snippet = $lang->truncateForDatabase( $item[
'id'], 30,
'' );
334 $snippet = str_replace(
' ',
'_', trim( $snippet ) );
336 $legalChars = Title::legalChars();
337 $snippet = $item[
'id'];
338 $snippet = preg_replace(
"/[^$legalChars]/",
' ', $snippet );
339 $snippet = preg_replace(
"/[:&%\/_]/",
' ', $snippet );
340 $snippet = preg_replace(
'/ {2,}/',
' ', $snippet );
341 $snippet = $lang->truncateForDatabase( $snippet, 30,
'' );
342 $snippet = str_replace(
' ',
'_', trim( $snippet ) );
345 return "$hash-$snippet";
359 $quotePattern =
'/(^"|"$\n?)/m';
360 $data = preg_replace( $quotePattern,
'', $data );
361 $data = stripcslashes( $data );
363 if ( preg_match(
'/\s$/', $data ) ) {
364 if ( $whitespace ===
'mark' ) {
366 } elseif ( $whitespace ===
'trim' ) {
367 $data = rtrim( $data );
370 throw new MWException(
'Unknown action for whitespace' );
377 public static function parseHeaderTags( $headers ) {
379 foreach ( explode(
"\n", $headers ) as $line ) {
380 if ( strpos( $line,
':' ) ===
false ) {
381 error_log( __METHOD__ .
": $line" );
383 [ $key, $value ] = explode(
':', $line, 2 );
384 $tags[trim( $key )] = trim( $value );
392 $pot = $this->
read(
'en' ) ?? [];
393 $code = $collection->code;
394 $template = $this->
read( $code ) ?? [];
395 $output = $this->doGettextHeader( $collection, $template[
'EXTRA'] ?? [] );
397 $pluralRule = GettextPlural::getPluralRule( $code );
398 if ( !$pluralRule ) {
399 $pluralRule = GettextPlural::getPluralRule(
'en' );
400 LoggerFactory::getInstance(
'Translate' )->warning(
401 "T235180: Missing Gettext plural rule for '{languagecode}'",
402 [
'languagecode' => $code ]
405 $pluralCount = GettextPlural::getPluralCount( $pluralRule );
408 foreach ( $collection as $key => $m ) {
409 $transTemplate = $template[
'EXTRA'][
'TEMPLATE'][$key] ?? [];
410 $potTemplate = $pot[
'EXTRA'][
'TEMPLATE'][$key] ?? [];
412 $output .= $this->
formatMessageBlock( $key, $m, $transTemplate, $potTemplate, $pluralCount );
421 $code = $collection->code;
422 $name = TranslateUtils::getLanguageName( $code );
423 $native = TranslateUtils::getLanguageName( $code, $code );
424 $authors = $this->doAuthors( $collection );
425 if ( isset( $this->extra[
'header'] ) ) {
426 $extra =
"# --\n" . $this->extra[
'header'];
432# Translation of {$this->group->getLabel()} to $name ($native)
433# Exported from $wgSitename
439 $output = trim( $output ) .
"\n";
441 $specs = $template[
'HEADERS'] ?? [];
443 $timestamp = wfTimestampNow();
444 $specs[
'PO-Revision-Date'] = self::formatTime( $timestamp );
445 if ( $this->offlineMode ) {
446 $specs[
'POT-Creation-Date'] = self::formatTime( $timestamp );
448 $specs[
'X-POT-Import-Date'] = self::formatTime( wfTimestamp( TS_MW, $this->getPotTime() ) );
450 $specs[
'Content-Type'] =
'text/plain; charset=UTF-8';
451 $specs[
'Content-Transfer-Encoding'] =
'8bit';
452 $specs[
'Language'] = LanguageCode::bcp47( $this->group->mapCode( $code ) );
453 Hooks::run(
'Translate:GettextFFS:headerFields', [ &$specs, $this->group, $code ] );
454 $specs[
'X-Generator'] = $this->getGenerator();
456 if ( $this->offlineMode ) {
457 $specs[
'X-Language-Code'] = $code;
458 $specs[
'X-Message-Group'] = $this->group->getId();
461 $specs[
'Plural-Forms'] = GettextPlural::getPluralRule( $code )
462 ?: GettextPlural::getPluralRule(
'en' );
464 $output .=
'msgid ""' .
"\n";
465 $output .=
'msgstr ""' .
"\n";
466 $output .=
'""' .
"\n";
468 foreach ( $specs as $k => $v ) {
469 $output .= self::escape(
"$k: $v\n" ) .
"\n";
480 $authors = $this->
filterAuthors( $authors, $collection->code );
482 foreach ( $authors as $author ) {
483 $output .=
"# Author: $author\n";
498 $header = $this->formatDocumentation( $key );
501 $comments = self::chainGetter(
'comments', $pot, $trans, [] );
502 foreach ( $comments as $type => $typecomments ) {
503 foreach ( $typecomments as $comment ) {
504 $header .=
"#$type $comment\n";
508 $flags = self::chainGetter(
'flags', $pot, $trans, [] );
509 $flags = array_merge( $m->getTags(), $flags );
511 if ( $this->offlineMode ) {
512 $content .=
'msgctxt ' . self::escape( $key ) .
"\n";
514 $ctxt = self::chainGetter(
'ctxt', $pot, $trans,
false );
515 if ( $ctxt !==
false ) {
516 $content .=
'msgctxt ' . self::escape( $ctxt ) .
"\n";
520 $msgid = $m->definition();
521 $msgstr = $m->translation();
522 if ( strpos( $msgstr, TRANSLATE_FUZZY ) !==
false ) {
523 $msgstr = str_replace( TRANSLATE_FUZZY,
'', $msgstr );
528 if ( GettextPlural::hasPlural( $msgid ) ) {
529 $forms = GettextPlural::unflatten( $msgid, 2 );
530 $content .=
'msgid ' . self::escape( $forms[0] ) .
"\n";
531 $content .=
'msgid_plural ' . self::escape( $forms[1] ) .
"\n";
534 $forms = GettextPlural::unflatten( $msgstr, $pluralCount );
535 foreach ( $forms as $index => $form ) {
536 $content .=
"msgstr[$index] " . self::escape( $form ) .
"\n";
539 $flags[] =
'invalid-plural';
540 for ( $i = 0; $i < $pluralCount; $i++ ) {
541 $content .=
"msgstr[$i] \"\"\n";
545 $content .=
'msgid ' . self::escape( $msgid ) .
"\n";
546 $content .=
'msgstr ' . self::escape( $msgstr ) .
"\n";
551 $header .=
'#, ' . implode(
', ', array_unique( $flags ) ) .
"\n";
554 $output = $header ?:
"#\n";
555 $output .= $content .
"\n";
568 return $a[$key] ?? $b[$key] ?? $default;
571 protected static function formatTime( $time ) {
572 $lang = Language::factory(
'en' );
574 return $lang->sprintfDate(
'xnY-xnm-xnd xnH:xni:xns+0000', $time );
577 protected function getPotTime() {
578 $cache = $this->group->getMessageGroupCache( $this->group->getSourceLanguage() );
580 return $cache->exists() ? $cache->getTimestamp() : wfTimestampNow();
583 protected function getGenerator() {
584 return 'MediaWiki ' . SpecialVersion::getVersion() .
588 protected function formatDocumentation( $key ) {
589 global $wgTranslateDocumentationLanguageCode;
591 if ( !$this->offlineMode ) {
595 $code = $wgTranslateDocumentationLanguageCode;
601 if ( !is_string( $documentation ) ) {
605 $lines = explode(
"\n", $documentation );
607 foreach ( $lines as $line ) {
608 $out .=
"#. [Wiki] $line\n";
614 protected static function escape( $line ) {
616 $line = preg_replace(
'/(\s)\\\\$/',
'\1', $line );
617 $line = addcslashes( $line,
'\\"' );
618 $line = str_replace(
"\n",
'\n', $line );
619 $line =
'"' . $line .
'"';
625 $regex =
'/^"(.+)-Date: \d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\+\d\d\d\d\\\\n"$/m';
627 $a = preg_replace( $regex,
'', $a );
628 $b = preg_replace( $regex,
'', $b );
646 '_values' => [
'simple',
'legacy' ],
649 '_type' =>
'boolean',
666 $parsedA = GettextPlural::parsePluralForms( $a );
667 $parsedB = GettextPlural::parsePluralForms( $b );
670 if ( count( $parsedA[1] ) !== count( $parsedB[1] ) ) {
679 $expectedPluralCount = count( $parsedA[1] );
683 if ( $expectedPluralCount === 0 ) {
687 return GettextPlural::unflatten( $a, $expectedPluralCount )
688 === GettextPlural::unflatten( $b, $expectedPluralCount );
New-style FFS class that implements support for the gettext file format.
static generateKeyFromItem(array $item, $algorithm='simple')
Generates a unique key for each message.
isContentEqual( $a, $b)
Checks whether two strings are equal.
getFileExtensions()
Return the commonly used file extensions for these formats.
formatMessageBlock( $key, $m, $trans, $pot, $pluralCount)
static getExtraSchema()
Return a data structure that will be merged with the base schema.
static parseGettextData( $data, $useCtxtAsKey, StringMangler $mangler, $keyAlgorithm, bool $allowPotMode)
Parses gettext file as string into internal representation.
static chainGetter( $key, $a, $b, $default)
shouldOverwrite( $a, $b)
Allows skipping the writing of the export output into a file.
supportsFuzzy()
Query the capabilities of this FFS.
static formatForWiki( $data, $whitespace='mark')
This parses the Gettext text block format.
writeReal(MessageCollection $collection)
Exception thrown when a Gettext file could not be parsed, such as when required headers are missing.