Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
89.13% |
82 / 92 |
|
30.00% |
3 / 10 |
CRAP | |
0.00% |
0 / 1 |
AndroidXmlFormat | |
90.11% |
82 / 91 |
|
30.00% |
3 / 10 |
29.81 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
supportsFuzzy | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFileExtensions | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
readFromVariable | |
91.67% |
22 / 24 |
|
0.00% |
0 / 1 |
7.03 | |||
scrapeAuthors | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
readElementContents | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
formatElementContents | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
doAuthors | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
writeReal | |
94.12% |
32 / 34 |
|
0.00% |
0 / 1 |
9.02 | |||
isContentEqual | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\FileFormatSupport; |
5 | |
6 | use DOMDocument; |
7 | use FileBasedMessageGroup; |
8 | use IntlChar; |
9 | use MediaWiki\Extension\Translate\MessageLoading\Message; |
10 | use MediaWiki\Extension\Translate\MessageLoading\MessageCollection; |
11 | use MediaWiki\Extension\Translate\MessageProcessing\ArrayFlattener; |
12 | use RuntimeException; |
13 | use SimpleXMLElement; |
14 | |
/**
 * Support for XML translation format used by Android.
 *
 * Reads `string` and `plurals` elements from an XML document and writes
 * translated messages back as a `<resources>` document, optionally preceded by
 * an XML comment listing the authors.
 *
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 * @ingroup FileFormatSupport
 */
class AndroidXmlFormat extends SimpleFormat {
	/** Used to flatten/unflatten CLDR plural forms and to compare message contents. */
	private ArrayFlattener $flattener;

	/**
	 * @param FileBasedMessageGroup $group Message group this format instance belongs to.
	 */
	public function __construct( FileBasedMessageGroup $group ) {
		parent::__construct( $group );
		$this->flattener = new ArrayFlattener( '', true );
	}

	/**
	 * @return string Always 'yes': fuzzy state is stored in a `fuzzy` attribute
	 *  (see readFromVariable() and writeReal()).
	 */
	public function supportsFuzzy(): string {
		return 'yes';
	}

	/**
	 * @return string[] File extensions handled by this format.
	 */
	public function getFileExtensions(): array {
		return [ '.xml' ];
	}

	/**
	 * Parse the contents of an Android XML resource file.
	 *
	 * Only `string` and `plurals` children of the root element are read; any
	 * other element is reported through wfDebug() and skipped. An element that
	 * carries the attribute fuzzy="true" gets TRANSLATE_FUZZY prepended to its
	 * value.
	 *
	 * @param string $data Raw XML document.
	 * @return array Array with two keys: 'AUTHORS', the names scraped from the
	 *  authors comment, and 'MESSAGES', the message values after the keys have
	 *  been passed through the group's mangler.
	 */
	public function readFromVariable( string $data ): array {
		$reader = new SimpleXMLElement( $data );

		$messages = [];
		$mangler = $this->group->getMangler();

		// The limit is lowered only while the elements are being read and is a
		// process-wide setting, so it must be put back no matter how we leave.
		$regexBacktrackLimit = ini_get( 'pcre.backtrack_limit' );
		ini_set( 'pcre.backtrack_limit', '10' );

		try {
			/** @var SimpleXMLElement $element */
			foreach ( $reader as $element ) {
				$key = (string)$element['name'];

				if ( $element->getName() === 'string' ) {
					$value = $this->readElementContents( $element );
				} elseif ( $element->getName() === 'plurals' ) {
					$forms = [];
					foreach ( $element as $item ) {
						$forms[(string)$item['quantity']] = $this->readElementContents( $item );
					}
					$value = $this->flattener->flattenCLDRPlurals( $forms );
				} else {
					wfDebug( __METHOD__ . ': Unknown XML element name.' );
					continue;
				}

				if ( isset( $element['fuzzy'] ) && (string)$element['fuzzy'] === 'true' ) {
					$value = TRANSLATE_FUZZY . $value;
				}

				$messages[$key] = $value;
			}
		} finally {
			// Restore the previous limit even when reading an element throws;
			// otherwise every later regular expression in this process would
			// keep running with a backtrack limit of 10.
			if ( $regexBacktrackLimit !== false ) {
				ini_set( 'pcre.backtrack_limit', $regexBacktrackLimit );
			}
		}

		return [
			'AUTHORS' => $this->scrapeAuthors( $data ),
			'MESSAGES' => $mangler->mangleArray( $messages ),
		];
	}

	/**
	 * Extract author names from the authors comment produced by doAuthors().
	 *
	 * @param string $string Raw XML document.
	 * @return string[] Author names, or an empty array when there is no authors comment.
	 */
	private function scrapeAuthors( string $string ): array {
		if ( !preg_match( '~<!-- Authors:\n((?:\* .*\n)*)-->~', $string, $match ) ) {
			return [];
		}

		$authors = $matches = [];
		preg_match_all( '~\* (.*)~', $match[1], $matches );
		foreach ( $matches[1] as $author ) {
			// Reverse the substitution done in doAuthors().
			$authors[] = str_replace( "\u{2011}\u{2011}", '--', $author );
		}
		return $authors;
	}

	/**
	 * Return the text of an element with escape sequences resolved.
	 *
	 * @param SimpleXMLElement $element
	 * @return string
	 */
	private function readElementContents( SimpleXMLElement $element ): string {
		// Convert string of format \uNNNN (eg: \u1234) to symbols. The pattern
		// skips sequences whose backslash is itself escaped by a preceding
		// backslash.
		$converted = preg_replace_callback(
			'/(?<!\\\\)(?:\\\\{2})*+\\K\\\\u([0-9A-Fa-f]{4,6})+/',
			static fn ( array $matches ) => IntlChar::chr( hexdec( $matches[1] ) ),
			(string)$element
		);

		// Resolve the remaining backslash escapes (quotes, \n, ...).
		return stripcslashes( $converted );
	}

	/**
	 * Escape message text so that it can be stored as element content.
	 *
	 * @param string $contents Unescaped message text.
	 * @return string Text with quotes, backslashes, a leading '@', ampersands
	 *  and newlines escaped.
	 */
	private function formatElementContents( string $contents ): string {
		// Quotes, apostrophes and backslashes are backslash-escaped.
		$escaped = addcslashes( $contents, '"\'\\' );
		if ( substr( $escaped, 0, 1 ) === '@' ) {
			// '@' at beginning of string refers to another string by name.
			// Add backslash to escape it too.
			$escaped = '\\' . $escaped;
		}
		// All html entities seen would be inserted by translators themselves.
		// Treat them as plain text. SimpleXMLElement::addChild() does not
		// escape ampersands on its own, so it has to be done here.
		$escaped = str_replace( '&', '&amp;', $escaped );

		// Newlines must be escaped
		return str_replace( "\n", '\n', $escaped );
	}

	/**
	 * Build the XML comment that lists the authors of the translation.
	 *
	 * @param MessageCollection $collection
	 * @return string The comment surrounded by newlines, or an empty string
	 *  when no authors remain after filtering.
	 */
	private function doAuthors( MessageCollection $collection ): string {
		$authors = $collection->getAuthors();
		$authors = $this->filterAuthors( $authors, $collection->code );

		if ( !$authors ) {
			return '';
		}

		$output = "\n<!-- Authors:\n";

		foreach ( $authors as $author ) {
			// Since -- is not allowed in XML comments, we rewrite them to
			// U+2011 (non-breaking hyphen).
			$author = str_replace( '--', "\u{2011}\u{2011}", $author );
			$output .= "* $author\n";
		}

		$output .= "-->\n";

		return $output;
	}

	/**
	 * Serialize the translated messages of a collection as an XML document.
	 *
	 * @param MessageCollection $collection Filtered in place to the messages
	 *  that have a translation.
	 * @return string Pretty-printed XML, or an empty string when nothing is
	 *  translated.
	 * @throws RuntimeException When a message reports a null translation
	 *  despite the filter.
	 */
	protected function writeReal( MessageCollection $collection ): string {
		global $wgTranslateDocumentationLanguageCode;

		$collection->filter( MessageCollection::FILTER_HAS_TRANSLATION, MessageCollection::INCLUDE_MATCHING );
		if ( count( $collection ) === 0 ) {
			return '';
		}

		$template = '<?xml version="1.0" encoding="utf-8"?>';
		$template .= $this->doAuthors( $collection );
		$template .= '<resources></resources>';

		$writer = new SimpleXMLElement( $template );

		// The message documentation language gets a tools:ignore="all"
		// attribute on the root element.
		if ( $collection->getLanguage() === $wgTranslateDocumentationLanguageCode ) {
			$writer->addAttribute(
				'tools:ignore',
				'all',
				'http://schemas.android.com/tools'
			);
		}

		$mangler = $this->group->getMangler();
		/** @var Message $m */
		foreach ( $collection as $key => $m ) {
			$key = $mangler->unmangle( $key );

			$value = $m->translation();
			if ( $value === null ) {
				throw new RuntimeException( "Expected translation to be present for $key, but found null." );
			}
			// Fuzziness is expressed with an attribute below, not in the text.
			$value = str_replace( TRANSLATE_FUZZY, '', $value );

			$plurals = $this->flattener->unflattenCLDRPlurals( '', $value );

			if ( $plurals === false ) {
				$element = $writer->addChild( 'string', $this->formatElementContents( $value ) );
			} else {
				$element = $writer->addChild( 'plurals' );
				foreach ( $plurals as $quantity => $content ) {
					$item = $element->addChild( 'item', $this->formatElementContents( $content ) );
					// Array keys that look like integers are converted to int
					// by PHP; addAttribute() requires a string in strict mode.
					$item->addAttribute( 'quantity', (string)$quantity );
				}
			}

			$element->addAttribute( 'name', $key );
			// This is non-standard
			if ( $m->hasTag( 'fuzzy' ) ) {
				$element->addAttribute( 'fuzzy', 'true' );
			}
		}

		// Make the output pretty with DOMDocument
		$dom = new DOMDocument( '1.0' );
		$dom->formatOutput = true;
		$dom->loadXML( $writer->asXML() );

		return $dom->saveXML() ?: '';
	}

	/**
	 * Compare two message contents using the flattener's comparison.
	 *
	 * @param string|null $a
	 * @param string|null $b
	 * @return bool
	 */
	public function isContentEqual( ?string $a, ?string $b ): bool {
		return $this->flattener->compareContent( $a, $b );
	}
}

// Also make the class available under the name AndroidXmlFFS
// (presumably the pre-namespace name kept for backward compatibility).
class_alias( AndroidXmlFormat::class, 'AndroidXmlFFS' );