Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
90.55% |
115 / 127 |
|
70.00% |
14 / 20 |
CRAP | |
0.00% |
0 / 1 |
TranslationUnit | |
90.55% |
115 / 127 |
|
70.00% |
14 / 20 |
51.03 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
setIsInline | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isInline | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setCanWrap | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
canWrap | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTextWithVariables | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
replaceVariablesWithNames | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getTextForTrans | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
onlyTvarsChanged | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
getMarkedText | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
getOldText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getVariables | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
loadVariables | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
3 | |||
serializeToArray | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
unserializeFromArray | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getTextForRendering | |
100.00% |
40 / 40 |
|
100.00% |
1 / 1 |
12 | |||
getIssues | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
5 | |||
getHeading | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
shouldAddAnchor | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
6.05 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\PageTranslation; |
5 | |
6 | use MediaWiki\Extension\Translate\MessageLoading\Message; |
7 | use MediaWiki\Html\Html; |
8 | use MediaWiki\Language\Language; |
9 | use MediaWiki\Parser\Parser; |
10 | use const PREG_SET_ORDER; |
11 | |
12 | /** |
13 | * This class represents one translation unit in a translatable page. |
14 | * |
15 | * @author Niklas Laxström |
16 | * @license GPL-2.0-or-later |
17 | * @ingroup PageTranslation |
18 | */ |
class TranslationUnit {
	public const UNIT_MARKER_INVALID_CHARS = "_/\n<>";
	/** Identifier used by the constructor when no unit id is given. */
	public const NEW_UNIT_ID = '-1';
	// Deprecated syntax. Example: <tvar|1>...</>
	public const TVAR_OLD_SYNTAX_REGEX = '~<tvar\|([^>]+)>(.*?)</>~us';
	// Current syntax. Example: <tvar name=1>...</tvar>
	public const TVAR_NEW_SYNTAX_REGEX =
		<<<'REGEXP'
		~
		<tvar \s+ name \s* = \s*
		( ( ' (?<key1> [^']* ) ' ) | ( " (?<key2> [^"]* ) " ) | (?<key3> [^"'\s>]* ) )
		\s* > (?<value>.*?) </tvar \s* >
		~xusi
		REGEXP;
	/**
	 * Regular expression matching the `{{TRANSLATIONLANGUAGE}}` “magic word”
	 * (which is not a real magic word, but rather replaced in the source text)
	 */
	public const TRANSLATIONLANGUAGE_REGEX = '/{{\s*TRANSLATIONLANGUAGE\s*}}/';

	/** @var string Unit name */
	public $id;
	/** @var string Unit text. */
	public $text;
	/** @var string Is this new, existing, changed or deleted unit. */
	public $type;
	/** @var string|null Text of previous version of this unit. */
	public $oldText = null;
	/**
	 * @var bool Whether this unit is inline unit.
	 * E.g. "Something <translate>foo</translate> bar".
	 */
	protected $inline = false;
	/** @var bool Whether wrapping the unit is allowed */
	private $canWrap = true;
	/** @var int Version number for the serialization. */
	private $version = 1;
	/**
	 * @var string[] List of properties to serialize. The position in this list is
	 * the index used in the serialized list, so the order must not change.
	 */
	private static $properties = [ 'version', 'id', 'text', 'type', 'oldText', 'inline' ];

	/**
	 * @param string $text Unit text
	 * @param string $id Unit name; defaults to self::NEW_UNIT_ID
	 * @param string $type Unit type; defaults to 'new'
	 * @param string|null $oldText Text of the previous version of the unit, if any
	 */
	public function __construct(
		string $text,
		string $id = self::NEW_UNIT_ID,
		string $type = 'new',
		?string $oldText = null
	) {
		$this->text = $text;
		$this->id = $id;
		$this->type = $type;
		$this->oldText = $oldText;
	}

	/** Mark the unit as inline (true) or not inline (false). */
	public function setIsInline( bool $value ): void {
		$this->inline = $value;
	}

	/** Returns whether the unit is an inline unit. */
	public function isInline(): bool {
		return $this->inline;
	}

	/** Allow (true) or forbid (false) wrapping the unit when rendering. */
	public function setCanWrap( bool $value ): void {
		$this->canWrap = $value;
	}

	/** Returns whether wrapping the unit is allowed. */
	public function canWrap(): bool {
		return $this->canWrap;
	}

	/** Returns unit text unmodified */
	public function getText(): string {
		return $this->text;
	}

	/** Returns the text with tvars replaced with placeholders */
	public function getTextWithVariables(): string {
		return $this->replaceVariablesWithNames( $this->text );
	}

	/**
	 * Replace every tvar definition found in $text with the name of the variable.
	 *
	 * @param string $text Text possibly containing tvar definitions
	 * @return string
	 */
	private function replaceVariablesWithNames( string $text ): string {
		$variableReplacements = [];
		foreach ( $this->loadVariables( $text ) as $variable ) {
			$variableReplacements[$variable->getDefinition()] = $variable->getName();
		}

		return strtr( $text, $variableReplacements );
	}

	/** Returns unit text with variables replaced. */
	public function getTextForTrans(): string {
		$variableReplacements = [];
		foreach ( $this->getVariables() as $variable ) {
			$variableReplacements[$variable->getDefinition()] = $variable->getValue();
		}

		return strtr( $this->text, $variableReplacements );
	}

	/** Returns whether all changes to the unit were done inside tvars */
	public function onlyTvarsChanged(): bool {
		if ( $this->oldText === null ) {
			// This shouldn't ever be called if oldText is null, but just in case
			return false;
		}

		// With every tvar collapsed to its name, the two texts can only differ
		// if something outside the tvars changed.
		$newText = $this->getTextWithVariables();
		$oldText = $this->replaceVariablesWithNames( $this->oldText );
		return $oldText === $newText;
	}

	/** Returns the unit text with updated or added unit marker */
	public function getMarkedText(): string {
		$id = $this->id;
		$header = "<!--T:$id-->";

		if ( $this->getHeading( $this->text ) !== null ) {
			// Headings get the marker appended on the same line
			$text = $this->text . ' ' . $header;
		} elseif ( $this->inline ) {
			$text = $header . ' ' . $this->text;
		} else {
			$text = $header . "\n" . $this->text;
		}

		return $text;
	}

	/** Returns oldtext, or current text if not available */
	public function getOldText(): string {
		return $this->oldText ?? $this->text;
	}

	/** @return TranslationVariable[] */
	public function getVariables(): array {
		return $this->loadVariables( $this->text );
	}

	/**
	 * Extract the tvars from $text. Variables in the deprecated syntax are listed
	 * first, followed by the ones in the current syntax.
	 *
	 * @param string $text
	 * @return TranslationVariable[]
	 */
	private function loadVariables( string $text ): array {
		$vars = [];

		$matches = [];
		preg_match_all( self::TVAR_OLD_SYNTAX_REGEX, $text, $matches, PREG_SET_ORDER );
		foreach ( $matches as $m ) {
			$vars[] = new TranslationVariable( $m[0], '$' . $m[1], $m[2] );
		}

		$matches = [];
		preg_match_all( self::TVAR_NEW_SYNTAX_REGEX, $text, $matches, PREG_SET_ORDER );
		foreach ( $matches as $m ) {
			$vars[] = new TranslationVariable(
				$m[0],
				// Maximum of one of these is non-empty string
				'$' . ( $m['key1'] . $m['key2'] . $m['key3'] ),
				$m['value']
			);
		}

		return $vars;
	}

	/** Serialize this object to a PHP array */
	public function serializeToArray(): array {
		$data = [];
		foreach ( self::$properties as $index => $property ) {
			// Because this is used for the JobQueue, use a list
			// instead of an array to save space.
			$data[$index] = $this->$property;
		}

		return $data;
	}

	/**
	 * Recreate a unit from the list produced by serializeToArray().
	 *
	 * @param array $data Property values, positioned as in self::$properties
	 * @return self
	 */
	public static function unserializeFromArray( array $data ): self {
		// Give dummy default text, will be overridden
		$unit = new self( '' );
		foreach ( self::$properties as $index => $property ) {
			// A short list must not overwrite defaults with null from an undefined
			// offset. array_key_exists() rather than isset(), because null is a
			// legitimate stored value (oldText).
			if ( array_key_exists( $index, $data ) ) {
				$unit->$property = $data[$index];
			}
		}

		return $unit;
	}

	/**
	 * Build the text of this unit for a rendered translation page.
	 *
	 * The translation from $msg is used when it has one; otherwise the source text
	 * with tvars collapsed to their names. The result is optionally wrapped in a
	 * span/div, optionally prefixed with a heading anchor, and finally variable
	 * names and the TRANSLATIONLANGUAGE placeholder are substituted.
	 *
	 * @param Message|null $msg Message providing the translation, if any
	 * @param Language $sourceLanguage
	 * @param Language $targetLanguage
	 * @param bool $wrapUntranslated Whether untranslated text gets lang, dir and
	 *  class attributes of the source language
	 * @param Parser|null $parser Used to compute the heading anchor; without it no
	 *  anchor is added
	 * @return string
	 */
	public function getTextForRendering(
		?Message $msg,
		Language $sourceLanguage,
		Language $targetLanguage,
		bool $wrapUntranslated,
		?Parser $parser = null
	): string {
		$attributes = [];
		$headingText = null;

		if ( $msg && $msg->translation() !== null ) {
			$content = $msg->translation();
			$headingText = $this->getHeading( $msg->definition() );

			if ( $msg->hasTag( 'fuzzy' ) ) {
				// We do not ever want to show explicit fuzzy marks in the rendered pages
				$content = str_replace( TRANSLATE_FUZZY, '', $content );
				$attributes['class'] = 'mw-translate-fuzzy';
			}
			$translationLanguage = $targetLanguage->getCode();
		} else {
			$content = $this->getTextWithVariables();
			if ( $wrapUntranslated ) {
				$attributes['lang'] = $sourceLanguage->getHtmlCode();
				$attributes['dir'] = $sourceLanguage->getDir();
				$attributes['class'] = 'mw-content-' . $sourceLanguage->getDir();
			}
			$translationLanguage = $sourceLanguage->getCode();
		}

		// Only wrap when there is something to say about the content
		if ( $this->canWrap() && $attributes ) {
			$tag = $this->isInline() ? 'span' : 'div';
			$content = $this->isInline() ? $content : "\n$content\n";
			$content = Html::rawElement( $tag, $attributes, $content );
		}

		$variableReplacements = [];
		foreach ( $this->getVariables() as $variable ) {
			$variableReplacements[$variable->getName()] = $variable->getValue();
		}

		if (
			$parser &&
			$this->shouldAddAnchor(
				$sourceLanguage,
				$targetLanguage,
				$headingText,
				$msg,
				$this->isInline()
			)
		) {
			// Drop the first character of the guessed section name before using it as an id
			$sectionName = substr( $parser->guessSectionNameFromWikiText( $headingText ), 1 );
			$attributes = [ 'id' => $sectionName ];
			$content = Html::rawElement( 'span', $attributes, '' ) . "\n$content";
		}

		$content = strtr( $content, $variableReplacements );

		// Allow wrapping this inside variables
		$content = preg_replace(
			self::TRANSLATIONLANGUAGE_REGEX,
			$translationLanguage,
			$content
		);

		return $content;
	}

	/**
	 * Validate the tvars of this unit.
	 *
	 * Produces a warning for each distinct variable name that does not match the
	 * insertable name pattern, and an error for each name that is used with more
	 * than one distinct value.
	 *
	 * @return TranslationUnitIssue[]
	 */
	public function getIssues(): array {
		$issues = $usedNames = [];
		// The pattern does not depend on the variable, build it only once
		$pattern = '/^' . TranslatablePageInsertablesSuggester::NAME_PATTERN . '$/u';

		foreach ( $this->getVariables() as $variable ) {
			$name = $variable->getName();
			if ( !preg_match( $pattern, $name ) ) {
				// Key by name to avoid multiple issues of the same name
				$issues[$name] = new TranslationUnitIssue(
					TranslationUnitIssue::WARNING,
					'tpt-validation-not-insertable',
					[ wfEscapeWikiText( $name ) ]
				);
			}

			$usedNames[ $name ][] = $variable->getValue();
		}

		foreach ( $usedNames as $name => $contents ) {
			$uniqueValueCount = count( array_unique( $contents ) );
			if ( $uniqueValueCount > 1 ) {
				$issues[] = new TranslationUnitIssue(
					TranslationUnitIssue::ERROR,
					'tpt-validation-name-reuse',
					[ wfEscapeWikiText( $name ) ]
				);
			}
		}

		return array_values( $issues );
	}

	/**
	 * Mimic the behavior of how Parser handles headings including handling of unbalanced "=" signs
	 *
	 * @param string $text
	 * @return string|null The heading text, or null if $text is not a heading
	 */
	private function getHeading( string $text ): ?string {
		$match = [];
		preg_match( '/^(={1,6})[ \t]*(.+?)[ \t]*\1\s*$/', $text, $match );
		return $match[2] ?? null;
	}

	/**
	 * Decide whether a heading anchor should be prepended to the rendered unit.
	 *
	 * @param Language $sourceLanguage
	 * @param Language $targetLanguage
	 * @param string|null $headingText Heading text of the unit, null if not a heading
	 * @param Message|null $msg
	 * @param bool $isInline
	 * @return bool
	 */
	private function shouldAddAnchor(
		Language $sourceLanguage,
		Language $targetLanguage,
		?string $headingText,
		?Message $msg,
		bool $isInline
	): bool {
		// If it's not a heading, don't bother adding an anchor
		if ( $headingText === null ) {
			return false;
		}

		// We only add an anchor for a translation. See: https://phabricator.wikimedia.org/T62544
		if ( $sourceLanguage->getCode() === $targetLanguage->getCode() ) {
			return false;
		}

		// Translation and the source text are same, avoid adding an anchor that would create
		// an id attribute with duplicate value
		if ( $msg && $msg->translation() === $msg->definition() ) {
			return false;
		}

		// If nowrap attribute is set, do not add the anchor
		if ( !$this->canWrap() ) {
			return false;
		}

		// We don't add anchors for inline translate tags to avoid breaking input like this:
		// Text here <translate>== not a heading ==</translate>
		return !$isInline;
	}
}