Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
90.83% |
109 / 120 |
|
70.59% |
12 / 17 |
CRAP | |
0.00% |
0 / 1 |
TranslationUnit | |
90.83% |
109 / 120 |
|
70.59% |
12 / 17 |
46.56 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
setIsInline | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isInline | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setCanWrap | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
canWrap | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTextWithVariables | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getTextForTrans | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getMarkedText | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
getOldText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getVariables | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
3 | |||
serializeToArray | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
unserializeFromArray | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getTextForRendering | |
100.00% |
40 / 40 |
|
100.00% |
1 / 1 |
12 | |||
getIssues | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
5 | |||
getHeading | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
shouldAddAnchor | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
6.05 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\PageTranslation; |
5 | |
6 | use Language; |
7 | use MediaWiki\Extension\Translate\MessageLoading\Message; |
8 | use MediaWiki\Html\Html; |
9 | use Parser; |
10 | use const PREG_SET_ORDER; |
11 | |
12 | /** |
13 | * This class represents one translation unit in a translatable page. |
14 | * |
15 | * @author Niklas Laxström |
16 | * @license GPL-2.0-or-later |
17 | * @ingroup PageTranslation |
18 | */ |
class TranslationUnit {
	/** Characters that are not allowed in a unit marker. */
	public const UNIT_MARKER_INVALID_CHARS = "_/\n<>";
	/** Identifier given to a unit when the caller does not supply one. */
	public const NEW_UNIT_ID = '-1';
	// Deprecated syntax. Example: <tvar|1>...</>
	public const TVAR_OLD_SYNTAX_REGEX = '~<tvar\|([^>]+)>(.*?)</>~us';
	// Current syntax. Example: <tvar name=1>...</tvar>
	// The name may be single-quoted (key1), double-quoted (key2) or unquoted (key3).
	public const TVAR_NEW_SYNTAX_REGEX =
		<<<'REGEXP'
		~
			<tvar \s+ name \s* = \s*
			( ( ' (?<key1> [^']* ) ' ) | ( " (?<key2> [^"]* ) " ) | (?<key3> [^"'\s>]* ) )
			\s* > (?<value>.*?) </tvar \s* >
		~xusi
		REGEXP;
	/**
	 * Regular expression matching the `{{TRANSLATIONLANGUAGE}}` “magic word”
	 * (which is not a real magic word, but rather replaced in the source text)
	 */
	public const TRANSLATIONLANGUAGE_REGEX = '/{{\s*TRANSLATIONLANGUAGE\s*}}/';

	/** @var string Unit name */
	public $id;
	/** @var string Unit text. */
	public $text;
	/** @var string Is this new, existing, changed or deleted unit. */
	public $type;
	/** @var string|null Text of previous version of this unit. */
	public $oldText = null;
	/**
	 * @var bool Whether this unit is inline unit.
	 * E.g. "Something <translate>foo</translate> bar".
	 */
	protected $inline = false;
	/** @var bool Whether wrapping the unit is allowed */
	private $canWrap = true;
	/** @var int Version number for the serialization. */
	private $version = 1;
	/**
	 * @var string[] List of properties to serialize. The position of a name in this
	 * list is the index used for its value in the serialized array.
	 */
	private static $properties = [ 'version', 'id', 'text', 'type', 'oldText', 'inline' ];

	/**
	 * @param string $text Unit text
	 * @param string $id Unit name; defaults to self::NEW_UNIT_ID
	 * @param string $type Unit type; defaults to 'new'
	 * @param string|null $oldText Text of the previous version of the unit, if any
	 */
	public function __construct(
		string $text,
		string $id = self::NEW_UNIT_ID,
		string $type = 'new',
		// Explicitly nullable: implicit nullability via a null default is deprecated in PHP 8.4
		?string $oldText = null
	) {
		$this->text = $text;
		$this->id = $id;
		$this->type = $type;
		$this->oldText = $oldText;
	}

	/** Mark the unit as inline (or not). */
	public function setIsInline( bool $value ): void {
		$this->inline = $value;
	}

	/** Whether the unit is an inline unit. */
	public function isInline(): bool {
		return $this->inline;
	}

	/** Allow or disallow wrapping of the unit. */
	public function setCanWrap( bool $value ): void {
		$this->canWrap = $value;
	}

	/** Whether wrapping the unit is allowed. */
	public function canWrap(): bool {
		return $this->canWrap;
	}

	/** Returns unit text unmodified */
	public function getText(): string {
		return $this->text;
	}

	/** Returns the text with tvars replaced with placeholders */
	public function getTextWithVariables(): string {
		// Map each complete tvar definition to its placeholder name
		$variableReplacements = [];
		foreach ( $this->getVariables() as $variable ) {
			$variableReplacements[$variable->getDefinition()] = $variable->getName();
		}

		return strtr( $this->text, $variableReplacements );
	}

	/** Returns unit text with variables replaced. */
	public function getTextForTrans(): string {
		// Map each complete tvar definition to the value it wraps
		$variableReplacements = [];
		foreach ( $this->getVariables() as $variable ) {
			$variableReplacements[$variable->getDefinition()] = $variable->getValue();
		}

		return strtr( $this->text, $variableReplacements );
	}

	/**
	 * Returns the unit text with updated or added unit marker.
	 *
	 * The marker is appended after a space when the text is a heading. Otherwise it is
	 * prepended, separated by a space for inline units and by a newline for other units.
	 */
	public function getMarkedText(): string {
		$header = "<!--T:{$this->id}-->";

		if ( $this->getHeading( $this->text ) !== null ) {
			return $this->text . ' ' . $header;
		} elseif ( $this->inline ) {
			return $header . ' ' . $this->text;
		} else {
			return $header . "\n" . $this->text;
		}
	}

	/** Returns oldtext, or current text if not available */
	public function getOldText(): string {
		return $this->oldText ?? $this->text;
	}

	/**
	 * Collects the translation variables defined in the unit text: first all matches of
	 * the deprecated syntax, then all matches of the current syntax. Variable names are
	 * prefixed with "$".
	 *
	 * @return TranslationVariable[]
	 */
	public function getVariables(): array {
		$vars = [];

		$matches = [];
		preg_match_all( self::TVAR_OLD_SYNTAX_REGEX, $this->text, $matches, PREG_SET_ORDER );
		foreach ( $matches as $m ) {
			$vars[] = new TranslationVariable( $m[0], '$' . $m[1], $m[2] );
		}

		$matches = [];
		preg_match_all( self::TVAR_NEW_SYNTAX_REGEX, $this->text, $matches, PREG_SET_ORDER );
		foreach ( $matches as $m ) {
			$vars[] = new TranslationVariable(
				$m[0],
				// Maximum of one of these is non-empty string
				'$' . ( $m['key1'] . $m['key2'] . $m['key3'] ),
				$m['value']
			);
		}

		return $vars;
	}

	/**
	 * Serialize this object to a PHP array
	 *
	 * @return array Values of the properties named in self::$properties, in that order
	 */
	public function serializeToArray(): array {
		$data = [];
		foreach ( self::$properties as $index => $property ) {
			// Because this is used for the JobQueue, use a list
			// instead of an array to save space.
			$data[$index] = $this->$property;
		}

		return $data;
	}

	/**
	 * Recreate a unit from an array produced by serializeToArray().
	 *
	 * @param array $data Property values indexed as in self::$properties
	 * @return self
	 */
	public static function unserializeFromArray( array $data ): self {
		// Give dummy default text, will be overridden
		$unit = new self( '' );
		foreach ( self::$properties as $index => $property ) {
			// Keep the default when the payload has no entry for this slot. Assigning a
			// missing index would raise a warning and null the property, which later
			// breaks typed getters such as isInline().
			if ( array_key_exists( $index, $data ) ) {
				$unit->$property = $data[$index];
			}
		}

		return $unit;
	}

	/**
	 * Build the text of this unit for a rendered page.
	 *
	 * @param Message|null $msg Message for the unit. Its translation is used when it has
	 *  one; otherwise the unit text with variable placeholders is used.
	 * @param Language $sourceLanguage
	 * @param Language $targetLanguage
	 * @param bool $wrapUntranslated Whether untranslated content gets lang, dir and class
	 *  attributes taken from the source language
	 * @param Parser|null $parser When given, used to derive the anchor id that is added
	 *  in front of translated headings (see shouldAddAnchor())
	 * @return string
	 */
	public function getTextForRendering(
		?Message $msg,
		Language $sourceLanguage,
		Language $targetLanguage,
		bool $wrapUntranslated,
		?Parser $parser = null
	): string {
		$attributes = [];
		$headingText = null;

		if ( $msg && $msg->translation() !== null ) {
			$content = $msg->translation();
			$headingText = $this->getHeading( $msg->definition() );

			if ( $msg->hasTag( 'fuzzy' ) ) {
				// We do not ever want to show explicit fuzzy marks in the rendered pages
				$content = str_replace( TRANSLATE_FUZZY, '', $content );
				$attributes['class'] = 'mw-translate-fuzzy';
			}
			$translationLanguage = $targetLanguage->getCode();
		} else {
			$content = $this->getTextWithVariables();
			if ( $wrapUntranslated ) {
				$attributes['lang'] = $sourceLanguage->getHtmlCode();
				$attributes['dir'] = $sourceLanguage->getDir();
				$attributes['class'] = 'mw-content-' . $sourceLanguage->getDir();
			}
			$translationLanguage = $sourceLanguage->getCode();
		}

		// Only wrap when there is something to put on the wrapper element
		if ( $this->canWrap() && $attributes ) {
			$tag = $this->isInline() ? 'span' : 'div';
			$content = $this->isInline() ? $content : "\n$content\n";
			$content = Html::rawElement( $tag, $attributes, $content );
		}

		$variableReplacements = [];
		foreach ( $this->getVariables() as $variable ) {
			$variableReplacements[$variable->getName()] = $variable->getValue();
		}

		if (
			$parser &&
			$this->shouldAddAnchor(
				$sourceLanguage,
				$targetLanguage,
				$headingText,
				$msg,
				$this->isInline()
			)
		) {
			// Drop the first character of the guessed section name to get the id value
			$sectionName = substr( $parser->guessSectionNameFromWikiText( $headingText ), 1 );
			$attributes = [ 'id' => $sectionName ];
			$content = Html::rawElement( 'span', $attributes, '' ) . "\n$content";
		}

		// Placeholders are expanded after wrapping and anchoring
		$content = strtr( $content, $variableReplacements );

		// Allow wrapping this inside variables
		$content = preg_replace(
			self::TRANSLATIONLANGUAGE_REGEX,
			$translationLanguage,
			$content
		);

		return $content;
	}

	/**
	 * Validate the variables of this unit.
	 *
	 * Produces a warning for each variable name that does not match
	 * TranslatablePageInsertablesSuggester::NAME_PATTERN, and an error for each name
	 * that is used with more than one distinct value.
	 *
	 * @return TranslationUnitIssue[]
	 */
	public function getIssues(): array {
		$issues = $usedNames = [];
		foreach ( $this->getVariables() as $variable ) {
			$name = $variable->getName();
			$pattern = '/^' . TranslatablePageInsertablesSuggester::NAME_PATTERN . '$/u';
			if ( !preg_match( $pattern, $name ) ) {
				// Key by name to avoid multiple issues of the same name
				$issues[$name] = new TranslationUnitIssue(
					TranslationUnitIssue::WARNING,
					'tpt-validation-not-insertable',
					[ wfEscapeWikiText( $name ) ]
				);
			}

			$usedNames[ $name ][] = $variable->getValue();
		}

		foreach ( $usedNames as $name => $contents ) {
			$uniqueValueCount = count( array_unique( $contents ) );
			if ( $uniqueValueCount > 1 ) {
				$issues[] = new TranslationUnitIssue(
					TranslationUnitIssue::ERROR,
					'tpt-validation-name-reuse',
					[ wfEscapeWikiText( $name ) ]
				);
			}
		}

		// Discard the name keys that were only used for de-duplication
		return array_values( $issues );
	}

	/**
	 * Mimic the behavior of how Parser handles headings including handling of unbalanced "=" signs
	 *
	 * @param string $text
	 * @return string|null Heading text, or null when $text is not a heading
	 */
	private function getHeading( string $text ): ?string {
		$match = [];
		preg_match( '/^(={1,6})[ \t]*(.+?)[ \t]*\1\s*$/', $text, $match );
		return $match[2] ?? null;
	}

	/**
	 * Decide whether an anchor should be placed in front of the rendered unit.
	 *
	 * @param Language $sourceLanguage
	 * @param Language $targetLanguage
	 * @param string|null $headingText Heading text of the unit, null if it is not a heading
	 * @param Message|null $msg
	 * @param bool $isInline
	 * @return bool
	 */
	private function shouldAddAnchor(
		Language $sourceLanguage,
		Language $targetLanguage,
		?string $headingText,
		?Message $msg,
		bool $isInline
	): bool {
		// If it's not a heading, don't bother adding an anchor
		if ( $headingText === null ) {
			return false;
		}

		// We only add an anchor for a translation. See: https://phabricator.wikimedia.org/T62544
		if ( $sourceLanguage->getCode() === $targetLanguage->getCode() ) {
			return false;
		}

		// Translation and the source text are same, avoid adding an anchor that would create
		// an id attribute with duplicate value
		if ( $msg && $msg->translation() === $msg->definition() ) {
			return false;
		}

		// If nowrap attribute is set, do not add the anchor
		if ( !$this->canWrap() ) {
			return false;
		}

		// We don't add anchors for inline translate tags to avoid breaking input like this:
		// Text here <translate>== not a heading ==</translate>
		return !$isInline;
	}
}