Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
TranslationUnit.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\PageTranslation;
5
6use Language;
8use MediaWiki\Html\Html;
9use Parser;
10use const PREG_SET_ORDER;
11
/**
 * Characters grouped together as invalid for unit markers.
 * NOTE(review): not referenced in the visible part of this file — confirm where it is enforced.
 */
public const UNIT_MARKER_INVALID_CHARS = "_/\n<>";
/** Identifier the constructor assigns to a unit when no id is passed. */
public const NEW_UNIT_ID = '-1';
// Deprecated syntax. Example: <tvar|1>...</>
// Capture group 1 is the variable name, capture group 2 its content.
public const TVAR_OLD_SYNTAX_REGEX = '~<tvar\|([^>]+)>(.*?)</>~us';
// Current syntax. Example: <tvar name=1>...</tvar>
// The name may be single-quoted (key1), double-quoted (key2) or unquoted (key3);
// the content between the tags is captured as "value".
public const TVAR_NEW_SYNTAX_REGEX =
<<<'REGEXP'
~
<tvar \s+ name \s* = \s*
( ( ' (?<key1> [^']* ) ' ) | ( " (?<key2> [^"]* ) " ) | (?<key3> [^"'\s>]* ) )
\s* > (?<value>.*?) </tvar \s* >
~xusi
REGEXP;
/**
 * Matches the {{TRANSLATIONLANGUAGE}} placeholder, allowing whitespace inside the braces.
 * getTextForRendering() replaces every match with a language code.
 */
public const TRANSLATIONLANGUAGE_REGEX = '/{{\s*TRANSLATIONLANGUAGE\s*}}/';
38
/** @var string Unit identifier; the constructor defaults it to self::NEW_UNIT_ID. */
public $id;
/** @var string Unit text, possibly containing <tvar> variable definitions. */
public $text;
/** @var string Unit type; the constructor defaults it to 'new'. */
public $type;
/** @var string|null Earlier text of the unit, or null when none was supplied. */
public $oldText = null;
/** @var bool Whether the unit is inline; affects marker placement and the wrapper tag. */
protected $inline = false;
/** @var bool Whether rendering may wrap the unit in an element. Not part of self::$properties. */
private $canWrap = true;
/** @var int Version number stored as the first entry of the serialized form. */
private $version = 1;
/**
 * @var string[] Property names copied by serializeToArray() and unserializeFromArray().
 * The position in this list is the index used in the serialized array.
 */
private static $properties = [ 'version', 'id', 'text', 'type', 'oldText', 'inline' ];
58
/**
 * Creates a translation unit.
 *
 * @param string $text Text of the unit.
 * @param string $id Unit identifier; defaults to self::NEW_UNIT_ID.
 * @param string $type Unit type; defaults to 'new'.
 * @param string|null $oldText Earlier text of the unit, or null when there is none.
 */
public function __construct(
	string $text,
	string $id = self::NEW_UNIT_ID,
	string $type = 'new',
	// Explicitly nullable: relying on "= null" to make the type nullable is
	// deprecated as of PHP 8.4.
	?string $oldText = null
) {
	$this->text = $text;
	$this->id = $id;
	$this->type = $type;
	$this->oldText = $oldText;
}
70
/**
 * Marks the unit as inline or not.
 *
 * @param bool $value True when the unit is inline.
 */
public function setIsInline( bool $value ): void {
	$this->inline = $value;
}
74
/**
 * Tells whether the unit has been marked as inline.
 *
 * @return bool
 */
public function isInline(): bool {
	return $this->inline;
}
78
/**
 * Sets whether the unit may be wrapped in an element when rendered.
 *
 * @param bool $value False to forbid wrapping.
 */
public function setCanWrap( bool $value ): void {
	$this->canWrap = $value;
}
82
/**
 * Tells whether the unit may be wrapped in an element when rendered.
 *
 * @return bool
 */
public function canWrap(): bool {
	return $this->canWrap;
}
86
/**
 * Returns the unit text as stored, variable definitions included.
 *
 * @return string
 */
public function getText(): string {
	return $this->text;
}
91
/**
 * Returns the unit text with every variable definition replaced by the
 * name of that variable.
 *
 * @return string
 */
public function getTextWithVariables(): string {
	$source = $this->text;

	return $this->replaceVariablesWithNames( $source );
}
96
/**
 * Replaces each variable definition found in the given text with the
 * name of the variable.
 *
 * @param string $text Text that may contain variable definitions.
 * @return string The text after substitution.
 */
private function replaceVariablesWithNames( string $text ): string {
	$namesByDefinition = [];
	$variables = $this->loadVariables( $text );

	foreach ( $variables as $tvar ) {
		$definition = $tvar->getDefinition();
		// A definition that occurs more than once keeps the last name seen.
		$namesByDefinition[$definition] = $tvar->getName();
	}

	return strtr( $text, $namesByDefinition );
}
105
/**
 * Returns the unit text with every variable definition replaced by the
 * value of that variable.
 *
 * @return string
 */
public function getTextForTrans(): string {
	$valuesByDefinition = [];

	foreach ( $this->getVariables() as $tvar ) {
		$definition = $tvar->getDefinition();
		$valuesByDefinition[$definition] = $tvar->getValue();
	}

	return strtr( $this->text, $valuesByDefinition );
}
115
/**
 * Tells whether the old and the current text are identical once variable
 * definitions in both have been replaced by the variable names.
 *
 * @return bool False when there is no old text to compare against.
 */
public function onlyTvarsChanged(): bool {
	$previous = $this->oldText;
	if ( $previous === null ) {
		// Not expected to be called without an old text; answer conservatively.
		return false;
	}

	$currentNormalized = $this->getTextWithVariables();
	$previousNormalized = $this->replaceVariablesWithNames( $previous );

	return $previousNormalized === $currentNormalized;
}
126
/**
 * Returns the unit text together with its "<!--T:id-->" marker.
 *
 * For a heading the marker is appended after the text, separated by a
 * space. Otherwise the marker comes first, followed by a space for an
 * inline unit or a newline for any other unit.
 *
 * @return string
 */
public function getMarkedText(): string {
	$marker = "<!--T:{$this->id}-->";

	if ( $this->getHeading( $this->text ) !== null ) {
		return $this->text . ' ' . $marker;
	}

	$separator = $this->inline ? ' ' : "\n";

	return $marker . $separator . $this->text;
}
144
/**
 * Returns the old text of the unit, falling back to the current text
 * when no old text is set.
 *
 * @return string
 */
public function getOldText(): string {
	if ( $this->oldText !== null ) {
		return $this->oldText;
	}

	return $this->text;
}
149
/**
 * Returns the variables defined in the current text of the unit.
 *
 * @return TranslationVariable[]
 */
public function getVariables(): array {
	$source = $this->text;

	return $this->loadVariables( $source );
}
154
/**
 * Extracts variables from the given text: first all matches of the
 * deprecated syntax, then all matches of the current syntax.
 *
 * @param string $text Text to scan.
 * @return TranslationVariable[] One entry per match, names prefixed with '$'.
 */
private function loadVariables( string $text ): array {
	$variables = [];

	$oldSyntaxMatches = [];
	preg_match_all( self::TVAR_OLD_SYNTAX_REGEX, $text, $oldSyntaxMatches, PREG_SET_ORDER );
	foreach ( $oldSyntaxMatches as $match ) {
		$name = '$' . $match[1];
		$variables[] = new TranslationVariable( $match[0], $name, $match[2] );
	}

	$newSyntaxMatches = [];
	preg_match_all( self::TVAR_NEW_SYNTAX_REGEX, $text, $newSyntaxMatches, PREG_SET_ORDER );
	foreach ( $newSyntaxMatches as $match ) {
		// At most one of the three key groups is a non-empty string, so
		// concatenating them yields the name whichever quoting was used.
		$name = '$' . ( $match['key1'] . $match['key2'] . $match['key3'] );
		$variables[] = new TranslationVariable( $match[0], $name, $match['value'] );
	}

	return $variables;
}
178
/**
 * Serializes the unit into a list of property values.
 *
 * Values are stored under the numeric positions of self::$properties
 * rather than under property names, because the result is used for the
 * JobQueue and a list takes less space.
 *
 * @return array Property values indexed as in self::$properties.
 */
public function serializeToArray(): array {
	// array_map() keeps the keys of a single input array, so every value
	// ends up under the index of its property name.
	return array_map(
		function ( $property ) {
			return $this->$property;
		},
		self::$properties
	);
}
190
/**
 * Rebuilds a unit from the list produced by serializeToArray().
 *
 * @param array $data Property values indexed as in self::$properties.
 * @return self
 */
public static function unserializeFromArray( array $data ): self {
	// The empty text is only a placeholder; it is overwritten below.
	$restored = new self( '' );

	foreach ( self::$properties as $position => $name ) {
		$restored->{$name} = $data[$position];
	}

	return $restored;
}
200
/**
 * Produces the text of this unit for a rendered page.
 *
 * When the message has a translation, the translation is used (fuzzy
 * marks removed); otherwise the unit's own text with variable names is
 * used. The content may then be wrapped in a span/div, get a heading
 * anchor prepended, have variable names replaced by their values and
 * finally have {{TRANSLATIONLANGUAGE}} replaced by a language code.
 *
 * @param Message|null $msg Message carrying the translation, if any.
 * @param Language $sourceLanguage Language of the unit's own text.
 * @param Language $targetLanguage Language of the translation.
 * @param bool $wrapUntranslated Whether untranslated content gets lang/dir/class attributes.
 * @param Parser|null $parser Used to compute the heading anchor; no anchor is added without it.
 * @return string
 */
public function getTextForRendering(
	?Message $msg,
	Language $sourceLanguage,
	Language $targetLanguage,
	bool $wrapUntranslated,
	?Parser $parser = null
): string {
	$wrapperAttributes = [];
	$headingText = null;
	$hasTranslation = $msg && $msg->translation() !== null;

	if ( $hasTranslation ) {
		$content = $msg->translation();
		$headingText = $this->getHeading( $msg->definition() );

		if ( $msg->hasTag( 'fuzzy' ) ) {
			// Explicit fuzzy marks must never show up in rendered pages
			$content = str_replace( TRANSLATE_FUZZY, '', $content );
			$wrapperAttributes['class'] = 'mw-translate-fuzzy';
		}

		$translationLanguage = $targetLanguage->getCode();
	} else {
		$content = $this->getTextWithVariables();

		if ( $wrapUntranslated ) {
			$wrapperAttributes = [
				'lang' => $sourceLanguage->getHtmlCode(),
				'dir' => $sourceLanguage->getDir(),
				'class' => 'mw-content-' . $sourceLanguage->getDir(),
			];
		}

		$translationLanguage = $sourceLanguage->getCode();
	}

	// Wrap only when wrapping is allowed and there is something to attach
	if ( $this->canWrap() && $wrapperAttributes ) {
		if ( $this->isInline() ) {
			$content = Html::rawElement( 'span', $wrapperAttributes, $content );
		} else {
			$content = Html::rawElement( 'div', $wrapperAttributes, "\n$content\n" );
		}
	}

	$valuesByName = [];
	foreach ( $this->getVariables() as $tvar ) {
		$valuesByName[$tvar->getName()] = $tvar->getValue();
	}

	$needsAnchor = $parser && $this->shouldAddAnchor(
		$sourceLanguage,
		$targetLanguage,
		$headingText,
		$msg,
		$this->isInline()
	);
	if ( $needsAnchor ) {
		$sectionName = substr( $parser->guessSectionNameFromWikiText( $headingText ), 1 );
		$anchor = Html::rawElement( 'span', [ 'id' => $sectionName ], '' );
		$content = $anchor . "\n$content";
	}

	$content = strtr( $content, $valuesByName );

	// Done after variable substitution so that the placeholder also works
	// when it comes from a variable value
	return preg_replace(
		self::TRANSLATIONLANGUAGE_REGEX,
		$translationLanguage,
		$content
	);
}
268
/**
 * Checks the variables of this unit and reports problems.
 *
 * A warning is produced for each variable name that does not match
 * $pattern, and an error for each name that is used with more than one
 * distinct value.
 *
 * @return TranslationUnitIssue[] List of issues, re-indexed from zero.
 */
public function getIssues(): array {
	$issues = $usedNames = [];
	foreach ( $this->getVariables() as $variable ) {
		$name = $variable->getName();
		// NOTE(review): $pattern is not assigned anywhere in the visible part of
		// this method (the listing appears to skip a line here) — confirm its
		// definition against the original file.
		if ( !preg_match( $pattern, $name ) ) {
			// Key by name to avoid multiple issues of the same name
			$issues[$name] = new TranslationUnitIssue(
				TranslationUnitIssue::WARNING,
				'tpt-validation-not-insertable',
				[ wfEscapeWikiText( $name ) ]
			);
		}

		// Collect every value seen for this name so reuse can be detected below
		$usedNames[ $name ][] = $variable->getValue();
	}

	foreach ( $usedNames as $name => $contents ) {
		// More than one distinct value under the same name is reported as an error
		$uniqueValueCount = count( array_unique( $contents ) );
		if ( $uniqueValueCount > 1 ) {
			$issues[] = new TranslationUnitIssue(
				TranslationUnitIssue::ERROR,
				'tpt-validation-name-reuse',
				[ wfEscapeWikiText( $name ) ]
			);
		}
	}

	// Drop the name keys used for de-duplication
	return array_values( $issues );
}
300
/**
 * Returns the heading text when the given text as a whole matches the
 * heading pattern: one to six "=" characters, the heading text, and the
 * same run of "=" again, optionally followed by whitespace.
 *
 * @param string $text Text to inspect.
 * @return string|null Heading text without the markers, or null when the text is not a heading.
 */
private function getHeading( string $text ): ?string {
	$parts = [];
	$found = preg_match( '/^(={1,6})[ \t]*(.+?)[ \t]*\1\s*$/', $text, $parts );

	if ( $found === 1 ) {
		return $parts[2];
	}

	return null;
}
307
/**
 * Decides whether an anchor should be added in front of the rendered unit.
 *
 * @param Language $sourceLanguage Language of the source text.
 * @param Language $targetLanguage Language of the translation.
 * @param string|null $headingText Heading text, or null when the unit is not a heading.
 * @param Message|null $msg Message carrying the translation, if any.
 * @param bool $isInline Whether the unit is inline.
 * @return bool
 */
private function shouldAddAnchor(
	Language $sourceLanguage,
	Language $targetLanguage,
	?string $headingText,
	?Message $msg,
	bool $isInline
): bool {
	// No heading means nothing to anchor. Anchors are also added only for
	// translations, see https://phabricator.wikimedia.org/T62544
	if (
		$headingText === null ||
		$sourceLanguage->getCode() === $targetLanguage->getCode()
	) {
		return false;
	}

	// An identical translation would produce an id attribute with a
	// duplicate value
	if ( $msg && $msg->translation() === $msg->definition() ) {
		return false;
	}

	// No anchor when the nowrap attribute is set. Inline translate tags are
	// skipped too, so that input like this keeps working:
	// Text here <translate>== not a heading ==</translate>
	return $this->canWrap() && !$isInline;
}
340}
Interface for message objects used by MessageCollection.
Definition Message.php:13
hasTag(string $tag)
Check if this message has a given tag.
Definition Message.php:71
This class represents one translation unit in a translatable page.
This class represents one translation variable in a translation unit.