Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
TranslationUnit.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\PageTranslation;
5
6use Html;
7use Language;
9use Parser;
10use const PREG_SET_ORDER;
11
/**
 * Characters associated with unit markers. Judging by the name these are the
 * characters a marker must not contain.
 * NOTE(review): confirm against the code that validates unit markers.
 */
public const UNIT_MARKER_INVALID_CHARS = "_/\n<>";

/** Id the constructor assigns to a unit when the caller does not pass one. */
public const NEW_UNIT_ID = '-1';

/**
 * Pattern for the deprecated variable syntax, e.g. <tvar|1>...</>
 * Group 1 is the variable name, group 2 its value.
 */
public const TVAR_OLD_SYNTAX_REGEX = '~<tvar\|([^>]+)>(.*?)</>~us';

/**
 * Pattern for the current variable syntax, e.g. <tvar name=1>...</tvar>
 * The name may be single-quoted (key1), double-quoted (key2) or bare (key3);
 * the content between the tags is captured as "value". The pattern is written
 * with the x flag, so whitespace inside it is not significant.
 */
public const TVAR_NEW_SYNTAX_REGEX =
	<<<'REGEXP'
	~
	<tvar \s+ name \s* = \s*
	( ( ' (?<key1> [^']* ) ' ) | ( " (?<key2> [^"]* ) " ) | (?<key3> [^"'\s>]* ) )
	\s* > (?<value>.*?) </tvar \s* >
	~xusi
	REGEXP;
33
/** @var string Unit id; the constructor defaults it to self::NEW_UNIT_ID */
public $id;

/** @var string Source text of the unit; scanned for <tvar> markup by getVariables() */
public $text;

/** @var string Unit type; the constructor defaults it to 'new' */
public $type;

/** @var string|null Earlier text of the unit, or null when none was supplied */
public $oldText = null;

/** @var bool Inline flag, changed through setIsInline() */
protected $inline = false;

/** @var bool Whether the rendered unit may be wrapped, changed through setCanWrap() */
private $canWrap = true;

/** @var int Version number stored as the first entry of the serialized form */
private $version = 1;

/** @var string[] Property names in the order used by serializeToArray() */
private static $properties = [ 'version', 'id', 'text', 'type', 'oldText', 'inline' ];
53
/**
 * Create a unit from its text and optional metadata.
 *
 * @param string $text Source text of the unit
 * @param string $id Unit id; defaults to self::NEW_UNIT_ID
 * @param string $type Unit type; defaults to 'new'
 * @param string|null $oldText Earlier text of the unit, or null if there is none
 */
public function __construct(
	string $text,
	string $id = self::NEW_UNIT_ID,
	string $type = 'new',
	// Explicitly nullable: relying on "= null" to make a typed parameter
	// nullable is deprecated as of PHP 8.4.
	?string $oldText = null
) {
	$this->text = $text;
	$this->id = $id;
	$this->type = $type;
	$this->oldText = $oldText;
}
65
/**
 * Set the inline flag of this unit.
 *
 * @param bool $value New value of the flag
 */
public function setIsInline( bool $value ): void {
	$this->inline = $value;
}
69
/**
 * Report the inline flag of this unit.
 *
 * @return bool Current value of the flag (false unless changed)
 */
public function isInline(): bool {
	return $this->inline;
}
73
/**
 * Allow or forbid wrapping of this unit when it is rendered.
 *
 * @param bool $value True to allow wrapping, false to forbid it
 */
public function setCanWrap( bool $value ): void {
	$this->canWrap = $value;
}
77
/**
 * Tell whether this unit may be wrapped when it is rendered.
 *
 * @return bool Current value of the flag (true unless changed)
 */
public function canWrap(): bool {
	return $this->canWrap;
}
81
/**
 * Get the unit text exactly as stored, variable markup included.
 *
 * @return string
 */
public function getText(): string {
	return $this->text;
}
86
/**
 * Get the unit text with every variable definition replaced by the
 * variable's name.
 *
 * @return string
 */
public function getTextWithVariables(): string {
	$namesByDefinition = [];
	foreach ( $this->getVariables() as $var ) {
		$definition = $var->getDefinition();
		$namesByDefinition[$definition] = $var->getName();
	}

	return strtr( $this->text, $namesByDefinition );
}
96
/**
 * Get the unit text with every variable definition replaced by the
 * variable's value.
 *
 * @return string
 */
public function getTextForTrans(): string {
	$valuesByDefinition = [];
	foreach ( $this->getVariables() as $var ) {
		$definition = $var->getDefinition();
		$valuesByDefinition[$definition] = $var->getValue();
	}

	return strtr( $this->text, $valuesByDefinition );
}
106
/**
 * Get the unit text together with its <!--T:id--> marker comment.
 *
 * For a heading the marker goes after the text on the same line. Otherwise
 * the marker comes first, followed by a space for inline units or a newline
 * for all other units.
 *
 * @return string
 */
public function getMarkedText(): string {
	$marker = "<!--T:{$this->id}-->";

	// Headings keep the marker on the heading line itself
	if ( $this->getHeading( $this->text ) !== null ) {
		return $this->text . ' ' . $marker;
	}

	$separator = $this->inline ? ' ' : "\n";

	return $marker . $separator . $this->text;
}
124
/**
 * Get the earlier text of the unit, falling back to the current text when
 * no earlier text is recorded.
 *
 * @return string
 */
public function getOldText(): string {
	if ( $this->oldText !== null ) {
		return $this->oldText;
	}

	return $this->text;
}
129
/**
 * Find all translation variables declared in the unit text.
 *
 * Variables in the deprecated syntax are listed first, followed by those in
 * the current syntax; within each group the order of appearance is kept.
 * Every variable name is prefixed with "$".
 *
 * @return TranslationVariable[]
 */
public function getVariables(): array {
	$variables = [];

	// Deprecated form: <tvar|name>value</>
	$oldSyntaxMatches = [];
	preg_match_all( self::TVAR_OLD_SYNTAX_REGEX, $this->text, $oldSyntaxMatches, PREG_SET_ORDER );
	foreach ( $oldSyntaxMatches as $match ) {
		[ $definition, $name, $value ] = $match;
		$variables[] = new TranslationVariable( $definition, '$' . $name, $value );
	}

	// Current form: <tvar name=...>value</tvar>
	$newSyntaxMatches = [];
	preg_match_all( self::TVAR_NEW_SYNTAX_REGEX, $this->text, $newSyntaxMatches, PREG_SET_ORDER );
	foreach ( $newSyntaxMatches as $match ) {
		// The three key groups are alternatives, so at most one of them is
		// a non-empty string; concatenating them yields the name.
		$name = $match['key1'] . $match['key2'] . $match['key3'];
		$variables[] = new TranslationVariable( $match[0], '$' . $name, $match['value'] );
	}

	return $variables;
}
153
/**
 * Export the unit as a plain list of property values.
 *
 * The values follow the order of self::$properties. A list is used instead
 * of an associative array because the result goes to the JobQueue and the
 * shorter form saves space.
 *
 * @return array
 */
public function serializeToArray(): array {
	$values = [];
	foreach ( self::$properties as $position => $name ) {
		$values[$position] = $this->$name;
	}

	return $values;
}
165
/**
 * Rebuild a unit from the list produced by serializeToArray().
 *
 * Values are read by position, following the order of self::$properties.
 * A position that is absent from $data is skipped, so the property keeps the
 * default it has on a freshly constructed unit instead of being overwritten
 * with null (which would, for instance, make isInline() violate its bool
 * return type).
 *
 * @param array $data List of property values
 * @return self
 */
public static function unserializeFromArray( array $data ): self {
	// The constructor requires some text; the real value is copied in below
	$unit = new self( '' );
	foreach ( self::$properties as $index => $property ) {
		// array_key_exists() rather than isset(): null is a legitimate
		// stored value (oldText) and must still be restored.
		if ( array_key_exists( $index, $data ) ) {
			$unit->$property = $data[$index];
		}
	}

	return $unit;
}
175
/**
 * Produce the wikitext used for this unit when a translation page is rendered.
 *
 * When $msg carries a translation, that translation is used (with fuzzy marks
 * stripped and a "mw-translate-fuzzy" class for fuzzy messages). Otherwise the
 * source text with variable names is used and, if requested, tagged with the
 * source language's lang/dir attributes. The content is wrapped in a span
 * (inline units) or div (other units) when there are attributes to carry and
 * wrapping is allowed. For translated headings an empty anchor span may be
 * put in front. Finally variable names are replaced by their values and
 * {{TRANSLATIONLANGUAGE}} by the code of the language of the content.
 *
 * @param Message|null $msg Message holding the translation, if any
 * @param Language $sourceLanguage Language of the source text
 * @param Language $targetLanguage Language of the translation
 * @param bool $wrapUntranslated Whether untranslated content gets language attributes
 * @param Parser|null $parser Parser used to derive the anchor id; no anchor without it
 * @return string
 */
public function getTextForRendering(
	?Message $msg,
	Language $sourceLanguage,
	Language $targetLanguage,
	bool $wrapUntranslated,
	?Parser $parser = null
): string {
	$wrapperAttribs = [];
	$headingText = null;
	$inline = $this->isInline();

	if ( $msg && $msg->translation() !== null ) {
		$content = $msg->translation();
		$headingText = $this->getHeading( $msg->definition() );

		if ( $msg->hasTag( 'fuzzy' ) ) {
			// We do not ever want to show explicit fuzzy marks in the rendered pages
			$content = str_replace( TRANSLATE_FUZZY, '', $content );
			$wrapperAttribs['class'] = 'mw-translate-fuzzy';
		}
		$translationLanguage = $targetLanguage->getCode();
	} else {
		$content = $this->getTextWithVariables();
		if ( $wrapUntranslated ) {
			$wrapperAttribs = [
				'lang' => $sourceLanguage->getHtmlCode(),
				'dir' => $sourceLanguage->getDir(),
				'class' => 'mw-content-' . $sourceLanguage->getDir(),
			];
		}
		$translationLanguage = $sourceLanguage->getCode();
	}

	if ( $this->canWrap() && $wrapperAttribs ) {
		if ( $inline ) {
			$content = Html::rawElement( 'span', $wrapperAttribs, $content );
		} else {
			// Block-level wrapper: keep the content on lines of its own
			$content = Html::rawElement( 'div', $wrapperAttribs, "\n$content\n" );
		}
	}

	$valuesByName = [];
	foreach ( $this->getVariables() as $var ) {
		$valuesByName[$var->getName()] = $var->getValue();
	}

	$needsAnchor = $parser && $this->shouldAddAnchor(
		$sourceLanguage,
		$targetLanguage,
		$headingText,
		$msg,
		$inline
	);
	if ( $needsAnchor ) {
		// The first character of the guessed section name is dropped before use as an id
		$sectionName = substr( $parser->guessSectionNameFromWikiText( $headingText ), 1 );
		$anchor = Html::rawElement( 'span', [ 'id' => $sectionName ], '' );
		$content = $anchor . "\n$content";
	}

	$content = strtr( $content, $valuesByName );

	// Done after variable replacement to allow wrapping this inside variables
	return preg_replace(
		'/{{\s*TRANSLATIONLANGUAGE\s*}}/',
		$translationLanguage,
		$content
	);
}
243
/**
 * Check the variables of this unit and report problems.
 *
 * A warning is produced once per variable whose name does not match the name
 * pattern, and an error for every variable name that is used with more than
 * one distinct value.
 *
 * @return TranslationUnitIssue[]
 */
public function getIssues(): array {
	$issues = [];
	$valuesByName = [];

	foreach ( $this->getVariables() as $variable ) {
		$name = $variable->getName();
		// NOTE(review): $pattern is not assigned anywhere in the visible code of
		// this method; its definition appears to be missing at this point and
		// must be restored for the check below to work.
		if ( !preg_match( $pattern, $name ) ) {
			// Key by name to avoid multiple issues of the same name
			$issues[$name] = new TranslationUnitIssue(
				TranslationUnitIssue::WARNING,
				'tpt-validation-not-insertable',
				[ wfEscapeWikiText( $name ) ]
			);
		}

		$valuesByName[ $name ][] = $variable->getValue();
	}

	foreach ( $valuesByName as $name => $values ) {
		// A name may repeat as long as it always stands for the same value
		if ( count( array_unique( $values ) ) <= 1 ) {
			continue;
		}

		$issues[] = new TranslationUnitIssue(
			TranslationUnitIssue::ERROR,
			'tpt-validation-name-reuse',
			[ wfEscapeWikiText( $name ) ]
		);
	}

	// Drop the name keys so that callers get a plain list
	return array_values( $issues );
}
275
/**
 * Extract the heading text if the given text is a heading.
 *
 * The text counts as a heading when it consists of one to six "=" signs, the
 * heading text, and the same run of "=" signs again, optionally followed by
 * whitespace. Spaces and tabs around the heading text are not returned.
 *
 * @param string $text Text to inspect
 * @return string|null Heading text, or null when $text is not a heading
 */
private function getHeading( string $text ): ?string {
	$groups = [];
	if ( preg_match( '/^(={1,6})[ \t]*(.+?)[ \t]*\1\s*$/', $text, $groups ) ) {
		return $groups[2];
	}

	return null;
}
282
/**
 * Decide whether an anchor should be placed in front of the rendered unit.
 *
 * @param Language $sourceLanguage Language of the source text
 * @param Language $targetLanguage Language of the translation
 * @param string|null $headingText Heading text of the unit, null if it is not a heading
 * @param Message|null $msg Message holding the translation, if any
 * @param bool $isInline Whether the unit is inline
 * @return bool True only when every condition for an anchor is met
 */
private function shouldAddAnchor(
	Language $sourceLanguage,
	Language $targetLanguage,
	?string $headingText,
	?Message $msg,
	bool $isInline
): bool {
	return
		// Only headings can get an anchor
		$headingText !== null
		// We only add an anchor for a translation. See: https://phabricator.wikimedia.org/T62544
		&& $sourceLanguage->getCode() !== $targetLanguage->getCode()
		// An identical translation and source text would create an id attribute
		// with a duplicate value
		&& !( $msg && $msg->translation() === $msg->definition() )
		// If nowrap attribute is set, do not add the anchor
		&& $this->canWrap()
		// We don't add anchors for inline translate tags to avoid breaking input like this:
		// Text here <translate>== not a heading ==</translate>
		&& !$isInline;
}
315}
Interface for message objects used by MessageCollection.
Definition Message.php:13
hasTag(string $tag)
Check if this message has a given tag.
Definition Message.php:71
This class represents one translation unit in a translatable page.
This class represents one translation variable in a translation unit.