Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
54.78% |
275 / 502 |
|
31.11% |
14 / 45 |
CRAP | |
0.00% |
0 / 1 |
LanguageConverter | |
54.89% |
275 / 501 |
|
31.11% |
14 / 45 |
3188.25 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMainCode | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getStaticDefaultVariant | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getLanguageVariants | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getVariantsFallbacks | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getFlags | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
getAdditionalFlags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getManualLevel | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
getAdditionalManualLevel | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDescCodeSeparator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDescVarSeparator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getVariantNames | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
getVariants | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getVariantFallbacks | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getConvRuleTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPreferredVariant | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
6 | |||
getDefaultVariant | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
validateVariant | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
5 | |||
getURLVariant | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
3.02 | |||
getUserVariant | |
84.62% |
11 / 13 |
|
0.00% |
0 / 1 |
5.09 | |||
getHeaderVariant | |
64.00% |
16 / 25 |
|
0.00% |
0 / 1 |
16.65 | |||
autoConvert | |
71.43% |
55 / 77 |
|
0.00% |
0 / 1 |
27.42 | |||
translate | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
translateWithoutRomanNumbers | |
70.59% |
12 / 17 |
|
0.00% |
0 / 1 |
3.23 | |||
autoConvertToAllVariants | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
applyManualConv | |
35.71% |
5 / 14 |
|
0.00% |
0 / 1 |
20.02 | |||
convertSplitTitle | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
convertTitle | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
convertNamespace | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
2 | |||
computeNsVariantText | |
85.71% |
12 / 14 |
|
0.00% |
0 / 1 |
5.07 | |||
convert | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
convertTo | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
3.04 | |||
recursiveConvertTopLevel | |
100.00% |
28 / 28 |
|
100.00% |
1 / 1 |
6 | |||
recursiveConvertRule | |
51.28% |
20 / 39 |
|
0.00% |
0 / 1 |
24.99 | |||
findVariantLink | |
0.00% |
0 / 43 |
|
0.00% |
0 / 1 |
420 | |||
getExtraHashOptions | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
guessVariant | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
loadDefaultTables | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
loadTables | |
95.24% |
20 / 21 |
|
0.00% |
0 / 1 |
4 | |||
postLoadTables | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
reloadTables | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
parseCachedTable | |
17.19% |
11 / 64 |
|
0.00% |
0 / 1 |
296.87 | |||
markNoConversion | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
convertCategoryKey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
updateConversionTable | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
getVarSeparatorPattern | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
5.01 | |||
hasVariants | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasVariant | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
convertHtml | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | * @author Zhengzhu Feng <zhengzhu@gmail.com> |
20 | * @author fdcn <fdcn64@gmail.com> |
21 | * @author shinjiman <shinjiman@gmail.com> |
22 | * @author PhiLiP <philip.npc@gmail.com> |
23 | */ |
24 | |
25 | namespace MediaWiki\Language; |
26 | |
27 | use InvalidArgumentException; |
28 | use MediaWiki\Context\RequestContext; |
29 | use MediaWiki\Debug\DeprecationHelper; |
30 | use MediaWiki\HookContainer\HookRunner; |
31 | use MediaWiki\Html\Html; |
32 | use MediaWiki\Linker\LinkTarget; |
33 | use MediaWiki\Logger\LoggerFactory; |
34 | use MediaWiki\MainConfigNames; |
35 | use MediaWiki\MediaWikiServices; |
36 | use MediaWiki\Page\PageIdentity; |
37 | use MediaWiki\Parser\Parser; |
38 | use MediaWiki\Parser\Sanitizer; |
39 | use MediaWiki\Revision\RevisionRecord; |
40 | use MediaWiki\Revision\SlotRecord; |
41 | use MediaWiki\StubObject\StubUserLang; |
42 | use MediaWiki\Title\Title; |
43 | use MediaWiki\User\User; |
44 | use RuntimeException; |
45 | use StringUtils; |
46 | use UnexpectedValueException; |
47 | use Wikimedia\ObjectCache\BagOStuff; |
48 | |
49 | /** |
50 | * Base class for multi-variant language conversion. |
51 | * |
52 | * @ingroup Language |
53 | */ |
54 | abstract class LanguageConverter implements ILanguageConverter { |
55 | use DeprecationHelper; |
56 | |
/**
 * Main language codes of the languages supporting variants.
 * @since 1.20
 * @var string[]
 */
public static $languagesWithVariants = [
	'ban',
	'en',
	'crh',
	'gan',
	'iu',
	'ku',
	'mni',
	'sh',
	'shi',
	'sr',
	'tg',
	'tly',
	'uz',
	'wuu',
	'zgh',
	'zh',
];

/**
 * Static default variant of each language supporting variants, keyed by
 * main language code; read by getStaticDefaultVariant().
 * For use with DefaultOptionsLookup.php
 * @since 1.40
 * @var array<string,string>
 */
public static $languagesWithStaticDefaultVariant = [
	'ban' => 'ban',
	'en' => 'en',
	'crh' => 'crh',
	'gan' => 'gan',
	'iu' => 'iu',
	'ku' => 'ku',
	'mni' => 'mni',
	// The only entry whose default variant differs from the main code.
	'sh' => 'sh-latn',
	'shi' => 'shi',
	'sr' => 'sr',
	'tg' => 'tg',
	'tly' => 'tly',
	'uz' => 'uz',
	'wuu' => 'wuu',
	'zgh' => 'zgh',
	'zh' => 'zh',
];

/** @var bool NOTE(review): presumably set once loadTables() has run; not visible in this part of the file */
private $mTablesLoaded = false;
/** @var ReplacementArray[] Conversion tables, indexed by variant code */
protected $mTables = [];
/** @var Language|StubUserLang Language object passed to the constructor */
private $mLangObj;
/** @var string|false Title set by a manual conversion rule; false when none is set */
private $mConvRuleTitle = false;
/** @var string|null Memoized result of getURLVariant() */
private $mURLVariant;
/** @var string|null Memoized result of getUserVariant() */
private $mUserVariant;
/** @var string|null Memoized result of getHeaderVariant() */
private $mHeaderVariant;
/** @var int NOTE(review): looks like a recursion depth limit for rule conversion; confirm against recursiveConvertRule() */
private $mMaxDepth = 10;
/** @var string|null NOTE(review): presumably memoized by getVarSeparatorPattern(); not visible in this part of the file */
private $mVarSeparatorPattern;

// NOTE(review): presumably a version stamp for cached conversion tables; its use is not visible here.
private const CACHE_VERSION_KEY = 'VERSION 7';
126 | |
/**
 * Bind the converter to the language object it works for.
 *
 * @param Language|StubUserLang $langobj Language object kept on the converter
 */
public function __construct( $langobj ) {
	$this->mLangObj = $langobj;
}
133 | |
/**
 * Get the language code of the converter (the "main" language code).
 * The page language code is the same as the converter's language code.
 * Note that this code might not be included as one of the variant languages.
 *
 * Within this class the code is used as the lookup key for the static
 * default variant and to build the per-language user option name.
 * @since 1.36
 *
 * @return string
 */
abstract public function getMainCode(): string;
143 | |
/**
 * Get the static default variant.
 *
 * This lets a language specify a default variant that differs from the
 * default "unconverted/mixed-variant form". Languages without an entry in
 * self::$languagesWithStaticDefaultVariant fall back to their main code.
 * @since 1.40
 *
 * @return string
 */
protected function getStaticDefaultVariant(): string {
	$mainCode = $this->getMainCode();
	if ( isset( self::$languagesWithStaticDefaultVariant[$mainCode] ) ) {
		return self::$languagesWithStaticDefaultVariant[$mainCode];
	}
	return $mainCode;
}
156 | |
/**
 * Get supported variants of the language.
 *
 * getVariants() returns this list minus the variants disabled in the
 * site configuration.
 * @since 1.36
 *
 * @return array
 */
abstract protected function getLanguageVariants(): array;
164 | |
/**
 * Get language variants fallbacks.
 *
 * The result is indexed by variant code (see getVariantFallbacks());
 * callers in this class accept either a string or an array as the
 * fallback value of a variant.
 * @since 1.36
 *
 * @return array
 */
abstract public function getVariantsFallbacks(): array;
172 | |
/**
 * Get the strings that map to the flags.
 *
 * The result contains the built-in flags, then whatever
 * getAdditionalFlags() contributes, then one entry per enabled variant
 * (each variant code maps to itself).
 * @since 1.36
 *
 * @return array
 */
final public function getFlags(): array {
	// Flags reserved for the program itself (never listed here):
	//   'S' show the converted text
	//   '+' add rules for alltext
	//   'E' the flags have an error
	//
	// Built-in flags, each mapping to itself:
	//   'A' add rule for convert code (all text converted)
	//   'T' title convert
	//   'R' raw content
	//   'D' convert description (subclass implement)
	//   '-' remove convert (not implement)
	//   'H' add rule for convert code (but no display in placed code)
	//   'N' current variant name
	$builtin = [ 'A', 'T', 'R', 'D', '-', 'H', 'N' ];

	$flagMap = array_merge(
		array_combine( $builtin, $builtin ),
		$this->getAdditionalFlags()
	);
	foreach ( $this->getVariants() as $variantCode ) {
		$flagMap[$variantCode] = $variantCode;
	}

	return $flagMap;
}
199 | |
/**
 * Extra flags merged into the result of getFlags().
 *
 * The base implementation contributes nothing; converter implementations
 * typically override it.
 *
 * @return array
 */
protected function getAdditionalFlags(): array {
	return [];
}
209 | |
/**
 * Get manual level limit for supported variants.
 *
 * Every enabled variant gets an entry: the value supplied by
 * getAdditionalManualLevel() when there is one, 'bidirectional' otherwise.
 * @since 1.36
 *
 * @return array Manual level keyed by variant code
 */
final public function getManualLevel() {
	$overrides = $this->getAdditionalManualLevel();

	$levels = [];
	foreach ( $this->getVariants() as $variantCode ) {
		$levels[$variantCode] = array_key_exists( $variantCode, $overrides )
			? $overrides[$variantCode]
			: 'bidirectional';
	}

	return $levels;
}
228 | |
/**
 * Provides per-variant manual level overrides for the converter.
 *
 * The returned array is keyed by variant code; getManualLevel() uses the
 * value given here for a variant and falls back to 'bidirectional' for
 * variants that are not listed. By default, this function returns an
 * empty array and typically should be overridden by the implementation
 * of the converter.
 * @since 1.36
 *
 * @return array
 */
protected function getAdditionalManualLevel(): array {
	return [];
}
239 | |
/**
 * Get the desc code separator.
 *
 * The base implementation returns ":"; converter implementations may
 * override it.
 * @since 1.36
 *
 * @return string
 */
public function getDescCodeSeparator(): string {
	return ':';
}
250 | |
/**
 * Get the desc var separator.
 *
 * The base implementation returns ";"; converter implementations may
 * override it.
 * @since 1.36
 *
 * @return string
 */
public function getDescVarSeparator(): string {
	return ';';
}
261 | |
/**
 * Get variant names.
 *
 * Delegates to the language name utilities service and returns its
 * language name list as-is.
 *
 * @return array
 */
public function getVariantNames(): array {
	$languageNameUtils = MediaWikiServices::getInstance()->getLanguageNameUtils();

	return $languageNameUtils->getLanguageNames();
}
272 | |
/**
 * Get the variants of this language that are not disabled by the
 * DisabledVariants configuration setting.
 *
 * @return array Enabled variant codes; array keys of the underlying
 *  variant list are preserved (array_diff does not reindex)
 */
final public function getVariants() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$disabled = $config->get( MainConfigNames::DisabledVariants );

	return array_diff( $this->getLanguageVariants(), $disabled );
}
278 | |
/**
 * Get the fallback(s) configured for one variant.
 *
 * @param string $variant Variant code to look up
 * @return string|array The entry from getVariantsFallbacks(), or the
 *  static default variant when the variant has no entry
 */
public function getVariantFallbacks( $variant ) {
	$allFallbacks = $this->getVariantsFallbacks();
	if ( isset( $allFallbacks[$variant] ) ) {
		return $allFallbacks[$variant];
	}

	return $this->getStaticDefaultVariant();
}
282 | |
/**
 * Get the title recorded from a manual conversion rule.
 *
 * @return string|false The stored rule title, or false when none is set
 */
public function getConvRuleTitle() {
	return $this->mConvRuleTitle;
}
286 | |
/**
 * Work out the variant to convert to for the current request.
 *
 * Sources are tried in this order: the URL, the GetLangPreferredVariant
 * hook (which may overwrite the URL value), the registered user's
 * preference or else the Accept-Language header, and finally the
 * DefaultLanguageVariant setting. The result is validated and falls back
 * to the static default variant.
 *
 * This function, unlike the other get*Variant functions, is not memoized
 * (i.e., its return value is not cached) since new information might
 * appear during processing after this is first called.
 *
 * @return string Variant code
 */
public function getPreferredVariant() {
	$variant = $this->getURLVariant();

	$services = MediaWikiServices::getInstance();
	$hookRunner = new HookRunner( $services->getHookContainer() );
	$hookRunner->onGetLangPreferredVariant( $variant );

	if ( !$variant ) {
		$user = RequestContext::getMain()->getUser();
		// NOTE: For some calls there may not be a context user or session that is safe
		// to use, see (T235360)
		// Use case: During user autocreation, UserNameUtils::isUsable is called which uses
		// interface messages for reserved usernames.
		$variant = ( $user->isSafeToLoad() && $user->isRegistered() )
			? $this->getUserVariant( $user )
			: $this->getHeaderVariant();
	}

	$configuredDefault = $services->getMainConfig()
		->get( MainConfigNames::DefaultLanguageVariant );
	if ( !$variant && $configuredDefault ) {
		$variant = $this->validateVariant( $configuredDefault );
	}

	// The hook may have supplied an arbitrary value, so validate once more.
	$variant = $this->validateVariant( $variant );

	return $variant ?? $this->getStaticDefaultVariant();
}
320 | |
/**
 * Get the default variant, ignoring any user preference.
 *
 * Uses the URL variant, then the Accept-Language header variant, then the
 * DefaultLanguageVariant setting, and finally the static default variant.
 *
 * @return string Variant code
 */
public function getDefaultVariant() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$configuredDefault = $config->get( MainConfigNames::DefaultLanguageVariant );

	$variant = $this->getURLVariant() ?? $this->getHeaderVariant();
	if ( !$variant && $configuredDefault ) {
		$variant = $this->validateVariant( $configuredDefault );
	}

	return $variant ?? $this->getStaticDefaultVariant();
}
333 | |
/**
 * Validate a variant code against the enabled variants of this language.
 *
 * The candidate is lower-cased and passed through
 * LanguageCode::replaceDeprecatedCodes() before it is compared with the
 * enabled variants, first directly and then against the BCP 47 form of
 * each variant.
 *
 * @param string|null $variant Variant code to validate
 * @return string|null The matching internal variant code, or null when
 *  $variant is null or matches no enabled variant
 */
public function validateVariant( $variant = null ) {
	if ( $variant === null ) {
		return null;
	}

	// Resolve the enabled variants once; each getVariants() call performs a
	// configuration lookup and an array_diff().
	$variants = $this->getVariants();

	// Our internal variants are always lower-case; the variant we
	// are validating may have mixed cases.
	$variant = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) );
	if ( in_array( $variant, $variants, true ) ) {
		return $variant;
	}

	// Browsers are supposed to use BCP 47 standard in the
	// Accept-Language header, but not all of our internal
	// mediawiki variant codes are BCP 47. Map BCP 47 code
	// to our internal code.
	foreach ( $variants as $v ) {
		// Case-insensitive match (BCP 47 is mixed-case)
		if ( strtolower( LanguageCode::bcp47( $v ) ) === $variant ) {
			return $v;
		}
	}

	return null;
}
356 | |
/**
 * Get the variant requested through the URL.
 *
 * Reads the 'variant' request parameter and, when that is empty, the
 * 'uselang' parameter. A non-empty validated result is memoized.
 *
 * @return string|null Validated variant code, or null if none was requested
 */
public function getURLVariant() {
	if ( $this->mURLVariant ) {
		return $this->mURLVariant;
	}

	$request = RequestContext::getMain()->getRequest();
	// Prefer an explicit 'variant' parameter over 'uselang'.
	$requested = $request->getText( 'variant' ) ?: $request->getVal( 'uselang' );

	$this->mURLVariant = $this->validateVariant( $requested );

	return $this->mURLVariant;
}
373 | |
/**
 * Determine if the user has a variant set.
 *
 * A non-empty result is memoized on the converter.
 *
 * @param User $user
 * @return string|null Variant if one found, null otherwise
 */
protected function getUserVariant( User $user ) {
	// This should only be called within the class after the user is known to be
	// safe to load and logged in, but check just in case.
	if ( !$user->isSafeToLoad() ) {
		return null;
	}

	if ( $this->mUserVariant ) {
		return $this->mUserVariant;
	}

	$services = MediaWikiServices::getInstance();
	if ( !$user->isRegistered() ) {
		// figure out user lang without constructing wgLang to avoid
		// infinite recursion
		$optionName = 'language';
	} elseif ( $this->getMainCode() === $services->getContentLanguageCode()->toString() ) {
		// Logged-in user, converter language is the content language
		$optionName = 'variant';
	} else {
		// Logged-in user, converter language differs from the content language
		$optionName = 'variant-' . $this->getMainCode();
	}

	$preference = $services->getUserOptionsLookup()->getOption( $user, $optionName );
	$this->mUserVariant = $this->validateVariant( $preference );

	return $this->mUserVariant;
}
411 | |
/**
 * Determine the language variant from the Accept-Language header.
 *
 * The header languages are tried in order; the first one that validates
 * wins. If none validates, the fallbacks collected for the header
 * languages are tried in turn. A non-empty result is memoized.
 *
 * @return string|null Variant if one found, null otherwise
 */
protected function getHeaderVariant() {
	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	// See if some supported language variant is set in the HTTP header.
	$acceptLanguages = array_keys( RequestContext::getMain()->getRequest()->getAcceptLang() );
	if ( !$acceptLanguages ) {
		return null;
	}

	$found = null;
	$deferredFallbacks = [];
	foreach ( $acceptLanguages as $code ) {
		$found = $this->validateVariant( $code );
		if ( $found ) {
			break;
		}

		// Remember the fallbacks of this language; they are only
		// considered once every header language has failed.
		$fallbacks = $this->getVariantFallbacks( $code );
		if ( is_array( $fallbacks ) ) {
			$deferredFallbacks = array_merge( $deferredFallbacks, $fallbacks );
		} elseif ( is_string( $fallbacks ) && $fallbacks !== $this->getStaticDefaultVariant() ) {
			$deferredFallbacks[] = $fallbacks;
		}
	}

	if ( !$found ) {
		// process fallback languages now
		foreach ( array_unique( $deferredFallbacks ) as $code ) {
			$found = $this->validateVariant( $code );
			if ( $found ) {
				break;
			}
		}
	}

	$this->mHeaderVariant = $found;

	return $this->mHeaderVariant;
}
465 | |
/**
 * Convert text to a variant using the conversion tables, leaving markup
 * untouched.
 *
 * The text is scanned for pieces that must not be converted (certain
 * tags with their content, HTML tags, entities, parser markers). The
 * stretches in between are queued and translated in a single batch;
 * 'title' and 'alt' attributes of matched tags are converted separately
 * unless they contain '://'.
 *
 * @param string $text Text to convert
 * @param string|false $toVariant Target variant; the preferred variant
 *  is used when this is falsy
 * @return string Converted text; the input is returned unconverted when
 *  no target variant is available, when guessVariant() reports the text
 *  as already being in the target variant, or when the scanning regex
 *  fails
 */
public function autoConvert( $text, $toVariant = false ) {
	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}
	/**
	 * We convert everything except:
	 * 1. HTML markups (anything between < and >)
	 * 2. HTML entities
	 * 3. placeholders created by the parser
	 * IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
	 * Minimize the use of backtracking where possible.
	 */
	static $reg;
	if ( $reg === null ) {
		$marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';

		// this one is needed when the text is inside an HTML markup
		$htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';

		// Optimize for the common case where these tags have
		// few or no children. Thus try and possessively get as much as
		// possible, and only engage in backtracking when we hit a '<'.

		// disable convert to variants between <code> tags
		$codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
		// disable conversion of <script> tags
		$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
		// disable conversion of <pre> tags
		$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
		// disable conversion of <math> tags
		$mathfix = '<math[^>]*+>[^<]*+(?:(?:(?!<\/math>).)[^<]*+)*+<\/math>|';
		// disable conversion of <svg> tags
		$svgfix = '<svg[^>]*+>[^<]*+(?:(?:(?!<\/svg>).)[^<]*+)*+<\/svg>|';
		// The "|.*+)" at the end, is in case we missed some part of html syntax,
		// we will fail securely (hopefully) by matching the rest of the string.
		$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';

		$reg = '/' . $codefix . $scriptfix . $prefix . $mathfix . $svgfix .
			$htmlFullTag .
			'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
	}
	$startPos = 0;
	$sourceBlob = '';
	$literalBlob = '';

	// Guard against delimiter nulls in the input
	// (should never happen: see T159174)
	$text = str_replace( "\000", '', $text );
	$text = str_replace( "\004", '', $text );

	// $text is not modified below, so its length is computed once instead of
	// on every loop iteration.
	$textLength = strlen( $text );

	$markupMatches = null;
	$elementMatches = null;

	// We add a marker (\004) at the end of text, to ensure we always match the
	// entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
	$textWithMarker = $text . "\004";
	while ( $startPos < $textLength ) {
		if ( preg_match( $reg, $textWithMarker, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
			$elementPos = $markupMatches[0][1];
			$element = $markupMatches[0][0];
			if ( $element === "\004" ) {
				// We hit the end.
				$elementPos = $textLength;
				$element = '';
			} elseif ( substr( $element, -1 ) === "\004" ) {
				// This can sometimes happen if we have
				// unclosed html tags. For example,
				// when converting a title attribute
				// during a recursive call that contains
				// a &lt; e.g. <div title="&lt;">.
				$element = substr( $element, 0, -1 );
			}
		} else {
			// If we hit here, then Language Converter could be tricked
			// into doing an XSS, so we refuse to translate.
			// If expected input manages to reach this code path,
			// we should consider it a bug.
			$log = LoggerFactory::getInstance( 'languageconverter' );
			$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
				. ". Disabling language conversion for this page.",
				[
					"method" => __METHOD__,
					"variant" => $toVariant,
					"startOfText" => substr( $text, 0, 500 )
				]
			);
			return $text;
		}
		// Queue the part before the markup for translation in a batch
		$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

		// Advance to the next position
		$startPos = $elementPos + strlen( $element );

		// Translate any alt or title attributes inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
		) {
			// FIXME, this decodes entities, so if you have something
			// like <div title="foo&lt;bar"> the bar won't get
			// translated since after entity decoding it looks like
			// unclosed html and we call this method recursively
			// on attributes.
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			// Ensure self-closing tags stay self-closing.
			$close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
			$changed = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( !isset( $attrs[$attrName] ) ) {
					continue;
				}
				$attr = $attrs[$attrName];
				// Don't convert URLs
				if ( !str_contains( $attr, '://' ) ) {
					$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
				}

				if ( $attr !== $attrs[$attrName] ) {
					$attrs[$attrName] = $attr;
					$changed = true;
				}
			}
			if ( $changed ) {
				// @phan-suppress-next-line SecurityCheck-DoubleEscaped Explained above with decodeTagAttributes
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$close . $elementMatches[3];
			}
		}
		$literalBlob .= $element . "\000";
	}

	// Do the main translation batch
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Put the output back together: translated stretches and untouched
	// literals alternate, both separated by \000.
	$translatedIter = StringUtils::explode( "\000", $translatedBlob );
	$literalIter = StringUtils::explode( "\000", $literalBlob );
	$output = '';
	while ( $translatedIter->valid() && $literalIter->valid() ) {
		$output .= $translatedIter->current();
		$output .= $literalIter->current();
		$translatedIter->next();
		$literalIter->next();
	}

	return $output;
}
622 | |
/**
 * Translate a string to a variant using that variant's conversion table.
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code; a table for it must
 *  exist in $this->mTables after loadTables()
 * @return string Translated text; $text is returned untouched when it is
 *  empty or only contains whitespace
 */
public function translate( $text, $variant ) {
	// If $text is empty or only includes spaces, do nothing.
	// Compare against '' explicitly: a plain truthiness test would also
	// skip the non-empty text "0".
	if ( trim( $text ) !== '' ) {
		$this->loadTables();
		$text = $this->mTables[$variant]->replace( $text );
	}
	return $text;
}
632 | |
/**
 * Translate text while leaving roman numerals (and the delimiter
 * characters matched around them) unconverted.
 *
 * The text is split on roman numerals; the pieces in between are
 * translated and the numerals are copied over verbatim.
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code
 * @return string Translated text
 */
protected function translateWithoutRomanNumbers( $text, $variant ) {
	$breaks = '[^\w\x80-\xff]';

	// regexp for roman numbers
	// Lookahead assertion ensures $roman doesn't match the empty string
	$roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';

	// A numeral counts when it is the whole text, or is delimited by the
	// start/end of the text or by a break character on each side.
	$alternatives = [
		'^' . $roman . '$',
		'^' . $roman . $breaks,
		$breaks . $roman . '$',
		$breaks . $roman . $breaks,
	];
	$reg = '/' . implode( '|', $alternatives ) . '/';

	$pieces = preg_split( $reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );

	$first = array_shift( $pieces );
	$this->loadTables();
	if ( !isset( $this->mTables[$variant] ) ) {
		throw new RuntimeException( "Broken variant table: "
			. implode( ',', array_keys( $this->mTables ) ) );
	}

	$converted = $this->mTables[$variant]->replace( $first[0] );
	// Offset just past the piece handled so far; the gap up to the next
	// piece is the roman numeral that caused the split.
	$cursor = (int)$first[1] + strlen( $first[0] );
	foreach ( $pieces as $piece ) {
		$pieceStart = (int)$piece[1];
		$converted .= substr( $text, $cursor, $pieceStart - $cursor );
		$converted .= $this->translate( $piece[0], $variant );
		$cursor = $pieceStart + strlen( $piece[0] );
	}

	return $converted;
}
666 | |
/**
 * Translate a string into every enabled variant.
 *
 * @param string $text Text to convert
 * @return array Translated text keyed by variant code
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$byVariant = [];
	foreach ( $this->getVariants() as $variantCode ) {
		$byVariant[$variantCode] = $this->translate( $text, $variantCode );
	}

	return $byVariant;
}
677 | |
/**
 * Apply manual conversion rules.
 *
 * Records the rule's title, if it has one, and merges the rule's
 * conversion pairs into (or removes them from) the per-variant tables.
 *
 * @param ConverterRule $convRule
 */
protected function applyManualConv( ConverterRule $convRule ) {
	// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
	// title conversion.
	// T26072: $mConvRuleTitle was overwritten by other manual
	// rule(s) not for title, this breaks the title conversion,
	// so only rules that actually carry a title may replace it.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle !== false ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// merge/remove manual conversion rules to/from global table
	$convTable = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $convTable as $variant => $pairs ) {
		$validVariant = $this->validateVariant( $variant );
		if ( !$validVariant ) {
			continue;
		}

		switch ( $action ) {
			case 'add':
				// More efficient than array_merge(), about 2.5 times.
				foreach ( $pairs as $from => $to ) {
					$this->mTables[$validVariant]->setPair( $from, $to );
				}
				break;
			case 'remove':
				$this->mTables[$validVariant]->removeArray( $pairs );
				break;
		}
	}
}
713 | |
/**
 * Convert a title to the preferred variant, returning its parts
 * separately.
 *
 * @param LinkTarget|PageIdentity $title NOTE(review): type taken from the
 *  file's imports; the code only needs getNamespace() and getDBKey()
 * @return string[] Three elements: converted namespace text, the ':'
 *  separator, and the converted main text
 */
public function convertSplitTitle( $title ) {
	$variant = $this->getPreferredVariant();

	$nsText = $this->convertNamespace( $title->getNamespace(), $variant );

	// DB keys use underscores where the display form has spaces.
	$displayName = str_replace( '_', ' ', $title->getDBKey() );

	return [ $nsText, ':', $this->translate( $displayName, $variant ) ];
}
725 | |
/**
 * Convert a title to the preferred variant as a single string.
 *
 * @param LinkTarget|PageIdentity $title NOTE(review): type taken from the
 *  file's imports; the title is passed straight to convertSplitTitle()
 * @return string "Namespace:Main text", or just the main text when the
 *  converted namespace text is empty
 */
public function convertTitle( $title ) {
	[ $nsText, $nsSeparator, $mainText ] = $this->convertSplitTitle( $title );

	if ( $nsText === '' ) {
		return $mainText;
	}

	return $nsText . $nsSeparator . $mainText;
}
732 | |
/**
 * Get the namespace display name in a variant.
 *
 * The result is looked up in the local server object cache, with a
 * one-minute TTL, and computed on a miss.
 *
 * @param int $index Namespace id
 * @param string|null $variant Variant code; defaults to the preferred variant
 * @return string Namespace name; empty for the main namespace
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	$variant ??= $this->getPreferredVariant();

	$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();

	return $cache->getWithSetCallback(
		$cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant ),
		BagOStuff::TTL_MINUTE,
		fn () => $this->computeNsVariantText( $index, $variant )
	);
}
750 | |
/**
 * Compute the namespace name for a variant (uncached).
 *
 * Sources, in order: the 'conversion-ns<index>' message in the target
 * variant; the same message in the content language, translated to the
 * target variant; the namespace name from the target variant's language
 * object.
 *
 * @param int $index
 * @param string|null $variant
 * @return string
 */
private function computeNsVariantText( int $index, ?string $variant ): string {
	$messageKey = 'conversion-ns' . $index;

	// First check if a message gives a converted name in the target variant.
	$variantMsg = wfMessage( $messageKey )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		return $variantMsg->plain();
	}

	// Then check if a message gives a converted name in content language
	// which needs extra translation to the target variant.
	$contentMsg = wfMessage( $messageKey )->inContentLanguage();
	if ( $contentMsg->exists() ) {
		return $this->translate( $contentMsg->plain(), $variant );
	}

	// No message exists, retrieve it from the target variant's namespace names.
	$variantLanguage = MediaWikiServices::getInstance()
		->getLanguageFactory()
		->getLanguage( $variant );

	return $variantLanguage->getFormattedNsText( $index );
}
783 | |
/**
 * Convert text to the preferred variant.
 *
 * @param string $text Text to convert
 * @return string Converted text
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
788 | |
/**
 * Convert text to a given variant.
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @param bool $clearState Whether to reset the stored conversion rule
 *  title before converting
 * @return string Converted text, or $text unchanged when the converter
 *  factory reports conversion as disabled
 */
public function convertTo( $text, $variant, bool $clearState = true ) {
	$factory = MediaWikiServices::getInstance()->getLanguageConverterFactory();
	if ( $factory->isConversionDisabled() ) {
		return $text;
	}

	if ( $clearState ) {
		// Reset converter state for a new converter run.
		$this->mConvRuleTitle = false;
	}

	return $this->recursiveConvertTopLevel( $text, $variant );
}
800 | |
/**
 * Recursively convert text on the outside. Allows nested markup to
 * define custom rules.
 *
 * Text outside "-{" markers is converted with autoConvert() (unless
 * guessVariant() reports it as already being in the target variant);
 * each "-{" marker found outside HTML is handed to recursiveConvertRule().
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $depth Depth of recursion
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$length = strlen( $text );
	$shouldConvert = !$this->guessVariant( $text, $variant );

	// Plain-text stretches are converted only when needed.
	$convertFragment = fn ( $fragment ) => $shouldConvert
		? $this->autoConvert( $fragment, $variant )
		: $fragment;

	// Alternatives that consume markup and then fail, so that "-{" is
	// only ever matched outside of it.
	$skipped = [
		'<script.*?>.*?<\/script>(*SKIP)(*FAIL)',
		'<style.*?>.*?<\/style>(*SKIP)(*FAIL)',
		'<math.*?>.*?<\/math>(*SKIP)(*FAIL)',
		'<svg.*?>.*?<\/svg>(*SKIP)(*FAIL)',
		// phpcs:ignore Generic.Files.LineLength
		'<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)',
	];
	// Only match "-{" outside the html.
	$pattern = '/' . implode( '|', $skipped ) . '|-\{/';

	$out = '';
	$startPos = 0;
	while ( $startPos < $length ) {
		$found = preg_match( $pattern, $text, $m, PREG_OFFSET_CAPTURE, $startPos );
		if ( !$found ) {
			// No more markup, append final segment
			return $out . $convertFragment( substr( $text, $startPos ) );
		}

		// Offset of the match of the regex pattern.
		$markerPos = $m[0][1];

		// Append initial segment
		$out .= $convertFragment( substr( $text, $startPos, $markerPos - $startPos ) );

		// -{ marker found, not in attribute
		// Advance position up to -{ marker.
		$startPos = $markerPos;
		// Do recursive conversion
		// Note: This passes $startPos by reference, and advances it.
		$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
	}

	return $out;
}
855 | |
/**
 * Convert one -{ ... }- rule, including any rules nested inside it.
 *
 * On entry $startPos must point at the opening "-{". On return it points
 * just past the closing "}-", or at the end of the text when the rule is
 * never closed.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int &$startPos Position of the rule's "-{"; advanced by this method
 * @param int $depth Depth of recursion
 * @return string Converted text
 * @throws InvalidArgumentException If $startPos does not point at "-{"
 * @throws UnexpectedValueException If the regex yields an unknown token
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new InvalidArgumentException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$length = strlen( $text );
	$ruleBody = '';
	$depthWarningShown = false;

	while ( $startPos < $length ) {
		$match = [];
		if ( !preg_match( '/-\{|\}-/', $text, $match, PREG_OFFSET_CAPTURE, $startPos ) ) {
			// Unclosed rule
			break;
		}
		[ $token, $tokenPos ] = $match[0];

		// Collect the text in front of the token, then move onto the token.
		$ruleBody .= substr( $text, $startPos, $tokenPos - $startPos );
		$startPos = $tokenPos;

		if ( $token === '}-' ) {
			// End of this rule: parse it, register its manual conversions
			// and return what it displays.
			$startPos += 2;
			$rule = new ConverterRule( $ruleBody, $this );
			$rule->parse( $variant );
			$this->applyManualConv( $rule );
			return $rule->getDisplay();
		}

		if ( $token !== '-{' ) {
			throw new UnexpectedValueException( __METHOD__ . ': invalid regex match' );
		}

		if ( $depth >= $this->mMaxDepth ) {
			// Too deep: keep the marker as literal text and warn only once.
			$ruleBody .= '-{';
			if ( !$depthWarningShown ) {
				$warning = wfMessage( 'language-converter-depth-warning' )
					->numParams( $this->mMaxDepth )->inContentLanguage()->text();
				$ruleBody .= '<span class="error">' . $warning . '</span>';
				$depthWarningShown = true;
			}
			$startPos += 2;
			continue;
		}

		// Recursively parse another rule
		$ruleBody .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
	}

	// Unclosed rule: take whatever is left and auto-convert the body.
	if ( $startPos < $length ) {
		$ruleBody .= substr( $text, $startPos );
	}
	$startPos = $length;
	return '-{' . $this->autoConvert( $ruleBody, $variant );
}
931 | |
/**
 * Look for an existing page under one of the variant spellings of a link.
 *
 * Nothing is changed when the target already exists, when link conversion
 * is disabled, or (unless $ignoreOtherCond is set) when the current request
 * is an edit/submit or carries redirect=no or linkconvert=no. Otherwise the
 * first variant title with a positive article ID replaces $nt and $link.
 *
 * @param string &$link Link text; replaced by the text of the title found
 * @param mixed &$nt Link target; replaced by the title found
 * @param bool $ignoreOtherCond Skip the request-based conditions
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# A target that already exists needs no variant lookup.
	if ( $nt instanceof LinkTarget ) {
		$nt = Title::castFromLinkTarget( $nt );
		if ( $nt->exists() ) {
			return;
		}
	}
	if ( $nt instanceof PageIdentity && $nt->exists() ) {
		return;
	}

	$request = RequestContext::getMain()->getRequest();
	$redirectParam = $request->getText( 'redirect', 'yes' );
	$action = $request->getText( 'action' );
	if ( $action == 'edit' && $request->getBool( 'redlink' ) ) {
		// An edit request carrying the redlink flag counts as a view.
		$action = 'view';
	}
	$linkConvertParam = $request->getText( 'linkconvert', 'yes' );

	$services = MediaWikiServices::getInstance();
	$conversionDisabled = $services->getLanguageConverterFactory()->isLinkConversionDisabled();
	$batch = MediaWikiServices::getInstance()->getLinkBatchFactory()->newLinkBatch();

	if ( $conversionDisabled ) {
		return;
	}
	if ( !$ignoreOtherCond ) {
		$blockedByRequest = $redirectParam == 'no'
			|| $action == 'edit'
			|| $action == 'submit'
			|| $linkConvertParam == 'no';
		if ( $blockedByRequest ) {
			return;
		}
	}

	$namespace = is_object( $nt ) ? $nt->getNamespace() : NS_MAIN;

	$spellings = $this->autoConvertToAllVariants( $link );
	if ( !$spellings ) {
		// give up
		return;
	}

	$candidates = [];
	foreach ( $spellings as $spelling ) {
		if ( $spelling == $link ) {
			continue;
		}
		$candidate = Title::newFromText( $spelling, $namespace );
		if ( $candidate === null ) {
			continue;
		}
		$batch->addObj( $candidate );
		$candidates[] = $candidate;
	}

	// fetch all variants in single query
	$batch->execute();

	foreach ( $candidates as $candidate ) {
		if ( $candidate->getArticleID() > 0 ) {
			$nt = $candidate;
			$link = $candidate->getText();
			return;
		}
	}
}
1005 | |
/**
 * Build the hash-option suffix for the preferred variant.
 *
 * @return string "!" followed by the value of getPreferredVariant()
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
1011 | |
/**
 * Tell whether $text already appears to be written in $variant.
 *
 * This implementation never guesses and always answers false;
 * recursiveConvertTopLevel() skips auto-conversion when the answer is true.
 *
 * @param string $text Text to inspect (unused here)
 * @param string $variant Variant code to test against (unused here)
 * @return bool Always false
 */
public function guessVariant( $text, $variant ) {
	return false;
}
1015 | |
/**
 * Load default conversion tables.
 *
 * loadTables() uses the result as the starting point for the tables it
 * caches: it looks the result up by each code from getVariants() and calls
 * mergeArray() on the entry, so every variant needs an entry that offers
 * that method.
 *
 * @return array Default tables, indexed by variant code
 */
abstract protected function loadDefaultTables(): array;
1022 | |
/**
 * Load conversion tables either from the cache or the disk.
 *
 * Does nothing when the tables were already loaded. Otherwise the tables
 * are fetched through the configured object cache; on a cache miss they are
 * rebuilt from loadDefaultTables(), merged with the entries that
 * parseCachedTable() returns for each variant, and passed through
 * postLoadTables() before being stored for twelve hours.
 *
 * @private
 * @param bool $fromCache Whether to load from cache. Defaults to true.
 *  When false, the cached entry is deleted first so the tables are rebuilt.
 */
protected function loadTables( $fromCache = true ) {
	// Check the guard before anything else: this method is called again
	// and again once the tables are in place, and those calls should not
	// pay for service and configuration lookups they never use.
	if ( $this->mTablesLoaded ) {
		return;
	}
	$this->mTablesLoaded = true;

	$services = MediaWikiServices::getInstance();
	$languageConverterCacheType = $services
		->getMainConfig()->get( MainConfigNames::LanguageConverterCacheType );
	$cache = $services->getObjectCacheFactory()->getInstance( $languageConverterCacheType );

	// The key depends on the main code, the set of variants and the cache
	// version constant.
	$cacheKey = $cache->makeKey(
		'conversiontables', $this->getMainCode(),
		md5( implode( ',', $this->getVariants() ) ), self::CACHE_VERSION_KEY
	);
	if ( !$fromCache ) {
		$cache->delete( $cacheKey );
	}
	$this->mTables = $cache->getWithSetCallback( $cacheKey, $cache::TTL_HOUR * 12, function () {
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$tables = $this->loadDefaultTables();
		foreach ( $this->getVariants() as $var ) {
			$cached = $this->parseCachedTable( $var );
			$tables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables( $tables );
		return $tables;
	} );
}
1059 | |
/**
 * Hook for post-processing after conversion tables are loaded.
 *
 * loadTables() calls this from its cache-miss callback, after the default
 * tables have been merged with the parsed Conversiontable entries and
 * before the tables are returned to the cache. The tables are passed by
 * reference so that an override can modify them in place. This base
 * implementation does nothing.
 *
 * @param ReplacementArray[] &$tables
 */
protected function postLoadTables( &$tables ) {
}
1067 | |
/**
 * Discard the loaded conversion tables and build them again, bypassing
 * the cache.
 *
 * Also used by test suites which need to reset the converter state.
 *
 * Called by ParserTestRunner with the help of TestingAccessWrapper
 */
private function reloadTables() {
	// Clear the "loaded" flag so that loadTables() does not return early.
	$this->mTablesLoaded = false;

	if ( $this->mTables ) {
		$this->mTables = [];
	}

	// false: delete the cached entry and rebuild the tables.
	$this->loadTables( false );
}
1083 | |
/**
 * Parse the conversion table stored in the cache.
 *
 * The tables should be in blocks of the following form:
 *		-{
 *			word => word ;
 *			word => word ;
 *			...
 *		}-
 *
 * To make the tables more manageable, subpages are allowed
 * and will be parsed recursively if $recursive == true.
 *
 * Each page is parsed at most once: a page that was already visited
 * yields an empty array, which also stops subpages that link to each
 * other from recursing forever.
 *
 * NOTE(review): the visited list is a function-level static that is never
 * cleared, so a later call for the same page in the same request (for
 * example through reloadTables()) also yields an empty array. Confirm that
 * this is intended.
 *
 * @param string $code Language code
 * @param string $subpage Subpage name
 * @param bool $recursive Parse subpages recursively? Defaults to true.
 *
 * @return array Map of source word to replacement; entries coming from
 *  subpages take precedence over entries of the page itself
 */
private function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	static $parsed = [];

	$key = 'Conversiontable/' . $code;
	// Compare with '' instead of testing truthiness: a subpage named "0"
	// is falsy and would otherwise be mistaken for the base page, which
	// has already been parsed, and silently dropped.
	if ( (string)$subpage !== '' ) {
		$key .= '/' . $subpage;
	}
	if ( array_key_exists( $key, $parsed ) ) {
		return [];
	}

	$parsed[$key] = true;

	if ( $subpage === '' ) {
		$messageCache = MediaWikiServices::getInstance()->getMessageCache();
		$txt = $messageCache->getMsgFromNamespace( $key, $code );
	} else {
		$txt = false;
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = MediaWikiServices::getInstance()
				->getRevisionLookup()
				->getRevisionByTitle( $title );
			if ( $revision ) {
				$model = $revision->getSlot(
					SlotRecord::MAIN,
					RevisionRecord::RAW
				)->getModel();
				if ( $model == CONTENT_MODEL_WIKITEXT ) {
					// @phan-suppress-next-line PhanUndeclaredMethod
					$txt = $revision->getContent(
						SlotRecord::MAIN,
						RevisionRecord::RAW
					)->getText();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return [];
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$subs = StringUtils::explode( '[[', $txt );
	$sublinks = [];
	foreach ( $subs as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			continue;
		}
		// Drop the link label, then split the target into
		// "<ns>:Conversiontable" / "<code>" / "<subpage>".
		$target = explode( '|', $link[0], 2 )[0];
		$parts = explode( '/', trim( $target ), 3 );
		// A target without any "/" has no language code part. Skip it
		// here rather than read an array offset that does not exist.
		if ( count( $parts ) < 2 ) {
			continue;
		}

		if ( $parts[0] == $linkhead && $parts[1] == $code ) {
			$sublinks[] = $parts[2] ?? '';
		}
	}

	// parse the mappings in this page
	$blocks = StringUtils::explode( '-{', $txt );
	$ret = [];
	$first = true;
	foreach ( $blocks as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		$mappings = explode( '}-', $block, 2 )[0];
		$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
		$table = StringUtils::explode( ';', $stripped );
		foreach ( $table as $t ) {
			// Splitting into at most three pieces lets an entry with
			// more than one "=>" be told apart and skipped.
			$m = explode( '=>', $t, 3 );
			if ( count( $m ) != 2 ) {
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			// "+" keeps the left operand on duplicate keys, so the
			// subpage's mapping wins.
			$ret = $s + $ret;
		}
	}
	return $ret;
}
1206 | |
/**
 * Wrap text in a -{R|...}- rule.
 *
 * Text that already contains "-{" or "}-" is returned unchanged.
 *
 * @param string $text Text to be marked
 * @param bool $noParse Not used by this implementation
 * @return string The wrapped text, or the input when it is already marked
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	foreach ( [ '-{', '}-' ] as $marker ) {
		if ( str_contains( $text, $marker ) ) {
			return $text;
		}
	}

	return '-{R|' . $text . '}-';
}
1215 | |
/**
 * Convert a category sort key.
 *
 * This implementation performs no conversion and hands the key back as is.
 *
 * @param string $key Category sort key
 * @return string The same key
 */
public function convertCategoryKey( $key ) {
	return $key;
}
1219 | |
/**
 * Reload the conversion tables when the given page is a conversion table
 * page, i.e. "Conversiontable/<variant>[/...]" in the MediaWiki namespace
 * with a variant accepted by validateVariant().
 *
 * @param PageIdentity $page Message page
 *
 * @return void
 */
public function updateConversionTable( PageIdentity $page ) {
	if ( $page->getNamespace() !== NS_MEDIAWIKI ) {
		return;
	}

	$segments = explode( '/', $page->getDBkey(), 3 );
	if ( count( $segments ) <= 1 || $segments[0] != 'Conversiontable' ) {
		return;
	}

	if ( $this->validateVariant( $segments[1] ) ) {
		$this->reloadTables();
	}
}
1234 | |
/**
 * Get the cached separator pattern for ConverterRule::parseRules()
 *
 * The pattern is built on first use and stored for later calls.
 *
 * @return string Regular expression for use with preg_split
 */
public function getVarSeparatorPattern() {
	if ( $this->mVarSeparatorPattern !== null ) {
		return $this->mVarSeparatorPattern;
	}

	// varsep_pattern for preg_split:
	// The text should be split by ";" only if a valid variant
	// name exists after the markup.
	// For example
	// -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
	// <span style="font-size:120%;">yyy</span>;}-
	// we should split it as:
	// [
	// [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
	// [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
	// [2] => ''
	// ]

	// Collect the accepted names as array keys so that every name is
	// listed once, in the order it was first seen.
	$accepted = [];
	foreach ( $this->getVariants() as $code ) {
		$accepted[$code] = true;
		// Accept standard BCP 47 names for variants as well.
		$accepted[LanguageCode::bcp47( $code )] = true;
	}
	// Accept old deprecated names for variants
	foreach ( LanguageCode::getDeprecatedCodeMapping() as $old => $new ) {
		if ( isset( $accepted[$new] ) ) {
			$accepted[$old] = true;
		}
	}

	// A variant name followed by a colon, e.g. "zh-hans:".
	$variantPrefix = '(?:' . implode( '|', array_keys( $accepted ) ) . ')\s*:';

	$this->mVarSeparatorPattern = '/;\s*(?='
		// zh-hans:xxx;zh-hant:yyy
		. $variantPrefix
		// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
		. '|[^;]*?=>\s*' . $variantPrefix
		. '|\s*$)/';

	return $this->mVarSeparatorPattern;
}
1277 | |
/**
 * Tell whether this converter has more than one variant.
 *
 * @return bool True when getVariants() lists at least two entries
 */
public function hasVariants() {
	$variantCount = count( $this->getVariants() );
	return $variantCount > 1;
}
1281 | |
/**
 * Tell whether the given code is one of this converter's variants.
 *
 * @param string $variant Variant code to check
 * @return bool True when $variant is non-empty and validateVariant()
 *  returns exactly the same value
 */
public function hasVariant( $variant ) {
	if ( !$variant ) {
		return false;
	}
	return $this->validateVariant( $variant ) === $variant;
}
1285 | |
/**
 * Convert text with convert() and escape the result for HTML output.
 *
 * @param string $text Text to be converted
 * @return string The converted text, passed through htmlspecialchars()
 */
public function convertHtml( $text ) {
	$converted = $this->convert( $text );
	// @phan-suppress-next-line SecurityCheck-DoubleEscaped convert() is documented to return html
	return htmlspecialchars( $converted );
}
1290 | } |
1291 | |
/**
 * Makes the class reachable under the global name 'LanguageConverter' too.
 *
 * @deprecated class alias since 1.43
 */
class_alias( LanguageConverter::class, 'LanguageConverter' );