Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
54.78% |
275 / 502 |
|
31.11% |
14 / 45 |
CRAP | |
0.00% |
0 / 1 |
LanguageConverter | |
54.89% |
275 / 501 |
|
31.11% |
14 / 45 |
3188.25 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMainCode | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getStaticDefaultVariant | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getLanguageVariants | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getVariantsFallbacks | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getFlags | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
getAdditionalFlags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getManualLevel | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
getAdditionalManualLevel | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDescCodeSeparator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDescVarSeparator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getVariantNames | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
getVariants | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getVariantFallbacks | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getConvRuleTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPreferredVariant | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
6 | |||
getDefaultVariant | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
validateVariant | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
5 | |||
getURLVariant | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
3.02 | |||
getUserVariant | |
84.62% |
11 / 13 |
|
0.00% |
0 / 1 |
5.09 | |||
getHeaderVariant | |
64.00% |
16 / 25 |
|
0.00% |
0 / 1 |
16.65 | |||
autoConvert | |
71.43% |
55 / 77 |
|
0.00% |
0 / 1 |
27.42 | |||
translate | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
translateWithoutRomanNumbers | |
70.59% |
12 / 17 |
|
0.00% |
0 / 1 |
3.23 | |||
autoConvertToAllVariants | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
applyManualConv | |
35.71% |
5 / 14 |
|
0.00% |
0 / 1 |
20.02 | |||
convertSplitTitle | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
convertTitle | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
convertNamespace | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
2 | |||
computeNsVariantText | |
85.71% |
12 / 14 |
|
0.00% |
0 / 1 |
5.07 | |||
convert | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
convertTo | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
3.04 | |||
recursiveConvertTopLevel | |
100.00% |
28 / 28 |
|
100.00% |
1 / 1 |
6 | |||
recursiveConvertRule | |
51.28% |
20 / 39 |
|
0.00% |
0 / 1 |
24.99 | |||
findVariantLink | |
0.00% |
0 / 43 |
|
0.00% |
0 / 1 |
420 | |||
getExtraHashOptions | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
guessVariant | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
loadDefaultTables | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
loadTables | |
95.24% |
20 / 21 |
|
0.00% |
0 / 1 |
4 | |||
postLoadTables | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
reloadTables | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
parseCachedTable | |
17.19% |
11 / 64 |
|
0.00% |
0 / 1 |
296.87 | |||
markNoConversion | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
convertCategoryKey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
updateConversionTable | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
getVarSeparatorPattern | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
5.01 | |||
hasVariants | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasVariant | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
convertHtml | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | * @author Zhengzhu Feng <zhengzhu@gmail.com> |
20 | * @author fdcn <fdcn64@gmail.com> |
21 | * @author shinjiman <shinjiman@gmail.com> |
22 | * @author PhiLiP <philip.npc@gmail.com> |
23 | */ |
24 | |
25 | namespace MediaWiki\Language; |
26 | |
27 | use InvalidArgumentException; |
28 | use MediaWiki\Context\RequestContext; |
29 | use MediaWiki\Debug\DeprecationHelper; |
30 | use MediaWiki\HookContainer\HookRunner; |
31 | use MediaWiki\Html\Html; |
32 | use MediaWiki\Linker\LinkTarget; |
33 | use MediaWiki\Logger\LoggerFactory; |
34 | use MediaWiki\MainConfigNames; |
35 | use MediaWiki\MediaWikiServices; |
36 | use MediaWiki\Page\PageIdentity; |
37 | use MediaWiki\Parser\Parser; |
38 | use MediaWiki\Parser\Sanitizer; |
39 | use MediaWiki\Revision\RevisionRecord; |
40 | use MediaWiki\Revision\SlotRecord; |
41 | use MediaWiki\StubObject\StubUserLang; |
42 | use MediaWiki\Title\Title; |
43 | use MediaWiki\User\User; |
44 | use RuntimeException; |
45 | use StringUtils; |
46 | use UnexpectedValueException; |
47 | use Wikimedia\ObjectCache\BagOStuff; |
48 | |
49 | /** |
50 | * Base class for multi-variant language conversion. |
51 | * |
52 | * @ingroup Language |
53 | */ |
54 | abstract class LanguageConverter implements ILanguageConverter { |
55 | use DeprecationHelper; |
56 | |
/**
 * Codes of the languages for which variant conversion is supported.
 *
 * The list must stay alphabetically sorted (enforced by the
 * phpcs annotation below).
 *
 * @since 1.20
 * @var string[]
 * @phpcs-require-sorted-array
 */
public static $languagesWithVariants = [
	'ban',
	'crh',
	'en',
	'gan',
	'iu',
	'ku',
	'mni',
	'sh',
	'shi',
	'sr',
	'tg',
	'tly',
	'uz',
	'wuu',
	'zgh',
	'zh',
];
81 | |
/**
 * Map from a main language code to its static default variant.
 *
 * Consulted by getStaticDefaultVariant(); intended for use with
 * DefaultOptionsLookup.php. For every language except 'sh' the default
 * variant equals the main code itself.
 *
 * @since 1.40
 * @var array<string,string>
 * @phpcs-require-sorted-array
 */
public static $languagesWithStaticDefaultVariant = [
	'ban' => 'ban',
	'crh' => 'crh',
	'en' => 'en',
	'gan' => 'gan',
	'iu' => 'iu',
	'ku' => 'ku',
	'mni' => 'mni',
	'sh' => 'sh-latn',
	'shi' => 'shi',
	'sr' => 'sr',
	'tg' => 'tg',
	'tly' => 'tly',
	'uz' => 'uz',
	'wuu' => 'wuu',
	'zgh' => 'zgh',
	'zh' => 'zh',
];
107 | |
/**
 * Whether the conversion tables have been loaded.
 * NOTE(review): presumably maintained by loadTables() - confirm there.
 * @var bool
 */
private $mTablesLoaded = false;

/**
 * Conversion tables, indexed by variant code (see translate()).
 * @var ReplacementArray[]
 */
protected $mTables = [];

/**
 * Language object handed to the constructor.
 * @var Language|StubUserLang
 */
private $mLangObj;

/**
 * Title supplied by a manual conversion rule, or false when none is set.
 * Written by applyManualConv() and reset by convertTo().
 * @var string|false
 */
private $mConvRuleTitle = false;

/**
 * Memoized result of getURLVariant().
 * @var string|null
 */
private $mURLVariant;

/**
 * Memoized result of getUserVariant().
 * @var string|null
 */
private $mUserVariant;

/**
 * Memoized result of getHeaderVariant().
 * @var string|null
 */
private $mHeaderVariant;

/**
 * NOTE(review): presumably the maximum nesting depth allowed for
 * recursive rule conversion - confirm against recursiveConvertRule().
 * @var int
 */
private $mMaxDepth = 10;

/**
 * NOTE(review): presumably a cache for getVarSeparatorPattern() - confirm.
 * @var string|null
 */
private $mVarSeparatorPattern;

/**
 * NOTE(review): presumably a version marker for cached conversion
 * tables - confirm where the constant is used.
 */
private const CACHE_VERSION_KEY = 'VERSION 7';
128 | |
/**
 * Store the language object this converter works with.
 *
 * @param Language|StubUserLang $langobj
 */
public function __construct( $langobj ) {
	$this->mLangObj = $langobj;
}
135 | |
/**
 * Return the "main" language code, i.e. the code of the language that
 * owns this converter.
 *
 * The page language code is the same as this code. The main code is
 * not necessarily one of the variant codes.
 *
 * @since 1.36
 *
 * @return string
 */
abstract public function getMainCode(): string;
145 | |
/**
 * Return the static default variant of this converter's language.
 *
 * This lets a language specify a default variant that differs from the
 * default "unconverted/mixed-variant form". The value is looked up in
 * self::$languagesWithStaticDefaultVariant; when the main code has no
 * entry there, the main code itself is returned.
 *
 * @since 1.40
 *
 * @return string
 */
protected function getStaticDefaultVariant(): string {
	$mainCode = $this->getMainCode();
	if ( isset( self::$languagesWithStaticDefaultVariant[$mainCode] ) ) {
		return self::$languagesWithStaticDefaultVariant[$mainCode];
	}
	return $mainCode;
}
158 | |
/**
 * Return every variant the language supports.
 *
 * getVariants() removes the variants disabled by configuration from
 * this list.
 *
 * @since 1.36
 *
 * @return array
 */
abstract protected function getLanguageVariants(): array;
166 | |
/**
 * Return the fallbacks of the language variants.
 *
 * getVariantFallbacks() indexes the returned array by variant code;
 * getHeaderVariant() accepts either a string or an array as the value
 * stored for a variant.
 *
 * @since 1.36
 *
 * @return array
 */
abstract public function getVariantsFallbacks(): array;
174 | |
/**
 * Return the strings that map to the conversion flags.
 *
 * The result merges the built-in flags with getAdditionalFlags() and
 * then adds one entry per enabled variant, mapping the code to itself.
 *
 * @since 1.36
 *
 * @return array
 */
final public function getFlags(): array {
	// Reserved for internal use and therefore not listed here:
	//   'S' show the converted text
	//   '+' add rules for alltext
	//   'E' the flags have an error
	$builtinFlags = [
		// add rule for convert code (all text converted)
		'A' => 'A',
		// title convert
		'T' => 'T',
		// raw content
		'R' => 'R',
		// convert description (subclass implement)
		'D' => 'D',
		// remove convert (not implement)
		'-' => '-',
		// add rule for convert code (but no display in placed code)
		'H' => 'H',
		// current variant name
		'N' => 'N',
	];

	$allFlags = array_merge( $builtinFlags, $this->getAdditionalFlags() );
	foreach ( $this->getVariants() as $variantCode ) {
		$allFlags[$variantCode] = $variantCode;
	}

	return $allFlags;
}
201 | |
/**
 * Extra flags that getFlags() merges over the built-in ones.
 *
 * The base implementation contributes nothing; a converter
 * implementation typically overrides this method.
 *
 * @return array
 */
protected function getAdditionalFlags(): array {
	return [];
}
209 | |
/**
 * Return the manual level limit of every enabled variant.
 *
 * A variant listed by getAdditionalManualLevel() keeps the level given
 * there; every other variant gets 'bidirectional'.
 *
 * @since 1.36
 *
 * @return array Level indexed by variant code
 */
final public function getManualLevel() {
	$overrides = $this->getAdditionalManualLevel();

	$levels = [];
	foreach ( $this->getVariants() as $variantCode ) {
		// array_key_exists() rather than ??, so an explicit null override is kept
		$levels[$variantCode] = array_key_exists( $variantCode, $overrides )
			? $overrides[$variantCode]
			: 'bidirectional';
	}

	return $levels;
}
228 | |
/**
 * Per-variant manual levels that override the 'bidirectional' default
 * applied by getManualLevel().
 *
 * The base implementation contributes nothing; a converter
 * implementation typically overrides this method.
 *
 * @since 1.36
 *
 * @return array Level indexed by variant code
 */
protected function getAdditionalManualLevel(): array {
	return [];
}
239 | |
/**
 * Return the desc code separator.
 *
 * The base implementation returns ":"; a converter implementation may
 * override it.
 *
 * @since 1.36
 *
 * @return string
 */
public function getDescCodeSeparator(): string {
	return ':';
}
250 | |
/**
 * Return the desc var separator.
 *
 * The base implementation returns ";"; a converter implementation may
 * override it.
 *
 * @since 1.36
 *
 * @return string
 */
public function getDescVarSeparator(): string {
	return ';';
}
261 | |
/**
 * Return the language names provided by the LanguageNameUtils service.
 *
 * @return array
 */
public function getVariantNames(): array {
	$languageNameUtils = MediaWikiServices::getInstance()->getLanguageNameUtils();

	return $languageNameUtils->getLanguageNames();
}
267 | |
/**
 * Return the variants of the language that are not disabled by the
 * DisabledVariants configuration setting.
 *
 * The result comes from array_diff(), so the keys of
 * getLanguageVariants() are preserved and may have gaps.
 *
 * @return array
 */
final public function getVariants() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$disabled = $config->get( MainConfigNames::DisabledVariants );

	return array_diff( $this->getLanguageVariants(), $disabled );
}
273 | |
/**
 * Return the fallback(s) registered for a variant.
 *
 * @param string $variant
 * @return mixed The entry of getVariantsFallbacks() for $variant, or
 *  the static default variant when there is no (non-null) entry
 */
public function getVariantFallbacks( $variant ) {
	$allFallbacks = $this->getVariantsFallbacks();

	return $allFallbacks[$variant] ?? $this->getStaticDefaultVariant();
}
277 | |
/**
 * Return the title set by a manual conversion rule.
 *
 * @return string|false False when no rule has set a title since the
 *  state was last reset by convertTo()
 */
public function getConvRuleTitle() {
	return $this->mConvRuleTitle;
}
281 | |
/**
 * Determine the variant to use for the current request.
 *
 * Sources are consulted in this order: the URL, the
 * GetLangPreferredVariant hook, the user preference (registered users
 * that are safe to load) or otherwise the Accept-Language header, the
 * DefaultLanguageVariant setting, and finally the static default
 * variant.
 *
 * Unlike the other get*Variant methods, the result is deliberately not
 * memoized: new information might appear during processing after the
 * first call.
 *
 * @return string
 */
public function getPreferredVariant() {
	$variant = $this->getURLVariant();

	$services = MediaWikiServices::getInstance();
	$hookRunner = new HookRunner( $services->getHookContainer() );
	// NOTE(review): the hook presumably receives $variant by reference
	// and may replace it - confirm against the hook interface.
	$hookRunner->onGetLangPreferredVariant( $variant );

	if ( !$variant ) {
		// There may be no context user or session that is safe to use
		// (T235360), e.g. during user autocreation, when
		// UserNameUtils::isUsable reads interface messages for the
		// reserved usernames.
		$user = RequestContext::getMain()->getUser();
		$variant = ( $user->isSafeToLoad() && $user->isRegistered() )
			? $this->getUserVariant( $user )
			: $this->getHeaderVariant();
	}

	$configuredDefault = $services->getMainConfig()
		->get( MainConfigNames::DefaultLanguageVariant );
	if ( !$variant && $configuredDefault ) {
		$variant = $this->validateVariant( $configuredDefault );
	}

	// Final validation of whatever the steps above produced.
	$variant = $this->validateVariant( $variant );

	return $variant ?? $this->getStaticDefaultVariant();
}
315 | |
/**
 * Determine the default variant, ignoring user preferences.
 *
 * The URL variant is used when it is non-null, otherwise the header
 * variant. If neither yields a variant, the DefaultLanguageVariant
 * setting is validated and used. The static default variant is the
 * last resort.
 *
 * @return string
 */
public function getDefaultVariant() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$configuredDefault = $config->get( MainConfigNames::DefaultLanguageVariant );

	$variant = $this->getURLVariant() ?? $this->getHeaderVariant();
	if ( !$variant && $configuredDefault ) {
		$variant = $this->validateVariant( $configuredDefault );
	}

	return $variant ?? $this->getStaticDefaultVariant();
}
328 | |
/**
 * Check a variant code against the enabled variants of this language.
 *
 * The code is lower-cased and deprecated codes are replaced before the
 * lookup. A code that is not itself an enabled variant is also compared
 * with the lower-cased BCP 47 form of every enabled variant, because
 * browsers send BCP 47 codes in the Accept-Language header while not
 * all internal variant codes are BCP 47.
 *
 * @param string|null $variant Variant code to validate, any letter case
 * @return string|null The internal variant code, or null when $variant
 *  is null or matches no enabled variant
 */
public function validateVariant( $variant = null ) {
	if ( $variant === null ) {
		return null;
	}

	// Internal variant codes are always lower-case; the input may be mixed-case.
	$variant = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) );

	// getVariants() reads the configuration and rebuilds the list on
	// every call, so fetch it once for both lookups below.
	$enabledVariants = $this->getVariants();

	// Strict comparison: loose comparison of two numeric-looking strings
	// would compare them as numbers.
	if ( in_array( $variant, $enabledVariants, true ) ) {
		return $variant;
	}

	// Map a BCP 47 code to the internal code (case-insensitively, since
	// BCP 47 is mixed-case).
	foreach ( $enabledVariants as $candidate ) {
		if ( strtolower( LanguageCode::bcp47( $candidate ) ) === $variant ) {
			return $candidate;
		}
	}

	return null;
}
351 | |
/**
 * Determine the variant requested through the URL.
 *
 * The 'variant' request parameter is read first and 'uselang' is the
 * fallback. The validated result is stored in $this->mURLVariant;
 * because only a truthy stored value short-circuits, a null result is
 * recomputed on the next call.
 *
 * @return string|null Variant if one found, null otherwise
 */
public function getURLVariant() {
	if ( !$this->mURLVariant ) {
		$request = RequestContext::getMain()->getRequest();
		$requested = $request->getText( 'variant' ) ?: $request->getVal( 'uselang' );

		$this->mURLVariant = $this->validateVariant( $requested );
	}

	return $this->mURLVariant;
}
368 | |
/**
 * Determine the variant stored in the user's options.
 *
 * The validated result is stored in $this->mUserVariant and reused by
 * later calls while it is truthy.
 *
 * @param User $user
 * @return string|null Variant if one found, null otherwise
 */
protected function getUserVariant( User $user ) {
	// Callers inside this class only pass users known to be safe to
	// load and logged in, but check anyway.
	if ( !$user->isSafeToLoad() ) {
		return null;
	}

	if ( $this->mUserVariant ) {
		return $this->mUserVariant;
	}

	$services = MediaWikiServices::getInstance();
	if ( !$user->isRegistered() ) {
		// Read the user language option directly instead of
		// constructing wgLang, to avoid infinite recursion.
		$optionName = 'language';
	} elseif ( $this->getMainCode() === $services->getContentLanguageCode()->toString() ) {
		// Variant preference for the content language
		$optionName = 'variant';
	} else {
		// Variant preference for any other language
		$optionName = 'variant-' . $this->getMainCode();
	}

	$stored = $services->getUserOptionsLookup()->getOption( $user, $optionName );
	$this->mUserVariant = $this->validateVariant( $stored );

	return $this->mUserVariant;
}
406 | |
/**
 * Determine the language variant from the Accept-Language header.
 *
 * The accepted languages are tried in order. The first one that
 * validates wins. If none does, the fallbacks collected for the
 * accepted languages are tried in the order they were collected.
 *
 * @return string|null Variant if one found, null otherwise
 */
protected function getHeaderVariant() {
	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	$request = RequestContext::getMain()->getRequest();
	$acceptedLanguages = array_keys( $request->getAcceptLang() );
	if ( !$acceptedLanguages ) {
		return null;
	}

	$fallbackCandidates = [];
	foreach ( $acceptedLanguages as $language ) {
		$this->mHeaderVariant = $this->validateVariant( $language );
		if ( $this->mHeaderVariant ) {
			break;
		}

		// Not a variant itself: remember its fallbacks for the second pass.
		$fallbacks = $this->getVariantFallbacks( $language );
		if ( is_string( $fallbacks ) && $fallbacks !== $this->getStaticDefaultVariant() ) {
			// A string equal to the static default variant is what
			// getVariantFallbacks() returns for "no fallback": skip it.
			$fallbackCandidates[] = $fallbacks;
		} elseif ( is_array( $fallbacks ) ) {
			$fallbackCandidates = array_merge( $fallbackCandidates, $fallbacks );
		}
	}

	if ( !$this->mHeaderVariant ) {
		// Second pass over the de-duplicated fallbacks
		foreach ( array_unique( $fallbackCandidates ) as $language ) {
			$this->mHeaderVariant = $this->validateVariant( $language );
			if ( $this->mHeaderVariant ) {
				break;
			}
		}
	}

	return $this->mHeaderVariant;
}
460 | |
/**
 * Convert the plain-text parts of $text to a variant using the
 * conversion tables, leaving markup untouched.
 *
 * Everything is converted except:
 *  1. HTML markup (anything between < and >), including the whole of
 *     <code>, <script>, <pre>, <math> and <svg> elements
 *  2. HTML entities
 *  3. placeholders created by the parser
 * The title and alt attributes of a matched tag are converted
 * separately, unless their value contains "://".
 *
 * @param string $text Text to convert
 * @param string|false $toVariant Target variant; the preferred variant
 *  is used when this is falsy
 * @return string Converted text, or the input text when no variant can
 *  be determined, when guessVariant() reports the text as already
 *  matching, or when the markup pattern fails to match
 */
public function autoConvert( $text, $toVariant = false ) {
	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}

	// IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
	// Minimize the use of backtracking where possible.
	static $skipPattern;
	if ( $skipPattern === null ) {
		$marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';

		// Needed when the text is inside an HTML markup
		$htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';

		// The element patterns below are optimized for the common case
		// where the tags have few or no children: they possessively
		// take as much as possible and only engage in backtracking
		// when they hit a '<'.

		// No conversion between <code> tags
		$codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
		// No conversion of <script> tags
		$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
		// No conversion of <pre> tags
		$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
		// No conversion of <math> tags
		$mathfix = '<math[^>]*+>[^<]*+(?:(?:(?!<\/math>).)[^<]*+)*+<\/math>|';
		// No conversion of <svg> tags
		$svgfix = '<svg[^>]*+>[^<]*+(?:(?:(?!<\/svg>).)[^<]*+)*+<\/svg>|';
		// The trailing "|.*+)" covers HTML syntax the pattern missed: it
		// fails securely (hopefully) by matching the rest of the string.
		$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';

		$skipPattern = '/' . $codefix . $scriptfix . $prefix . $mathfix . $svgfix .
			$htmlFullTag .
			'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
	}

	$cursor = 0;
	// Text segments queued for translation, each terminated by "\000"
	$sourceBlob = '';
	// Untranslated markup that follows each segment, same termination
	$literalBlob = '';

	// The delimiter bytes must not occur in the input itself
	// (should never happen: see T159174).
	$text = str_replace( "\000", '', $text );
	$text = str_replace( "\004", '', $text );

	$markupMatches = null;
	$elementMatches = null;

	// A \004 end marker guarantees that the pattern always matches up to
	// the end of the text; otherwise pcre.backtrack_limit might cause
	// a silent failure.
	$textWithMarker = $text . "\004";
	$textLength = strlen( $text );

	while ( $cursor < $textLength ) {
		if ( !preg_match( $skipPattern, $textWithMarker, $markupMatches, PREG_OFFSET_CAPTURE, $cursor ) ) {
			// Reaching this point means Language Converter could be
			// tricked into doing an XSS, so refuse to translate.
			// Expected input that ends up here should be treated as a bug.
			$log = LoggerFactory::getInstance( 'languageconverter' );
			$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
				. ". Disabling language conversion for this page.",
				[
					"method" => __METHOD__,
					"variant" => $toVariant,
					"startOfText" => substr( $text, 0, 500 )
				]
			);
			return $text;
		}

		[ $element, $elementPos ] = $markupMatches[0];
		if ( $element === "\004" ) {
			// Only the end marker matched: no more markup.
			$elementPos = $textLength;
			$element = '';
		} elseif ( substr( $element, -1 ) === "\004" ) {
			// The match swallowed the end marker. This can happen with
			// unclosed HTML tags, for example when an attribute value
			// containing a "<" is converted in a recursive call.
			$element = substr( $element, 0, -1 );
		}

		// Queue the part before the markup for translation in a batch
		$sourceBlob .= substr( $text, $cursor, $elementPos - $cursor ) . "\000";

		// Continue after the matched element
		$cursor = $elementPos + strlen( $element );

		// Translate any alt or title attributes inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
		) {
			// FIXME: decodeTagAttributes() decodes entities, so an
			// attribute value containing an encoded "<" looks like
			// unclosed HTML after decoding and the part following it is
			// not translated by the recursive call below.
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			// Ensure self-closing tags stay self-closing.
			$close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';

			$changed = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( !isset( $attrs[$attrName] ) ) {
					continue;
				}
				$originalValue = $attrs[$attrName];
				// Don't convert URLs
				$convertedValue = str_contains( $originalValue, '://' )
					? $originalValue
					: $this->recursiveConvertTopLevel( $originalValue, $toVariant );

				if ( $convertedValue !== $originalValue ) {
					$attrs[$attrName] = $convertedValue;
					$changed = true;
				}
			}

			if ( $changed ) {
				// @phan-suppress-next-line SecurityCheck-DoubleEscaped Explained above with decodeTagAttributes
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$close . $elementMatches[3];
			}
		}

		$literalBlob .= $element . "\000";
	}

	// Do the main translation batch
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Interleave the translated segments with the literal markup
	$translatedIter = StringUtils::explode( "\000", $translatedBlob );
	$literalIter = StringUtils::explode( "\000", $literalBlob );
	$output = '';
	while ( $translatedIter->valid() && $literalIter->valid() ) {
		$output .= $translatedIter->current();
		$output .= $literalIter->current();
		$translatedIter->next();
		$literalIter->next();
	}

	return $output;
}
617 | |
/**
 * Translate text into a variant using that variant's conversion table.
 *
 * Text that is empty, or that consists only of characters removed by
 * trim() (whitespace and NUL bytes), is returned unchanged without
 * loading the tables.
 *
 * @param string $text Text to translate
 * @param string $variant Variant code; $this->mTables must hold a
 *  table for it once the tables are loaded
 * @return string Translated text
 */
public function translate( $text, $variant ) {
	// Compare with '' explicitly: a bare truthiness test would also
	// treat the text "0" as empty and skip its translation.
	if ( trim( $text ) !== '' ) {
		$this->loadTables();
		$text = $this->mTables[$variant]->replace( $text );
	}
	return $text;
}
627 | |
/**
 * Translate text into a variant while leaving Roman numbers alone.
 *
 * The text is split at every Roman number (together with the break
 * characters directly around it). The pieces between the Roman numbers
 * are translated; the split delimiters are copied to the output
 * unchanged.
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code
 * @return string Translated text
 * @throws RuntimeException If no conversion table exists for $variant
 */
protected function translateWithoutRomanNumbers( $text, $variant ) {
	// A byte that is neither a word character nor in the range 0x80-0xff
	$breaks = '[^\w\x80-\xff]';

	// Roman number; the lookahead assertion keeps the pattern from
	// matching the empty string.
	$roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';

	// A Roman number must be delimited on each side by either the
	// start/end of the text or a break character.
	$reg = '/^' . $roman . '$|^' . $roman . $breaks . '|' . $breaks
		. $roman . '$|' . $breaks . $roman . $breaks . '/';

	// Every piece is a [ string, byte offset ] pair.
	$pieces = preg_split( $reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
	$firstPiece = array_shift( $pieces );

	$this->loadTables();
	if ( !isset( $this->mTables[$variant] ) ) {
		throw new RuntimeException( "Broken variant table: "
			. implode( ',', array_keys( $this->mTables ) ) );
	}

	$result = $this->mTables[$variant]->replace( $firstPiece[0] );
	// Offset in $text just after the last piece handled so far
	$consumed = (int)$firstPiece[1] + strlen( $firstPiece[0] );

	foreach ( $pieces as [ $pieceText, $pieceOffset ] ) {
		// Copy the delimiter (Roman number and its breaks) verbatim
		$result .= substr( $text, $consumed, (int)$pieceOffset - $consumed );
		$result .= $this->translate( $pieceText, $variant );
		$consumed = (int)$pieceOffset + strlen( $pieceText );
	}

	return $result;
}
661 | |
/**
 * Translate text into every enabled variant.
 *
 * @param string $text Text to translate
 * @return array Translation indexed by variant code
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$translations = [];
	foreach ( $this->getVariants() as $variantCode ) {
		$translations[$variantCode] = $this->translate( $text, $variantCode );
	}

	return $translations;
}
672 | |
/**
 * Apply a manual conversion rule to the converter state.
 *
 * The rule's title, if any, replaces the stored rule title. The rule's
 * conversion pairs are then added to, or removed from, the tables of
 * the variants they name, depending on the rule's action. Pairs for
 * codes that are not valid variants are ignored.
 *
 * @param ConverterRule $convRule
 */
protected function applyManualConv( ConverterRule $convRule ) {
	// The syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customizes the
	// title conversion. Only overwrite the stored title when this rule
	// actually has one: manual rules that are not about the title would
	// otherwise break the title conversion (T26072).
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle !== false ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// Merge/remove the manual conversion rules to/from the global table
	$convTable = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $convTable as $variant => $pairs ) {
		$validVariant = $this->validateVariant( $variant );
		if ( $validVariant ) {
			if ( $action == 'add' ) {
				// Setting the pairs one by one is more efficient than
				// array_merge(), about 2.5 times.
				foreach ( $pairs as $from => $to ) {
					$this->mTables[$validVariant]->setPair( $from, $to );
				}
			} elseif ( $action == 'remove' ) {
				$this->mTables[$validVariant]->removeArray( $pairs );
			}
		}
	}
}
706 | |
/**
 * Convert a title to the preferred variant, keeping the namespace and
 * the main text apart.
 *
 * @param LinkTarget|PageIdentity $title NOTE(review): type assumed; the
 *  code only requires getNamespace() and getDBKey()
 * @return string[] Three elements: converted namespace text, the
 *  separator ':', and the converted main text
 */
public function convertSplitTitle( $title ) {
	$variant = $this->getPreferredVariant();

	$nsText = $this->convertNamespace( $title->getNamespace(), $variant );

	// Translate the display form of the DB key (spaces, not underscores)
	$displayName = str_replace( '_', ' ', $title->getDBKey() );
	$mainText = $this->translate( $displayName, $variant );

	return [ $nsText, ':', $mainText ];
}
718 | |
/**
 * Convert a title to the preferred variant and return it as one string.
 *
 * @param LinkTarget|PageIdentity $title NOTE(review): type assumed; the
 *  value is passed on to convertSplitTitle()
 * @return string Main text, prefixed by the namespace text and the
 *  separator unless the namespace text is empty
 */
public function convertTitle( $title ) {
	[ $nsText, $nsSeparator, $mainText ] = $this->convertSplitTitle( $title );

	if ( $nsText === '' ) {
		return $mainText;
	}

	return $nsText . $nsSeparator . $mainText;
}
725 | |
/**
 * Return the name of a namespace in a variant.
 *
 * The name is computed by computeNsVariantText() and kept in the local
 * server object cache for BagOStuff::TTL_MINUTE.
 *
 * @param int $index Namespace index
 * @param string|null $variant Variant code; the preferred variant is
 *  used when null
 * @return string Empty string for the main namespace
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	if ( $variant === null ) {
		$variant = $this->getPreferredVariant();
	}

	$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();

	return $cache->getWithSetCallback(
		$cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant ),
		BagOStuff::TTL_MINUTE,
		fn () => $this->computeNsVariantText( $index, $variant )
	);
}
743 | |
/**
 * Work out the name of a namespace in a variant.
 *
 * Sources, in order of precedence:
 *  1. the message "conversion-ns<index>" in the target variant
 *  2. the same message in the content language, translated to the
 *     target variant
 *  3. the formatted namespace text of the target variant's language
 *
 * @param int $index Namespace index
 * @param string|null $variant Target variant code
 * @return string
 */
private function computeNsVariantText( int $index, ?string $variant ): string {
	$messageKey = 'conversion-ns' . $index;

	// A message may give the converted name directly in the target variant.
	$variantMsg = wfMessage( $messageKey )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		return $variantMsg->plain();
	}

	// Otherwise a content-language message may give a name that still
	// needs translation to the target variant.
	$contentMsg = wfMessage( $messageKey )->inContentLanguage();
	if ( $contentMsg->exists() ) {
		return $this->translate( $contentMsg->plain(), $variant );
	}

	// No message exists: use the target variant's own namespace names.
	$variantLanguage = MediaWikiServices::getInstance()
		->getLanguageFactory()
		->getLanguage( $variant );

	return $variantLanguage->getFormattedNsText( $index );
}
776 | |
/**
 * Convert text to the preferred variant.
 *
 * @param string $text Text to convert
 * @return string Converted text
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
781 | |
/**
 * Convert text to a given variant.
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @param bool $clearState Whether to reset the stored rule title
 *  before converting
 * @return string Converted text; the input text when the
 *  LanguageConverterFactory reports conversion as disabled
 */
public function convertTo( $text, $variant, bool $clearState = true ) {
	$converterFactory = MediaWikiServices::getInstance()->getLanguageConverterFactory();
	if ( $converterFactory->isConversionDisabled() ) {
		return $text;
	}

	if ( $clearState ) {
		// Start the new converter run without a rule title
		$this->mConvRuleTitle = false;
	}

	return $this->recursiveConvertTopLevel( $text, $variant );
}
793 | |
/**
 * Convert the text outside of "-{ ... }-" rule markup, handing every
 * rule found to recursiveConvertRule(). Rules may be nested to
 * customize the conversion.
 *
 * The text between the rules goes through autoConvert(), unless
 * guessVariant() reports that the whole text already matches the
 * variant, in which case it is copied unchanged.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $depth Depth of recursion
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$startPos = 0;
	$out = '';
	$length = strlen( $text );
	$shouldConvert = !$this->guessVariant( $text, $variant );

	// Each of these alternatives consumes a stretch of markup and then
	// fails, so that a "-{" inside it is never reported as a match.
	$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
	$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
	$noMath = '<math.*?>.*?<\/math>(*SKIP)(*FAIL)';
	$noSvg = '<svg.*?>.*?<\/svg>(*SKIP)(*FAIL)';
	// phpcs:ignore Generic.Files.LineLength
	$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';

	while ( $startPos < $length ) {
		$found = preg_match(
			// Only match "-{" outside the html.
			"/$noScript|$noStyle|$noMath|$noSvg|$noHtml|-\{/",
			$text,
			$m,
			PREG_OFFSET_CAPTURE,
			$startPos
		);

		if ( !$found ) {
			// No more rule markup: append the rest of the text and stop
			$tail = substr( $text, $startPos );
			$out .= $shouldConvert ? $this->autoConvert( $tail, $variant ) : $tail;
			return $out;
		}

		// Offset of the "-{" marker (found outside any attribute)
		$markerPos = $m[0][1];

		// Append the text in front of the marker
		$leading = substr( $text, $startPos, $markerPos - $startPos );
		$out .= $shouldConvert ? $this->autoConvert( $leading, $variant ) : $leading;

		// Move up to the marker and convert the rule recursively.
		// Note: $startPos is passed by reference and advanced by the call.
		$startPos = $markerPos;
		$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
	}

	return $out;
}
848 | |
/**
 * Convert the inside of a -{ }- rule that begins at $startPos.
 *
 * A closed rule is parsed with ConverterRule, applied through
 * applyManualConv(), and its display text is returned. An unclosed rule
 * yields "-{" followed by the auto-converted remainder of the text.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int &$startPos Offset of the opening "-{"; advanced past the consumed text
 * @param int $depth Depth of recursion
 * @return string Converted text
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new InvalidArgumentException( __METHOD__ . ': invalid input string' );
	}

	$textLength = strlen( $text );
	$ruleBody = '';
	$depthWarningShown = false;
	$startPos += 2;

	while ( $startPos < $textLength ) {
		$match = [];
		if ( !preg_match( '/-\{|\}-/', $text, $match, PREG_OFFSET_CAPTURE, $startPos ) ) {
			// Unclosed rule
			break;
		}
		[ $token, $tokenPos ] = $match[0];

		// Keep the text in front of the token and move up to the token.
		$ruleBody .= substr( $text, $startPos, $tokenPos - $startPos );
		$startPos = $tokenPos;

		if ( $token === '}-' ) {
			// End of this rule: apply it.
			$startPos += 2;
			$rule = new ConverterRule( $ruleBody, $this );
			$rule->parse( $variant );
			$this->applyManualConv( $rule );
			return $rule->getDisplay();
		}

		if ( $token !== '-{' ) {
			throw new UnexpectedValueException( __METHOD__ . ': invalid regex match' );
		}

		if ( $depth < $this->mMaxDepth ) {
			// Recursively parse another rule
			$ruleBody .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
			continue;
		}

		// Maximum depth reached: keep the marker as literal text and
		// emit the warning once per rule.
		$ruleBody .= '-{';
		if ( !$depthWarningShown ) {
			$ruleBody .= '<span class="error">' .
				wfMessage( 'language-converter-depth-warning' )
					->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
				'</span>';
			$depthWarningShown = true;
		}
		$startPos += 2;
	}

	// Unclosed rule
	if ( $startPos < $textLength ) {
		$ruleBody .= substr( $text, $startPos );
	}
	$startPos = $textLength;
	return '-{' . $this->autoConvert( $ruleBody, $variant );
}
924 | |
/**
 * If the link target does not exist, look for an existing page whose title is
 * one of the variant conversions of $link, and point $link and $nt at it.
 *
 * Nothing is changed when the target already exists, when link conversion is
 * disabled, or (unless $ignoreOtherCond is set) when the request carries
 * redirect=no, linkconvert=no, or an edit/submit action. An edit action with
 * the redlink parameter set is treated as a view.
 *
 * @param string &$link Text of the link; replaced by the text of the title found
 * @param mixed &$nt The link target (a LinkTarget is cast to Title); replaced by
 *   the title found. Its namespace is used when it is an object, NS_MAIN otherwise.
 * @param bool $ignoreOtherCond Skip the checks of the request parameters
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# If the article has already existed, there is no need to
	# check it again. Otherwise it may cause a fault.
	if ( $nt instanceof LinkTarget ) {
		$nt = Title::castFromLinkTarget( $nt );
		if ( $nt->exists() ) {
			return;
		}
	}

	if ( $nt instanceof PageIdentity && $nt->exists() ) {
		return;
	}

	$services = MediaWikiServices::getInstance();
	if ( $services->getLanguageConverterFactory()->isLinkConversionDisabled() ) {
		return;
	}

	if ( !$ignoreOtherCond ) {
		$request = RequestContext::getMain()->getRequest();

		$action = $request->getText( 'action' );
		if ( $action === 'edit' && $request->getBool( 'redlink' ) ) {
			$action = 'view';
		}

		if ( $request->getText( 'redirect', 'yes' ) === 'no'
			|| $action === 'edit'
			|| $action === 'submit'
			|| $request->getText( 'linkconvert', 'yes' ) === 'no'
		) {
			return;
		}
	}

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) { // give up
		return;
	}

	$ns = is_object( $nt ) ? $nt->getNamespace() : NS_MAIN;

	// The batch is only created once it is known that a lookup is needed.
	$linkBatch = $services->getLinkBatchFactory()->newLinkBatch();
	$titles = [];

	foreach ( $variants as $v ) {
		// Strict string comparison: a loose != would treat distinct
		// numeric-looking strings (e.g. "1e3" and "1000") as the same text.
		if ( (string)$v === (string)$link ) {
			continue;
		}
		$varnt = Title::newFromText( $v, $ns );
		if ( $varnt !== null ) {
			$linkBatch->addObj( $varnt );
			$titles[] = $varnt;
		}
	}

	if ( !$titles ) {
		// No candidate differs from the link itself; nothing to look up.
		return;
	}

	// fetch all variants in single query
	$linkBatch->execute();

	foreach ( $titles as $varnt ) {
		if ( $varnt->getArticleID() > 0 ) {
			// The first existing variant wins.
			$nt = $varnt;
			$link = $varnt->getText();
			break;
		}
	}
}
998 | |
/**
 * Build the extra hash options string: "!" followed by the variant
 * returned by getPreferredVariant().
 *
 * @return string
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
1004 | |
/**
 * Tell whether the text is already in the given variant.
 *
 * This implementation never makes a guess and always returns false.
 * recursiveConvertTopLevel() skips auto-conversion of the text outside of
 * rules when this returns true.
 *
 * @param string $text Text to examine
 * @param string $variant Variant code to test against
 * @return bool Always false here
 */
public function guessVariant( $text, $variant ) {
	return false;
}
1008 | |
/**
 * Load default conversion tables.
 *
 * loadTables() indexes the result by each code from getVariants() and calls
 * mergeArray() on the entry, so every variant needs an entry that offers
 * that method.
 *
 * @return array
 */
abstract protected function loadDefaultTables(): array;
1015 | |
/**
 * Load conversion tables either from the cache or the disk.
 *
 * Does nothing when the tables are already loaded. Otherwise the tables are
 * read through the configured object cache; on a miss they are built from
 * loadDefaultTables(), merged with parseCachedTable() for each variant,
 * passed to postLoadTables(), and stored for 12 hours.
 *
 * @private
 * @param bool $fromCache Whether to load from cache. Defaults to true.
 *   When false, the cache entry is deleted first so that it is rebuilt.
 */
protected function loadTables( $fromCache = true ) {
	// Checked before anything else, so that repeated calls do not pay for
	// service container and configuration lookups.
	if ( $this->mTablesLoaded ) {
		return;
	}

	$services = MediaWikiServices::getInstance();
	$cacheType = $services->getMainConfig()->get( MainConfigNames::LanguageConverterCacheType );
	$cache = $services->getObjectCacheFactory()->getInstance( $cacheType );

	$cacheKey = $cache->makeKey(
		'conversiontables',
		$this->getMainCode(),
		md5( implode( ',', $this->getVariants() ) ),
		self::CACHE_VERSION_KEY
	);
	if ( !$fromCache ) {
		$cache->delete( $cacheKey );
	}

	$this->mTables = $cache->getWithSetCallback(
		$cacheKey,
		$cache::TTL_HOUR * 12,
		function () {
			// We will first load the default tables
			// then update them using things in MediaWiki:Conversiontable/*
			$tables = $this->loadDefaultTables();
			foreach ( $this->getVariants() as $var ) {
				$cached = $this->parseCachedTable( $var );
				$tables[$var]->mergeArray( $cached );
			}

			$this->postLoadTables( $tables );
			return $tables;
		}
	);
	$this->mTablesLoaded = true;
}
1052 | |
/**
 * Hook for post-processing after conversion tables are loaded.
 *
 * loadTables() calls this with the freshly built tables right before they
 * are returned to the cache. This implementation does nothing.
 *
 * @param ReplacementArray[] &$tables Tables to adjust in place
 */
protected function postLoadTables( &$tables ) {
}
1060 | |
/**
 * Reload the conversion tables.
 *
 * Clears the loaded flag, empties any tables held in memory, and runs
 * loadTables() with the cache bypassed.
 *
 * Also used by test suites which need to reset the converter state.
 *
 * Called by ParserTestRunner with the help of TestingAccessWrapper
 */
private function reloadTables() {
	$this->mTablesLoaded = false;

	if ( $this->mTables ) {
		$this->mTables = [];
	}

	$this->loadTables( false );
}
1076 | |
/**
 * Parse the conversion table stored in the cache.
 *
 * The tables should be in blocks of the following form:
 *		-{
 *			word => word ;
 *			word => word ;
 *			...
 *		}-
 *
 * To make the tables more manageable, subpages are allowed
 * and will be parsed recursively if $recursive == true.
 *
 * Each page key is handled at most once: the visited keys are kept in a
 * static variable, and a key that was already visited yields an empty array.
 *
 * @param string $code Language code
 * @param string $subpage Subpage name
 * @param bool $recursive Parse subpages recursively? Defaults to true.
 *
 * @return array Map of source text to replacement text; entries coming from
 *   a subpage take precedence over the entries gathered before it
 */
private function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	static $parsed = [];

	$key = 'Conversiontable/' . $code;
	// Compare against '' instead of testing truthiness, so that a subpage
	// named "0" gets its own key rather than aliasing the top-level page.
	if ( $subpage !== '' ) {
		$key .= '/' . $subpage;
	}
	if ( array_key_exists( $key, $parsed ) ) {
		return [];
	}

	$parsed[$key] = true;

	if ( $subpage === '' ) {
		$messageCache = MediaWikiServices::getInstance()->getMessageCache();
		$txt = $messageCache->getMsgFromNamespace( $key, $code );
	} else {
		$txt = false;
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = MediaWikiServices::getInstance()
				->getRevisionLookup()
				->getRevisionByTitle( $title );
			if ( $revision ) {
				$model = $revision->getSlot(
					SlotRecord::MAIN,
					RevisionRecord::RAW
				)->getModel();
				// Only wikitext pages are read; other models leave $txt false.
				if ( $model == CONTENT_MODEL_WIKITEXT ) {
					// @phan-suppress-next-line PhanUndeclaredMethod
					$txt = $revision->getContent(
						SlotRecord::MAIN,
						RevisionRecord::RAW
					)->getText();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return [];
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$sublinks = [];
	foreach ( StringUtils::explode( '[[', $txt ) as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			continue;
		}
		$b = explode( '|', $link[0], 2 );
		$b = explode( '/', trim( $b[0] ), 3 );
		// A link needs at least "<linkhead>/<code>". Checking the count
		// first avoids reading a missing $b[1] for a link to the bare
		// Conversiontable page.
		if ( count( $b ) < 2 || $b[0] !== $linkhead || $b[1] !== $code ) {
			continue;
		}
		$sublinks[] = $b[2] ?? '';
	}

	// parse the mappings in this page
	$ret = [];
	$first = true;
	foreach ( StringUtils::explode( '-{', $txt ) as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		$mappings = explode( '}-', $block, 2 )[0];
		$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
		foreach ( StringUtils::explode( ';', $stripped ) as $t ) {
			$m = explode( '=>', $t, 3 );
			// Exactly one "=>" is required for a mapping.
			if ( count( $m ) != 2 ) {
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			$ret = $s + $ret;
		}
	}
	return $ret;
}
1199 | |
/**
 * Wrap text in "-{R|" ... "}-" markup, unless it already contains a
 * "-{" or "}-" marker, in which case it is returned unchanged.
 *
 * @param string $text Text to be marked
 * @param bool $noParse Not used by this implementation
 * @return string
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	$alreadyMarked = str_contains( $text, '-{' ) || str_contains( $text, '}-' );

	return $alreadyMarked ? $text : '-{R|' . $text . '}-';
}
1208 | |
/**
 * Convert a category sort key. This implementation returns the key unchanged.
 *
 * @param string $key Category key
 * @return string The same key
 */
public function convertCategoryKey( $key ) {
	return $key;
}
1212 | |
/**
 * Reload the conversion tables when the given page is a conversion table
 * page, i.e. a page in NS_MEDIAWIKI whose DB key is
 * "Conversiontable/<variant>[/...]" with a variant accepted by validateVariant().
 *
 * @param PageIdentity $page Message page
 *
 * @return void
 */
public function updateConversionTable( PageIdentity $page ) {
	if ( $page->getNamespace() !== NS_MEDIAWIKI ) {
		return;
	}

	$parts = explode( '/', $page->getDBkey(), 3 );
	$isTablePage = count( $parts ) > 1
		&& $parts[0] == 'Conversiontable'
		&& $this->validateVariant( $parts[1] );

	if ( $isTablePage ) {
		$this->reloadTables();
	}
}
1227 | |
/**
 * Get the cached separator pattern for ConverterRule::parseRules()
 *
 * The pattern is built on first use and kept in mVarSeparatorPattern.
 *
 * @return string
 */
public function getVarSeparatorPattern() {
	if ( $this->mVarSeparatorPattern !== null ) {
		return $this->mVarSeparatorPattern;
	}

	// varsep_pattern for preg_split:
	// The text should be split by ";" only if a valid variant
	// name exists after the markup.
	// For example
	//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
	//  <span style="font-size:120%;">yyy</span>;}-
	// we should split it as:
	//  [
	//    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
	//    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
	//    [2] => ''
	//  ]
	$accepted = [];
	foreach ( $this->getVariants() as $variant ) {
		$accepted[ $variant ] = 1;
		// Accept standard BCP 47 names for variants as well.
		$accepted[ LanguageCode::bcp47( $variant ) ] = 1;
	}
	// Accept old deprecated names for variants
	foreach ( LanguageCode::getDeprecatedCodeMapping() as $old => $new ) {
		if ( isset( $accepted[ $new ] ) ) {
			$accepted[ $old ] = 1;
		}
	}
	$alternation = implode( '|', array_keys( $accepted ) );

	$this->mVarSeparatorPattern = '/;\s*(?='
		// zh-hans:xxx;zh-hant:yyy
		. '(?:' . $alternation . ')\s*:'
		// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
		. '|[^;]*?=>\s*(?:' . $alternation . ')\s*:'
		. '|\s*$)/';

	return $this->mVarSeparatorPattern;
}
1270 | |
/**
 * Tell whether getVariants() lists more than one variant.
 *
 * @return bool
 */
public function hasVariants() {
	$variantCount = count( $this->getVariants() );

	return $variantCount > 1;
}
1274 | |
/**
 * Tell whether the given code is a variant of this converter: it must be
 * non-empty and validateVariant() must return it unchanged.
 *
 * @param string $variant Variant code to check
 * @return bool
 */
public function hasVariant( $variant ) {
	if ( !$variant ) {
		return false;
	}

	return $variant === $this->validateVariant( $variant );
}
1278 | |
/**
 * Run convert() on the text and pass the result through htmlspecialchars().
 *
 * @param string $text Text to be converted
 * @return string
 */
public function convertHtml( $text ) {
	$converted = $this->convert( $text );

	// @phan-suppress-next-line SecurityCheck-DoubleEscaped convert() is documented to return html
	return htmlspecialchars( $converted );
}
1283 | } |
1284 | |
/**
 * Registers the name 'LanguageConverter' as an alias of this class.
 *
 * @deprecated class alias since 1.43
 */
class_alias( LanguageConverter::class, 'LanguageConverter' );