Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
54.44% |
270 / 496 |
|
31.11% |
14 / 45 |
CRAP | |
0.00% |
0 / 1 |
LanguageConverter | |
54.44% |
270 / 496 |
|
31.11% |
14 / 45 |
3280.11 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMainCode | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getStaticDefaultVariant | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getLanguageVariants | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getVariantsFallbacks | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getFlags | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
getAdditionalFlags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getManualLevel | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
getAdditionalManualLevel | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDescCodeSeparator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDescVarSeparator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getVariantNames | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
getVariants | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getVariantFallbacks | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getConvRuleTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPreferredVariant | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
6 | |||
getDefaultVariant | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
validateVariant | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
5 | |||
getURLVariant | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
3.02 | |||
getUserVariant | |
84.62% |
11 / 13 |
|
0.00% |
0 / 1 |
5.09 | |||
getHeaderVariant | |
64.00% |
16 / 25 |
|
0.00% |
0 / 1 |
16.65 | |||
autoConvert | |
70.27% |
52 / 74 |
|
0.00% |
0 / 1 |
28.49 | |||
translate | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
translateWithoutRomanNumbers | |
70.59% |
12 / 17 |
|
0.00% |
0 / 1 |
3.23 | |||
autoConvertToAllVariants | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
applyManualConv | |
35.71% |
5 / 14 |
|
0.00% |
0 / 1 |
20.02 | |||
convertSplitTitle | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
convertTitle | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
convertNamespace | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
2 | |||
computeNsVariantText | |
85.71% |
12 / 14 |
|
0.00% |
0 / 1 |
5.07 | |||
convert | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
convertTo | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
3.04 | |||
recursiveConvertTopLevel | |
100.00% |
26 / 26 |
|
100.00% |
1 / 1 |
6 | |||
recursiveConvertRule | |
51.28% |
20 / 39 |
|
0.00% |
0 / 1 |
24.99 | |||
findVariantLink | |
0.00% |
0 / 43 |
|
0.00% |
0 / 1 |
420 | |||
getExtraHashOptions | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
guessVariant | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
loadDefaultTables | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
loadTables | |
95.24% |
20 / 21 |
|
0.00% |
0 / 1 |
4 | |||
postLoadTables | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
reloadTables | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
parseCachedTable | |
17.19% |
11 / 64 |
|
0.00% |
0 / 1 |
296.87 | |||
markNoConversion | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
convertCategoryKey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
updateConversionTable | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
30 | |||
getVarSeparatorPattern | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
5.01 | |||
hasVariants | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasVariant | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
convertHtml | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | * @author Zhengzhu Feng <zhengzhu@gmail.com> |
20 | * @author fdcn <fdcn64@gmail.com> |
21 | * @author shinjiman <shinjiman@gmail.com> |
22 | * @author PhiLiP <philip.npc@gmail.com> |
23 | */ |
24 | |
25 | use MediaWiki\Context\RequestContext; |
26 | use MediaWiki\Debug\DeprecationHelper; |
27 | use MediaWiki\HookContainer\HookRunner; |
28 | use MediaWiki\Html\Html; |
29 | use MediaWiki\Linker\LinkTarget; |
30 | use MediaWiki\Logger\LoggerFactory; |
31 | use MediaWiki\MainConfigNames; |
32 | use MediaWiki\MediaWikiServices; |
33 | use MediaWiki\Page\PageIdentity; |
34 | use MediaWiki\Parser\Parser; |
35 | use MediaWiki\Parser\Sanitizer; |
36 | use MediaWiki\Revision\RevisionRecord; |
37 | use MediaWiki\Revision\SlotRecord; |
38 | use MediaWiki\StubObject\StubUserLang; |
39 | use MediaWiki\Title\Title; |
40 | use MediaWiki\User\User; |
41 | use Wikimedia\ObjectCache\BagOStuff; |
42 | |
43 | /** |
44 | * Base class for multi-variant language conversion. |
45 | * |
46 | * @ingroup Language |
47 | */ |
48 | abstract class LanguageConverter implements ILanguageConverter { |
49 | use DeprecationHelper; |
50 | |
/**
 * Main language codes for which variant conversion is supported.
 *
 * Every code listed here also appears as a key of
 * self::$languagesWithStaticDefaultVariant.
 *
 * @since 1.20
 * @var string[]
 */
public static $languagesWithVariants = [
	'ban',
	'en',
	'crh',
	'gan',
	'iu',
	'ku',
	'mni',
	'sh',
	'shi',
	'sr',
	'tg',
	'tly',
	'uz',
	'wuu',
	'zgh',
	'zh',
];
74 | |
/**
 * Static default variant of each language supporting variants, keyed by the
 * main language code. For use with DefaultOptionsLookup.php; also read by
 * getStaticDefaultVariant().
 *
 * 'sh' is the only entry whose default variant ('sh-latn') differs from the
 * main code itself.
 *
 * @since 1.40
 * @var array<string,string>
 */
public static $languagesWithStaticDefaultVariant = [
	'ban' => 'ban',
	'en' => 'en',
	'crh' => 'crh',
	'gan' => 'gan',
	'iu' => 'iu',
	'ku' => 'ku',
	'mni' => 'mni',
	'sh' => 'sh-latn',
	'shi' => 'shi',
	'sr' => 'sr',
	'tg' => 'tg',
	'tly' => 'tly',
	'uz' => 'uz',
	'wuu' => 'wuu',
	'zgh' => 'zgh',
	'zh' => 'zh',
];
99 | |
/**
 * Whether the conversion tables have been loaded.
 * NOTE(review): presumably maintained by loadTables(); the code that sets it
 * is not visible in this part of the file - confirm.
 * @var bool
 */
private $mTablesLoaded = false;
/**
 * Conversion tables; translate() looks one up by variant code.
 * @var ReplacementArray[]
 */
protected $mTables = [];
/**
 * Language object handed to the constructor.
 * @var Language|StubUserLang
 */
private $mLangObj;
/**
 * Title supplied by a manual conversion rule (see applyManualConv()), or
 * false when none is set. convertTo() resets it to false when asked to
 * clear state.
 * @var string|false
 */
private $mConvRuleTitle = false;
/**
 * Validated variant taken from the request, memoised by getURLVariant().
 * @var string|null
 */
private $mURLVariant;
/**
 * Validated variant taken from user options, memoised by getUserVariant().
 * @var string|null
 */
private $mUserVariant;
/**
 * Validated variant derived from the Accept-Language header, memoised by
 * getHeaderVariant().
 * @var string|null
 */
private $mHeaderVariant;
/**
 * NOTE(review): presumably the nesting limit for recursive rule conversion;
 * its use is not visible in this part of the file - confirm.
 * @var int
 */
private $mMaxDepth = 10;
/**
 * NOTE(review): presumably a regex cached by getVarSeparatorPattern(); its
 * use is not visible in this part of the file - confirm.
 * @var string|null
 */
private $mVarSeparatorPattern;

/**
 * NOTE(review): presumably a version tag stored with cached conversion
 * tables so stale cache entries can be detected; its use is not visible in
 * this part of the file - confirm.
 */
private const CACHE_VERSION_KEY = 'VERSION 7';
120 | |
/**
 * Store the language object this converter works for.
 *
 * @param Language|StubUserLang $langobj Kept as-is in $this->mLangObj
 */
public function __construct( $langobj ) {
	$this->mLangObj = $langobj;
}
127 | |
/**
 * Get the language code of the language that has the converter (the "main"
 * language code).
 * The page language code is the same as this code.
 * Note that this code is not necessarily one of the variant codes.
 * @since 1.36
 *
 * @return string
 */
abstract public function getMainCode(): string;
137 | |
/**
 * Get the static default variant.
 *
 * This names the default variant form for languages where it differs from
 * the default "unconverted/mixed-variant form". Languages without an entry
 * in self::$languagesWithStaticDefaultVariant fall back to their main code.
 * @since 1.40
 *
 * @return string
 */
protected function getStaticDefaultVariant(): string {
	$mainCode = $this->getMainCode();
	if ( isset( self::$languagesWithStaticDefaultVariant[$mainCode] ) ) {
		return self::$languagesWithStaticDefaultVariant[$mainCode];
	}
	return $mainCode;
}
150 | |
/**
 * Get the variants supported by the language.
 *
 * getVariants() takes this list and removes the variants disabled by
 * configuration.
 * @since 1.36
 *
 * @return array
 */
abstract protected function getLanguageVariants(): array;
158 | |
/**
 * Get the fallbacks of the language variants.
 *
 * getVariantFallbacks() indexes the result by variant code, and
 * getHeaderVariant() accepts either a string or an array as the value
 * stored for a code.
 * @since 1.36
 *
 * @return array
 */
abstract public function getVariantsFallbacks(): array;
166 | |
/**
 * Build the table of strings that map to conversion flags.
 *
 * The table starts with the built-in flags, is merged with whatever
 * getAdditionalFlags() returns (entries with the same string key replace
 * the built-in ones), and finally gets one identity entry for each variant
 * returned by getVariants().
 * @since 1.36
 *
 * @return array
 */
final public function getFlags(): array {
	// Reserved for the program itself, hence absent from the table below:
	//   'S' show the converted text
	//   '+' add rules for alltext
	//   'E' the flags have an error
	$builtInFlags = [
		'A' => 'A', // add rule for convert code (all text converted)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implement)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N', // current variant name
	];

	$flagMap = array_merge( $builtInFlags, $this->getAdditionalFlags() );

	// Every enabled variant code is a flag that maps to itself.
	foreach ( $this->getVariants() as $variantCode ) {
		$flagMap[$variantCode] = $variantCode;
	}

	return $flagMap;
}
193 | |
/**
 * Provide additional flags for the converter; getFlags() merges them over
 * the built-in flags. The default implementation returns an empty array and
 * is typically overridden by a concrete converter.
 *
 * @return array
 */
protected function getAdditionalFlags(): array {
	return [];
}
203 | |
/**
 * Get the manual level limit of every enabled variant.
 *
 * A variant that has an entry in getAdditionalManualLevel() uses that
 * entry; every other variant is 'bidirectional'.
 * @since 1.36
 *
 * @return array Variant code => manual level
 */
final public function getManualLevel() {
	$overrides = $this->getAdditionalManualLevel();
	$levels = [];
	foreach ( $this->getVariants() as $variantCode ) {
		// array_key_exists() rather than ??, so that an explicit null
		// override is kept.
		$levels[$variantCode] = array_key_exists( $variantCode, $overrides )
			? $overrides[$variantCode]
			: 'bidirectional';
	}
	return $levels;
}
222 | |
/**
 * Provide per-variant manual levels for the converter, keyed by variant
 * code. getManualLevel() uses these and treats every variant without an
 * entry as 'bidirectional'. The default implementation returns an empty
 * array and is typically overridden by a concrete converter.
 * @since 1.36
 *
 * @return array
 */
protected function getAdditionalManualLevel(): array {
	return [];
}
233 | |
/**
 * Get the desc code separator. Returns ":" by default; a concrete converter
 * may override it.
 * @since 1.36
 *
 * @return string
 */
public function getDescCodeSeparator(): string {
	return ':';
}
244 | |
/**
 * Get the desc var separator. Returns ";" by default; a concrete converter
 * may override it.
 * @since 1.36
 *
 * @return string
 */
public function getDescVarSeparator(): string {
	return ';';
}
255 | |
/**
 * Get variant names.
 *
 * The default implementation returns the language-name list provided by the
 * LanguageNameUtils service.
 *
 * @return array
 */
public function getVariantNames(): array {
	$languageNameUtils = MediaWikiServices::getInstance()->getLanguageNameUtils();
	return $languageNameUtils->getLanguageNames();
}
266 | |
/**
 * Get the variants of the language that are not disabled by the
 * DisabledVariants configuration setting.
 *
 * The keys of getLanguageVariants() are preserved, so the result may have
 * gaps in its keys.
 *
 * @return array
 */
final public function getVariants() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$disabled = $config->get( MainConfigNames::DisabledVariants );
	$supported = $this->getLanguageVariants();
	return array_diff( $supported, $disabled );
}
272 | |
/**
 * Get the fallback(s) registered for a variant.
 *
 * @param string $variant Variant code to look up
 * @return mixed The entry of getVariantsFallbacks() for $variant, or the
 *  static default variant when there is no (non-null) entry
 */
public function getVariantFallbacks( $variant ) {
	$fallbackMap = $this->getVariantsFallbacks();
	if ( isset( $fallbackMap[$variant] ) ) {
		return $fallbackMap[$variant];
	}
	return $this->getStaticDefaultVariant();
}
276 | |
/**
 * Get the title set by the most recently applied manual conversion rule
 * that carried one (see applyManualConv()).
 *
 * @return string|false The stored title, or false when none is set
 */
public function getConvRuleTitle() {
	return $this->mConvRuleTitle;
}
280 | |
/**
 * Work out the variant that should be used for the current request.
 *
 * Sources are consulted in this order: the request (URL) variant, possibly
 * replaced by GetLangPreferredVariant hook handlers; the user's option (for
 * a registered user that is safe to load) or otherwise the Accept-Language
 * header; the DefaultLanguageVariant setting; and finally the static
 * default variant.
 *
 * Unlike the other get*Variant functions, this one is not memoized (i.e.
 * its return value is not cached), because new information might appear
 * during processing after it is first called.
 *
 * @return string
 */
public function getPreferredVariant() {
	$variant = $this->getURLVariant();

	$services = MediaWikiServices::getInstance();
	$hookRunner = new HookRunner( $services->getHookContainer() );
	$hookRunner->onGetLangPreferredVariant( $variant );

	if ( !$variant ) {
		$user = RequestContext::getMain()->getUser();
		// NOTE: For some calls there may not be a context user or session that
		// is safe to use, see (T235360).
		// Use case: during user autocreation, UserNameUtils::isUsable is called,
		// which uses interface messages for reserved usernames.
		$variant = ( $user->isSafeToLoad() && $user->isRegistered() )
			? $this->getUserVariant( $user )
			: $this->getHeaderVariant();
	}

	$configuredDefault = $services->getMainConfig()
		->get( MainConfigNames::DefaultLanguageVariant );
	if ( !$variant && $configuredDefault ) {
		$variant = $this->validateVariant( $configuredDefault );
	}

	// Validate whatever was picked; a hook handler may have supplied it.
	$variant = $this->validateVariant( $variant );
	if ( $variant === null ) {
		return $this->getStaticDefaultVariant();
	}
	return $variant;
}
314 | |
/**
 * Work out the default variant without looking at user options.
 *
 * The request (URL) variant is used when there is one, otherwise the
 * Accept-Language header variant, then the DefaultLanguageVariant setting,
 * and finally the static default variant.
 *
 * @return string
 */
public function getDefaultVariant() {
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$configuredDefault = $config->get( MainConfigNames::DefaultLanguageVariant );

	$variant = $this->getURLVariant();
	if ( $variant === null ) {
		$variant = $this->getHeaderVariant();
	}

	if ( !$variant && $configuredDefault ) {
		$variant = $this->validateVariant( $configuredDefault );
	}

	if ( $variant === null ) {
		return $this->getStaticDefaultVariant();
	}
	return $variant;
}
327 | |
/**
 * Validate a variant code and map it to the internal variant code.
 *
 * The input is lower-cased and passed through
 * LanguageCode::replaceDeprecatedCodes(). It is accepted if it is one of
 * the enabled variants, or if it equals the lower-cased BCP 47 form of one
 * of them.
 *
 * @param string|null $variant Variant code to validate; may be mixed-case
 * @return string|null The internal variant code, or null when $variant is
 *  null or matches no enabled variant
 */
public function validateVariant( $variant = null ) {
	if ( $variant === null ) {
		return null;
	}
	// Our internal variants are always lower-case; the variant we
	// are validating may have mixed cases.
	$variant = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) );

	// Fetch the list once: every getVariants() call reads configuration
	// and recomputes an array difference.
	$variants = $this->getVariants();

	// Strict comparison, so that numeric-looking strings are never
	// considered equal through type juggling.
	if ( in_array( $variant, $variants, true ) ) {
		return $variant;
	}
	// Browsers are supposed to use BCP 47 standard in the
	// Accept-Language header, but not all of our internal
	// mediawiki variant codes are BCP 47. Map BCP 47 code
	// to our internal code.
	foreach ( $variants as $v ) {
		// Case-insensitive match (BCP 47 is mixed-case)
		if ( strtolower( LanguageCode::bcp47( $v ) ) === $variant ) {
			return $v;
		}
	}
	return null;
}
350 | |
/**
 * Get the variant requested through the 'variant' request parameter or,
 * when that is empty, through 'uselang'.
 *
 * A truthy result is memoised in $this->mURLVariant; a null result is
 * recomputed on the next call.
 *
 * @return string|null Validated variant code, or null if none was requested
 */
public function getURLVariant() {
	if ( !$this->mURLVariant ) {
		$request = RequestContext::getMain()->getRequest();
		// See if the preference is set in the request.
		$requested = $request->getText( 'variant' ) ?: $request->getVal( 'uselang' );
		$this->mURLVariant = $this->validateVariant( $requested );
	}
	return $this->mURLVariant;
}
367 | |
/**
 * Determine if the user has a variant set.
 *
 * A truthy result is memoised in $this->mUserVariant and returned by later
 * calls without consulting $user's options again.
 *
 * @param User $user
 * @return string|null Variant if one found, null otherwise
 */
protected function getUserVariant( User $user ) {
	// This should only be called within the class after the user is known
	// to be safe to load and logged in, but check just in case.
	if ( !$user->isSafeToLoad() ) {
		return null;
	}

	if ( $this->mUserVariant ) {
		return $this->mUserVariant;
	}

	$services = MediaWikiServices::getInstance();
	if ( !$user->isRegistered() ) {
		// Figure out the user language without constructing wgLang, to
		// avoid infinite recursion.
		$optionName = 'language';
	} elseif ( $this->getMainCode() === $services->getContentLanguage()->getCode() ) {
		// Variant preference of a logged-in user, for the content language.
		$optionName = 'variant';
	} else {
		// Variant preference of a logged-in user, for another language.
		$optionName = 'variant-' . $this->getMainCode();
	}

	$optionValue = $services->getUserOptionsLookup()->getOption( $user, $optionName );
	$this->mUserVariant = $this->validateVariant( $optionValue );

	return $this->mUserVariant;
}
405 | |
/**
 * Determine the language variant from the Accept-Language header.
 *
 * The header languages are tried in order. If none of them validates, the
 * fallbacks collected for those languages are tried, also in order.
 *
 * @return string|null Variant if one found, null otherwise
 */
protected function getHeaderVariant() {
	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	// See if some supported language variant is set in the HTTP header.
	$request = RequestContext::getMain()->getRequest();
	$acceptLanguages = array_keys( $request->getAcceptLang() );
	if ( !$acceptLanguages ) {
		return null;
	}

	$fallbackCandidates = [];
	foreach ( $acceptLanguages as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// Remember the fallbacks of this language; they are only tried
		// once every header language has been rejected.
		$fallbacks = $this->getVariantFallbacks( $code );
		if ( is_array( $fallbacks ) ) {
			$fallbackCandidates = array_merge( $fallbackCandidates, $fallbacks );
		} elseif (
			is_string( $fallbacks ) &&
			$fallbacks !== $this->getStaticDefaultVariant()
		) {
			$fallbackCandidates[] = $fallbacks;
		}
	}

	// No header language matched: process the fallback languages now.
	foreach ( array_unique( $fallbackCandidates ) as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			break;
		}
	}

	return $this->mHeaderVariant;
}
459 | |
/**
 * Convert the plain-text parts of $text to a variant, leaving markup alone.
 *
 * The text is scanned with a single regular expression that matches the
 * pieces that must not be converted (<code>, <script> and <pre> elements,
 * HTML tags, HTML entities and parser markers). The text between those
 * pieces is collected into one NUL-delimited blob and translated with a
 * single translate() call, then interleaved again with the untouched
 * pieces. The 'title' and 'alt' attributes of matched tags are converted
 * separately, unless they contain '://'.
 *
 * @param string $text Text to convert
 * @param string|false $toVariant Target variant code; when falsy, the
 *  result of getPreferredVariant() is used
 * @return string The converted text. $text is returned as given when no
 *  target variant can be determined or when guessVariant() reports that no
 *  conversion is needed; if the regular expression fails to match, the
 *  text is returned unconverted with its "\000" and "\004" bytes removed.
 */
public function autoConvert( $text, $toVariant = false ) {
	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}
	/**
	 * We convert everything except:
	 * 1. HTML markups (anything between < and >)
	 * 2. HTML entities
	 * 3. placeholders created by the parser
	 * IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
	 * Minimize the use of backtracking where possible.
	 */
	// The pattern does not depend on the arguments, so it is built once and
	// kept in a function-static variable.
	static $reg;
	if ( $reg === null ) {
		$marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';

		// this one is needed when the text is inside an HTML markup
		$htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';

		// Optimize for the common case where these tags have
		// few or no children. Thus try and possessively get as much as
		// possible, and only engage in backtracking when we hit a '<'.

		// disable convert to variants between <code> tags
		$codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
		// disable conversion of <script> tags
		$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
		// disable conversion of <pre> tags
		$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
		// The "|.*+)" at the end, is in case we missed some part of html syntax,
		// we will fail securely (hopefully) by matching the rest of the string.
		$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';

		$reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
			'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
	}
	// Byte offset in $text at which the next scan starts.
	$startPos = 0;
	// Convertible text segments, each terminated by "\000".
	$sourceBlob = '';
	// Non-convertible elements, each terminated by "\000"; the n-th element
	// follows the n-th segment of $sourceBlob in the output.
	$literalBlob = '';

	// Guard against delimiter nulls in the input
	// (should never happen: see T159174)
	$text = str_replace( "\000", '', $text );
	$text = str_replace( "\004", '', $text );

	$markupMatches = null;
	$elementMatches = null;

	// We add a marker (\004) at the end of text, to ensure we always match the
	// entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
	$textWithMarker = $text . "\004";
	while ( $startPos < strlen( $text ) ) {
		if ( preg_match( $reg, $textWithMarker, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
			$elementPos = $markupMatches[0][1];
			$element = $markupMatches[0][0];
			if ( $element === "\004" ) {
				// We hit the end.
				$elementPos = strlen( $text );
				$element = '';
			} elseif ( substr( $element, -1 ) === "\004" ) {
				// This can sometimes happen if we have
				// unclosed html tags. For example,
				// when converting a title attribute
				// during a recursive call that contains
				// a < (e.g. from <div title="&lt;">).
				$element = substr( $element, 0, -1 );
			}
		} else {
			// If we hit here, then Language Converter could be tricked
			// into doing an XSS, so we refuse to translate.
			// If expected input manages to reach this code path,
			// we should consider it a bug.
			$log = LoggerFactory::getInstance( 'languageconverter' );
			$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
				. ". Disabling language conversion for this page.",
				[
					"method" => __METHOD__,
					"variant" => $toVariant,
					"startOfText" => substr( $text, 0, 500 )
				]
			);
			return $text;
		}
		// Queue the part before the markup for translation in a batch
		$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

		// Advance to the next position
		$startPos = $elementPos + strlen( $element );

		// Translate any alt or title attributes inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
		) {
			// FIXME, this decodes entities, so if you have something
			// like <div title="foo&lt;bar"> the bar won't get
			// translated since after entity decoding it looks like
			// unclosed html and we call this method recursively
			// on attributes.
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			// Ensure self-closing tags stay self-closing.
			$close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
			$changed = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( !isset( $attrs[$attrName] ) ) {
					continue;
				}
				$attr = $attrs[$attrName];
				// Don't convert URLs
				if ( !str_contains( $attr, '://' ) ) {
					$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
				}

				if ( $attr !== $attrs[$attrName] ) {
					$attrs[$attrName] = $attr;
					$changed = true;
				}
			}
			// The tag is only rebuilt when an attribute actually changed;
			// otherwise the original markup is kept untouched.
			if ( $changed ) {
				// @phan-suppress-next-line SecurityCheck-DoubleEscaped Explained above with decodeTagAttributes
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$close . $elementMatches[3];
			}
		}
		$literalBlob .= $element . "\000";
	}

	// Do the main translation batch
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Put the output back together
	$translatedIter = StringUtils::explode( "\000", $translatedBlob );
	$literalIter = StringUtils::explode( "\000", $literalBlob );
	$output = '';
	// Alternate translated segment / literal element until either runs out.
	while ( $translatedIter->valid() && $literalIter->valid() ) {
		$output .= $translatedIter->current();
		$output .= $literalIter->current();
		$translatedIter->next();
		$literalIter->next();
	}

	return $output;
}
611 | |
/**
 * Translate a string to a variant using that variant's conversion table.
 *
 * Text whose trimmed form is falsy (empty, whitespace only, or "0") is
 * returned unchanged and the tables are not loaded.
 *
 * @param string $text Text to translate
 * @param string $variant Variant language code
 * @return string Translated text
 */
public function translate( $text, $variant ) {
	if ( !trim( $text ) ) {
		// Nothing worth translating.
		return $text;
	}

	$this->loadTables();
	return $this->mTables[$variant]->replace( $text );
}
621 | |
/**
 * Translate text while leaving stand-alone Roman numerals untouched.
 *
 * The text is split at every Roman numeral delimited by the start or end
 * of the text or by a break character. The pieces between the numerals are
 * translated; the numerals, together with their delimiting break
 * characters, are copied from the input unchanged.
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code
 * @return string Translated text
 * @throws RuntimeException If no conversion table exists for $variant
 */
protected function translateWithoutRomanNumbers( $text, $variant ) {
	$breaks = '[^\w\x80-\xff]';

	// Regexp for Roman numbers. The lookahead assertion ensures that
	// $roman does not match the empty string.
	$roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';

	$alternatives = [
		'^' . $roman . '$',
		'^' . $roman . $breaks,
		$breaks . $roman . '$',
		$breaks . $roman . $breaks,
	];
	$pattern = '/' . implode( '|', $alternatives ) . '/';

	// Each piece is a [ text, byte offset ] pair.
	$pieces = preg_split( $pattern, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
	$firstPiece = array_shift( $pieces );

	$this->loadTables();
	if ( !isset( $this->mTables[$variant] ) ) {
		throw new RuntimeException( "Broken variant table: "
			. implode( ',', array_keys( $this->mTables ) ) );
	}

	$result = $this->mTables[$variant]->replace( $firstPiece[0] );
	// End offset of the piece handled last.
	$cursor = (int)$firstPiece[1] + strlen( $firstPiece[0] );
	foreach ( $pieces as [ $pieceText, $pieceOffset ] ) {
		// Copy the numeral (and its delimiters) that sits between the pieces.
		$result .= substr( $text, $cursor, (int)$pieceOffset - $cursor );
		$result .= $this->translate( $pieceText, $variant );
		$cursor = (int)$pieceOffset + strlen( $pieceText );
	}

	return $result;
}
655 | |
/**
 * Translate a text to every enabled variant.
 *
 * @param string $text Text to translate
 * @return array Variant code => translated text
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$translations = [];
	foreach ( $this->getVariants() as $variantCode ) {
		$translations[$variantCode] = $this->translate( $text, $variantCode );
	}

	return $translations;
}
666 | |
/**
 * Apply manual conversion rules.
 *
 * Stores the rule's title, if it has one, and adds the rule's conversion
 * pairs to, or removes them from, the conversion tables, depending on the
 * rule's action. Entries for variants that do not validate are skipped.
 *
 * @param ConverterRule $convRule
 */
protected function applyManualConv( ConverterRule $convRule ) {
	// The syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customises the title
	// conversion.
	// T26072: $mConvRuleTitle used to be overwritten by other manual
	// rule(s) that are not for the title, which broke the title
	// conversion. Hence it is only replaced when the rule has a title.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle !== false ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// Merge/remove manual conversion rules to/from the global table.
	$ruleTable = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $ruleTable as $rawVariant => $pairs ) {
		$variant = $this->validateVariant( $rawVariant );
		if ( !$variant ) {
			continue;
		}

		switch ( $action ) {
			case 'add':
				// More efficient than array_merge(), about 2.5 times.
				foreach ( $pairs as $from => $to ) {
					$this->mTables[$variant]->setPair( $from, $to );
				}
				break;
			case 'remove':
				$this->mTables[$variant]->removeArray( $pairs );
				break;
		}
	}
}
702 | |
/**
 * Convert a title to the preferred variant, returning its parts
 * separately.
 *
 * @param LinkTarget|PageIdentity $title NOTE(review): type taken from the
 *  file's imports; only getNamespace() and getDBKey() are called - confirm
 * @return string[] Three elements: converted namespace text, the ':'
 *  separator, and the converted main text (DB key with '_' shown as ' ')
 */
public function convertSplitTitle( $title ) {
	$variant = $this->getPreferredVariant();

	$nsText = $this->convertNamespace( $title->getNamespace(), $variant );

	$readableName = strtr( $title->getDBKey(), '_', ' ' );

	return [ $nsText, ':', $this->translate( $readableName, $variant ) ];
}
714 | |
/**
 * Convert a title to the preferred variant, as a single string.
 *
 * @param LinkTarget|PageIdentity $title NOTE(review): type taken from the
 *  file's imports; the value is passed straight to convertSplitTitle() -
 *  confirm
 * @return string "Namespace:Main text", or only the main text when the
 *  namespace text is empty
 */
public function convertTitle( $title ) {
	[ $nsText, $nsSeparator, $mainText ] = $this->convertSplitTitle( $title );

	if ( $nsText === '' ) {
		return $mainText;
	}
	return $nsText . $nsSeparator . $mainText;
}
721 | |
/**
 * Get the name of a namespace in a variant.
 *
 * The result is cached in the local server object cache for one minute,
 * keyed by namespace index and variant.
 *
 * @param int $index Namespace index
 * @param string|null $variant Variant code; the preferred variant is used
 *  when null
 * @return string Namespace name; the empty string for the main namespace
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	if ( $variant === null ) {
		$variant = $this->getPreferredVariant();
	}

	$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$cacheKey = $cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );

	return $cache->getWithSetCallback(
		$cacheKey,
		BagOStuff::TTL_MINUTE,
		fn () => $this->computeNsVariantText( $index, $variant )
	);
}
739 | |
/**
 * Compute the name of a namespace in a variant (uncached).
 *
 * The first of these that is available is used: the 'conversion-ns<index>'
 * message in the target variant; the same message in the content language,
 * translated to the variant; the formatted namespace text of the variant's
 * language object.
 *
 * @param int $index
 * @param string|null $variant
 * @return string
 */
private function computeNsVariantText( int $index, ?string $variant ): string {
	$messageKey = 'conversion-ns' . $index;

	// First check if a message gives a converted name in the target variant.
	$variantMessage = wfMessage( $messageKey )->inLanguage( $variant );
	if ( $variantMessage->exists() ) {
		return $variantMessage->plain();
	}

	// Then check if a message gives a converted name in the content
	// language, which needs extra translation to the target variant.
	$contentMessage = wfMessage( $messageKey )->inContentLanguage();
	if ( $contentMessage->exists() ) {
		return $this->translate( $contentMessage->plain(), $variant );
	}

	// No message exists, retrieve it from the target variant's namespace names.
	$variantLanguage = MediaWikiServices::getInstance()
		->getLanguageFactory()
		->getLanguage( $variant );
	return $variantLanguage->getFormattedNsText( $index );
}
772 | |
/**
 * Convert text to the preferred variant.
 *
 * @param string $text Text to convert
 * @return string Converted text
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
777 | |
/**
 * Convert text to a given variant.
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @param bool $clearState Whether to forget the title set by an earlier
 *  conversion rule before converting
 * @return string Converted text; $text unchanged when conversion is disabled
 */
public function convertTo( $text, $variant, bool $clearState = true ) {
	$conversionDisabled = MediaWikiServices::getInstance()
		->getLanguageConverterFactory()
		->isConversionDisabled();
	if ( $conversionDisabled ) {
		return $text;
	}

	if ( $clearState ) {
		// Reset converter state for a new converter run.
		$this->mConvRuleTitle = false;
	}

	return $this->recursiveConvertTopLevel( $text, $variant );
}
789 | |
/**
 * Recursively convert text on the outside. Nested markup may be used to
 * customise the rules.
 *
 * The text outside "-{" markers is converted with autoConvert(), unless
 * guessVariant() reports that the text needs no conversion. Each "-{" found
 * outside HTML tags and outside <script>/<style> elements is handed to
 * recursiveConvertRule().
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $depth Depth of recursion
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$cursor = 0;
	$result = '';
	$textLength = strlen( $text );
	$needsConversion = !$this->guessVariant( $text, $variant );

	// Converts a fragment that lies outside of any rule markup.
	$convertPlain = function ( $fragment ) use ( $needsConversion, $variant ) {
		return $needsConversion ? $this->autoConvert( $fragment, $variant ) : $fragment;
	};

	$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
	$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
	// phpcs:ignore Generic.Files.LineLength
	$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
	// Only match "-{" outside the html.
	$markerPattern = "/$noScript|$noStyle|$noHtml|-\{/";

	while ( $cursor < $textLength ) {
		$found = preg_match( $markerPattern, $text, $match, PREG_OFFSET_CAPTURE, $cursor );
		if ( !$found ) {
			// No more markup, append the final segment.
			return $result . $convertPlain( substr( $text, $cursor ) );
		}

		// Offset of the "-{" marker (found outside of any attribute).
		$markerPos = $match[0][1];

		// Append the segment that precedes the marker.
		$result .= $convertPlain( substr( $text, $cursor, $markerPos - $cursor ) );

		// Advance to the marker and convert the rule recursively.
		// Note: $cursor is passed by reference and advanced by the callee.
		$cursor = $markerPos;
		$result .= $this->recursiveConvertRule( $text, $variant, $cursor, $depth + 1 );
	}

	return $result;
}
842 | |
/**
 * Convert one "-{ ... }-" rule, starting at the "-{" found at $startPos.
 *
 * Nested rules are converted recursively up to mMaxDepth; deeper "-{"
 * markers are kept literally, with a single error message added per rule.
 * A rule that is never closed is emitted as "-{" followed by the
 * auto-converted remainder of the text.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int &$startPos Offset of the "-{" marker on entry; on return, the
 *   offset just past the consumed rule (or the text length if unclosed)
 * @param int $depth Depth of recursion
 * @return string Converted text
 * @throws InvalidArgumentException If $text does not have "-{" at $startPos
 * @throws UnexpectedValueException If the marker regex yields an unknown token
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new InvalidArgumentException( __METHOD__ . ': invalid input string' );
	}

	// Step over the opening marker.
	$startPos += 2;
	$ruleText = '';
	$depthWarningShown = false;
	$textLength = strlen( $text );

	while ( $startPos < $textLength ) {
		$match = [];
		if ( !preg_match( '/-\{|\}-/', $text, $match, PREG_OFFSET_CAPTURE, $startPos ) ) {
			// Unclosed rule
			break;
		}
		[ $marker, $markerPos ] = $match[0];

		// Collect the text before the marker, then move up to it.
		$ruleText .= substr( $text, $startPos, $markerPos - $startPos );
		$startPos = $markerPos;

		if ( $marker === '}-' ) {
			// End of this rule: parse it, register its manual conversions
			// and return what it displays.
			$startPos += 2;
			$rule = new ConverterRule( $ruleText, $this );
			$rule->parse( $variant );
			$this->applyManualConv( $rule );
			return $rule->getDisplay();
		}

		if ( $marker !== '-{' ) {
			throw new UnexpectedValueException( __METHOD__ . ': invalid regex match' );
		}

		if ( $depth < $this->mMaxDepth ) {
			// Recursively parse another rule
			$ruleText .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
			continue;
		}

		// Maximum depth reached: keep the marker as literal text and warn once.
		$ruleText .= '-{';
		if ( !$depthWarningShown ) {
			$ruleText .= '<span class="error">' .
				wfMessage( 'language-converter-depth-warning' )
					->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
				'</span>';
			$depthWarningShown = true;
		}
		$startPos += 2;
	}

	// Unclosed rule: everything left belongs to it.
	if ( $startPos < $textLength ) {
		$ruleText .= substr( $text, $startPos );
	}
	$startPos = $textLength;

	return '-{' . $this->autoConvert( $ruleText, $variant );
}
918 | |
/**
 * If the link target does not exist, look for an existing page under one of
 * the variant conversions of the link text and point the link at it.
 *
 * Nothing is changed when the target already exists, when link conversion is
 * disabled, when the current request asks not to convert (redirect=no,
 * action=edit/submit, linkconvert=no; skipped if $ignoreOtherCond is set), or
 * when no variant title exists.
 *
 * @param string &$link Link text; replaced by the text of the variant title found
 * @param mixed &$nt Link target. A LinkTarget is cast to a Title in place;
 *   replaced by the variant Title found. Non-objects are looked up in NS_MAIN.
 * @param bool $ignoreOtherCond Whether to skip the request-based checks
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# If the article has already existed, there is no need to
	# check it again. Otherwise it may cause a fault.
	if ( $nt instanceof LinkTarget ) {
		$nt = Title::castFromLinkTarget( $nt );
		if ( $nt->exists() ) {
			return;
		}
	}

	if ( $nt instanceof PageIdentity && $nt->exists() ) {
		return;
	}

	$services = MediaWikiServices::getInstance();
	if ( $services->getLanguageConverterFactory()->isLinkConversionDisabled() ) {
		return;
	}

	if ( !$ignoreOtherCond ) {
		$request = RequestContext::getMain()->getRequest();

		$action = $request->getText( 'action' );
		// Editing a red link is treated like a normal view.
		if ( $action === 'edit' && $request->getBool( 'redlink' ) ) {
			$action = 'view';
		}

		if ( $request->getText( 'redirect', 'yes' ) === 'no'
			|| $action === 'edit'
			|| $action === 'submit'
			|| $request->getText( 'linkconvert', 'yes' ) === 'no'
		) {
			return;
		}
	}

	$ns = is_object( $nt ) ? $nt->getNamespace() : NS_MAIN;

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) { // give up
		return;
	}

	// Only build the batch once we know a lookup is actually needed.
	$linkBatch = $services->getLinkBatchFactory()->newLinkBatch();
	$titles = [];

	foreach ( $variants as $v ) {
		// Strict comparison: numeric-looking strings must not compare as numbers.
		if ( $v === $link ) {
			continue;
		}
		$varnt = Title::newFromText( $v, $ns );
		if ( $varnt !== null ) {
			$linkBatch->addObj( $varnt );
			$titles[] = $varnt;
		}
	}

	if ( !$titles ) {
		// No candidate differs from the original link; nothing to look up.
		return;
	}

	// fetch all variants in single query
	$linkBatch->execute();

	foreach ( $titles as $varnt ) {
		if ( $varnt->getArticleID() > 0 ) {
			// First existing variant wins.
			$nt = $varnt;
			$link = $varnt->getText();
			break;
		}
	}
}
992 | |
/**
 * Build the variant-specific hash fragment: "!" followed by the preferred
 * variant code.
 *
 * @return string
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
998 | |
/**
 * Tell whether the text already appears to be in the given variant.
 *
 * This base implementation never guesses and always answers false.
 * recursiveConvertTopLevel() auto-converts plain text only when this
 * returns false.
 *
 * @param string $text Text to inspect
 * @param string $variant Variant code
 * @return bool Always false here
 */
public function guessVariant( $text, $variant ) {
	return false;
}
1002 | |
/**
 * Provide the built-in conversion tables for this converter.
 *
 * loadTables() indexes the result by every code from getVariants() and calls
 * mergeArray() on each entry, so an entry is expected for each variant
 * (presumably ReplacementArray objects, as documented on postLoadTables()).
 *
 * @return array Conversion tables keyed by variant code
 */
abstract protected function loadDefaultTables(): array;
1009 | |
/**
 * Load conversion tables either from the cache or by rebuilding them.
 *
 * Does nothing when the tables have already been loaded. Otherwise the tables
 * are read through the object cache configured by LanguageConverterCacheType;
 * on a cache miss they are rebuilt from loadDefaultTables(), merged with the
 * tables parsed by parseCachedTable() for each variant, passed through
 * postLoadTables() and cached for 12 hours.
 *
 * @internal
 * @param bool $fromCache Whether to load from cache. Defaults to true.
 *   When false the cached entry is deleted first, forcing a rebuild.
 */
protected function loadTables( $fromCache = true ) {
	// Check the guard first so repeated calls do not pay for service and
	// configuration lookups.
	if ( $this->mTablesLoaded ) {
		return;
	}

	$services = MediaWikiServices::getInstance();
	$languageConverterCacheType = $services
		->getMainConfig()->get( MainConfigNames::LanguageConverterCacheType );

	$this->mTablesLoaded = true;
	$cache = $services->getObjectCacheFactory()->getInstance( $languageConverterCacheType );
	// The key depends on the main code, the set of variants and the cache version.
	$cacheKey = $cache->makeKey(
		'conversiontables', $this->getMainCode(),
		md5( implode( ',', $this->getVariants() ) ), self::CACHE_VERSION_KEY
	);
	if ( !$fromCache ) {
		$cache->delete( $cacheKey );
	}
	$this->mTables = $cache->getWithSetCallback( $cacheKey, $cache::TTL_HOUR * 12, function () {
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$tables = $this->loadDefaultTables();
		foreach ( $this->getVariants() as $var ) {
			$cached = $this->parseCachedTable( $var );
			$tables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables( $tables );
		return $tables;
	} );
}
1046 | |
/**
 * Extension point invoked by loadTables() once the default tables have been
 * merged with the parsed on-wiki tables, before the result is cached.
 *
 * The tables are received by reference so an override can adjust them in
 * place. This base implementation does nothing.
 *
 * @param ReplacementArray[] &$tables
 */
protected function postLoadTables( &$tables ) {
}
1054 | |
/**
 * Discard the loaded conversion tables and load them again, bypassing the
 * cache.
 *
 * Also used by test suites which need to reset the converter state.
 *
 * Called by ParserTestRunner with the help of TestingAccessWrapper
 */
private function reloadTables() {
	// Only a non-empty table set is replaced; an empty or unset value stays as is.
	if ( $this->mTables ) {
		$this->mTables = [];
	}

	// Clear the guard so loadTables() runs; false makes it drop the cached entry.
	$this->mTablesLoaded = false;
	$this->loadTables( false );
}
1070 | |
/**
 * Parse the on-wiki conversion table for a variant.
 *
 * The top-level table is read through the message cache; subpages are read
 * from the latest revision of the MediaWiki-namespace page, and only when
 * its main slot uses the wikitext content model.
 *
 * The tables should be in blocks of the following form:
 *		-{
 *			word => word ;
 *			word => word ;
 *			...
 *		}-
 *
 * To make the tables more manageable, subpages are allowed
 * and will be parsed recursively if $recursive == true. Mappings found on a
 * subpage take precedence over those of the page linking to it.
 *
 * @param string $code Language code
 * @param string $subpage Subpage name
 * @param bool $recursive Parse subpages recursively? Defaults to true.
 * @param array<string,true> &$visited Page keys already parsed during the
 *   current top-level call; guards against link cycles. Callers normally
 *   omit it. It replaces a function-static guard that persisted for the whole
 *   process and made any later parse of the same table (e.g. from
 *   reloadTables()) return an empty result.
 *
 * @return array Map of source string to replacement string
 */
private function parseCachedTable( $code, $subpage = '', $recursive = true, array &$visited = [] ) {
	$key = 'Conversiontable/' . $code;
	// Compare against '' so that a subpage named "0" is still a subpage,
	// consistent with the branch selection below.
	if ( $subpage !== '' ) {
		$key .= '/' . $subpage;
	}
	if ( isset( $visited[$key] ) ) {
		// Already handled in this run (duplicate link or link cycle).
		return [];
	}
	$visited[$key] = true;

	if ( $subpage === '' ) {
		$messageCache = MediaWikiServices::getInstance()->getMessageCache();
		$txt = $messageCache->getMsgFromNamespace( $key, $code );
	} else {
		$txt = false;
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = MediaWikiServices::getInstance()
				->getRevisionLookup()
				->getRevisionByTitle( $title );
			if ( $revision ) {
				$model = $revision->getSlot(
					SlotRecord::MAIN,
					RevisionRecord::RAW
				)->getModel();
				if ( $model === CONTENT_MODEL_WIKITEXT ) {
					// @phan-suppress-next-line PhanUndeclaredMethod
					$txt = $revision->getContent(
						SlotRecord::MAIN,
						RevisionRecord::RAW
					)->getText();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return [];
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$sublinks = [];
	foreach ( StringUtils::explode( '[[', $txt ) as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) !== 2 ) {
			continue;
		}
		// Drop the link label, then split "<head>/<code>/<subpage>".
		$target = explode( '|', $link[0], 2 )[0];
		$parts = explode( '/', trim( $target ), 3 );
		// Require both the head and the code part before comparing; a link
		// without any "/" has no code part to read.
		if ( count( $parts ) >= 2 && $parts[0] === $linkhead && $parts[1] === $code ) {
			$sublinks[] = $parts[2] ?? '';
		}
	}

	// parse the mappings in this page
	$ret = [];
	$first = true;
	foreach ( StringUtils::explode( '-{', $txt ) as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		$mappings = explode( '}-', $block, 2 )[0];
		// Quote characters and list markers are not part of the mappings.
		$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
		foreach ( StringUtils::explode( ';', $stripped ) as $entry ) {
			$pair = explode( '=>', $entry, 3 );
			if ( count( $pair ) !== 2 ) {
				// Not exactly one "=>": not a mapping.
				continue;
			}
			// trim any trailing comments starting with '//'
			$to = explode( '//', $pair[1], 2 )[0];
			$ret[trim( $pair[0] )] = trim( $to );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $sublink ) {
			$s = $this->parseCachedTable( $code, $sublink, $recursive, $visited );
			// Array union keeps the left operand's value for duplicate keys.
			$ret = $s + $ret;
		}
	}
	return $ret;
}
1193 | |
/**
 * Wrap text in "-{R|...}-" markup.
 *
 * Text that already contains "-{" or "}-" is returned unchanged.
 *
 * @param string $text Text to mark
 * @param bool $noParse Not used by this implementation
 * @return string
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	$alreadyMarked = str_contains( $text, '-{' ) || str_contains( $text, '}-' );

	return $alreadyMarked ? $text : '-{R|' . $text . '}-';
}
1202 | |
/**
 * Convert a category sort key. This implementation returns the key as given.
 *
 * @param string $key
 * @return string The unchanged $key
 */
public function convertCategoryKey( $key ) {
	return $key;
}
1206 | |
/**
 * Reload the conversion tables when the given page is a conversion table
 * page, i.e. a MediaWiki-namespace page named "Conversiontable/<variant>[/...]"
 * whose variant part is accepted by validateVariant().
 *
 * @param LinkTarget $linkTarget Page to check
 */
public function updateConversionTable( LinkTarget $linkTarget ) {
	if ( $linkTarget->getNamespace() !== NS_MEDIAWIKI ) {
		return;
	}

	$parts = explode( '/', $linkTarget->getDBkey(), 3 );
	if ( count( $parts ) <= 1 || $parts[0] !== 'Conversiontable' ) {
		return;
	}

	if ( $this->validateVariant( $parts[1] ) ) {
		$this->reloadTables();
	}
}
1216 | |
/**
 * Get the separator pattern for ConverterRule::parseRules(), building it on
 * first use and reusing the stored pattern afterwards.
 *
 * @return string Regular expression for preg_split
 */
public function getVarSeparatorPattern() {
	if ( $this->mVarSeparatorPattern !== null ) {
		return $this->mVarSeparatorPattern;
	}

	// varsep_pattern for preg_split:
	// The text should be split by ";" only if a valid variant
	// name exists after the markup.
	// For example
	// -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
	// <span style="font-size:120%;">yyy</span>;}-
	// we should split it as:
	// [
	//		[0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
	//		[1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
	//		[2] => ''
	// ]
	$accepted = [];
	foreach ( $this->getVariants() as $code ) {
		$accepted[ $code ] = 1;
		// Accept standard BCP 47 names for variants as well.
		$accepted[ LanguageCode::bcp47( $code ) ] = 1;
	}
	// Accept old deprecated names for variants
	foreach ( LanguageCode::getDeprecatedCodeMapping() as $deprecated => $current ) {
		if ( isset( $accepted[ $current ] ) ) {
			$accepted[ $deprecated ] = 1;
		}
	}
	$names = implode( '|', array_keys( $accepted ) );

	// A ";" is a separator when it is followed by one of these:
	$lookahead = implode( '|', [
		// zh-hans:xxx;zh-hant:yyy
		'(?:' . $names . ')\s*:',
		// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
		'[^;]*?=>\s*(?:' . $names . ')\s*:',
		// only whitespace up to the end of the text
		'\s*$',
	] );

	$this->mVarSeparatorPattern = '/;\s*(?=' . $lookahead . ')/';

	return $this->mVarSeparatorPattern;
}
1259 | |
/**
 * Check whether this converter has more than one variant.
 *
 * @return bool True when getVariants() lists at least two variants
 */
public function hasVariants() {
	return count( $this->getVariants() ) >= 2;
}
1263 | |
/**
 * Check whether the given code is one of this converter's variants, i.e.
 * whether validateVariant() returns it unchanged.
 *
 * @param string $variant Variant code to check
 * @return bool False for an empty code or one that does not validate to itself
 */
public function hasVariant( $variant ) {
	if ( !$variant ) {
		return false;
	}

	return $this->validateVariant( $variant ) === $variant;
}
1267 | |
/**
 * Convert text with convert() and pass the result through htmlspecialchars().
 *
 * @param string $text Text to be converted
 * @return string Converted text with HTML special characters escaped
 */
public function convertHtml( $text ) {
	$converted = $this->convert( $text );

	// @phan-suppress-next-line SecurityCheck-DoubleEscaped convert() is documented to return html
	return htmlspecialchars( $converted );
}
1272 | } |