MediaWiki  master
LanguageNameUtils.php
Go to the documentation of this file.
1 <?php
25 namespace MediaWiki\Languages;
26 
27 use BagOStuff;
28 use HashBagOStuff;
33 use MWException;
34 
/**
 * Return autonyms in getLanguageName(s).
 */
public const AUTONYMS = null;

/**
 * Return all known languages in getLanguageName(s).
 */
public const ALL = 'all';

/**
 * Return in getLanguageName(s) only the languages that are defined by MediaWiki.
 */
public const DEFINED = 'mw';

/**
 * Return in getLanguageName(s) only the languages for which we have at least some
 * localisation.
 */
public const SUPPORTED = 'mwfile';

/** @var ServiceOptions */
private $options;

/**
 * Cache for language names. Created lazily on the first getLanguageNames() call;
 * declared explicitly so it is not created as a dynamic property.
 * @var HashBagOStuff|null
 */
private $languageNameCache;

/**
 * Cache for validity of language codes, keyed by the code that was checked.
 * @var array
 */
private $validCodeCache = [];

/**
 * Option keys that the ServiceOptions instance passed to the constructor is
 * asserted against.
 */
public const CONSTRUCTOR_OPTIONS = [
	'ExtraLanguageNames',
	'UsePigLatinVariant',
];

/** @var HookRunner */
private $hookRunner;
89 
/**
 * @param ServiceOptions $options Checked against self::CONSTRUCTOR_OPTIONS
 * @param HookContainer $hookContainer Wrapped in a HookRunner used for hook calls
 */
public function __construct( ServiceOptions $options, HookContainer $hookContainer ) {
	// Reject an unexpected option set before any state is stored.
	$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

	$this->hookRunner = new HookRunner( $hookContainer );
	$this->options = $options;
}
99 
/**
 * Checks whether a readable localisation file (PHP messages file or JSON
 * messages file) exists for the given language code.
 *
 * @param string $code Language code
 * @return bool False for codes that are not valid built-in codes and for 'qqq'
 */
public function isSupportedLanguage( string $code ): bool {
	// 'qqq' is a special code for internal use, not supported even though there
	// is a qqq.json. The form check must come first: the file name helpers
	// below throw on codes that fail it.
	if ( !$this->isValidBuiltInCode( $code ) || $code === 'qqq' ) {
		return false;
	}

	if ( is_readable( $this->getMessagesFileName( $code ) ) ) {
		return true;
	}

	return is_readable( $this->getJsonMessagesFileName( $code ) );
}
120 
/**
 * Returns true if a language code string is of a valid form, whether or not it
 * exists. Results are memoised per code in $this->validCodeCache.
 *
 * @param string $code
 * @return bool
 */
public function isValidCode( string $code ): bool {
	if ( isset( $this->validCodeCache[$code] ) ) {
		return $this->validCodeCache[$code];
	}

	// People think language codes are HTML-safe, so enforce it. Ideally we should
	// only allow a-zA-Z0-9- but .+ and other chars are often used for {{int:}}
	// hacks. See bugs T39564, T39587, T38938.
	$length = strlen( $code );
	$isValid =
		// Protect against path traversal: none of these characters may occur
		strcspn( $code, ":/\\\000&<>'\"" ) === $length
		&& !preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code )
		// libicu sets ULOC_FULLNAME_CAPACITY to 157; stay comfortably lower
		&& $length <= 128;

	$this->validCodeCache[$code] = $isValid;
	return $isValid;
}
144 
/**
 * Returns true if a language code is of a valid form for built-in use: two or
 * more characters, each a lowercase ASCII letter, a digit or a hyphen.
 *
 * @param string $code
 * @return bool
 */
public function isValidBuiltInCode( string $code ): bool {
	// The D modifier makes "$" match only at the very end of the subject.
	// Without it a code with a trailing newline (e.g. "en\n") would pass and
	// end up inside the file paths built from validated codes.
	return preg_match( '/^[a-z0-9-]{2,}$/D', $code ) === 1;
}
155 
/**
 * Returns true if a language code is an IETF tag known to MediaWiki.
 *
 * @param string $tag
 * @return bool
 */
public function isKnownLanguageTag( string $tag ): bool {
	// Bail out early on malformed input so later code never has to process
	// tags which are not valid at all (and throw on them).
	if ( !$this->isValidBuiltInCode( $tag ) ) {
		return false;
	}

	// Known if listed in the built-in name table, or if a non-empty name for
	// the tag can be looked up in the tag's own language.
	return isset( Data\Names::$names[$tag] )
		|| $this->getLanguageName( $tag, $tag ) !== '';
}
176 
/**
 * Get an array of language names, indexed by code. Results are kept in a
 * per-instance HashBagOStuff (configured with maxKeys = 20), keyed by the
 * combination of both arguments.
 *
 * @param null|string $inLanguage Code of the language in which to return the
 *  names, or self::AUTONYMS for autonyms
 * @param string $include One of self::ALL, self::DEFINED or self::SUPPORTED
 * @return array Language code => language name
 */
public function getLanguageNames( $inLanguage = self::AUTONYMS, $include = self::DEFINED ) {
	// AUTONYMS is null, so give it a literal spelling inside the cache key.
	$languagePart = $inLanguage === self::AUTONYMS ? 'null' : $inLanguage;
	$cacheKey = $languagePart . ":$include";

	if ( !$this->languageNameCache ) {
		$this->languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] );
	}

	$computeNames = function () use ( $inLanguage, $include ) {
		return $this->getLanguageNamesUncached( $inLanguage, $include );
	};

	return $this->languageNameCache->getWithSetCallback(
		$cacheKey,
		BagOStuff::TTL_INDEFINITE,
		$computeNames
	);
}
203 
/**
 * Uncached helper for getLanguageNames.
 *
 * @param null|string $inLanguage Code of the language in which to return the
 *  names, or self::AUTONYMS for autonyms
 * @param string $include One of self::ALL, self::DEFINED or self::SUPPORTED;
 *  any other value is treated like self::DEFINED
 * @return array Language code => language name, sorted by code
 */
private function getLanguageNamesUncached( $inLanguage, $include ) {
	$wantAutonyms = $inLanguage === self::AUTONYMS;

	// An invalid language code to translate into falls back to en
	if ( !$wantAutonyms && !$this->isValidCode( $inLanguage ) ) {
		$inLanguage = 'en';
	}

	$names = [];
	if ( !$wantAutonyms ) {
		# TODO: also include for self::AUTONYMS, when this code is more efficient
		$this->hookRunner->onLanguageGetTranslatedLanguageNames( $names, $inLanguage );
	}

	// Configured extra names take precedence over the built-in table
	$mwNames = $this->options->get( 'ExtraLanguageNames' ) + Data\Names::$names;
	if ( $this->options->get( 'UsePigLatinVariant' ) ) {
		// Pig Latin (for variant development)
		$mwNames['en-x-piglatin'] = 'Igpay Atinlay';
	}

	foreach ( $mwNames as $mwCode => $mwName ) {
		# A name supplied by the hook is kept, except for the requested language
		# itself, where MediaWiki's own native name is preferred.
		if ( isset( $names[$mwCode] ) && $mwCode !== $inLanguage ) {
			continue;
		}
		$names[$mwCode] = $mwName;
	}

	if ( $include === self::ALL ) {
		ksort( $names );
		return $names;
	}

	// Narrow the result to the codes MediaWiki itself defines
	$definedNames = [];
	foreach ( array_keys( $mwNames ) as $definedCode ) {
		$definedNames[$definedCode] = $names[$definedCode];
	}

	if ( $include !== self::SUPPORTED ) {
		# self::DEFINED option; also the fallback for any unrecognised value
		ksort( $definedNames );
		return $definedNames;
	}

	# Probe each code for a messages file rather than looping over a directory,
	# so that messages files in extensions will work correctly.
	$supportedNames = [];
	foreach ( $definedNames as $code => $name ) {
		if ( is_readable( $this->getMessagesFileName( $code ) ) ||
			is_readable( $this->getJsonMessagesFileName( $code ) )
		) {
			$supportedNames[$code] = $name;
		}
	}

	ksort( $supportedNames );
	return $supportedNames;
}
269 
/**
 * Look up the name of a single language.
 *
 * @param string $code Language code; lowercased before the lookup
 * @param null|string $inLanguage Code of the language in which to return the
 *  name, or self::AUTONYMS for the autonym
 * @param string $include One of self::ALL, self::DEFINED or self::SUPPORTED
 * @return string The language name, or an empty string when the code is not found
 */
public function getLanguageName( $code, $inLanguage = self::AUTONYMS, $include = self::ALL ) {
	$lookupKey = strtolower( $code );
	$names = $this->getLanguageNames( $inLanguage, $include );

	return isset( $names[$lookupKey] ) ? $names[$lookupKey] : '';
}
284 
/**
 * Get the name of a file for a certain language code.
 *
 * The code gets its first character uppercased and its hyphens replaced by
 * underscores, e.g. 'zh-hans' becomes 'Zh_hans', and is then placed between
 * $prefix and $suffix.
 *
 * @param string $prefix Prepended to the file name
 * @param string $code Language code
 * @param string $suffix Appended to the file name
 * @throws MWException If $code is not a valid built-in code
 * @return string
 */
public function getFileName( $prefix, $code, $suffix = '.php' ) {
	if ( $this->isValidBuiltInCode( $code ) ) {
		$baseName = str_replace( '-', '_', ucfirst( $code ) );
		return $prefix . $baseName . $suffix;
	}

	throw new MWException( "Invalid language code \"$code\"" );
}
300 
/**
 * Get the path of the PHP messages file for a language code, built under
 * $IP/languages/messages/.
 *
 * @param string $code Language code
 * @throws MWException If $code is not a valid built-in code (raised by getFileName)
 * @return string
 */
public function getMessagesFileName( $code ) {
	global $IP;

	$messagesPrefix = $IP . '/languages/messages/Messages';
	$path = $this->getFileName( $messagesPrefix, $code, '.php' );

	// NOTE(review): the hook receives $path and presumably may replace it by
	// reference - confirm against the hook interface.
	$this->hookRunner->onLanguage__getMessagesFileName( $code, $path );

	return $path;
}
311 
/**
 * Get the path of the JSON messages file for a language code, built under
 * $IP/languages/i18n/.
 *
 * @param string $code Language code
 * @throws MWException If $code is not a valid built-in code
 * @return string
 */
public function getJsonMessagesFileName( $code ) {
	global $IP;

	// Only validated codes may be embedded in the path.
	if ( $this->isValidBuiltInCode( $code ) ) {
		return $IP . '/languages/i18n/' . $code . '.json';
	}

	throw new MWException( "Invalid language code \"$code\"" );
}
326 }
MediaWiki\Languages\LanguageNameUtils\$validCodeCache
array $validCodeCache
Cache for validity of language codes.
Definition: LanguageNameUtils.php:77
MediaWikiTitleCodec
A codec for MediaWiki page titles.
Definition: MediaWikiTitleCodec.php:39
HashBagOStuff
Simple store for keeping values in an associative array for the current process.
Definition: HashBagOStuff.php:32
MediaWiki\Languages
MediaWiki\Languages\LanguageNameUtils\__construct
__construct(ServiceOptions $options, HookContainer $hookContainer)
Definition: LanguageNameUtils.php:94
MediaWiki\Languages\LanguageNameUtils\isValidBuiltInCode
isValidBuiltInCode(string $code)
Returns true if a language code is of a valid form for the purposes of internal customisation of Medi...
Definition: LanguageNameUtils.php:152
MediaWiki\Languages\LanguageNameUtils\isSupportedLanguage
isSupportedLanguage(string $code)
Checks whether any localisation is available for that language tag in MediaWiki (MessagesXx....
Definition: LanguageNameUtils.php:107
MediaWiki\Languages\LanguageNameUtils\getFileName
getFileName( $prefix, $code, $suffix='.php')
Get the name of a file for a certain language code.
Definition: LanguageNameUtils.php:293
MediaWikiTitleCodec\getTitleInvalidRegex
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
Definition: MediaWikiTitleCodec.php:590
MediaWiki\Languages\LanguageNameUtils\isValidCode
isValidCode(string $code)
Returns true if a language code string is of a valid form, whether or not it exists.
Definition: LanguageNameUtils.php:130
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
BagOStuff
Class representing a cache/ephemeral data store.
Definition: BagOStuff.php:86
MediaWiki\Languages\LanguageNameUtils\ALL
const ALL
Return all known languages in getLanguageName(s).
Definition: LanguageNameUtils.php:52
MediaWiki\Languages\LanguageNameUtils\getMessagesFileName
getMessagesFileName( $code)
Definition: LanguageNameUtils.php:305
MediaWiki\Languages\LanguageNameUtils\isKnownLanguageTag
isKnownLanguageTag(string $tag)
Returns true if a language code is an IETF tag known to MediaWiki.
Definition: LanguageNameUtils.php:163
MediaWiki\Languages\LanguageNameUtils
A service that provides utilities to do with language names and codes.
Definition: LanguageNameUtils.php:43
MediaWiki\Languages\LanguageNameUtils\CONSTRUCTOR_OPTIONS
const CONSTRUCTOR_OPTIONS
Definition: LanguageNameUtils.php:82
MWException
MediaWiki exception.
Definition: MWException.php:29
MediaWiki\Config\ServiceOptions
A class for passing options to services.
Definition: ServiceOptions.php:27
MediaWiki\Languages\LanguageNameUtils\$options
ServiceOptions $options
Definition: LanguageNameUtils.php:65
MediaWiki\Languages\LanguageNameUtils\getJsonMessagesFileName
getJsonMessagesFileName( $code)
Definition: LanguageNameUtils.php:317
MediaWiki\Languages\LanguageNameUtils\getLanguageNamesUncached
getLanguageNamesUncached( $inLanguage, $include)
Uncached helper for getLanguageNames.
Definition: LanguageNameUtils.php:210
MediaWiki\Languages\LanguageNameUtils\$hookRunner
HookRunner $hookRunner
Definition: LanguageNameUtils.php:88
MediaWiki\Languages\LanguageNameUtils\SUPPORTED
const SUPPORTED
Return in getLanguageName(s) only the languages for which we have at least some localisation.
Definition: LanguageNameUtils.php:62
MediaWiki\Languages\LanguageNameUtils\DEFINED
const DEFINED
Return in getLanguageName(s) only the languages that are defined by MediaWiki.
Definition: LanguageNameUtils.php:57
MediaWiki\HookContainer\HookContainer
HookContainer class.
Definition: HookContainer.php:45
MediaWiki\HookContainer\HookRunner
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:557
MediaWiki\Languages\LanguageNameUtils\AUTONYMS
const AUTONYMS
Return autonyms in getLanguageName(s).
Definition: LanguageNameUtils.php:47
MediaWiki\Languages\Data\Names\$names
static $names
@phpcs-require-sorted-array
Definition: Names.php:44
MediaWiki\Languages\LanguageNameUtils\getLanguageNames
getLanguageNames( $inLanguage=self::AUTONYMS, $include=self::DEFINED)
Get an array of language names, indexed by code.
Definition: LanguageNameUtils.php:188
$IP
$IP
Definition: WebStart.php:49
MediaWiki\Languages\LanguageNameUtils\$languageNameCache
HashBagOStuff null $languageNameCache
Cache for language names.
Definition: LanguageNameUtils.php:71
MediaWiki\Languages\LanguageNameUtils\getLanguageName
getLanguageName( $code, $inLanguage=self::AUTONYMS, $include=self::ALL)
Definition: LanguageNameUtils.php:279
MediaWiki\Config\ServiceOptions\assertRequiredOptions
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
Definition: ServiceOptions.php:71