MediaWiki  master
LanguageNameUtils.php
Go to the documentation of this file.
1 <?php
29 namespace MediaWiki\Languages;
30 
31 use HashBagOStuff;
32 use Hooks;
35 use MWException;
36 
/**
 * Return autonyms in getLanguageName(s).
 */
public const AUTONYMS = null;

/**
 * Return all known languages in getLanguageName(s).
 */
public const ALL = 'all';

/**
 * Return in getLanguageName(s) only the languages that are defined by MediaWiki.
 */
public const DEFINED = 'mw';

/**
 * Return in getLanguageName(s) only the languages for which we have at least some
 * localisation.
 */
public const SUPPORTED = 'mwfile';
64 
/** @var ServiceOptions Options object handed to the constructor; read for 'ExtraLanguageNames' and 'UsePigLatinVariant' */
66  private $options;
67 
73 
/** @var array Cache for validity of language codes: code string => bool, filled lazily by isValidCode() */
78  private $validCodeCache = [];
79 
/**
 * Option names that the ServiceOptions object given to the constructor is checked
 * against (see the assertRequiredOptions() call in __construct()).
 */
public const CONSTRUCTOR_OPTIONS = [ 'ExtraLanguageNames', 'UsePigLatinVariant' ];
84 
/**
 * @param ServiceOptions $options Checked against self::CONSTRUCTOR_OPTIONS and then
 *   kept for later option lookups
 */
public function __construct( ServiceOptions $options ) {
	$this->options = $options;
	// Verify the stored options carry the keys this service relies on
	$this->options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
}
92 
/**
 * Checks whether any localisation is available for that language tag in MediaWiki
 * (MessagesXx.php or xx.json exists).
 *
 * @param string $code Language tag
 * @return bool Whether a readable messages file exists for $code
 */
public function isSupportedLanguage( string $code ) : bool {
	// 'qqq' is a special code for internal use: it is never reported as supported,
	// even though there is a qqq.json
	if ( $code === 'qqq' || !$this->isValidBuiltInCode( $code ) ) {
		return false;
	}

	if ( is_readable( $this->getMessagesFileName( $code ) ) ) {
		return true;
	}

	return is_readable( $this->getJsonMessagesFileName( $code ) );
}
113 
/**
 * Returns true if a language code string is of a valid form, whether or not it exists.
 * The verdict for each code is remembered in $this->validCodeCache.
 *
 * @param string $code Language code to check
 * @return bool
 */
public function isValidCode( string $code ) : bool {
	$remembered = $this->validCodeCache[$code] ?? null;
	if ( $remembered !== null ) {
		return $remembered;
	}

	// People think language codes are HTML-safe, so enforce it. Ideally we should only
	// allow a-zA-Z0-9- but .+ and other chars are often used for {{int:}} hacks. See bugs
	// T39564, T39587, T38938.
	// The character blacklist also protects against path traversal.
	$hasOnlySafeChars = strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code );
	$isValid = $hasOnlySafeChars &&
		!preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code );

	$this->validCodeCache[$code] = $isValid;
	return $isValid;
}
134 
/**
 * Returns true if a language code is of a valid form for built-in use: at least two
 * characters, each a lower-case ASCII letter, a digit or a hyphen.
 *
 * @param string $code Language code to check
 * @return bool
 */
public function isValidBuiltInCode( string $code ) : bool {
	// preg_match() yields 1 on a match, 0 or false otherwise
	return preg_match( '/^[a-z0-9-]{2,}$/', $code ) === 1;
}
145 
/**
 * Returns true if a language code is an IETF tag known to MediaWiki.
 *
 * @param string $tag Tag to look up
 * @return bool
 */
public function isKnownLanguageTag( string $tag ) : bool {
	// Quick escape for invalid input to avoid exceptions down the line when code tries to
	// process tags which are not valid at all.
	if ( !$this->isValidBuiltInCode( $tag ) ) {
		return false;
	}

	// Known either from the built-in name table, or because a name for the tag can be
	// obtained in its own language.
	return isset( Data\Names::$names[$tag] ) || $this->getLanguageName( $tag, $tag ) !== '';
}
166 
/**
 * Get an array of language names, indexed by code.
 *
 * Results are memoised per ( $inLanguage, $include ) combination in a HashBagOStuff
 * limited to 20 keys, created on first use.
 *
 * @param null|string $inLanguage Code of the language in which to return the names;
 *   self::AUTONYMS (the default) returns autonyms
 * @param string $include One of self::ALL, self::DEFINED (the default) or self::SUPPORTED
 * @return array Language code => language name
 */
public function getLanguageNames( $inLanguage = self::AUTONYMS, $include = self::DEFINED ) {
	// Autonym requests get a key namespace of their own. Previously the autonym key was
	// 'null:<include>', which is also the key produced when a caller passes the string
	// 'null' as $inLanguage, so the two requests could be served each other's results.
	$cacheKey = $inLanguage === self::AUTONYMS
		? "autonyms:$include"
		: "lang:$inLanguage:$include";

	if ( !$this->languageNameCache ) {
		$this->languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] );
	}

	$ret = $this->languageNameCache->get( $cacheKey );
	// The cache reports a miss as false. Compare strictly so that a legitimately empty
	// result array counts as a hit instead of being recomputed on every call.
	if ( $ret === false ) {
		$ret = $this->getLanguageNamesUncached( $inLanguage, $include );
		$this->languageNameCache->set( $cacheKey, $ret );
	}
	return $ret;
}
192 
/**
 * Uncached helper for getLanguageNames.
 *
 * @param null|string $inLanguage Language to return the names in, or self::AUTONYMS
 * @param string $include self::ALL, self::SUPPORTED, or anything else for self::DEFINED
 * @return array Language code => language name, sorted by code
 */
private function getLanguageNamesUncached( $inLanguage, $include ) {
	$wantAutonyms = $inLanguage === self::AUTONYMS;

	// If passed an invalid language code to use, fallback to en
	if ( !$wantAutonyms && !$this->isValidCode( $inLanguage ) ) {
		$inLanguage = 'en';
	}

	$names = [];
	if ( !$wantAutonyms ) {
		# TODO: also include for self::AUTONYMS, when this code is more efficient
		Hooks::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] );
	}

	// Configured extra names take precedence over the built-in table on duplicate codes
	$mwNames = $this->options->get( 'ExtraLanguageNames' ) + Data\Names::$names;
	if ( $this->options->get( 'UsePigLatinVariant' ) ) {
		// Pig Latin (for variant development)
		$mwNames['en-x-piglatin'] = 'Igpay Atinlay';
	}

	foreach ( $mwNames as $mwCode => $mwName ) {
		# Keep a hook-provided name, except for the requested language itself, where
		# MediaWiki's own native name is preferred.
		if ( isset( $names[$mwCode] ) && $mwCode !== $inLanguage ) {
			continue;
		}
		$names[$mwCode] = $mwName;
	}

	if ( $include === self::ALL ) {
		ksort( $names );
		return $names;
	}

	// Narrow the result down to the codes MediaWiki itself defines
	$definedNames = [];
	foreach ( $mwNames as $definedCode => $unusedName ) {
		$definedNames[$definedCode] = $names[$definedCode];
	}

	if ( $include !== self::SUPPORTED ) {
		# self::DEFINED option; default if it's not one of the other two options
		# (self::ALL/self::SUPPORTED)
		ksort( $definedNames );
		return $definedNames;
	}

	# We do this using a foreach over the codes instead of a directory loop so that messages
	# files in extensions will work correctly.
	$supportedNames = [];
	foreach ( $definedNames as $code => $name ) {
		if ( is_readable( $this->getMessagesFileName( $code ) ) ||
			is_readable( $this->getJsonMessagesFileName( $code ) )
		) {
			$supportedNames[$code] = $name;
		}
	}

	ksort( $supportedNames );
	return $supportedNames;
}
258 
/**
 * Look up the name of a single language.
 *
 * @param string $code Language code; lower-cased before the lookup
 * @param null|string $inLanguage Language to return the name in, or self::AUTONYMS
 *   (the default) for the autonym
 * @param string $include Passed through to getLanguageNames(); defaults to self::ALL
 * @return string The language name, or an empty string if there is no entry for $code
 */
public function getLanguageName( $code, $inLanguage = self::AUTONYMS, $include = self::ALL ) {
	$lookupKey = strtolower( $code );
	$allNames = $this->getLanguageNames( $inLanguage, $include );

	if ( !isset( $allNames[$lookupKey] ) ) {
		return '';
	}
	return $allNames[$lookupKey];
}
273 
/**
 * Get the name of a file for a certain language code.
 *
 * The code has its first character upper-cased and every '-' replaced by '_', and is
 * then placed between $prefix and $suffix.
 *
 * @param string $prefix Prepended to the transformed code
 * @param string $code Language code
 * @param string $suffix Appended to the transformed code
 * @throws MWException If $code is not a valid built-in code
 * @return string
 */
public function getFileName( $prefix, $code, $suffix = '.php' ) {
	if ( $this->isValidBuiltInCode( $code ) ) {
		$baseName = str_replace( '-', '_', ucfirst( $code ) );
		return $prefix . $baseName . $suffix;
	}

	throw new MWException( "Invalid language code \"$code\"" );
}
289 
/**
 * Get the path of the Messages*.php file for a language code.
 *
 * The default path lies under $IP/languages/messages; handlers of the
 * 'Language::getMessagesFileName' hook receive it by reference and may replace it.
 *
 * @param string $code Language code
 * @throws MWException If $code is not a valid built-in code (raised by getFileName())
 * @return string
 */
public function getMessagesFileName( $code ) {
	global $IP;

	$pathPrefix = "$IP/languages/messages/Messages";
	$path = $this->getFileName( $pathPrefix, $code, '.php' );
	Hooks::run( 'Language::getMessagesFileName', [ $code, &$path ] );

	return $path;
}
300 
/**
 * Get the path of the JSON messages file for a language code, located under
 * $IP/languages/i18n.
 *
 * @param string $code Language code
 * @throws MWException If $code is not a valid built-in code
 * @return string
 */
public function getJsonMessagesFileName( $code ) {
	global $IP;

	if ( $this->isValidBuiltInCode( $code ) ) {
		return "$IP/languages/i18n/$code.json";
	}

	throw new MWException( "Invalid language code \"$code\"" );
}
315 }
isSupportedLanguage(string $code)
Checks whether any localisation is available for that language tag in MediaWiki (MessagesXx.php or xx.json exists).
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
getFileName( $prefix, $code, $suffix='.php')
Get the name of a file for a certain language code.
const SUPPORTED
Return in getLanguageName(s) only the languages for which we have at least some localisation.
static getTitleInvalidRegex()
Returns a simple regex that will match on characters and sequences invalid in titles.
isValidCode(string $code)
Returns true if a language code string is of a valid form, whether or not it exists.
$IP
Definition: WebStart.php:41
isKnownLanguageTag(string $tag)
Returns true if a language code is an IETF tag known to MediaWiki.
getLanguageName( $code, $inLanguage=self::AUTONYMS, $include=self::ALL)
array $validCodeCache
Cache for validity of language codes.
getLanguageNamesUncached( $inLanguage, $include)
Uncached helper for getLanguageNames.
A class for passing options to services.
const ALL
Return all known languages in getLanguageName(s).
isValidBuiltInCode(string $code)
Returns true if a language code is of a valid form for the purposes of internal customisation of MediaWiki.
HashBagOStuff null $languageNameCache
Cache for language names.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys, without regard for order.
const AUTONYMS
Return autonyms in getLanguageName(s).
const DEFINED
Return in getLanguageName(s) only the languages that are defined by MediaWiki.
getLanguageNames( $inLanguage=self::AUTONYMS, $include=self::DEFINED)
Get an array of language names, indexed by code.
A service that provides utilities to do with language names and codes.
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200