47 if ( $this->
getConfig()->
get( MainConfigNames::ExtraLanguageNames ) !== [] ) {
48 $this->
fatalError(
'You have entries in $wgExtraLanguageNames. Needs to be empty for this script.' );
53 $ulsLanguages = $this->getLanguageData()[
'languages' ];
54 foreach ( $ulsLanguages as $languageCode => $languageEntry ) {
56 if ( isset( $languageEntry[ 2 ] ) ) {
57 $languageNames[
'autonyms' ][ $languageCode ] = $languageEntry[ 2 ];
65 ->getLanguageNames( LanguageNameUtils::AUTONYMS, LanguageNameUtils::ALL );
66 foreach ( array_keys( $mwLanguages ) as $languageCode ) {
69 $languageNames[ $languageCode ] = LanguageNames::getNames( $languageCode, 0, 2 );
73 foreach ( $languageNames as $translations ) {
74 foreach ( $translations as $targetLanguage => $translation ) {
75 $translation = mb_strtolower( $translation );
76 $translation = trim( $translation );
81 $basicForm = preg_replace(
'/\(.+\)$/',
'', $translation );
82 $words = preg_split(
'/[\s]+/u', $basicForm, -1, PREG_SPLIT_NO_EMPTY );
84 foreach ( $words as $index => $word ) {
85 $bucket = LanguageNameSearch::getIndex( $word );
88 $display = $translation;
91 if ( mb_strlen( $word ) < 3 ) {
96 $display =
"$word — $translation";
98 $buckets[$bucket][$type][$display] = $targetLanguage;
108 $specialLanguages = [
110 'abr' => [
'bono',
'brong' ],
112 'ach' => [
'leb acoli' ],
114 'ayh' => [
'حضرمية' ],
116 'ca' => [
'valencia' ],
118 'cdo' => [
'chinese min dong' ],
120 'cko' => [
'chakosi',
'chokosi',
'tchokossi' ],
124 'dtp' => [
'bundu-liwan, dusun' ],
126 'es' => [
'castellano' ],
128 'hy' => [
'hayeren' ],
130 'ja' => [
'nihongo',
'にほんご' ],
132 'jv-java' => [
'jawa hanacaraka' ],
134 'ka' => [
'kartuli',
'qartuli' ],
139 'laj' => [
'leb lango',
'lango, leb' ],
141 'lue' => [
'luvale, chi-' ],
143 'shn' => [
'ၽႃႇသႃႇတႆး',
'လိၵ်ႈတႆး' ],
167 'mnw' => [
'ဘာသာ မန်' ],
172 'mui' => [
'musi palembang' ],
174 'pnb' => [
'punjabi western' ],
176 'tdd' => [
'ᥖᥭᥰᥖᥬᥳᥑᥨᥒᥰ' ],
178 'wlx' => [
'waali',
'waalii' ],
181 'zh-hans' => [
'chinese simplified' ],
182 'zh-hant' => [
'chinese traditional' ],
184 'zh-min-nan' => [
'chinese min nan' ],
187 foreach ( $specialLanguages as $targetLanguage => $translations ) {
188 foreach ( $translations as $translation ) {
189 $bucket = LanguageNameSearch::getIndex( $translation );
190 $buckets[$bucket][
'prefix'][$translation] = $targetLanguage;
200 foreach ( $buckets as &$bucketTypes ) {
201 $lengths[] = array_sum( array_map(
'count', $bucketTypes ) );
203 krsort( $bucketTypes );
205 foreach ( $bucketTypes as &$bucket ) {
210 $count = count( $buckets );
211 $min = min( $lengths );
212 $max = max( $lengths );
213 $median = $lengths[ceil( $count / 2 )];
214 $avg = array_sum( $lengths ) / $count;
215 $this->
output(
"Bucket stats:\n - $count buckets\n - smallest has $min entries\n" );
216 $this->
output(
" - largest has $max entries\n - median size is $median entries\n" );
217 $this->
output(
" - average size is $avg entries\n" );
219 $this->generateFile( $buckets );