40 'en' => [ 1, 1500,
'multiple' ],
41 'zh-hans' => [ 2, 1300,
'asia' ],
42 'zh-hant' => [ 2, 1300,
'asia' ],
43 'hi' => [ 3, 550,
'asia' ],
44 'ar' => [ 4, 530,
'multiple' ],
45 'es' => [ 5, 500,
'multiple' ],
46 'ms' => [ 6, 300,
'asia' ],
47 'pt' => [ 7, 290,
'multiple' ],
48 'pt-br' => [ 7, 290,
'america' ],
49 'ru' => [ 8, 278,
'multiple' ],
50 'id' => [ 9, 250,
'asia' ],
51 'bn' => [ 10, 230,
'asia' ],
52 'fr' => [ 11, 200,
'multiple' ],
53 'de' => [ 12, 185,
'europe' ],
54 'ja' => [ 13, 132,
'asia' ],
55 'fa' => [ 14, 107,
'asia' ],
56 'pnb' => [ 15, 104,
'asia' ],
57 'tl' => [ 16, 90,
'asia' ],
58 'mr' => [ 17, 90,
'asia' ],
59 'vi' => [ 18, 80,
'asia' ],
60 'jv' => [ 19, 80,
'asia' ],
61 'te' => [ 20, 80,
'asia' ],
62 'ko' => [ 21, 78,
'asia' ],
63 'wuu' => [ 22, 77,
'asia' ],
64 'arz' => [ 23, 76,
'africa' ],
65 'th' => [ 24, 73,
'asia' ],
66 'yue' => [ 25, 71,
'asia' ],
67 'tr' => [ 26, 70,
'multiple' ],
68 'it' => [ 27, 70,
'europe' ],
69 'ta' => [ 28, 66,
'asia' ],
70 'ur' => [ 29, 60,
'asia' ],
71 'my' => [ 30, 52,
'asia' ],
72 'sw' => [ 31, 50,
'africa' ],
73 'nan' => [ 32, 49,
'asia' ],
74 'kn' => [ 33, 47,
'asia' ],
75 'gu' => [ 34, 46,
'asia' ],
76 'uk' => [ 35, 45,
'europe' ],
77 'pl' => [ 36, 43,
'europe' ],
78 'sd' => [ 37, 41,
'asia' ],
79 'ha' => [ 38, 39,
'africa' ],
80 'ml' => [ 39, 37,
'asia' ],
81 'gan-hans' => [ 40, 35,
'asia' ],
82 'gan-hant' => [ 40, 35,
'asia' ],
83 'hak' => [ 41, 34,
'asia' ],
84 'or' => [ 42, 31,
'asia' ],
85 'ne' => [ 43, 30,
'asia' ],
86 'ro' => [ 44, 28,
'europe' ],
87 'su' => [ 45, 27,
'asia' ],
88 'az' => [ 46, 27,
'asia' ],
89 'nl' => [ 47, 27,
'europe' ],
90 'zu' => [ 48, 26,
'africa' ],
91 'ps' => [ 49, 26,
'asia' ],
92 'ckb' => [ 50, 26,
'asia' ],
93 'ku-latn' => [ 50, 26,
'asia' ],
109 'ext-0-wikimedia' => 50
118 'ext-0-wikimedia' => 25,
129 'bat-smg' =>
'bat-smg',
130 'cbk-zam' =>
'cbk-zam',
131 'map-bms' =>
'map-bms',
132 'nds-nl' =>
'nds-nl',
133 'roa-rup' =>
'roa-rup',
134 'roa-tara' =>
'roa-tara',
137 'be-tarask' =>
'be-x-old',
141 'lzh' =>
'zh-classical',
142 'nan' =>
'zh-min-nan',
182 'zh-classical' =>
'',
193 public function __construct() {
194 parent::__construct();
195 $this->addDescription(
'Script to generate statistics about the localisation ' .
196 'level of one or more message groups.' );
199 '(optional) Comma separated list of groups',
205 '(optional) csv: Comma Separated Values, wiki: MediaWiki syntax, ' .
206 'text: Text with tabs. Default: default',
212 '(optional) Comma separated list of languages to be skipped',
218 '(optional) Skip languages that do not have any localisation at all'
222 '(optional) Page name for legend to be transcluded at the top of the details table',
228 '(optional) Page name for legend to be transcluded at the top of the summary table',
234 '(optional) Add column for fuzzy counts'
238 '(optional) Add column for number of speakers (est.). ' .
239 'Only valid when combined with "most"'
243 '(optional) Do not add localised language name if I18ntags is installed'
247 '(optional) Add a continent column. Only available when output is ' .
248 '"wiki" or not specified.'
252 '(optional) Add a summary with counts and scores per continent category ' .
253 'and totals. Only available for a valid "most" value.',
259 'Only output WMF language code and weighted score for all ' .
260 'language codes for weighing group "wikimedia" in CSV. This ' .
261 'report must keep a stable layout as it is used/will be ' .
262 'used in the Wikimedia statistics.'
266 '(optional) "mediawiki" or "wikimedia". Report on the 50 most ' .
267 'spoken languages. Skipzero is ignored. If a valid scope is ' .
268 'defined, the group list and fuzzy are ignored and the ' .
269 'localisation levels are weighted and reported.',
273 $this->requireExtension(
'Translate' );
276 public function execute() {
277 $output = $this->getOption(
'output',
'default' );
282 $out =
new WikiStatsOutput();
285 $out =
new TextStatsOutput();
288 $out =
new CsvStatsOutput();
295 if ( $this->hasOption(
'skiplanguages' ) ) {
296 $skipLanguages = array_map(
298 explode(
',', $this->getOption(
'skiplanguages' ) )
302 $reportScore =
false;
304 $most = $this->getOption(
'most' );
306 if ( $most && isset( $this->localisedWeights[$most] ) ) {
309 foreach ( $this->localisedWeights[$most] as $weight ) {
310 $weights[] = $weight;
316 if ( ( $output ===
'wiki' || $output ===
'default' ) &&
317 !$this->hasOption(
'nol10n' )
322 $wmfscore = $this->hasOption(
'wmfscore' );
326 if ( $reportScore ) {
327 $reqGroups = array_keys( $this->localisedWeights[$most] );
328 } elseif ( $wmfscore ) {
329 $reqGroups = array_keys( $this->localisedWeights[
'wikimedia'] );
331 $reqGroups = array_map(
'trim', explode(
',', $this->getOption(
'groups' ) ) );
335 $allGroups = MessageGroups::singleton()->getGroups();
338 foreach ( $reqGroups as $id ) {
340 $id = str_replace(
'_',
' ', $id );
341 if ( isset( $allGroups[$id] ) ) {
342 $groups[$id] = $allGroups[$id];
344 $this->output(
"Unknown group: $id" );
350 $out =
new CsvStatsOutput();
354 foreach ( $this->localisedWeights[
'wikimedia'] as $weight ) {
355 $weights[] = $weight;
360 if ( !count( $groups ) ) {
361 $this->fatalError(
'No groups given' );
365 $languages = Utilities::getLanguageNames( LanguageNameUtils::AUTONYMS );
369 if ( $this->hasOption(
'legenddetail' ) ) {
370 $out->addFreeText(
'{{' . $this->getOption(
'legenddetail' ) .
"}}\n" );
374 if ( $reportScore ) {
376 foreach ( $this->localisedWeights[
'wikimedia'] as $weight ) {
377 $totalWeight += $weight;
380 foreach ( $this->localisedWeights[$most] as $weight ) {
381 $totalWeight += $weight;
386 $showContinent = $this->getOption(
'continent' );
394 $out->element( ( $l10n ?
'{{int:translate-gs-pos}}' :
'Pos.' ), true );
397 $out->element( ( $l10n ?
'{{int:translate-gs-code}}' :
'Code' ), true );
398 $out->element( ( $l10n ?
'{{int:translate-page-language}}' :
'Language' ), true );
399 if ( $showContinent ) {
400 $out->element( ( $l10n ?
'{{int:translate-gs-continent}}' :
'Continent' ), true );
403 if ( $most && $this->hasOption(
'speakers' ) ) {
404 $out->element( ( $l10n ?
'{{int:translate-gs-speakers}}' :
'Speakers' ), true );
407 if ( $reportScore ) {
409 ( $l10n ?
'{{int:translate-gs-score}}' :
'Score' ) .
' (' . $totalWeight .
')',
415 foreach ( $groups as $g ) {
417 if ( $reportScore ) {
419 $heading = $g->getLabel() .
' (' . $this->localisedWeights[$most][$gid] .
')';
421 $heading = $g->getLabel();
423 $out->element( $heading,
true );
424 if ( !$reportScore && $this->hasOption(
'fuzzy' ) ) {
425 $out->element( ( $l10n ?
'{{int:translate-percentage-fuzzy}}' :
'Fuzzy' ), true );
433 foreach ( $languages as $code => $name ) {
435 if ( in_array( $code, $skipLanguages ) ) {
441 foreach ( $groups as $groupName => $g ) {
445 foreach ( $languages as $code => $name ) {
447 if ( !$most && in_array( $code, $skipLanguages ) ) {
452 if ( $wmfscore && isset( $this->wikimediaCodeMap[$code] )
453 && $this->wikimediaCodeMap[$code] ===
''
459 if ( $most && !isset( $this->mostSpokenLanguages[$code] ) ) {
467 $rows[$code][] = [
false, $translated, $total ];
469 if ( $this->hasOption(
'fuzzy' ) ) {
470 $rows[$code][] = [
true, $fuzzy, $total ];
477 if ( $this->hasOption(
'summary' ) ) {
482 foreach ( $languages as $code => $name ) {
484 if ( !$most && in_array( $code, $skipLanguages ) ) {
489 if ( $wmfscore && isset( $this->wikimediaCodeMap[$code] )
490 && $this->wikimediaCodeMap[$code] ===
''
496 if ( $most && !isset( $this->mostSpokenLanguages[$code] ) ) {
500 $columns = $rows[$code];
503 foreach ( $columns as $fields ) {
504 if ( (
int)$fields[1] !== 0 ) {
510 if ( $allZero && $this->hasOption(
'skipzero' ) ) {
521 $out->element( $this->mostSpokenLanguages[$code][0] );
527 $out->element( $code );
529 if ( $l10n && function_exists(
'efI18nTagsInit' ) ) {
530 $out->element(
'{{#languagename:' . $code .
'}}' );
532 $out->element( $name );
537 if ( $showContinent ) {
538 if ( $this->mostSpokenLanguages[$code][2] ===
'multiple' ) {
539 $continent = ( $l10n ?
'{{int:translate-gs-multiple}}' :
'Multiple' );
542 '{{int:timezoneregion-' . $this->mostSpokenLanguages[$code][2] .
'}}' :
543 ucfirst( $this->mostSpokenLanguages[$code][2] );
546 $out->element( $continent );
550 if ( $most && $this->hasOption(
'speakers' ) ) {
551 $out->element( number_format( $this->mostSpokenLanguages[$code][1] ) );
555 if ( $reportScore ) {
561 foreach ( $columns as $fields ) {
562 list( , $upper, $total ) = $fields;
564 $score += ( $weights[$i] * $upper ) / $total;
569 $score = number_format( $score, 0 );
572 $continent = $this->mostSpokenLanguages[$code][2];
573 if ( isset( $summary[$continent] ) ) {
574 $newcount = $summary[$continent][0] + 1;
575 $newscore = $summary[$continent][1] + (int)$score;
581 $summary[$continent] = [ $newcount, $newscore ];
588 $wmfcode = $this->wikimediaCodeMap[$code] ?? explode(
'-', $code, 2 )[0];
590 if ( isset( $wmfscores[$wmfcode] ) ) {
591 $count = $wmfscores[$wmfcode][
'count'] + 1;
592 $tmpWmfScore = (int)$wmfscores[$wmfcode][
'score'];
593 $tmpWmfCount = (int)$wmfscores[$wmfcode][
'count'];
594 $score = ( ( $tmpWmfCount * $tmpWmfScore ) + (
int)$score ) / $count;
595 $wmfscores[$wmfcode] = [
'score' => $score,
'count' => $count ];
597 $wmfscores[$wmfcode] = [
'score' => $score,
'count' => 1 ];
600 $out->element( $score );
606 foreach ( $columns as $fields ) {
607 list( $invert, $upper, $total ) = $fields;
608 $c = $out->formatPercent( $upper, $total, $invert );
618 if ( $reportScore && $this->hasOption(
'summary' ) ) {
619 if ( $this->hasOption(
'legendsummary' ) ) {
620 $out->addFreeText(
'{{' . $this->getOption(
'legendsummary' ) .
"}}\n" );
623 $out->summaryheading();
627 $out->element( $l10n ?
'{{int:translate-gs-continent}}' :
'Continent', true );
628 $out->element( $l10n ?
'{{int:translate-gs-count}}' :
'Count', true );
629 $out->element( $l10n ?
'{{int:translate-gs-avgscore}}' :
'Avg. score', true );
637 foreach ( $summary as $key => $values ) {
640 if ( $key ===
'multiple' ) {
641 $out->element( $l10n ?
'{{int:translate-gs-multiple}}' :
'Multiple' );
643 $out->element( $l10n ?
'{{int:timezoneregion-' . $key .
'}}' : ucfirst( $key ) );
645 $out->element( $values[0] );
646 $out->element( number_format( $values[1] / $values[0] ) );
650 $totals[0] += $values[0];
651 $totals[1] += $values[1];
655 $out->element( $l10n ?
'{{int:translate-gs-total}}' :
'Total' );
656 $out->element( $totals[0] );
657 $out->element( number_format( $totals[1] / $totals[0] ) );
667 foreach ( $wmfscores as $code => $stats ) {
668 echo $code .
';' . number_format( $stats[
'score'] ) .
";\n";