41 'en' => [ 1, 1500,
'multiple' ],
42 'zh-hans' => [ 2, 1300,
'asia' ],
43 'zh-hant' => [ 2, 1300,
'asia' ],
44 'hi' => [ 3, 550,
'asia' ],
45 'ar' => [ 4, 530,
'multiple' ],
46 'es' => [ 5, 500,
'multiple' ],
47 'ms' => [ 6, 300,
'asia' ],
48 'pt' => [ 7, 290,
'multiple' ],
49 'pt-br' => [ 7, 290,
'america' ],
50 'ru' => [ 8, 278,
'multiple' ],
51 'id' => [ 9, 250,
'asia' ],
52 'bn' => [ 10, 230,
'asia' ],
53 'fr' => [ 11, 200,
'multiple' ],
54 'de' => [ 12, 185,
'europe' ],
55 'ja' => [ 13, 132,
'asia' ],
56 'fa' => [ 14, 107,
'asia' ],
57 'pnb' => [ 15, 104,
'asia' ],
58 'tl' => [ 16, 90,
'asia' ],
59 'mr' => [ 17, 90,
'asia' ],
60 'vi' => [ 18, 80,
'asia' ],
61 'jv' => [ 19, 80,
'asia' ],
62 'te' => [ 20, 80,
'asia' ],
63 'ko' => [ 21, 78,
'asia' ],
64 'wuu' => [ 22, 77,
'asia' ],
65 'arz' => [ 23, 76,
'africa' ],
66 'th' => [ 24, 73,
'asia' ],
67 'yue' => [ 25, 71,
'asia' ],
68 'tr' => [ 26, 70,
'multiple' ],
69 'it' => [ 27, 70,
'europe' ],
70 'ta' => [ 28, 66,
'asia' ],
71 'ur' => [ 29, 60,
'asia' ],
72 'my' => [ 30, 52,
'asia' ],
73 'sw' => [ 31, 50,
'africa' ],
74 'nan' => [ 32, 49,
'asia' ],
75 'kn' => [ 33, 47,
'asia' ],
76 'gu' => [ 34, 46,
'asia' ],
77 'uk' => [ 35, 45,
'europe' ],
78 'pl' => [ 36, 43,
'europe' ],
79 'sd' => [ 37, 41,
'asia' ],
80 'ha' => [ 38, 39,
'africa' ],
81 'ml' => [ 39, 37,
'asia' ],
82 'gan-hans' => [ 40, 35,
'asia' ],
83 'gan-hant' => [ 40, 35,
'asia' ],
84 'hak' => [ 41, 34,
'asia' ],
85 'or' => [ 42, 31,
'asia' ],
86 'ne' => [ 43, 30,
'asia' ],
87 'ro' => [ 44, 28,
'europe' ],
88 'su' => [ 45, 27,
'asia' ],
89 'az' => [ 46, 27,
'asia' ],
90 'nl' => [ 47, 27,
'europe' ],
91 'zu' => [ 48, 26,
'africa' ],
92 'ps' => [ 49, 26,
'asia' ],
93 'ckb' => [ 50, 26,
'asia' ],
94 'ku-latn' => [ 50, 26,
'asia' ],
110 'ext-0-wikimedia' => 50
119 'ext-0-wikimedia' => 25,
130 'bat-smg' =>
'bat-smg',
131 'cbk-zam' =>
'cbk-zam',
132 'map-bms' =>
'map-bms',
133 'nds-nl' =>
'nds-nl',
134 'roa-rup' =>
'roa-rup',
135 'roa-tara' =>
'roa-tara',
138 'be-tarask' =>
'be-x-old',
142 'lzh' =>
'zh-classical',
143 'nan' =>
'zh-min-nan',
183 'zh-classical' =>
'',
/**
 * Configures the maintenance script: sets its description, supplies the help
 * texts for its command line options and declares the extension it needs.
 *
 * NOTE(review): only the help-text strings of the options are visible in this
 * excerpt; the calls that register them (and the option names) are not shown.
 */
194 public function __construct() {
195 parent::__construct();
196 $this->addDescription(
'Script to generate statistics about the localisation ' .
197 'level of one or more message groups.' );
// The strings below are option help texts; presumably each one is an argument
// of an option-registration call whose opening line is missing here — confirm
// against the full file.
200 '(optional) Comma separated list of groups',
206 '(optional) csv: Comma Separated Values, wiki: MediaWiki syntax, ' .
207 'text: Text with tabs. Default: default',
213 '(optional) Comma separated list of languages to be skipped',
219 '(optional) Skip languages that do not have any localisation at all'
223 '(optional) Page name for legend to be transcluded at the top of the details table',
229 '(optional) Page name for legend to be transcluded at the top of the summary table',
235 '(optional) Add column for fuzzy counts'
239 '(optional) Add column for number of speakers (est.). ' .
240 'Only valid when combined with "most"'
244 '(optional) Do not add localised language name if I18ntags is installed'
248 '(optional) Add a continent column. Only available when output is ' .
249 '"wiki" or not specified.'
253 '(optional) Add a summary with counts and scores per continent category ' .
254 'and totals. Only available for a valid "most" value.',
260 'Only output WMF language code and weighted score for all ' .
261 'language codes for weighing group "wikimedia" in CSV. This ' .
262 'report must keep a stable layout as it is used/will be ' .
263 'used in the Wikimedia statistics.'
267 '(optional) "mediawiki" or "wikimedia". Report on the 50 most ' .
268 'spoken languages. Skipzero is ignored. If a valid scope is ' .
269 'defined, the group list and fuzzy are ignored and the ' .
270 'localisation levels are weighted and reported.',
// The script declares the Translate extension as required.
274 $this->requireExtension(
'Translate' );
/**
 * Builds and emits the localisation statistics report.
 *
 * Visible flow: read the options, resolve the requested message groups,
 * collect per-language statistics for every group, then write a table through
 * the selected output writer. Depending on the options, the table gains a
 * weighted score column, a per-continent summary, or is replaced by a
 * "code;score;" listing echoed at the end.
 *
 * Calls fatalError() when none of the requested groups is known.
 *
 * NOTE(review): this excerpt omits many original lines (branch headers,
 * initialisations, closing braces), so the comments below describe only what
 * the visible statements establish.
 */
277 public function execute() {
// Requested output format; 'default' is used when the option is not given.
278 $output = $this->getOption(
'output',
'default' );
// Three writer classes can be instantiated; the conditions choosing between
// them are not visible here (presumably a switch on $output — confirm).
283 $out =
new WikiStatsOutput();
286 $out =
new TextStatsOutput();
289 $out =
new CsvStatsOutput();
// The languages to skip are passed as one comma separated string.
296 if ( $this->hasOption(
'skiplanguages' ) ) {
297 $skipLanguages = array_map(
299 explode(
',', $this->getOption(
'skiplanguages' ) )
303 $reportScore =
false;
// A 'most' value only takes effect when weights are defined for that scope.
305 $most = $this->getOption(
'most' );
307 if ( $most && isset( $this->localisedWeights[$most] ) ) {
// Copy the weights into a plain list so they can later be addressed by
// column position ($weights[$i]).
310 foreach ( $this->localisedWeights[$most] as $weight ) {
311 $weights[] = $weight;
// Condition on wiki/default output without the 'nol10n' option; the guarded
// statement is not visible (presumably it enables $l10n — confirm).
317 if ( ( $output ===
'wiki' || $output ===
'default' ) &&
318 !$this->hasOption(
'nol10n' )
323 $wmfscore = $this->hasOption(
'wmfscore' );
// Group ids come from the weight table of the active scope, or else from the
// comma separated 'groups' option (each id trimmed).
327 if ( $reportScore ) {
328 $reqGroups = array_keys( $this->localisedWeights[$most] );
329 } elseif ( $wmfscore ) {
330 $reqGroups = array_keys( $this->localisedWeights[
'wikimedia'] );
332 $reqGroups = array_map(
'trim', explode(
',', $this->getOption(
'groups' ) ) );
336 $allGroups = MessageGroups::singleton()->getGroups();
// Keep only the requested groups that exist; unknown ids are reported.
339 foreach ( $reqGroups as $id ) {
// Underscores in a requested id are treated as spaces before the lookup.
341 $id = str_replace(
'_',
' ', $id );
342 if ( isset( $allGroups[$id] ) ) {
343 $groups[$id] = $allGroups[$id];
345 $this->output(
"Unknown group: $id" );
// A CSV writer and the 'wikimedia' weights are set up here; the enclosing
// condition is not visible (presumably the wmfscore mode — confirm).
351 $out =
new CsvStatsOutput();
355 foreach ( $this->localisedWeights[
'wikimedia'] as $weight ) {
356 $weights[] = $weight;
// Abort when none of the requested groups could be resolved.
361 if ( !count( $groups ) ) {
362 $this->fatalError(
'No groups given' );
366 $languages = Utilities::getLanguageNames( LanguageNameUtils::AUTONYMS );
// The optional legend page name is wrapped in {{ }} and written as free text
// ahead of the table.
370 if ( $this->hasOption(
'legenddetail' ) ) {
371 $out->addFreeText(
'{{' . $this->getOption(
'legenddetail' ) .
"}}\n" );
// $totalWeight accumulates the weights; two summing loops are visible (the
// 'wikimedia' weights and the $most weights) but the branching between them
// is only partly shown.
375 if ( $reportScore ) {
377 foreach ( $this->localisedWeights[
'wikimedia'] as $weight ) {
378 $totalWeight += $weight;
381 foreach ( $this->localisedWeights[$most] as $weight ) {
382 $totalWeight += $weight;
387 $showContinent = $this->getOption(
'continent' );
// Header cells: when $l10n is truthy an {{int:...}} message reference is
// emitted, otherwise the plain English label.
395 $out->element( ( $l10n ?
'{{int:translate-gs-pos}}' :
'Pos.' ), true );
398 $out->element( ( $l10n ?
'{{int:translate-gs-code}}' :
'Code' ), true );
399 $out->element( ( $l10n ?
'{{int:translate-page-language}}' :
'Language' ), true );
400 if ( $showContinent ) {
401 $out->element( ( $l10n ?
'{{int:translate-gs-continent}}' :
'Continent' ), true );
404 if ( $most && $this->hasOption(
'speakers' ) ) {
405 $out->element( ( $l10n ?
'{{int:translate-gs-speakers}}' :
'Speakers' ), true );
// The score header shows the total weight in parentheses.
408 if ( $reportScore ) {
410 ( $l10n ?
'{{int:translate-gs-score}}' :
'Score' ) .
' (' . $totalWeight .
')',
// One header cell per group; in score mode the label is followed by the
// group's weight in parentheses.
416 foreach ( $groups as $g ) {
418 if ( $reportScore ) {
420 $heading = $g->getLabel() .
' (' . $this->localisedWeights[$most][$gid] .
')';
422 $heading = $g->getLabel();
424 $out->element( $heading,
true );
// The fuzzy header cell is only added outside score mode.
425 if ( !$reportScore && $this->hasOption(
'fuzzy' ) ) {
426 $out->element( ( $l10n ?
'{{int:translate-percentage-fuzzy}}' :
'Fuzzy' ), true );
// A pass over all languages that tests the skip list; the loop body is not
// visible in this excerpt.
// NOTE(review): in_array() is called without the strict flag here and below.
434 foreach ( $languages as $code => $name ) {
436 if ( in_array( $code, $skipLanguages ) ) {
// Collect the statistics group by group into $rows, keyed by language code.
442 foreach ( $groups as $groupName => $g ) {
443 $stats = MessageGroupStats::forGroup( $groupName );
446 foreach ( $languages as $code => $name ) {
// The skip list is only consulted when no 'most' scope is active.
448 if ( !$most && in_array( $code, $skipLanguages ) ) {
// In wmfscore mode, codes mapped to an empty string are tested here.
453 if ( $wmfscore && isset( $this->wikimediaCodeMap[$code] )
454 && $this->wikimediaCodeMap[$code] ===
''
// With a 'most' scope, codes missing from mostSpokenLanguages are tested here.
460 if ( $most && !isset( $this->mostSpokenLanguages[$code] ) ) {
464 $total = $stats[$code][MessageGroupStats::TOTAL];
465 $translated = $stats[$code][MessageGroupStats::TRANSLATED];
466 $fuzzy = $stats[$code][MessageGroupStats::FUZZY];
// Each column is a triple; it is later unpacked as [ $invert, $upper, $total ]
// and handed to formatPercent().
468 $rows[$code][] = [
false, $translated, $total ];
470 if ( $this->hasOption(
'fuzzy' ) ) {
471 $rows[$code][] = [
true, $fuzzy, $total ];
478 if ( $this->hasOption(
'summary' ) ) {
// Emit one table row per language, testing the same three conditions as the
// collection loop above.
483 foreach ( $languages as $code => $name ) {
485 if ( !$most && in_array( $code, $skipLanguages ) ) {
490 if ( $wmfscore && isset( $this->wikimediaCodeMap[$code] )
491 && $this->wikimediaCodeMap[$code] ===
''
497 if ( $most && !isset( $this->mostSpokenLanguages[$code] ) ) {
501 $columns = $rows[$code];
// Check whether any column has a non-zero count (second field of the triple).
504 foreach ( $columns as $fields ) {
505 if ( (
int)$fields[1] !== 0 ) {
// Condition for the 'skipzero' option on an all-zero language; the guarded
// statement is not visible.
511 if ( $allZero && $this->hasOption(
'skipzero' ) ) {
// Field 0 of a mostSpokenLanguages entry is written as the first data cell.
522 $out->element( $this->mostSpokenLanguages[$code][0] );
528 $out->element( $code );
// A {{#languagename:}} parser function call is emitted when $l10n is set and
// the efI18nTagsInit function exists; the plain name is the other visible cell.
530 if ( $l10n && function_exists(
'efI18nTagsInit' ) ) {
531 $out->element(
'{{#languagename:' . $code .
'}}' );
533 $out->element( $name );
// Field 2 of a mostSpokenLanguages entry is the continent key; 'multiple' has
// its own message, other keys go through timezoneregion-* or ucfirst().
538 if ( $showContinent ) {
539 if ( $this->mostSpokenLanguages[$code][2] ===
'multiple' ) {
540 $continent = ( $l10n ?
'{{int:translate-gs-multiple}}' :
'Multiple' );
543 '{{int:timezoneregion-' . $this->mostSpokenLanguages[$code][2] .
'}}' :
544 ucfirst( $this->mostSpokenLanguages[$code][2] );
547 $out->element( $continent );
// Field 1 of a mostSpokenLanguages entry is the speaker figure.
551 if ( $most && $this->hasOption(
'speakers' ) ) {
552 $out->element( number_format( $this->mostSpokenLanguages[$code][1] ) );
556 if ( $reportScore ) {
// Weighted score: each column contributes weight * count / total.
// NOTE(review): no guard against $total being 0 is visible in this excerpt.
562 foreach ( $columns as $fields ) {
563 [ , $upper, $total ] = $fields;
565 $score += ( $weights[$i] * $upper ) / $total;
// NOTE(review): number_format() returns a string and inserts thousands
// separators by default, so the (int) casts below would stop at the first
// separator for scores of 1,000 or more — confirm scores stay below that.
570 $score = number_format( $score, 0 );
// Per continent, keep a running [ language count, score sum ] pair.
573 $continent = $this->mostSpokenLanguages[$code][2];
574 if ( isset( $summary[$continent] ) ) {
575 $newcount = $summary[$continent][0] + 1;
576 $newscore = $summary[$continent][1] + (int)$score;
582 $summary[$continent] = [ $newcount, $newscore ];
// Use the explicit mapping when there is one, otherwise the part of the code
// before its first '-'.
589 $wmfcode = $this->wikimediaCodeMap[$code] ?? explode(
'-', $code, 2 )[0];
// Codes that collapse to the same target are merged as a running average.
591 if ( isset( $wmfscores[$wmfcode] ) ) {
592 $count = $wmfscores[$wmfcode][
'count'] + 1;
593 $tmpWmfScore = (int)$wmfscores[$wmfcode][
'score'];
594 $tmpWmfCount = (int)$wmfscores[$wmfcode][
'count'];
595 $score = ( ( $tmpWmfCount * $tmpWmfScore ) + (
int)$score ) / $count;
596 $wmfscores[$wmfcode] = [
'score' => $score,
'count' => $count ];
598 $wmfscores[$wmfcode] = [
'score' => $score,
'count' => 1 ];
601 $out->element( $score );
// Each collected column is formatted as a percentage by the writer.
607 foreach ( $columns as $fields ) {
608 [ $invert, $upper, $total ] = $fields;
609 $c = $out->formatPercent( $upper, $total, $invert );
// The continent summary table is only written in score mode.
619 if ( $reportScore && $this->hasOption(
'summary' ) ) {
620 if ( $this->hasOption(
'legendsummary' ) ) {
621 $out->addFreeText(
'{{' . $this->getOption(
'legendsummary' ) .
"}}\n" );
624 $out->summaryheading();
628 $out->element( $l10n ?
'{{int:translate-gs-continent}}' :
'Continent', true );
629 $out->element( $l10n ?
'{{int:translate-gs-count}}' :
'Count', true );
630 $out->element( $l10n ?
'{{int:translate-gs-avgscore}}' :
'Avg. score', true );
// One row per continent key: label, language count and average score.
638 foreach ( $summary as $key => $values ) {
641 if ( $key ===
'multiple' ) {
642 $out->element( $l10n ?
'{{int:translate-gs-multiple}}' :
'Multiple' );
644 $out->element( $l10n ?
'{{int:timezoneregion-' . $key .
'}}' : ucfirst( $key ) );
646 $out->element( $values[0] );
647 $out->element( number_format( $values[1] / $values[0] ) );
651 $totals[0] += $values[0];
652 $totals[1] += $values[1];
// Totals row.
// NOTE(review): the division uses $totals[0]; no check for an empty $summary
// is visible in this excerpt.
656 $out->element( $l10n ?
'{{int:translate-gs-total}}' :
'Total' );
657 $out->element( $totals[0] );
658 $out->element( number_format( $totals[1] / $totals[0] ) );
// The merged scores are echoed directly, one "code;score;" line per code.
668 foreach ( $wmfscores as $code => $stats ) {
669 echo $code .
';' . number_format( $stats[
'score'] ) .
";\n";