/**
 * Reference table of widely spoken languages, keyed by language code.
 *
 * Each value is a three-element list:
 *  - [0] rank; execute() emits it as a cell of the language's row
 *        (presumably the "Pos." column -- confirm),
 *  - [1] speaker figure, printed through number_format() when the "speakers"
 *        option is used (unit is not stated here; presumably millions -- confirm),
 *  - [2] continent key or 'multiple'; execute() special-cases 'multiple' and
 *        otherwise builds an {{int:timezoneregion-...}} message or a ucfirst() label.
 *
 * Some entries deliberately share rank and speaker figure (zh-hans/zh-hant,
 * pt/pt-br, gan-hans/gan-hant, ckb/ku-latn).
 *
 * execute() also uses isset() on this table to filter languages when the
 * "most" option is set.
 */
42 public $mostSpokenLanguages = [
43 'en' => [ 1, 1500,
'multiple' ],
44 'zh-hans' => [ 2, 1300,
'asia' ],
45 'zh-hant' => [ 2, 1300,
'asia' ],
46 'hi' => [ 3, 550,
'asia' ],
47 'ar' => [ 4, 530,
'multiple' ],
48 'es' => [ 5, 500,
'multiple' ],
49 'ms' => [ 6, 300,
'asia' ],
50 'pt' => [ 7, 290,
'multiple' ],
51 'pt-br' => [ 7, 290,
'america' ],
52 'ru' => [ 8, 278,
'multiple' ],
53 'id' => [ 9, 250,
'asia' ],
54 'bn' => [ 10, 230,
'asia' ],
55 'fr' => [ 11, 200,
'multiple' ],
56 'de' => [ 12, 185,
'europe' ],
57 'ja' => [ 13, 132,
'asia' ],
58 'fa' => [ 14, 107,
'asia' ],
59 'pnb' => [ 15, 104,
'asia' ],
60 'tl' => [ 16, 90,
'asia' ],
61 'mr' => [ 17, 90,
'asia' ],
62 'vi' => [ 18, 80,
'asia' ],
63 'jv' => [ 19, 80,
'asia' ],
64 'te' => [ 20, 80,
'asia' ],
65 'ko' => [ 21, 78,
'asia' ],
66 'wuu' => [ 22, 77,
'asia' ],
67 'arz' => [ 23, 76,
'africa' ],
68 'th' => [ 24, 73,
'asia' ],
69 'yue' => [ 25, 71,
'asia' ],
70 'tr' => [ 26, 70,
'multiple' ],
71 'it' => [ 27, 70,
'europe' ],
72 'ta' => [ 28, 66,
'asia' ],
73 'ur' => [ 29, 60,
'asia' ],
74 'my' => [ 30, 52,
'asia' ],
75 'sw' => [ 31, 50,
'africa' ],
76 'nan' => [ 32, 49,
'asia' ],
77 'kn' => [ 33, 47,
'asia' ],
78 'gu' => [ 34, 46,
'asia' ],
79 'uk' => [ 35, 45,
'europe' ],
80 'pl' => [ 36, 43,
'europe' ],
81 'sd' => [ 37, 41,
'asia' ],
82 'ha' => [ 38, 39,
'africa' ],
83 'ml' => [ 39, 37,
'asia' ],
84 'gan-hans' => [ 40, 35,
'asia' ],
85 'gan-hant' => [ 40, 35,
'asia' ],
86 'hak' => [ 41, 34,
'asia' ],
87 'or' => [ 42, 31,
'asia' ],
88 'ne' => [ 43, 30,
'asia' ],
89 'ro' => [ 44, 28,
'europe' ],
90 'su' => [ 45, 27,
'asia' ],
91 'az' => [ 46, 27,
'asia' ],
92 'nl' => [ 47, 27,
'europe' ],
93 'zu' => [ 48, 26,
'africa' ],
94 'ps' => [ 49, 26,
'asia' ],
95 'ckb' => [ 50, 26,
'asia' ],
96 'ku-latn' => [ 50, 26,
'asia' ],
/**
 * Weights used for the weighted localisation score.
 *
 * execute() indexes this table first by the value of the "most" option (or
 * the fixed key 'wikimedia'), then iterates the inner entries: the inner keys
 * are used as the requested message group ids and the inner values are the
 * per-group weights that get summed and multiplied into the score.
 *
 * NOTE(review): the same group id appears with different weights (50 and 25),
 * presumably once per scope -- confirm against the full table.
 */
109 public $localisedWeights = [
113 'ext-0-wikimedia' => 50
122 'ext-0-wikimedia' => 25,
/**
 * Maps language codes used here to the codes used by Wikimedia projects.
 *
 * execute() looks a code up in this map when computing the "wmfscore" report;
 * codes without an entry fall back to the part of the code before the first
 * '-'. Identity entries such as 'bat-smg' => 'bat-smg' therefore keep a
 * hyphenated code from being truncated by that fallback.
 *
 * An empty-string value is tested for explicitly in execute() when "wmfscore"
 * is set (presumably to leave the language out of the report -- confirm).
 */
132 public $wikimediaCodeMap = [
134 'bat-smg' =>
'bat-smg',
135 'cbk-zam' =>
'cbk-zam',
136 'map-bms' =>
'map-bms',
137 'nds-nl' =>
'nds-nl',
138 'roa-rup' =>
'roa-rup',
139 'roa-tara' =>
'roa-tara',
142 'be-tarask' =>
'be-x-old',
146 'lzh' =>
'zh-classical',
147 'nan' =>
'zh-min-nan',
187 'zh-classical' =>
'',
/**
 * Sets up the maintenance script: calls the parent constructor, sets the
 * script description and declares that the Translate extension is required.
 *
 * The "(optional) ..." string literals in between appear to be the help texts
 * of the command-line options read in execute() (groups, output,
 * skiplanguages, skipzero, legenddetail, legendsummary, fuzzy, speakers,
 * nol10n, continent, summary, wmfscore, most) -- confirm the pairing against
 * the option registrations.
 */
198 public function __construct() {
199 parent::__construct();
200 $this->addDescription(
'Script to generate statistics about the localisation ' .
201 'level of one or more message groups.' );
204 '(optional) Comma separated list of groups',
// NOTE(review): the text below lists csv, wiki and text but then names
// "default" as the default without describing it.
210 '(optional) csv: Comma Separated Values, wiki: MediaWiki syntax, ' .
211 'text: Text with tabs. Default: default',
217 '(optional) Comma separated list of languages to be skipped',
223 '(optional) Skip languages that do not have any localisation at all'
227 '(optional) Page name for legend to be transcluded at the top of the details table',
233 '(optional) Page name for legend to be transcluded at the top of the summary table',
239 '(optional) Add column for fuzzy counts'
243 '(optional) Add column for number of speakers (est.). ' .
244 'Only valid when combined with "most"'
248 '(optional) Do not add localised language name if I18ntags is installed'
252 '(optional) Add a continent column. Only available when output is ' .
253 '"wiki" or not specified.'
257 '(optional) Add a summary with counts and scores per continent category ' .
258 'and totals. Only available for a valid "most" value.',
264 'Only output WMF language code and weighted score for all ' .
265 'language codes for weighing group "wikimedia" in CSV. This ' .
266 'report must keep a stable layout as it is used/will be ' .
267 'used in the Wikimedia statistics.'
271 '(optional) "mediawiki" or "wikimedia". Report on the 50 most ' .
272 'spoken languages. Skipzero is ignored. If a valid scope is ' .
273 'defined, the group list and fuzzy are ignored and the ' .
274 'localisation levels are weighted and reported.',
// The script cannot run without the Translate extension.
278 $this->requireExtension(
'Translate' );
/**
 * Builds and prints the localisation statistics report.
 *
 * Visible steps: pick an output formatter, parse the options, resolve the
 * requested message groups, collect per-group/per-language statistics, emit
 * one row per language (optionally with a weighted score), and optionally
 * emit a per-continent summary or the "wmfscore" CSV-style listing.
 */
281 public function execute() {
// Output format name; 'default' when the option is not given.
282 $output = $this->getOption(
'output',
'default' );
// One of three formatter objects is created depending on the format.
287 $out =
new WikiStatsOutput();
290 $out =
new TextStatsOutput();
293 $out =
new CsvStatsOutput();
// Split the comma separated skip list into individual language codes.
300 if ( $this->hasOption(
'skiplanguages' ) ) {
301 $skipLanguages = array_map(
303 explode(
',', $this->getOption(
'skiplanguages' ) )
307 $reportScore =
false;
309 $most = $this->getOption(
'most' );
// Only a "most" value that is a key of $localisedWeights is treated as a
// scope; its weights are copied into a positional list.
311 if ( $most && isset( $this->localisedWeights[$most] ) ) {
314 foreach ( $this->localisedWeights[$most] as $weight ) {
315 $weights[] = $weight;
// Localised ({{int:...}}) headings apply to 'wiki'/'default' output unless
// "nol10n" is given (the result is presumably stored in $l10n -- confirm).
321 if ( ( $output ===
'wiki' || $output ===
'default' ) &&
322 !$this->hasOption(
'nol10n' )
327 $wmfscore = $this->hasOption(
'wmfscore' );
// Requested group ids: the weight table keys for a scored report, the
// 'wikimedia' weight keys for wmfscore, otherwise the "groups" option.
331 if ( $reportScore ) {
332 $reqGroups = array_keys( $this->localisedWeights[$most] );
333 } elseif ( $wmfscore ) {
334 $reqGroups = array_keys( $this->localisedWeights[
'wikimedia'] );
336 $reqGroups = array_map(
'trim', explode(
',', $this->getOption(
'groups' ) ) );
340 $allGroups = MessageGroups::singleton()->getGroups();
// Keep only ids that name an existing group; underscores in a requested id
// are replaced by spaces before the lookup.
343 foreach ( $reqGroups as $id ) {
345 $id = str_replace(
'_',
' ', $id );
346 if ( isset( $allGroups[$id] ) ) {
347 $groups[$id] = $allGroups[$id];
349 $this->output(
"Unknown group: $id" );
// Formatter and weights are replaced here with the CSV formatter and the
// 'wikimedia' weights (presumably only in wmfscore mode -- confirm).
355 $out =
new CsvStatsOutput();
359 foreach ( $this->localisedWeights[
'wikimedia'] as $weight ) {
360 $weights[] = $weight;
365 if ( !count( $groups ) ) {
366 $this->fatalError(
'No groups given' );
370 $languages = Utilities::getLanguageNames( LanguageNameUtils::AUTONYMS );
// Optional legend page transcluded before the details table.
374 if ( $this->hasOption(
'legenddetail' ) ) {
375 $out->addFreeText(
'{{' . $this->getOption(
'legenddetail' ) .
"}}\n" );
// Sum of the applicable weights; shown next to the score heading.
379 if ( $reportScore ) {
381 foreach ( $this->localisedWeights[
'wikimedia'] as $weight ) {
382 $totalWeight += $weight;
385 foreach ( $this->localisedWeights[$most] as $weight ) {
386 $totalWeight += $weight;
// NOTE(review): the other flags in this method are read with hasOption();
// confirm getOption() yields the intended truthiness for "continent".
391 $showContinent = $this->getOption(
'continent' );
// Table headings; each one is either an interface message or plain English.
399 $out->element( ( $l10n ?
'{{int:translate-gs-pos}}' :
'Pos.' ),
true );
402 $out->element( ( $l10n ?
'{{int:translate-gs-code}}' :
'Code' ),
true );
403 $out->element( ( $l10n ?
'{{int:translate-page-language}}' :
'Language' ),
true );
404 if ( $showContinent ) {
405 $out->element( ( $l10n ?
'{{int:translate-gs-continent}}' :
'Continent' ),
true );
408 if ( $most && $this->hasOption(
'speakers' ) ) {
409 $out->element( ( $l10n ?
'{{int:translate-gs-speakers}}' :
'Speakers' ),
true );
412 if ( $reportScore ) {
414 ( $l10n ?
'{{int:translate-gs-score}}' :
'Score' ) .
' (' . $totalWeight .
')',
// One heading per group; in a scored report the group weight is appended.
420 foreach ( $groups as $g ) {
422 if ( $reportScore ) {
424 $heading = $g->getLabel() .
' (' . $this->localisedWeights[$most][$gid] .
')';
426 $heading = $g->getLabel();
428 $out->element( $heading,
true );
429 if ( !$reportScore && $this->hasOption(
'fuzzy' ) ) {
430 $out->element( ( $l10n ?
'{{int:translate-percentage-fuzzy}}' :
'Fuzzy' ),
true );
438 foreach ( $languages as $code => $name ) {
// NOTE(review): non-strict in_array(); codes are presumably always strings.
440 if ( in_array( $code, $skipLanguages ) ) {
// Collect the statistics: for every group and language append a
// [ flag, count, total ] tuple to $rows[$code].
446 foreach ( $groups as $groupName => $g ) {
447 $stats = MessageGroupStats::forGroup( $groupName );
450 foreach ( $languages as $code => $name ) {
// The skip list is only consulted when "most" is not set.
452 if ( !$most && in_array( $code, $skipLanguages ) ) {
457 if ( $wmfscore && isset( $this->wikimediaCodeMap[$code] )
458 && $this->wikimediaCodeMap[$code] ===
''
464 if ( $most && !isset( $this->mostSpokenLanguages[$code] ) ) {
468 $total = $stats[$code][MessageGroupStats::TOTAL];
469 $translated = $stats[$code][MessageGroupStats::TRANSLATED];
470 $fuzzy = $stats[$code][MessageGroupStats::FUZZY];
// The first tuple element is later passed to formatPercent() as $invert:
// false for the translated column, true for the fuzzy column.
472 $rows[$code][] = [
false, $translated, $total ];
474 if ( $this->hasOption(
'fuzzy' ) ) {
475 $rows[$code][] = [
true, $fuzzy, $total ];
482 if ( $this->hasOption(
'summary' ) ) {
// Emit one table row per language, applying the same three filters as the
// collection loop above.
487 foreach ( $languages as $code => $name ) {
489 if ( !$most && in_array( $code, $skipLanguages ) ) {
494 if ( $wmfscore && isset( $this->wikimediaCodeMap[$code] )
495 && $this->wikimediaCodeMap[$code] ===
''
501 if ( $most && !isset( $this->mostSpokenLanguages[$code] ) ) {
505 $columns = $rows[$code];
// A row counts as "all zero" unless some column has a non-zero count.
508 foreach ( $columns as $fields ) {
509 if ( (
int)$fields[1] !== 0 ) {
515 if ( $allZero && $this->hasOption(
'skipzero' ) ) {
526 $out->element( $this->mostSpokenLanguages[$code][0] );
532 $out->element( $code );
// Use the #languagename parser function only when its provider function
// exists; otherwise print the name from $languages.
534 if ( $l10n && function_exists(
'efI18nTagsInit' ) ) {
535 $out->element(
'{{#languagename:' . $code .
'}}' );
537 $out->element( $name );
// NOTE(review): this indexes $mostSpokenLanguages[$code] without an isset()
// check; confirm the continent column cannot be requested without "most".
542 if ( $showContinent ) {
543 if ( $this->mostSpokenLanguages[$code][2] ===
'multiple' ) {
544 $continent = ( $l10n ?
'{{int:translate-gs-multiple}}' :
'Multiple' );
547 '{{int:timezoneregion-' . $this->mostSpokenLanguages[$code][2] .
'}}' :
548 ucfirst( $this->mostSpokenLanguages[$code][2] );
551 $out->element( $continent );
555 if ( $most && $this->hasOption(
'speakers' ) ) {
556 $out->element( number_format( $this->mostSpokenLanguages[$code][1] ) );
560 if ( $reportScore ) {
// Weighted score: sum over columns of weight * count / total.
566 foreach ( $columns as $fields ) {
567 [ , $upper, $total ] = $fields;
// NOTE(review): divides by $total; confirm a zero total is guarded against.
569 $score += ( $weights[$i] * $upper ) / $total;
// NOTE(review): number_format() returns a string with thousands separators;
// the later (int) casts would stop at the first separator for scores of
// 1000 or more. Presumably the weights keep the score below that -- confirm.
574 $score = number_format( $score, 0 );
// Accumulate [ language count, score sum ] per continent for the summary.
577 $continent = $this->mostSpokenLanguages[$code][2];
578 if ( isset( $summary[$continent] ) ) {
579 $newcount = $summary[$continent][0] + 1;
580 $newscore = $summary[$continent][1] + (int)$score;
586 $summary[$continent] = [ $newcount, $newscore ];
// Map to the Wikimedia code; unmapped codes use the part before the first '-'.
593 $wmfcode = $this->wikimediaCodeMap[$code] ?? explode(
'-', $code, 2 )[0];
// Several codes can map to the same Wikimedia code: keep a running average
// of their scores together with the number of contributing codes.
595 if ( isset( $wmfscores[$wmfcode] ) ) {
596 $count = $wmfscores[$wmfcode][
'count'] + 1;
597 $tmpWmfScore = (int)$wmfscores[$wmfcode][
'score'];
598 $tmpWmfCount = (int)$wmfscores[$wmfcode][
'count'];
599 $score = ( ( $tmpWmfCount * $tmpWmfScore ) + (
int)$score ) / $count;
600 $wmfscores[$wmfcode] = [
'score' => $score,
'count' => $count ];
602 $wmfscores[$wmfcode] = [
'score' => $score,
'count' => 1 ];
605 $out->element( $score );
// One formatted percentage cell per collected column.
611 foreach ( $columns as $fields ) {
612 [ $invert, $upper, $total ] = $fields;
613 $c = $out->formatPercent( $upper, $total, $invert );
// Per-continent summary table; only for scored reports with "summary".
623 if ( $reportScore && $this->hasOption(
'summary' ) ) {
624 if ( $this->hasOption(
'legendsummary' ) ) {
625 $out->addFreeText(
'{{' . $this->getOption(
'legendsummary' ) .
"}}\n" );
628 $out->summaryheading();
632 $out->element( $l10n ?
'{{int:translate-gs-continent}}' :
'Continent', true );
633 $out->element( $l10n ?
'{{int:translate-gs-count}}' :
'Count', true );
634 $out->element( $l10n ?
'{{int:translate-gs-avgscore}}' :
'Avg. score', true );
// $values is [ language count, score sum ] as accumulated above.
642 foreach ( $summary as $key => $values ) {
645 if ( $key ===
'multiple' ) {
646 $out->element( $l10n ?
'{{int:translate-gs-multiple}}' :
'Multiple' );
648 $out->element( $l10n ?
'{{int:timezoneregion-' . $key .
'}}' : ucfirst( $key ) );
650 $out->element( $values[0] );
651 $out->element( number_format( $values[1] / $values[0] ) );
655 $totals[0] += $values[0];
656 $totals[1] += $values[1];
660 $out->element( $l10n ?
'{{int:translate-gs-total}}' :
'Total' );
661 $out->element( $totals[0] );
// NOTE(review): if $summary is empty, $totals[0] presumably stays 0 and this
// division fails -- confirm how $totals is initialised and guarded.
662 $out->element( number_format( $totals[1] / $totals[0] ) );
// wmfscore listing: written with echo, one "code;score;" line per code.
672 foreach ( $wmfscores as $code => $stats ) {
673 echo $code .
';' . number_format( $stats[
'score'] ) .
";\n";