36 'en' => [ 1, 1500,
'multiple' ],
37 'zh-hans' => [ 2, 1300,
'asia' ],
38 'zh-hant' => [ 2, 1300,
'asia' ],
39 'hi' => [ 3, 550,
'asia' ],
40 'ar' => [ 4, 530,
'multiple' ],
41 'es' => [ 5, 500,
'multiple' ],
42 'ms' => [ 6, 300,
'asia' ],
43 'pt' => [ 7, 290,
'multiple' ],
44 'pt-br' => [ 7, 290,
'america' ],
45 'ru' => [ 8, 278,
'multiple' ],
46 'id' => [ 9, 250,
'asia' ],
47 'bn' => [ 10, 230,
'asia' ],
48 'fr' => [ 11, 200,
'multiple' ],
49 'de' => [ 12, 185,
'europe' ],
50 'ja' => [ 13, 132,
'asia' ],
51 'fa' => [ 14, 107,
'asia' ],
52 'pnb' => [ 15, 104,
'asia' ],
53 'tl' => [ 16, 90,
'asia' ],
54 'mr' => [ 17, 90,
'asia' ],
55 'vi' => [ 18, 80,
'asia' ],
56 'jv' => [ 19, 80,
'asia' ],
57 'te' => [ 20, 80,
'asia' ],
58 'ko' => [ 21, 78,
'asia' ],
59 'wuu' => [ 22, 77,
'asia' ],
60 'arz' => [ 23, 76,
'africa' ],
61 'th' => [ 24, 73,
'asia' ],
62 'yue' => [ 25, 71,
'asia' ],
63 'tr' => [ 26, 70,
'multiple' ],
64 'it' => [ 27, 70,
'europe' ],
65 'ta' => [ 28, 66,
'asia' ],
66 'ur' => [ 29, 60,
'asia' ],
67 'my' => [ 30, 52,
'asia' ],
68 'sw' => [ 31, 50,
'africa' ],
69 'nan' => [ 32, 49,
'asia' ],
70 'kn' => [ 33, 47,
'asia' ],
71 'gu' => [ 34, 46,
'asia' ],
72 'uk' => [ 35, 45,
'europe' ],
73 'pl' => [ 36, 43,
'europe' ],
74 'sd' => [ 37, 41,
'asia' ],
75 'ha' => [ 38, 39,
'africa' ],
76 'ml' => [ 39, 37,
'asia' ],
77 'gan-hans' => [ 40, 35,
'asia' ],
78 'gan-hant' => [ 40, 35,
'asia' ],
79 'hak' => [ 41, 34,
'asia' ],
80 'or' => [ 42, 31,
'asia' ],
81 'ne' => [ 43, 30,
'asia' ],
82 'ro' => [ 44, 28,
'europe' ],
83 'su' => [ 45, 27,
'asia' ],
84 'az' => [ 46, 27,
'asia' ],
85 'nl' => [ 47, 27,
'europe' ],
86 'zu' => [ 48, 26,
'africa' ],
87 'ps' => [ 49, 26,
'asia' ],
88 'ckb' => [ 50, 26,
'asia' ],
89 'ku-latn' => [ 50, 26,
'asia' ],
105 'ext-0-wikimedia' => 50
114 'ext-0-wikimedia' => 25,
125 'bat-smg' =>
'bat-smg',
126 'cbk-zam' =>
'cbk-zam',
127 'map-bms' =>
'map-bms',
128 'nds-nl' =>
'nds-nl',
129 'roa-rup' =>
'roa-rup',
130 'roa-tara' =>
'roa-tara',
133 'be-tarask' =>
'be-x-old',
137 'lzh' =>
'zh-classical',
138 'nan' =>
'zh-min-nan',
178 'zh-classical' =>
'',
/**
 * Set up the maintenance script: call the parent constructor, set the
 * script description and require the Translate extension.
 *
 * NOTE(review): in this excerpt the calls that register the individual
 * command line options are not visible; only their help strings remain.
 * Presumably each quoted help text below is an argument to an
 * option-registration call -- confirm against the complete file.
 */
189 public function __construct() {
190 parent::__construct();
// Description of what the script does.
191 $this->addDescription(
'Script to generate statistics about the localisation ' .
192 'level of one or more message groups.' );
// Help text about the list of message groups.
195 '(optional) Comma separated list of groups',
// Help text about the output format (csv, wiki or text).
201 '(optional) csv: Comma Separated Values, wiki: MediaWiki syntax, ' .
202 'text: Text with tabs. Default: default',
// Help text about languages to leave out.
208 '(optional) Comma separated list of languages to be skipped',
// Help text about skipping languages without any localisation.
214 '(optional) Skip languages that do not have any localisation at all'
// Help texts about legend pages transcluded above the details and summary tables.
218 '(optional) Page name for legend to be transcluded at the top of the details table',
224 '(optional) Page name for legend to be transcluded at the top of the summary table',
// Help text about the extra fuzzy column.
230 '(optional) Add column for fuzzy counts'
// Help text about the extra speakers column.
234 '(optional) Add column for number of speakers (est.). ' .
235 'Only valid when combined with "most"'
// Help text about suppressing localised language names.
239 '(optional) Do not add localised language name if I18ntags is installed'
// Help text about the extra continent column.
243 '(optional) Add a continent column. Only available when output is ' .
244 '"wiki" or not specified.'
// Help text about the per-continent summary.
248 '(optional) Add a summary with counts and scores per continent category ' .
249 'and totals. Only available for a valid "most" value.',
// Help text about the CSV report of weighted scores per WMF language code.
255 'Only output WMF language code and weighted score for all ' .
256 'language codes for weighing group "wikimedia" in CSV. This ' .
257 'report must keep a stable layout as it is used/will be ' .
258 'used in the Wikimedia statistics.'
// Help text about restricting the report to the most spoken languages.
262 '(optional) "mediawiki" or "wikimedia". Report on the 50 most ' .
263 'spoken languages. Skipzero is ignored. If a valid scope is ' .
264 'defined, the group list and fuzzy are ignored and the ' .
265 'localisation levels are weighted and reported.',
// The script depends on the Translate extension.
269 $this->requireExtension(
'Translate' );
272 public function execute() {
273 $output = $this->getOption(
'output',
'default' );
278 $out =
new WikiStatsOutput();
281 $out =
new TextStatsOutput();
284 $out =
new CsvStatsOutput();
291 if ( $this->hasOption(
'skiplanguages' ) ) {
292 $skipLanguages = array_map(
294 explode(
',', $this->getOption(
'skiplanguages' ) )
298 $reportScore =
false;
300 $most = $this->getOption(
'most' );
302 if ( $most && isset( $this->localisedWeights[$most] ) ) {
305 foreach ( $this->localisedWeights[$most] as $weight ) {
306 $weights[] = $weight;
312 if ( ( $output ===
'wiki' || $output ===
'default' ) &&
313 !$this->hasOption(
'nol10n' )
318 $wmfscore = $this->hasOption(
'wmfscore' );
322 if ( $reportScore ) {
323 $reqGroups = array_keys( $this->localisedWeights[$most] );
324 } elseif ( $wmfscore ) {
325 $reqGroups = array_keys( $this->localisedWeights[
'wikimedia'] );
327 $reqGroups = array_map(
'trim', explode(
',', $this->getOption(
'groups' ) ) );
334 foreach ( $reqGroups as $id ) {
336 $id = str_replace(
'_',
' ', $id );
337 if ( isset( $allGroups[$id] ) ) {
338 $groups[$id] = $allGroups[$id];
340 $this->output(
"Unknown group: $id" );
346 $out =
new CsvStatsOutput();
350 foreach ( $this->localisedWeights[
'wikimedia'] as $weight ) {
351 $weights[] = $weight;
356 if ( !count( $groups ) ) {
357 $this->fatalError(
'No groups given' );
365 if ( $this->hasOption(
'legenddetail' ) ) {
366 $out->addFreeText(
'{{' . $this->getOption(
'legenddetail' ) .
"}}\n" );
370 if ( $reportScore ) {
372 foreach ( $this->localisedWeights[
'wikimedia'] as $weight ) {
373 $totalWeight += $weight;
376 foreach ( $this->localisedWeights[$most] as $weight ) {
377 $totalWeight += $weight;
382 $showContinent = $this->getOption(
'continent' );
390 $out->element( ( $l10n ?
'{{int:translate-gs-pos}}' :
'Pos.' ), true );
393 $out->element( ( $l10n ?
'{{int:translate-gs-code}}' :
'Code' ), true );
394 $out->element( ( $l10n ?
'{{int:translate-page-language}}' :
'Language' ), true );
395 if ( $showContinent ) {
396 $out->element( ( $l10n ?
'{{int:translate-gs-continent}}' :
'Continent' ), true );
399 if ( $most && $this->hasOption(
'speakers' ) ) {
400 $out->element( ( $l10n ?
'{{int:translate-gs-speakers}}' :
'Speakers' ), true );
403 if ( $reportScore ) {
405 ( $l10n ?
'{{int:translate-gs-score}}' :
'Score' ) .
' (' . $totalWeight .
')',
411 foreach ( $groups as $g ) {
413 if ( $reportScore ) {
415 $heading = $g->getLabel() .
' (' . $this->localisedWeights[$most][$gid] .
')';
417 $heading = $g->getLabel();
419 $out->element( $heading,
true );
420 if ( !$reportScore && $this->hasOption(
'fuzzy' ) ) {
421 $out->element( ( $l10n ?
'{{int:translate-percentage-fuzzy}}' :
'Fuzzy' ), true );
429 foreach ( $languages as $code => $name ) {
431 if ( in_array( $code, $skipLanguages ) ) {
437 foreach ( $groups as $groupName => $g ) {
441 foreach ( $languages as $code => $name ) {
443 if ( !$most && in_array( $code, $skipLanguages ) ) {
448 if ( $wmfscore && isset( $this->wikimediaCodeMap[$code] )
449 && $this->wikimediaCodeMap[$code] ===
''
455 if ( $most && !isset( $this->mostSpokenLanguages[$code] ) ) {
463 $rows[$code][] = [
false, $translated, $total ];
465 if ( $this->hasOption(
'fuzzy' ) ) {
466 $rows[$code][] = [
true, $fuzzy, $total ];
473 if ( $this->hasOption(
'summary' ) ) {
478 foreach ( $languages as $code => $name ) {
480 if ( !$most && in_array( $code, $skipLanguages ) ) {
485 if ( $wmfscore && isset( $this->wikimediaCodeMap[$code] )
486 && $this->wikimediaCodeMap[$code] ===
''
492 if ( $most && !isset( $this->mostSpokenLanguages[$code] ) ) {
496 $columns = $rows[$code];
499 foreach ( $columns as $fields ) {
500 if ( (
int)$fields[1] !== 0 ) {
506 if ( $allZero && $this->hasOption(
'skipzero' ) ) {
517 $out->element( $this->mostSpokenLanguages[$code][0] );
523 $out->element( $code );
525 if ( $l10n && function_exists(
'efI18nTagsInit' ) ) {
526 $out->element(
'{{#languagename:' . $code .
'}}' );
528 $out->element( $name );
533 if ( $showContinent ) {
534 if ( $this->mostSpokenLanguages[$code][2] ===
'multiple' ) {
535 $continent = ( $l10n ?
'{{int:translate-gs-multiple}}' :
'Multiple' );
538 '{{int:timezoneregion-' . $this->mostSpokenLanguages[$code][2] .
'}}' :
539 ucfirst( $this->mostSpokenLanguages[$code][2] );
542 $out->element( $continent );
546 if ( $most && $this->hasOption(
'speakers' ) ) {
547 $out->element( number_format( $this->mostSpokenLanguages[$code][1] ) );
551 if ( $reportScore ) {
557 foreach ( $columns as $fields ) {
558 list( , $upper, $total ) = $fields;
560 $score += ( $weights[$i] * $upper ) / $total;
565 $score = number_format( $score, 0 );
568 $continent = $this->mostSpokenLanguages[$code][2];
569 if ( isset( $summary[$continent] ) ) {
570 $newcount = $summary[$continent][0] + 1;
571 $newscore = $summary[$continent][1] + (int)$score;
577 $summary[$continent] = [ $newcount, $newscore ];
584 $wmfcode = $this->wikimediaCodeMap[$code] ?? explode(
'-', $code, 2 )[0];
586 if ( isset( $wmfscores[$wmfcode] ) ) {
587 $count = $wmfscores[$wmfcode][
'count'] + 1;
588 $tmpWmfScore = (int)$wmfscores[$wmfcode][
'score'];
589 $tmpWmfCount = (int)$wmfscores[$wmfcode][
'count'];
590 $score = ( ( $tmpWmfCount * $tmpWmfScore ) + (
int)$score ) / $count;
591 $wmfscores[$wmfcode] = [
'score' => $score,
'count' => $count ];
593 $wmfscores[$wmfcode] = [
'score' => $score,
'count' => 1 ];
596 $out->element( $score );
602 foreach ( $columns as $fields ) {
603 list( $invert, $upper, $total ) = $fields;
604 $c = $out->formatPercent( $upper, $total, $invert );
614 if ( $reportScore && $this->hasOption(
'summary' ) ) {
615 if ( $this->hasOption(
'legendsummary' ) ) {
616 $out->addFreeText(
'{{' . $this->getOption(
'legendsummary' ) .
"}}\n" );
619 $out->summaryheading();
623 $out->element( $l10n ?
'{{int:translate-gs-continent}}' :
'Continent', true );
624 $out->element( $l10n ?
'{{int:translate-gs-count}}' :
'Count', true );
625 $out->element( $l10n ?
'{{int:translate-gs-avgscore}}' :
'Avg. score', true );
633 foreach ( $summary as $key => $values ) {
636 if ( $key ===
'multiple' ) {
637 $out->element( $l10n ?
'{{int:translate-gs-multiple}}' :
'Multiple' );
639 $out->element( $l10n ?
'{{int:timezoneregion-' . $key .
'}}' : ucfirst( $key ) );
641 $out->element( $values[0] );
642 $out->element( number_format( $values[1] / $values[0] ) );
646 $totals[0] += $values[0];
647 $totals[1] += $values[1];
651 $out->element( $l10n ?
'{{int:translate-gs-total}}' :
'Total' );
652 $out->element( $totals[0] );
653 $out->element( number_format( $totals[1] / $totals[0] ) );
663 foreach ( $wmfscores as $code => $stats ) {
664 echo $code .
';' . number_format( $stats[
'score'] ) .
";\n";