Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
plural-comparison.php
Go to the documentation of this file.
1<?php
12use CLDRPluralRuleParser\Evaluator;
13use MediaWiki\Languages\LanguageNameUtils;
14use MediaWiki\MediaWikiServices;
15
// Standard boilerplate to define $IP: prefer the MW_INSTALL_PATH environment
// variable and otherwise fall back to the directory three levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$IP = __DIR__ . '/../../..';
}
require_once "$IP/maintenance/Maintenance.php";
23
/**
 * Maintenance script that compares the plural rules available from three
 * sources (MediaWiki, gettext and CLDR) and prints one table row per
 * language code, followed by the first number on which the sources disagree.
 */
class PluralCompare extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Script for comparing different plural implementations.' );
	}

	/**
	 * Load all three rule sets and print the comparison table.
	 *
	 * Column markers: '+' means the source has rules for the code itself,
	 * '.' (MW column only) means a non-English fallback language has rules.
	 */
	public function execute() {
		$mwLanguages = $this->loadMediaWiki();
		$gtLanguages = $this->loadGettext();
		$clLanguages = $this->loadCLDR();

		$services = MediaWikiServices::getInstance();
		$all = $services
			->getLanguageNameUtils()
			->getLanguageNames( LanguageNameUtils::AUTONYMS, LanguageNameUtils::ALL );
		$allkeys = array_keys( $all + $mwLanguages + $gtLanguages + $clLanguages );
		sort( $allkeys );
		$languageFallback = $services->getLanguageFallback();

		$this->output( sprintf( "%12s %3s %3s %4s\n", 'Code', 'MW', 'Get', 'CLDR' ) );
		foreach ( $allkeys as $code ) {
			$mw = isset( $mwLanguages[$code] ) ? '+' : '';
			$gt = isset( $gtLanguages[$code] ) ? '+' : '';
			$cl = isset( $clLanguages[$code] ) ? '+' : '';

			if ( $mw === '' ) {
				foreach ( $languageFallback->getAll( $code ) as $fcode ) {
					if ( $fcode !== 'en' && isset( $mwLanguages[$fcode] ) ) {
						$mw = '.';
						// One matching fallback is enough; the marker cannot change further.
						break;
					}
				}
			}

			$error = '';
			// Only compare when at least two sources define rules for this exact code.
			if ( substr_count( $mw . $gt . $cl, '+' ) > 1 ) {
				$error = $this->tryMatch( $code, $mw, $gtLanguages, $clLanguages );
			}

			$this->output( sprintf( "%12s %-3s %-3s %-4s %s\n", $code, $mw, $gt, $cl, $error ) );
		}
	}

	/**
	 * Evaluate the plural form index for the numbers 0..250 with every
	 * available implementation and report the first disagreement.
	 *
	 * @param string $code Language code to check
	 * @param string $mws MediaWiki marker from execute(); '' disables the MediaWiki check
	 * @param array $gtLanguages Map of language code to gettext plural expression
	 * @param array $clLanguages Map of language code to list of CLDR plural rules
	 * @return string '' when all available implementations agree, otherwise
	 *  "<number>: <mw> <gettext> <cldr>" for the first mismatching number
	 *  ('?' stands for an implementation that is not available)
	 */
	protected function tryMatch( $code, $mws, $gtLanguages, $clLanguages ) {
		// The compiled rules do not depend on the number being tested, so fetch them once.
		$mwRules = null;
		if ( $mws !== '' ) {
			$lang = MediaWikiServices::getInstance()->getLanguageFactory()->getLanguage( $code );
			$mwRules = $lang->getCompiledPluralRules();
		}

		$gtExp = false;
		if ( isset( $gtLanguages[$code] ) ) {
			// Drop the trailing ';' so that the whole expression can be parenthesized.
			// Without the parentheses the (int) cast binds only to the first operand
			// and a rule such as "n != 1" would yield a boolean instead of an index.
			$rule = rtrim( $gtLanguages[$code], "; \t" );
			if ( $rule !== '' ) {
				$gtExp = 'return (int) ( ' . str_replace( 'n', '$i', $rule ) . ' );';
			}
			// NOTE(review): C-style nested ternaries without parentheses are rejected
			// by PHP 8; verify that the gettext data file does not rely on them.
		}

		$cldrExp = $clLanguages[$code] ?? false;

		for ( $i = 0; $i <= 250; $i++ ) {
			$mw = $gt = $cl = '?';

			if ( $mwRules !== null ) {
				$mw = Evaluator::evaluateCompiled( $i, $mwRules );
			}

			if ( $gtExp ) {
				// The evaluated code comes from the data file read by loadGettext();
				// never feed this method expressions from an untrusted source.
				$gt = eval( $gtExp );
			}

			if ( $cldrExp ) {
				$cl = Evaluator::evaluate( $i, $cldrExp );
			}

			if ( self::comp( $mw, $gt ) && self::comp( $gt, $cl ) && self::comp( $cl, $mw ) ) {
				continue;
			}

			return "$i: $mw $gt $cl";
		}

		return '';
	}

	/**
	 * Compare two results, treating '?' (implementation not available) as a wildcard.
	 *
	 * @param mixed $a
	 * @param mixed $b
	 * @return bool True when either value is '?' or both are strictly equal
	 */
	public static function comp( $a, $b ) {
		return $a === '?' || $b === '?' || $a === $b;
	}

	/**
	 * Read a plural rules XML file: every <pluralRules> element contributes
	 * the text of its <pluralRule> children to each code listed in its
	 * "locales" attribute.
	 *
	 * @param string $fileName Path to the XML file
	 * @return array Map of language code to list of rule strings
	 */
	protected function loadPluralFile( $fileName ) {
		$doc = new DOMDocument;
		if ( !$doc->load( $fileName ) ) {
			// An unreadable file would otherwise silently produce an empty column.
			$this->fatalError( "Unable to load plural rules from $fileName" );
		}

		$plurals = [];
		foreach ( $doc->getElementsByTagName( 'pluralRules' ) as $ruleset ) {
			$rules = [];
			foreach ( $ruleset->getElementsByTagName( 'pluralRule' ) as $elt ) {
				$rules[] = $elt->nodeValue;
			}

			// Split on any whitespace run so that repeated or leading blanks
			// do not create an entry for the empty string.
			$codes = preg_split( '/\s+/', $ruleset->getAttribute( 'locales' ), -1, PREG_SPLIT_NO_EMPTY );
			foreach ( $codes as $code ) {
				$plurals[$code] = $rules;
			}
		}

		return $plurals;
	}

	/**
	 * Load the rules from languages/data/plurals.xml below $IP.
	 *
	 * @return array Map of language code to list of rule strings
	 */
	public function loadCLDR() {
		global $IP;

		return $this->loadPluralFile( "$IP/languages/data/plurals.xml" );
	}

	/**
	 * Load the rules from plurals.xml and plurals-mediawiki.xml below $IP;
	 * entries from the latter file replace entries for the same code.
	 *
	 * @return array Map of language code to list of rule strings
	 */
	public function loadMediaWiki() {
		global $IP;

		$rules = $this->loadPluralFile( "$IP/languages/data/plurals.xml" );
		$rulesMW = $this->loadPluralFile( "$IP/languages/data/plurals-mediawiki.xml" );

		return array_merge( $rules, $rulesMW );
	}

	/**
	 * Load the gettext plural expressions from data/plural-gettext.txt.
	 *
	 * Each non-empty line holds a language code and a rule separated by a tab;
	 * everything up to and including "plural=" is stripped from the rule.
	 *
	 * @return array Map of language code to gettext plural expression
	 */
	public function loadGettext() {
		$fileName = __DIR__ . '/../data/plural-gettext.txt';
		$gtData = file_get_contents( $fileName );
		if ( $gtData === false ) {
			$this->fatalError( "Unable to read gettext plural data from $fileName" );
		}

		$gtLanguages = [];
		foreach ( preg_split( '/\n|\r/', $gtData, -1, PREG_SPLIT_NO_EMPTY ) as $line ) {
			$fields = explode( "\t", $line );
			if ( count( $fields ) < 2 ) {
				// No rule column on this line; nothing to record.
				continue;
			}
			[ $code, $rule ] = $fields;
			$gtLanguages[$code] = preg_replace( '/^.*?plural=/', '', $rule );
		}

		return $gtLanguages;
	}
}
162
// Record the name of the maintenance class defined in this file, then include
// the file named by RUN_MAINTENANCE_IF_MAIN. That constant is not defined here;
// presumably it is provided by the Maintenance.php required at the top of the file.
$maintClass = PluralCompare::class;
require_once RUN_MAINTENANCE_IF_MAIN;
Script for comparing different plural implementations.