MediaWiki REL1_34
checkLanguage.inc
Go to the documentation of this file.
1<?php
// Language code to check; the constructor takes it from the 'lang' option,
// falling back to the global $wgLanguageCode.
28 protected $code = null;
// Display level (the help text documents 0 to 3); overridden by the 'level' option.
29 protected $level = 2;
// Whether formatKey() renders message keys as wiki links ('links' option).
30 protected $doLinks = false;
// Text placed in front of the "MediaWiki:" link target ('prefix' option).
31 protected $linksPrefix = '';
// Language code for which formatKey() links without a "/code" suffix ('wikilang' option).
32 protected $wikiCode = 'en';
// Whether doChecks() runs over every language instead of only $code ('all' option).
33 protected $checkAll = false;
// Output format selected in execute(): 'plain' or 'wiki' ('output' option).
34 protected $output = 'plain';
// Check codes to run, chosen in the constructor.
35 protected $checks = [];
// Object the check methods are called on; the constructor stores a Languages instance here.
36 protected $L = null;
37
// Check results, indexed by language code and then by check code.
38 protected $results = [];
39
// Passed to the Languages constructor; false when the 'noexif' option is given.
40 private $includeExif = false;
41
/**
 * Read the command line options and set up the checker.
 *
 * When the 'help' option is present the usage text is printed and the
 * script terminates immediately.
 *
 * @param array $options Options as parsed from the command line
 */
public function __construct( array $options ) {
	global $wgLanguageCode;

	if ( isset( $options['help'] ) ) {
		echo $this->help();
		exit( 1 );
	}

	// Language to check: explicit option first, site language otherwise
	$this->code = isset( $options['lang'] ) ? $options['lang'] : $wgLanguageCode;

	if ( isset( $options['level'] ) ) {
		$this->level = $options['level'];
	}
	if ( isset( $options['prefix'] ) ) {
		$this->linksPrefix = $options['prefix'];
	}
	if ( isset( $options['wikilang'] ) ) {
		$this->wikiCode = $options['wikilang'];
	}
	if ( isset( $options['output'] ) ) {
		$this->output = $options['output'];
	}

	// Pure on/off switches
	$this->doLinks = isset( $options['links'] );
	$this->checkAll = isset( $options['all'] );
	$this->includeExif = !isset( $options['noexif'] );

	// A whitelist wins over everything else; otherwise start from the easy
	// or the default set and remove whatever the blacklist names.
	if ( isset( $options['whitelist'] ) ) {
		$this->checks = explode( ',', $options['whitelist'] );
	} else {
		$selected = isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks();
		if ( isset( $options['blacklist'] ) ) {
			$selected = array_diff( $selected, explode( ',', $options['blacklist'] ) );
		}
		$this->checks = $selected;
	}

	$this->L = new Languages( $this->includeExif );
}
93
/**
 * List the codes of the checks that run when no selection option is given.
 *
 * @return array Check codes
 */
protected function defaultChecks() {
	$codes = [
		'untranslated',
		'duplicate',
		'obsolete',
		'variables',
		'empty',
		'plural',
		'whitespace',
		'xhtml',
		'chars',
		'links',
		'unbalanced',
		'namespace',
		'projecttalk',
		'magic',
		'magic-old',
		'magic-over',
		'magic-case',
		'special',
		'special-old',
	];

	return $codes;
}
106
/**
 * List the codes of the checks that look at something other than messages.
 *
 * @return array Check codes
 */
protected function nonMessageChecks() {
	$codes = [
		'namespace',
		'projecttalk',
		'magic',
		'magic-old',
		'magic-over',
		'magic-case',
		'special',
		'special-old',
	];

	return $codes;
}
117
/**
 * List the codes of the checks selected by the 'easy' option.
 *
 * @return array Check codes
 */
protected function easyChecks() {
	$codes = [
		'duplicate',
		'obsolete',
		'empty',
		'whitespace',
		'xhtml',
		'chars',
		'magic-old',
		'magic-over',
		'magic-case',
		'special-old',
	];

	return $codes;
}
128
/**
 * Map every check code to the name of the method that performs it.
 *
 * checkLanguage() calls the named method on $this->L.
 *
 * @return array Method names indexed by check code
 */
protected function getChecks() {
	$methods = [];

	// Checks on messages
	$methods['untranslated'] = 'getUntranslatedMessages';
	$methods['duplicate'] = 'getDuplicateMessages';
	$methods['obsolete'] = 'getObsoleteMessages';
	$methods['variables'] = 'getMessagesWithMismatchVariables';
	$methods['plural'] = 'getMessagesWithoutPlural';
	$methods['empty'] = 'getEmptyMessages';
	$methods['whitespace'] = 'getMessagesWithWhitespace';
	$methods['xhtml'] = 'getNonXHTMLMessages';
	$methods['chars'] = 'getMessagesWithWrongChars';
	$methods['links'] = 'getMessagesWithDubiousLinks';
	$methods['unbalanced'] = 'getMessagesWithUnbalanced';

	// Checks on namespaces, magic words and special pages
	$methods['namespace'] = 'getUntranslatedNamespaces';
	$methods['projecttalk'] = 'getProblematicProjectTalks';
	$methods['magic'] = 'getUntranslatedMagicWords';
	$methods['magic-old'] = 'getObsoleteMagicWords';
	$methods['magic-over'] = 'getOverridingMagicWords';
	$methods['magic-case'] = 'getCaseMismatchMagicWords';
	$methods['special'] = 'getUntraslatedSpecialPages';
	$methods['special-old'] = 'getObsoleteSpecialPages';

	return $methods;
}
156
/**
 * Describe how outputText() obtains the total for each non-message check.
 *
 * Each entry is either null or a pair of a method name and a language
 * code; outputText() substitutes the language being reported when the
 * language code of the pair is null.
 *
 * @return array Entries indexed by check code
 */
protected function getTotalCount() {
	$totals = [];
	$totals['namespace'] = [ 'getNamespaceNames', 'en' ];
	$totals['projecttalk'] = null;
	$totals['magic'] = [ 'getMagicWords', 'en' ];
	$totals['magic-old'] = [ 'getMagicWords', null ];
	$totals['magic-over'] = [ 'getMagicWords', null ];
	$totals['magic-case'] = [ 'getMagicWords', null ];
	$totals['special'] = [ 'getSpecialPageAliases', 'en' ];
	$totals['special-old'] = [ 'getSpecialPageAliases', null ];

	return $totals;
}
175
/**
 * Provide the header line template of every check.
 *
 * outputText() replaces $1 with the number of findings, $2 with the total
 * and $3 with the language code.
 *
 * @return array Templates indexed by check code
 */
protected function getDescriptions() {
	$templates = [];

	$templates['untranslated'] =
		'$1 message(s) of $2 are not translated to $3, but exist in en:';
	$templates['duplicate'] =
		'$1 message(s) of $2 are translated the same in en and $3:';
	$templates['obsolete'] =
		'$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:';
	$templates['variables'] =
		'$1 message(s) of $2 in $3 don\'t match the variables used in en:';
	$templates['plural'] =
		'$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:';
	$templates['empty'] =
		'$1 message(s) of $2 in $3 are empty or -:';
	$templates['whitespace'] =
		'$1 message(s) of $2 in $3 have trailing whitespace:';
	$templates['xhtml'] =
		'$1 message(s) of $2 in $3 contain illegal XHTML:';
	$templates['chars'] =
		'$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:';
	$templates['links'] =
		'$1 message(s) of $2 in $3 have problematic link(s):';
	$templates['unbalanced'] =
		'$1 message(s) of $2 in $3 have unbalanced {[]}:';
	$templates['namespace'] =
		'$1 namespace name(s) of $2 are not translated to $3, but exist in en:';
	$templates['projecttalk'] =
		'$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:';
	$templates['magic'] =
		'$1 magic word(s) of $2 are not translated to $3, but exist in en:';
	$templates['magic-old'] =
		'$1 magic word(s) of $2 do not exist in en, but exist in $3:';
	$templates['magic-over'] =
		'$1 magic word(s) of $2 in $3 do not contain the original en word(s):';
	$templates['magic-case'] =
		'$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:';
	$templates['special'] =
		'$1 special page alias(es) of $2 are not translated to $3, but exist in en:';
	$templates['special-old'] =
		'$1 special page alias(es) of $2 do not exist in en, but exist in $3:';

	return $templates;
}
207
/**
 * Get the usage text of the script.
 *
 * The constructor prints this text when the 'help' option is set.
 *
 * @return string The usage text
 */
212 protected function help() {
213 return <<<ENDS
214Run this script to check a specific language file, or all of them.
215Command line settings are in form --parameter[=value].
216Parameters:
217 --help: Show this help.
218 --lang: Language code (default: the installation default language).
219 --all: Check all customized languages.
220 --level: Show the following display level (default: 2):
221 * 0: Skip the checks (useful for checking syntax).
222 * 1: Show only the stub headers and number of wrong messages, without
223 list of messages.
224 * 2: Show only the headers and the message keys, without the message
225 values.
226 * 3: Show both the headers and the complete messages, with both keys and
227 values.
228 --links: Link the message values (default off).
229 --prefix: prefix to add to links.
230 --wikilang: For the links, what is the content language of the wiki to
231 display the output in (default en).
232 --noexif: Do not check for Exif messages (a bit hard and boring to
233 translate), if you know what they are currently not translated and want
234 to focus on other problems (default off).
235 --whitelist: Do only the following checks (form: code,code).
236 --blacklist: Do not do the following checks (form: code,code).
237 --easy: Do only the easy checks, which can be treated by non-speakers of
238 the language.
239
240Check codes (ideally, all of them should result 0; all the checks are executed
241by default (except language-specific check blacklists in checkLanguage.inc):
242 * untranslated: Messages which are required to translate, but are not
243 translated.
244 * duplicate: Messages which translation equal to fallback.
245 * obsolete: Messages which are untranslatable or do not exist, but are
246 translated.
247 * variables: Messages without variables which should be used, or with
248 variables which should not be used.
249 * empty: Empty messages and messages that contain only -.
250 * whitespace: Messages which have trailing whitespace.
251 * xhtml: Messages which are not well-formed XHTML (checks only few common
252 errors).
253 * chars: Messages with hidden characters.
254 * links: Messages which contains broken links to pages (does not find all).
255 * unbalanced: Messages which contains unequal numbers of opening {[ and
256 closing ]}.
257 * namespace: Namespace names that were not translated.
258 * projecttalk: Namespace names and aliases where the project talk does not
259 contain $1.
260 * magic: Magic words that were not translated.
261 * magic-old: Magic words which do not exist.
262 * magic-over: Magic words that override the original English word.
263 * magic-case: Magic words whose translation changes the case-sensitivity of
264 the original English word.
265 * special: Special page names that were not translated.
266 * special-old: Special page names which do not exist.
267
268ENDS;
269 }
270
/**
 * Run the checks and, unless the display level suppresses it, print the
 * results in the selected output format.
 *
 * @throws MWException If the output format is neither 'plain' nor 'wiki'
 */
public function execute() {
	$this->doChecks();

	// Nothing is printed unless the level is above zero
	if ( !( $this->level > 0 ) ) {
		return;
	}

	if ( $this->output == 'plain' ) {
		$this->outputText();
	} elseif ( $this->output == 'wiki' ) {
		$this->outputWiki();
	} else {
		throw new MWException( "Invalid output type $this->output" );
	}
}
289
/**
 * Run the checks for the selected language(s) and store the outcome in
 * $this->results, then drop every finding that the check blacklist
 * excludes for its check, language and message key.
 *
 * @throws MWException If a single language was requested and it is one of
 *  the codes that cannot be checked ('en', 'enRTL')
 */
protected function doChecks() {
	$ignoredCodes = [ 'en', 'enRTL' ];

	$this->results = [];
	# Check the language
	if ( $this->checkAll ) {
		foreach ( $this->L->getLanguages() as $language ) {
			if ( !in_array( $language, $ignoredCodes ) ) {
				$this->results[$language] = $this->checkLanguage( $language );
			}
		}
	} elseif ( in_array( $this->code, $ignoredCodes ) ) {
		throw new MWException( "Cannot check code $this->code." );
	} else {
		$this->results[$this->code] = $this->checkLanguage( $this->code );
	}

	# Filter the results through the blacklist. The loop walks a snapshot so
	# that entries can be removed from $this->results while iterating.
	$results = $this->results;
	foreach ( $results as $code => $checks ) {
		if ( !is_array( $checks ) ) {
			// An overriding checkLanguage() may report on its own and
			// return nothing; there is nothing to filter in that case.
			continue;
		}
		foreach ( $checks as $check => $messages ) {
			foreach ( array_keys( $messages ) as $key ) {
				if ( $this->isCheckBlacklisted( $check, $code, $key ) ) {
					unset( $this->results[$code][$check][$key] );
				}
			}
		}
	}
}
323
/**
 * Get the check blacklist.
 *
 * The list starts as the global $checkBlacklist and is passed by reference
 * to the 'LocalisationChecksBlacklist' hook; the outcome is kept in a
 * static variable and reused by later calls.
 *
 * @return array The blacklist entries
 */
protected function getCheckBlacklist() {
	static $blacklist = null;

	if ( $blacklist === null ) {
		// phpcs:ignore MediaWiki.NamingConventions.ValidGlobalName.wgPrefix
		global $checkBlacklist;

		$blacklist = $checkBlacklist;

		Hooks::run( 'LocalisationChecksBlacklist', [ &$blacklist ] );
	}

	return $blacklist;
}
344
/**
 * Tell whether the blacklist excludes a check for a language and message.
 *
 * A blacklist entry applies when each of the keys it defines ('check',
 * 'code', 'message') matches; keys an entry leaves out match anything.
 *
 * @param string $check Check code
 * @param string $code Language code
 * @param string|bool $message Message key, or false to ask about the check
 *  as a whole; false never matches an entry that names messages
 * @return bool True if at least one blacklist entry applies
 */
protected function isCheckBlacklisted( $check, $code, $message ) {
	foreach ( $this->getCheckBlacklist() as $entry ) {
		// The three conditions are evaluated left to right and stop at
		// the first one that fails.
		$applies =
			( !isset( $entry['check'] ) || $check === $entry['check'] ) &&
			( !isset( $entry['code'] ) || in_array( $code, $entry['code'] ) ) &&
			(
				!isset( $entry['message'] ) ||
				( $message !== false && in_array( $message, $entry['message'] ) )
			);

		if ( $applies ) {
			return true;
		}
	}

	return false;
}
376
/**
 * Run the selected checks on one language.
 *
 * At display level 0 the messages of the language are only loaded (a
 * syntax check) and no check is run.
 *
 * @param string $code Language code
 * @return array Findings of each check, indexed by check code; empty at
 *  display level 0
 * @throws MWException If a selected check code is not known
 */
protected function checkLanguage( $code ) {
	# Syntax check only
	$results = [];
	// The level normally arrives from the command line as a string, so an
	// identity comparison with the integer 0 would never match '0'.
	if ( is_numeric( $this->level ) && (int)$this->level === 0 ) {
		$this->L->getMessages( $code );

		return $results;
	}

	$checkFunctions = $this->getChecks();
	foreach ( $this->checks as $check ) {
		// A check that is blacklisted for the whole language reports nothing
		if ( $this->isCheckBlacklisted( $check, $code, false ) ) {
			$results[$check] = [];
			continue;
		}

		// Reject unknown codes before touching the map, so that no
		// undefined index is read.
		if ( !isset( $checkFunctions[$check] ) ) {
			throw new MWException( "Unknown check $check." );
		}

		$callback = [ $this->L, $checkFunctions[$check] ];
		if ( !is_callable( $callback ) ) {
			throw new MWException( "Unknown check $check." );
		}
		$results[$check] = call_user_func( $callback, $code );
	}

	return $results;
}
408
/**
 * Format a message key for output.
 *
 * Without the links option the key is returned unchanged. Otherwise it
 * becomes a wiki link to the page in the MediaWiki namespace, with a
 * "/code" subpage unless the language is the wiki content language.
 *
 * @param string $key Message key
 * @param string $code Language code
 * @return string The key, possibly wrapped in link markup
 */
protected function formatKey( $key, $code ) {
	if ( !$this->doLinks ) {
		return $key;
	}

	$target = $this->linksPrefix . 'MediaWiki:' . ucfirst( $key );
	if ( $code != $this->wikiCode ) {
		$target .= '/' . $code;
	}

	return '[[' . $target . '|' . $key . ']]';
}
427
/**
 * Print the check results as plain text.
 *
 * Every check with findings gets a header line built from its description
 * template. At level 1 the findings are hidden, at level 2 only their keys
 * are listed, and at other levels non-empty values are shown too.
 */
protected function outputText() {
	// These do not depend on the language or the check; fetch them once.
	$nonMessageChecks = $this->nonMessageChecks();
	$totalCounts = $this->getTotalCount();
	$descriptions = $this->getDescriptions();
	$search = [ '$1', '$2', '$3' ];

	foreach ( $this->results as $code => $results ) {
		$translated = $this->L->getMessages( $code );
		$translated = count( $translated['translated'] );
		foreach ( $results as $check => $messages ) {
			$count = count( $messages );
			if ( !$count ) {
				continue;
			}

			$isNonMessageCheck = in_array( $check, $nonMessageChecks );
			if ( $check == 'untranslated' ) {
				$translatable = $this->L->getGeneralMessages();
				$total = count( $translatable['translatable'] );
			} elseif ( $isNonMessageCheck ) {
				if ( isset( $totalCounts[$check] ) ) {
					$totalCount = $totalCounts[$check];
					$callback = [ $this->L, $totalCount[0] ];
					// No fixed language in the entry: count for this one
					$callCode = $totalCount[1] ? $totalCount[1] : $code;
					$total = count( call_user_func( $callback, $callCode ) );
				} else {
					// Some checks (e.g. 'projecttalk') define no way to
					// obtain a total; do not try to call a null callback.
					$total = 0;
				}
			} else {
				$total = $translated;
			}

			$replace = [ $count, $total, $code ];
			echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";

			if ( $this->level == 1 ) {
				echo "[messages are hidden]\n";
				continue;
			}

			foreach ( $messages as $key => $value ) {
				// Only message keys can be turned into links
				if ( !$isNonMessageCheck ) {
					$key = $this->formatKey( $key, $code );
				}
				if ( $this->level == 2 || empty( $value ) ) {
					echo "* $key\n";
				} else {
					echo "* $key: '$value'\n";
				}
			}
		}
	}
}
472
/**
 * Print the check results as wiki text.
 *
 * The output is a sortable table with one row per language that has
 * findings and one column per message check, followed by a section per
 * language and check listing the affected message keys. Non-message
 * checks are left out.
 */
public function outputWiki() {
	// Does not change while the results are processed; fetch it once.
	$nonMessageChecks = $this->nonMessageChecks();

	$detailText = '';
	$rows = [];
	$rows[] = '! Language !! Code !! Total !! ' .
		implode( ' !! ', array_diff( $this->checks, $nonMessageChecks ) );
	foreach ( $this->results as $code => $results ) {
		$detailTextForLang = "==$code==\n";
		$numbers = [];
		$problems = 0;
		$detailTextForLangChecks = [];
		foreach ( $results as $check => $messages ) {
			if ( in_array( $check, $nonMessageChecks ) ) {
				continue;
			}
			$count = count( $messages );
			if ( $count ) {
				$problems += $count;
				$messageDetails = [];
				foreach ( array_keys( $messages ) as $key ) {
					$messageDetails[] = $this->formatKey( $key, $code );
				}
				$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
				// Link the number to the detail section of this check
				$numbers[] = "'''[[#$code-$check|$count]]'''";
			} else {
				$numbers[] = $count;
			}
		}

		if ( count( $detailTextForLangChecks ) ) {
			$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
		}

		if ( !$problems ) {
			# Don't list languages without problems
			continue;
		}
		$language = Language::fetchLanguageName( $code );
		$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
	}

	$tableRows = implode( "\n|-\n", $rows );

	$version = SpecialVersion::getVersion( 'nodb' );
	// phpcs:disable Generic.Files.LineLength
	echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
	// phpcs:enable
}
533
/**
 * Tell whether the stored results contain no finding at all.
 *
 * @return bool False as soon as any check of any language has a non-empty
 *  result, true otherwise
 */
protected function isEmpty() {
	foreach ( $this->results as $checksOfLanguage ) {
		foreach ( $checksOfLanguage as $findings ) {
			if ( $findings ) {
				return false;
			}
		}
	}

	return true;
}
549}
550
// ExtensionLanguages objects to check, one per message group selected by the constructor.
555 private $extensions;
556
/**
 * Read the command line options and collect the extensions to check.
 *
 * When the 'help' option is present the usage text is printed and the
 * script terminates immediately.
 *
 * @param array $options Options as parsed from the command line
 * @param string $extension 'all', 'wikimedia', 'flaggedrevs', or one or
 *  more extension names separated by commas
 */
public function __construct( array $options, $extension ) {
	global $wgLanguageCode;

	if ( isset( $options['help'] ) ) {
		echo $this->help();
		exit( 1 );
	}

	// Language to check: explicit option first, site language otherwise
	$this->code = isset( $options['lang'] ) ? $options['lang'] : $wgLanguageCode;

	if ( isset( $options['level'] ) ) {
		$this->level = $options['level'];
	}
	if ( isset( $options['wikilang'] ) ) {
		$this->wikiCode = $options['wikilang'];
	}
	if ( isset( $options['output'] ) ) {
		$this->output = $options['output'];
	}
	$this->doLinks = isset( $options['links'] );

	// A whitelist wins over everything else; otherwise start from the easy
	// or the default set and remove whatever the blacklist names.
	if ( isset( $options['whitelist'] ) ) {
		$this->checks = explode( ',', $options['whitelist'] );
	} else {
		$selected = isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks();
		if ( isset( $options['blacklist'] ) ) {
			$selected = array_diff( $selected, explode( ',', $options['blacklist'] ) );
		}
		$this->checks = $selected;
	}

	# Some additional checks not enabled by default
	if ( isset( $options['duplicate'] ) ) {
		$this->checks[] = 'duplicate';
	}

	$this->extensions = [];
	$premadeGroups = new PremadeMediawikiExtensionGroups();
	$premadeGroups->addAll();

	switch ( $extension ) {
		case 'all':
			// Every non-meta group whose id starts with "ext-"
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new ExtensionLanguages( $group );
				}
			}
			break;

		case 'wikimedia':
			// The groups listed by the "ext-0-wikimedia" group
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $groupId ) {
				$group = MessageGroups::getGroup( $groupId );
				$this->extensions[] = new ExtensionLanguages( $group );
			}
			break;

		case 'flaggedrevs':
			// Every non-meta group whose id starts with "ext-flaggedrevs-"
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-flaggedrevs-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new ExtensionLanguages( $group );
				}
			}
			break;

		default:
			// A comma separated list of extension names
			foreach ( explode( ',', $extension ) as $name ) {
				$group = MessageGroups::getGroup( 'ext-' . $name );
				if ( $group ) {
					$this->extensions[] = new ExtensionLanguages( $group );
				} else {
					print "No such extension $name.\n";
				}
			}
	}
}
640
/**
 * List the codes of the checks that run when no selection option is given.
 *
 * @return array Check codes
 */
protected function defaultChecks() {
	$codes = [
		'untranslated',
		'duplicate',
		'obsolete',
		'variables',
		'empty',
		'plural',
		'whitespace',
		'xhtml',
		'chars',
		'links',
		'unbalanced',
	];

	return $codes;
}
651
/**
 * List the codes of the checks that look at something other than messages.
 *
 * @return array Always an empty list here
 */
protected function nonMessageChecks() {
	$codes = [];

	return $codes;
}
659
/**
 * List the codes of the checks selected by the 'easy' option.
 *
 * @return array Check codes
 */
protected function easyChecks() {
	$codes = [
		'duplicate',
		'obsolete',
		'empty',
		'whitespace',
		'xhtml',
		'chars',
	];

	return $codes;
}
669
/**
 * Get the usage text of the script that checks extension messages.
 *
 * The constructor prints this text when the 'help' option is set.
 *
 * @return string The usage text
 */
674 protected function help() {
675 return <<<ENDS
676Run this script to check the status of a specific language in extensions, or
677all of them. Command line settings are in form --parameter[=value], except for
678the first one.
679Parameters:
680 * First parameter (mandatory): Extension name, multiple extension names
681 (separated by commas), "all" for all the extensions, "wikimedia" for
682 extensions used by Wikimedia or "flaggedrevs" for all FLaggedRevs
683 extension messages.
684 * lang: Language code (default: the installation default language).
685 * help: Show this help.
686 * level: Show the following display level (default: 2).
687 * links: Link the message values (default off).
688 * wikilang: For the links, what is the content language of the wiki to
689 display the output in (default en).
690 * whitelist: Do only the following checks (form: code,code).
691 * blacklist: Do not perform the following checks (form: code,code).
692 * easy: Do only the easy checks, which can be treated by non-speakers of
693 the language.
694
695Check codes (ideally, all of them should result 0; all the checks are executed
696by default (except language-specific check blacklists in checkLanguage.inc):
697 * untranslated: Messages which are required to translate, but are not
698 translated.
699 * duplicate: Messages which translation equal to fallback.
700 * obsolete: Messages which are untranslatable, but translated.
701 * variables: Messages without variables which should be used, or with
702 variables which should not be used.
703 * empty: Empty messages.
704 * whitespace: Messages which have trailing whitespace.
705 * xhtml: Messages which are not well-formed XHTML (checks only few common
706 errors).
707 * chars: Messages with hidden characters.
708 * links: Messages which contains broken links to pages (does not find all).
709 * unbalanced: Messages which contains unequal numbers of opening {[ and
710 closing ]}.
711
712Display levels (default: 2):
713 * 0: Skip the checks (useful for checking syntax).
714 * 1: Show only the stub headers and number of wrong messages, without list
715 of messages.
716 * 2: Show only the headers and the message keys, without the message
717 values.
718 * 3: Show both the headers and the complete messages, with both keys and
719 values.
720
721ENDS;
722 }
723
/**
 * Execute the script by running the checks.
 *
 * Nothing is printed here; this method only calls doChecks().
 */
public function execute() {
	$this->doChecks();
}
730
/**
 * Check a language in every selected extension and print the findings.
 *
 * Each extension in turn becomes the object the checks are run against;
 * the stored results are replaced by those of that extension, and its
 * name and findings are printed when there are any. Nothing is returned.
 *
 * @param string $code Language code
 * @throws MWException If the output format is neither 'plain' nor 'wiki'
 */
protected function checkLanguage( $code ) {
	foreach ( $this->extensions as $extension ) {
		$this->L = $extension;
		$this->results = [];
		$this->results[$code] = parent::checkLanguage( $code );

		// Extensions without findings are not mentioned at all
		if ( $this->isEmpty() ) {
			continue;
		}

		echo $extension->name() . ":\n";

		if ( $this->level > 0 ) {
			if ( $this->output == 'plain' ) {
				$this->outputText();
			} elseif ( $this->output == 'wiki' ) {
				$this->outputWiki();
			} else {
				throw new MWException( "Invalid output type $this->output" );
			}
		}

		echo "\n";
	}
}
762}
763
764// Blacklist some checks for some languages or some messages
765// Possible keys of the sub arrays are: 'check', 'code' and 'message'.
767 [
768 'check' => 'plural',
769 'code' => [ 'az', 'bo', 'cdo', 'dz', 'id', 'fa', 'gan', 'gan-hans',
770 'gan-hant', 'gn', 'hak', 'hu', 'ja', 'jv', 'ka', 'kk-arab',
771 'kk-cyrl', 'kk-latn', 'km', 'kn', 'ko', 'lzh', 'mn', 'ms',
772 'my', 'sah', 'sq', 'tet', 'th', 'to', 'tr', 'vi', 'wuu', 'xmf',
773 'yo', 'yue', 'zh', 'zh-classical', 'zh-cn', 'zh-hans',
774 'zh-hant', 'zh-hk', 'zh-sg', 'zh-tw', 'zh-yue'
775 ],
776 ],
777 [
778 'check' => 'chars',
779 'code' => [ 'my' ],
780 ],
781];
$wgLanguageCode
Site language code.
$checkBlacklist
defaultChecks()
Get the default checks.
checkLanguage( $code)
Check a language and show the results.
execute()
Execute the script.
__construct(array $options, $extension)
nonMessageChecks()
Get the checks which check other things than messages.
easyChecks()
Get the checks that can easily be treated by non-speakers of the language.
doChecks()
Execute the checks.
isCheckBlacklisted( $check, $code, $message)
Verify whether a check is blacklisted.
defaultChecks()
Get the default checks.
getDescriptions()
Get all check descriptions.
nonMessageChecks()
Get the checks which check other things than messages.
checkLanguage( $code)
Check a language.
execute()
Execute the script.
getTotalCount()
Get the total count for each non-message check.
formatKey( $key, $code)
Format a message key.
outputText()
Output the checks results as plain text.
getCheckBlacklist()
Get the check blacklist.
easyChecks()
Get the checks that can easily be treated by non-speakers of the language.
getChecks()
Get all checks.
__construct(array $options)
isEmpty()
Check if there are any results for the checks, in any language.
outputWiki()
Output the checks results as wiki text.
Class to extract and validate Exif data from jpeg (and possibly tiff) files.
Definition Exif.php:32
Internationalisation code.
Definition Language.php:37
MediaWiki exception.
static getVersion( $flags='', $lang=null)
Return a string of the MediaWiki version with Git revision if available.
while(( $__line=Maintenance::readconsole()) !==false) print
Definition eval.php:64
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...