Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
MessageGroupStats.php
Go to the documentation of this file.
1<?php
14use MediaWiki\Logger\LoggerFactory;
15use MediaWiki\MediaWikiServices;
16use Wikimedia\Rdbms\Database;
17use Wikimedia\Rdbms\IDatabase;
18
/** Name of the database table that holds the cached statistics rows. */
28 private const TABLE = 'translate_groupstats';
/** WAN cache key (also used as its check key) for the aggregated per-language statistics. */
30 private const LANGUAGE_STATS_KEY = 'translate-all-language-stats';
31
/** Array index of the total message count in a stats array. */
32 public const TOTAL = 0;
/** Array index of the translated message count in a stats array. */
33 public const TRANSLATED = 1;
/** Array index of the fuzzy message count in a stats array. */
34 public const FUZZY = 2;
/** Array index of the proofread message count in a stats array. */
35 public const PROOFREAD = 3;
36
/** Flag: if stats are not cached, do not attempt to calculate them on the fly. */
38 public const FLAG_CACHE_ONLY = 1;
/** Flag: ignore cached values. Useful for updating stale values. */
40 public const FLAG_NO_CACHE = 2;
/** Flag: do not defer updates. Meant for jobs like MessageGroupStatsRebuildJob. */
42 public const FLAG_IMMEDIATE_WRITES = 4;
43
/** Rows waiting to be written to the database, keyed by "<database group id>|<language code>". */
45 protected static $updates = [];
/** Memoized, sorted list of language codes; null until getLanguages() has run. */
47 private static $languages;
48
/**
 * Returns a stats array in which every counter is zero.
 *
 * @return int[] Four zeroes, indexed by TOTAL, TRANSLATED, FUZZY and PROOFREAD.
 */
public static function getEmptyStats() {
	return array_fill( 0, 4, 0 );
}
58
/**
 * Returns a stats array that marks the statistics as incomplete or unknown.
 *
 * @return null[] Four nulls, indexed by TOTAL, TRANSLATED, FUZZY and PROOFREAD.
 */
protected static function getUnknownStats() {
	return array_fill( 0, 4, null );
}
68
/**
 * Checks whether statistics are tracked for the given language code.
 *
 * The comparison is strict so that non-string input (for example 0, true or
 * null) can never match a language code through PHP's loose type juggling.
 *
 * @param string $code Language code
 * @return bool True if $code is one of the codes returned by getLanguages().
 */
private static function isValidLanguage( $code ) {
	return in_array( $code, self::getLanguages(), true );
}
73
/**
 * Checks whether statistics can be calculated for the given group.
 *
 * The parameter is declared explicitly nullable; relying on the implicit
 * nullability of a "= null" default is deprecated as of PHP 8.4.
 *
 * @param MessageGroup|null $group Group to check, or null if lookup failed
 * @return bool False for a missing group or a dynamic group, true otherwise.
 */
private static function isValidMessageGroup( ?MessageGroup $group = null ) {
	/* In case some code calls stats for dynamic groups. Calculating these numbers
	 * doesn't make sense for dynamic groups, and would just throw an exception. */
	return $group !== null && !MessageGroups::isDynamic( $group );
}
79
/**
 * Returns stats for the given group in the given language.
 *
 * @param string $id Group id
 * @param string $code Language code
 * @param int $flags Combination of FLAG_* constants
 * @return array Stats array indexed by TOTAL, TRANSLATED, FUZZY and PROOFREAD;
 *  all values are null when the group or the language is not valid.
 */
public static function forItem( $id, $code, $flags = 0 ) {
	$group = MessageGroups::getGroup( $id );
	$isKnown = self::isValidMessageGroup( $group ) && self::isValidLanguage( $code );
	if ( !$isKnown ) {
		return self::getUnknownStats();
	}

	// Start from whatever is cached in the database for this group and language
	$rows = self::selectRowsIdLang( [ $id ], [ $code ], $flags );
	$stats = self::extractResults( $rows, [ $id ] );

	// Fall back to calculating (or marking as unknown) when nothing was cached
	$stats[$id][$code] ??= self::forItemInternal( $stats, $group, $code, $flags );

	self::queueUpdates( $flags );

	return $stats[$id][$code];
}
104
/**
 * Returns stats for all groups in the given language.
 *
 * @param string $code Language code
 * @param int $flags Combination of FLAG_* constants
 * @return array[] Map of group id to stats array. For a language that is
 *  not valid, every group maps to the unknown stats array.
 */
public static function forLanguage( $code, $flags = 0 ) {
	if ( !self::isValidLanguage( $code ) ) {
		$groupIds = array_keys( MessageGroups::singleton()->getGroups() );

		return array_fill_keys( $groupIds, self::getUnknownStats() );
	}

	$perGroup = self::forLanguageInternal( $code, [], $flags );
	// Drop the language level: [ group => [ code => stats ] ] becomes [ group => stats ]
	$flattened = array_map(
		static fn ( $byLanguage ) => $byLanguage[$code],
		$perGroup
	);

	self::queueUpdates( $flags );

	return $flattened;
}
133
/**
 * Returns stats for all languages in the given group.
 *
 * @param string $id Group id
 * @param int $flags Combination of FLAG_* constants
 * @return array[] Map of language code to stats array. For a group that is
 *  not valid, every language maps to the unknown stats array.
 */
public static function forGroup( $id, $flags = 0 ) {
	$group = MessageGroups::getGroup( $id );

	if ( self::isValidMessageGroup( $group ) ) {
		$stats = self::forGroupInternal( $group, [], $flags );
		self::queueUpdates( $flags );

		return $stats[$id];
	}

	return array_fill_keys( self::getLanguages(), self::getUnknownStats() );
}
158
/**
 * Returns stats for all groups in all languages.
 *
 * @param int $flags Combination of FLAG_* constants
 * @return array[][] Map of group id to map of language code to stats array.
 */
public static function forEverything( $flags = 0 ) {
	$groups = MessageGroups::singleton()->getGroups();
	$groupIds = array_keys( $groups );
	$languages = self::getLanguages();

	// Pre-load cached values. The rows are extracted once up front: doing it
	// inside the loop below re-reads the whole result set for every group
	// without ever producing different values.
	$res = self::selectRowsIdLang( $groupIds, $languages, $flags );
	$stats = self::extractResults( $res, $groupIds );

	// Go over each group and language filling missing entries
	foreach ( $groups as $groupId => $group ) {
		foreach ( $languages as $code ) {
			$stats[$groupId][$code] ??= self::forItemInternal( $stats, $group, $code, $flags );
		}
		// This is for sorting the values added later in correct order
		ksort( $stats[$groupId] );
	}

	self::queueUpdates( $flags );

	return $stats;
}
189
/**
 * Recalculates stats for all groups associated with the message.
 *
 * Does nothing when the language of the handle is not a valid language.
 *
 * @param MessageHandle $handle Handle of the message that changed
 */
public static function clear( MessageHandle $handle ): void {
	$code = $handle->getCode();
	if ( self::isValidLanguage( $code ) ) {
		$groups = self::getSortedGroupsForClearing( $handle->getGroupIds() );
		self::internalClearGroups( $code, $groups, 0 );
	}
}
204
/**
 * Recalculates stats for the given group(s) in every language.
 *
 * @param string|string[] $id Group id or list of group ids
 * @param int $flags Combination of FLAG_* constants
 */
public static function clearGroup( $id, int $flags = 0 ): void {
	$groups = self::getSortedGroupsForClearing( (array)$id );

	// Do one language at a time, to save memory
	foreach ( self::getLanguages() as $code ) {
		self::internalClearGroups( $code, $groups, $flags );
	}
}
220
/**
 * Fetches aggregated statistics for all languages across groups.
 *
 * The result is served from the main WAN object cache and only regenerated
 * from the database through getAllLanguageStats() when the cache needs it.
 *
 * @return array[] Map of language code to stats array.
 */
public static function getApproximateLanguageStats(): array {
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$regenerate = function ( $oldValue, &$ttl, array &$setOpts ) {
		$setOpts += Database::getCacheSetOptions( Utilities::getSafeReadDB() );

		return self::getAllLanguageStats();
	};

	$options = [
		// queueUpdates() touches this check key after writing new rows
		'checkKeys' => [ self::LANGUAGE_STATS_KEY ],
		'pcTTL' => $cache::TTL_PROC_SHORT,
	];

	return $cache->getWithSetCallback(
		self::LANGUAGE_STATS_KEY,
		WANObjectCache::TTL_INDEFINITE,
		$regenerate,
		$options
	);
}
244
/**
 * Sums the cached statistics of all groups per language.
 *
 * @return array[] Map of language code to stats array. Languages returned by
 *  getLanguages() that have no rows in the table get the empty stats array.
 */
private static function getAllLanguageStats(): array {
	$rows = Utilities::getSafeReadDB()->newSelectQueryBuilder()
		->table( self::TABLE )
		->select( [
			'tgs_lang',
			'SUM(tgs_translated) AS tgs_translated',
			'SUM(tgs_fuzzy) AS tgs_fuzzy',
			'SUM(tgs_total) AS tgs_total',
			'SUM(tgs_proofread) AS tgs_proofread'
		] )
		->groupBy( 'tgs_lang' )
		->fetchResultSet();

	$statsByLanguage = [];
	foreach ( $rows as $row ) {
		$statsByLanguage[ $row->tgs_lang ] = self::extractNumbers( $row );
	}

	// Fill empty stats for missing language codes. Array union keeps the
	// entries read from the database and appends only the codes not seen.
	$statsByLanguage += array_fill_keys( self::getLanguages(), self::getEmptyStats() );

	return $statsByLanguage;
}
275
/**
 * Recalculates the stats of the given groups for one language and queues
 * the results for writing.
 *
 * NOTE(review): the final flush always passes 0 instead of $flags, so
 * FLAG_IMMEDIATE_WRITES only affects the interim flushes triggered inside
 * forItemInternal() — confirm that this is intended.
 *
 * @param string $code Language code
 * @param MessageGroup[] $groups Groups in the order they should be processed
 * @param int $flags Combination of FLAG_* constants
 */
private static function internalClearGroups( $code, array $groups, int $flags ): void {
	// Shared per-call cache; forItemInternal() fills it in by reference
	$perCallStats = [];
	foreach ( $groups as $messageGroup ) {
		self::forItemInternal( $perCallStats, $messageGroup, $code, $flags );
	}

	self::queueUpdates( 0 );
}
291
/**
 * Resolves group ids to groups, ordered so that aggregate groups come after
 * all other groups.
 *
 * @param string[] $ids Group ids
 * @return MessageGroup[] Groups keyed by group id; ids that do not resolve
 *  to a group are dropped.
 */
private static function getSortedGroupsForClearing( array $ids ) {
	// Sanity: array_filter removes any invalid (falsy) lookups
	$groups = array_filter( array_map( [ MessageGroups::class, 'getGroup' ], $ids ) );

	$plain = [];
	$aggregates = [];
	foreach ( $groups as $group ) {
		$groupId = $group->getId();
		if ( $group instanceof AggregateMessageGroup ) {
			$aggregates[$groupId] = $group;
			continue;
		}
		$plain[$groupId] = $group;
	}

	return array_merge( $plain, $aggregates );
}
320
/**
 * Gets the list of supported languages for statistics.
 *
 * The list is built once from the keys of Utilities::getLanguageNames( 'en' ),
 * sorted, and then kept in a static property for later calls.
 *
 * @return string[] Sorted language codes.
 */
public static function getLanguages() {
	if ( self::$languages !== null ) {
		return self::$languages;
	}

	$codes = array_keys( Utilities::getLanguageNames( 'en' ) );
	sort( $codes );
	self::$languages = $codes;

	return $codes;
}
335
/**
 * Purges all cached stats related to the given language code(s).
 *
 * Accepts a single code or a list of codes. The emptiness check is explicit
 * because count() only accepts arrays and Countable objects: passing a
 * string to it raises a warning on PHP 7.2+ and a TypeError on PHP 8.
 *
 * @param string|string[]|null $code Language code or list of language codes
 */
public static function clearLanguage( $code ) {
	if ( $code === null || $code === '' || $code === [] ) {
		// Nothing to purge
		return;
	}

	$dbw = wfGetDB( DB_PRIMARY );
	$conds = [ 'tgs_lang' => $code ];
	$dbw->delete( self::TABLE, $conds, __METHOD__ );
	wfDebugLog( 'messagegroupstats', 'Cleared ' . serialize( $conds ) );
}
345
/**
 * Purges all cached stats by deleting every row of the statistics table.
 */
public static function clearAll() {
	wfGetDB( DB_PRIMARY )->delete( self::TABLE, '*', __METHOD__ );
	wfDebugLog( 'messagegroupstats', 'Cleared everything :(' );
}
356
/**
 * Extracts the rows returned by selectRowsIdLang() into a stats map.
 *
 * @param iterable $res Rows with tgs_group, tgs_lang and the count columns
 * @param string[] $ids Group ids the rows are expected to belong to
 * @param array[][] $stats Existing map of group id to language code to stats
 * @return array[][] $stats with an entry added or replaced for each matching row.
 */
protected static function extractResults( $res, array $ids, array $stats = [] ) {
	// Map the internal ids back to real ids
	$databaseIds = array_map( [ self::class, 'getDatabaseIdForGroupId' ], $ids );
	$realIdByDatabaseId = array_combine( $databaseIds, $ids );

	foreach ( $res as $row ) {
		$realId = $realIdByDatabaseId[$row->tgs_group] ?? null;
		if ( $realId === null ) {
			// Stale entry, ignore for now
			// TODO: Schedule for purge
			continue;
		}

		$stats[$realId][$row->tgs_lang] = self::extractNumbers( $row );
	}

	return $stats;
}
383
/**
 * Converts a database row into a stats array.
 *
 * @param stdClass $row Row with tgs_total, tgs_translated, tgs_fuzzy and tgs_proofread
 * @return int[] Stats array indexed by TOTAL, TRANSLATED, FUZZY and PROOFREAD.
 */
protected static function extractNumbers( $row ) {
	$columnByIndex = [
		self::TOTAL => 'tgs_total',
		self::TRANSLATED => 'tgs_translated',
		self::FUZZY => 'tgs_fuzzy',
		self::PROOFREAD => 'tgs_proofread',
	];

	$numbers = [];
	foreach ( $columnByIndex as $index => $column ) {
		// Values are cast to integers
		$numbers[$index] = (int)$row->$column;
	}

	return $numbers;
}
397
/**
 * Collects stats of every group for one language, calculating the entries
 * that are not cached.
 *
 * @param string $code Language code
 * @param array[][] $stats Existing map of group id to language code to stats
 * @param int $flags Combination of FLAG_* constants
 * @return array[][] $stats with an entry for $code under every group id.
 */
protected static function forLanguageInternal( $code, array $stats, $flags ) {
	$groups = MessageGroups::singleton()->getGroups();

	// No group restriction in the query; rows of unknown groups are skipped
	// by extractResults()
	$rows = self::selectRowsIdLang( null, [ $code ], $flags );
	$stats = self::extractResults( $rows, array_keys( $groups ), $stats );

	foreach ( $groups as $id => $group ) {
		$stats[$id][$code] ??= self::forItemInternal( $stats, $group, $code, $flags );
	}

	return $stats;
}
420
/**
 * Recursively flattens an aggregate group into its non-aggregate groups.
 *
 * @param AggregateMessageGroup $agg Aggregate group to expand
 * @return MessageGroup[] Non-aggregate groups keyed by group id.
 */
protected static function expandAggregates( AggregateMessageGroup $agg ) {
	$leaves = [];

	foreach ( $agg->getGroups() as $member ) {
		if ( !( $member instanceof AggregateMessageGroup ) ) {
			$leaves[$member->getId()] = $member;
			continue;
		}

		// Array union: groups already collected are not replaced
		$leaves += self::expandAggregates( $member );
	}

	return $leaves;
}
439
/**
 * Collects stats of one group for every language, calculating the entries
 * that are not cached.
 *
 * @param MessageGroup $group Group to collect stats for
 * @param array[][] $stats Existing map of group id to language code to stats
 * @param int $flags Combination of FLAG_* constants
 * @return array[][] $stats with an entry for every language under the id of
 *  $group; the language maps are sorted by language code.
 */
protected static function forGroupInternal( MessageGroup $group, array $stats, $flags ) {
	$id = $group->getId();

	$rows = self::selectRowsIdLang( [ $id ], null, $flags );
	$stats = self::extractResults( $rows, [ $id ], $stats );

	// Go over each language filling missing entries
	foreach ( self::getLanguages() as $code ) {
		$stats[$id][$code] ??= self::forItemInternal( $stats, $group, $code, $flags );
	}

	// This is for sorting the values added later in correct order
	foreach ( $stats as &$byLanguage ) {
		ksort( $byLanguage );
	}
	unset( $byLanguage );

	return $stats;
}
468
/**
 * Fetches cached rows from the database.
 *
 * @param string[]|null $ids Group ids to restrict to, or null for no restriction
 * @param string[]|null $codes Language codes to restrict to, or null for no restriction
 * @param int $flags Combination of FLAG_* constants
 * @return iterable Matching rows. Empty when FLAG_NO_CACHE is set or when
 *  either restriction list is empty.
 */
protected static function selectRowsIdLang( ?array $ids, ?array $codes, $flags ) {
	if ( $flags & self::FLAG_NO_CACHE ) {
		return [];
	}

	// An empty restriction list cannot match any row. Return early instead
	// of handing an empty list to the query condition builder, which
	// rejects empty IN() lists.
	if ( $ids === [] || $codes === [] ) {
		return [];
	}

	$conds = [];
	if ( $ids !== null ) {
		// The table stores shortened ids for long group ids
		$conds['tgs_group'] = array_map( [ self::class, 'getDatabaseIdForGroupId' ], $ids );
	}

	if ( $codes !== null ) {
		$conds['tgs_lang'] = $codes;
	}

	$dbr = Utilities::getSafeReadDB();

	return $dbr->select( self::TABLE, '*', $conds, __METHOD__ );
}
497
/**
 * Calculates stats for one group in one language and queues the result for
 * writing to the database.
 *
 * @param array[][] &$stats Per-call cache, map of group id to language code
 *  to stats; the result is stored into it
 * @param MessageGroup $group Group to calculate stats for
 * @param string $code Language code
 * @param int $flags Combination of FLAG_* constants
 * @return array Stats array indexed by TOTAL, TRANSLATED, FUZZY and PROOFREAD;
 *  the unknown stats array when FLAG_CACHE_ONLY is set.
 */
protected static function forItemInternal( &$stats, MessageGroup $group, $code, $flags ) {
	$id = $group->getId();

	if ( $flags & self::FLAG_CACHE_ONLY ) {
		// Not allowed to calculate: remember and report the value as unknown
		return $stats[$id][$code] = self::getUnknownStats();
	}

	// It may happen that caches are requested repeatedly for a group before we get a chance
	// to write the values to the database. Check for queued updates first. This has the
	// benefit of avoiding duplicate rows for inserts. Ideally this would be checked before we
	// query the database for missing values. This code is somewhat ugly as it needs to
	// reverse engineer the values from the row format.
	$databaseGroupId = self::getDatabaseIdForGroupId( $id );
	$uniqueKey = $databaseGroupId . '|' . $code;
	$ignoreQueued = (bool)( $flags & self::FLAG_NO_CACHE );
	$queuedRow = self::$updates[$uniqueKey] ?? null;
	if ( $queuedRow && !$ignoreQueued ) {
		return [
			self::TOTAL => $queuedRow['tgs_total'],
			self::TRANSLATED => $queuedRow['tgs_translated'],
			self::FUZZY => $queuedRow['tgs_fuzzy'],
			self::PROOFREAD => $queuedRow['tgs_proofread'],
		];
	}

	$aggregates = $group instanceof AggregateMessageGroup
		? self::calculateAggregageGroup( $stats, $group, $code, $flags )
		: self::calculateGroup( $group, $code );

	// Cache for use in subsequent forItemInternal calls
	$stats[$id][$code] = $aggregates;

	// Don't add nulls to the database, causes annoying warnings
	if ( $aggregates[self::TOTAL] === null ) {
		return $aggregates;
	}

	self::$updates[$uniqueKey] = [
		'tgs_group' => $databaseGroupId,
		'tgs_lang' => $code,
		'tgs_total' => $aggregates[self::TOTAL],
		'tgs_translated' => $aggregates[self::TRANSLATED],
		'tgs_fuzzy' => $aggregates[self::FUZZY],
		'tgs_proofread' => $aggregates[self::PROOFREAD],
	];

	// For big and lengthy updates, attempt some interim saves. This might not have
	// any effect, because writes to the database may be deferred.
	$queuedCount = count( self::$updates );
	if ( $queuedCount % 100 === 0 ) {
		self::queueUpdates( $flags );
	}

	return $aggregates;
}
560
/**
 * Calculates stats of an aggregate group by summing the stats of the
 * non-aggregate groups it expands to.
 *
 * @param array[][] &$stats Per-call cache, map of group id to language code
 *  to stats; stats of the subgroups are stored into it
 * @param AggregateMessageGroup $group Aggregate group
 * @param string $code Language code
 * @param int $flags Combination of FLAG_* constants
 * @return array Stats array; all nulls as soon as any counted subgroup has unknown stats.
 */
private static function calculateAggregageGroup( &$stats, $group, $code, $flags ) {
	$expanded = self::expandAggregates( $group );

	// Performance: if we have per-call cache of stats, do not query them again.
	$uncachedIds = array_filter(
		array_keys( $expanded ),
		static fn ( $sid ) => !isset( $stats[$sid][$code] )
	);

	if ( $uncachedIds !== [] ) {
		$rows = self::selectRowsIdLang( $uncachedIds, [ $code ], $flags );
		$stats = self::extractResults( $rows, $uncachedIds, $stats );
	}

	$totals = self::getEmptyStats();
	foreach ( $expanded as $sid => $subgroup ) {
		$stats[$sid][$code] ??= self::forItemInternal( $stats, $subgroup, $code, $flags );

		// Discouraged groups may belong to another group, usually if there
		// is an aggregate group for all translatable pages. In that case
		// calculate and store the statistics, but don't count them as part of
		// the aggregate group, so that the numbers in Special:LanguageStats
		// add up. The statistics for discouraged groups can still be viewed
		// through Special:MessageGroupStats.
		if ( TranslateMetadata::isExcluded( $sid, $code ) ) {
			continue;
		}

		$totals = self::multiAdd( $totals, $stats[$sid][$code] );
	}

	return $totals;
}
597
/**
 * Adds the values of $b to the values of $a, index by index.
 *
 * @param array &$a Stats array that receives the sums; left untouched when
 *  either array starts with null
 * @param array $b Stats array to add
 * @return array The updated $a, or an array of nulls of the same length as
 *  $a when the first value of either array is null.
 */
public static function multiAdd( &$a, $b ) {
	if ( $a[0] === null || $b[0] === null ) {
		return array_fill( 0, count( $a ), null );
	}

	foreach ( array_keys( $a ) as $index ) {
		$a[$index] += $b[$index];
	}

	return $a;
}
608
/**
 * Calculates stats for a non-aggregate group from its message collection.
 *
 * @param MessageGroup $group Group to calculate stats for
 * @param string $code Language code
 * @return int[] Stats array as returned by getStatsForCollection().
 */
protected static function calculateGroup( MessageGroup $group, $code ) {
	global $wgTranslateDocumentationLanguageCode;
	// Calculate if missing and store in the db
	$collection = $group->initCollection( $code );

	$isFileBasedDocumentation = $code === $wgTranslateDocumentationLanguageCode
		&& $group instanceof FileBasedMessageGroup;
	if ( !$isFileBasedDocumentation ) {
		return self::getStatsForCollection( $collection );
	}

	$cache = $group->getMessageGroupCache( $group->getSourceLanguage() );
	if ( !$cache->exists() ) {
		return self::getStatsForCollection( $collection );
	}

	// Messages whose template entry has a '.' comment are passed to the
	// collection as being present in the file
	$template = $cache->getExtra()['TEMPLATE'] ?? [];
	$infile = [];
	foreach ( $template as $key => $data ) {
		if ( !isset( $data['comments']['.'] ) ) {
			continue;
		}
		$infile[$key] = '1';
	}
	$collection->setInFile( $infile );

	return self::getStatsForCollection( $collection );
}
638
/**
 * Writes the queued stats rows to the database, either right away or as a
 * deferred update.
 *
 * Does nothing when no rows are queued or the wiki is in read-only mode.
 *
 * @param int $flags Combination of FLAG_* constants; FLAG_IMMEDIATE_WRITES
 *  runs the write immediately instead of deferring it
 */
protected static function queueUpdates( $flags ) {
	$services = MediaWikiServices::getInstance();
	if ( self::$updates === [] || $services->getReadOnlyMode()->isReadOnly() ) {
		return;
	}

	// NOTE(review): the original comment here said "avoid connecting yet" — confirm
	// that getConnection() does not open the connection eagerly.
	$dbw = $services->getDBLoadBalancer()->getConnection( DB_PRIMARY );
	$table = self::TABLE;
	// Captured now so that the warning below can report who queued the rows
	$callers = wfGetAllCallers( 50 );

	$writeQueuedRows = static function ( IDatabase $dbw, $method ) use ( $table, $callers, $services ) {
		// Maybe another deferred update already processed these
		if ( self::$updates === [] ) {
			return;
		}

		// This path should only be hit during web requests
		$rowCount = count( self::$updates );
		if ( $rowCount > 100 ) {
			$groups = array_unique( array_column( self::$updates, 'tgs_group' ) );
			LoggerFactory::getInstance( 'Translate' )->warning(
				"Huge translation update of {count} rows for group(s) {groups}",
				[
					'count' => $rowCount,
					'groups' => implode( ', ', $groups ),
					'callers' => $callers,
				]
			);
		}

		$primaryKey = [ 'tgs_group', 'tgs_lang' ];
		$dbw->replace( $table, [ $primaryKey ], array_values( self::$updates ), $method );
		self::$updates = [];

		// Invalidate the aggregated per-language stats cache
		$services->getMainWANObjectCache()->touchCheckKey( self::LANGUAGE_STATS_KEY );
	};

	$updateOp = self::withLock( $dbw, 'updates', __METHOD__, $writeQueuedRows );

	if ( $flags & self::FLAG_IMMEDIATE_WRITES ) {
		$updateOp();
	} else {
		DeferredUpdates::addCallableUpdate( $updateOp );
	}
}
687
/**
 * Wraps a callback so that it runs while holding a named database lock.
 *
 * The returned closure tries to acquire the lock with a timeout of 1 and
 * silently does nothing when it cannot get it. The lock is released in a
 * finally block, so it is not left held when the callback or one of the
 * commits throws.
 *
 * @param IDatabase $dbw Database connection used for locking and passed to the callback
 * @param string $key Suffix of the lock name
 * @param string $method Caller name passed through to the callback
 * @param callable $callback Called as $callback( $dbw, $method )
 * @return callable Closure taking no arguments.
 */
protected static function withLock( IDatabase $dbw, $key, $method, $callback ) {
	$fname = __METHOD__;
	return static function () use ( $dbw, $key, $method, $callback, $fname ) {
		$lockName = 'MessageGroupStats:' . $key;
		if ( !$dbw->lock( $lockName, $fname, 1 ) ) {
			return; // raced out
		}

		try {
			$dbw->commit( $fname, 'flush' );
			call_user_func( $callback, $dbw, $method );
			$dbw->commit( $fname, 'flush' );
		} finally {
			$dbw->unlock( $lockName, $fname );
		}
	};
}
703
/**
 * Returns the id under which a group is stored in the statistics table.
 *
 * Ids of at most 72 bytes are used as is. Longer ids are replaced by their
 * first 50 bytes, the separator '||' and the first 20 hexadecimal characters
 * of their SHA-256 hash, which gives 72 bytes in total.
 *
 * @param string $id Group id
 * @return string Id to use in the tgs_group column.
 */
public static function getDatabaseIdForGroupId( $id ) {
	// The column is 100 bytes long, but we don't need to use it all
	if ( strlen( $id ) > 72 ) {
		// hash() returns lowercase hexadecimal digits by default
		$digest = hash( 'sha256', $id );

		return substr( $id, 0, 50 ) . '||' . substr( $digest, 0, 20 );
	}

	return $id;
}
714
/**
 * Counts total, translated, fuzzy and proofread messages of a collection.
 *
 * The collection is filtered in place, and each count depends on the
 * filters applied before it, so the order of the steps matters.
 *
 * @param MessageCollection $collection Collection to count; modified by the filters
 * @return int[] Stats array indexed by TOTAL, TRANSLATED, FUZZY and PROOFREAD.
 */
public static function getStatsForCollection( MessageCollection $collection ): array {
	$collection->filter( 'ignored' );
	$collection->filterUntranslatedOptional();
	// Store the count of real messages for later calculation.
	$totalCount = count( $collection );

	// Count fuzzy first: it is the number of messages the fuzzy filter removes.
	$collection->filter( 'fuzzy' );
	$fuzzyCount = $totalCount - count( $collection );

	// Count the completed translations.
	$collection->filter( 'hastranslation', false );
	$translatedCount = count( $collection );

	// Count how many of the completed translations
	// have been proofread
	$collection->filter( 'reviewer', false );
	$proofreadCount = count( $collection );

	$result = [];
	$result[self::TOTAL] = $totalCount;
	$result[self::TRANSLATED] = $translatedCount;
	$result[self::FUZZY] = $fuzzyCount;
	$result[self::PROOFREAD] = $proofreadCount;

	return $result;
}
741}
Groups multiple message groups together as one group.
getGroups()
Returns a list of message groups that this group consists of.
This class implements default behavior for file based message groups.
Factory class for accessing message groups individually by id or all of them as a list.
This file contains the class for core message collections implementation.
filter(string $type, bool $condition=true, ?int $value=null)
Filters messages based on some condition.
Essentially random collection of helper functions, similar to GlobalFunctions.php.
Definition Utilities.php:31
This class abstracts MessageGroup statistics calculation and storing.
const PROOFREAD
Array index.
static clearGroup( $id, int $flags=0)
Recalculate stats for given group(s).
static getLanguages()
Get list of supported languages for statistics.
static forItemInternal(&$stats, MessageGroup $group, $code, $flags)
const FLAG_CACHE_ONLY
If stats are not cached, do not attempt to calculate them on the fly.
static extractResults( $res, array $ids, array $stats=[])
Use this to extract results returned from selectRowsIdLang.
static forLanguage( $code, $flags=0)
Returns stats for all groups in given language.
static forGroupInternal(MessageGroup $group, array $stats, $flags)
static extractNumbers( $row)
Returns an array of needed database fields.
static expandAggregates(AggregateMessageGroup $agg)
static clearAll()
Purges all cached stats.
static clear(MessageHandle $handle)
Recalculate stats for all groups associated with the message.
const TOTAL
Array index.
const FLAG_IMMEDIATE_WRITES
Do not defer updates. Meant for jobs like MessageGroupStatsRebuildJob.
const FLAG_NO_CACHE
Ignore cached values. Useful for updating stale values.
const FUZZY
Array index.
static getUnknownStats()
Returns empty stats array that indicates stats are incomplete or unknown.
static forLanguageInternal( $code, array $stats, $flags)
const TRANSLATED
Array index.
static calculateGroup(MessageGroup $group, $code)
static getEmptyStats()
Returns empty stats array.
static selectRowsIdLang(?array $ids, ?array $codes, $flags)
Fetch rows from the database.
static forItem( $id, $code, $flags=0)
Returns stats for given group in given language.
static forEverything( $flags=0)
Returns stats for all groups in all languages.
static forGroup( $id, $flags=0)
Returns stats for all languages in given group.
static getApproximateLanguageStats()
Fetch aggregated statistics for all languages across groups.
Class for pointing to messages, like Title class is for titles.
getGroupIds()
Returns all message group ids this message belongs to.
Interface for message groups.
initCollection( $code)
Initialises a message collection with the given language code, message definitions and message tags.
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.