Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
MessageGroupStats.php
Go to the documentation of this file.
1<?php
11use MediaWiki\Logger\LoggerFactory;
12use MediaWiki\MediaWikiServices;
13use Wikimedia\Rdbms\IDatabase;
14
/** Name of the database table that stores the cached statistics rows. */
24 private const TABLE = 'translate_groupstats';
25
/** Array index: total number of messages. */
26 public const TOTAL = 0;
/** Array index: number of translated messages. */
27 public const TRANSLATED = 1;
/** Array index: number of fuzzy messages. */
28 public const FUZZY = 2;
/** Array index: number of proofread messages. */
29 public const PROOFREAD = 3;
30
/** Flag: if stats are not cached, do not attempt to calculate them on the fly. */
32 public const FLAG_CACHE_ONLY = 1;
/** Flag: ignore cached values. Useful for updating stale values. */
34 public const FLAG_NO_CACHE = 2;
/** Flag: do not defer updates. Meant for jobs like MessageGroupStatsRebuildJob. */
36 public const FLAG_IMMEDIATE_WRITES = 4;
37
/** Rows waiting to be written to the table, keyed by "<database group id>|<language code>". */
39 protected static $updates = [];
/** Sorted list of language codes, filled lazily by getLanguages(); null until then. */
41 private static $languages;
42
/**
 * Returns empty stats array: one zero for each of the four counters
 * (TOTAL, TRANSLATED, FUZZY, PROOFREAD).
 *
 * @return int[]
 */
public static function getEmptyStats() {
	return array_fill( 0, 4, 0 );
}
52
/**
 * Returns empty stats array that indicates stats are incomplete or unknown:
 * every one of the four counters is null.
 *
 * @return null[]
 */
protected static function getUnknownStats() {
	return array_fill( 0, 4, null );
}
62
/**
 * Tells whether the given code is one of the languages statistics are kept for.
 *
 * @param string $code Language code
 * @return bool True if the code is in the list returned by getLanguages()
 */
private static function isValidLanguage( $code ) {
	// Strict comparison: with loose comparison a value such as `true`
	// would be considered equal to every language code.
	return in_array( $code, self::getLanguages(), true );
}
67
/**
 * Tells whether statistics can be calculated for the given group.
 *
 * @param MessageGroup|null $group Group to check; null means the group was not found
 * @return bool False for a missing group and for dynamic groups
 */
private static function isValidMessageGroup( ?MessageGroup $group = null ) {
	// In case some code calls stats for dynamic groups. Calculating these numbers
	// does not make sense for dynamic groups, and would just throw an exception.
	return $group !== null && !MessageGroups::isDynamic( $group );
}
73
/**
 * Returns stats for given group in given language.
 *
 * @param string $id Message group id
 * @param string $code Language code
 * @param int $flags Combination of the FLAG_* constants
 * @return array Stats array indexed by TOTAL, TRANSLATED, FUZZY and PROOFREAD;
 *  all values are null when the group or the language is not valid
 */
public static function forItem( $id, $code, $flags = 0 ) {
	$group = MessageGroups::getGroup( $id );
	$usable = self::isValidMessageGroup( $group ) && self::isValidLanguage( $code );
	if ( !$usable ) {
		return self::getUnknownStats();
	}

	// Try the cached row first
	$rows = self::selectRowsIdLang( [ $id ], [ $code ], $flags );
	$cached = self::extractResults( $rows, [ $id ] );

	// Nothing cached: let forItemInternal provide the value
	if ( !isset( $cached[$id][$code] ) ) {
		$cached[$id][$code] = self::forItemInternal( $cached, $group, $code, $flags );
	}

	self::queueUpdates( $flags );

	return $cached[$id][$code];
}
98
/**
 * Returns stats for all groups in given language.
 *
 * @param string $code Language code
 * @param int $flags Combination of the FLAG_* constants
 * @return array[] Map of group id to stats array. For a language that is not
 *  valid every known group maps to the unknown stats.
 */
public static function forLanguage( $code, $flags = 0 ) {
	if ( !self::isValidLanguage( $code ) ) {
		$groupIds = array_keys( MessageGroups::singleton()->getGroups() );

		return array_fill_keys( $groupIds, self::getUnknownStats() );
	}

	$perGroup = self::forLanguageInternal( $code, [], $flags );

	// Drop the language level: group => [ code => stats ] becomes group => stats
	$flattened = array_map(
		static function ( $languages ) use ( $code ) {
			return $languages[$code];
		},
		$perGroup
	);

	self::queueUpdates( $flags );

	return $flattened;
}
127
/**
 * Returns stats for all languages in given group.
 *
 * @param string $id Message group id
 * @param int $flags Combination of the FLAG_* constants
 * @return array[] Map of language code to stats array. For a missing or dynamic
 *  group every language maps to the unknown stats.
 */
public static function forGroup( $id, $flags = 0 ) {
	$group = MessageGroups::getGroup( $id );

	if ( self::isValidMessageGroup( $group ) ) {
		$all = self::forGroupInternal( $group, [], $flags );
		self::queueUpdates( $flags );

		return $all[$id];
	}

	return array_fill_keys( self::getLanguages(), self::getUnknownStats() );
}
152
/**
 * Returns stats for all groups in all languages.
 *
 * @param int $flags Combination of the FLAG_* constants
 * @return array[][] Map of group id to map of language code to stats array
 */
public static function forEverything( $flags = 0 ) {
	// Feed the accumulated result of each group into the next call
	$everything = array_reduce(
		MessageGroups::singleton()->getGroups(),
		static function ( array $collected, $group ) use ( $flags ) {
			return self::forGroupInternal( $group, $collected, $flags );
		},
		[]
	);

	self::queueUpdates( $flags );

	return $everything;
}
171
/**
 * Recalculate stats for all groups associated with the message.
 *
 * Does nothing when the language of the handle is not a valid language.
 *
 * @param MessageHandle $handle
 */
public static function clear( MessageHandle $handle ): void {
	$languageCode = $handle->getCode();

	if ( self::isValidLanguage( $languageCode ) ) {
		$affected = self::getSortedGroupsForClearing( $handle->getGroupIds() );
		self::internalClearGroups( $languageCode, $affected, 0 );
	}
}
186
/**
 * Recalculate stats for given group(s).
 *
 * @param string|string[] $id Message group id or list of ids
 * @param int $flags Combination of the FLAG_* constants
 */
public static function clearGroup( $id, int $flags = 0 ): void {
	$affected = self::getSortedGroupsForClearing( (array)$id );

	// One language per pass keeps the memory use down
	foreach ( self::getLanguages() as $languageCode ) {
		self::internalClearGroups( $languageCode, $affected, $flags );
	}
}
202
/**
 * Recalculates the stats of the given groups in one language and queues the
 * results for writing.
 *
 * @param string $code Language code
 * @param MessageGroup[] $groups Groups to process, in the given order
 * @param int $flags Combination of the FLAG_* constants
 */
private static function internalClearGroups( $code, array $groups, int $flags ): void {
	$stats = [];
	foreach ( $groups as $group ) {
		// $stats is modified by reference
		self::forItemInternal( $stats, $group, $code, $flags );
	}

	// Pass the caller's flags on so that FLAG_IMMEDIATE_WRITES also applies to
	// the final flush, not only to the interim flushes done by forItemInternal.
	self::queueUpdates( $flags );
}
218
/**
 * Resolves group ids to group objects and orders them so that all
 * non-aggregate groups come before the aggregate groups.
 *
 * Ids that do not resolve to a group are dropped.
 *
 * @param string[] $ids Message group ids
 * @return MessageGroup[] Groups keyed by id (subject to array_merge key handling)
 */
private static function getSortedGroupsForClearing( array $ids ) {
	$plain = [];
	$aggregates = [];

	// array_filter drops the entries for which no group was found
	$resolved = array_filter( array_map( [ MessageGroups::class, 'getGroup' ], $ids ) );
	foreach ( $resolved as $candidate ) {
		if ( $candidate instanceof AggregateMessageGroup ) {
			$aggregates[$candidate->getId()] = $candidate;
			continue;
		}

		$plain[$candidate->getId()] = $candidate;
	}

	return array_merge( $plain, $aggregates );
}
247
/**
 * Returns the sorted list of language codes statistics are kept for.
 *
 * The list is built on first use and kept in a static property afterwards.
 *
 * @return string[]
 */
private static function getLanguages() {
	if ( self::$languages !== null ) {
		return self::$languages;
	}

	$codes = array_keys( TranslateUtils::getLanguageNames( 'en' ) );
	sort( $codes );
	self::$languages = $codes;

	return $codes;
}
262
/**
 * Deletes the cached stats rows of the given language(s).
 *
 * @param string|string[] $code Language code or list of language codes
 */
public static function clearLanguage( $code ) {
	// The original guard was count( $code ), which throws a TypeError on PHP 8
	// when given a plain string. Only "no codes at all" means nothing to do.
	if ( $code === null || $code === [] ) {
		return;
	}

	$dbw = wfGetDB( DB_PRIMARY );
	$conds = [ 'tgs_lang' => $code ];
	$dbw->delete( self::TABLE, $conds, __METHOD__ );
	wfDebugLog( 'messagegroupstats', 'Cleared ' . serialize( $conds ) );
}
272
/**
 * Purges all cached stats.
 */
public static function clearAll() {
	$table = self::TABLE;
	$primary = wfGetDB( DB_PRIMARY );

	// '*' as the condition stands for every row of the table
	$primary->delete( $table, '*', __METHOD__ );

	wfDebugLog( 'messagegroupstats', 'Cleared everything :(' );
}
283
/**
 * Use this to extract results returned from selectRowsIdLang.
 *
 * @param iterable $res Rows with tgs_group, tgs_lang and the four counter fields
 * @param string[] $ids Message group ids the rows may belong to
 * @param array[][] $stats Optional existing stats to add the rows to
 * @return array[][] Map of group id to map of language code to stats array
 */
protected static function extractResults( $res, array $ids, array $stats = [] ) {
	// Map the internal ids back to real ids. An array callable is used because
	// the 'self::method' string form is deprecated as of PHP 8.2.
	$databaseIds = array_map( [ self::class, 'getDatabaseIdForGroupId' ], $ids );
	$idmap = array_combine( $databaseIds, $ids );

	foreach ( $res as $row ) {
		if ( !isset( $idmap[$row->tgs_group] ) ) {
			// Stale entry, ignore for now
			// TODO: Schedule for purge
			continue;
		}

		$realId = $idmap[$row->tgs_group];
		$stats[$realId][$row->tgs_lang] = self::extractNumbers( $row );
	}

	return $stats;
}
310
/**
 * Returns an array of needed database fields.
 *
 * @param stdClass $row Row with tgs_total, tgs_translated, tgs_fuzzy and tgs_proofread
 * @return int[] Stats array indexed by TOTAL, TRANSLATED, FUZZY and PROOFREAD
 */
protected static function extractNumbers( $row ) {
	$fieldByIndex = [
		self::TOTAL => 'tgs_total',
		self::TRANSLATED => 'tgs_translated',
		self::FUZZY => 'tgs_fuzzy',
		self::PROOFREAD => 'tgs_proofread',
	];

	$numbers = [];
	foreach ( $fieldByIndex as $index => $field ) {
		$numbers[$index] = (int)$row->$field;
	}

	return $numbers;
}
324
/**
 * Collects the stats of every message group for one language.
 *
 * Cached rows are used where available; missing entries are provided by
 * forItemInternal.
 *
 * @param string $code Language code
 * @param array[][] $stats Existing stats to extend
 * @param int $flags Combination of the FLAG_* constants
 * @return array[][] Map of group id to map of language code to stats array
 */
protected static function forLanguageInternal( $code, array $stats, $flags ) {
	$allGroups = MessageGroups::singleton()->getGroups();

	$rows = self::selectRowsIdLang( null, [ $code ], $flags );
	$stats = self::extractResults( $rows, array_keys( $allGroups ), $stats );

	foreach ( $allGroups as $groupId => $group ) {
		if ( !isset( $stats[$groupId][$code] ) ) {
			$stats[$groupId][$code] = self::forItemInternal( $stats, $group, $code, $flags );
		}
	}

	return $stats;
}
347
/**
 * Recursively replaces nested aggregate groups with the groups they contain.
 *
 * @param AggregateMessageGroup $agg
 * @return MessageGroup[] Non-aggregate groups keyed by group id
 */
protected static function expandAggregates( AggregateMessageGroup $agg ) {
	$leaves = [];

	foreach ( $agg->getGroups() as $member ) {
		if ( !( $member instanceof AggregateMessageGroup ) ) {
			$leaves[$member->getId()] = $member;
			continue;
		}

		// Array union: groups already collected are kept as they are
		$leaves += self::expandAggregates( $member );
	}

	return $leaves;
}
366
/**
 * Collects the stats of one message group for every language.
 *
 * Cached rows are used where available; missing entries are provided by
 * forItemInternal.
 *
 * @param MessageGroup $group
 * @param array[][] $stats Existing stats to extend
 * @param int $flags Combination of the FLAG_* constants
 * @return array[][] Map of group id to map of language code to stats array,
 *  with the language codes of every group sorted by key
 */
protected static function forGroupInternal( MessageGroup $group, array $stats, $flags ) {
	$groupId = $group->getId();

	$rows = self::selectRowsIdLang( [ $groupId ], null, $flags );
	$stats = self::extractResults( $rows, [ $groupId ], $stats );

	// Fill in every language that had no cached row
	foreach ( self::getLanguages() as $languageCode ) {
		if ( !isset( $stats[$groupId][$languageCode] ) ) {
			$stats[$groupId][$languageCode] =
				self::forItemInternal( $stats, $group, $languageCode, $flags );
		}
	}

	// Entries added above were appended at the end; restore the key order
	foreach ( $stats as &$perLanguage ) {
		ksort( $perLanguage );
	}
	unset( $perLanguage );

	return $stats;
}
395
/**
 * Fetch rows from the database.
 *
 * @param string[]|null $ids Message group ids to limit to, or null for all groups
 * @param string[]|null $codes Language codes to limit to, or null for all languages
 * @param int $flags Combination of the FLAG_* constants; with FLAG_NO_CACHE
 *  nothing is read
 * @return iterable Matching rows; an empty array when nothing was queried
 */
protected static function selectRowsIdLang( ?array $ids, ?array $codes, $flags ) {
	if ( $flags & self::FLAG_NO_CACHE ) {
		return [];
	}

	// An empty list cannot match any row, and it would otherwise end up as an
	// empty IN() condition in the query.
	if ( $ids === [] || $codes === [] ) {
		return [];
	}

	$conds = [];
	if ( $ids !== null ) {
		// Array callable: the 'self::method' string form is deprecated as of PHP 8.2
		$conds['tgs_group'] = array_map( [ self::class, 'getDatabaseIdForGroupId' ], $ids );
	}

	if ( $codes !== null ) {
		$conds['tgs_lang'] = $codes;
	}

	$dbr = TranslateUtils::getSafeReadDB();

	return $dbr->select( self::TABLE, '*', $conds, __METHOD__ );
}
424
/**
 * Provides the stats of one group in one language without consulting the
 * database cache, and queues calculated values for writing.
 *
 * Every return path stores the returned value in $stats[$id][$code].
 *
 * @param array[][] &$stats Per-call cache: group id => language code => stats array
 * @param MessageGroup $group
 * @param string $code Language code
 * @param int $flags Combination of the FLAG_* constants
 * @return array Stats array indexed by TOTAL, TRANSLATED, FUZZY and PROOFREAD
 */
protected static function forItemInternal( &$stats, MessageGroup $group, $code, $flags ) {
	$id = $group->getId();

	// Caller does not want anything to be calculated
	if ( $flags & self::FLAG_CACHE_ONLY ) {
		$stats[$id][$code] = self::getUnknownStats();
		return $stats[$id][$code];
	}

	// It may happen that caches are requested repeatedly for a group before we get a chance
	// to write the values to the database. Check for queued updates first. This has the
	// benefit of avoiding duplicate rows for inserts. Ideally this would be checked before we
	// query the database for missing values. This code is somewhat ugly as it needs to
	// reverse engineer the values from the row format.
	$databaseGroupId = self::getDatabaseIdForGroupId( $id );
	$uniqueKey = "$databaseGroupId|$code";
	$queuedValue = self::$updates[$uniqueKey] ?? null;
	if ( $queuedValue && !( $flags & self::FLAG_NO_CACHE ) ) {
		// Store the value in the per-call cache like the other return paths do,
		// so that later calls sharing $stats (e.g. aggregate calculation) see it.
		$stats[$id][$code] = [
			self::TOTAL => $queuedValue['tgs_total'],
			self::TRANSLATED => $queuedValue['tgs_translated'],
			self::FUZZY => $queuedValue['tgs_fuzzy'],
			self::PROOFREAD => $queuedValue['tgs_proofread'],
		];
		return $stats[$id][$code];
	}

	if ( $group instanceof AggregateMessageGroup ) {
		$aggregates = self::calculateAggregageGroup( $stats, $group, $code, $flags );
	} else {
		$aggregates = self::calculateGroup( $group, $code );
	}
	// Cache for use in subsequent forItemInternal calls
	$stats[$id][$code] = $aggregates;

	// Don't add nulls to the database, causes annoying warnings
	if ( $aggregates[self::TOTAL] === null ) {
		return $aggregates;
	}

	self::$updates[$uniqueKey] = [
		'tgs_group' => $databaseGroupId,
		'tgs_lang' => $code,
		'tgs_total' => $aggregates[self::TOTAL],
		'tgs_translated' => $aggregates[self::TRANSLATED],
		'tgs_fuzzy' => $aggregates[self::FUZZY],
		'tgs_proofread' => $aggregates[self::PROOFREAD],
	];

	// For big and lengthy updates, attempt some interim saves. This might not have
	// any effect, because writes to the database may be deferred.
	if ( count( self::$updates ) % 100 === 0 ) {
		self::queueUpdates( $flags );
	}

	return $aggregates;
}
487
/**
 * Calculates the stats of an aggregate group by summing up the stats of the
 * non-aggregate groups it contains.
 *
 * @param array[][] &$stats Per-call cache: group id => language code => stats array
 * @param AggregateMessageGroup $group
 * @param string $code Language code
 * @param int $flags Combination of the FLAG_* constants
 * @return array Stats array; all values are null if any counted subgroup is unknown
 */
private static function calculateAggregageGroup( &$stats, $group, $code, $flags ) {
	$members = self::expandAggregates( $group );

	// Performance: only query the subgroups the per-call cache does not know yet.
	$missingIds = array_filter(
		array_keys( $members ),
		static function ( $sid ) use ( $stats, $code ) {
			return !isset( $stats[$sid][$code] );
		}
	);

	if ( $missingIds !== [] ) {
		$rows = self::selectRowsIdLang( $missingIds, [ $code ], $flags );
		$stats = self::extractResults( $rows, $missingIds, $stats );
	}

	$sum = self::getEmptyStats();
	foreach ( $members as $sid => $subgroup ) {
		// Still unknown after the query: calculate and store it
		if ( !isset( $stats[$sid][$code] ) ) {
			$stats[$sid][$code] = self::forItemInternal( $stats, $subgroup, $code, $flags );
		}

		// Discouraged groups may belong to another group, usually if there
		// is an aggregate group for all translatable pages. Their statistics
		// are calculated and stored above, but are not counted as part of the
		// aggregate group, so that the numbers in Special:LanguageStats add
		// up. They can still be viewed through Special:MessageGroupStats.
		if ( TranslateMetadata::isExcluded( $sid, $code ) ) {
			continue;
		}

		$sum = self::multiAdd( $sum, $stats[$sid][$code] );
	}

	return $sum;
}
524
/**
 * Adds two stats arrays together element by element.
 *
 * If the first element of either array is null, an array of nulls with as
 * many elements as $a is returned and $a is left untouched. Otherwise the
 * values of $b are added to $a in place and $a is returned.
 *
 * @param array &$a Stats array, updated in place
 * @param array $b Stats array to add
 * @return array
 */
public static function multiAdd( &$a, $b ) {
	$bothKnown = $a[0] !== null && $b[0] !== null;
	if ( !$bothKnown ) {
		return array_fill( 0, count( $a ), null );
	}

	foreach ( array_keys( $a ) as $index ) {
		$a[$index] += $b[$index];
	}

	return $a;
}
535
/**
 * Calculates the stats of a non-aggregate group in one language from its
 * message collection.
 *
 * @param MessageGroup $group
 * @param string $code Language code
 * @return int[] Stats array indexed by TOTAL, TRANSLATED, FUZZY and PROOFREAD
 */
protected static function calculateGroup( MessageGroup $group, $code ) {
	global $wgTranslateDocumentationLanguageCode;

	$messages = $group->initCollection( $code );

	// For the documentation language of file based groups, flag the messages
	// whose cached TEMPLATE entry carries a '.' comment as being in the file.
	$isDocumentation = $code === $wgTranslateDocumentationLanguageCode;
	if ( $isDocumentation && $group instanceof FileBasedMessageGroup ) {
		$sourceCache = $group->getMessageGroupCache( $group->getSourceLanguage() );
		if ( $sourceCache->exists() ) {
			$documented = [];
			foreach ( $sourceCache->getExtra()['TEMPLATE'] ?? [] as $key => $data ) {
				if ( isset( $data['comments']['.'] ) ) {
					$documented[$key] = '1';
				}
			}
			$messages->setInFile( $documented );
		}
	}

	// Whatever is left after these two filters is the total.
	$messages->filter( 'ignored' );
	$messages->filterUntranslatedOptional();
	$total = count( $messages );

	// The fuzzy count is the number of messages the fuzzy filter removes.
	$messages->filter( 'fuzzy' );
	$fuzzy = $total - count( $messages );

	// Completed translations.
	$messages->filter( 'hastranslation', false );
	$translated = count( $messages );

	// Completed translations that have also been proofread.
	$messages->filter( 'reviewer', false );
	$proofread = count( $messages );

	return [
		self::TOTAL => $total,
		self::TRANSLATED => $translated,
		self::FUZZY => $fuzzy,
		self::PROOFREAD => $proofread,
	];
}
588
/**
 * Writes the queued stats rows to the database, either right away or as a
 * deferred update.
 *
 * Does nothing in read-only mode or when no rows are queued.
 *
 * @param int $flags Combination of the FLAG_* constants; FLAG_IMMEDIATE_WRITES
 *  runs the write immediately instead of deferring it
 */
protected static function queueUpdates( $flags ) {
	$services = MediaWikiServices::getInstance();
	if ( $services->getReadOnlyMode()->isReadOnly() || self::$updates === [] ) {
		return;
	}

	// avoid connecting yet
	$dbw = $services->getDBLoadBalancer()->getConnectionRef( DB_PRIMARY );
	$table = self::TABLE;
	$callers = wfGetAllCallers( 50 );

	$writeRows = static function ( IDatabase $dbw, $method ) use ( $table, $callers ) {
		// Maybe another deferred update already processed these
		if ( self::$updates === [] ) {
			return;
		}

		// This path should only be hit during web requests
		$rowCount = count( self::$updates );
		if ( $rowCount > 100 ) {
			$groups = array_unique( array_column( self::$updates, 'tgs_group' ) );
			LoggerFactory::getInstance( 'Translate' )->warning(
				"Huge translation update of {count} rows for group(s) {groups}",
				[
					'count' => $rowCount,
					'groups' => implode( ', ', $groups ),
					'callers' => $callers,
				]
			);
		}

		$primaryKey = [ 'tgs_group', 'tgs_lang' ];
		$dbw->replace( $table, [ $primaryKey ], array_values( self::$updates ), $method );
		self::$updates = [];
	};

	$updateOp = self::withLock( $dbw, 'updates', __METHOD__, $writeRows );

	if ( $flags & self::FLAG_IMMEDIATE_WRITES ) {
		$updateOp();
		return;
	}

	DeferredUpdates::addCallableUpdate( $updateOp );
}
638
/**
 * Wraps a callback so that it runs while holding a named database lock.
 *
 * The returned closure gives up silently if the lock cannot be acquired
 * within one second. Otherwise it flushes the connection, runs the callback,
 * flushes again and releases the lock.
 *
 * @param IDatabase $dbw Connection used for locking and passed to the callback
 * @param string $key Suffix of the lock name
 * @param string $method Caller name, passed to the callback as second argument
 * @param callable $callback Called as $callback( $dbw, $method )
 * @return Closure
 */
protected static function withLock( IDatabase $dbw, $key, $method, $callback ) {
	$fname = __METHOD__;
	return static function () use ( $dbw, $key, $method, $callback, $fname ) {
		$lockName = 'MessageGroupStats:' . $key;
		if ( !$dbw->lock( $lockName, $fname, 1 ) ) {
			return; // raced out
		}

		// Release the lock even if a commit or the callback throws, so that a
		// failure does not keep other writers out for the rest of the connection.
		try {
			$dbw->commit( $fname, 'flush' );
			$callback( $dbw, $method );
			$dbw->commit( $fname, 'flush' );
		} finally {
			$dbw->unlock( $lockName, $fname );
		}
	};
}
654
/**
 * Returns the identifier under which a message group is stored in the table.
 *
 * Ids of up to 72 bytes are used as they are. Longer ids are replaced by
 * their first 50 bytes, "||" and the first 20 hexadecimal characters of
 * their SHA-256 hash.
 *
 * @param string $id Message group id
 * @return string
 */
public static function getDatabaseIdForGroupId( $id ) {
	// The column is 100 bytes long, but we don't need to use it all
	if ( strlen( $id ) > 72 ) {
		// Third argument false: hexadecimal digest rather than raw binary
		$digest = hash( 'sha256', $id, false );

		return substr( $id, 0, 50 ) . '||' . substr( $digest, 0, 20 );
	}

	return $id;
}
665}
Groups multiple message groups together as one group.
getGroups()
Returns a list of message groups that this group consists of.
This class implements default behavior for file based message groups.
This class abstracts MessageGroup statistics calculation and storage.
const PROOFREAD
Array index.
static clearGroup( $id, int $flags=0)
Recalculate stats for given group(s).
static forItemInternal(&$stats, MessageGroup $group, $code, $flags)
const FLAG_CACHE_ONLY
If stats are not cached, do not attempt to calculate them on the fly.
static extractResults( $res, array $ids, array $stats=[])
Use this to extract results returned from selectRowsIdLang.
static forLanguage( $code, $flags=0)
Returns stats for all groups in given language.
static forGroupInternal(MessageGroup $group, array $stats, $flags)
static extractNumbers( $row)
Returns an array of needed database fields.
static expandAggregates(AggregateMessageGroup $agg)
static clearAll()
Purges all cached stats.
static clear(MessageHandle $handle)
Recalculate stats for all groups associated with the message.
const TOTAL
Array index.
const FLAG_IMMEDIATE_WRITES
Do not defer updates. Meant for jobs like MessageGroupStatsRebuildJob.
const FLAG_NO_CACHE
Ignore cached values. Useful for updating stale values.
const FUZZY
Array index.
static getUnknownStats()
Returns empty stats array that indicates stats are incomplete or unknown.
static forLanguageInternal( $code, array $stats, $flags)
const TRANSLATED
Array index.
static calculateGroup(MessageGroup $group, $code)
static getEmptyStats()
Returns empty stats array.
static selectRowsIdLang(?array $ids, ?array $codes, $flags)
Fetch rows from the database.
static forItem( $id, $code, $flags=0)
Returns stats for given group in given language.
static forEverything( $flags=0)
Returns stats for all groups in all languages.
static forGroup( $id, $flags=0)
Returns stats for all languages in given group.
static isDynamic(MessageGroup $group)
Class for pointing to messages, like Title class is for titles.
getGroupIds()
Returns all message group ids this message belongs to.
Interface for message groups.
initCollection( $code)
Initialises a message collection with the given language code, message definitions and message tags.
getSourceLanguage()
Returns language code depicting the language of source text.
getId()
Returns the unique identifier for this group.