Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
38.54% |
74 / 192 |
|
23.53% |
4 / 17 |
CRAP | |
0.00% |
0 / 1 |
MessageIndex | |
38.54% |
74 / 192 |
|
23.53% |
4 / 17 |
537.20 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
normaliseKey | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getGroupIds | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
3.01 | |||
getGroupIdsForDatabaseTitle | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getPrimaryGroupId | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getWithCache | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
3.02 | |||
get | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getKeys | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
lock | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
unlock | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
rebuild | |
0.00% |
0 / 66 |
|
0.00% |
0 / 1 |
90 | |||
getStatusCacheKey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getInterimCache | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
storeInterim | |
78.26% |
18 / 23 |
|
0.00% |
0 / 1 |
3.09 | |||
getArrayDiff | |
100.00% |
26 / 26 |
|
100.00% |
1 / 1 |
7 | |||
clearMessageGroupStats | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
checkAndAdd | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\MessageLoading; |
5 | |
6 | use Exception; |
7 | use JobQueueGroup; |
8 | use MapCacheLRU; |
9 | use MediaWiki\Config\ServiceOptions; |
10 | use MediaWiki\Extension\Translate\HookRunner; |
11 | use MediaWiki\Extension\Translate\MessageGroupProcessing\MessageGroups; |
12 | use MediaWiki\Extension\Translate\Statistics\RebuildMessageGroupStatsJob; |
13 | use MediaWiki\Title\Title; |
14 | use MessageGroup; |
15 | use Psr\Log\LoggerInterface; |
16 | use Wikimedia\ObjectCache\BagOStuff; |
17 | use Wikimedia\ObjectCache\WANObjectCache; |
18 | use Wikimedia\Rdbms\IConnectionProvider; |
19 | |
20 | /** |
21 | * Creates a database of keys in all groups, so that namespace and key can be |
22 | * used to get the groups they belong to. This is used as a fallback when |
23 | * loadgroup parameter is not provided in the request, which happens if someone |
24 | * reaches a message from somewhere other than Special:Translate. Also used |
25 | * by Special:TranslationStats and alike which need to map lots of titles |
26 | * to message groups. |
27 | * |
28 | * @author Niklas Laxstrom |
29 | * @copyright Copyright © 2008-2013, Niklas Laxström |
30 | * @license GPL-2.0-or-later |
31 | */ |
32 | class MessageIndex { |
33 | // TODO: Use dependency injection |
34 | private const CACHE_KEY = 'Translate-MessageIndex-interim'; |
35 | private const READ_LATEST = true; |
36 | private MessageIndexStore $messageIndexStore; |
37 | private MapCacheLRU $keysCache; |
38 | protected BagOStuff $interimCache; |
39 | private WANObjectCache $statusCache; |
40 | private JobQueueGroup $jobQueueGroup; |
41 | private HookRunner $hookRunner; |
42 | private LoggerInterface $logger; |
43 | private IConnectionProvider $dbProvider; |
44 | private array $translateMessageNamespaces; |
45 | public const SERVICE_OPTIONS = [ |
46 | 'TranslateMessageNamespaces' |
47 | ]; |
48 | |
/**
 * Wires up the collaborators of the message index.
 *
 * @param MessageIndexStore $store Backend the index is read from and written to
 * @param WANObjectCache $statusCache Cache holding the "index changed" check key
 * @param JobQueueGroup $jobQueueGroup Queue used for follow-up jobs
 * @param HookRunner $hookRunner Runs the membership change hook
 * @param LoggerInterface $logger
 * @param BagOStuff $interimCache Cache for keys that are not yet in the stored index
 * @param IConnectionProvider $dbProvider Provides the primary database used for locking
 * @param ServiceOptions $options Must provide every option listed in self::SERVICE_OPTIONS
 */
public function __construct(
	MessageIndexStore $store,
	WANObjectCache $statusCache,
	JobQueueGroup $jobQueueGroup,
	HookRunner $hookRunner,
	LoggerInterface $logger,
	BagOStuff $interimCache,
	IConnectionProvider $dbProvider,
	ServiceOptions $options
) {
	// Validate the configuration before anything else is set up.
	$options->assertRequiredOptions( self::SERVICE_OPTIONS );
	$this->translateMessageNamespaces = $options->get( 'TranslateMessageNamespaces' );

	// Storage and caches
	$this->messageIndexStore = $store;
	$this->interimCache = $interimCache;
	$this->statusCache = $statusCache;
	// Per-instance LRU map of already resolved keys, created with a size argument of 30.
	$this->keysCache = new MapCacheLRU( 30 );

	// Remaining services
	$this->dbProvider = $dbProvider;
	$this->jobQueueGroup = $jobQueueGroup;
	$this->hookRunner = $hookRunner;
	$this->logger = $logger;
}
70 | |
/**
 * Builds the message index key for a page.
 *
 * The result has the form "<namespace>:<key>", where the first character of
 * the key is lower-cased and every space is replaced with an underscore.
 */
private function normaliseKey( int $namespace, string $key ): string {
	$indexKey = $namespace . ':' . lcfirst( $key );

	return str_replace( ' ', '_', $indexKey );
}
77 | |
/**
 * Looks up the ids of the message groups the given handle belongs to.
 *
 * Titles outside the configured message namespaces never belong to a group.
 * Resolved keys are remembered in the per-instance LRU cache.
 *
 * @return string[]
 */
public function getGroupIds( MessageHandle $handle ): array {
	$title = $handle->getTitle();
	if ( !$title->inNamespaces( $this->translateMessageNamespaces ) ) {
		return [];
	}

	$indexKey = $this->normaliseKey( $title->getNamespace(), $handle->getKey() );

	$groupIds = $this->keysCache->get( $indexKey );
	if ( $groupIds !== null ) {
		// Already resolved earlier by this instance
		return $groupIds;
	}

	// A single group id is stored as a plain string, hence the cast to a list.
	$groupIds = (array)$this->getWithCache( $indexKey );
	$this->keysCache->set( $indexKey, $groupIds );

	return $groupIds;
}
101 | |
/**
 * Fast-path to retrieve groups for database titles.
 *
 * Performance is critical for stats that need to check groups for many rows.
 * Do not include the language code subpage!
 *
 * @param int $namespace Namespace number of the title
 * @param string $title Database title without the language code subpage
 * @return string[]
 */
public function getGroupIdsForDatabaseTitle( int $namespace, string $title ): array {
	$normalisedKey = $this->normaliseKey( $namespace, $title );

	// Optimization 1: skip LRU cache assuming that hit rate is very low for this use case
	// Optimization 2: skip interim cache as not essential

	// The array cast already turns a null result into [] and a single group id
	// into a one-element list. A trailing `?? []` would be dead code, because the
	// cast binds tighter than the null coalescing operator and never yields null.
	return (array)$this->get( $normalisedKey );
}
117 | |
/**
 * Returns the first group id the handle belongs to, or null when it
 * belongs to no group.
 */
public function getPrimaryGroupId( MessageHandle $handle ): ?string {
	$groupIds = $this->getGroupIds( $handle );
	if ( $groupIds === [] ) {
		return null;
	}

	return reset( $groupIds );
}
123 | |
/**
 * Resolves a normalised key, preferring the interim cache over the store.
 *
 * @param string $key Key in message index format
 * @return string|array|null
 */
private function getWithCache( string $key ) {
	$interim = $this->getInterimCache()->get( self::CACHE_KEY );
	$inInterimCache = $interim && isset( $interim['newKeys'][$key] );

	if ( !$inInterimCache ) {
		return $this->messageIndexStore->get( $key );
	}

	$groupId = $interim['newKeys'][$key];
	$this->logger->debug(
		'[MessageIndex] interim cache hit: {messageKey} with value {groupId}',
		[ 'messageKey' => $key, 'groupId' => $groupId ]
	);

	return $groupId;
}
137 | |
/**
 * Reads a key straight from the index store, bypassing the LRU and interim caches.
 *
 * @param string $key Key in message index format
 * @return string|array|null Whatever the store returns for the key
 */
public function get( string $key ) {
	$storedValue = $this->messageIndexStore->get( $key );

	return $storedValue;
}
141 | |
/**
 * Lists the keys known to the index store.
 *
 * @return string[]
 */
public function getKeys(): array {
	$storedKeys = $this->messageIndexStore->getKeys();

	return $storedKeys;
}
146 | |
/**
 * Tries to take the named 'translate-messageindex' lock on the primary database.
 *
 * @return bool Whether the lock was acquired
 */
private function lock(): bool {
	$primaryDb = $this->dbProvider->getPrimaryDatabase();
	$acquired = $primaryDb->lock( 'translate-messageindex', __METHOD__, 5 );

	if ( $acquired ) {
		// Flush any open transaction now that we hold the lock, so that later reads
		// do not use a stale REPEATABLE-READ snapshot taken before the lock.
		$primaryDb->commit( __METHOD__, 'flush' );
	}

	return $acquired;
}
159 | |
/**
 * Releases the named 'translate-messageindex' lock.
 *
 * While a transaction is open the release is postponed until that transaction
 * has been resolved, so that the lock outlives the row locks (avoids deadlocks).
 */
private function unlock(): void {
	// Captured up front: __METHOD__ would not name this method inside the closure.
	$caller = __METHOD__;
	$primaryDb = $this->dbProvider->getPrimaryDatabase();

	$release = static function () use ( $primaryDb, $caller ) {
		$primaryDb->unlock( 'translate-messageindex', $caller );
	};

	if ( $primaryDb->trxLevel() ) {
		$primaryDb->onTransactionResolution( $release, $caller );
		return;
	}

	$release();
}
172 | |
/**
 * Creates the index from scratch.
 *
 * Collects the keys of every existing message group (regular groups first,
 * meta groups afterwards), stores the result together with the diff against
 * the previous index, prunes the interim cache, touches the status check key
 * and triggers follow-up processing for the changed keys.
 *
 * A nested call made while a rebuild is already running in this process is
 * rejected with a warning and returns an empty array.
 *
 * @param float|null $timestamp Purge interim caches older than this timestamp.
 * @return array The freshly built index, or an empty array for a rejected nested call.
 * @throws Exception
 */
public function rebuild( ?float $timestamp = null ): array {
	static $recursion = 0;

	if ( $recursion > 0 ) {
		$msg = __METHOD__ . ': trying to recurse - building the index first time?';
		wfWarn( $msg );

		// The counter belongs to the outer call, which restores it when it
		// finishes. Decrementing it here would make the guard miss later nesting.
		return [];
	}

	$recursion++;
	try {
		$this->logger->info( '[MessageIndex] Started rebuild.' );

		$tsStart = microtime( true );
		if ( !$this->lock() ) {
			throw new MessageIndexException( __CLASS__ . ': unable to acquire lock' );
		}

		try {
			$lockWaitDuration = microtime( true ) - $tsStart;
			$this->logger->info(
				'[MessageIndex] Got lock in {duration}',
				[ 'duration' => $lockWaitDuration ]
			);

			$groups = MessageGroups::singleton()->getGroups();
			$this->keysCache->clear();

			$new = [];
			$old = $this->messageIndexStore->retrieve( self::READ_LATEST );
			$postponed = [];

			foreach ( $groups as $messageGroup ) {
				if ( !$messageGroup->exists() ) {
					$id = $messageGroup->getId();
					wfWarn( __METHOD__ . ": group '$id' is registered but does not exist" );
					continue;
				}

				# Skip meta thingies
				if ( $messageGroup->isMeta() ) {
					$postponed[] = $messageGroup;
					continue;
				}

				$this->checkAndAdd( $new, $messageGroup );
			}

			// Meta groups are added last and without conflict warnings.
			foreach ( $postponed as $messageGroup ) {
				$this->checkAndAdd( $new, $messageGroup, true );
			}

			$diff = $this->getArrayDiff( $old, $new );
			$this->messageIndexStore->store( $new, $diff['keys'] );

			$cache = $this->getInterimCache();
			$interimCacheValue = $cache->get( self::CACHE_KEY );
			if ( $interimCacheValue ) {
				$timestamp ??= microtime( true );
				if ( $interimCacheValue['timestamp'] <= $timestamp ) {
					$cache->delete( self::CACHE_KEY );
					$this->logger->debug(
						'[MessageIndex] Deleted interim cache with timestamp {cacheTimestamp} <= {currentTimestamp}.',
						[
							'cacheTimestamp' => $interimCacheValue['timestamp'],
							'currentTimestamp' => $timestamp,
						]
					);
				} else {
					// Cache has a later timestamp. This may be caused due to
					// job deduplication. Just in case, spin off a new job to clean up the cache.
					$job = RebuildMessageIndexJob::newJob( __METHOD__ );
					$this->jobQueueGroup->push( $job );
					$this->logger->debug(
						'[MessageIndex] Kept interim cache with timestamp {cacheTimestamp} > {currentTimestamp}.',
						[
							'cacheTimestamp' => $interimCacheValue['timestamp'],
							'currentTimestamp' => $timestamp,
						]
					);
				}
			}
		} finally {
			// Release the lock even when building or storing the index fails.
			$this->unlock();
		}

		$criticalSectionDuration = microtime( true ) - $tsStart - $lockWaitDuration;
		$this->logger->info(
			'[MessageIndex] Finished critical section in {duration}',
			[ 'duration' => $criticalSectionDuration ]
		);

		// Other caches can check this key to know when they need to refresh
		$this->statusCache->touchCheckKey( $this->getStatusCacheKey() );

		$this->clearMessageGroupStats( $diff );

		return $new;
	} finally {
		// Always restore the guard, also when an exception escapes, so that a
		// failed rebuild does not block later rebuilds in the same process.
		$recursion--;
	}
}
278 | |
/**
 * Returns the name of the check key that rebuild() touches whenever the
 * index has been rebuilt.
 */
public function getStatusCacheKey(): string {
	$keyComponents = [ 'Translate', 'MessageIndex', 'status' ];

	return $this->statusCache->makeKey( ...$keyComponents );
}
282 | |
/** Returns the cache that holds new keys which are not yet part of the stored index. */
private function getInterimCache(): BagOStuff {
	$cache = $this->interimCache;

	return $cache;
}
286 | |
/**
 * Records new keys of a group in the interim cache, so that they can be
 * resolved before the stored index has been rebuilt.
 *
 * The keys are merged into whatever the interim cache already holds; on a
 * clash the group given here wins. The entry is written with a one day TTL
 * and stamped with the current time.
 *
 * @param MessageGroup $group Group the keys belong to
 * @param string[] $newKeys Message keys, not yet normalised
 */
public function storeInterim( MessageGroup $group, array $newKeys ): void {
	$namespace = $group->getNamespace();
	$id = $group->getId();

	// Map every normalised key to the id of the group.
	$normalizedNewKeys = array_fill_keys(
		array_map(
			fn ( $key ) => $this->normaliseKey( $namespace, $key ),
			$newKeys
		),
		$id
	);

	$cache = $this->getInterimCache();
	$existing = $cache->get( self::CACHE_KEY, $cache::READ_LATEST );
	if ( $existing ) {
		// Keep the keys that were added earlier by other calls.
		$normalizedNewKeys = array_merge( $existing['newKeys'], $normalizedNewKeys );
		$this->logger->debug(
			'[MessageIndex] interim cache: merging with existing cache of size {count}',
			[ 'count' => count( $existing['newKeys'] ) ]
		);
	}

	$now = microtime( true );
	$cache->set(
		self::CACHE_KEY,
		[
			'timestamp' => $now,
			'newKeys' => $normalizedNewKeys,
		],
		$cache::TTL_DAY
	);

	$this->logger->debug(
		'[MessageIndex] interim cache: added group {groupId} with new size {count} keys and ' .
		'timestamp {cacheTimestamp}',
		[ 'groupId' => $id, 'count' => count( $normalizedNewKeys ), 'cacheTimestamp' => $now ]
	);
}
319 | |
/**
 * Computes the difference between two associative arrays.
 *
 * Each value must be a string or a list of strings (think of the keys as
 * pages and of the values as the categories they are in). The result has
 * two entries:
 *  - 'keys': the keys that were added ('add'), deleted ('del') or modified
 *    ('mod'). Every changed key maps to a pair whose first element is the
 *    list of old values and whose second element is the list of new values.
 *  - 'values': every value that gained or lost at least one key.
 *
 * @code
 * $a = [ 'a' => '1', 'b' => '2', 'c' => '3' ];
 * $b = [ 'b' => '2', 'c' => [ '3', '2' ], 'd' => '4' ];
 *
 * $this->getArrayDiff( $a, $b ) === [
 *   'keys' => [
 *     'add' => [ 'd' => [ [], [ '4' ] ] ],
 *     'del' => [ 'a' => [ [ '1' ], [] ] ],
 *     'mod' => [ 'c' => [ [ '3' ], [ '3', '2' ] ] ],
 *   ],
 *   'values' => [ 2, 4, 1 ]
 * ];
 * @endcode
 *
 * @param array $old
 * @param array $new
 * @return array
 */
public function getArrayDiff( array $old, array $new ): array {
	// Used as a set: the affected values are the array keys.
	$affected = [];
	$added = [];
	$removed = [];
	$modified = [];

	foreach ( $new as $key => $groups ) {
		$newGroups = (array)$groups;

		if ( !isset( $old[$key] ) ) {
			$added[$key] = [ [], $newGroups ];
			$affected += array_fill_keys( $newGroups, true );
			continue;
		}

		// The loose comparison is deliberate; the original note says it is
		// there to ignore the order of items.
		if ( $groups != $old[$key] ) {
			$oldGroups = (array)$old[$key];
			$modified[$key] = [ $oldGroups, $newGroups ];
			// Only values on one side of the change are affected.
			$affected += array_fill_keys( array_diff( $oldGroups, $newGroups ), true );
			$affected += array_fill_keys( array_diff( $newGroups, $oldGroups ), true );
		}
	}

	// Modifications were handled above, only deletions are left.
	foreach ( $old as $key => $groups ) {
		if ( isset( $new[$key] ) ) {
			continue;
		}

		$oldGroups = (array)$groups;
		$removed[$key] = [ $oldGroups, [] ];
		$affected += array_fill_keys( $oldGroups, true );
	}

	return [
		'keys' => [
			'add' => $added,
			'del' => $removed,
			'mod' => $modified,
		],
		'values' => array_keys( $affected ),
	];
}
387 | |
/**
 * Reacts to a changed set of keys.
 *
 * Queues a stats refresh job for the affected groups and runs the
 * membership change hook for every added, deleted or modified key.
 *
 * @param array $diff Result of getArrayDiff()
 */
protected function clearMessageGroupStats( array $diff ): void {
	$this->jobQueueGroup->push(
		RebuildMessageGroupStatsJob::newRefreshGroupsJob( $diff['values'] )
	);

	foreach ( $diff['keys'] as $changedKeys ) {
		foreach ( $changedKeys as $indexKey => $membership ) {
			// Index keys have the form "<namespace>:<page name>"
			[ $namespaceId, $pageName ] = explode( ':', $indexKey, 2 );
			$handle = new MessageHandle( Title::makeTitle( (int)$namespaceId, $pageName ) );

			[ $oldGroups, $newGroups ] = $membership;
			$this->hookRunner->onTranslateEventMessageMembershipChange(
				$handle,
				$oldGroups,
				$newGroups
			);
		}
	}
}
404 | |
/**
 * Adds all keys of a group to the index that is being built.
 *
 * A key that belongs to one group maps to that group id, a key that belongs
 * to several groups maps to a list of group ids. The ids are stored by
 * reference, so every entry of a group shares one variable.
 *
 * @param array &$hugeArray Index under construction, modified in place
 * @param MessageGroup $g Group whose keys are added
 * @param bool $ignore Whether to stay silent when a key already belongs to another group
 */
protected function checkAndAdd( array &$hugeArray, MessageGroup $g, bool $ignore = false ): void {
	$keys = $g->getKeys();
	$id = $g->getId();
	$namespace = $g->getNamespace();

	foreach ( $keys as $key ) {
		# Keys are normalised (first letter in lower case), because comparing is
		# easier when the case of the first letter is unknown: MediaWiki forces
		# it to upper case
		$key = $this->normaliseKey( $namespace, $key );

		// First group to claim this key
		if ( !isset( $hugeArray[$key] ) ) {
			$hugeArray[$key] = &$id;
			continue;
		}

		if ( !$ignore ) {
			$to = implode( ', ', (array)$hugeArray[$key] );
			wfWarn( "Key $key already belongs to $to, conflict with $id" );
		}

		if ( !is_array( $hugeArray[$key] ) ) {
			// Second group for this key. Keep hold of the existing reference and
			// drop the array slot before assigning, so that the list is stored in
			// a fresh slot instead of overwriting the value behind the reference.
			$existing = &$hugeArray[$key];
			unset( $hugeArray[$key] );
			$hugeArray[$key] = [ &$existing, &$id ];
			continue;
		}

		// Already a list, just append one more reference
		$hugeArray[$key][] = &$id;
	}

	// Detach the local name from the references stored above
	unset( $id );
}
438 | } |