Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
42.55% |
80 / 188 |
|
33.33% |
6 / 18 |
CRAP | |
0.00% |
0 / 1 |
MessageIndex | |
42.55% |
80 / 188 |
|
33.33% |
6 / 18 |
484.80 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
getGroupIds | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
3.01 | |||
getCache | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
getPrimaryGroupId | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getWithCache | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
3.02 | |||
get | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
retrieve | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
getKeys | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
store | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
lock | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
unlock | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
rebuild | |
0.00% |
0 / 67 |
|
0.00% |
0 / 1 |
90 | |||
getStatusCacheKey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getInterimCache | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
storeInterim | |
78.26% |
18 / 23 |
|
0.00% |
0 / 1 |
3.09 | |||
getArrayDiff | |
100.00% |
26 / 26 |
|
100.00% |
1 / 1 |
7 | |||
clearMessageGroupStats | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
checkAndAdd | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
30 | |||
serialize | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
unserialize | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\MessageLoading; |
5 | |
6 | use BagOStuff; |
7 | use Exception; |
8 | use JobQueueGroup; |
9 | use MapCacheLRU; |
10 | use MediaWiki\Extension\Translate\HookRunner; |
11 | use MediaWiki\Extension\Translate\MessageGroupProcessing\MessageGroups; |
12 | use MediaWiki\Extension\Translate\MessageGroupProcessing\MessageGroupSubscription; |
13 | use MediaWiki\Extension\Translate\Services; |
14 | use MediaWiki\Extension\Translate\Statistics\RebuildMessageGroupStatsJob; |
15 | use MediaWiki\Extension\Translate\Utilities\Utilities; |
16 | use MediaWiki\Logger\LoggerFactory; |
17 | use MediaWiki\MediaWikiServices; |
18 | use MediaWiki\Title\Title; |
19 | use MessageGroup; |
20 | use MessageIndexRebuildJob; |
21 | use ObjectCache; |
22 | use Psr\Log\LoggerInterface; |
23 | use WANObjectCache; |
24 | |
25 | /** |
26 | * Creates a database of keys in all groups, so that namespace and key can be |
27 | * used to get the groups they belong to. This is used as a fallback when |
28 | * loadgroup parameter is not provided in the request, which happens if someone |
29 | * reaches a message from somewhere other than Special:Translate. Also used |
30 | * by Special:TranslationStats and alike which need to map lots of titles |
31 | * to message groups. |
32 | * |
33 | * @author Niklas Laxstrom |
34 | * @copyright Copyright © 2008-2013, Niklas Laxström |
35 | * @license GPL-2.0-or-later |
36 | */ |
abstract class MessageIndex {
	// TODO: Use dependency injection

	/** Key under which storeInterim() keeps not-yet-indexed message keys. */
	private const CACHE_KEY = 'Translate-MessageIndex-interim';
	/** Value passed as $readLatest to retrieve() while rebuilding. */
	private const READ_LATEST = true;
	/** Per-process LRU cache of normalised key => list of group ids, shared by all instances. */
	private static ?MapCacheLRU $keysCache = null;
	protected BagOStuff $interimCache;
	private WANObjectCache $statusCache;
	private JobQueueGroup $jobQueueGroup;
	private HookRunner $hookRunner;
	private LoggerInterface $logger;
	private MessageGroupSubscription $messageGroupSubscription;
	private array $translateMessageNamespaces;

	/**
	 * Pulls every collaborator from the global service containers.
	 */
	public function __construct() {
		$mwInstance = MediaWikiServices::getInstance();
		$translateServices = Services::getInstance();

		$this->statusCache = $mwInstance->getMainWANObjectCache();
		$this->jobQueueGroup = $mwInstance->getJobQueueGroup();
		$this->translateMessageNamespaces = $mwInstance
			->getMainConfig()
			->get( 'TranslateMessageNamespaces' );
		$this->hookRunner = $translateServices->getHookRunner();
		$this->logger = LoggerFactory::getInstance( 'Translate' );
		$this->interimCache = ObjectCache::getInstance( CACHE_ANYTHING );
		$this->messageGroupSubscription = $translateServices->getMessageGroupSubscription();
	}

	/**
	 * Retrieves a list of groups given MessageHandle belongs to.
	 *
	 * Handles whose title is outside the configured translatable message
	 * namespaces yield an empty list. Results are memoised in the per-process
	 * LRU cache, including empty results.
	 *
	 * @return string[]
	 */
	public function getGroupIds( MessageHandle $handle ): array {
		$title = $handle->getTitle();

		if ( !$title->inNamespaces( $this->translateMessageNamespaces ) ) {
			return [];
		}

		$normalisedKey = Utilities::normaliseKey( $title->getNamespace(), $handle->getKey() );

		$cache = $this->getCache();
		$groupIds = $cache->get( $normalisedKey );
		if ( $groupIds === null ) {
			// A single group is stored as a plain string and a miss as null;
			// the cast turns both into a list.
			$groupIds = (array)$this->getWithCache( $normalisedKey );
			$cache->set( $normalisedKey, $groupIds );
		}

		return $groupIds;
	}

	/** Lazily creates the shared per-process key cache (30 entries). */
	private function getCache(): MapCacheLRU {
		self::$keysCache ??= new MapCacheLRU( 30 );

		return self::$keysCache;
	}

	/**
	 * Returns the first group id the handle belongs to, or null if it belongs to none.
	 */
	public function getPrimaryGroupId( MessageHandle $handle ): ?string {
		$groups = $this->getGroupIds( $handle );

		// array_shift() returns null for an empty array.
		return array_shift( $groups );
	}

	/**
	 * Looks up a normalised key, preferring the interim cache over the stored index.
	 *
	 * @param string $key Normalised message key
	 * @return string|array|null
	 */
	private function getWithCache( string $key ) {
		$interimCacheValue = $this->getInterimCache()->get( self::CACHE_KEY );
		$interimGroupId = $interimCacheValue['newKeys'][$key] ?? null;

		if ( $interimGroupId !== null ) {
			$this->logger->debug(
				'[MessageIndex] interim cache hit: {messageKey} with value {groupId}',
				[ 'messageKey' => $key, 'groupId' => $interimGroupId ]
			);
			return $interimGroupId;
		}

		return $this->get( $key );
	}

	/**
	 * Looks up the stored value for a single key.
	 *
	 * This default implementation loads the whole index through retrieve()
	 * and picks the key out of it.
	 *
	 * @param string $key
	 * @return string|array|null
	 */
	protected function get( string $key ) {
		$index = $this->retrieve();

		return $index[$key] ?? null;
	}

	/**
	 * Loads the complete stored index.
	 *
	 * @param bool $readLatest rebuild() passes true here.
	 * @return array Normalised key => group id, or list of group ids
	 */
	abstract public function retrieve( bool $readLatest = false ): array;

	/** @return string[] All keys of the stored index. */
	public function getKeys(): array {
		return array_keys( $this->retrieve() );
	}

	/**
	 * Persists a freshly built index.
	 *
	 * @param array $array The complete new index
	 * @param array $diff The 'keys' part of getArrayDiff(): 'add', 'del' and 'mod' maps
	 */
	abstract protected function store( array $array, array $diff );

	/** Acquires the rebuild lock. The default implementation always succeeds. */
	protected function lock(): bool {
		return true;
	}

	/** Releases the rebuild lock. The default implementation always succeeds. */
	protected function unlock(): bool {
		return true;
	}

	/**
	 * Creates the index from scratch.
	 *
	 * A nested call made while a rebuild is already running in this process
	 * emits a warning and returns an empty array. The lock is released and the
	 * recursion guard is reset even when the rebuild fails with an exception.
	 *
	 * @param float|null $timestamp Purge interim caches older than this timestamp.
	 * @return array The new index
	 * @throws Exception MessageIndexException if the lock cannot be acquired
	 */
	public function rebuild( ?float $timestamp = null ): array {
		static $recursion = 0;

		if ( $recursion > 0 ) {
			$msg = __METHOD__ . ': trying to recurse - building the index first time?';
			wfWarn( $msg );

			// The counter is owned by the outermost call and must not be touched
			// here, otherwise it drifts negative and the guard stops working.
			return [];
		}
		$recursion++;

		try {
			$this->logger->info( '[MessageIndex] Started rebuild.' );

			$tsStart = microtime( true );
			if ( !$this->lock() ) {
				throw new MessageIndexException( __CLASS__ . ': unable to acquire lock' );
			}

			$lockWaitDuration = microtime( true ) - $tsStart;
			$this->logger->info(
				'[MessageIndex] Got lock in {duration}',
				[ 'duration' => $lockWaitDuration ]
			);

			try {
				$groups = MessageGroups::singleton()->getGroups();
				$this->getCache()->clear();

				$new = [];
				$old = $this->retrieve( self::READ_LATEST );
				$postponed = [];

				foreach ( $groups as $messageGroup ) {
					if ( !$messageGroup->exists() ) {
						$id = $messageGroup->getId();
						wfWarn( __METHOD__ . ": group '$id' is registered but does not exist" );
						continue;
					}

					# Skip meta thingies
					if ( $messageGroup->isMeta() ) {
						$postponed[] = $messageGroup;
						continue;
					}

					$this->checkAndAdd( $new, $messageGroup );
				}

				// Meta groups go last and do not warn about keys that already have a group.
				foreach ( $postponed as $messageGroup ) {
					$this->checkAndAdd( $new, $messageGroup, true );
				}

				$diff = $this->getArrayDiff( $old, $new );
				$this->store( $new, $diff['keys'] );

				$cache = $this->getInterimCache();
				$interimCacheValue = $cache->get( self::CACHE_KEY );
				if ( $interimCacheValue ) {
					$timestamp ??= microtime( true );
					if ( $interimCacheValue['timestamp'] <= $timestamp ) {
						$cache->delete( self::CACHE_KEY );
						$this->logger->debug(
							'[MessageIndex] Deleted interim cache with timestamp {cacheTimestamp} <= {currentTimestamp}.',
							[
								'cacheTimestamp' => $interimCacheValue['timestamp'],
								'currentTimestamp' => $timestamp,
							]
						);
					} else {
						// Cache has a later timestamp. This may be caused due to
						// job deduplication. Just in case, spin off a new job to clean up the cache.
						$job = MessageIndexRebuildJob::newJob( __METHOD__ );
						$this->jobQueueGroup->push( $job );
						$this->logger->debug(
							'[MessageIndex] Kept interim cache with timestamp {cacheTimestamp} > {currentTimestamp}.',
							[
								'cacheTimestamp' => $interimCacheValue['timestamp'],
								'currentTimestamp' => $timestamp,
							]
						);
					}
				}
			} finally {
				// Never leave the lock behind, even if building or storing failed.
				$this->unlock();
			}

			$criticalSectionDuration = microtime( true ) - $tsStart - $lockWaitDuration;
			$this->logger->info(
				'[MessageIndex] Finished critical section in {duration}',
				[ 'duration' => $criticalSectionDuration ]
			);

			// Other caches can check this key to know when they need to refresh
			$this->statusCache->touchCheckKey( $this->getStatusCacheKey() );

			$this->clearMessageGroupStats( $diff );
			$this->messageGroupSubscription->queueNotificationJob();

			return $new;
		} finally {
			// Re-arm the guard on success and on failure alike.
			$recursion--;
		}
	}

	/** Returns the check key that rebuild() touches after every rebuild. */
	public function getStatusCacheKey(): string {
		return $this->statusCache->makeKey( 'Translate', 'MessageIndex', 'status' );
	}

	private function getInterimCache(): BagOStuff {
		return $this->interimCache;
	}

	/**
	 * Records new keys of a group in the interim cache so that lookups can
	 * find them before the next rebuild has stored them.
	 *
	 * New keys are merged into any existing interim entry and the entry's
	 * timestamp is refreshed. The entry is stored for one day.
	 *
	 * @param MessageGroup $group Group the keys belong to
	 * @param string[] $newKeys Message keys, not yet normalised
	 */
	public function storeInterim( MessageGroup $group, array $newKeys ): void {
		$namespace = $group->getNamespace();
		$id = $group->getId();

		$normalizedNewKeys = [];
		foreach ( $newKeys as $key ) {
			$normalizedNewKeys[Utilities::normaliseKey( $namespace, $key )] = $id;
		}

		$cache = $this->getInterimCache();
		// Merge with existing keys (if present)
		$interimCacheValue = $cache->get( self::CACHE_KEY, $cache::READ_LATEST );
		if ( $interimCacheValue ) {
			$normalizedNewKeys = array_merge( $interimCacheValue['newKeys'], $normalizedNewKeys );
			$this->logger->debug(
				'[MessageIndex] interim cache: merging with existing cache of size {count}',
				[ 'count' => count( $interimCacheValue['newKeys'] ) ]
			);
		}

		$value = [
			'timestamp' => microtime( true ),
			'newKeys' => $normalizedNewKeys,
		];

		$cache->set( self::CACHE_KEY, $value, $cache::TTL_DAY );
		$this->logger->debug(
			'[MessageIndex] interim cache: added group {groupId} with new size {count} keys and ' .
			'timestamp {cacheTimestamp}',
			[ 'groupId' => $id, 'count' => count( $normalizedNewKeys ), 'cacheTimestamp' => $value['timestamp'] ]
		);
	}

	/**
	 * Compares two associative arrays.
	 *
	 * Values must be a string or list of strings. Returns an array of added,
	 * deleted and modified keys as well as value changes (you can think values
	 * as categories and keys as pages). Each of the keys ('add', 'del', 'mod'
	 * respectively) maps to an array whose keys are the changed keys of the
	 * original arrays and values are lists where first element contains the
	 * old value and the second element the new value.
	 *
	 * @code
	 * $a = [ 'a' => '1', 'b' => '2', 'c' => '3' ];
	 * $b = [ 'b' => '2', 'c' => [ '3', '2' ], 'd' => '4' ];
	 *
	 * $this->getArrayDiff( $a, $b ) === [
	 *   'keys' => [
	 *     'add' => [ 'd' => [ [], [ '4' ] ] ],
	 *     'del' => [ 'a' => [ [ '1' ], [] ] ],
	 *     'mod' => [ 'c' => [ [ '3' ], [ '3', '2' ] ] ],
	 *   ],
	 *   'values' => [ 2, 4, 1 ]
	 * ];
	 * @endcode
	 *
	 * @param array $old
	 * @param array $new
	 * @return array
	 */
	public function getArrayDiff( array $old, array $new ): array {
		$values = [];
		// Collects every affected value once, in first-seen order.
		$record = static function ( $groups ) use ( &$values ) {
			foreach ( $groups as $group ) {
				$values[$group] = true;
			}
		};

		$keys = [
			'add' => [],
			'del' => [],
			'mod' => [],
		];

		foreach ( $new as $key => $groups ) {
			if ( !isset( $old[$key] ) ) {
				$keys['add'][$key] = [ [], (array)$groups ];
				$record( (array)$groups );
			// Loose comparison on purpose. Note that PHP treats two arrays as equal
			// only when they hold the same key/value pairs, so a list with the same
			// items in a different order still counts as modified. In that case
			// no values are recorded, because both array_diff() calls come back empty.
			} elseif ( $groups != $old[$key] ) {
				$keys['mod'][$key] = [ (array)$old[$key], (array)$groups ];
				$record( array_diff( (array)$old[$key], (array)$groups ) );
				$record( array_diff( (array)$groups, (array)$old[$key] ) );
			}
		}

		foreach ( $old as $key => $groups ) {
			if ( !isset( $new[$key] ) ) {
				$keys['del'][$key] = [ (array)$groups, [] ];
				$record( (array)$groups );
			}
			// We already checked for diffs above
		}

		return [
			'keys' => $keys,
			'values' => array_keys( $values ),
		];
	}

	/**
	 * Purge stuff when set of keys have changed.
	 *
	 * Queues a stats refresh job for the affected groups, then announces each
	 * changed key through the membership-change hook and the group
	 * subscription service.
	 *
	 * @param array $diff Result of getArrayDiff()
	 */
	protected function clearMessageGroupStats( array $diff ): void {
		$job = RebuildMessageGroupStatsJob::newRefreshGroupsJob( $diff['values'] );
		$this->jobQueueGroup->push( $job );

		foreach ( $diff['keys'] as $keys ) {
			foreach ( $keys as $key => $data ) {
				// Index keys have the form "<namespace number>:<page name>".
				[ $ns, $pageName ] = explode( ':', $key, 2 );
				$title = Title::makeTitle( (int)$ns, $pageName );
				$handle = new MessageHandle( $title );
				[ $oldGroups, $newGroups ] = $data;
				$this->hookRunner->onTranslateEventMessageMembershipChange(
					$handle, $oldGroups, $newGroups );
				$this->messageGroupSubscription->handleMessageIndexUpdate( $handle, $oldGroups, $newGroups );
			}
		}
	}

	/**
	 * Adds all keys of a group to the index being built.
	 *
	 * A key claimed by a single group maps to that group's id; a key claimed
	 * by several groups maps to a list of ids in the order they were added.
	 *
	 * NOTE(review): values are stored as PHP references to one shared $id
	 * variable per call. Presumably this lets all keys of a group share one
	 * value; confirm how store() implementations depend on it before
	 * simplifying.
	 *
	 * @param array &$hugeArray Index under construction, modified in place
	 * @param MessageGroup $g Group whose keys are added
	 * @param bool $ignore Do not warn when a key already belongs to another group
	 */
	protected function checkAndAdd( array &$hugeArray, MessageGroup $g, bool $ignore = false ): void {
		$keys = $g->getKeys();
		$id = $g->getId();
		$namespace = $g->getNamespace();

		foreach ( $keys as $key ) {
			# Force all keys to lower case, because the case doesn't matter and it is
			# easier to do comparing when the case of first letter is unknown, because
			# mediawiki forces it to upper case
			$key = Utilities::normaliseKey( $namespace, $key );
			if ( isset( $hugeArray[$key] ) ) {
				if ( !$ignore ) {
					$to = implode( ', ', (array)$hugeArray[$key] );
					wfWarn( "Key $key already belongs to $to, conflict with $id" );
				}

				if ( is_array( $hugeArray[$key] ) ) {
					// Hard work is already done, just add a new reference
					$hugeArray[$key][] = & $id;
				} else {
					// Store the actual reference, then remove it from array, to not
					// replace the references value, but to store an array of new
					// references instead. References are hard!
					$value = & $hugeArray[$key];
					unset( $hugeArray[$key] );
					$hugeArray[$key] = [ &$value, &$id ];
				}
			} else {
				$hugeArray[$key] = & $id;
			}
		}
		unset( $id ); // Disconnect the previous references to this $id
	}

	/**
	 * These are probably slower than serialize and unserialize,
	 * but they are more space efficient because we only need
	 * strings and arrays.
	 *
	 * Lists are joined with '|'; anything else is returned unchanged.
	 *
	 * @param mixed $data
	 * @return mixed
	 */
	protected function serialize( $data ) {
		return is_array( $data ) ? implode( '|', $data ) : $data;
	}

	/**
	 * Reverses serialize(): a string containing '|' becomes a list, any other
	 * string is returned unchanged.
	 *
	 * @param string $data
	 * @return string|string[]
	 */
	protected function unserialize( $data ) {
		$array = explode( '|', $data );
		return count( $array ) > 1 ? $array : $data;
	}
}