Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
46.67% |
42 / 90 |
|
50.00% |
8 / 16 |
CRAP | |
0.00% |
0 / 1 |
MessageGroupCache | |
46.67% |
42 / 90 |
|
50.00% |
8 / 16 |
257.06 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
exists | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getKeys | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
getTimestamp | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getUpdateTimestamp | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
get | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getAuthors | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
getExtra | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
create | |
85.00% |
17 / 20 |
|
0.00% |
0 / 1 |
5.08 | |||
isValid | |
0.00% |
0 / 37 |
|
0.00% |
0 / 1 |
182 | |||
invalidate | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
serialize | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
unserialize | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
open | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
close | |
33.33% |
1 / 3 |
|
0.00% |
0 / 1 |
3.19 | |||
getCacheFilePath | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\MessageGroupProcessing; |
5 | |
6 | use Cdb\Reader; |
7 | use Cdb\Writer; |
8 | use FileBasedMessageGroup; |
9 | use RuntimeException; |
10 | |
11 | /** |
12 | * Caches messages of file based message group source file. Can also track |
13 | * that the cache is up to date. Parsing the source files can be slow, so |
14 | * constructing CDB cache makes accessing that data constant speed regardless |
15 | * of the actual format. This also avoids having to deal with potentially unsafe
16 | * external files during web requests. |
17 | * |
18 | * @author Niklas Laxström |
19 | * @license GPL-2.0-or-later |
20 | * |
21 | * @ingroup MessageGroups |
22 | */ |
class MessageGroupCache {
	/** Reason reported by isValid(): a cache file exists but the source file does not. */
	public const NO_SOURCE = 1;
	/** Reason reported by isValid(): the source file exists but no cache file does. */
	public const NO_CACHE = 2;
	/** Reason reported by isValid(): cache version or contents differ from the source. */
	public const CHANGED = 3;
	/** Value stored under '#version'; isValid() reports CHANGED when the stored value differs. */
	private const VERSION = '4';
	private FileBasedMessageGroup $group;
	/** Lazily opened reader, see open() and close(). */
	private ?Reader $cache = null;
	private string $languageCode;
	private string $cacheFilePath;

	/** Constructs a new cache object for given group and language code. */
	public function __construct(
		FileBasedMessageGroup $group,
		string $languageCode,
		string $cacheFilePath
	) {
		$this->group = $group;
		$this->languageCode = $languageCode;
		$this->cacheFilePath = $cacheFilePath;
	}

	/** Returns whether cache exists for this language and group. */
	public function exists(): bool {
		return file_exists( $this->getCacheFilePath() );
	}

	/**
	 * Returns list of message keys that are stored.
	 * Keys starting with '#' hold cache metadata and are skipped.
	 * @return string[] Message keys that can be passed one-by-one to get() method.
	 */
	public function getKeys(): array {
		$reader = $this->open();
		$keys = [];

		for ( $key = $reader->firstkey(); $key !== false; $key = $reader->nextkey() ) {
			// The null coalescing guards against an empty key
			if ( ( $key[0] ?? '' ) !== '#' ) {
				$keys[] = $key;
			}
		}

		return $keys;
	}

	/**
	 * Returns timestamp in unix-format about when this cache was first created.
	 * @return string|false Unix timestamp.
	 */
	public function getTimestamp() {
		return $this->open()->get( '#created' );
	}

	/**
	 * Returns the value stored under '#updated', which create() sets every time
	 * it writes the cache file.
	 * @return string|false Unix timestamp.
	 */
	public function getUpdateTimestamp() {
		return $this->open()->get( '#updated' );
	}

	/**
	 * Get an item from the cache.
	 * @return string|false
	 */
	public function get( string $key ) {
		return $this->open()->get( $key );
	}

	/**
	 * Get a list of authors.
	 * @return string[]
	 */
	public function getAuthors(): array {
		return $this->getSerializedArray( '#authors' );
	}

	/** Get other data cached from the file format class. */
	public function getExtra(): array {
		return $this->getSerializedArray( '#extra' );
	}

	/**
	 * Populates the cache from current state of the source file.
	 * If the source has no messages, any existing cache file is deleted and no
	 * new cache is written.
	 * @param string|false $created Unix timestamp when the cache is created (for automatic updates).
	 * @throws RuntimeException If the source file cannot be read.
	 */
	public function create( $created = false ): void {
		$this->close(); // Close the reader instance just to be sure

		$parseOutput = $this->group->parseExternal( $this->languageCode );
		$messages = $parseOutput['MESSAGES'];
		if ( $messages === [] ) {
			if ( $this->exists() ) {
				// Delete stale cache files
				unlink( $this->getCacheFilePath() );
			}

			return; // Don't create empty caches
		}

		$sourceFilePath = $this->group->getSourceFilePath( $this->languageCode );
		$contents = file_get_contents( $sourceFilePath );
		if ( $contents === false ) {
			// Fail with a clear message instead of a TypeError from md5( false )
			throw new RuntimeException( "Unable to read source file: $sourceFilePath" );
		}
		$hash = md5( $contents );

		wfMkdirParents( dirname( $this->getCacheFilePath() ) );
		$cache = Writer::open( $this->getCacheFilePath() );

		foreach ( $messages as $key => $value ) {
			$cache->set( $key, $value );
		}
		$cache->set( '#authors', $this->serialize( $parseOutput['AUTHORS'] ) );
		$cache->set( '#extra', $this->serialize( $parseOutput['EXTRA'] ) );
		$cache->set( '#created', $created ?: wfTimestamp() );
		$cache->set( '#updated', wfTimestamp() );
		$cache->set( '#filehash', $hash );
		$cache->set( '#msghash', md5( serialize( $parseOutput ) ) );
		$cache->set( '#version', self::VERSION );
		$cache->close();
	}

	/**
	 * Checks whether the cache still reflects the source file.
	 * It uses multiple conditions to speed up the checking from file
	 * modification timestamps to hashing.
	 *
	 * @param int &$reason (output) The reason for the cache being invalid.
	 *  This parameter is an output-only parameter and doesn't need to be initialized
	 *  by callers. It is only assigned (one of NO_SOURCE, NO_CACHE, CHANGED) when the
	 *  method returns false.
	 * @return bool Whether the cache is up to date.
	 */
	public function isValid( &$reason ): bool {
		$group = $this->group;
		$pattern = $group->getSourceFilePath( '*' );
		$filename = $group->getSourceFilePath( $this->languageCode );

		$parseOutput = null;

		// If the file pattern is not dependent on the language, we will assume
		// that all translations are stored in one file. This means we need to
		// actually parse the file to know if a language is present.
		if ( !str_contains( $pattern, '*' ) ) {
			$parseOutput = $group->parseExternal( $this->languageCode );
			$source = $parseOutput['MESSAGES'] !== [];
		} else {
			// Shared between calls so that each pattern is globbed only once
			static $globCache = [];
			if ( !isset( $globCache[$pattern] ) ) {
				// glob() can return false; treat that as no matching files
				$globCache[$pattern] = array_flip( glob( $pattern, GLOB_NOESCAPE ) ?: [] );
				// Definition file might not match the above pattern
				$globCache[$pattern][$group->getSourceFilePath( 'en' )] = true;
			}
			$source = isset( $globCache[$pattern][$filename] );
		}

		$cacheExists = $this->exists();

		// Existence checks
		if ( !$cacheExists ) {
			if ( !$source ) {
				// Nothing to cache and nothing cached
				return true;
			}

			$reason = self::NO_CACHE;

			return false;
		}

		if ( !$source ) {
			$reason = self::NO_SOURCE;

			return false;
		}

		if ( $this->get( '#version' ) !== self::VERSION ) {
			$reason = self::CHANGED;

			return false;
		}

		// Timestamp check: source not modified after the cache was last written
		if ( filemtime( $filename ) <= $this->get( '#updated' ) ) {
			return true;
		}

		// From now on cache and source file exists, but source file mtime is newer
		$created = $this->get( '#created' );

		// File hash check. An unreadable file skips this shortcut and falls
		// through to the parse output check below.
		$contents = file_get_contents( $filename );
		if ( $contents !== false && $this->get( '#filehash' ) === md5( $contents ) ) {
			// Update cache so that we don't need to compare hashes next time
			$this->create( $created );

			return true;
		}

		// Parse output hash check
		$parseOutput ??= $group->parseExternal( $this->languageCode );
		if ( $this->get( '#msghash' ) === md5( serialize( $parseOutput ) ) ) {
			// Update cache so that we don't need to do slow checks next time
			$this->create( $created );

			return true;
		}

		$reason = self::CHANGED;

		return false;
	}

	/** Closes the reader and deletes the cache file, if there is one. */
	public function invalidate(): void {
		$this->close();
		// Same guard as in create(): avoid a warning when there is nothing to delete
		if ( $this->exists() ) {
			unlink( $this->getCacheFilePath() );
		}
	}

	/**
	 * Reads an array stored in serialized form under the given key.
	 * @return array Empty array if the key is not present in the cache.
	 */
	private function getSerializedArray( string $key ): array {
		$cache = $this->open();

		return $cache->exists( $key ) ? $this->unserialize( $cache->get( $key ) ) : [];
	}

	/** Encodes an array as JSON prefixed with a one-character format marker. */
	private function serialize( array $data ): string {
		// Using simple prefix for easy future extension
		return 'J' . json_encode( $data );
	}

	/**
	 * Decodes a value produced by serialize().
	 * @throws RuntimeException If the format marker is not 'J' or the payload
	 *  does not decode to an array.
	 */
	private function unserialize( string $serialized ): array {
		// Also rejects the empty string, which has no format marker at all
		if ( !str_starts_with( $serialized, 'J' ) ) {
			throw new RuntimeException( 'Unknown serialization format' );
		}

		$data = json_decode( substr( $serialized, 1 ), true );
		if ( !is_array( $data ) ) {
			// json_decode gives null for invalid JSON, which would otherwise
			// surface as a TypeError on the return type
			throw new RuntimeException( 'Invalid serialized data' );
		}

		return $data;
	}

	/** Open the cache for reading. The reader is kept until close() is called. */
	protected function open(): Reader {
		$this->cache ??= Reader::open( $this->getCacheFilePath() );

		return $this->cache;
	}

	/** Close the cache from reading. Does nothing if the cache is not open. */
	protected function close(): void {
		if ( $this->cache === null ) {
			return;
		}

		$this->cache->close();
		$this->cache = null;
	}

	/** Returns full path to the cache file. */
	protected function getCacheFilePath(): string {
		return $this->cacheFilePath;
	}
}