Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
69.51% |
228 / 328 |
|
35.71% |
15 / 42 |
CRAP | |
0.00% |
0 / 1 |
MessageCollection | |
69.51% |
228 / 328 |
|
35.71% |
15 / 42 |
600.58 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
newFromDefinitions | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getLanguage | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setInFile | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setTags | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
keys | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getTitles | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMessageKeys | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getTags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getAuthors | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
42 | |||
addCollectionAuthors | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
loadTranslations | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
resetForNewLanguage | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
slice | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
filter | |
64.00% |
16 / 25 |
|
0.00% |
0 / 1 |
16.65 | |||
filterUntranslatedOptional | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
filterOnCondition | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
5.07 | |||
filterFuzzy | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
5.68 | |||
filterHastranslation | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
5.02 | |||
filterChanged | |
92.00% |
23 / 25 |
|
0.00% |
0 / 1 |
9.04 | |||
filterReviewer | |
75.00% |
6 / 8 |
|
0.00% |
0 / 1 |
5.39 | |||
filterLastTranslator | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
fixKeys | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
loadInfo | |
91.30% |
21 / 23 |
|
0.00% |
0 / 1 |
4.01 | |||
loadReviewInfo | |
84.21% |
16 / 19 |
|
0.00% |
0 / 1 |
4.06 | |||
loadData | |
90.00% |
18 / 20 |
|
0.00% |
0 / 1 |
4.02 | |||
getTitleConds | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
4 | |||
rowToKey | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
2.26 | |||
getReverseMap | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
initMessages | |
73.47% |
36 / 49 |
|
0.00% |
0 / 1 |
29.24 | |||
offsetExists | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
offsetGet | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
offsetSet | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
offsetUnset | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
__get | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
__set | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
rewind | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
current | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
key | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
next | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
valid | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
count | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\MessageLoading; |
5 | |
6 | use AppendIterator; |
7 | use ArrayAccess; |
8 | use Countable; |
9 | use EmptyIterator; |
10 | use InvalidArgumentException; |
11 | use Iterator; |
12 | use LogicException; |
13 | use MediaWiki\Content\TextContent; |
14 | use MediaWiki\Extension\Translate\MessageGroupProcessing\RevTagStore; |
15 | use MediaWiki\Extension\Translate\SystemUsers\FuzzyBot; |
16 | use MediaWiki\Extension\Translate\Utilities\Utilities; |
17 | use MediaWiki\MediaWikiServices; |
18 | use MediaWiki\Revision\RevisionRecord; |
19 | use MediaWiki\Revision\SlotRecord; |
20 | use MediaWiki\Title\TitleValue; |
21 | use RuntimeException; |
22 | use stdClass; |
23 | use Traversable; |
24 | use Wikimedia\Rdbms\IDatabase; |
25 | use Wikimedia\Rdbms\IDBAccessObject; |
26 | |
27 | /** |
28 | * This file contains the core implementation of message collections. |
29 | * |
30 | * A message collection is the collection of messages of one message group in one |
31 | * language. It handles loading the messages in one large batch, and also |
32 | * stores information that can be used to filter the collection in different |
33 | * ways. |
34 | * |
35 | * @author Niklas Laxström |
36 | * @copyright Copyright © 2007-2011, Niklas Laxström |
37 | * @license GPL-2.0-or-later |
38 | */ |
39 | class MessageCollection implements ArrayAccess, Iterator, Countable { |
40 | /** |
41 | * The queries can get very large because each message title is specified |
42 | * individually. Very large queries can confuse the database query planner. |
43 | * Queries are split into multiple separate queries having at most this many |
44 | * items. |
45 | */ |
46 | private const MAX_ITEMS_PER_QUERY = 2000; |
47 | public const FILTER_FUZZY = 'fuzzy'; |
48 | public const FILTER_OPTIONAL = 'optional'; |
49 | public const FILTER_IGNORED = 'ignored'; |
50 | public const FILTER_HAS_TRANSLATION = 'hastranslation'; |
51 | public const FILTER_CHANGED = 'changed'; |
52 | public const FILTER_TRANSLATED = 'translated'; |
53 | public const FILTER_REVIEWER = 'reviewer'; |
54 | public const FILTER_LAST_TRANSLATOR = 'last-translator'; |
55 | private const AVAILABLE_FILTERS = [ |
56 | self::FILTER_FUZZY, |
57 | self::FILTER_OPTIONAL, |
58 | self::FILTER_IGNORED, |
59 | self::FILTER_HAS_TRANSLATION, |
60 | self::FILTER_CHANGED, |
61 | self::FILTER_TRANSLATED, |
62 | self::FILTER_REVIEWER, |
63 | self::FILTER_LAST_TRANSLATOR, |
64 | ]; |
65 | public const INCLUDE_MATCHING = false; |
66 | public const EXCLUDE_MATCHING = true; |
67 | |
68 | /** Language code. */ |
69 | public string $code; |
70 | private MessageDefinitions $definitions; |
71 | /** array( %Message key => translation, ... ) */ |
72 | private array $infile = []; |
73 | // Keys and messages. |
74 | |
75 | /** @var array<string, TitleValue> Key is message display key */ |
76 | protected array $keys = []; |
77 | /** array( %Message String => Message, ... ) */ |
78 | protected ?array $messages = []; |
79 | private ?array $reverseMap = null; |
80 | // Database resources |
81 | |
82 | /** Stored message existence and fuzzy state. */ |
83 | private Traversable $dbInfo; |
84 | /** Stored translations in database. */ |
85 | private Traversable $dbData; |
86 | /** Stored reviews in database. */ |
87 | private Traversable $dbReviewData; |
88 | /** |
89 | * Tags, copied to thin messages |
90 | * tagtype => keys |
91 | * @var array[] |
92 | */ |
93 | protected array $tags = []; |
94 | /** @var string[] Authors. */ |
95 | private array $authors = []; |
96 | |
/**
 * Creates a collection for the given language without any message
 * definitions attached. Use newFromDefinitions() instead, which also sets
 * the definitions and initializes the keys.
 *
 * @param string $code Language code.
 */
public function __construct( string $code ) {
	$this->code = $code;
}
104 | |
/**
 * Builds a ready-to-use message collection from message definitions.
 *
 * @param MessageDefinitions $definitions Definitions the collection is based on.
 * @param string $code Language code.
 * @return self
 */
public static function newFromDefinitions( MessageDefinitions $definitions, string $code ): self {
	$instance = new self( $code );
	// The definitions must be in place before the reset, because
	// resetForNewLanguage() derives the keys from them via fixKeys().
	$instance->definitions = $definitions;
	$instance->resetForNewLanguage( $code );

	return $instance;
}
117 | |
/**
 * Returns the language code this collection is currently set to.
 *
 * @return string Language code.
 */
public function getLanguage(): string {
	return $this->code;
}
121 | |
122 | // Data setters |
123 | |
/**
 * Sets the translations that come from the file, as opposed to translations
 * which only exist in the wiki because they have not been exported and
 * committed yet. Any previously set file translations are replaced.
 *
 * @param string[] $messages Translations indexed by display key.
 */
public function setInFile( array $messages ): void {
	$this->infile = $messages;
}
132 | |
/**
 * Stores the list of message keys carrying the given tag, replacing any
 * list previously stored for that tag type.
 *
 * @param string $type Tag type, usually ignored or optional.
 * @param string[] $keys List of display keys.
 */
public function setTags( string $type, array $keys ): void {
	$this->tags[$type] = $keys;
}
141 | |
/**
 * Returns the message keys currently in the collection. Filtering and
 * slicing change this list.
 *
 * @return array<string, TitleValue> Page titles indexed by display key.
 */
public function keys(): array {
	return $this->keys;
}
149 | |
/**
 * Returns the page titles of the messages that remain in this collection
 * after filtering, without their display keys.
 *
 * @return TitleValue[]
 */
private function getTitles(): array {
	return array_values( $this->keys );
}
157 | |
/**
 * Returns the display keys of the messages that remain in this collection
 * after filtering.
 *
 * @return string[]
 */
public function getMessageKeys(): array {
	return array_keys( $this->keys );
}
165 | |
/**
 * Returns the message keys stored for the given tag.
 *
 * @param string $type Tag type, usually optional or ignored.
 * @return string[] List of keys with the given tag; empty if nothing is stored for it.
 */
public function getTags( string $type ): array {
	if ( isset( $this->tags[$type] ) ) {
		return $this->tags[$type];
	}

	return [];
}
174 | |
/**
 * Lists the translators of the latest revision of each translation, together
 * with the authors added through addCollectionAuthors(). Causes translations
 * to be loaded from the database. The FuzzyBot user is left out.
 *
 * NOTE(review): earlier documentation said the result is not affected by
 * filters, but the messages are built from the current keys; confirm.
 *
 * @return string[] List of usernames, sorted by name.
 */
public function getAuthors(): array {
	$this->loadTranslations();

	// Start from the externally supplied authors. Names become array keys,
	// so each author appears only once.
	$tally = array_flip( $this->authors );

	/** @var Message $message */
	foreach ( $this->messages as $message ) {
		$translator = $message->getProperty( 'last-translator-text' );
		if ( $translator !== null ) {
			$tally[$translator] = isset( $tally[$translator] ) ? $tally[$translator] + 1 : 1;
		}
	}

	ksort( $tally );

	$botName = FuzzyBot::getName();
	$names = [];
	foreach ( array_keys( $tally ) as $name ) {
		if ( $name === $botName ) {
			continue;
		}
		// Numeric-looking names are turned into integers when used as array keys
		$names[] = (string)$name;
	}

	return $names;
}
214 | |
/**
 * Adds external authors (usually from the file). Duplicates are removed.
 *
 * @param string[] $authors List of authors.
 * @param string $mode 'append' to add to the authors already stored, 'set' to
 *  replace them.
 * @throws InvalidArgumentException If $mode is neither 'append' nor 'set'.
 */
public function addCollectionAuthors( array $authors, string $mode = 'append' ): void {
	if ( $mode === 'append' ) {
		$authors = array_merge( $this->authors, $authors );
	} elseif ( $mode !== 'set' ) {
		throw new InvalidArgumentException( "Invalid mode $mode" );
	}

	$this->authors = array_unique( $authors );
}
233 | |
234 | // Data modifiers |
235 | |
/**
 * Loads all message data: translations, existence and fuzzy state, and
 * review information, then builds the message objects. Must be called
 * before accessing the messages with ArrayAccess or iteration.
 */
public function loadTranslations(): void {
	// Performance optimization: build the title conditions once and share
	// them between the loaders, instead of rebuilding them in each one.
	$sharedConds = $this->getTitleConds( Utilities::getSafeReadDB() );
	$currentKeys = $this->keys;

	$this->loadData( $currentKeys, $sharedConds );
	$this->loadInfo( $currentKeys, $sharedConds );
	$this->loadReviewInfo( $currentKeys, $sharedConds );
	$this->initMessages();
}
251 | |
/**
 * Switches the collection to another language. Some statistics scripts, for
 * example, loop the same collection over every language. This is a shortcut
 * which keeps the definitions and the tags (except the 'fuzzy' tag), and
 * discards everything loaded or supplied for the previous language.
 *
 * @param string $code Language code.
 */
public function resetForNewLanguage( string $code ): void {
	// The language must be switched first: fixKeys() builds the titles from it
	$this->code = $code;
	$this->keys = $this->fixKeys();
	$this->reverseMap = null;

	$this->messages = null;
	$this->infile = [];
	$this->authors = [];
	unset( $this->tags['fuzzy'] );

	// An EmptyIterator marks database data as "not loaded yet"
	$this->dbInfo = new EmptyIterator();
	$this->dbData = new EmptyIterator();
	$this->dbReviewData = new EmptyIterator();
}
269 | |
/**
 * For paging messages: restricts the collection to one page. One can count
 * the messages before and after slicing.
 *
 * @param string|int $offset Either a numeric position, or the message key at which
 *  the page starts. An empty string means the beginning. A message key that is not
 *  in the collection results in an empty page.
 * @param int $limit Maximum number of messages to keep.
 * @return array Offsets that can be used for paging backwards and forwards:
 *  the backwards offset (numeric string, or false if there is no previous page),
 *  the forwards offset (message key, or false if there is no next page) and the
 *  numeric position the page starts at.
 * @since String offsets and return value since 2013-01-10
 */
public function slice( $offset, $limit ) {
	$indexes = array_keys( $this->keys );

	if ( $offset === '' ) {
		$offset = 0;
	}

	if ( ctype_digit( (string)$offset ) ) {
		$offset = (int)$offset;
	} else {
		// Handle string offsets by looking up the position of the message key.
		// After this the offset is always an integer, suitable for array_slice.
		$pos = array_search( $offset, $indexes, true );
		$offset = $pos !== false ? $pos : count( $indexes );
	}

	// False means that cannot go back or forward
	$backwardsOffset = $forwardsOffset = false;
	// Backwards paging uses numerical indexes, see below

	// Can only skip this if no offset has been provided or the
	// offset is zero. (offset - limit ) > 1 does not work, because
	// users can end in offset=2, limit=5 and can't see the first
	// two messages. That's also why it is capped into zero with
	// max(). And finally make the offsets to be strings even if
	// they are numbers in this case.
	if ( $offset > 0 ) {
		$backwardsOffset = (string)( max( 0, $offset - $limit ) );
	}

	// Forwards paging uses keys. If user opens view Untranslated,
	// translates some messages and then clicks next, the first
	// message visible in the page is the first message not shown
	// in the previous page (unless someone else translated it at
	// the same time). If we used integer offsets, we would skip
	// same number of messages that were translated, because they
	// are no longer in the list. For backwards paging this is not
	// such a big issue, so it still uses integer offsets, because
	// we would need to also implement "direction" to have it work
	// correctly.
	if ( isset( $indexes[$offset + $limit] ) ) {
		$forwardsOffset = $indexes[$offset + $limit];
	}

	$this->keys = array_slice( $this->keys, $offset, $limit, true );

	return [ $backwardsOffset, $forwardsOffset, $offset ];
}
325 | |
/**
 * Filters messages based on some condition. Some filters cause data to be
 * loaded from the database:
 * - PAGEINFO: existence and fuzzy tags.
 * - TRANSLATIONS: translations for every message. It is recommended to first
 *   filter with filters that do not need those. It is recommended to add
 *   translations from the file with setInFile(), and it is needed for the
 *   changed filter to work.
 *
 * @param string $filter
 * - FILTER_FUZZY: messages with fuzzy tag (PAGEINFO)
 * - FILTER_OPTIONAL: messages marked as optional.
 * - FILTER_IGNORED: messages which are not for translation.
 * - FILTER_HAS_TRANSLATION: messages which have a translation (be it fuzzy or not)
 *   (PAGEINFO, *INFILE).
 * - FILTER_TRANSLATED: messages which have a translation which is not fuzzy
 *   (PAGEINFO, *INFILE).
 * - FILTER_CHANGED: translation in database differs from infile.
 *   (INFILE, TRANSLATIONS)
 * - FILTER_REVIEWER: messages which are reviewed by a particular user
 * - FILTER_LAST_TRANSLATOR: messages which are last translated by a particular user
 * @param bool $condition Whether to return messages which do not satisfy
 *  the given filter condition (true, EXCLUDE_MATCHING), or only those which do
 *  (false, INCLUDE_MATCHING).
 * @param int|null $value Value for properties filtering; passed as the user id
 *  to the reviewer and last-translator filters.
 * @throws InvalidFilterException If given invalid filter name.
 */
public function filter( string $filter, bool $condition, ?int $value = null ): void {
	if ( !in_array( $filter, self::AVAILABLE_FILTERS, true ) ) {
		throw new InvalidFilterException( $filter );
	}

	$keys = $this->keys;
	switch ( $filter ) {
		case self::FILTER_FUZZY:
			$keys = $this->filterFuzzy( $keys, $condition );
			break;
		case self::FILTER_HAS_TRANSLATION:
			$keys = $this->filterHastranslation( $keys, $condition );
			break;
		case self::FILTER_TRANSLATED:
			$fuzzy = $this->filterFuzzy( $keys, self::INCLUDE_MATCHING );
			$hastranslation = $this->filterHastranslation( $keys, self::INCLUDE_MATCHING );
			// Fuzzy messages are not counted as translated messages
			$translated = $this->filterOnCondition( $hastranslation, $fuzzy );
			$keys = $this->filterOnCondition( $keys, $translated, $condition );
			break;
		case self::FILTER_CHANGED:
			$keys = $this->filterChanged( $keys, $condition );
			break;
		case self::FILTER_REVIEWER:
			$keys = $this->filterReviewer( $keys, $condition, $value );
			break;
		case self::FILTER_LAST_TRANSLATOR:
			$keys = $this->filterLastTranslator( $keys, $condition, $value );
			break;
		case self::FILTER_OPTIONAL:
		case self::FILTER_IGNORED:
			// Tag based filters. Without stored tags nothing matches the filter.
			$taggedKeys = array_flip( $this->tags[$filter] ?? [] );
			$keys = $this->filterOnCondition( $keys, $taggedKeys, $condition );
			break;
		default:
			// Not reachable as long as every entry of AVAILABLE_FILTERS has a case above
			throw new RuntimeException( "No tagged messages for custom filter $filter" );
	}

	$this->keys = $keys;
}
388 | |
/**
 * Removes from the collection the optional messages that have no translation.
 *
 * @internal For MessageGroupStats
 */
public function filterUntranslatedOptional(): void {
	// Load the info with the full key list first, so that the info cache is
	// not accidentally populated with too few keys by the narrower call below.
	$this->loadInfo( $this->keys );

	// Convert plain message keys to array<string,TitleValue>
	$optionalLookup = array_flip( $this->tags['optional'] ?? [] );
	$optionalMessages = $this->filterOnCondition( $this->keys, $optionalLookup, self::INCLUDE_MATCHING );

	// Reduce that list to those which have no translation
	$untranslated = $this->filterHastranslation( $optionalMessages, self::EXCLUDE_MATCHING );

	// Now remove that list from the full list
	$this->keys = $this->filterOnCondition( $this->keys, $untranslated, self::EXCLUDE_MATCHING );
}
401 | |
/**
 * Filters one list of keys with another list of keys according to the condition.
 * In other words, you have a list of keys, and you have determined a list of
 * keys that have some feature. Now you can take either the entries that are
 * in both lists, or the entries that are in the first list but not in the
 * second. The second list does not need to be a subset of the first.
 * Only the array keys are compared; the values returned are those of $keys.
 *
 * @param array $keys List to filter, indexed by message key.
 * @param array $condKeys Second list, indexed by message key.
 * @param bool $condition True (EXCLUDE_MATCHING, default) to return entries which
 *  are in the first but not in the second list, false (INCLUDE_MATCHING) to return
 *  entries which are in both.
 * @return array Filtered list, indexed by message key.
 */
private function filterOnCondition( array $keys, array $condKeys, bool $condition = true ): array {
	if ( $condition === self::EXCLUDE_MATCHING ) {
		// Delete $condKeys from $keys
		return array_diff_key( $keys, $condKeys );
	}

	// Keep the keys which are in $condKeys
	return array_filter(
		$keys,
		static fn ( $key ) => isset( $condKeys[$key] ),
		ARRAY_FILTER_USE_KEY
	);
}
434 | |
/**
 * Filters the list of keys according to whether the translation is fuzzy.
 * Loads existence and fuzzy state from the database if not loaded yet.
 *
 * @param array $keys List to filter, indexed by message key.
 * @param bool $condition True (EXCLUDE_MATCHING) to filter away fuzzy
 *  translations, false (INCLUDE_MATCHING) to keep only fuzzy translations.
 * @return array Filtered list, indexed by message key.
 */
private function filterFuzzy( array $keys, bool $condition ): array {
	$this->loadInfo( $keys );

	$nonFuzzy = $keys;
	foreach ( $this->dbInfo as $row ) {
		// rt_type comes from a left join and is set only when a fuzzy tag was found
		if ( $row->rt_type === null ) {
			continue;
		}
		unset( $nonFuzzy[$this->rowToKey( $row )] );
	}

	if ( $condition === self::INCLUDE_MATCHING ) {
		// What was removed above is exactly the fuzzy set
		return array_diff( $keys, $nonFuzzy );
	}

	return $nonFuzzy;
}
462 | |
/**
 * Filters the list of keys according to whether they have a translation,
 * either as a page in the database or in the file translations.
 * Loads existence information from the database if not loaded yet.
 *
 * @param array $keys List to filter, indexed by message key.
 * @param bool $condition True (EXCLUDE_MATCHING) to filter away translated
 *  messages, false (INCLUDE_MATCHING) to filter away untranslated messages.
 * @return array Filtered list, indexed by message key.
 */
private function filterHastranslation( array $keys, bool $condition ): array {
	$this->loadInfo( $keys );

	$untranslated = $keys;
	// Every row stands for an existing translation page
	foreach ( $this->dbInfo as $row ) {
		unset( $untranslated[$this->rowToKey( $row )] );
	}

	// Check also if there is something in the file that is not yet in the database
	$untranslated = array_diff_key( $untranslated, $this->infile );

	if ( $condition === self::INCLUDE_MATCHING ) {
		// Remove the messages which do not have a translation from the list
		return array_diff( $keys, $untranslated );
	}

	return $untranslated;
}
494 | |
/**
 * Filters the list of keys according to whether the current translation
 * differs from the committed translation (the one set with setInFile()).
 * Loads the latest revisions from the database if not loaded yet.
 *
 * A message is treated as unchanged only when it has both a file translation
 * and a loadable revision whose text is identical to the file translation.
 *
 * NOTE(review): unlike the other filters, EXCLUDE_MATCHING keeps the changed
 * messages here and INCLUDE_MATCHING keeps the unchanged ones. Confirm that
 * callers rely on this before altering it.
 *
 * @param array $keys List to filter, indexed by message key.
 * @param bool $condition True (EXCLUDE_MATCHING) to remove the unchanged
 *  messages, false (INCLUDE_MATCHING) to return only the unchanged messages.
 * @return array Filtered list, indexed by message key.
 */
private function filterChanged( array $keys, bool $condition ): array {
	$this->loadData( $keys );

	$origKeys = [];
	if ( $condition === self::INCLUDE_MATCHING ) {
		$origKeys = $keys;
	}

	// Only rows which also have a file translation can be compared
	$infileRows = [];
	foreach ( $this->dbData as $row ) {
		$mkey = $this->rowToKey( $row );
		// rowToKey() returns null for titles which are not part of this collection
		if ( $mkey !== null && isset( $this->infile[$mkey] ) ) {
			$infileRows[] = $row;
		}
	}

	$revStore = MediaWikiServices::getInstance()->getRevisionStore();
	$revisions = $revStore->newRevisionsFromBatch( $infileRows, [
		'slots' => [ SlotRecord::MAIN ],
		'content' => true
	] )->getValue();

	foreach ( $infileRows as $row ) {
		// A revision may be missing from the batch result; treat it as "no revision"
		// instead of reading an undefined index.
		/** @var RevisionRecord|null $rev */
		$rev = $revisions[$row->rev_id] ?? null;
		if ( !$rev ) {
			continue;
		}

		/** @var TextContent $content */
		$content = $rev->getContent( SlotRecord::MAIN );
		if ( !$content ) {
			continue;
		}

		$mkey = $this->rowToKey( $row );
		if ( $this->infile[$mkey] === $content->getText() ) {
			// Remove unchanged messages from the list
			unset( $keys[$mkey] );
		}
	}

	// Remove the messages which have changed from the original list
	if ( $condition === self::INCLUDE_MATCHING ) {
		$keys = $this->filterOnCondition( $origKeys, $keys );
	}

	return $keys;
}
547 | |
/**
 * Filters the list of keys according to whether the user has accepted them.
 * Loads review information from the database if not loaded yet.
 *
 * @param array $keys List to filter, indexed by message key.
 * @param bool $condition True (EXCLUDE_MATCHING) to remove translations the
 *  user has accepted, false (INCLUDE_MATCHING) to get only translations
 *  accepted by the user.
 * @param ?int $userId User id of the reviewer. Null matches a review by any user.
 * @return array Filtered list, indexed by message key.
 */
private function filterReviewer( array $keys, bool $condition, ?int $userId ): array {
	$this->loadReviewInfo( $keys );

	// Remove the messages which have the given reviewer (possibly among others)
	$notReviewed = $keys;
	foreach ( $this->dbReviewData as $row ) {
		$matchesReviewer = $userId === null || (int)$row->trr_user === $userId;
		if ( $matchesReviewer ) {
			unset( $notReviewed[$this->rowToKey( $row )] );
		}
	}

	if ( $condition === self::INCLUDE_MATCHING ) {
		return array_diff( $keys, $notReviewed );
	}

	return $notReviewed;
}
574 | |
/**
 * Filters the list of keys according to who made the latest revision of the
 * translation. Loads the latest revisions from the database if not loaded yet.
 *
 * @param array $keys List to filter, indexed by message key.
 * @param bool $condition True (EXCLUDE_MATCHING) to remove translations where
 *  the last translator is the given user, false (INCLUDE_MATCHING) to get only
 *  translations last made by the given user.
 * @param ?int $userId User id of the translator. Null is treated as user id 0.
 * @return array Filtered list, indexed by message key.
 */
private function filterLastTranslator( array $keys, bool $condition, ?int $userId ): array {
	$this->loadData( $keys );

	$translatorId = $userId ?? 0;
	$byOthers = $keys;
	foreach ( $this->dbData as $row ) {
		if ( (int)$row->rev_user !== $translatorId ) {
			continue;
		}
		unset( $byOthers[$this->rowToKey( $row )] );
	}

	if ( $condition === self::INCLUDE_MATCHING ) {
		return array_diff( $keys, $byOthers );
	}

	return $byOthers;
}
598 | |
/**
 * Builds the translation page title of every defined message for the
 * current language, by appending the language code as a subpage to the
 * base title.
 *
 * @return array<string, TitleValue> Translation page titles indexed by display key.
 */
private function fixKeys(): array {
	$languageSuffix = '/' . $this->code;

	$titlesByKey = [];
	foreach ( $this->definitions->getPages() as $displayKey => $baseTitle ) {
		$titlesByKey[$displayKey] = new TitleValue(
			$baseTitle->getNamespace(),
			$baseTitle->getDBkey() . $languageSuffix
		);
	}

	return $titlesByKey;
}
616 | |
/**
 * Loads existence and fuzzy state of the translation pages. Does nothing if
 * the data has already been loaded.
 *
 * @param array $keys List of keys; only used to skip the query when it is empty.
 * @param string[]|null $titleConds Database query conditions, one per query.
 *  When omitted they are built from the current keys of the collection.
 */
private function loadInfo( array $keys, ?array $titleConds = null ): void {
	// Anything other than the EmptyIterator placeholder means "already loaded"
	if ( !( $this->dbInfo instanceof EmptyIterator ) ) {
		return;
	}

	if ( $keys === [] ) {
		$this->dbInfo = new EmptyIterator();
		return;
	}

	$dbr = Utilities::getSafeReadDB();
	$titleConds ??= $this->getTitleConds( $dbr );

	$results = new AppendIterator();
	foreach ( $titleConds as $conds ) {
		$results->append(
			$dbr->newSelectQueryBuilder()
				->select( [ 'page_namespace', 'page_title', 'rt_type' ] )
				->from( 'page' )
				// Left join: rt_type is null unless the latest revision has a fuzzy tag
				->leftJoin( 'revtag', null, [
					'page_id=rt_page',
					'page_latest=rt_revision',
					'rt_type' => RevTagStore::FUZZY_TAG,
				] )
				->where( $conds )
				->caller( __METHOD__ )
				->fetchResultSet()
		);
	}

	$this->dbInfo = $results;

	// Populate and cache the reverse map now. If the call to initMessages() is delayed
	// (e.g. a filter that calls loadData() is used, or ::slice is used), the reverse map
	// would not contain all the entries that are present in the results and would throw
	// notices.
	$this->getReverseMap();
}
658 | |
/**
 * Loads the reviewers of the latest revision of the translation pages. Does
 * nothing if the data has already been loaded.
 *
 * @param array $keys List of keys; only used to skip the query when it is empty.
 * @param string[]|null $titleConds Database query conditions, one per query.
 *  When omitted they are built from the current keys of the collection.
 */
private function loadReviewInfo( array $keys, ?array $titleConds = null ): void {
	// Anything other than the EmptyIterator placeholder means "already loaded"
	if ( !( $this->dbReviewData instanceof EmptyIterator ) ) {
		return;
	}

	if ( $keys === [] ) {
		$this->dbReviewData = new EmptyIterator();
		return;
	}

	$dbr = Utilities::getSafeReadDB();
	$titleConds ??= $this->getTitleConds( $dbr );

	$results = new AppendIterator();
	foreach ( $titleConds as $conds ) {
		$results->append(
			$dbr->newSelectQueryBuilder()
				->select( [ 'page_namespace', 'page_title', 'trr_user' ] )
				->from( 'page' )
				->join( 'translate_reviews', null, [ 'page_id=trr_page', 'page_latest=trr_revision' ] )
				->where( $conds )
				->caller( __METHOD__ )
				->fetchResultSet()
		);
	}

	$this->dbReviewData = $results;

	// Populate and cache the reverse map now. If the call to initMessages() is delayed
	// (e.g. a filter that calls loadData() is used, or ::slice is used), the reverse map
	// would not contain all the entries that are present in the results and would throw
	// notices.
	$this->getReverseMap();
}
696 | |
/**
 * Loads the latest revision rows of the translation pages. Does nothing if
 * the data has already been loaded.
 *
 * @param array $keys List of keys; only used to skip the query when it is empty.
 * @param string[]|null $titleConds Database query conditions, one per query.
 *  When omitted they are built from the current keys of the collection.
 */
private function loadData( array $keys, ?array $titleConds = null ): void {
	// Anything other than the EmptyIterator placeholder means "already loaded"
	if ( !( $this->dbData instanceof EmptyIterator ) ) {
		return;
	}

	if ( $keys === [] ) {
		$this->dbData = new EmptyIterator();
		return;
	}

	$dbr = Utilities::getSafeReadDB();
	$revStore = MediaWikiServices::getInstance()->getRevisionStore();
	$titleConds ??= $this->getTitleConds( $dbr );

	$results = new AppendIterator();
	foreach ( $titleConds as $conds ) {
		$results->append(
			$revStore->newSelectQueryBuilder( $dbr )
				->joinPage()
				->joinComment()
				->where( $conds )
				// Restrict to the current revision of each page
				->andWhere( [ 'page_latest = rev_id' ] )
				->caller( __METHOD__ )
				->fetchResultSet()
		);
	}

	$this->dbData = $results;

	// Populate and cache the reverse map now. If the call to initMessages() is delayed
	// (e.g. a filter that calls loadData() is used, or ::slice is used), the reverse map
	// would not contain all the entries that are present in the results and would throw
	// notices.
	$this->getReverseMap();
}
735 | |
/**
 * Constructs database query conditions for the current set of keys. The
 * titles are split into chunks of at most MAX_ITEMS_PER_QUERY items, and one
 * condition is produced per chunk.
 *
 * @param IDatabase $db Database connection used to build the condition strings.
 * @return string[] One condition per chunk; empty if there are no keys.
 */
private function getTitleConds( IDatabase $db ): array {
	$queryConds = [];

	foreach ( array_chunk( $this->getTitles(), self::MAX_ITEMS_PER_QUERY ) as $chunk ) {
		// Group the page names of this chunk by namespace
		$pagenamesByNamespace = [];
		foreach ( $chunk as $title ) {
			$pagenamesByNamespace[$title->getNamespace()][] = $title->getDBkey();
		}

		// One "namespace AND titles" condition per namespace, joined with OR
		$namespaceConds = [];
		foreach ( $pagenamesByNamespace as $namespace => $pagenames ) {
			$namespaceConds[] = $db->makeList(
				[
					'page_namespace' => $namespace,
					'page_title' => $pagenames,
				],
				LIST_AND
			);
		}

		$queryConds[] = $db->makeList( $namespaceConds, LIST_OR );
	}

	return $queryConds;
}
769 | |
/**
 * Finds the message key for a database row. The row's page_namespace and
 * page_title fields are used as indexes into the two-dimensional reverse map.
 *
 * @param stdClass $row Database row having page_namespace and page_title fields.
 * @return string|null Message key, or null (with a warning) if the title is not
 *  in the reverse map.
 */
private function rowToKey( stdClass $row ): ?string {
	$map = $this->getReverseMap();
	$mkey = $map[$row->page_namespace][$row->page_title] ?? null;
	if ( $mkey !== null ) {
		return $mkey;
	}

	wfWarn( "Got unknown title from the database: {$row->page_namespace}:{$row->page_title}" );

	return null;
}
785 | |
/**
 * Return the two-dimensional map of namespace and pagename to message key.
 *
 * The map is built from $this->keys on the first call and cached in
 * $this->reverseMap; later calls return the cached array.
 *
 * @return array [ namespace => [ dbkey => message key ] ]
 */
private function getReverseMap(): array {
	if ( $this->reverseMap === null ) {
		$lookup = [];
		/** @var TitleValue $titleValue */
		foreach ( $this->keys as $messageKey => $titleValue ) {
			$lookup[$titleValue->getNamespace()][$titleValue->getDBkey()] = $messageKey;
		}

		$this->reverseMap = $lookup;
	}

	return $this->reverseMap;
}
801 | |
/**
 * Constructs all Messages (ThinMessage) from the data accumulated so far.
 * Usually there is no need to call this method directly.
 *
 * Does nothing when $this->messages has already been populated. Otherwise one
 * ThinMessage is created per entry in $this->keys and then enriched with:
 *  - the page row, latest revision id and main-slot text from $this->dbData,
 *  - the 'fuzzy' tag derived from $this->dbInfo rows whose rt_type is not null,
 *  - all tags from $this->tags and in-file values from $this->infile,
 *  - reviewers from $this->dbReviewData,
 *  - a 'status' property: fuzzy, proofread, translated or untranslated.
 *
 * Rows that rowToKey() cannot resolve (it returns null after warning) are skipped;
 * they are never used as an array offset and never recorded in the 'fuzzy' tag list.
 */
public function initMessages(): void {
	if ( $this->messages !== null ) {
		return;
	}

	$messages = [];
	$definitions = $this->definitions->getDefinitions();
	$revStore = MediaWikiServices::getInstance()->getRevisionStore();
	$queryFlags = Utilities::shouldReadFromPrimary() ? IDBAccessObject::READ_LATEST : 0;
	foreach ( array_keys( $this->keys ) as $mkey ) {
		$messages[$mkey] = new ThinMessage( $mkey, $definitions[$mkey] );
	}

	if ( !$this->dbData instanceof EmptyIterator ) {
		// Fetch the main slot content for all rows in one batch.
		$slotRows = $revStore->getContentBlobsForBatch(
			$this->dbData,
			[ SlotRecord::MAIN ],
			$queryFlags
		)->getValue();

		foreach ( $this->dbData as $row ) {
			$mkey = $this->rowToKey( $row );
			if ( $mkey === null || !isset( $messages[$mkey] ) ) {
				continue;
			}
			$messages[$mkey]->setRow( $row );
			$messages[$mkey]->setProperty( 'revision', $row->page_latest );

			if ( isset( $slotRows[$row->rev_id][SlotRecord::MAIN] ) ) {
				$slot = $slotRows[$row->rev_id][SlotRecord::MAIN];
				$messages[$mkey]->setTranslation( $slot->blob_data );
			}
		}
	}

	$fuzzy = [];
	foreach ( $this->dbInfo as $row ) {
		if ( $row->rt_type === null ) {
			continue;
		}

		// An unknown title yields null; keep it out of the tag list.
		$mkey = $this->rowToKey( $row );
		if ( $mkey !== null ) {
			$fuzzy[] = $mkey;
		}
	}

	$this->setTags( 'fuzzy', $fuzzy );

	// Copy tags if any.
	foreach ( $this->tags as $type => $keys ) {
		foreach ( $keys as $mkey ) {
			if ( isset( $messages[$mkey] ) ) {
				$messages[$mkey]->addTag( $type );
			}
		}
	}

	// Copy infile if any.
	foreach ( $this->infile as $mkey => $value ) {
		if ( isset( $messages[$mkey] ) ) {
			$messages[$mkey]->setInfile( $value );
		}
	}

	foreach ( $this->dbReviewData as $row ) {
		$mkey = $this->rowToKey( $row );
		if ( $mkey === null || !isset( $messages[$mkey] ) ) {
			continue;
		}
		$messages[$mkey]->appendProperty( 'reviewers', $row->trr_user );
	}

	// Set the status property. Order matters: fuzzy wins over proofread, which wins
	// over translated.
	foreach ( $messages as $obj ) {
		if ( $obj->hasTag( 'fuzzy' ) ) {
			$obj->setProperty( 'status', 'fuzzy' );
		} elseif ( is_array( $obj->getProperty( 'reviewers' ) ) ) {
			$obj->setProperty( 'status', 'proofread' );
		} elseif ( $obj->translation() !== null ) {
			$obj->setProperty( 'status', 'translated' );
		} else {
			$obj->setProperty( 'status', 'untranslated' );
		}
	}

	$this->messages = $messages;
}
889 | |
/**
 * ArrayAccess methods. @{
 *
 * Tell whether the given offset is part of the collection's current key set.
 *
 * @param mixed $offset Message key to look up in $this->keys.
 * @return bool True when $this->keys has a non-null entry for $offset.
 */
public function offsetExists( $offset ): bool {
	$isKnownKey = isset( $this->keys[$offset] );

	return $isKnownKey;
}
897 | |
/**
 * Fetch the message stored under the given offset.
 *
 * Reads $this->messages directly; this method does not build the messages itself.
 *
 * @param mixed $offset
 * @return Message|null Null when no message is set for $offset.
 */
public function offsetGet( $offset ): ?Message {
	if ( isset( $this->messages[$offset] ) ) {
		return $this->messages[$offset];
	}

	return null;
}
902 | |
/**
 * Store a value in the message list under the given offset.
 *
 * Only $this->messages is written; $this->keys is left untouched.
 *
 * @param mixed $offset Offset to store under.
 * @param mixed $value Value to store.
 */
public function offsetSet( $offset, $value ): void {
	$this->messages[$offset] = $value;
}
910 | |
/**
 * Remove the given offset from the collection's key set.
 *
 * Only $this->keys is modified; $this->messages is left untouched.
 *
 * @param mixed $offset Message key to drop.
 */
public function offsetUnset( $offset ): void {
	unset( $this->keys[$offset] );
}
915 | |
916 | /** @} */ |
917 | |
/**
 * Fail fast if trying to access unknown properties. @{
 *
 * @param string $name Name of the property that was read.
 * @return never
 * @throws LogicException Always.
 */
public function __get( string $name ): void {
	$message = __METHOD__ . ": Trying to access unknown property $name";

	throw new LogicException( $message );
}
925 | |
/**
 * Fail fast if trying to modify unknown properties.
 *
 * @param string $name Name of the property that was written.
 * @param mixed $value Ignored.
 * @return never
 * @throws LogicException Always.
 */
public function __set( string $name, $value ): void {
	$message = __METHOD__ . ": Trying to modify unknown property $name";

	throw new LogicException( $message );
}
934 | |
935 | /** @} */ |
936 | |
/**
 * Iterator method. @{
 *
 * Move the iteration cursor back to the first key. The cursor is the internal
 * array pointer of $this->keys.
 */
public function rewind(): void {
	reset( $this->keys );
}
943 | |
/**
 * Return the message for the key under the iteration cursor.
 *
 * @return Message|false False when the collection has no keys at all.
 */
#[\ReturnTypeWillChange]
public function current() {
	if ( count( $this->keys ) === 0 ) {
		return false;
	}

	$cursor = key( $this->keys );

	// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
	return $this->messages[$cursor];
}
954 | |
/**
 * Return the message key under the iteration cursor.
 *
 * @return string|null Whatever key() reports for $this->keys; null once the
 *  cursor is beyond the last element.
 */
public function key(): ?string {
	$cursor = key( $this->keys );

	return $cursor;
}
958 | |
/**
 * Advance the iteration cursor (the internal pointer of $this->keys) by one.
 */
public function next(): void {
	next( $this->keys );
}
962 | |
/**
 * Tell whether the iteration cursor points at a usable element.
 *
 * @return bool True when $this->messages has a non-null entry for the key
 *  currently under the cursor of $this->keys.
 */
public function valid(): bool {
	$cursor = key( $this->keys );

	return isset( $this->messages[$cursor] );
}
966 | |
/**
 * Countable method.
 *
 * @return int Number of entries returned by keys().
 */
public function count(): int {
	$currentKeys = $this->keys();

	return count( $currentKeys );
}
970 | |
971 | /** @} */ |
972 | } |