Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 77 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
TranslationCorporaManager | |
0.00% |
0 / 77 |
|
0.00% |
0 / 9 |
506 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getCorporaDumpArraysByTranslationId | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
getUnitsAndCategoriesByTranslationId | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
saveTranslationUnits | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
getFilteredCorporaUnits | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
createTranslationUnitsFromContent | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
42 | |||
getTranslationUnitDTOsByTranslationId | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
getTranslationUnitDTOsAndCategoriesByTranslationId | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
createDTOsFromTranslationUnits | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace ContentTranslation\Manager; |
5 | |
6 | use ContentTranslation\DTO\TranslationUnitDTO; |
7 | use ContentTranslation\Entity\TranslationUnit; |
8 | use ContentTranslation\Exception\InvalidSectionDataException; |
9 | use ContentTranslation\Store\TranslationCorporaStore; |
10 | use ContentTranslation\Translation; |
11 | use MediaWiki\Json\FormatJson; |
12 | |
/**
 * The TranslationCorporaManager service intends to serve all Action API classes, or other
 * scripts, that need to fetch data from the "cx_corpora" table in some specific form.
 * This service depends on the TranslationCorporaStore service to interact with the
 * database table. The difference between these two classes is that TranslationCorporaStore
 * intends to represent the Data Access Layer, while the TranslationCorporaManager lives in
 * the Domain Layer of the application, meaning that it modifies data fetched from the database
 * to serve them to its "clients" as needed.
 *
 * @author Nik Gkountas
 * @license GPL-2.0-or-later
 */
class TranslationCorporaManager {

	/**
	 * Special section ID under which categories are stored in the cx_corpora table,
	 * to distinguish categories from regular translation units.
	 */
	private const CATEGORIES = 'CX_CATEGORY_METADATA';

	private TranslationCorporaStore $corporaStore;

	/**
	 * @param TranslationCorporaStore $corporaStore Store used for every read/write of translation units
	 */
	public function __construct( TranslationCorporaStore $corporaStore ) {
		$this->corporaStore = $corporaStore;
	}

	/**
	 * Get the corpora dump representation of every section of the given translation
	 * that has both a user blob and a source blob. The categories pseudo-section is excluded.
	 *
	 * @param int $translationId
	 * @param bool $sanitize Forwarded as-is to TranslationUnitDTO::toCorporaDumpArray()
	 * @return array Results of TranslationUnitDTO::toCorporaDumpArray(), indexed by section ID
	 */
	public function getCorporaDumpArraysByTranslationId( int $translationId, bool $sanitize ): array {
		$sections = $this->getTranslationUnitDTOsByTranslationId( $translationId );
		// Filter out units which don't have user provided input or source.
		// array_filter() preserves keys, so the result stays indexed by section ID.
		$sections = array_filter( $sections, static function ( TranslationUnitDTO $unit ) {
			return $unit->hasUserBlob() && $unit->hasSourceBlob();
		} );

		return array_map( static function ( TranslationUnitDTO $unit ) use ( $sanitize ) {
			return $unit->toCorporaDumpArray( $sanitize );
		}, $sections );
	}

	/**
	 * Translation units and target categories. Only target categories are fetched
	 * when translation draft is restored. Source categories are saved into cx_corpora table for
	 * pairing with target categories, but not retrieved when translation draft is restored.
	 *
	 * @param int $translationId
	 * @return array { translationUnits: array, categories: ?string } where "translationUnits"
	 * holds the result of TranslationUnitDTO::toArray() for each unit, indexed by section ID
	 */
	public function getUnitsAndCategoriesByTranslationId( int $translationId ): array {
		$unitsAndCategories = $this->getTranslationUnitDTOsAndCategoriesByTranslationId( $translationId );
		$translationUnits = array_map(
			static function ( TranslationUnitDTO $unit ) {
				return $unit->toArray();
			},
			$unitsAndCategories['sections']
		);

		return [
			'translationUnits' => $translationUnits,
			'categories' => $unitsAndCategories['categories']
		];
	}

	/**
	 * Create translation units from the given JSON content and save each of them
	 * through the corpora store.
	 *
	 * @param Translation $translation Recently saved parent translation object
	 * @param string $content JSON-encoded list of section data
	 * @return TranslationUnit[] The units that were saved
	 * @throws InvalidSectionDataException If the content is not a JSON list of valid section data
	 */
	public function saveTranslationUnits( Translation $translation, string $content ): array {
		// All units are built (and validated) before the first one is saved, so invalid
		// content never results in a partial save.
		$translationUnits = $this->createTranslationUnitsFromContent( $content, $translation->getTranslationId() );

		$isNewTranslation = $translation->isNew();
		foreach ( $translationUnits as $translationUnit ) {
			$this->corporaStore->save( $translationUnit, $isNewTranslation );
		}

		return $translationUnits;
	}

	/**
	 * Get a custom representation of every section of the given translation.
	 * The categories pseudo-section is excluded.
	 *
	 * @param int $translationId
	 * @param array $types should be an array of valid types. e.g. ['user', 'mt', 'source']
	 * @param bool $sanitize Forwarded as-is to TranslationUnitDTO::toCustomArray()
	 * @return array Results of TranslationUnitDTO::toCustomArray(), indexed by section ID
	 */
	public function getFilteredCorporaUnits( int $translationId, array $types, bool $sanitize ): array {
		$sections = $this->getTranslationUnitDTOsByTranslationId( $translationId );

		return array_map( static function ( TranslationUnitDTO $unit ) use ( $types, $sanitize ) {
			return $unit->toCustomArray( $types, $sanitize );
		}, $sections );
	}

	/**
	 * Decode the given JSON content and build one TranslationUnit per entry.
	 *
	 * @param string $content JSON-encoded list of section data
	 * @param int $translationId
	 * @return TranslationUnit[]
	 * @throws InvalidSectionDataException If the content does not decode to an array, or an
	 * entry has no "sectionId" or no string "origin"
	 */
	private function createTranslationUnitsFromContent( string $content, int $translationId ): array {
		$units = FormatJson::decode( $content, true );
		if ( !is_array( $units ) ) {
			// Invalid JSON or a scalar JSON document. Iterating over it would only raise a
			// PHP warning and silently save nothing, so report it as invalid section data.
			throw new InvalidSectionDataException();
		}

		$translationUnits = [];
		foreach ( $units as $translationUnitData ) {
			if (
				!is_array( $translationUnitData ) ||
				!isset( $translationUnitData['sectionId'] ) ||
				!is_string( $translationUnitData['origin'] ?? null )
			) {
				throw new InvalidSectionDataException();
			}

			$validate = (bool)( $translationUnitData['validate'] ?? false );
			$timestamp = $translationUnitData['timestamp'] ?? null;
			$sequenceId = isset( $translationUnitData['sequenceId'] ) ? (int)$translationUnitData['sequenceId'] : null;
			'@phan-var ?string $timestamp';
			$translationUnits[] = new TranslationUnit(
				(string)$translationUnitData['sectionId'],
				$translationUnitData['origin'],
				$sequenceId,
				// Content can be null (or absent) in case translator clears the section.
				(string)( $translationUnitData['content'] ?? '' ),
				$translationId,
				$timestamp,
				$validate
			);
		}

		return $translationUnits;
	}

	/**
	 * Get the translation unit DTOs of the given translation, without the categories
	 * pseudo-section.
	 *
	 * @param int $id Translation ID
	 * @return TranslationUnitDTO[] array indexed by the sectionId of each unit
	 */
	public function getTranslationUnitDTOsByTranslationId( int $id ): array {
		$translationUnits = $this->corporaStore->findByTranslationId( $id );
		$translationUnitDTOs = $this->createDTOsFromTranslationUnits( $translationUnits );
		unset( $translationUnitDTOs[ self::CATEGORIES ] );

		return $translationUnitDTOs;
	}

	/**
	 * Get the translation unit DTOs of the given translation, with the target categories
	 * extracted from the categories pseudo-section and returned separately.
	 *
	 * @param int $id Translation ID
	 * @return array { sections: TranslationUnitDTO[], categories: ?string }
	 */
	private function getTranslationUnitDTOsAndCategoriesByTranslationId( int $id ): array {
		$translationUnits = $this->corporaStore->findByTranslationId( $id );
		$translationUnitDTOs = $this->createDTOsFromTranslationUnits( $translationUnits );

		$targetCategories = null;

		if ( isset( $translationUnitDTOs[ self::CATEGORIES ] ) ) {
			// Extract target categories and return separately from translation units (sections).
			// Source categories aren't retrieved, only saved in cx_corpora for pairing
			// with target categories. Source and target categories are saved in cx_corpora table
			// with special section ID, to distinguish categories from translation units.
			$userBlob = $translationUnitDTOs[ self::CATEGORIES ]->getUserBlob();

			if ( $userBlob ) {
				$targetCategories = $userBlob[ 'content' ];
			}
			unset( $translationUnitDTOs[ self::CATEGORIES ] );
		}

		return [
			'sections' => $translationUnitDTOs,
			'categories' => $targetCategories
		];
	}

	/**
	 * Group the given translation units by section ID: every unit becomes one blob
	 * (source, user or MT) of the DTO of its section.
	 *
	 * @param TranslationUnit[] $translationUnits
	 * @return TranslationUnitDTO[] array indexed by the sectionId of each unit
	 */
	private function createDTOsFromTranslationUnits( array $translationUnits ): array {
		/** @var TranslationUnitDTO[] $translationDTOs */
		$translationDTOs = [];

		// Any origin other than "source" or "user" is treated as the name of an MT engine
		$isMT = static function ( $type ) {
			return $type !== TranslationUnitDTO::TYPE_SOURCE && $type !== TranslationUnitDTO::TYPE_USER;
		};

		foreach ( $translationUnits as $unit ) {
			// Units are grouped by section ID. The DTO is created from the first unit seen
			// for a section, so that unit's sequence ID is the one the DTO keeps.
			$id = $unit->getSectionId();
			$translationDTO = $translationDTOs[$id] ?? new TranslationUnitDTO( $id, (int)$unit->getSequenceId() );

			$type = $isMT( $unit->getOrigin() ) ? TranslationUnitDTO::TYPE_MT : $unit->getOrigin();
			$blob = [
				'engine' => $type === TranslationUnitDTO::TYPE_MT ? $unit->getOrigin() : null,
				'content' => $unit->getContent(),
				// TS_ISO_8601 is used because it includes timezone (always Z)
				'timestamp' => wfTimestamp( TS_ISO_8601, $unit->getTimestamp() ),
			];

			$translationDTO->setBlobForType( $type, $blob );
			$translationDTOs[$id] = $translationDTO;
		}

		return $translationDTOs;
	}

}