Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 156 |
|
0.00% |
0 / 12 |
CRAP | |
0.00% |
0 / 1 |
TranslationCorporaStore | |
0.00% |
0 / 156 |
|
0.00% |
0 / 12 |
380 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
updateTranslationUnit | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
6 | |||
insertTranslationUnit | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
2 | |||
deleteTranslationData | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
deleteTranslationDataBySectionId | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
countByTranslationId | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
deleteTranslationDataGently | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
12 | |||
findByTranslationId | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
6 | |||
countTranslatedSubSectionsByTranslationId | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
2 | |||
save | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
12 | |||
doFind | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
6 | |||
createTranslationUnitFromRow | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * @copyright See AUTHORS.txt |
4 | * @license GPL-2.0-or-later |
5 | */ |
6 | declare( strict_types = 1 ); |
7 | |
8 | namespace ContentTranslation\Store; |
9 | |
10 | use ContentTranslation\Entity\TranslationUnit; |
11 | use ContentTranslation\LoadBalancer; |
12 | use LogicException; |
13 | use stdClass; |
14 | use Wikimedia\Rdbms\IDatabase; |
15 | use Wikimedia\Rdbms\LBFactory; |
16 | use Wikimedia\Rdbms\Platform\ISQLPlatform; |
17 | use Wikimedia\Rdbms\SelectQueryBuilder; |
18 | |
19 | /** |
20 | * The TranslationCorporaStore service represents the Data Access Layer for the parallel corpora. More |
21 | * specifically, this service relies on the database load balancer to interact with the "cx_corpora" |
22 | * table, to insert, update, delete or fetch data to this table. This class exposes methods that |
23 | * usually accepts TranslationUnit entity objects as arguments (when it's an insert/update query), |
24 | * or return TranslationUnit entity objects (for read operations). This service is mostly used inside |
25 | * TranslationCorporaManager service to interact with the database. |
26 | */ |
27 | class TranslationCorporaStore { |
28 | |
29 | /** @var LoadBalancer */ |
30 | private $lb; |
31 | |
32 | /** @var LBFactory */ |
33 | private $lbFactory; |
34 | public const TABLE_NAME = 'cx_corpora'; |
35 | |
36 | public function __construct( LoadBalancer $lb, LBFactory $lbFactory ) { |
37 | $this->lb = $lb; |
38 | $this->lbFactory = $lbFactory; |
39 | } |
40 | |
41 | /** |
42 | * Update a translation unit. |
43 | * |
44 | * @param TranslationUnit $translationUnit |
45 | * @param string $timestamp |
46 | */ |
47 | private function updateTranslationUnit( TranslationUnit $translationUnit, string $timestamp ): void { |
48 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
49 | |
50 | $values = [ |
51 | 'cxc_sequence_id' => $translationUnit->getSequenceId(), |
52 | 'cxc_timestamp' => $dbw->timestamp(), |
53 | 'cxc_content' => $translationUnit->getContent() |
54 | ]; |
55 | $conditions = [ |
56 | 'cxc_translation_id' => $translationUnit->getTranslationId(), |
57 | 'cxc_section_id' => $translationUnit->getSectionId(), |
58 | 'cxc_origin' => $translationUnit->getOrigin(), |
59 | // Sometimes we get "duplicates" entries which differ in timestamp. |
60 | // Then any updates to those sections would fail (duplicate key for |
61 | // a unique index), if we did not limit this call to only one of them. |
62 | 'cxc_timestamp' => $dbw->timestamp( $timestamp ), |
63 | ]; |
64 | |
65 | $dbw->newUpdateQueryBuilder() |
66 | ->update( self::TABLE_NAME ) |
67 | ->set( $values ) |
68 | ->where( $conditions ) |
69 | ->caller( __METHOD__ ) |
70 | ->execute(); |
71 | |
72 | if ( $dbw->affectedRows() < 1 ) { |
73 | // Possible reasons: |
74 | // * concurrent request has already updated the row with new timestamp |
75 | // * no change (saving same thing twice in the same second) |
76 | // * translation has been deleted |
77 | throw new LogicException( 'Failed to update a translation section' ); |
78 | } |
79 | } |
80 | |
81 | /** |
82 | * Insert a translation unit. |
83 | * |
84 | * @param TranslationUnit $translationUnit |
85 | */ |
86 | private function insertTranslationUnit( TranslationUnit $translationUnit ): void { |
87 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
88 | |
89 | $values = [ |
90 | 'cxc_translation_id' => $translationUnit->getTranslationId(), |
91 | 'cxc_section_id' => $translationUnit->getSectionId(), |
92 | 'cxc_origin' => $translationUnit->getOrigin(), |
93 | 'cxc_sequence_id' => $translationUnit->getSequenceId(), |
94 | 'cxc_timestamp' => $dbw->timestamp(), |
95 | 'cxc_content' => $translationUnit->getContent() |
96 | ]; |
97 | |
98 | $dbw->newInsertQueryBuilder() |
99 | ->insertInto( self::TABLE_NAME ) |
100 | ->row( $values ) |
101 | ->caller( __METHOD__ ) |
102 | ->execute(); |
103 | } |
104 | |
105 | /** |
106 | * Delete translation units and categories associated with the given translation identifier. |
107 | * |
108 | * @param int|int[] $translationId |
109 | */ |
110 | public function deleteTranslationData( $translationId ): void { |
111 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
112 | |
113 | $conditions = [ 'cxc_translation_id' => $translationId ]; |
114 | |
115 | $dbw->newDeleteQueryBuilder() |
116 | ->deleteFrom( self::TABLE_NAME ) |
117 | ->where( $conditions ) |
118 | ->caller( __METHOD__ ) |
119 | ->execute(); |
120 | } |
121 | |
122 | /** |
123 | * Given the "parent" translation id and the base section id (in the "${revision}_${sectionNumber}" |
124 | * form), this method deletes all the translation units that belong to that section translation, |
125 | * from the "cx_corpora" table. |
126 | * |
127 | * NOTE: The "cxc_section_id" field inside "cx_corpora" table is in the following form for |
128 | * section translation parallel corpora units: "${baseSectionId}_${subSectionId}", where |
129 | * "subSectionId" is given by the cxserver as the section HTML element id (e.g. "cxSourceSection4"). |
130 | * This is why we use a "LIKE" query in the following form, here: "${baseSectionId}%" |
131 | * |
132 | * @param int $translationId the id of the "parent" translation inside "cx_translations" table |
133 | * @param string $baseSectionId the "cxsx_section_id" as stored inside "cx_section_translations" table |
134 | * @return void |
135 | */ |
136 | public function deleteTranslationDataBySectionId( int $translationId, string $baseSectionId ): void { |
137 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
138 | $conditions = [ |
139 | 'cxc_translation_id' => $translationId, |
140 | 'cxc_section_id' . $dbw->buildLike( $baseSectionId, '_', $dbw->anyString() ) |
141 | ]; |
142 | |
143 | $dbw->newDeleteQueryBuilder() |
144 | ->deleteFrom( self::TABLE_NAME ) |
145 | ->where( $conditions ) |
146 | ->caller( __METHOD__ ) |
147 | ->execute(); |
148 | } |
149 | |
150 | /** |
151 | * Given a translation id, this method returns the count of the parallel corpora |
152 | * translation units, associated with this translation id. |
153 | * |
154 | * @param int $translationId the id of the translation inside "cx_translations" table |
155 | * @return int |
156 | */ |
157 | public function countByTranslationId( int $translationId ): int { |
158 | $dbr = $this->lb->getConnection( DB_REPLICA ); |
159 | |
160 | return $dbr->newSelectQueryBuilder() |
161 | ->select( ISQLPlatform::ALL_ROWS ) |
162 | ->from( self::TABLE_NAME ) |
163 | ->where( [ 'cxc_translation_id' => $translationId ] ) |
164 | ->caller( __METHOD__ ) |
165 | ->fetchRowCount(); |
166 | } |
167 | |
168 | /** |
169 | * Delete translation units and categories associated with the given translation identifier |
170 | * in a manner that avoids creating excessive database lag. |
171 | * |
172 | * @param int|int[] $ids |
173 | * @param int $batchSize |
174 | */ |
175 | public function deleteTranslationDataGently( $ids, int $batchSize = 1000 ): void { |
176 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
177 | |
178 | while ( true ) { |
179 | $rowsToDelete = $dbw->newSelectQueryBuilder() |
180 | ->select( 'cxc_id' ) |
181 | ->from( self::TABLE_NAME ) |
182 | ->where( [ 'cxc_translation_id' => $ids ] ) |
183 | ->limit( $batchSize ) |
184 | ->caller( __METHOD__ ) |
185 | ->fetchFieldValues(); |
186 | |
187 | if ( !$rowsToDelete ) { |
188 | break; |
189 | } |
190 | |
191 | $dbw->newDeleteQueryBuilder() |
192 | ->deleteFrom( self::TABLE_NAME ) |
193 | ->where( [ 'cxc_id' => $rowsToDelete ] ) |
194 | ->caller( __METHOD__ ) |
195 | ->execute(); |
196 | $this->lbFactory->waitForReplication(); |
197 | } |
198 | } |
199 | |
200 | /** |
201 | * @param int $translationId |
202 | * @return TranslationUnit[] |
203 | */ |
204 | public function findByTranslationId( int $translationId ): array { |
205 | $dbr = $this->lb->getConnection( DB_REPLICA ); |
206 | |
207 | $resultSet = $dbr->newSelectQueryBuilder() |
208 | ->select( [ |
209 | 'cxc_translation_id', |
210 | 'cxc_origin', |
211 | 'cxc_section_id', |
212 | 'cxc_timestamp', |
213 | 'cxc_sequence_id', |
214 | 'cxc_content', |
215 | ] ) |
216 | ->from( self::TABLE_NAME ) |
217 | ->where( [ 'cxc_translation_id' => $translationId ] ) |
218 | ->caller( __METHOD__ ) |
219 | ->fetchResultSet(); |
220 | |
221 | $units = []; |
222 | foreach ( $resultSet as $row ) { |
223 | $units[] = $this->createTranslationUnitFromRow( $row ); |
224 | } |
225 | |
226 | return $units; |
227 | } |
228 | |
229 | /** |
230 | * Given a translation id, this method returns an integer, indicating |
231 | * the count of the translated subsections (paragraphs) for that translation. |
232 | * |
233 | * @param int $translationId |
234 | * @return int count of translated subsections |
235 | */ |
236 | public function countTranslatedSubSectionsByTranslationId( int $translationId ): int { |
237 | $dbr = $this->lb->getConnection( DB_REPLICA ); |
238 | |
239 | $row = $dbr->newSelectQueryBuilder() |
240 | ->select( [ |
241 | 'cxc_translation_id', |
242 | 'cxc_section_id', |
243 | 'count' => 'COUNT(DISTINCT cxc_section_id)', |
244 | ] ) |
245 | ->from( self::TABLE_NAME ) |
246 | ->where( [ 'cxc_translation_id' => $translationId ] ) |
247 | ->groupBy( [ 'cxc_translation_id' ] ) |
248 | ->caller( __METHOD__ ) |
249 | ->fetchRow(); |
250 | |
251 | return (int)$row->count; |
252 | } |
253 | |
254 | /** |
255 | * Saves the translation unit. If the record exists, updates it, otherwise creates it. |
256 | * |
257 | * @param TranslationUnit $translationUnit |
258 | * @param bool $isNewTranslation Whether these are for a brand-new Translation record |
259 | */ |
260 | public function save( TranslationUnit $translationUnit, bool $isNewTranslation ): void { |
261 | $fname = __METHOD__; |
262 | // Update the latest row if there is one instead of making a new one |
263 | $conditions = [ |
264 | 'cxc_translation_id' => $translationUnit->getTranslationId(), |
265 | 'cxc_section_id' => $translationUnit->getSectionId(), |
266 | 'cxc_origin' => $translationUnit->getOrigin() |
267 | ]; |
268 | if ( $isNewTranslation ) { |
269 | // T134245: brand new translations can also insert corpora data in the same |
270 | // request. The doFind() query uses only a subset of a unique cx_corpora index, |
271 | // causing SH gap locks. Worse, is that the leftmost values comes from the |
272 | // auto-incrementing translation_id. This puts gap locks on the range of |
273 | // (MAX(cxc_translation_id),+infinity), which could make the whole API prone |
274 | // to deadlocks and timeouts. Bypass this problem by remembering if the parent |
275 | // translation row is brand new and skipping doFind() in such cases. |
276 | $existing = false; |
277 | } else { |
278 | // Note that the only caller of this method will have already locked the |
279 | // parent Translation row, serializing simultaneous duplicate submissions at |
280 | // this point. Without that row lock, the two transaction might both acquire |
281 | // SH gap locks in doFind() and then deadlock in create() trying to get IX gap |
282 | // locks (if no duplicate rows were found). |
283 | $options = []; |
284 | $dbr = $this->lb->getConnection( DB_REPLICA ); |
285 | $existing = $this->doFind( $dbr, $conditions, $options, $fname ); |
286 | } |
287 | |
288 | if ( $existing ) { |
289 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
290 | $dbw->doAtomicSection( |
291 | __METHOD__, |
292 | function ( IDatabase $dbw ) use ( $translationUnit, $conditions, $fname ) { |
293 | // Lock the record for updating it. This time we use $dbw - primary db. |
294 | // This is to avoid the unnecessary gap locking with 'for update' query |
295 | // when the record does not exist. |
296 | $options = [ 'FOR UPDATE' ]; |
297 | $existing = $this->doFind( $dbw, $conditions, $options, $fname ); |
298 | $this->updateTranslationUnit( $translationUnit, $existing->getTimestamp() ); |
299 | } |
300 | ); |
301 | } else { |
302 | $this->insertTranslationUnit( $translationUnit ); |
303 | } |
304 | } |
305 | |
306 | private function doFind( IDatabase $db, $conditions, $options, $method ): ?TranslationUnit { |
307 | $row = $db->newSelectQueryBuilder() |
308 | ->select( [ |
309 | 'cxc_translation_id', |
310 | 'cxc_section_id', |
311 | 'cxc_origin', |
312 | 'cxc_timestamp', |
313 | 'cxc_sequence_id', |
314 | 'cxc_content' |
315 | ] ) |
316 | ->from( self::TABLE_NAME ) |
317 | ->where( $conditions ) |
318 | ->orderBy( 'cxc_timestamp', SelectQueryBuilder::SORT_DESC ) |
319 | ->options( $options ) |
320 | ->caller( $method ) |
321 | ->fetchRow(); |
322 | if ( $row ) { |
323 | return $this->createTranslationUnitFromRow( $row ); |
324 | } |
325 | |
326 | return null; |
327 | } |
328 | |
329 | private function createTranslationUnitFromRow( stdClass $row ): TranslationUnit { |
330 | return new TranslationUnit( |
331 | $row->cxc_section_id, |
332 | $row->cxc_origin, |
333 | (int)$row->cxc_sequence_id, // cxc_sequence_id can be null |
334 | (string)$row->cxc_content, // cxc_content can be null |
335 | (int)$row->cxc_translation_id, |
336 | $row->cxc_timestamp |
337 | ); |
338 | } |
339 | } |