Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 146 |
|
0.00% |
0 / 12 |
CRAP | |
0.00% |
0 / 1 |
TranslationCorporaStore | |
0.00% |
0 / 146 |
|
0.00% |
0 / 12 |
380 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
updateTranslationUnit | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
6 | |||
insertTranslationUnit | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
2 | |||
deleteTranslationData | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
deleteTranslationDataBySectionId | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
countByTranslationId | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
deleteTranslationDataGently | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
12 | |||
findByTranslationId | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
6 | |||
countTranslatedSubSectionsByTranslationId | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
save | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
12 | |||
doFind | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
6 | |||
createTranslationUnitFromRow | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * @copyright See AUTHORS.txt |
4 | * @license GPL-2.0-or-later |
5 | */ |
6 | declare( strict_types = 1 ); |
7 | |
8 | namespace ContentTranslation\Store; |
9 | |
10 | use ContentTranslation\Entity\TranslationUnit; |
11 | use ContentTranslation\LoadBalancer; |
12 | use LogicException; |
13 | use stdClass; |
14 | use Wikimedia\Rdbms\IDatabase; |
15 | use Wikimedia\Rdbms\IExpression; |
16 | use Wikimedia\Rdbms\LBFactory; |
17 | use Wikimedia\Rdbms\LikeValue; |
18 | use Wikimedia\Rdbms\Platform\ISQLPlatform; |
19 | use Wikimedia\Rdbms\SelectQueryBuilder; |
20 | |
21 | /** |
22 | * The TranslationCorporaStore service represents the Data Access Layer for the parallel corpora. More |
23 | * specifically, this service relies on the database load balancer to interact with the "cx_corpora" |
24 | * table, to insert, update, delete or fetch data to this table. This class exposes methods that |
25 | * usually accepts TranslationUnit entity objects as arguments (when it's an insert/update query), |
26 | * or return TranslationUnit entity objects (for read operations). This service is mostly used inside |
27 | * TranslationCorporaManager service to interact with the database. |
28 | */ |
29 | class TranslationCorporaStore { |
30 | |
31 | /** @var LoadBalancer */ |
32 | private $lb; |
33 | |
34 | /** @var LBFactory */ |
35 | private $lbFactory; |
36 | public const TABLE_NAME = 'cx_corpora'; |
37 | |
38 | public function __construct( LoadBalancer $lb, LBFactory $lbFactory ) { |
39 | $this->lb = $lb; |
40 | $this->lbFactory = $lbFactory; |
41 | } |
42 | |
43 | /** |
44 | * Update a translation unit. |
45 | * |
46 | * @param TranslationUnit $translationUnit |
47 | * @param string $timestamp |
48 | */ |
49 | private function updateTranslationUnit( TranslationUnit $translationUnit, string $timestamp ): void { |
50 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
51 | |
52 | $dbw->newUpdateQueryBuilder() |
53 | ->update( self::TABLE_NAME ) |
54 | ->set( [ |
55 | 'cxc_sequence_id' => $translationUnit->getSequenceId(), |
56 | 'cxc_timestamp' => $dbw->timestamp(), |
57 | 'cxc_content' => $translationUnit->getContent() |
58 | ] ) |
59 | ->where( [ |
60 | 'cxc_translation_id' => $translationUnit->getTranslationId(), |
61 | 'cxc_section_id' => $translationUnit->getSectionId(), |
62 | 'cxc_origin' => $translationUnit->getOrigin(), |
63 | // Sometimes we get "duplicates" entries which differ in timestamp. |
64 | // Then any updates to those sections would fail (duplicate key for |
65 | // a unique index), if we did not limit this call to only one of them. |
66 | 'cxc_timestamp' => $dbw->timestamp( $timestamp ), |
67 | ] ) |
68 | ->caller( __METHOD__ ) |
69 | ->execute(); |
70 | |
71 | if ( $dbw->affectedRows() < 1 ) { |
72 | // Possible reasons: |
73 | // * concurrent request has already updated the row with new timestamp |
74 | // * no change (saving same thing twice in the same second) |
75 | // * translation has been deleted |
76 | throw new LogicException( 'Failed to update a translation section' ); |
77 | } |
78 | } |
79 | |
80 | /** |
81 | * Insert a translation unit. |
82 | * |
83 | * @param TranslationUnit $translationUnit |
84 | */ |
85 | private function insertTranslationUnit( TranslationUnit $translationUnit ): void { |
86 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
87 | |
88 | $dbw->newInsertQueryBuilder() |
89 | ->insertInto( self::TABLE_NAME ) |
90 | ->row( [ |
91 | 'cxc_translation_id' => $translationUnit->getTranslationId(), |
92 | 'cxc_section_id' => $translationUnit->getSectionId(), |
93 | 'cxc_origin' => $translationUnit->getOrigin(), |
94 | 'cxc_sequence_id' => $translationUnit->getSequenceId(), |
95 | 'cxc_timestamp' => $dbw->timestamp(), |
96 | 'cxc_content' => $translationUnit->getContent() |
97 | ] ) |
98 | ->caller( __METHOD__ ) |
99 | ->execute(); |
100 | } |
101 | |
102 | /** |
103 | * Delete translation units and categories associated with the given translation identifier. |
104 | * |
105 | * @param int|int[] $translationId |
106 | */ |
107 | public function deleteTranslationData( $translationId ): void { |
108 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
109 | |
110 | $dbw->newDeleteQueryBuilder() |
111 | ->deleteFrom( self::TABLE_NAME ) |
112 | ->where( [ 'cxc_translation_id' => $translationId ] ) |
113 | ->caller( __METHOD__ ) |
114 | ->execute(); |
115 | } |
116 | |
117 | /** |
118 | * Given the "parent" translation id and the base section id (in the "${revision}_${sectionNumber}" |
119 | * form), this method deletes all the translation units that belong to that section translation, |
120 | * from the "cx_corpora" table. |
121 | * |
122 | * NOTE: The "cxc_section_id" field inside "cx_corpora" table is in the following form for |
123 | * section translation parallel corpora units: "${baseSectionId}_${subSectionId}", where |
124 | * "subSectionId" is given by the cxserver as the section HTML element id (e.g. "cxSourceSection4"). |
125 | * This is why we use a "LIKE" query in the following form, here: "${baseSectionId}_%" |
126 | * |
127 | * @param int $translationId the id of the "parent" translation inside "cx_translations" table |
128 | * @param string $baseSectionId the "cxsx_section_id" as stored inside "cx_section_translations" table |
129 | * @return void |
130 | */ |
131 | public function deleteTranslationDataBySectionId( int $translationId, string $baseSectionId ): void { |
132 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
133 | |
134 | $dbw->newDeleteQueryBuilder() |
135 | ->deleteFrom( self::TABLE_NAME ) |
136 | ->where( [ |
137 | 'cxc_translation_id' => $translationId, |
138 | $dbw->expr( 'cxc_section_id', IExpression::LIKE, |
139 | new LikeValue( $baseSectionId, '_', $dbw->anyString() ) ), |
140 | ] ) |
141 | ->caller( __METHOD__ ) |
142 | ->execute(); |
143 | } |
144 | |
145 | /** |
146 | * Given a translation id, this method returns the count of the parallel corpora |
147 | * translation units, associated with this translation id. |
148 | * |
149 | * @param int $translationId the id of the translation inside "cx_translations" table |
150 | * @return int |
151 | */ |
152 | public function countByTranslationId( int $translationId ): int { |
153 | $dbr = $this->lb->getConnection( DB_REPLICA ); |
154 | |
155 | return $dbr->newSelectQueryBuilder() |
156 | ->select( ISQLPlatform::ALL_ROWS ) |
157 | ->from( self::TABLE_NAME ) |
158 | ->where( [ 'cxc_translation_id' => $translationId ] ) |
159 | ->caller( __METHOD__ ) |
160 | ->fetchRowCount(); |
161 | } |
162 | |
163 | /** |
164 | * Delete translation units and categories associated with the given translation identifier |
165 | * in a manner that avoids creating excessive database lag. |
166 | * |
167 | * @param int|int[] $ids |
168 | * @param int $batchSize |
169 | */ |
170 | public function deleteTranslationDataGently( $ids, int $batchSize = 1000 ): void { |
171 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
172 | |
173 | while ( true ) { |
174 | $rowsToDelete = $dbw->newSelectQueryBuilder() |
175 | ->select( 'cxc_id' ) |
176 | ->from( self::TABLE_NAME ) |
177 | ->where( [ 'cxc_translation_id' => $ids ] ) |
178 | ->limit( $batchSize ) |
179 | ->caller( __METHOD__ ) |
180 | ->fetchFieldValues(); |
181 | |
182 | if ( !$rowsToDelete ) { |
183 | break; |
184 | } |
185 | |
186 | $dbw->newDeleteQueryBuilder() |
187 | ->deleteFrom( self::TABLE_NAME ) |
188 | ->where( [ 'cxc_id' => $rowsToDelete ] ) |
189 | ->caller( __METHOD__ ) |
190 | ->execute(); |
191 | $this->lbFactory->waitForReplication(); |
192 | } |
193 | } |
194 | |
195 | /** |
196 | * @param int $translationId |
197 | * @return TranslationUnit[] |
198 | */ |
199 | public function findByTranslationId( int $translationId ): array { |
200 | $dbr = $this->lb->getConnection( DB_REPLICA ); |
201 | |
202 | $resultSet = $dbr->newSelectQueryBuilder() |
203 | ->select( [ |
204 | 'cxc_translation_id', |
205 | 'cxc_origin', |
206 | 'cxc_section_id', |
207 | 'cxc_timestamp', |
208 | 'cxc_sequence_id', |
209 | 'cxc_content', |
210 | ] ) |
211 | ->from( self::TABLE_NAME ) |
212 | ->where( [ 'cxc_translation_id' => $translationId ] ) |
213 | ->caller( __METHOD__ ) |
214 | ->fetchResultSet(); |
215 | |
216 | $units = []; |
217 | foreach ( $resultSet as $row ) { |
218 | $units[] = $this->createTranslationUnitFromRow( $row ); |
219 | } |
220 | |
221 | return $units; |
222 | } |
223 | |
224 | /** |
225 | * Given a translation id, this method returns an integer, indicating |
226 | * the count of the translated subsections (paragraphs) for that translation. |
227 | * |
228 | * @param int $translationId |
229 | * @return int count of translated subsections |
230 | */ |
231 | public function countTranslatedSubSectionsByTranslationId( int $translationId ): int { |
232 | $dbr = $this->lb->getConnection( DB_REPLICA ); |
233 | |
234 | return (int)$dbr->newSelectQueryBuilder() |
235 | ->select( 'COUNT(DISTINCT cxc_section_id)' ) |
236 | ->from( self::TABLE_NAME ) |
237 | ->where( [ 'cxc_translation_id' => $translationId ] ) |
238 | ->caller( __METHOD__ ) |
239 | ->fetchField(); |
240 | } |
241 | |
242 | /** |
243 | * Saves the translation unit. If the record exists, updates it, otherwise creates it. |
244 | * |
245 | * @param TranslationUnit $translationUnit |
246 | * @param bool $isNewTranslation Whether these are for a brand-new Translation record |
247 | */ |
248 | public function save( TranslationUnit $translationUnit, bool $isNewTranslation ): void { |
249 | $fname = __METHOD__; |
250 | // Update the latest row if there is one instead of making a new one |
251 | $conditions = [ |
252 | 'cxc_translation_id' => $translationUnit->getTranslationId(), |
253 | 'cxc_section_id' => $translationUnit->getSectionId(), |
254 | 'cxc_origin' => $translationUnit->getOrigin() |
255 | ]; |
256 | if ( $isNewTranslation ) { |
257 | // T134245: brand new translations can also insert corpora data in the same |
258 | // request. The doFind() query uses only a subset of a unique cx_corpora index, |
259 | // causing SH gap locks. Worse, is that the leftmost values comes from the |
260 | // auto-incrementing translation_id. This puts gap locks on the range of |
261 | // (MAX(cxc_translation_id),+infinity), which could make the whole API prone |
262 | // to deadlocks and timeouts. Bypass this problem by remembering if the parent |
263 | // translation row is brand new and skipping doFind() in such cases. |
264 | $existing = false; |
265 | } else { |
266 | // Note that the only caller of this method will have already locked the |
267 | // parent Translation row, serializing simultaneous duplicate submissions at |
268 | // this point. Without that row lock, the two transaction might both acquire |
269 | // SH gap locks in doFind() and then deadlock in create() trying to get IX gap |
270 | // locks (if no duplicate rows were found). |
271 | $options = []; |
272 | $dbr = $this->lb->getConnection( DB_REPLICA ); |
273 | $existing = $this->doFind( $dbr, $conditions, $options, $fname ); |
274 | } |
275 | |
276 | if ( $existing ) { |
277 | $dbw = $this->lb->getConnection( DB_PRIMARY ); |
278 | $dbw->doAtomicSection( |
279 | __METHOD__, |
280 | function ( IDatabase $dbw ) use ( $translationUnit, $conditions, $fname ) { |
281 | // Lock the record for updating it. This time we use $dbw - primary db. |
282 | // This is to avoid the unnecessary gap locking with 'for update' query |
283 | // when the record does not exist. |
284 | $options = [ 'FOR UPDATE' ]; |
285 | $existing = $this->doFind( $dbw, $conditions, $options, $fname ); |
286 | $this->updateTranslationUnit( $translationUnit, $existing->getTimestamp() ); |
287 | } |
288 | ); |
289 | } else { |
290 | $this->insertTranslationUnit( $translationUnit ); |
291 | } |
292 | } |
293 | |
294 | private function doFind( IDatabase $db, $conditions, $options, $method ): ?TranslationUnit { |
295 | $row = $db->newSelectQueryBuilder() |
296 | ->select( [ |
297 | 'cxc_translation_id', |
298 | 'cxc_section_id', |
299 | 'cxc_origin', |
300 | 'cxc_timestamp', |
301 | 'cxc_sequence_id', |
302 | 'cxc_content' |
303 | ] ) |
304 | ->from( self::TABLE_NAME ) |
305 | ->where( $conditions ) |
306 | ->orderBy( 'cxc_timestamp', SelectQueryBuilder::SORT_DESC ) |
307 | ->options( $options ) |
308 | ->caller( $method ) |
309 | ->fetchRow(); |
310 | if ( $row ) { |
311 | return $this->createTranslationUnitFromRow( $row ); |
312 | } |
313 | |
314 | return null; |
315 | } |
316 | |
317 | private function createTranslationUnitFromRow( stdClass $row ): TranslationUnit { |
318 | return new TranslationUnit( |
319 | $row->cxc_section_id, |
320 | $row->cxc_origin, |
321 | (int)$row->cxc_sequence_id, // cxc_sequence_id can be null |
322 | (string)$row->cxc_content, // cxc_content can be null |
323 | (int)$row->cxc_translation_id, |
324 | $row->cxc_timestamp |
325 | ); |
326 | } |
327 | } |