Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 156
0.00% covered (danger)
0.00%
0 / 12
CRAP
0.00% covered (danger)
0.00%
0 / 1
TranslationCorporaStore
0.00% covered (danger)
0.00%
0 / 156
0.00% covered (danger)
0.00%
0 / 12
380
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 updateTranslationUnit
0.00% covered (danger)
0.00%
0 / 20
0.00% covered (danger)
0.00%
0 / 1
6
 insertTranslationUnit
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
2
 deleteTranslationData
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
2
 deleteTranslationDataBySectionId
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
2
 countByTranslationId
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
2
 deleteTranslationDataGently
0.00% covered (danger)
0.00%
0 / 17
0.00% covered (danger)
0.00%
0 / 1
12
 findByTranslationId
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
6
 countTranslatedSubSectionsByTranslationId
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
2
 save
0.00% covered (danger)
0.00%
0 / 22
0.00% covered (danger)
0.00%
0 / 1
12
 doFind
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
6
 createTranslationUnitFromRow
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2/**
3 * @copyright See AUTHORS.txt
4 * @license GPL-2.0-or-later
5 */
6declare( strict_types = 1 );
7
8namespace ContentTranslation\Store;
9
10use ContentTranslation\Entity\TranslationUnit;
11use ContentTranslation\LoadBalancer;
12use LogicException;
13use stdClass;
14use Wikimedia\Rdbms\IDatabase;
15use Wikimedia\Rdbms\LBFactory;
16use Wikimedia\Rdbms\Platform\ISQLPlatform;
17use Wikimedia\Rdbms\SelectQueryBuilder;
18
19/**
20 * The TranslationCorporaStore service represents the Data Access Layer for the parallel corpora. More
21 * specifically, this service relies on the database load balancer to interact with the "cx_corpora"
22 * table, to insert, update, delete or fetch data to this table. This class exposes methods that
23 * usually accepts TranslationUnit entity objects as arguments (when it's an insert/update query),
24 * or return TranslationUnit entity objects (for read operations). This service is mostly used inside
25 * TranslationCorporaManager service to interact with the database.
26 */
27class TranslationCorporaStore {
28
29    /** @var LoadBalancer */
30    private $lb;
31
32    /** @var LBFactory */
33    private $lbFactory;
34    public const TABLE_NAME = 'cx_corpora';
35
36    public function __construct( LoadBalancer $lb, LBFactory $lbFactory ) {
37        $this->lb = $lb;
38        $this->lbFactory = $lbFactory;
39    }
40
41    /**
42     * Update a translation unit.
43     *
44     * @param TranslationUnit $translationUnit
45     * @param string $timestamp
46     */
47    private function updateTranslationUnit( TranslationUnit $translationUnit, string $timestamp ): void {
48        $dbw = $this->lb->getConnection( DB_PRIMARY );
49
50        $values = [
51            'cxc_sequence_id' => $translationUnit->getSequenceId(),
52            'cxc_timestamp' => $dbw->timestamp(),
53            'cxc_content' => $translationUnit->getContent()
54        ];
55        $conditions = [
56            'cxc_translation_id' => $translationUnit->getTranslationId(),
57            'cxc_section_id' => $translationUnit->getSectionId(),
58            'cxc_origin' => $translationUnit->getOrigin(),
59            // Sometimes we get "duplicates" entries which differ in timestamp.
60            // Then any updates to those sections would fail (duplicate key for
61            // a unique index), if we did not limit this call to only one of them.
62            'cxc_timestamp' => $dbw->timestamp( $timestamp ),
63        ];
64
65        $dbw->newUpdateQueryBuilder()
66            ->update( self::TABLE_NAME )
67            ->set( $values )
68            ->where( $conditions )
69            ->caller( __METHOD__ )
70            ->execute();
71
72        if ( $dbw->affectedRows() < 1 ) {
73            // Possible reasons:
74            // * concurrent request has already updated the row with new timestamp
75            // * no change (saving same thing twice in the same second)
76            // * translation has been deleted
77            throw new LogicException( 'Failed to update a translation section' );
78        }
79    }
80
81    /**
82     * Insert a translation unit.
83     *
84     * @param TranslationUnit $translationUnit
85     */
86    private function insertTranslationUnit( TranslationUnit $translationUnit ): void {
87        $dbw = $this->lb->getConnection( DB_PRIMARY );
88
89        $values = [
90            'cxc_translation_id' => $translationUnit->getTranslationId(),
91            'cxc_section_id' => $translationUnit->getSectionId(),
92            'cxc_origin' => $translationUnit->getOrigin(),
93            'cxc_sequence_id' => $translationUnit->getSequenceId(),
94            'cxc_timestamp' => $dbw->timestamp(),
95            'cxc_content' => $translationUnit->getContent()
96        ];
97
98        $dbw->newInsertQueryBuilder()
99            ->insertInto( self::TABLE_NAME )
100            ->row( $values )
101            ->caller( __METHOD__ )
102            ->execute();
103    }
104
105    /**
106     * Delete translation units and categories associated with the given translation identifier.
107     *
108     * @param int|int[] $translationId
109     */
110    public function deleteTranslationData( $translationId ): void {
111        $dbw = $this->lb->getConnection( DB_PRIMARY );
112
113        $conditions = [ 'cxc_translation_id' => $translationId ];
114
115        $dbw->newDeleteQueryBuilder()
116            ->deleteFrom( self::TABLE_NAME )
117            ->where( $conditions )
118            ->caller( __METHOD__ )
119            ->execute();
120    }
121
122    /**
123     * Given the "parent" translation id and the base section id (in the "${revision}_${sectionNumber}"
124     * form), this method deletes all the translation units that belong to that section translation,
125     * from the "cx_corpora" table.
126     *
127     * NOTE: The "cxc_section_id" field inside "cx_corpora" table is in the following form for
128     * section translation parallel corpora units: "${baseSectionId}_${subSectionId}", where
129     * "subSectionId" is given by the cxserver as the section HTML element id (e.g. "cxSourceSection4").
130     * This is why we use a "LIKE" query in the following form, here: "${baseSectionId}%"
131     *
132     * @param int $translationId the id of the "parent" translation inside "cx_translations" table
133     * @param string $baseSectionId the "cxsx_section_id" as stored inside "cx_section_translations" table
134     * @return void
135     */
136    public function deleteTranslationDataBySectionId( int $translationId, string $baseSectionId ): void {
137        $dbw = $this->lb->getConnection( DB_PRIMARY );
138        $conditions = [
139            'cxc_translation_id' => $translationId,
140            'cxc_section_id' . $dbw->buildLike( $baseSectionId, '_', $dbw->anyString() )
141        ];
142
143        $dbw->newDeleteQueryBuilder()
144            ->deleteFrom( self::TABLE_NAME )
145            ->where( $conditions )
146            ->caller( __METHOD__ )
147            ->execute();
148    }
149
150    /**
151     * Given a translation id, this method returns the count of the parallel corpora
152     * translation units, associated with this translation id.
153     *
154     * @param int $translationId the id of the translation inside "cx_translations" table
155     * @return int
156     */
157    public function countByTranslationId( int $translationId ): int {
158        $dbr = $this->lb->getConnection( DB_REPLICA );
159
160        return $dbr->newSelectQueryBuilder()
161            ->select( ISQLPlatform::ALL_ROWS )
162            ->from( self::TABLE_NAME )
163            ->where( [ 'cxc_translation_id' => $translationId ] )
164            ->caller( __METHOD__ )
165            ->fetchRowCount();
166    }
167
168    /**
169     * Delete translation units and categories associated with the given translation identifier
170     * in a manner that avoids creating excessive database lag.
171     *
172     * @param int|int[] $ids
173     * @param int $batchSize
174     */
175    public function deleteTranslationDataGently( $ids, int $batchSize = 1000 ): void {
176        $dbw = $this->lb->getConnection( DB_PRIMARY );
177
178        while ( true ) {
179            $rowsToDelete = $dbw->newSelectQueryBuilder()
180                ->select( 'cxc_id' )
181                ->from( self::TABLE_NAME )
182                ->where( [ 'cxc_translation_id' => $ids ] )
183                ->limit( $batchSize )
184                ->caller( __METHOD__ )
185                ->fetchFieldValues();
186
187            if ( !$rowsToDelete ) {
188                break;
189            }
190
191            $dbw->newDeleteQueryBuilder()
192                ->deleteFrom( self::TABLE_NAME )
193                ->where( [ 'cxc_id' => $rowsToDelete ] )
194                ->caller( __METHOD__ )
195                ->execute();
196            $this->lbFactory->waitForReplication();
197        }
198    }
199
200    /**
201     * @param int $translationId
202     * @return TranslationUnit[]
203     */
204    public function findByTranslationId( int $translationId ): array {
205        $dbr = $this->lb->getConnection( DB_REPLICA );
206
207        $resultSet = $dbr->newSelectQueryBuilder()
208            ->select( [
209                'cxc_translation_id',
210                'cxc_origin',
211                'cxc_section_id',
212                'cxc_timestamp',
213                'cxc_sequence_id',
214                'cxc_content',
215            ] )
216            ->from( self::TABLE_NAME )
217            ->where( [ 'cxc_translation_id' => $translationId ] )
218            ->caller( __METHOD__ )
219            ->fetchResultSet();
220
221        $units = [];
222        foreach ( $resultSet as $row ) {
223            $units[] = $this->createTranslationUnitFromRow( $row );
224        }
225
226        return $units;
227    }
228
229    /**
230     * Given a translation id, this method returns an integer, indicating
231     * the count of the translated subsections (paragraphs) for that translation.
232     *
233     * @param int $translationId
234     * @return int count of translated subsections
235     */
236    public function countTranslatedSubSectionsByTranslationId( int $translationId ): int {
237        $dbr = $this->lb->getConnection( DB_REPLICA );
238
239        $row = $dbr->newSelectQueryBuilder()
240            ->select( [
241                'cxc_translation_id',
242                'cxc_section_id',
243                'count' => 'COUNT(DISTINCT cxc_section_id)',
244            ] )
245            ->from( self::TABLE_NAME )
246            ->where( [ 'cxc_translation_id' => $translationId ] )
247            ->groupBy( [ 'cxc_translation_id' ] )
248            ->caller( __METHOD__ )
249            ->fetchRow();
250
251        return (int)$row->count;
252    }
253
254    /**
255     * Saves the translation unit. If the record exists, updates it, otherwise creates it.
256     *
257     * @param TranslationUnit $translationUnit
258     * @param bool $isNewTranslation Whether these are for a brand-new Translation record
259     */
260    public function save( TranslationUnit $translationUnit, bool $isNewTranslation ): void {
261        $fname = __METHOD__;
262        // Update the latest row if there is one instead of making a new one
263        $conditions = [
264            'cxc_translation_id' => $translationUnit->getTranslationId(),
265            'cxc_section_id' => $translationUnit->getSectionId(),
266            'cxc_origin' => $translationUnit->getOrigin()
267        ];
268        if ( $isNewTranslation ) {
269            // T134245: brand new translations can also insert corpora data in the same
270            // request. The doFind() query uses only a subset of a unique cx_corpora index,
271            // causing SH gap locks. Worse, is that the leftmost values comes from the
272            // auto-incrementing translation_id. This puts gap locks on the range of
273            // (MAX(cxc_translation_id),+infinity), which could make the whole API prone
274            // to deadlocks and timeouts. Bypass this problem by remembering if the parent
275            // translation row is brand new and skipping doFind() in such cases.
276            $existing = false;
277        } else {
278            // Note that the only caller of this method will have already locked the
279            // parent Translation row, serializing simultaneous duplicate submissions at
280            // this point. Without that row lock, the two transaction might both acquire
281            // SH gap locks in doFind() and then deadlock in create() trying to get IX gap
282            // locks (if no duplicate rows were found).
283            $options = [];
284            $dbr = $this->lb->getConnection( DB_REPLICA );
285            $existing = $this->doFind( $dbr, $conditions, $options, $fname );
286        }
287
288        if ( $existing ) {
289            $dbw = $this->lb->getConnection( DB_PRIMARY );
290            $dbw->doAtomicSection(
291                __METHOD__,
292                function ( IDatabase $dbw ) use ( $translationUnit, $conditions, $fname ) {
293                    // Lock the record for updating it. This time we use $dbw - primary db.
294                    // This is to avoid the unnecessary gap locking with 'for update' query
295                    // when the record does not exist.
296                    $options = [ 'FOR UPDATE' ];
297                    $existing = $this->doFind( $dbw, $conditions, $options, $fname );
298                    $this->updateTranslationUnit( $translationUnit, $existing->getTimestamp() );
299                }
300            );
301        } else {
302            $this->insertTranslationUnit( $translationUnit );
303        }
304    }
305
306    private function doFind( IDatabase $db, $conditions, $options, $method ): ?TranslationUnit {
307        $row = $db->newSelectQueryBuilder()
308            ->select( [
309                'cxc_translation_id',
310                'cxc_section_id',
311                'cxc_origin',
312                'cxc_timestamp',
313                'cxc_sequence_id',
314                'cxc_content'
315            ] )
316            ->from( self::TABLE_NAME )
317            ->where( $conditions )
318            ->orderBy( 'cxc_timestamp', SelectQueryBuilder::SORT_DESC )
319            ->options( $options )
320            ->caller( $method )
321            ->fetchRow();
322        if ( $row ) {
323            return $this->createTranslationUnitFromRow( $row );
324        }
325
326        return null;
327    }
328
329    private function createTranslationUnitFromRow( stdClass $row ): TranslationUnit {
330        return new TranslationUnit(
331            $row->cxc_section_id,
332            $row->cxc_origin,
333            (int)$row->cxc_sequence_id, // cxc_sequence_id can be null
334            (string)$row->cxc_content, // cxc_content can be null
335            (int)$row->cxc_translation_id,
336            $row->cxc_timestamp
337        );
338    }
339}