Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 146
0.00% covered (danger)
0.00%
0 / 12
CRAP
0.00% covered (danger)
0.00%
0 / 1
TranslationCorporaStore
0.00% covered (danger)
0.00%
0 / 146
0.00% covered (danger)
0.00%
0 / 12
380
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 updateTranslationUnit
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
6
 insertTranslationUnit
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
2
 deleteTranslationData
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 deleteTranslationDataBySectionId
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
2
 countByTranslationId
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
2
 deleteTranslationDataGently
0.00% covered (danger)
0.00%
0 / 17
0.00% covered (danger)
0.00%
0 / 1
12
 findByTranslationId
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
6
 countTranslatedSubSectionsByTranslationId
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
2
 save
0.00% covered (danger)
0.00%
0 / 22
0.00% covered (danger)
0.00%
0 / 1
12
 doFind
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
6
 createTranslationUnitFromRow
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2/**
3 * @copyright See AUTHORS.txt
4 * @license GPL-2.0-or-later
5 */
6declare( strict_types = 1 );
7
8namespace ContentTranslation\Store;
9
10use ContentTranslation\Entity\TranslationUnit;
11use ContentTranslation\LoadBalancer;
12use LogicException;
13use stdClass;
14use Wikimedia\Rdbms\IDatabase;
15use Wikimedia\Rdbms\IExpression;
16use Wikimedia\Rdbms\LBFactory;
17use Wikimedia\Rdbms\LikeValue;
18use Wikimedia\Rdbms\Platform\ISQLPlatform;
19use Wikimedia\Rdbms\SelectQueryBuilder;
20
21/**
22 * The TranslationCorporaStore service represents the Data Access Layer for the parallel corpora. More
23 * specifically, this service relies on the database load balancer to interact with the "cx_corpora"
24 * table, to insert, update, delete or fetch data to this table. This class exposes methods that
25 * usually accepts TranslationUnit entity objects as arguments (when it's an insert/update query),
26 * or return TranslationUnit entity objects (for read operations). This service is mostly used inside
27 * TranslationCorporaManager service to interact with the database.
28 */
29class TranslationCorporaStore {
30
31    /** @var LoadBalancer */
32    private $lb;
33
34    /** @var LBFactory */
35    private $lbFactory;
36    public const TABLE_NAME = 'cx_corpora';
37
38    public function __construct( LoadBalancer $lb, LBFactory $lbFactory ) {
39        $this->lb = $lb;
40        $this->lbFactory = $lbFactory;
41    }
42
43    /**
44     * Update a translation unit.
45     *
46     * @param TranslationUnit $translationUnit
47     * @param string $timestamp
48     */
49    private function updateTranslationUnit( TranslationUnit $translationUnit, string $timestamp ): void {
50        $dbw = $this->lb->getConnection( DB_PRIMARY );
51
52        $dbw->newUpdateQueryBuilder()
53            ->update( self::TABLE_NAME )
54            ->set( [
55                'cxc_sequence_id' => $translationUnit->getSequenceId(),
56                'cxc_timestamp' => $dbw->timestamp(),
57                'cxc_content' => $translationUnit->getContent()
58            ] )
59            ->where( [
60                'cxc_translation_id' => $translationUnit->getTranslationId(),
61                'cxc_section_id' => $translationUnit->getSectionId(),
62                'cxc_origin' => $translationUnit->getOrigin(),
63                // Sometimes we get "duplicates" entries which differ in timestamp.
64                // Then any updates to those sections would fail (duplicate key for
65                // a unique index), if we did not limit this call to only one of them.
66                'cxc_timestamp' => $dbw->timestamp( $timestamp ),
67            ] )
68            ->caller( __METHOD__ )
69            ->execute();
70
71        if ( $dbw->affectedRows() < 1 ) {
72            // Possible reasons:
73            // * concurrent request has already updated the row with new timestamp
74            // * no change (saving same thing twice in the same second)
75            // * translation has been deleted
76            throw new LogicException( 'Failed to update a translation section' );
77        }
78    }
79
80    /**
81     * Insert a translation unit.
82     *
83     * @param TranslationUnit $translationUnit
84     */
85    private function insertTranslationUnit( TranslationUnit $translationUnit ): void {
86        $dbw = $this->lb->getConnection( DB_PRIMARY );
87
88        $dbw->newInsertQueryBuilder()
89            ->insertInto( self::TABLE_NAME )
90            ->row( [
91                'cxc_translation_id' => $translationUnit->getTranslationId(),
92                'cxc_section_id' => $translationUnit->getSectionId(),
93                'cxc_origin' => $translationUnit->getOrigin(),
94                'cxc_sequence_id' => $translationUnit->getSequenceId(),
95                'cxc_timestamp' => $dbw->timestamp(),
96                'cxc_content' => $translationUnit->getContent()
97            ] )
98            ->caller( __METHOD__ )
99            ->execute();
100    }
101
102    /**
103     * Delete translation units and categories associated with the given translation identifier.
104     *
105     * @param int|int[] $translationId
106     */
107    public function deleteTranslationData( $translationId ): void {
108        $dbw = $this->lb->getConnection( DB_PRIMARY );
109
110        $dbw->newDeleteQueryBuilder()
111            ->deleteFrom( self::TABLE_NAME )
112            ->where( [ 'cxc_translation_id' => $translationId ] )
113            ->caller( __METHOD__ )
114            ->execute();
115    }
116
117    /**
118     * Given the "parent" translation id and the base section id (in the "${revision}_${sectionNumber}"
119     * form), this method deletes all the translation units that belong to that section translation,
120     * from the "cx_corpora" table.
121     *
122     * NOTE: The "cxc_section_id" field inside "cx_corpora" table is in the following form for
123     * section translation parallel corpora units: "${baseSectionId}_${subSectionId}", where
124     * "subSectionId" is given by the cxserver as the section HTML element id (e.g. "cxSourceSection4").
125     * This is why we use a "LIKE" query in the following form, here: "${baseSectionId}_%"
126     *
127     * @param int $translationId the id of the "parent" translation inside "cx_translations" table
128     * @param string $baseSectionId the "cxsx_section_id" as stored inside "cx_section_translations" table
129     * @return void
130     */
131    public function deleteTranslationDataBySectionId( int $translationId, string $baseSectionId ): void {
132        $dbw = $this->lb->getConnection( DB_PRIMARY );
133
134        $dbw->newDeleteQueryBuilder()
135            ->deleteFrom( self::TABLE_NAME )
136            ->where( [
137                'cxc_translation_id' => $translationId,
138                $dbw->expr( 'cxc_section_id', IExpression::LIKE,
139                    new LikeValue( $baseSectionId, '_', $dbw->anyString() ) ),
140            ] )
141            ->caller( __METHOD__ )
142            ->execute();
143    }
144
145    /**
146     * Given a translation id, this method returns the count of the parallel corpora
147     * translation units, associated with this translation id.
148     *
149     * @param int $translationId the id of the translation inside "cx_translations" table
150     * @return int
151     */
152    public function countByTranslationId( int $translationId ): int {
153        $dbr = $this->lb->getConnection( DB_REPLICA );
154
155        return $dbr->newSelectQueryBuilder()
156            ->select( ISQLPlatform::ALL_ROWS )
157            ->from( self::TABLE_NAME )
158            ->where( [ 'cxc_translation_id' => $translationId ] )
159            ->caller( __METHOD__ )
160            ->fetchRowCount();
161    }
162
163    /**
164     * Delete translation units and categories associated with the given translation identifier
165     * in a manner that avoids creating excessive database lag.
166     *
167     * @param int|int[] $ids
168     * @param int $batchSize
169     */
170    public function deleteTranslationDataGently( $ids, int $batchSize = 1000 ): void {
171        $dbw = $this->lb->getConnection( DB_PRIMARY );
172
173        while ( true ) {
174            $rowsToDelete = $dbw->newSelectQueryBuilder()
175                ->select( 'cxc_id' )
176                ->from( self::TABLE_NAME )
177                ->where( [ 'cxc_translation_id' => $ids ] )
178                ->limit( $batchSize )
179                ->caller( __METHOD__ )
180                ->fetchFieldValues();
181
182            if ( !$rowsToDelete ) {
183                break;
184            }
185
186            $dbw->newDeleteQueryBuilder()
187                ->deleteFrom( self::TABLE_NAME )
188                ->where( [ 'cxc_id' => $rowsToDelete ] )
189                ->caller( __METHOD__ )
190                ->execute();
191            $this->lbFactory->waitForReplication();
192        }
193    }
194
195    /**
196     * @param int $translationId
197     * @return TranslationUnit[]
198     */
199    public function findByTranslationId( int $translationId ): array {
200        $dbr = $this->lb->getConnection( DB_REPLICA );
201
202        $resultSet = $dbr->newSelectQueryBuilder()
203            ->select( [
204                'cxc_translation_id',
205                'cxc_origin',
206                'cxc_section_id',
207                'cxc_timestamp',
208                'cxc_sequence_id',
209                'cxc_content',
210            ] )
211            ->from( self::TABLE_NAME )
212            ->where( [ 'cxc_translation_id' => $translationId ] )
213            ->caller( __METHOD__ )
214            ->fetchResultSet();
215
216        $units = [];
217        foreach ( $resultSet as $row ) {
218            $units[] = $this->createTranslationUnitFromRow( $row );
219        }
220
221        return $units;
222    }
223
224    /**
225     * Given a translation id, this method returns an integer, indicating
226     * the count of the translated subsections (paragraphs) for that translation.
227     *
228     * @param int $translationId
229     * @return int count of translated subsections
230     */
231    public function countTranslatedSubSectionsByTranslationId( int $translationId ): int {
232        $dbr = $this->lb->getConnection( DB_REPLICA );
233
234        return (int)$dbr->newSelectQueryBuilder()
235            ->select( 'COUNT(DISTINCT cxc_section_id)' )
236            ->from( self::TABLE_NAME )
237            ->where( [ 'cxc_translation_id' => $translationId ] )
238            ->caller( __METHOD__ )
239            ->fetchField();
240    }
241
242    /**
243     * Saves the translation unit. If the record exists, updates it, otherwise creates it.
244     *
245     * @param TranslationUnit $translationUnit
246     * @param bool $isNewTranslation Whether these are for a brand-new Translation record
247     */
248    public function save( TranslationUnit $translationUnit, bool $isNewTranslation ): void {
249        $fname = __METHOD__;
250        // Update the latest row if there is one instead of making a new one
251        $conditions = [
252            'cxc_translation_id' => $translationUnit->getTranslationId(),
253            'cxc_section_id' => $translationUnit->getSectionId(),
254            'cxc_origin' => $translationUnit->getOrigin()
255        ];
256        if ( $isNewTranslation ) {
257            // T134245: brand new translations can also insert corpora data in the same
258            // request. The doFind() query uses only a subset of a unique cx_corpora index,
259            // causing SH gap locks. Worse, is that the leftmost values comes from the
260            // auto-incrementing translation_id. This puts gap locks on the range of
261            // (MAX(cxc_translation_id),+infinity), which could make the whole API prone
262            // to deadlocks and timeouts. Bypass this problem by remembering if the parent
263            // translation row is brand new and skipping doFind() in such cases.
264            $existing = false;
265        } else {
266            // Note that the only caller of this method will have already locked the
267            // parent Translation row, serializing simultaneous duplicate submissions at
268            // this point. Without that row lock, the two transaction might both acquire
269            // SH gap locks in doFind() and then deadlock in create() trying to get IX gap
270            // locks (if no duplicate rows were found).
271            $options = [];
272            $dbr = $this->lb->getConnection( DB_REPLICA );
273            $existing = $this->doFind( $dbr, $conditions, $options, $fname );
274        }
275
276        if ( $existing ) {
277            $dbw = $this->lb->getConnection( DB_PRIMARY );
278            $dbw->doAtomicSection(
279                __METHOD__,
280                function ( IDatabase $dbw ) use ( $translationUnit, $conditions, $fname ) {
281                    // Lock the record for updating it. This time we use $dbw - primary db.
282                    // This is to avoid the unnecessary gap locking with 'for update' query
283                    // when the record does not exist.
284                    $options = [ 'FOR UPDATE' ];
285                    $existing = $this->doFind( $dbw, $conditions, $options, $fname );
286                    $this->updateTranslationUnit( $translationUnit, $existing->getTimestamp() );
287                }
288            );
289        } else {
290            $this->insertTranslationUnit( $translationUnit );
291        }
292    }
293
294    private function doFind( IDatabase $db, $conditions, $options, $method ): ?TranslationUnit {
295        $row = $db->newSelectQueryBuilder()
296            ->select( [
297                'cxc_translation_id',
298                'cxc_section_id',
299                'cxc_origin',
300                'cxc_timestamp',
301                'cxc_sequence_id',
302                'cxc_content'
303            ] )
304            ->from( self::TABLE_NAME )
305            ->where( $conditions )
306            ->orderBy( 'cxc_timestamp', SelectQueryBuilder::SORT_DESC )
307            ->options( $options )
308            ->caller( $method )
309            ->fetchRow();
310        if ( $row ) {
311            return $this->createTranslationUnitFromRow( $row );
312        }
313
314        return null;
315    }
316
317    private function createTranslationUnitFromRow( stdClass $row ): TranslationUnit {
318        return new TranslationUnit(
319            $row->cxc_section_id,
320            $row->cxc_origin,
321            (int)$row->cxc_sequence_id, // cxc_sequence_id can be null
322            (string)$row->cxc_content, // cxc_content can be null
323            (int)$row->cxc_translation_id,
324            $row->cxc_timestamp
325        );
326    }
327}