Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 153
0.00% covered (danger)
0.00%
0 / 12
CRAP
0.00% covered (danger)
0.00%
0 / 1
TranslationCorporaStore
0.00% covered (danger)
0.00%
0 / 153
0.00% covered (danger)
0.00%
0 / 12
420
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 updateTranslationUnit
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
6
 insertTranslationUnit
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
2
 deleteTranslationData
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 deleteTranslationDataBySectionId
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
2
 countByTranslationId
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
2
 deleteTranslationDataGently
0.00% covered (danger)
0.00%
0 / 16
0.00% covered (danger)
0.00%
0 / 1
12
 findByTranslationId
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
6
 countTranslatedSubSectionsByTranslationId
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
2
 save
0.00% covered (danger)
0.00%
0 / 29
0.00% covered (danger)
0.00%
0 / 1
20
 doFind
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
6
 createTranslationUnitFromRow
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2/**
3 * @copyright See AUTHORS.txt
4 * @license GPL-2.0-or-later
5 */
6declare( strict_types = 1 );
7
8namespace ContentTranslation\Store;
9
10use ContentTranslation\Entity\TranslationUnit;
11use ContentTranslation\Service\ContentCompressionService;
12use LogicException;
13use Psr\Log\LoggerInterface;
14use stdClass;
15use Wikimedia\Rdbms\IConnectionProvider;
16use Wikimedia\Rdbms\IDatabase;
17use Wikimedia\Rdbms\IExpression;
18use Wikimedia\Rdbms\IReadableDatabase;
19use Wikimedia\Rdbms\LikeValue;
20use Wikimedia\Rdbms\Platform\ISQLPlatform;
21use Wikimedia\Rdbms\SelectQueryBuilder;
22
23/**
24 * The TranslationCorporaStore service represents the Data Access Layer for the parallel corpora. More
25 * specifically, this service relies on the database load balancer to interact with the "cx_corpora"
26 * table, to insert, update, delete or fetch data to this table. This class exposes methods that
27 * usually accepts TranslationUnit entity objects as arguments (when it's an insert/update query),
28 * or return TranslationUnit entity objects (for read operations). This service is mostly used inside
29 * TranslationCorporaManager service to interact with the database.
30 */
31class TranslationCorporaStore {
32    public const TABLE_NAME = 'cx_corpora';
33
34    public function __construct(
35        private readonly IConnectionProvider $connectionProvider,
36        private readonly LoggerInterface $logger,
37        private readonly ContentCompressionService $compressionService
38    ) {
39    }
40
41    /**
42     * Update a translation unit.
43     *
44     * @param TranslationUnit $translationUnit
45     * @param string $timestamp
46     */
47    private function updateTranslationUnit( TranslationUnit $translationUnit, string $timestamp ): void {
48        $dbw = $this->connectionProvider->getPrimaryDatabase();
49
50        $dbw->newUpdateQueryBuilder()
51            ->update( self::TABLE_NAME )
52            ->set( [
53                'cxc_sequence_id' => $translationUnit->getSequenceId(),
54                'cxc_timestamp' => $dbw->timestamp(),
55                'cxc_content' => $this->compressionService->compress( $translationUnit->getContent() )
56            ] )
57            ->where( [
58                'cxc_translation_id' => $translationUnit->getTranslationId(),
59                'cxc_section_id' => $translationUnit->getSectionId(),
60                'cxc_origin' => $translationUnit->getOrigin(),
61                // Sometimes we get "duplicates" entries which differ in timestamp.
62                // Then any updates to those sections would fail (duplicate key for
63                // a unique index), if we did not limit this call to only one of them.
64                'cxc_timestamp' => $dbw->timestamp( $timestamp ),
65            ] )
66            ->caller( __METHOD__ )
67            ->execute();
68
69        if ( $dbw->affectedRows() < 1 ) {
70            // Possible reasons:
71            // * concurrent request has already updated the row with new timestamp
72            // * no change (saving same thing twice in the same second)
73            // * translation has been deleted
74            throw new LogicException( 'Failed to update a translation section' );
75        }
76    }
77
78    /**
79     * Insert a translation unit if it doesn't already exist.
80     *
81     * @param TranslationUnit $translationUnit
82     * @return int Number of rows inserted
83     */
84    private function insertTranslationUnit( TranslationUnit $translationUnit ): int {
85        $dbw = $this->connectionProvider->getPrimaryDatabase();
86
87        $dbw->newInsertQueryBuilder()
88            ->insertInto( self::TABLE_NAME )
89            ->row( [
90                'cxc_translation_id' => $translationUnit->getTranslationId(),
91                'cxc_section_id' => $translationUnit->getSectionId(),
92                'cxc_origin' => $translationUnit->getOrigin(),
93                'cxc_sequence_id' => $translationUnit->getSequenceId(),
94                'cxc_timestamp' => $dbw->timestamp(),
95                'cxc_content' => $this->compressionService->compress( $translationUnit->getContent() )
96            ] )
97            ->caller( __METHOD__ )
98            ->ignore()
99            ->execute();
100
101        return $dbw->affectedRows();
102    }
103
104    /**
105     * Delete translation units and categories associated with the given translation identifier.
106     *
107     * @param int|int[] $translationId
108     */
109    public function deleteTranslationData( $translationId ): void {
110        $dbw = $this->connectionProvider->getPrimaryDatabase();
111
112        $dbw->newDeleteQueryBuilder()
113            ->deleteFrom( self::TABLE_NAME )
114            ->where( [ 'cxc_translation_id' => $translationId ] )
115            ->caller( __METHOD__ )
116            ->execute();
117    }
118
119    /**
120     * Given the "parent" translation id and the base section id (in the "${revision}_${sectionNumber}"
121     * form), this method deletes all the translation units that belong to that section translation,
122     * from the "cx_corpora" table.
123     *
124     * NOTE: The "cxc_section_id" field inside "cx_corpora" table is in the following form for
125     * section translation parallel corpora units: "${baseSectionId}_${subSectionId}", where
126     * "subSectionId" is given by the cxserver as the section HTML element id (e.g. "cxSourceSection4").
127     * This is why we use a "LIKE" query in the following form, here: "${baseSectionId}_%"
128     *
129     * @param int $translationId the id of the "parent" translation inside "cx_translations" table
130     * @param string $baseSectionId the "cxsx_section_id" as stored inside "cx_section_translations" table
131     * @return void
132     */
133    public function deleteTranslationDataBySectionId( int $translationId, string $baseSectionId ): void {
134        $dbw = $this->connectionProvider->getPrimaryDatabase();
135
136        $dbw->newDeleteQueryBuilder()
137            ->deleteFrom( self::TABLE_NAME )
138            ->where( [
139                'cxc_translation_id' => $translationId,
140                $dbw->expr( 'cxc_section_id', IExpression::LIKE,
141                    new LikeValue( $baseSectionId, '_', $dbw->anyString() ) ),
142            ] )
143            ->caller( __METHOD__ )
144            ->execute();
145    }
146
147    /**
148     * Given a translation id, this method returns the count of the parallel corpora
149     * translation units, associated with this translation id.
150     *
151     * @param int $translationId the id of the translation inside "cx_translations" table
152     * @return int
153     */
154    public function countByTranslationId( int $translationId ): int {
155        $dbr = $this->connectionProvider->getReplicaDatabase();
156
157        return $dbr->newSelectQueryBuilder()
158            ->select( ISQLPlatform::ALL_ROWS )
159            ->from( self::TABLE_NAME )
160            ->where( [ 'cxc_translation_id' => $translationId ] )
161            ->caller( __METHOD__ )
162            ->fetchRowCount();
163    }
164
165    /**
166     * Delete translation units and categories associated with the given translation identifier
167     * in a manner that avoids creating excessive database lag.
168     *
169     * @param int|int[] $ids
170     * @param int $batchSize
171     */
172    public function deleteTranslationDataGently( $ids, int $batchSize = 1000 ): void {
173        $dbw = $this->connectionProvider->getPrimaryDatabase();
174
175        while ( true ) {
176            $rowsToDelete = $dbw->newSelectQueryBuilder()
177                ->select( 'cxc_id' )
178                ->from( self::TABLE_NAME )
179                ->where( [ 'cxc_translation_id' => $ids ] )
180                ->limit( $batchSize )
181                ->caller( __METHOD__ )
182                ->fetchFieldValues();
183
184            if ( !$rowsToDelete ) {
185                break;
186            }
187
188            $dbw->newDeleteQueryBuilder()
189                ->deleteFrom( self::TABLE_NAME )
190                ->where( [ 'cxc_id' => $rowsToDelete ] )
191                ->caller( __METHOD__ )
192                ->execute();
193        }
194    }
195
196    /**
197     * @param int $translationId
198     * @return TranslationUnit[]
199     */
200    public function findByTranslationId( int $translationId ): array {
201        $dbr = $this->connectionProvider->getReplicaDatabase();
202
203        $resultSet = $dbr->newSelectQueryBuilder()
204            ->select( [
205                'cxc_translation_id',
206                'cxc_origin',
207                'cxc_section_id',
208                'cxc_timestamp',
209                'cxc_sequence_id',
210                'cxc_content',
211            ] )
212            ->from( self::TABLE_NAME )
213            ->where( [ 'cxc_translation_id' => $translationId ] )
214            ->caller( __METHOD__ )
215            ->fetchResultSet();
216
217        $units = [];
218        foreach ( $resultSet as $row ) {
219            $units[] = $this->createTranslationUnitFromRow( $row );
220        }
221
222        return $units;
223    }
224
225    /**
226     * Given a translation id, this method returns an integer, indicating
227     * the count of the translated subsections (paragraphs) for that translation.
228     *
229     * @param int $translationId
230     * @return int count of translated subsections
231     */
232    public function countTranslatedSubSectionsByTranslationId( int $translationId ): int {
233        $dbr = $this->connectionProvider->getReplicaDatabase();
234
235        return (int)$dbr->newSelectQueryBuilder()
236            ->select( 'COUNT(DISTINCT cxc_section_id)' )
237            ->from( self::TABLE_NAME )
238            ->where( [ 'cxc_translation_id' => $translationId ] )
239            ->caller( __METHOD__ )
240            ->fetchField();
241    }
242
243    /**
244     * Saves the translation unit. If the record exists, updates it, otherwise creates it.
245     *
246     * @param TranslationUnit $translationUnit
247     * @param bool $isNewTranslation Whether these are for a brand-new Translation record
248     */
249    public function save( TranslationUnit $translationUnit, bool $isNewTranslation ): void {
250        $fname = __METHOD__;
251        // Update the latest row if there is one instead of making a new one
252        $conditions = [
253            'cxc_translation_id' => $translationUnit->getTranslationId(),
254            'cxc_section_id' => $translationUnit->getSectionId(),
255            'cxc_origin' => $translationUnit->getOrigin()
256        ];
257        if ( $isNewTranslation ) {
258            // T134245: brand new translations can also insert corpora data in the same
259            // request. The doFind() query uses only a subset of a unique cx_corpora index,
260            // causing SH gap locks. Worse, is that the leftmost values comes from the
261            // auto-incrementing translation_id. This puts gap locks on the range of
262            // (MAX(cxc_translation_id),+infinity), which could make the whole API prone
263            // to deadlocks and timeouts. Bypass this problem by remembering if the parent
264            // translation row is brand new and skipping doFind() in such cases.
265            $existing = false;
266        } else {
267            // Note that the only caller of this method will have already locked the
268            // parent Translation row, serializing simultaneous duplicate submissions at
269            // this point. Without that row lock, the two transaction might both acquire
270            // SH gap locks in doFind() and then deadlock in create() trying to get IX gap
271            // locks (if no duplicate rows were found).
272            $options = [];
273            $dbr = $this->connectionProvider->getReplicaDatabase();
274            $existing = $this->doFind( $dbr, $conditions, $options, $fname );
275        }
276
277        if ( $existing ) {
278            $dbw = $this->connectionProvider->getPrimaryDatabase();
279            $dbw->doAtomicSection(
280                __METHOD__,
281                function ( IDatabase $dbw ) use ( $translationUnit, $conditions, $fname ) {
282                    // Lock the record for updating it. This time we use $dbw - primary db.
283                    // This is to avoid the unnecessary gap locking with 'for update' query
284                    // when the record does not exist.
285                    $options = [ 'FOR UPDATE' ];
286                    $existing = $this->doFind( $dbw, $conditions, $options, $fname );
287                    $this->updateTranslationUnit( $translationUnit, $existing->getTimestamp() );
288                }
289            );
290        } else {
291            $logParams = $conditions + [
292                'contentHash' => crc32( $translationUnit->getContent() ),
293                'contentLength' => strlen( $translationUnit->getContent() )
294            ];
295            $affectedRows = $this->insertTranslationUnit( $translationUnit );
296            if ( $affectedRows === 0 ) {
297                $this->logger->info( 'No translation unit inserted due to unique key conflict.', $logParams );
298            } else {
299                // ContentTranslation channel is configured to log "info"+ level requests
300                // This generates a lot of logs in ContentTranslation channel.
301                // It can be changed to info when investigating T397910
302                $this->logger->debug( 'Inserted translation unit', $logParams );
303            }
304        }
305    }
306
307    /**
308     * @param IReadableDatabase $db
309     * @param array $conditions
310     * @param array $options
311     * @param string $method
312     * @return TranslationUnit|null
313     */
314    private function doFind( IReadableDatabase $db, $conditions, $options, $method ): ?TranslationUnit {
315        $row = $db->newSelectQueryBuilder()
316            ->select( [
317                'cxc_translation_id',
318                'cxc_section_id',
319                'cxc_origin',
320                'cxc_timestamp',
321                'cxc_sequence_id',
322                'cxc_content'
323            ] )
324            ->from( self::TABLE_NAME )
325            ->where( $conditions )
326            ->orderBy( 'cxc_timestamp', SelectQueryBuilder::SORT_DESC )
327            ->options( $options )
328            ->caller( $method )
329            ->fetchRow();
330        if ( $row ) {
331            return $this->createTranslationUnitFromRow( $row );
332        }
333
334        return null;
335    }
336
337    private function createTranslationUnitFromRow( stdClass $row ): TranslationUnit {
338        return new TranslationUnit(
339            $row->cxc_section_id,
340            $row->cxc_origin,
341            (int)$row->cxc_sequence_id, // cxc_sequence_id can be null
342            $this->compressionService->decompress( $row->cxc_content ),
343            (int)$row->cxc_translation_id,
344            $row->cxc_timestamp
345        );
346    }
347}