Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 77 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
| TranslationCorporaManager | |
0.00% |
0 / 77 |
|
0.00% |
0 / 9 |
506 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getCorporaDumpArraysByTranslationId | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
| getUnitsAndCategoriesByTranslationId | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
2 | |||
| saveTranslationUnits | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| getFilteredCorporaUnits | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| createTranslationUnitsFromContent | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
42 | |||
| getTranslationUnitDTOsByTranslationId | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| getTranslationUnitDTOsAndCategoriesByTranslationId | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
| createDTOsFromTranslationUnits | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
30 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace ContentTranslation\Manager; |
| 5 | |
| 6 | use ContentTranslation\DTO\TranslationUnitDTO; |
| 7 | use ContentTranslation\Entity\TranslationUnit; |
| 8 | use ContentTranslation\Exception\InvalidSectionDataException; |
| 9 | use ContentTranslation\Store\TranslationCorporaStore; |
| 10 | use ContentTranslation\Translation; |
| 11 | use MediaWiki\Json\FormatJson; |
| 12 | |
/**
 * The TranslationCorporaManager service intends to serve all Action API classes, or other
 * scripts, that need to fetch data from the "cx_corpora" table in some specific form.
 * This service depends on the TranslationCorporaStore service to interact with the
 * database table. The difference between these two classes is that TranslationCorporaStore
 * intends to represent the Data Access Layer, while the TranslationCorporaManager lives in
 * the Domain Layer of the application, meaning that it modifies data fetched from the database
 * to serve them to its "clients" as needed.
 *
 * @author Nik Gkountas
 * @license GPL-2.0-or-later
 */
class TranslationCorporaManager {

	/**
	 * Special section ID under which category data is stored next to the regular
	 * translation units, so that categories can be told apart from sections.
	 */
	private const CATEGORIES = 'CX_CATEGORY_METADATA';

	/**
	 * @param TranslationCorporaStore $corporaStore Store used for every read and write of units
	 */
	public function __construct( private readonly TranslationCorporaStore $corporaStore ) {
	}

	/**
	 * Corpora-dump representation of the units of a translation. Units that lack either
	 * a user blob or a source blob are left out.
	 *
	 * @param int $translationId
	 * @param bool $sanitize Forwarded to TranslationUnitDTO::toCorporaDumpArray()
	 * @return array Results of TranslationUnitDTO::toCorporaDumpArray(), keyed by section ID
	 */
	public function getCorporaDumpArraysByTranslationId( int $translationId, bool $sanitize ): array {
		// array_filter() and single-array array_map() both preserve the section ID keys.
		$sections = array_filter(
			$this->getTranslationUnitDTOsByTranslationId( $translationId ),
			static fn ( TranslationUnitDTO $unit ) => $unit->hasUserBlob() && $unit->hasSourceBlob()
		);

		return array_map(
			static fn ( TranslationUnitDTO $unit ) => $unit->toCorporaDumpArray( $sanitize ),
			$sections
		);
	}

	/**
	 * Translation units and target categories. Only target categories are fetched
	 * when translation draft is restored. Source categories are saved into cx_corpora table for
	 * pairing with target categories, but not retrieved when translation draft is restored.
	 *
	 * @param int $translationId
	 * @return array { translationUnits: array, categories: ?string } where "translationUnits"
	 *  holds the TranslationUnitDTO::toArray() result of each unit, keyed by section ID
	 */
	public function getUnitsAndCategoriesByTranslationId( int $translationId ): array {
		$unitsAndCategories = $this->getTranslationUnitDTOsAndCategoriesByTranslationId( $translationId );

		return [
			'translationUnits' => array_map(
				static fn ( TranslationUnitDTO $unit ) => $unit->toArray(),
				$unitsAndCategories['sections']
			),
			'categories' => $unitsAndCategories['categories']
		];
	}

	/**
	 * Create translation units from the given JSON content and save each of them
	 * through the corpora store.
	 *
	 * @param Translation $translation Recently saved parent translation object
	 * @param string $content JSON-encoded list of section data
	 * @return TranslationUnit[] The units that were saved
	 * @throws InvalidSectionDataException
	 */
	public function saveTranslationUnits( Translation $translation, string $content ): array {
		$translationUnits = $this->createTranslationUnitsFromContent( $content, $translation->getTranslationId() );

		$isNewTranslation = $translation->isNew();
		foreach ( $translationUnits as $translationUnit ) {
			$this->corporaStore->save( $translationUnit, $isNewTranslation );
		}

		return $translationUnits;
	}

	/**
	 * @param int $translationId
	 * @param array $types should be an array of valid types. e.g. ['user', 'mt', 'source']
	 * @param bool $sanitize Forwarded to TranslationUnitDTO::toCustomArray()
	 * @return array Results of TranslationUnitDTO::toCustomArray(), keyed by section ID
	 */
	public function getFilteredCorporaUnits( int $translationId, array $types, bool $sanitize ): array {
		return array_map(
			static fn ( TranslationUnitDTO $unit ) => $unit->toCustomArray( $types, $sanitize ),
			$this->getTranslationUnitDTOsByTranslationId( $translationId )
		);
	}

	/**
	 * Decode the JSON payload and turn every entry into a TranslationUnit entity.
	 *
	 * @param string $content JSON-encoded list of section data
	 * @param int $translationId
	 * @return TranslationUnit[]
	 * @throws InvalidSectionDataException When the payload does not decode to an array, or
	 *  when an entry is not an array, has no "sectionId", or has a non-string "origin"
	 */
	private function createTranslationUnitsFromContent( string $content, int $translationId ): array {
		$units = FormatJson::decode( $content, true );
		if ( !is_array( $units ) ) {
			// Malformed JSON decodes to null and scalar JSON documents decode to scalars.
			// Neither can be iterated as a list of sections, so reject them explicitly
			// instead of letting foreach raise a warning and silently save nothing.
			throw new InvalidSectionDataException();
		}

		$translationUnits = [];
		foreach ( $units as $translationUnitData ) {
			if (
				!is_array( $translationUnitData ) ||
				!isset( $translationUnitData['sectionId'] ) ||
				// "?? null" keeps a missing "origin" from raising an undefined-key warning
				!is_string( $translationUnitData['origin'] ?? null )
			) {
				throw new InvalidSectionDataException();
			}

			$validate = (bool)( $translationUnitData['validate'] ?? false );
			$timestamp = $translationUnitData['timestamp'] ?? null;
			$sequenceId = isset( $translationUnitData['sequenceId'] ) ? (int)$translationUnitData['sequenceId'] : null;
			'@phan-var ?string $timestamp';
			$translationUnits[] = new TranslationUnit(
				(string)$translationUnitData['sectionId'],
				$translationUnitData['origin'],
				$sequenceId,
				// Content can be null (or absent) in case translator clears the section.
				(string)( $translationUnitData['content'] ?? '' ),
				$translationId,
				$timestamp,
				$validate
			);
		}

		return $translationUnits;
	}

	/**
	 * Fetch the units of a translation as DTOs, without the category pseudo-section.
	 *
	 * @param int $id Translation ID
	 * @return TranslationUnitDTO[] array indexed by the sectionId of each unit
	 */
	public function getTranslationUnitDTOsByTranslationId( int $id ): array {
		$translationUnits = $this->corporaStore->findByTranslationId( $id );
		$translationUnitDTOs = $this->createDTOsFromTranslationUnits( $translationUnits );
		unset( $translationUnitDTOs[ self::CATEGORIES ] );

		return $translationUnitDTOs;
	}

	/**
	 * Fetch the units of a translation as DTOs and, separately, the target categories.
	 *
	 * @param int $id Translation ID
	 * @return array { sections: TranslationUnitDTO[], categories: ?string }
	 */
	private function getTranslationUnitDTOsAndCategoriesByTranslationId( int $id ): array {
		$translationUnits = $this->corporaStore->findByTranslationId( $id );
		$translationUnitDTOs = $this->createDTOsFromTranslationUnits( $translationUnits );

		$targetCategories = null;

		if ( isset( $translationUnitDTOs[ self::CATEGORIES ] ) ) {
			// Extract target categories and return separately from translation units (sections).
			// Source categories aren't retrieved, only saved in cx_corpora for pairing
			// with target categories. Source and target categories are saved in cx_corpora table
			// with special section ID, to distinguish categories from translation units.
			$userBlob = $translationUnitDTOs[ self::CATEGORIES ]->getUserBlob();

			if ( $userBlob ) {
				$targetCategories = $userBlob[ 'content' ];
			}
			unset( $translationUnitDTOs[ self::CATEGORIES ] );
		}

		return [
			'sections' => $translationUnitDTOs,
			'categories' => $targetCategories
		];
	}

	/**
	 * Group translation units by section ID into DTOs, one blob per type (source, user or MT).
	 *
	 * @param TranslationUnit[] $translationUnits
	 * @return TranslationUnitDTO[] array indexed by the sectionId of each unit
	 */
	private function createDTOsFromTranslationUnits( array $translationUnits ): array {
		/** @var TranslationUnitDTO[] $translationDTOs */
		$translationDTOs = [];

		foreach ( $translationUnits as $unit ) {
			// Units sharing a section ID are merged into a single DTO; the DTO takes its
			// sequence ID from the first unit seen for that section. This relies on the
			// original author's assumption that these IDs are unique and won't be re-used.
			$id = $unit->getSectionId();
			$translationDTO = $translationDTOs[$id] ?? new TranslationUnitDTO( $id, (int)$unit->getSequenceId() );

			// Any origin other than "source" or "user" is treated as the name of an MT engine.
			$origin = $unit->getOrigin();
			$isMT = $origin !== TranslationUnitDTO::TYPE_SOURCE && $origin !== TranslationUnitDTO::TYPE_USER;
			$type = $isMT ? TranslationUnitDTO::TYPE_MT : $origin;

			$blob = [
				'engine' => $type === TranslationUnitDTO::TYPE_MT ? $origin : null,
				'content' => $unit->getContent(),
				// TS_ISO_8601 is used because it includes timezone (always Z)
				'timestamp' => wfTimestamp( TS_ISO_8601, $unit->getTimestamp() ),
			];

			$translationDTO->setBlobForType( $type, $blob );
			$translationDTOs[$id] = $translationDTO;
		}

		return $translationDTOs;
	}

}