Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 52 |
|
0.00% |
0 / 12 |
CRAP | |
0.00% |
0 / 1 |
TranslationUnitDTO | |
0.00% |
0 / 52 |
|
0.00% |
0 / 12 |
600 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
setBlobForType | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
toArray | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
getUserBlob | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getMtBlob | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasUserBlob | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasSourceBlob | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
toCustomArray | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
toCorporaDumpArray | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
20 | |||
getRevision | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getMwSectionNumber | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getBaseSectionId | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace ContentTranslation\DTO; |
5 | |
6 | use InvalidArgumentException; |
7 | use MediaWiki\Parser\Sanitizer; |
8 | use UnexpectedValueException; |
9 | |
/**
 * Data transfer object holding the stored blobs of one translation unit.
 *
 * A unit is identified by a section id and an optional sequence id, and carries up to
 * three blobs (one per entry of VALID_BLOB_TYPES): the source content, the machine
 * translation and the user-provided translation.
 */
class TranslationUnitDTO {
	public const TYPE_SOURCE = 'source';
	public const TYPE_MT = 'mt';
	public const TYPE_USER = 'user';

	/** Blob types accepted by setBlobForType(); each one is also the name of a property below. */
	public const VALID_BLOB_TYPES = [
		self::TYPE_MT,
		self::TYPE_SOURCE,
		self::TYPE_USER
	];

	private ?int $sequenceId;
	/**
	 * Fields: [ 'engine' => null, 'content' => string, 'timestamp' => ISO string ]
	 */
	private ?array $source;
	/**
	 * Fields: [ 'engine' => string (e.g. "Google"), 'content' => string, 'timestamp' => ISO string ]
	 */
	private ?array $mt;
	/**
	 * Fields: [ 'engine' => null, 'content' => string, 'timestamp' => ISO string ]
	 */
	private ?array $user;
	private string $sectionId;

	/**
	 * @param string $sectionId Expected format: ${revision}_${sectionNumber}_${subSectionId}
	 * @param int|null $sequenceId
	 * @param array|null $source Source blob, or null when there is none
	 * @param array|null $mt Machine translation blob, or null when there is none
	 * @param array|null $user User translation blob, or null when there is none
	 */
	public function __construct(
		string $sectionId,
		?int $sequenceId,
		?array $source = null,
		?array $mt = null,
		?array $user = null
	) {
		$this->sectionId = $sectionId;
		$this->sequenceId = $sequenceId;
		$this->source = $source;
		$this->mt = $mt;
		$this->user = $user;
	}

	/**
	 * Store a blob under the given type, keeping whichever blob has the latest timestamp.
	 *
	 * A null blob never replaces a blob that is already stored.
	 *
	 * @param string $type One of self::VALID_BLOB_TYPES
	 * @param array|null $blob
	 * @throws InvalidArgumentException When $type is not a valid blob type
	 */
	public function setBlobForType( string $type, ?array $blob ): void {
		if ( !in_array( $type, self::VALID_BLOB_TYPES, true ) ) {
			throw new InvalidArgumentException( '[CX] Invalid blob type during translation unit restoration' );
		}

		// $type was validated above, so it names one of the blob properties of this class.
		$existingBlob = $this->$type;
		if ( $existingBlob === null ) {
			$this->$type = $blob;
			return;
		}

		// It's possible we have a "conflict", since we don't enforce uniqueness
		// in the database. In this case, the one with the latest timestamp is used.
		// Note: TS_ISO_8601 is suitable for string comparison if timezone is Z.
		// The explicit null check avoids reading an array offset on a null blob.
		if ( $blob !== null && $blob['timestamp'] > $existingBlob['timestamp'] ) {
			$this->$type = $blob;
		}
	}

	/**
	 * Returned fields: {sequenceid: int|null, mt: array|null, user: array|null, source: array|null}
	 * @return array
	 */
	public function toArray() {
		return [
			'sequenceid' => $this->sequenceId,
			'mt' => $this->mt,
			'user' => $this->user,
			'source' => $this->source,
		];
	}

	/**
	 * @return array|null The user blob, or null when none is set
	 */
	public function getUserBlob(): ?array {
		return $this->user;
	}

	/**
	 * @return array|null The machine translation blob, or null when none is set
	 */
	public function getMtBlob(): ?array {
		return $this->mt;
	}

	/**
	 * This method returns a boolean indicating whether the translation unit has user provided input.
	 * @return bool
	 */
	public function hasUserBlob(): bool {
		return $this->user !== null;
	}

	/**
	 * This method returns a boolean indicating whether the translation unit has source content
	 * @return bool
	 */
	public function hasSourceBlob(): bool {
		return $this->source !== null;
	}

	/**
	 * This method returns an array representation of the translation unit, suitable to be used
	 * as payload for each (sub)section for the "contenttranslationcorpora" endpoint response.
	 *
	 * The result always has a 'sequenceid' key, plus one key per requested valid type, in the
	 * order of self::VALID_BLOB_TYPES. A requested type with no blob is present with value null.
	 *
	 * @param string[] $types The types that are used for this translation unit. e.g. ['source', 'user']
	 * @param bool $sanitize Whether to pass each blob's 'content' through Sanitizer::stripAllTags()
	 * @return array
	 */
	public function toCustomArray( array $types, bool $sanitize ): array {
		$unit = [ 'sequenceid' => $this->sequenceId ];

		// Entries of $types that are not valid blob types are ignored.
		$usedTypes = array_intersect( self::VALID_BLOB_TYPES, $types );

		// filter out unused blob type fields
		foreach ( $usedTypes as $usedType ) {
			$unit[$usedType] = $this->$usedType;
			if ( !isset( $unit[$usedType] ) ) {
				// No blob stored for this type: keep the null entry, nothing to sanitize.
				continue;
			}

			if ( $sanitize ) {
				// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
				$unit[$usedType]['content'] = Sanitizer::stripAllTags( $unit[$usedType]['content'] );
			}
		}
		return $unit;
	}

	/**
	 * Returned fields: {sequenceid: int, mt: array, user: array, source: array}
	 *
	 * The 'mt' blob is returned as stored (possibly null). The 'user' and 'source' entries are
	 * always arrays holding only 'content' and 'timestamp'; both values are null when the
	 * corresponding blob is not set.
	 *
	 * @param bool $sanitize Whether to pass every non-null 'content' through Sanitizer::stripAllTags()
	 * @return array
	 */
	public function toCorporaDumpArray( bool $sanitize ): array {
		$unit = [ 'sequenceid' => $this->sequenceId ];

		$unit['mt'] = $this->mt;
		// The null coalescing reads keep the output shape stable when a blob is missing,
		// without reading an array offset on null.
		$unit['user'] = [
			'content' => $this->user['content'] ?? null,
			'timestamp' => $this->user['timestamp'] ?? null
		];
		$unit['source'] = [
			'content' => $this->source['content'] ?? null,
			'timestamp' => $this->source['timestamp'] ?? null
		];

		if ( $sanitize ) {
			foreach ( self::VALID_BLOB_TYPES as $type ) {
				// Missing blobs and null contents are left untouched.
				if ( isset( $unit[$type]['content'] ) ) {
					$unit[$type]['content'] = Sanitizer::stripAllTags( $unit[$type]['content'] );
				}
			}
		}

		return $unit;
	}

	/**
	 * Integer value of the first "_"-separated piece of the section id.
	 *
	 * NOTE(review): explode() always returns at least one piece, so the null branch is
	 * effectively unreachable; a non-numeric or empty first piece is cast to 0.
	 *
	 * @return int|null
	 */
	public function getRevision(): ?int {
		$sectionIdPieces = explode( '_', $this->sectionId );

		return isset( $sectionIdPieces[0] ) ? (int)$sectionIdPieces[0] : null;
	}

	/**
	 * Integer value of the second "_"-separated piece of the section id.
	 *
	 * @return int|null Null when the section id contains no "_" separator
	 */
	public function getMwSectionNumber(): ?int {
		$sectionIdPieces = explode( '_', $this->sectionId );

		return isset( $sectionIdPieces[1] ) ? (int)$sectionIdPieces[1] : null;
	}

	/**
	 * The "${revision}_${sectionNumber}" prefix of the section id.
	 *
	 * @return string
	 * @throws UnexpectedValueException When the section id has fewer than three "_"-separated pieces
	 */
	public function getBaseSectionId(): string {
		$sectionIdPieces = explode( '_', $this->sectionId );

		// sectionId should be in the following format: ${revision}_${sectionNumber}_${subSectionId}
		if ( count( $sectionIdPieces ) < 3 ) {
			throw new UnexpectedValueException( '[CX] Invalid format for section id of the translation unit DTO' );
		}

		return "$sectionIdPieces[0]_$sectionIdPieces[1]";
	}

}