Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
57.01% |
61 / 107 |
|
50.00% |
3 / 6 |
CRAP | |
0.00% |
0 / 1 |
ProductionImageRecommendationApiHandler | |
57.01% |
61 / 107 |
|
50.00% |
3 / 6 |
51.78 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
getApiRequest | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
20 | |||
getSuggestionDataFromApiResponse | |
100.00% |
45 / 45 |
|
100.00% |
1 / 1 |
9 | |||
getArticleIdFromTitle | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
12 | |||
getRequest | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
2 | |||
sortSuggestions | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\NewcomerTasks\AddImage; |
4 | |
5 | use GrowthExperiments\NewcomerTasks\TaskType\ImageRecommendationTaskTypeHandler; |
6 | use GrowthExperiments\NewcomerTasks\TaskType\SectionImageRecommendationTaskTypeHandler; |
7 | use GrowthExperiments\NewcomerTasks\TaskType\TaskType; |
8 | use GrowthExperiments\Util; |
9 | use GrowthExperiments\WikiConfigException; |
10 | use MediaWiki\Context\RequestContext; |
11 | use MediaWiki\Http\HttpRequestFactory; |
12 | use MediaWiki\Title\Title; |
13 | use MWHttpRequest; |
14 | use StatusValue; |
15 | use Wikimedia\UUID\GlobalIdGenerator; |
16 | |
17 | /** |
18 | * Handler for production image suggestion API. |
19 | * phpcs:ignore Generic.Files.LineLength.TooLong |
20 | * Documentation: https://www.mediawiki.org/wiki/Platform_Engineering_Team/Data_Value_Stream/Data_Gateway#Image_Suggestions |
21 | * Configuration of constructor parameters: |
22 | * - $url: GEImageRecommendationServiceUrl |
23 | * - $wiki: GEImageRecommendationServiceWikiIdMasquerade (or the actual wiki ID if not set) |
24 | * - $useTitles: GEImageRecommendationServiceUseTitles |
25 | * - $shouldVerifySsl: opposite of GEDeveloperSetup |
26 | */ |
class ProductionImageRecommendationApiHandler implements ImageRecommendationApiHandler {

	/** Factory used to build the outgoing HTTP requests. */
	private HttpRequestFactory $httpRequestFactory;

	/** Root URL of the image recommendation service; an empty value means "not configured". */
	private string $url;

	/** Project ID sent to the service (for example, 'enwiki'). */
	private string $wikiId;

	/** Used to convert dataset UUIDs to timestamps when sorting suggestions. */
	private GlobalIdGenerator $globalIdGenerator;

	/** Service request timeout in seconds; passed through to the HTTP request as-is. */
	private ?int $requestTimeout;

	/** Whether suggestions are looked up by title instead of by local article ID. */
	private bool $useTitles;

	/** Whether HTTP requests verify the SSL certificate and host. */
	private bool $shouldVerifySsl;

	/** Maps the "kind" values reported by the API to image recommendation sources. */
	private const KIND_TO_SOURCE = [
		'istype-lead-image' => ImageRecommendationImage::SOURCE_WIKIPEDIA,
		'istype-wikidata-image' => ImageRecommendationImage::SOURCE_WIKIDATA,
		'istype-commons-category' => ImageRecommendationImage::SOURCE_COMMONS,
		'istype-section-topics' => ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_TOPICS,
		'istype-section-topics-p18' => ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_TOPICS,
		'istype-section-alignment' => ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_ALIGNMENT,
		// WIKIDATA_SECTION_INTERSECTION is handled by one-off code as it's based on two kinds
		'istype-depicts' => 'unknown',
	];

	// FIXME not used for now as kinds change too often.
	private const KIND_TO_TASK_TYPE_ID = [
		'istype-lead-image' => ImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
		'istype-wikidata-image' => ImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
		'istype-commons-category' => ImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
		'istype-section-topics' => SectionImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
		'istype-section-topics-p18' => SectionImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
		'istype-section-alignment' => SectionImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
		'istype-depicts' => 'ignored',
	];

	/**
	 * @param HttpRequestFactory $httpRequestFactory
	 * @param string $url Image recommendation service root URL
	 * @param string $wikiId Project ID (for example, 'enwiki')
	 * @param GlobalIdGenerator $globalIdGenerator GlobalIdGenerator, used to convert UUID to timestamp
	 *   when sorting the suggestions
	 * @param int|null $requestTimeout Service request timeout in seconds
	 * @param bool $useTitles Query image suggestions by title instead of by article ID;
	 *   used in non-production environments
	 * @param bool $shouldVerifySsl Whether the HTTP requests should verify SSL certificate and host
	 */
	public function __construct(
		HttpRequestFactory $httpRequestFactory,
		string $url,
		string $wikiId,
		GlobalIdGenerator $globalIdGenerator,
		?int $requestTimeout,
		bool $useTitles = false,
		bool $shouldVerifySsl = true
	) {
		$this->httpRequestFactory = $httpRequestFactory;
		$this->url = $url;
		$this->wikiId = $wikiId;
		$this->globalIdGenerator = $globalIdGenerator;
		$this->requestTimeout = $requestTimeout;
		$this->useTitles = $useTitles;
		$this->shouldVerifySsl = $shouldVerifySsl;
	}

	/** @inheritDoc */
	public function getApiRequest( Title $title, TaskType $taskType ) {
		if ( !$this->url ) {
			return StatusValue::newFatal( 'rawmessage',
				'Image Suggestions API URL is not configured' );
		}

		// In title mode the ID has to be looked up from the service; that lookup can fail
		// and then yields a StatusValue instead of an ID.
		$articleId = $this->useTitles ?
			$this->getArticleIdFromTitle( $title ) :
			$title->getArticleID();

		if ( $articleId instanceof StatusValue ) {
			return $articleId;
		}

		return $this->getRequest( [
			'public',
			'image_suggestions',
			'suggestions',
			$this->wikiId,
			$articleId
		] );
	}

	/** @inheritDoc */
	public function getSuggestionDataFromApiResponse( array $apiResponse, TaskType $taskType ): array {
		// A response without a usable 'rows' list carries no suggestions; avoid reading
		// an undefined key or sorting a non-array.
		$rows = $apiResponse['rows'] ?? [];
		if ( !$rows || !is_array( $rows ) ) {
			return [];
		}
		$imageData = [];
		$sortedSuggestions = $this->sortSuggestions( $rows );
		// Since the suggestions are sorted based on the dataset ID, the id of the first suggestion
		// is that of the most recent dataset.
		$validDatasetId = $sortedSuggestions[0]['id'] ?? '';
		$taskTypeId = $taskType->getId();

		foreach ( $sortedSuggestions as $suggestion ) {
			// Discard suggestions from other datasets. They sort after the most recent one,
			// so the first mismatch ends the loop.
			if ( $suggestion['id'] !== $validDatasetId ) {
				break;
			}

			// Ideally we'd have a list of kinds relevant for each task type but kinds are
			// still in flux. Just treat everything with a non-null section_heading as a
			// section-level recommendation.
			$recommendationTaskTypeId = isset( $suggestion['section_heading'] ) ?
				SectionImageRecommendationTaskTypeHandler::TASK_TYPE_ID :
				ImageRecommendationTaskTypeHandler::TASK_TYPE_ID;
			if ( $recommendationTaskTypeId !== $taskTypeId ) {
				continue;
			}

			// A row without 'kind' is handled like a row with only unknown kinds.
			$kinds = $suggestion['kind'] ?? [];
			$knownKinds = array_values( array_intersect( $kinds, array_keys( self::KIND_TO_SOURCE ) ) );
			foreach ( array_diff( $kinds, $knownKinds ) as $unknownKind ) {
				Util::logException( new WikiConfigException(
					"Unknown image suggestions API kind: $unknownKind"
				), [
					'page_id' => $suggestion['page_id'] ?? 0,
					'dataset-id' => $suggestion['id'] ?? 'unknown',
				] );
			}

			$imageData[] = new ImageRecommendationData(
				$suggestion['image'],
				$this->getSourceFromKinds( $knownKinds, $taskTypeId ),
				implode( ',', $suggestion['found_on'] ?? [] ),
				$suggestion['id'],
				// The section fields are treated as optional above (isset check), so read
				// them without triggering undefined-key warnings.
				$suggestion['section_index'] ?? null,
				$suggestion['section_heading'] ?? null,
			);
		}
		return $imageData;
	}

	/**
	 * Determine the recommendation source for a suggestion from its recognized kinds.
	 *
	 * @param string[] $knownKinds Zero-indexed list of kinds that are keys of self::KIND_TO_SOURCE
	 * @param string $taskTypeId ID of the task type the suggestion is used for; only consulted
	 *   when $knownKinds is empty, and must then be the top-level or the section-level image
	 *   recommendation task type ID
	 * @return string One of the ImageRecommendationImage::SOURCE_* values (or 'unknown')
	 */
	private function getSourceFromKinds( array $knownKinds, $taskTypeId ) {
		if ( !$knownKinds ) {
			// FIXME we should probably ignore unknown types of suggestions once the API is more stable
			return [
				ImageRecommendationTaskTypeHandler::TASK_TYPE_ID
					=> ImageRecommendationImage::SOURCE_WIKIDATA,
				SectionImageRecommendationTaskTypeHandler::TASK_TYPE_ID
					=> ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_TOPICS,
			][ $taskTypeId ];
		}

		$knownSources = array_map( fn ( $kind ) => self::KIND_TO_SOURCE[$kind], $knownKinds );
		$intersectionSources = [
			ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_TOPICS,
			ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_ALIGNMENT,
		];
		// The intersection source applies only when both section sources are present.
		if ( array_diff( $intersectionSources, $knownSources ) === [] ) {
			return ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_INTERSECTION;
		}
		return self::KIND_TO_SOURCE[ $knownKinds[0] ];
	}

	/**
	 * Get the production article ID for the given title.
	 * The API retrieves image suggestions for a given production article ID, so for non-production
	 * environments, the title needs to be mapped to the corresponding production ID.
	 *
	 * @param Title $title
	 * @return StatusValue|int The article ID, or a fatal StatusValue when the lookup request
	 *   fails or its response does not contain a page ID
	 */
	private function getArticleIdFromTitle( Title $title ) {
		$titleText = $title->getDBkey();
		$request = $this->getRequest( [
			'private',
			'image_suggestions',
			'title_cache',
			$this->wikiId,
			$titleText
		] );
		$status = $request->execute();
		if ( !$status->isOK() ) {
			return StatusValue::newFatal( 'rawmessage',
				'Failed to fetch production article ID for ' . $titleText );
		}
		// json_decode() returns null on malformed JSON; the null-coalescing lookup below
		// tolerates that as well as a missing 'rows' list.
		$responseData = json_decode( $request->getContent(), true );
		$articleData = $responseData['rows'][0] ?? null;
		// Require an actual row with a non-null page ID, so that a malformed response
		// produces an error status instead of a type error or an empty URL segment.
		if ( is_array( $articleData ) && isset( $articleData['page_id'] ) ) {
			return $articleData['page_id'];
		}
		return StatusValue::newFatal( 'rawmessage',
			'Invalid response from title_cache for ' . $titleText );
	}

	/**
	 * Create a GET request for the service URL followed by the given path segments.
	 *
	 * @param array $pathArgs Path segments; each one is URL-encoded and joined with '/'
	 * @return MWHttpRequest Request with the Accept header set to application/json
	 */
	private function getRequest( array $pathArgs = [] ): MWHttpRequest {
		$request = $this->httpRequestFactory->create(
			$this->url . '/' . implode( '/', array_map( 'rawurlencode', $pathArgs ) ),
			[
				'method' => 'GET',
				'originalRequest' => RequestContext::getMain()->getRequest(),
				'timeout' => $this->requestTimeout,
				'sslVerifyCert' => $this->shouldVerifySsl,
				'sslVerifyHost' => $this->shouldVerifySsl,
			],
			__METHOD__
		);
		$request->setHeader( 'Accept', 'application/json' );
		return $request;
	}

	/**
	 * Sort the suggestions so that the newest dataset (by the timestamp encoded in the
	 * suggestion's UUID 'id') comes first and, within the same timestamp, the highest
	 * confidence comes first.
	 *
	 * @param array $suggestions Suggestion rows from the API response
	 * @return array The same rows, re-indexed in sorted order
	 */
	private function sortSuggestions( array $suggestions ): array {
		// Sort by newer dataset with the highest confidence
		$compare = function ( array $a, array $b ) {
			$confidenceA = $a['confidence'] ?? 0;
			$confidenceB = $b['confidence'] ?? 0;
			$timestampA = $this->globalIdGenerator->getTimestampFromUUIDv1( $a['id'] ?? '' );
			$timestampB = $this->globalIdGenerator->getTimestampFromUUIDv1( $b['id'] ?? '' );

			// Operands are swapped (B before A) to get descending order; confidence only
			// breaks ties between equal timestamps.
			return $timestampB <=> $timestampA ?: $confidenceB <=> $confidenceA;
		};
		usort( $suggestions, $compare );
		return $suggestions;
	}
}