Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
79.17% |
114 / 144 |
|
16.67% |
1 / 6 |
CRAP | |
0.00% |
0 / 1 |
ServiceImageRecommendationProvider | |
79.17% |
114 / 144 |
|
16.67% |
1 / 6 |
35.09 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
get | |
87.27% |
48 / 55 |
|
0.00% |
0 / 1 |
9.17 | |||
processApiResponseData | |
100.00% |
66 / 66 |
|
100.00% |
1 / 1 |
13 | |||
setMaxSuggestionsToProcess | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
hasMinimumWidth | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
isValidMediaType | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\NewcomerTasks\AddImage; |
4 | |
5 | use File; |
6 | use GrowthExperiments\NewcomerTasks\TaskType\ImageRecommendationBaseTaskType; |
7 | use GrowthExperiments\NewcomerTasks\TaskType\TaskType; |
8 | use MediaWiki\Api\ApiRawMessage; |
9 | use MediaWiki\Language\RawMessage; |
10 | use MediaWiki\Linker\LinkTarget; |
11 | use MediaWiki\Logger\LoggerFactory; |
12 | use MediaWiki\Page\ProperPageIdentity; |
13 | use MediaWiki\Title\Title; |
14 | use MediaWiki\Title\TitleFactory; |
15 | use MediaWiki\Title\TitleValue; |
16 | use StatusValue; |
17 | use Wikimedia\Assert\Assert; |
18 | use Wikimedia\Stats\StatsFactory; |
19 | |
20 | /** |
21 | * Provides image recommendations via the Image Suggestion API. |
22 | * @see mvp API: https://image-suggestion-api.wmcloud.org/?doc |
23 | * @see production API: https://wikitech.wikimedia.org/wiki/Image-suggestion |
24 | * @see https://phabricator.wikimedia.org/project/profile/5253/ |
25 | */ |
26 | class ServiceImageRecommendationProvider implements ImageRecommendationProvider { |
27 | |
28 | /** @var TitleFactory */ |
29 | private $titleFactory; |
30 | |
31 | private StatsFactory $statsFactory; |
32 | |
33 | /** @var ImageRecommendationApiHandler */ |
34 | private $apiHandler; |
35 | |
36 | /** @var ImageRecommendationMetadataProvider */ |
37 | private $metadataProvider; |
38 | |
39 | /** @var AddImageSubmissionHandler */ |
40 | private $imageSubmissionHandler; |
41 | |
42 | /** @var bool */ |
43 | private $geDeveloperSetup; |
44 | |
45 | /** @var int */ |
46 | private $maxSuggestionsToProcess; |
47 | |
/**
 * Store the collaborators used to fetch and post-process image recommendations.
 *
 * @param TitleFactory $titleFactory Used by get() to turn the requested link target into a Title.
 * @param StatsFactory $statsFactory Metrics factory; it is scoped to the 'GrowthExperiments'
 *   component before being stored.
 * @param ImageRecommendationApiHandler $apiHandler Builds the API request and extracts the
 *   suggestion data from the decoded response.
 * @param ImageRecommendationMetadataProvider $metadataProvider Image metadata provider
 * @param AddImageSubmissionHandler $imageSubmissionHandler Handed to processApiResponseData(),
 *   which uses it to invalidate a recommendation when no usable image is left.
 * @param bool $geDeveloperSetup When true, get() does not reject pages that do not exist locally.
 * @param int $maxSuggestionsToProcess Maximum number of valid suggestions to process and return with
 *   an ImageRecommendation object.
 */
public function __construct(
	TitleFactory $titleFactory,
	StatsFactory $statsFactory,
	ImageRecommendationApiHandler $apiHandler,
	ImageRecommendationMetadataProvider $metadataProvider,
	AddImageSubmissionHandler $imageSubmissionHandler,
	bool $geDeveloperSetup = false,
	int $maxSuggestionsToProcess = 1
) {
	// Every metric emitted by this provider is reported under the extension's component.
	$this->statsFactory = $statsFactory->withComponent( 'GrowthExperiments' );

	// Services.
	$this->titleFactory = $titleFactory;
	$this->apiHandler = $apiHandler;
	$this->metadataProvider = $metadataProvider;
	$this->imageSubmissionHandler = $imageSubmissionHandler;

	// Plain configuration values.
	$this->geDeveloperSetup = $geDeveloperSetup;
	$this->maxSuggestionsToProcess = $maxSuggestionsToProcess;
}
75 | |
/**
 * @inheritDoc
 *
 * Requests suggestions for the page from the API handler, decodes the JSON response and
 * hands the extracted suggestion data to processApiResponseData(). Both the HTTP request
 * and the post-processing step are timed and reported through the stats factory.
 *
 * A fatal StatusValue is returned when the page does not exist (unless the developer setup
 * flag is on), when the request fails, when the response body is not usable JSON, or when
 * the API answers with an HTTP status of 400 or above.
 */
public function get( LinkTarget $title, TaskType $taskType ) {
	Assert::parameterType( ImageRecommendationBaseTaskType::class, $taskType, '$taskType' );
	'@phan-var ImageRecommendationBaseTaskType $taskType';

	$title = $this->titleFactory->newFromLinkTarget( $title );
	$titleText = $title->getPrefixedDBkey();
	$titleTextSafe = strip_tags( $titleText );
	if ( !$title->exists() && !$this->geDeveloperSetup ) {
		// These errors might show up to the end user, but provide no useful information;
		// they are merely there to support debugging. So we keep them English-only to
		// reduce the translator burden.
		return StatusValue::newFatal( new RawMessage(
			'Recommendation could not be loaded for non-existing page: $1',
			[ $titleTextSafe ]
		) );
	}

	$request = $this->apiHandler->getApiRequest( $title, $taskType );

	// The handler reports its own failures as a StatusValue instead of a request object.
	if ( $request instanceof StatusValue ) {
		return $request;
	}

	$startTime = microtime( true );
	$status = $request->execute();

	$timing = $this->statsFactory->getTiming( 'image_recommendation_provider_seconds' );
	$timing->setLabel( 'action', 'get' )
		->copyToStatsdAt( "timing.growthExperiments.imageRecommendationProvider.get" )
		->observe( microtime( true ) - $startTime );

	// Failures that come with an HTTP status >= 400 are handled further down, so that the
	// reason given in the response body can be included in the error message.
	if ( !$status->isOK() && $request->getStatus() < 400 ) {
		return $status;
	}
	$response = $request->getContent();
	$data = json_decode( $response, true );

	if ( $data === null ) {
		$errorMessage = __METHOD__ . ': Unable to decode JSON response for page {title}: {response}';
		$errorContext = [ 'title' => $titleTextSafe, 'response' => $response ];
		LoggerFactory::getInstance( 'GrowthExperiments' )->error( $errorMessage, $errorContext );
		return StatusValue::newFatal( new RawMessage(
			"Unable to decode JSON response for page $1: $2",
			[ $titleTextSafe, $response ]
		) );
	}

	if ( $request->getStatus() >= 400 ) {
		$detail = is_array( $data ) ? ( $data['detail'] ?? null ) : null;
		if ( $detail === null ) {
			$detail = '(no reason given)';
		} elseif ( !is_scalar( $detail ) ) {
			// 'detail' may be a structured value (e.g. a list of validation errors);
			// strip_tags() only accepts strings, so serialize it instead of crashing.
			$detail = json_encode( $detail );
		}
		return StatusValue::newFatal( new RawMessage(
			'API returned HTTP code $1 for page $2: $3',
			[ $request->getStatus(), $titleTextSafe, strip_tags( (string)$detail ) ]
		) );
	}

	if ( !is_array( $data ) ) {
		// Valid JSON, but a bare scalar (true, 42, "text") rather than a decoded structure.
		// NOTE(review): presumably the handler only accepts arrays; confirm against
		// ImageRecommendationApiHandler::getSuggestionDataFromApiResponse().
		LoggerFactory::getInstance( 'GrowthExperiments' )->error(
			__METHOD__ . ': Unexpected JSON response for page {title}: {response}',
			[ 'title' => $titleTextSafe, 'response' => $response ]
		);
		return StatusValue::newFatal( new RawMessage(
			'Unexpected JSON response for page $1: $2',
			[ $titleTextSafe, $response ]
		) );
	}

	$imageRecommendationDatas = $this->apiHandler->getSuggestionDataFromApiResponse( $data, $taskType );
	if ( $imageRecommendationDatas instanceof StatusValue ) {
		return $imageRecommendationDatas;
	}

	$startTime = microtime( true );
	$responseData = self::processApiResponseData(
		$taskType,
		$title,
		$titleText,
		$imageRecommendationDatas,
		$this->metadataProvider,
		$this->imageSubmissionHandler,
		$this->maxSuggestionsToProcess
	);

	// Same timing metric as above, relabelled for the post-processing step.
	$timing
		->setLabel( 'action', 'process_api_response_data' )
		->copyToStatsdAt(
			"timing.growthExperiments.imageRecommendationProvider.processApiResponseData"
		)
		->observe( microtime( true ) - $startTime );

	return $responseData;
}
154 | |
/**
 * Turn the suggestion data extracted from an Image Suggestions API response into an
 * ImageRecommendation, or into a StatusValue describing why none could be built.
 *
 * Suggestions are examined in order. One is accepted when it passes validation, its file
 * metadata can be loaded, it satisfies the task type's width and media type filters, and
 * its image metadata can be loaded. Examination stops once $maxSuggestionsToProcess
 * suggestions have been accepted.
 *
 * @param ImageRecommendationBaseTaskType $taskType
 * @param LinkTarget|ProperPageIdentity $title Title for which to generate the image recommendation for.
 *   The title in the API response will be ignored.
 * @param string $titleText Title text, for logging.
 * @param ImageRecommendationData[] $suggestionData
 * @param ImageRecommendationMetadataProvider $metadataProvider
 * @param AddImageSubmissionHandler|null $imageSubmissionHandler When given and no suggestion
 *   was accepted, it is asked to invalidate the recommendation for the page.
 * @param int $maxSuggestionsToProcess Maximum number of valid suggestions to process and return
 *   with an ImageRecommendation object.
 * @return ImageRecommendation|StatusValue
 */
public static function processApiResponseData(
	ImageRecommendationBaseTaskType $taskType,
	$title,
	string $titleText,
	array $suggestionData,
	ImageRecommendationMetadataProvider $metadataProvider,
	?AddImageSubmissionHandler $imageSubmissionHandler,
	int $maxSuggestionsToProcess = 1
) {
	Assert::parameterType( ImageRecommendationBaseTaskType::class, $taskType, '$taskType' );
	'@phan-var ImageRecommendationBaseTaskType $taskType';

	$suggestionFilters = $taskType->getSuggestionFilters();
	$titleTextSafe = strip_tags( $titleText );

	if ( $suggestionData === [] ) {
		return StatusValue::newGood()->error( new ApiRawMessage(
			// Keep in sync with Util::STATSD_INCREMENTABLE_ERROR_MESSAGES
			[ 'No recommendation found for page: $1', $titleTextSafe ],
			'growthexperiments-no-recommendation-found'
		) );
	}

	$acceptedImages = [];
	$datasetId = '';
	// Collects the reason for every rejected suggestion.
	$problems = StatusValue::newGood();

	foreach ( $suggestionData as $candidate ) {
		if ( count( $acceptedImages ) >= $maxSuggestionsToProcess ) {
			break;
		}

		$validation = ImageRecommendationDataValidator::validate( $titleTextSafe, $candidate );
		if ( !$validation->isGood() ) {
			$problems->merge( $validation );
			continue;
		}

		$filename = File::normalizeTitle( $candidate->getFilename() )->getDBkey();
		$source = $candidate->getSource();
		$projects = $candidate->getFormattedProjects();
		// Set for every validated suggestion, including ones rejected further down.
		$datasetId = $candidate->getDatasetId();
		$sectionNumber = $candidate->getSectionNumber();
		$sectionTitle = $candidate->getSectionTitle();

		$fileMetadata = $metadataProvider->getFileMetadata( $filename );
		if ( !is_array( $fileMetadata ) ) {
			// Anything other than an array is merged as a status describing the failure.
			$problems->merge( $fileMetadata );
			continue;
		}

		$imageWidth = $fileMetadata['originalWidth'] ?: 0;
		$minWidth = $suggestionFilters['minimumSize']['width'] ?? 0;
		$validMediaTypes = $suggestionFilters['validMediaTypes'];
		// Each check records its own error in $problems; the media type is only checked
		// when the width check passed.
		$passesFilters =
			self::hasMinimumWidth( $minWidth, $imageWidth, $filename, $titleTextSafe, $problems ) &&
			self::isValidMediaType(
				$validMediaTypes, $fileMetadata['mediaType'], $filename, $titleTextSafe, $problems
			);
		if ( !$passesFilters ) {
			continue;
		}

		$imageMetadata = $metadataProvider->getMetadata( [
			'filename' => $candidate->getFilename(),
			'projects' => $projects,
			'source' => $source,
		] );
		if ( !is_array( $imageMetadata ) ) {
			$problems->merge( $imageMetadata );
			continue;
		}

		$acceptedImages[] = new ImageRecommendationImage(
			new TitleValue( NS_FILE, $filename ),
			$source,
			$projects,
			$imageMetadata,
			$sectionNumber,
			$sectionTitle
		);
	}

	// Both representations of the page are needed below: a page identity for the
	// invalidation call and a link target for the recommendation object.
	if ( $title instanceof ProperPageIdentity ) {
		$pageIdentity = $title;
		$linkTarget = Title::newFromPageIdentity( $title );
	} else {
		$pageIdentity = Title::newFromLinkTarget( $title )->toPageIdentity();
		$linkTarget = $title;
	}

	// NOTE(review): when nothing was accepted and $imageSubmissionHandler is null, execution
	// falls through and an ImageRecommendation with an empty image list is returned instead
	// of $problems - confirm this is intended.
	if ( $acceptedImages === [] && $imageSubmissionHandler !== null ) {
		$imageSubmissionHandler->invalidateRecommendation(
			$taskType,
			$pageIdentity,
			// We need to pass a user ID for event logging purposes. We can't easily
			// access a user ID here; passing 0 for an anonymous user seems OK.
			0,
			null,
			'',
			null,
			null
		);
		return $problems;
	}

	// If $problems is bad but $acceptedImages is not empty (fetching some but not all images
	// failed), we can just ignore the errors, they won't be a problem for the recommendation
	// workflow.
	return new ImageRecommendation( $linkTarget, $acceptedImages, $datasetId );
}
268 | |
/**
 * Override the maximum number of valid suggestions that get() asks
 * processApiResponseData() to accept for a single recommendation.
 *
 * @param int $maxSuggestionsToProcess New limit; replaces the value given to the constructor.
 * @return void
 */
public function setMaxSuggestionsToProcess( int $maxSuggestionsToProcess ) {
	$this->maxSuggestionsToProcess = $maxSuggestionsToProcess;
}
276 | |
/**
 * Check that an image is at least as wide as required, recording an error when it is not.
 *
 * @param int $minimumWidth Smallest acceptable width.
 * @param int $imageWidth Width of the image being checked.
 * @param string $filename File name, used only in the error message.
 * @param string $pageTitleText Article title, used only in the error message.
 * @param StatusValue $status Receives an error when the image is too narrow; left untouched
 *   otherwise.
 * @return bool True when $imageWidth is greater than or equal to $minimumWidth.
 */
private static function hasMinimumWidth(
	int $minimumWidth,
	int $imageWidth,
	string $filename,
	string $pageTitleText,
	StatusValue $status
): bool {
	if ( $imageWidth >= $minimumWidth ) {
		return true;
	}

	$messageParams = [ $filename, $pageTitleText, $imageWidth, $minimumWidth ];
	$status->error( new RawMessage(
		'Invalid file $1 in article $2. Filtered because not wide enough: $3 (minimum $4)',
		$messageParams
	) );
	return false;
}
301 | |
/**
 * Check that a file's media type is one of the accepted types, recording an error when it
 * is not.
 *
 * @param array $validMediaTypes Accepted media types.
 * @param string $mediaType Media type of the file being checked.
 * @param string $filename File name, used only in the error message.
 * @param string $pageTitleText Article title, used only in the error message.
 * @param StatusValue $status Receives an error when the media type is rejected; left
 *   untouched otherwise.
 * @return bool True when $mediaType is listed in $validMediaTypes.
 */
private static function isValidMediaType(
	array $validMediaTypes,
	string $mediaType,
	string $filename,
	string $pageTitleText,
	StatusValue $status
): bool {
	// Strict comparison: with loose matching a stray non-string entry in the list (for
	// example `true`) would compare equal to every media type and let any file through.
	if ( in_array( $mediaType, $validMediaTypes, true ) ) {
		return true;
	}

	$validMediaTypesText = implode( ', ', $validMediaTypes );
	$status->error( new RawMessage(
		'Invalid file $1 in article $2. Filtered because $3 is not valid mime type ($4)',
		[ $filename, $pageTitleText, $mediaType, $validMediaTypesText ]
	) );
	return false;
}
327 | } |