Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
78.87% covered (warning)
78.87%
112 / 142
16.67% covered (danger)
16.67%
1 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
ServiceImageRecommendationProvider
78.87% covered (warning)
78.87%
112 / 142
16.67% covered (danger)
16.67%
1 / 6
35.39
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
2
 get
86.79% covered (warning)
86.79%
46 / 53
0.00% covered (danger)
0.00%
0 / 1
9.19
 processApiResponseData
100.00% covered (success)
100.00%
66 / 66
100.00% covered (success)
100.00%
1 / 1
13
 setMaxSuggestionsToProcess
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 hasMinimumWidth
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
6
 isValidMediaType
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace GrowthExperiments\NewcomerTasks\AddImage;
4
5use ApiRawMessage;
6use File;
7use GrowthExperiments\NewcomerTasks\TaskType\ImageRecommendationBaseTaskType;
8use GrowthExperiments\NewcomerTasks\TaskType\TaskType;
9use IBufferingStatsdDataFactory;
10use MediaWiki\Language\RawMessage;
11use MediaWiki\Linker\LinkTarget;
12use MediaWiki\Logger\LoggerFactory;
13use MediaWiki\Page\ProperPageIdentity;
14use MediaWiki\Title\Title;
15use MediaWiki\Title\TitleFactory;
16use MediaWiki\Title\TitleValue;
17use StatusValue;
18use Wikimedia\Assert\Assert;
19
20/**
21 * Provides image recommendations via the Image Suggestion API.
22 * @see mvp API: https://image-suggestion-api.wmcloud.org/?doc
23 * @see production API: https://wikitech.wikimedia.org/wiki/Image-suggestion
24 * @see https://phabricator.wikimedia.org/project/profile/5253/
25 */
26class ServiceImageRecommendationProvider implements ImageRecommendationProvider {
27
28    /** @var TitleFactory */
29    private $titleFactory;
30
31    /** @var IBufferingStatsdDataFactory */
32    private $statsdDataFactory;
33
34    /** @var ImageRecommendationApiHandler */
35    private $apiHandler;
36
37    /** @var ImageRecommendationMetadataProvider */
38    private $metadataProvider;
39
40    /** @var AddImageSubmissionHandler */
41    private $imageSubmissionHandler;
42
43    /** @var bool */
44    private $geDeveloperSetup;
45
46    /** @var int */
47    private $maxSuggestionsToProcess;
48
49    /**
50     * @param TitleFactory $titleFactory
51     * @param IBufferingStatsdDataFactory $statsdDataFactory
52     * @param ImageRecommendationApiHandler $apiHandler
53     * @param ImageRecommendationMetadataProvider $metadataProvider Image metadata provider
54     * @param AddImageSubmissionHandler $imageSubmissionHandler
55     * @param bool $geDeveloperSetup
56     * @param int $maxSuggestionsToProcess Maximum number of valid suggestions to process and return with
57     * an ImageRecommendation object.
58     */
59    public function __construct(
60        TitleFactory $titleFactory,
61        IBufferingStatsdDataFactory $statsdDataFactory,
62        ImageRecommendationApiHandler $apiHandler,
63        ImageRecommendationMetadataProvider $metadataProvider,
64        AddImageSubmissionHandler $imageSubmissionHandler,
65        bool $geDeveloperSetup = false,
66        int $maxSuggestionsToProcess = 1
67    ) {
68        $this->titleFactory = $titleFactory;
69        $this->statsdDataFactory = $statsdDataFactory;
70        $this->apiHandler = $apiHandler;
71        $this->metadataProvider = $metadataProvider;
72        $this->imageSubmissionHandler = $imageSubmissionHandler;
73        $this->geDeveloperSetup = $geDeveloperSetup;
74        $this->maxSuggestionsToProcess = $maxSuggestionsToProcess;
75    }
76
77    /** @inheritDoc */
78    public function get( LinkTarget $title, TaskType $taskType ) {
79        Assert::parameterType( ImageRecommendationBaseTaskType::class, $taskType, '$taskType' );
80        '@phan-var ImageRecommendationBaseTaskType $taskType';
81
82        $title = $this->titleFactory->newFromLinkTarget( $title );
83        $titleText = $title->getPrefixedDBkey();
84        $titleTextSafe = strip_tags( $titleText );
85        if ( !$title->exists() && !$this->geDeveloperSetup ) {
86            // These errors might show up to the end user, but provide no useful information;
87            // they are merely there to support debugging. So we keep them English-only to
88            // reduce the translator burden.
89            return StatusValue::newFatal( new RawMessage(
90                'Recommendation could not be loaded for non-existing page: $1',
91                [ $titleTextSafe ]
92            ) );
93        }
94
95        $request = $this->apiHandler->getApiRequest( $title, $taskType );
96
97        if ( $request instanceof StatusValue ) {
98            return $request;
99        }
100
101        $startTime = microtime( true );
102        $status = $request->execute();
103
104        $this->statsdDataFactory->timing(
105            'timing.growthExperiments.imageRecommendationProvider.get',
106            microtime( true ) - $startTime
107        );
108
109        if ( !$status->isOK() && $request->getStatus() < 400 ) {
110            return $status;
111        }
112        $response = $request->getContent();
113        $data = json_decode( $response, true );
114
115        if ( $data === null ) {
116            $errorMessage = __METHOD__ . ': Unable to decode JSON response for page {title}: {response}';
117            $errorContext = [ 'title' => $titleTextSafe, 'response' => $response ];
118            LoggerFactory::getInstance( 'GrowthExperiments' )->error( $errorMessage, $errorContext );
119            return StatusValue::newFatal( new RawMessage(
120                "Unable to decode JSON response for page $1: $2",
121                [ $titleTextSafe, $response ]
122            ) );
123        } elseif ( $request->getStatus() >= 400 ) {
124            return StatusValue::newFatal( new RawMessage(
125                'API returned HTTP code $1 for page $2: $3',
126                [ $request->getStatus(), $titleTextSafe, strip_tags( $data['detail'] ?? '(no reason given)' ) ]
127            ) );
128        }
129
130        $imageRecommendationDatas = $this->apiHandler->getSuggestionDataFromApiResponse( $data, $taskType );
131        if ( $imageRecommendationDatas instanceof StatusValue ) {
132            return $imageRecommendationDatas;
133        }
134
135        $startTime = microtime( true );
136        $responseData = self::processApiResponseData(
137            $taskType,
138            $title,
139            $titleText,
140            $imageRecommendationDatas,
141            $this->metadataProvider,
142            $this->imageSubmissionHandler,
143            $this->maxSuggestionsToProcess
144        );
145
146        $this->statsdDataFactory->timing(
147            'timing.growthExperiments.imageRecommendationProvider.processApiResponseData',
148            microtime( true ) - $startTime
149        );
150
151        return $responseData;
152    }
153
154    /**
155     * Process the data returned by the Image Suggestions API and return an ImageRecommendation
156     * or an error.
157     * @param ImageRecommendationBaseTaskType $taskType
158     * @param LinkTarget|ProperPageIdentity $title Title for which to generate the image recommendation for.
159     *   The title in the API response will be ignored.
160     * @param string $titleText Title text, for logging.
161     * @param ImageRecommendationData[] $suggestionData
162     * @param ImageRecommendationMetadataProvider $metadataProvider
163     * @param AddImageSubmissionHandler|null $imageSubmissionHandler
164     * @param int $maxSuggestionsToProcess Maximum number of valid suggestions to process and return
165     *   with an ImageRecommendation object.
166     * @return ImageRecommendation|StatusValue
167     */
168    public static function processApiResponseData(
169        ImageRecommendationBaseTaskType $taskType,
170        $title,
171        string $titleText,
172        array $suggestionData,
173        ImageRecommendationMetadataProvider $metadataProvider,
174        ?AddImageSubmissionHandler $imageSubmissionHandler,
175        int $maxSuggestionsToProcess = 1
176    ) {
177        Assert::parameterType( ImageRecommendationBaseTaskType::class, $taskType, '$taskType' );
178        '@phan-var ImageRecommendationBaseTaskType $taskType';
179
180        $suggestionFilters = $taskType->getSuggestionFilters();
181        $titleTextSafe = strip_tags( $titleText );
182        if ( count( $suggestionData ) === 0 ) {
183            return StatusValue::newGood()->error( new ApiRawMessage(
184                // Keep in sync with Util::STATSD_INCREMENTABLE_ERROR_MESSAGES
185                [ 'No recommendation found for page: $1', $titleTextSafe ],
186                'growthexperiments-no-recommendation-found'
187            ) );
188        }
189        $images = [];
190        $datasetId = '';
191        $status = StatusValue::newGood();
192        foreach ( $suggestionData as $suggestion ) {
193            if ( count( $images ) >= $maxSuggestionsToProcess ) {
194                break;
195            }
196            $validationError = ImageRecommendationDataValidator::validate( $titleTextSafe, $suggestion );
197            if ( !$validationError->isGood() ) {
198                $status->merge( $validationError );
199                continue;
200            }
201
202            $filename = File::normalizeTitle( $suggestion->getFilename() )->getDBkey();
203            $source = $suggestion->getSource();
204            $projects = $suggestion->getFormattedProjects();
205            $datasetId = $suggestion->getDatasetId();
206            $sectionNumber = $suggestion->getSectionNumber();
207            $sectionTitle = $suggestion->getSectionTitle();
208            $fileMetadata = $metadataProvider->getFileMetadata( $filename );
209
210            if ( is_array( $fileMetadata ) ) {
211                $imageWidth = $fileMetadata['originalWidth'] ?: 0;
212                $minWidth = $suggestionFilters['minimumSize']['width'] ?? 0;
213                $validMediaTypes = $suggestionFilters['validMediaTypes'];
214                if (
215                    self::hasMinimumWidth( $minWidth, $imageWidth, $filename, $titleTextSafe, $status ) &&
216                    self::isValidMediaType(
217                        $validMediaTypes, $fileMetadata['mediaType'], $filename, $titleTextSafe, $status
218                    )
219                ) {
220                    $imageMetadata = $metadataProvider->getMetadata( [
221                        'filename' => $suggestion->getFilename(),
222                        'projects' => $projects,
223                        'source' => $source,
224                    ] );
225                    if ( is_array( $imageMetadata ) ) {
226                        $images[] = new ImageRecommendationImage(
227                            new TitleValue( NS_FILE, $filename ),
228                            $source,
229                            $projects,
230                            $imageMetadata,
231                            $sectionNumber,
232                            $sectionTitle
233                        );
234                    } else {
235                        $status->merge( $imageMetadata );
236                    }
237                }
238            } else {
239                $status->merge( $fileMetadata );
240            }
241        }
242        if ( $title instanceof ProperPageIdentity ) {
243            $pageIdentity = $title;
244            $linkTarget = Title::newFromPageIdentity( $title );
245        } else {
246            $pageIdentity = Title::newFromLinkTarget( $title )->toPageIdentity();
247            $linkTarget = $title;
248        }
249        if ( !$images && $imageSubmissionHandler ) {
250            $imageSubmissionHandler->invalidateRecommendation(
251                $taskType,
252                $pageIdentity,
253                // We need to pass a user ID for event logging purposes. We can't easily
254                // access a user ID here; passing 0 for an anonymous user seems OK.
255                0,
256                null,
257                '',
258                null,
259                null
260            );
261            return $status;
262        }
263        // If $status is bad but $images is not empty (fetching some but not all images failed),
264        // we can just ignore the errors, they won't be a problem for the recommendation workflow.
265        return new ImageRecommendation( $linkTarget, $images, $datasetId );
266    }
267
268    /**
269     * @param int $maxSuggestionsToProcess
270     * @return void
271     */
272    public function setMaxSuggestionsToProcess( int $maxSuggestionsToProcess ) {
273        $this->maxSuggestionsToProcess = $maxSuggestionsToProcess;
274    }
275
276    /**
277     * @param int $minimumWidth
278     * @param int $imageWidth
279     * @param string $filename
280     * @param string $pageTitleText
281     * @param StatusValue $status
282     * @return bool
283     */
284    private static function hasMinimumWidth(
285        int $minimumWidth,
286        int $imageWidth,
287        string $filename,
288        string $pageTitleText,
289        StatusValue $status
290    ): bool {
291        $res = $imageWidth >= $minimumWidth;
292        if ( !$res ) {
293            $status->error( new RawMessage(
294                'Invalid file $1 in article $2. Filtered because not wide enough: $3 (minimum $4)',
295                [ $filename, $pageTitleText, $imageWidth, $minimumWidth ]
296            ) );
297        }
298        return $res;
299    }
300
301    /**
302     * @param array $validMediaTypes
303     * @param string $mediaType
304     * @param string $filename
305     * @param string $pageTitleText
306     * @param StatusValue $status
307     * @return bool
308     */
309    private static function isValidMediaType(
310        array $validMediaTypes,
311        string $mediaType,
312        string $filename,
313        string $pageTitleText,
314        StatusValue $status
315    ): bool {
316        $res = in_array( $mediaType, $validMediaTypes );
317        if ( !$res ) {
318            $validMediaTypesText = implode( ', ', $validMediaTypes );
319            $status->error( new RawMessage(
320                'Invalid file $1 in article $2. Filtered because $3 is not valid mime type ($4)',
321                [ $filename, $pageTitleText, $mediaType, $validMediaTypesText ]
322            ) );
323        }
324        return $res;
325    }
326}