Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
79.17% covered (warning)
79.17%
114 / 144
16.67% covered (danger)
16.67%
1 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
ServiceImageRecommendationProvider
79.17% covered (warning)
79.17%
114 / 144
16.67% covered (danger)
16.67%
1 / 6
35.09
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
2
 get
87.27% covered (warning)
87.27%
48 / 55
0.00% covered (danger)
0.00%
0 / 1
9.17
 processApiResponseData
100.00% covered (success)
100.00%
66 / 66
100.00% covered (success)
100.00%
1 / 1
13
 setMaxSuggestionsToProcess
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 hasMinimumWidth
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
6
 isValidMediaType
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace GrowthExperiments\NewcomerTasks\AddImage;
4
5use File;
6use GrowthExperiments\NewcomerTasks\TaskType\ImageRecommendationBaseTaskType;
7use GrowthExperiments\NewcomerTasks\TaskType\TaskType;
8use MediaWiki\Api\ApiRawMessage;
9use MediaWiki\Language\RawMessage;
10use MediaWiki\Linker\LinkTarget;
11use MediaWiki\Logger\LoggerFactory;
12use MediaWiki\Page\ProperPageIdentity;
13use MediaWiki\Title\Title;
14use MediaWiki\Title\TitleFactory;
15use MediaWiki\Title\TitleValue;
16use StatusValue;
17use Wikimedia\Assert\Assert;
18use Wikimedia\Stats\StatsFactory;
19
20/**
21 * Provides image recommendations via the Image Suggestion API.
22 * @see mvp API: https://image-suggestion-api.wmcloud.org/?doc
23 * @see production API: https://wikitech.wikimedia.org/wiki/Image-suggestion
24 * @see https://phabricator.wikimedia.org/project/profile/5253/
25 */
class ServiceImageRecommendationProvider implements ImageRecommendationProvider {

    /** Placeholder used when an API error response carries no usable "detail" field. */
    private const NO_REASON_GIVEN = '(no reason given)';

    private TitleFactory $titleFactory;

    /** Stats factory already scoped to the 'GrowthExperiments' component. */
    private StatsFactory $statsFactory;

    private ImageRecommendationApiHandler $apiHandler;

    private ImageRecommendationMetadataProvider $metadataProvider;

    private AddImageSubmissionHandler $imageSubmissionHandler;

    /** When true, recommendations are requested even for pages that do not exist locally. */
    private bool $geDeveloperSetup;

    /** Upper bound on the number of images collected into one ImageRecommendation. */
    private int $maxSuggestionsToProcess;

    /**
     * @param TitleFactory $titleFactory
     * @param StatsFactory $statsFactory
     * @param ImageRecommendationApiHandler $apiHandler
     * @param ImageRecommendationMetadataProvider $metadataProvider Image metadata provider
     * @param AddImageSubmissionHandler $imageSubmissionHandler
     * @param bool $geDeveloperSetup
     * @param int $maxSuggestionsToProcess Maximum number of valid suggestions to process and return with
     * an ImageRecommendation object.
     */
    public function __construct(
        TitleFactory $titleFactory,
        StatsFactory $statsFactory,
        ImageRecommendationApiHandler $apiHandler,
        ImageRecommendationMetadataProvider $metadataProvider,
        AddImageSubmissionHandler $imageSubmissionHandler,
        bool $geDeveloperSetup = false,
        int $maxSuggestionsToProcess = 1
    ) {
        $this->titleFactory = $titleFactory;
        $this->statsFactory = $statsFactory->withComponent( 'GrowthExperiments' );
        $this->apiHandler = $apiHandler;
        $this->metadataProvider = $metadataProvider;
        $this->imageSubmissionHandler = $imageSubmissionHandler;
        $this->geDeveloperSetup = $geDeveloperSetup;
        $this->maxSuggestionsToProcess = $maxSuggestionsToProcess;
    }

    /**
     * Fetch suggestions for a page from the API handler and turn them into an
     * ImageRecommendation, or a StatusValue describing why that was not possible.
     *
     * Both the HTTP request and the post-processing step are timed via the stats factory.
     *
     * @inheritDoc
     */
    public function get( LinkTarget $title, TaskType $taskType ) {
        Assert::parameterType( ImageRecommendationBaseTaskType::class, $taskType, '$taskType' );
        '@phan-var ImageRecommendationBaseTaskType $taskType';

        $title = $this->titleFactory->newFromLinkTarget( $title );
        $titleText = $title->getPrefixedDBkey();
        $titleTextSafe = strip_tags( $titleText );
        if ( !$title->exists() && !$this->geDeveloperSetup ) {
            // These errors might show up to the end user, but provide no useful information;
            // they are merely there to support debugging. So we keep them English-only to
            // reduce the translator burden.
            return StatusValue::newFatal( new RawMessage(
                'Recommendation could not be loaded for non-existing page: $1',
                [ $titleTextSafe ]
            ) );
        }

        $request = $this->apiHandler->getApiRequest( $title, $taskType );
        if ( $request instanceof StatusValue ) {
            // The handler could not build a request; pass its status through unchanged.
            return $request;
        }

        $startTime = microtime( true );
        $status = $request->execute();

        $timing = $this->statsFactory->getTiming( 'image_recommendation_provider_seconds' );
        $timing->setLabel( 'action', 'get' )
            ->copyToStatsdAt( "timing.growthExperiments.imageRecommendationProvider.get" )
            ->observe( microtime( true ) - $startTime );

        $httpStatus = $request->getStatus();
        if ( !$status->isOK() && $httpStatus < 400 ) {
            // Failure without an HTTP error code: there is no error body to inspect.
            return $status;
        }

        $response = $request->getContent();
        $data = json_decode( $response, true );

        if ( $data === null ) {
            $errorMessage = __METHOD__ . ': Unable to decode JSON response for page {title}: {response}';
            // The log entry keeps the raw body for debugging.
            $errorContext = [ 'title' => $titleTextSafe, 'response' => $response ];
            LoggerFactory::getInstance( 'GrowthExperiments' )->error( $errorMessage, $errorContext );
            // The status message may reach the end user, so the body gets the same
            // tag-stripping as the title and the API error detail.
            return StatusValue::newFatal( new RawMessage(
                "Unable to decode JSON response for page $1: $2",
                [ $titleTextSafe, strip_tags( (string)$response ) ]
            ) );
        }

        if ( $httpStatus >= 400 ) {
            return StatusValue::newFatal( new RawMessage(
                'API returned HTTP code $1 for page $2: $3',
                [
                    $httpStatus,
                    $titleTextSafe,
                    self::formatErrorDetail( $data['detail'] ?? self::NO_REASON_GIVEN ),
                ]
            ) );
        }

        $imageRecommendationDatas = $this->apiHandler->getSuggestionDataFromApiResponse( $data, $taskType );
        if ( $imageRecommendationDatas instanceof StatusValue ) {
            return $imageRecommendationDatas;
        }

        $startTime = microtime( true );
        $responseData = self::processApiResponseData(
            $taskType,
            $title,
            $titleText,
            $imageRecommendationDatas,
            $this->metadataProvider,
            $this->imageSubmissionHandler,
            $this->maxSuggestionsToProcess
        );

        $timing
            ->setLabel( 'action', 'process_api_response_data' )
            ->copyToStatsdAt(
                "timing.growthExperiments.imageRecommendationProvider.processApiResponseData"
            )
            ->observe( microtime( true ) - $startTime );

        return $responseData;
    }

    /**
     * Process the data returned by the Image Suggestions API and return an ImageRecommendation
     * or an error.
     *
     * Suggestions are handled in order until $maxSuggestionsToProcess images have been
     * collected. A suggestion is skipped, and the reason recorded on the status, when it
     * fails validation, its file metadata cannot be loaded, it is narrower than the task
     * type's minimum width, its media type is not allowed, or its image metadata cannot
     * be loaded.
     *
     * @param ImageRecommendationBaseTaskType $taskType
     * @param LinkTarget|ProperPageIdentity $title Title for which to generate the image recommendation for.
     *   The title in the API response will be ignored.
     * @param string $titleText Title text, for logging.
     * @param ImageRecommendationData[] $suggestionData
     * @param ImageRecommendationMetadataProvider $metadataProvider
     * @param AddImageSubmissionHandler|null $imageSubmissionHandler When set and no image
     *   survives filtering, it is asked to invalidate the recommendation for the page.
     * @param int $maxSuggestionsToProcess Maximum number of valid suggestions to process and return
     *   with an ImageRecommendation object.
     * @return ImageRecommendation|StatusValue
     */
    public static function processApiResponseData(
        ImageRecommendationBaseTaskType $taskType,
        $title,
        string $titleText,
        array $suggestionData,
        ImageRecommendationMetadataProvider $metadataProvider,
        ?AddImageSubmissionHandler $imageSubmissionHandler,
        int $maxSuggestionsToProcess = 1
    ) {
        $suggestionFilters = $taskType->getSuggestionFilters();
        $titleTextSafe = strip_tags( $titleText );
        if ( count( $suggestionData ) === 0 ) {
            return StatusValue::newGood()->error( new ApiRawMessage(
                // Keep in sync with Util::STATSD_INCREMENTABLE_ERROR_MESSAGES
                [ 'No recommendation found for page: $1', $titleTextSafe ],
                'growthexperiments-no-recommendation-found'
            ) );
        }

        $images = [];
        // Holds the dataset ID of the last suggestion that passed validation, even if
        // that suggestion's image was filtered out afterwards.
        $datasetId = '';
        $status = StatusValue::newGood();
        foreach ( $suggestionData as $suggestion ) {
            if ( count( $images ) >= $maxSuggestionsToProcess ) {
                break;
            }

            $validationError = ImageRecommendationDataValidator::validate( $titleTextSafe, $suggestion );
            if ( !$validationError->isGood() ) {
                $status->merge( $validationError );
                continue;
            }

            $filename = File::normalizeTitle( $suggestion->getFilename() )->getDBkey();
            $source = $suggestion->getSource();
            $projects = $suggestion->getFormattedProjects();
            $datasetId = $suggestion->getDatasetId();
            $sectionNumber = $suggestion->getSectionNumber();
            $sectionTitle = $suggestion->getSectionTitle();

            $fileMetadata = $metadataProvider->getFileMetadata( $filename );
            if ( !is_array( $fileMetadata ) ) {
                // Anything other than an array is merged into the status as an error.
                $status->merge( $fileMetadata );
                continue;
            }

            $imageWidth = $fileMetadata['originalWidth'] ?: 0;
            $minWidth = $suggestionFilters['minimumSize']['width'] ?? 0;
            $validMediaTypes = $suggestionFilters['validMediaTypes'];
            // Both checks append their own error to $status. The media type is only
            // checked when the width check passed, so a rejected file records one reason.
            if (
                !self::hasMinimumWidth( $minWidth, $imageWidth, $filename, $titleTextSafe, $status ) ||
                !self::isValidMediaType(
                    $validMediaTypes, $fileMetadata['mediaType'], $filename, $titleTextSafe, $status
                )
            ) {
                continue;
            }

            $imageMetadata = $metadataProvider->getMetadata( [
                'filename' => $suggestion->getFilename(),
                'projects' => $projects,
                'source' => $source,
            ] );
            if ( !is_array( $imageMetadata ) ) {
                $status->merge( $imageMetadata );
                continue;
            }

            $images[] = new ImageRecommendationImage(
                new TitleValue( NS_FILE, $filename ),
                $source,
                $projects,
                $imageMetadata,
                $sectionNumber,
                $sectionTitle
            );
        }

        // The submission handler is given a page identity and the ImageRecommendation a
        // link target, so derive whichever form the caller did not pass in.
        if ( $title instanceof ProperPageIdentity ) {
            $pageIdentity = $title;
            $linkTarget = Title::newFromPageIdentity( $title );
        } else {
            $pageIdentity = Title::newFromLinkTarget( $title )->toPageIdentity();
            $linkTarget = $title;
        }

        // NOTE(review): when no image survived and no submission handler was passed, this
        // falls through and returns an ImageRecommendation with an empty image list rather
        // than $status - confirm with callers that this is intended.
        if ( !$images && $imageSubmissionHandler ) {
            $imageSubmissionHandler->invalidateRecommendation(
                $taskType,
                $pageIdentity,
                // We need to pass a user ID for event logging purposes. We can't easily
                // access a user ID here; passing 0 for an anonymous user seems OK.
                0,
                null,
                '',
                null,
                null
            );
            return $status;
        }
        // If $status is bad but $images is not empty (fetching some but not all images failed),
        // we can just ignore the errors, they won't be a problem for the recommendation workflow.
        return new ImageRecommendation( $linkTarget, $images, $datasetId );
    }

    /**
     * Change the maximum number of images collected by later get() calls.
     *
     * @param int $maxSuggestionsToProcess
     * @return void
     */
    public function setMaxSuggestionsToProcess( int $maxSuggestionsToProcess ) {
        $this->maxSuggestionsToProcess = $maxSuggestionsToProcess;
    }

    /**
     * Turn the "detail" value of a decoded API error response into tag-free text.
     *
     * Strings and other scalars are cast to string. Structured values (arrays) are
     * JSON-encoded so that an unexpected response shape still produces an error message
     * instead of failing inside strip_tags().
     *
     * @param mixed $detail Decoded "detail" value, or the fallback placeholder.
     * @return string
     */
    private static function formatErrorDetail( $detail ): string {
        if ( is_scalar( $detail ) ) {
            return strip_tags( (string)$detail );
        }
        $encoded = json_encode( $detail );
        return strip_tags( $encoded === false ? self::NO_REASON_GIVEN : $encoded );
    }

    /**
     * Check that an image is at least as wide as required, recording an error on
     * $status when it is not.
     *
     * @param int $minimumWidth Smallest acceptable width.
     * @param int $imageWidth Width of the candidate image.
     * @param string $filename File name, used in the error message only.
     * @param string $pageTitleText Page title, used in the error message only.
     * @param StatusValue $status Receives an error when the image is too narrow.
     * @return bool True when $imageWidth >= $minimumWidth.
     */
    private static function hasMinimumWidth(
        int $minimumWidth,
        int $imageWidth,
        string $filename,
        string $pageTitleText,
        StatusValue $status
    ): bool {
        if ( $imageWidth >= $minimumWidth ) {
            return true;
        }
        $status->error( new RawMessage(
            'Invalid file $1 in article $2. Filtered because not wide enough: $3 (minimum $4)',
            [ $filename, $pageTitleText, $imageWidth, $minimumWidth ]
        ) );
        return false;
    }

    /**
     * Check that an image's media type is in the allowed list, recording an error on
     * $status when it is not.
     *
     * The comparison is strict, so entries of $validMediaTypes must be strings to match.
     *
     * @param array $validMediaTypes Allowed media types.
     * @param string $mediaType Media type of the candidate image.
     * @param string $filename File name, used in the error message only.
     * @param string $pageTitleText Page title, used in the error message only.
     * @param StatusValue $status Receives an error when the media type is not allowed.
     * @return bool True when $mediaType is one of $validMediaTypes.
     */
    private static function isValidMediaType(
        array $validMediaTypes,
        string $mediaType,
        string $filename,
        string $pageTitleText,
        StatusValue $status
    ): bool {
        if ( in_array( $mediaType, $validMediaTypes, true ) ) {
            return true;
        }
        $status->error( new RawMessage(
            'Invalid file $1 in article $2. Filtered because $3 is not valid mime type ($4)',
            [ $filename, $pageTitleText, $mediaType, implode( ', ', $validMediaTypes ) ]
        ) );
        return false;
    }
}