Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
57.01% covered (warning)
57.01%
61 / 107
50.00% covered (danger)
50.00%
3 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
ProductionImageRecommendationApiHandler
57.01% covered (warning)
57.01%
61 / 107
50.00% covered (danger)
50.00%
3 / 6
51.78
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
1
 getApiRequest
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
20
 getSuggestionDataFromApiResponse
100.00% covered (success)
100.00%
45 / 45
100.00% covered (success)
100.00%
1 / 1
9
 getArticleIdFromTitle
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
12
 getRequest
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
2
 sortSuggestions
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3namespace GrowthExperiments\NewcomerTasks\AddImage;
4
5use GrowthExperiments\NewcomerTasks\TaskType\ImageRecommendationTaskTypeHandler;
6use GrowthExperiments\NewcomerTasks\TaskType\SectionImageRecommendationTaskTypeHandler;
7use GrowthExperiments\NewcomerTasks\TaskType\TaskType;
8use GrowthExperiments\Util;
9use GrowthExperiments\WikiConfigException;
10use MediaWiki\Http\HttpRequestFactory;
11use MediaWiki\Title\Title;
12use MWHttpRequest;
13use RequestContext;
14use StatusValue;
15use Wikimedia\UUID\GlobalIdGenerator;
16
/**
 * Handler for production image suggestion API.
 * phpcs:ignore Generic.Files.LineLength.TooLong
 * Documentation: https://www.mediawiki.org/wiki/Platform_Engineering_Team/Data_Value_Stream/Data_Gateway#Image_Suggestions
 * Configuration of constructor parameters:
 * - $url: GEImageRecommendationServiceUrl
 * - $wikiId: GEImageRecommendationServiceWikiIdMasquerade (or the actual wiki ID if not set)
 * - $useTitles: GEImageRecommendationServiceUseTitles
 * - $shouldVerifySsl: opposite of GEDeveloperSetup
 */
class ProductionImageRecommendationApiHandler implements ImageRecommendationApiHandler {

    /** Factory used to create the outgoing HTTP requests. */
    private HttpRequestFactory $httpRequestFactory;

    /** Image recommendation service root URL; path segments are appended to it. */
    private string $url;

    /** Project ID (for example, 'enwiki') sent as part of the request path. */
    private string $wikiId;

    /** Used to convert dataset UUIDs to timestamps when sorting suggestions. */
    private GlobalIdGenerator $globalIdGenerator;

    /** Service request timeout in seconds; null is passed through to the HTTP layer as-is. */
    private ?int $requestTimeout;

    /** Whether to look up the article ID via the title_cache endpoint instead of the local page ID. */
    private bool $useTitles;

    /** Value used for both the 'sslVerifyCert' and 'sslVerifyHost' request options. */
    private bool $shouldVerifySsl;

    /** Maps the 'kind' values of an API row to the recommendation source reported for it. */
    private const KIND_TO_SOURCE = [
        'istype-lead-image' => ImageRecommendationImage::SOURCE_WIKIPEDIA,
        'istype-wikidata-image' => ImageRecommendationImage::SOURCE_WIKIDATA,
        'istype-commons-category' => ImageRecommendationImage::SOURCE_COMMONS,
        'istype-section-topics' => ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_TOPICS,
        'istype-section-topics-p18' => ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_TOPICS,
        'istype-section-alignment' => ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_ALIGNMENT,
        // WIKIDATA_SECTION_INTERSECTION is handled by one-off code as it's based on two kinds
        'istype-depicts' => 'unknown',
    ];

    // FIXME not used for now as kinds change too often.
    private const KIND_TO_TASK_TYPE_ID = [
        'istype-lead-image' => ImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
        'istype-wikidata-image' => ImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
        'istype-commons-category' => ImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
        'istype-section-topics' => SectionImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
        'istype-section-topics-p18' => SectionImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
        'istype-section-alignment' => SectionImageRecommendationTaskTypeHandler::TASK_TYPE_ID,
        'istype-depicts' => 'ignored',
    ];

    /**
     * @param HttpRequestFactory $httpRequestFactory
     * @param string $url Image recommendation service root URL
     * @param string $wikiId Project ID (for example, 'enwiki')
     * @param GlobalIdGenerator $globalIdGenerator GlobalIdGenerator, used to convert UUID to timestamp
     *     when sorting the suggestions
     * @param int|null $requestTimeout Service request timeout in seconds
     * @param bool $useTitles Query image suggestions by title instead of by article ID;
     *     used in non-production environments
     * @param bool $shouldVerifySsl Whether the HTTP requests should verify SSL certificate and host
     */
    public function __construct(
        HttpRequestFactory $httpRequestFactory,
        string $url,
        string $wikiId,
        GlobalIdGenerator $globalIdGenerator,
        ?int $requestTimeout,
        bool $useTitles = false,
        bool $shouldVerifySsl = true
    ) {
        $this->httpRequestFactory = $httpRequestFactory;
        $this->url = $url;
        $this->wikiId = $wikiId;
        $this->globalIdGenerator = $globalIdGenerator;
        $this->requestTimeout = $requestTimeout;
        $this->useTitles = $useTitles;
        $this->shouldVerifySsl = $shouldVerifySsl;
    }

    /** @inheritDoc */
    public function getApiRequest( Title $title, TaskType $taskType ) {
        if ( !$this->url ) {
            return StatusValue::newFatal( 'rawmessage',
                'Image Suggestions API URL is not configured' );
        }

        // In title mode the ID comes from the service's title_cache endpoint and the lookup
        // may fail, in which case the failure status is returned to the caller unchanged.
        $articleId = $this->useTitles ?
            $this->getArticleIdFromTitle( $title ) :
            $title->getArticleID();

        if ( $articleId instanceof StatusValue ) {
            return $articleId;
        }

        return $this->getRequest( [
            'public',
            'image_suggestions',
            'suggestions',
            $this->wikiId,
            $articleId
        ] );
    }

    /** @inheritDoc */
    public function getSuggestionDataFromApiResponse( array $apiResponse, TaskType $taskType ): array {
        // A response without a usable 'rows' list carries no suggestions.
        $rows = $apiResponse['rows'] ?? [];
        if ( !is_array( $rows ) || !$rows ) {
            return [];
        }

        $sortedSuggestions = $this->sortSuggestions( $rows );
        // Since the suggestions are sorted based on the dataset ID, the id of the first suggestion
        // is that of the most recent dataset.
        $validDatasetId = $sortedSuggestions[0]['id'] ?? '';
        $taskTypeId = $taskType->getId();

        $imageData = [];
        foreach ( $sortedSuggestions as $suggestion ) {
            // Discard suggestions from other datasets. Rows are sorted, so nothing after the
            // first mismatch can belong to the valid dataset. A row without an id never matches.
            if ( ( $suggestion['id'] ?? null ) !== $validDatasetId ) {
                break;
            }

            // Ideally we'd have a list of kinds relevant for each task type but kinds are
            // still in flux. Just treat everything with a non-null section_heading as a
            // section-level recommendation.
            $isSectionLevel = isset( $suggestion['section_heading'] );
            $recommendationTaskTypeId = $isSectionLevel ?
                SectionImageRecommendationTaskTypeHandler::TASK_TYPE_ID :
                ImageRecommendationTaskTypeHandler::TASK_TYPE_ID;
            if ( $recommendationTaskTypeId !== $taskTypeId ) {
                continue;
            }

            $imageData[] = new ImageRecommendationData(
                $suggestion['image'] ?? null,
                $this->getSourceForSuggestion( $suggestion, $isSectionLevel ),
                implode( ',', $suggestion['found_on'] ?? [] ),
                // Equal to $suggestion['id'], as guaranteed by the dataset check above.
                $validDatasetId,
                $suggestion['section_index'] ?? null,
                $suggestion['section_heading'] ?? null
            );
        }
        return $imageData;
    }

    /**
     * Determine the recommendation source for a single API row based on its 'kind' list.
     * Kinds which are not keys of KIND_TO_SOURCE are logged via Util::logException.
     *
     * @param array $suggestion A single row of the API response
     * @param bool $isSectionLevel Whether the row is treated as a section-level recommendation;
     *     only used to pick a fallback source when the row has no known kind
     * @return string Source identifier: a KIND_TO_SOURCE value or an
     *     ImageRecommendationImage::SOURCE_* constant
     */
    private function getSourceForSuggestion( array $suggestion, bool $isSectionLevel ) {
        // Tolerate a missing or scalar 'kind' field instead of failing in the array functions.
        $kinds = (array)( $suggestion['kind'] ?? [] );
        $knownKinds = array_values(
            array_intersect( $kinds, array_keys( self::KIND_TO_SOURCE ) )
        );

        foreach ( array_diff( $kinds, $knownKinds ) as $unknownKind ) {
            Util::logException( new WikiConfigException(
                "Unknown image suggestions API kind: $unknownKind"
            ), [
                'page_id' => $suggestion['page_id'] ?? 0,
                'dataset-id' => $suggestion['id'] ?? 'unknown',
            ] );
        }

        if ( !$knownKinds ) {
            // FIXME we should probably ignore unknown types of suggestions once the API is more stable
            return $isSectionLevel ?
                ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_TOPICS :
                ImageRecommendationImage::SOURCE_WIKIDATA;
        }

        $knownSources = array_map(
            static fn ( $kind ) => self::KIND_TO_SOURCE[$kind],
            $knownKinds
        );
        $intersectionSources = [
            ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_TOPICS,
            ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_ALIGNMENT,
        ];
        // When both section-level sources are present, report the dedicated intersection source.
        if ( array_diff( $intersectionSources, $knownSources ) === [] ) {
            return ImageRecommendationImage::SOURCE_WIKIDATA_SECTION_INTERSECTION;
        }
        // Otherwise the first known kind (in the order given by the API) decides.
        return self::KIND_TO_SOURCE[ $knownKinds[0] ];
    }

    /**
     * Get the production article ID for the given title.
     * The API retrieves image suggestions for a given production article ID, so for non-production
     * environments, the title needs to be mapped to the corresponding production ID.
     *
     * @param Title $title
     * @return StatusValue|int The 'page_id' of the first row returned by the title_cache endpoint,
     *     or a fatal StatusValue when the request fails or the response has no such value
     */
    private function getArticleIdFromTitle( Title $title ) {
        $titleText = $title->getDBkey();
        $request = $this->getRequest( [
            'private',
            'image_suggestions',
            'title_cache',
            $this->wikiId,
            $titleText
        ] );
        $status = $request->execute();
        if ( !$status->isOK() ) {
            return StatusValue::newFatal( 'rawmessage',
                'Failed to fetch production article ID for ' . $titleText );
        }

        // json_decode() yields null for malformed JSON and may yield a scalar for valid JSON,
        // so verify the shape at every level before reading the ID.
        $responseData = json_decode( $request->getContent(), true );
        $articleData = is_array( $responseData ) ? ( $responseData['rows'][0] ?? null ) : null;
        if ( is_array( $articleData ) && isset( $articleData['page_id'] ) ) {
            return $articleData['page_id'];
        }
        return StatusValue::newFatal( 'rawmessage',
            'Invalid response from title_cache for ' . $titleText );
    }

    /**
     * Create a GET request to the service, expecting a JSON response.
     *
     * @param array $pathArgs Path segments appended to the service root URL; each segment is
     *     URL-encoded and the segments are joined with '/'
     * @return MWHttpRequest The prepared request; it is not executed here
     */
    private function getRequest( array $pathArgs = [] ): MWHttpRequest {
        $request = $this->httpRequestFactory->create(
            $this->url . '/' . implode( '/', array_map( 'rawurlencode', $pathArgs ) ),
            [
                'method' => 'GET',
                'originalRequest' => RequestContext::getMain()->getRequest(),
                'timeout' => $this->requestTimeout,
                'sslVerifyCert' => $this->shouldVerifySsl,
                'sslVerifyHost' => $this->shouldVerifySsl,
            ],
            __METHOD__
        );
        $request->setHeader( 'Accept', 'application/json' );
        return $request;
    }

    /**
     * Sort the suggestions in decreasing order of dataset timestamp (derived from the UUID in
     * the 'id' field); suggestions with equal timestamps are ordered by decreasing confidence.
     *
     * @param array $suggestions List of API rows (each row is an array)
     * @return array The same rows, sorted and reindexed from 0
     */
    private function sortSuggestions( array $suggestions ): array {
        // Sort by newer dataset with the highest confidence
        $compare = function ( array $a, array $b ) {
            $confidenceA = $a['confidence'] ?? 0;
            $confidenceB = $b['confidence'] ?? 0;
            $timestampA = $this->globalIdGenerator->getTimestampFromUUIDv1( $a['id'] ?? '' );
            $timestampB = $this->globalIdGenerator->getTimestampFromUUIDv1( $b['id'] ?? '' );

            // Confidence is only consulted when the timestamps compare as equal.
            return ( $timestampB <=> $timestampA ) ?: ( $confidenceB <=> $confidenceA );
        };
        usort( $suggestions, $compare );
        return $suggestions;
    }
}