Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
98.98% covered (success)
98.98%
194 / 196
88.89% covered (warning)
88.89%
8 / 9
CRAP
0.00% covered (danger)
0.00%
0 / 1
Notifier
98.98% covered (success)
98.98%
194 / 196
88.89% covered (warning)
88.89%
8 / 9
30
0.00% covered (danger)
0.00%
0 / 1
 __construct
n/a
0 / 0
n/a
0 / 0
1
 createSearchAfter
100.00% covered (success)
100.00%
34 / 34
100.00% covered (success)
100.00%
1 / 1
2
 doSearch
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 run
95.12% covered (success)
95.12%
39 / 41
0.00% covered (danger)
0.00%
0 / 1
8
 getSectionHeading
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 isArticleLevelSuggestion
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 isSectionLevelSuggestion
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getMediaUrl
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 getSuggestions
100.00% covered (success)
100.00%
54 / 54
100.00% covered (success)
100.00%
1 / 1
8
 getUserForTitle
100.00% covered (success)
100.00%
57 / 57
100.00% covered (success)
100.00%
1 / 1
6
1<?php
2
3namespace MediaWiki\Extension\ImageSuggestions;
4
5use CirrusSearch\Connection;
6use CirrusSearch\Elastica\SearchAfter;
7use CirrusSearch\SearchConfig;
8use CirrusSearch\Wikimedia\WeightedTagsHooks;
9use Elastica\Query;
10use Elastica\Query\BoolQuery;
11use Elastica\Query\MatchQuery;
12use Elastica\ResultSet;
13use Elastica\Search;
14use MediaWiki\Config\Config;
15use MediaWiki\Title\NamespaceInfo;
16use MediaWiki\Title\Title;
17use MediaWiki\Title\TitleFactory;
18use MediaWiki\User\Options\UserOptionsLookup;
19use MediaWiki\User\UserFactory;
20use MediaWiki\User\UserIdentity;
21use MediaWiki\WikiMap\WikiMap;
22use Psr\Log\LoggerInterface;
23use Wikimedia\Http\MultiHttpClient;
24use Wikimedia\Rdbms\IReadableDatabase;
25use Wikimedia\Rdbms\SelectQueryBuilder;
26
/**
 * Processes one batch of pages that are tagged in the search index as having image
 * suggestions and creates notifications about those suggestions for one watcher per page.
 *
 * run() returns the updated job parameters (including the `lastPageId` cursor and the
 * per-user bookkeeping), presumably so the caller can schedule the next batch with them.
 */
class Notifier {
    private MultiHttpClient $multiHttpClient;
    private UserFactory $userFactory;
    private UserOptionsLookup $userOptionsLookup;
    private NamespaceInfo $namespaceInfo;
    private string $suggestionsUri;
    private string $instanceOfUri;
    private LoggerInterface $logger;
    private IReadableDatabase $dbr;
    private IReadableDatabase $dbrEcho;
    private Config $searchConfig;
    private Connection $searchConnection;
    private TitleFactory $titleFactory;
    private NotificationHelper $notificationHelper;
    private WikiMapHelper $wikiMapHelper;
    private array $jobParams;

    private SearchAfter $searchAfter;

    public const MAX_SECTION_SUGGESTIONS_PER_NOTIFICATION = 5;

    /**
     * @codeCoverageIgnore
     *
     * @param string $suggestionsUri sprintf() template for the suggestions endpoint; it is
     *     filled with the current wiki id and the page id, in that order
     * @param string $instanceOfUri sprintf() template for the instance-of endpoint; filled
     *     with the same two arguments
     * @param MultiHttpClient $multiHttpClient
     * @param UserFactory $userFactory
     * @param UserOptionsLookup $userOptionsLookup
     * @param NamespaceInfo $namespaceInfo
     * @param IReadableDatabase $mainDbConnection connection used for the watchlist query
     * @param IReadableDatabase $echoDbConnection connection used for the echo tables
     * @param LoggerInterface $logger
     * @param Config $searchConfig
     * @param Connection $searchConnection
     * @param TitleFactory $titleFactory
     * @param NotificationHelper $notificationHelper
     * @param WikiMapHelper $wikiMapHelper
     * @param array $jobParams Keys read by this class: batchSize, lastPageId, verbose, dryRun,
     *     excludeInstanceOf, minConfidence, minConfidenceSection, maxNotificationsPerUser,
     *     minEditCount, and the accumulators numPages, notifiedUserIds and optedInUserIds
     */
    public function __construct(
        string $suggestionsUri,
        string $instanceOfUri,
        MultiHttpClient $multiHttpClient,
        UserFactory $userFactory,
        UserOptionsLookup $userOptionsLookup,
        NamespaceInfo $namespaceInfo,
        IReadableDatabase $mainDbConnection,
        IReadableDatabase $echoDbConnection,
        LoggerInterface $logger,
        Config $searchConfig,
        Connection $searchConnection,
        TitleFactory $titleFactory,
        NotificationHelper $notificationHelper,
        WikiMapHelper $wikiMapHelper,
        array $jobParams
    ) {
        $this->suggestionsUri = $suggestionsUri;
        $this->instanceOfUri = $instanceOfUri;

        $this->multiHttpClient = $multiHttpClient;
        $this->userFactory = $userFactory;
        $this->userOptionsLookup = $userOptionsLookup;
        $this->namespaceInfo = $namespaceInfo;

        $this->dbr = $mainDbConnection;
        $this->dbrEcho = $echoDbConnection;
        $this->logger = $logger;

        $this->searchConfig = $searchConfig;
        $this->searchConnection = $searchConnection;
        $this->titleFactory = $titleFactory;
        $this->notificationHelper = $notificationHelper;
        $this->wikiMapHelper = $wikiMapHelper;

        // The accumulators are counted, summed and filtered later on; give them array/int
        // defaults so a first batch that omits them cannot hit a TypeError. Values supplied
        // by the caller always win because of the array union.
        $this->jobParams = [
            'numPages' => 0,
            'notifiedUserIds' => [],
            'optedInUserIds' => [],
        ] + $jobParams;

        $this->searchAfter = $this->createSearchAfter();
    }

    /**
     * Builds the paginated search for pages carrying either the article-level or the
     * section-level image recommendation tag.
     *
     * The query is sorted by page_id, fetches only the document id, and uses the `batchSize`
     * job parameter as page size. When the `lastPageId` job parameter is positive the
     * search is initialised to continue after that page id.
     *
     * @return SearchAfter
     */
    private function createSearchAfter(): SearchAfter {
        $searchClient = $this->searchConnection->getClient();
        $searchIndex = $this->searchConnection->getIndex(
            $this->searchConfig->get( SearchConfig::INDEX_BASE_NAME ),
            $this->searchConnection->pickIndexSuffixForNamespaces(
                $this->searchConfig->get( 'ContentNamespaces' )
            )
        );

        $articleImageQuery = new MatchQuery();
        $articleImageQuery->setFieldQuery(
            WeightedTagsHooks::FIELD_NAME,
            'recommendation.image/exists'
        );
        $sectionImageQuery = new MatchQuery();
        $sectionImageQuery->setFieldQuery(
            WeightedTagsHooks::FIELD_NAME,
            'recommendation.image_section/exists'
        );

        // A page qualifies when at least one of the two tags is present
        $bool = new BoolQuery();
        $bool->addShould( $articleImageQuery );
        $bool->addShould( $sectionImageQuery );
        $bool->setMinimumShouldMatch( 1 );

        $query = new Query();
        $query->setQuery( $bool );
        $query->setSize( $this->jobParams['batchSize'] );
        $query->setSource( false );
        $query->setSort( [ 'page_id' ] );
        $query->setStoredFields( [ '_id' ] );

        $search = new Search( $searchClient );
        $search->setQuery( $query );
        $search->addIndex( $searchIndex );

        $searchAfter = new SearchAfter( $search );
        $lastPageId = $this->jobParams['lastPageId'] ?? 0;
        if ( $lastPageId > 0 ) {
            $searchAfter->initializeSearchAfter( [ $lastPageId ] );
        }
        return $searchAfter;
    }

    /**
     * Rewinds the search-after iterator and returns the result set it is then positioned on.
     *
     * @return ResultSet
     */
    private function doSearch(): ResultSet {
        $this->searchAfter->rewind();
        return $this->searchAfter->current();
    }

    /**
     * Processes one batch of search results.
     *
     * For every result the page id is recorded as `lastPageId`; pages without a title, without
     * an eligible user or without usable suggestions are skipped. For the remaining pages one
     * notification per suggestion is created for the selected user.
     *
     * @return array|null the updated job parameters, or null when the search returned nothing
     */
    public function run(): ?array {
        $searchResults = $this->doSearch();
        if ( count( $searchResults ) === 0 ) {
            $this->logger->error( 'No more articles with suggestions found' );
            return null;
        }

        foreach ( $searchResults as $searchResult ) {
            $pageId = (int)$searchResult->getId();
            // Advance the cursor even for pages that end up being skipped
            $this->jobParams['lastPageId'] = $pageId;

            $title = $this->titleFactory->newFromId( $pageId );
            if ( !$title ) {
                $this->logger->debug( 'No title found for ' . $pageId );
                continue;
            }

            $this->jobParams['numPages']++;
            $user = $this->getUserForTitle( $title );
            if ( !$user ) {
                $this->logger->debug( 'No user found for ' . $title->getDBkey() );
                continue;
            }

            $suggestions = $this->getSuggestions( $pageId );
            if ( !$suggestions ) {
                $this->logger->debug( 'No suggestions found for ' . $pageId );
                continue;
            }

            // Count notified *pages* per user (not individual notifications)
            $userId = $user->getId();
            $this->jobParams['notifiedUserIds'][$userId] =
                ( $this->jobParams['notifiedUserIds'][$userId] ?? 0 ) + 1;

            // If we have a bundle of notifications the newest ones are displayed first.
            // Reverse the order of the array so that the elements earlier in the array are
            // created later (and therefore are newer and get displayed earlier)
            foreach ( array_reverse( $suggestions ) as $suggestion ) {
                $this->notificationHelper->createNotification(
                    $user,
                    $title,
                    $this->getMediaUrl( $suggestion ),
                    $this->getSectionHeading( $suggestion ),
                    $this->jobParams['verbose'] ? $this->logger : null,
                    $this->jobParams['dryRun'],
                );
            }
        }

        $numUsers = count( $this->jobParams['notifiedUserIds'] );
        $numNotifications = array_sum( $this->jobParams['notifiedUserIds'] );
        $numMissing = $this->jobParams['numPages'] - $numNotifications;
        $this->logger->info(
            "Finished job. " .
            "In total have notified {$numUsers} users about {$numNotifications} pages. " .
            "Notifications not sent for {$numMissing} pages as they had no available users " .
            "or the suggestions were excluded or didn't meet the confidence threshold."
        );

        return $this->jobParams;
    }

    /**
     * @param array $suggestion a suggestion row
     * @return string|null the row's `section_heading`; null when it is null or absent
     */
    private function getSectionHeading( array $suggestion ): ?string {
        return $suggestion['section_heading'] ?? null;
    }

    /**
     * @param array $suggestion a suggestion row
     * @return bool true when the row has no section heading
     */
    private function isArticleLevelSuggestion( array $suggestion ): bool {
        return $this->getSectionHeading( $suggestion ) === null;
    }

    /**
     * @param array $suggestion a suggestion row
     * @return bool true when the row has a section heading
     */
    private function isSectionLevelSuggestion( array $suggestion ): bool {
        return !$this->isArticleLevelSuggestion( $suggestion );
    }

    /**
     * Builds the URL of the suggested file's page on the wiki named by the row's
     * `origin_wiki`, using the canonical File namespace name as title prefix.
     *
     * @param array $suggestion a suggestion row with `origin_wiki` and `image`
     * @return string
     */
    private function getMediaUrl( array $suggestion ): string {
        return $this->wikiMapHelper->getForeignURL(
            $suggestion['origin_wiki'],
            $this->namespaceInfo->getCanonicalName( NS_FILE ) . ':' .
            $suggestion['image']
        );
    }

    /**
     * @see https://www.mediawiki.org/wiki/Platform_Engineering_Team/Data_Value_Stream/Data_Gateway#Suggestions
     * @see https://www.mediawiki.org/wiki/Platform_Engineering_Team/Data_Value_Stream/Data_Gateway#Instanceof_(cache)
     * @param int $pageId
     * @return array of filtered suggestions
     *     - the first element is the first article-level suggestion (sorted by confidence), if one exists
     *     - followed by up to MAX_SECTION_SUGGESTIONS_PER_NOTIFICATION section-level suggestions
     *         - initially ordered by confidence, so we return the suggestions with the highest confidence
     *         - then re-ordered so section-suggestions are in the same order as the sections on the page
     *     - each value being a row structured as per 1st @see
     */
    private function getSuggestions( int $pageId ): array {
        $currentWikiId = WikiMap::getCurrentWikiId();

        // Normalise the optional exclusion list once, so that an absent/empty parameter can
        // neither trigger the extra request nor reach array_intersect() as a non-array.
        $excludeInstanceOf = empty( $this->jobParams['excludeInstanceOf'] )
            ? []
            : (array)$this->jobParams['excludeInstanceOf'];

        $requests = [ [ 'method' => 'GET', 'url' => sprintf( $this->suggestionsUri, $currentWikiId, $pageId ) ] ];
        if ( $excludeInstanceOf ) {
            $requests[] = [ 'method' => 'GET', 'url' => sprintf( $this->instanceOfUri, $currentWikiId, $pageId ) ];
        }

        $responses = $this->multiHttpClient->runMulti( $requests );
        // Undecodable or empty bodies are treated as "no data"
        $results = array_map(
            static function ( $response ) {
                return json_decode( $response['response']['body'], true ) ?: [];
            },
            $responses
        );

        // If the page is an instance of an entity we wish to exclude, drop the *article*
        // level suggestions only; section-level suggestions are still considered.
        $filterArticleSuggestions = $excludeInstanceOf
            && array_intersect( $excludeInstanceOf, $results[1]['rows'][0]['instance_of'] ?? [] );

        $rows = array_filter(
            $results[0]['rows'] ?? [],
            function ( array $row ) use ( $filterArticleSuggestions ) {
                if ( $this->isArticleLevelSuggestion( $row ) ) {
                    return !$filterArticleSuggestions
                        && $row['confidence'] >= $this->jobParams['minConfidence'];
                }
                return $row['confidence'] >= $this->jobParams['minConfidenceSection'];
            }
        );

        // Highest confidence first, so the de-duplication below keeps the best row
        usort(
            $rows,
            static function ( array $a, array $b ) {
                return $b['confidence'] <=> $a['confidence'];
            }
        );

        // Only 1 suggestion per section. The heading is cast to string once and used for both
        // the lookup and the assignment; article-level rows (null heading) share the key ''.
        $bestPerSection = [];
        foreach ( $rows as $row ) {
            $sectionKey = (string)$this->getSectionHeading( $row );
            if ( !isset( $bestPerSection[$sectionKey] ) ) {
                $bestPerSection[$sectionKey] = $row;
            }
        }
        $rows = array_values( $bestPerSection );

        $articleSuggestion = array_slice(
            array_filter( $rows, [ $this, 'isArticleLevelSuggestion' ] ), 0, 1
        );
        $sectionSuggestions = array_slice(
            array_filter( $rows, [ $this, 'isSectionLevelSuggestion' ] ),
            0,
            self::MAX_SECTION_SUGGESTIONS_PER_NOTIFICATION
        );
        // Present the kept section suggestions in page order
        usort(
            $sectionSuggestions,
            static function ( array $a, array $b ) {
                return (int)$a['section_index'] <=> (int)$b['section_index'];
            }
        );
        return array_merge( $articleSuggestion, $sectionSuggestions );
    }

    /**
     * Selects the user who should be notified about suggestions for the given title.
     *
     * Candidates are users watching the title whose edit count is at least the `minEditCount`
     * job parameter, ordered by the timestamp of their revisions to the page (newest first).
     * Users are excluded when they were already notified about this page, have reached
     * `maxNotificationsPerUser` in this run, or are already known to have opted out. The
     * first remaining candidate with a web, email or push subscription to the event is
     * returned, and the opt-in lookups are cached in the `optedInUserIds` job parameter.
     *
     * @param Title $title
     * @return UserIdentity|null
     */
    private function getUserForTitle( Title $title ): ?UserIdentity {
        // list of users who have already received an image suggestion notification for this page
        $previouslyNotifiedUserIds = $this->dbrEcho->newSelectQueryBuilder()
            ->select( 'notification_user' )
            ->distinct()
            ->from( 'echo_notification' )
            ->join( 'echo_event', null, 'notification_event = event_id' )
            ->where( [
                'event_type' => Hooks::EVENT_NAME,
                'event_page_id' => $title->getId()
            ] )
            ->caller( __METHOD__ )
            ->fetchFieldValues();

        // list of users who've already been notified a certain amount of times in this run
        $maxNotifiedUserIds = array_keys(
            array_filter(
                $this->jobParams['notifiedUserIds'],
                function ( $amount ) {
                    return $amount >= $this->jobParams['maxNotificationsPerUser'];
                }
            )
        );

        // list of users who have opted out of receiving any kind of image suggestions notification
        $optedOutUserIds = array_keys(
            array_filter(
                $this->jobParams['optedInUserIds'],
                static function ( $value ) {
                    return $value !== true;
                }
            )
        );

        $excludeUserIds = array_merge( $previouslyNotifiedUserIds, $maxNotifiedUserIds, $optedOutUserIds );

        $userIds = $this->dbr->newSelectQueryBuilder()
            ->select( 'wl_user' )
            ->distinct()
            ->from( 'watchlist' )
            ->join( 'user', null, 'user_id = wl_user' )
            ->join( 'actor', null, 'actor_user = wl_user' )
            ->join( 'page', null, [ 'page_namespace = wl_namespace', 'page_title = wl_title' ] )
            // Both conditions must be raw SQL strings: a 'rev_actor' => 'actor_id' pair would
            // be rendered as a comparison against the quoted literal 'actor_id', not the column.
            ->leftJoin( 'revision', null, [ 'rev_page = page_id', 'rev_actor = actor_id' ] )
            ->where( $excludeUserIds ? [ $this->dbr->expr( 'wl_user', '!=', $excludeUserIds ) ] : [] )
            ->andWhere( [
                'wl_namespace' => $title->getNamespace(),
                'wl_title' => $title->getDBkey(),
                $this->dbr->expr( 'user_editcount', '>=', (int)$this->jobParams['minEditCount'] ),
            ] )
            ->orderBy( 'rev_timestamp', SelectQueryBuilder::SORT_DESC )
            ->limit( 1000 )
            ->caller( __METHOD__ )
            ->fetchFieldValues();

        // iterate users to figure out whether they've opted in to any type of notifications
        // for this event, and store the known results in $this->jobParams['optedInUserIds'] so we can
        // easily exclude these for the next result right away.
        // we can't do this in the query because not all these options are available in the
        // same database: GlobalPreferences may live elsewhere
        foreach ( $userIds as $userId ) {
            $user = $this->userFactory->newFromId( $userId );

            // check whether user is already known to have opted in
            if ( $this->jobParams['optedInUserIds'][$userId] ?? false ) {
                return $user;
            }

            foreach ( [ 'web', 'email', 'push' ] as $type ) {
                $optionName = "echo-subscriptions-$type-" . Hooks::EVENT_NAME;
                if ( $this->userOptionsLookup->getOption( $user, $optionName ) ) {
                    $this->jobParams['optedInUserIds'][$userId] = true;
                    return $user;
                }
            }
            $this->jobParams['optedInUserIds'][$userId] = false;
        }

        return null;
    }
}