Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.98% |
194 / 196 |
|
88.89% |
8 / 9 |
CRAP | |
0.00% |
0 / 1 |
Notifier | |
98.98% |
194 / 196 |
|
88.89% |
8 / 9 |
30 | |
0.00% |
0 / 1 |
__construct | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
createSearchAfter | |
100.00% |
34 / 34 |
|
100.00% |
1 / 1 |
2 | |||
doSearch | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
run | |
95.12% |
39 / 41 |
|
0.00% |
0 / 1 |
8 | |||
getSectionHeading | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isArticleLevelSuggestion | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isSectionLevelSuggestion | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMediaUrl | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getSuggestions | |
100.00% |
54 / 54 |
|
100.00% |
1 / 1 |
8 | |||
getUserForTitle | |
100.00% |
57 / 57 |
|
100.00% |
1 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\ImageSuggestions; |
4 | |
5 | use CirrusSearch\Connection; |
6 | use CirrusSearch\Elastica\SearchAfter; |
7 | use CirrusSearch\SearchConfig; |
8 | use CirrusSearch\Wikimedia\WeightedTagsHooks; |
9 | use Elastica\Query; |
10 | use Elastica\Query\BoolQuery; |
11 | use Elastica\Query\MatchQuery; |
12 | use Elastica\ResultSet; |
13 | use Elastica\Search; |
14 | use MediaWiki\Config\Config; |
15 | use MediaWiki\Title\NamespaceInfo; |
16 | use MediaWiki\Title\Title; |
17 | use MediaWiki\Title\TitleFactory; |
18 | use MediaWiki\User\Options\UserOptionsLookup; |
19 | use MediaWiki\User\UserFactory; |
20 | use MediaWiki\User\UserIdentity; |
21 | use MediaWiki\WikiMap\WikiMap; |
22 | use Psr\Log\LoggerInterface; |
23 | use Wikimedia\Http\MultiHttpClient; |
24 | use Wikimedia\Rdbms\IReadableDatabase; |
25 | use Wikimedia\Rdbms\SelectQueryBuilder; |
26 | |
27 | class Notifier { |
28 | private MultiHttpClient $multiHttpClient; |
29 | private UserFactory $userFactory; |
30 | private UserOptionsLookup $userOptionsLookup; |
31 | private NamespaceInfo $namespaceInfo; |
32 | private string $suggestionsUri; |
33 | private string $instanceOfUri; |
34 | private LoggerInterface $logger; |
35 | private IReadableDatabase $dbr; |
36 | private IReadableDatabase $dbrEcho; |
37 | private Config $searchConfig; |
38 | private Connection $searchConnection; |
39 | private TitleFactory $titleFactory; |
40 | private NotificationHelper $notificationHelper; |
41 | private WikiMapHelper $wikiMapHelper; |
42 | private array $jobParams; |
43 | |
44 | private SearchAfter $searchAfter; |
45 | |
46 | public const MAX_SECTION_SUGGESTIONS_PER_NOTIFICATION = 5; |
47 | |
/**
 * Stores the injected services, normalises the job parameters and builds the
 * search-after cursor that run() reads its batch from.
 *
 * @codeCoverageIgnore
 *
 * @param string $suggestionsUri sprintf() pattern of the suggestions endpoint; it is
 *  filled with the current wiki id and a page id, in that order
 * @param string $instanceOfUri sprintf() pattern of the instance-of endpoint; it is
 *  filled with the same two arguments
 * @param MultiHttpClient $multiHttpClient
 * @param UserFactory $userFactory
 * @param UserOptionsLookup $userOptionsLookup
 * @param NamespaceInfo $namespaceInfo
 * @param IReadableDatabase $mainDbConnection Connection used for the watchlist query
 * @param IReadableDatabase $echoDbConnection Connection used for the echo_* tables
 * @param LoggerInterface $logger
 * @param Config $searchConfig
 * @param Connection $searchConnection
 * @param TitleFactory $titleFactory
 * @param NotificationHelper $notificationHelper
 * @param WikiMapHelper $wikiMapHelper
 * @param array $jobParams Options and running state of the job. Keys read by this class:
 *  batchSize, lastPageId, verbose, dryRun, excludeInstanceOf, minConfidence,
 *  minConfidenceSection, maxNotificationsPerUser, minEditCount, and the running
 *  state numPages, notifiedUserIds and optedInUserIds
 */
public function __construct(
	string $suggestionsUri,
	string $instanceOfUri,
	MultiHttpClient $multiHttpClient,
	UserFactory $userFactory,
	UserOptionsLookup $userOptionsLookup,
	NamespaceInfo $namespaceInfo,
	IReadableDatabase $mainDbConnection,
	IReadableDatabase $echoDbConnection,
	LoggerInterface $logger,
	Config $searchConfig,
	Connection $searchConnection,
	TitleFactory $titleFactory,
	NotificationHelper $notificationHelper,
	WikiMapHelper $wikiMapHelper,
	array $jobParams
) {
	$this->suggestionsUri = $suggestionsUri;
	$this->instanceOfUri = $instanceOfUri;

	$this->multiHttpClient = $multiHttpClient;
	$this->userFactory = $userFactory;
	$this->userOptionsLookup = $userOptionsLookup;
	$this->namespaceInfo = $namespaceInfo;

	$this->dbr = $mainDbConnection;
	$this->dbrEcho = $echoDbConnection;
	$this->logger = $logger;

	$this->searchConfig = $searchConfig;
	$this->searchConnection = $searchConnection;
	$this->titleFactory = $titleFactory;
	$this->notificationHelper = $notificationHelper;
	$this->wikiMapHelper = $wikiMapHelper;

	// With the array union operator the left operand wins, so the defaults have to be
	// on the right: otherwise a 'numPages' count handed over from a previous batch
	// would be reset to 0 while 'notifiedUserIds' keeps accumulating, and the summary
	// computed in run() would no longer add up.
	$this->jobParams = $jobParams + [
		'numPages' => 0,
		'notifiedUserIds' => [],
		'optedInUserIds' => [],
	];

	// must happen last: the cursor is configured from $this->jobParams and the search services
	$this->searchAfter = $this->createSearchAfter();
}
90 | |
/**
 * Builds the SearchAfter cursor over all pages in the content-namespace index that
 * carry an article-level or a section-level image recommendation tag.
 *
 * Results are sorted by page_id, fetched in batches of jobParams['batchSize'] and
 * contain no _source, only the stored _id field. When jobParams['lastPageId'] is
 * positive the cursor is initialised with it (presumably so the search resumes
 * after that page).
 *
 * @return SearchAfter
 */
private function createSearchAfter(): SearchAfter {
	$connection = $this->searchConnection;
	$client = $connection->getClient();
	$indexBaseName = $this->searchConfig->get( SearchConfig::INDEX_BASE_NAME );
	$contentNamespaces = $this->searchConfig->get( 'ContentNamespaces' );
	$index = $connection->getIndex(
		$indexBaseName,
		$connection->pickIndexSuffixForNamespaces( $contentNamespaces )
	);

	// a page qualifies as soon as it has at least one of the two recommendation tags
	$recommendationTags = [
		'recommendation.image/exists',
		'recommendation.image_section/exists',
	];
	$hasAnyRecommendation = new BoolQuery();
	foreach ( $recommendationTags as $tag ) {
		$tagQuery = new MatchQuery();
		$tagQuery->setFieldQuery( WeightedTagsHooks::FIELD_NAME, $tag );
		$hasAnyRecommendation->addShould( $tagQuery );
	}
	$hasAnyRecommendation->setMinimumShouldMatch( 1 );

	$pageQuery = new Query();
	$pageQuery->setQuery( $hasAnyRecommendation );
	$pageQuery->setSize( $this->jobParams['batchSize'] );
	$pageQuery->setSource( false );
	$pageQuery->setSort( [ 'page_id' ] );
	$pageQuery->setStoredFields( [ '_id' ] );

	$pageSearch = new Search( $client );
	$pageSearch->setQuery( $pageQuery );
	$pageSearch->addIndex( $index );

	$cursor = new SearchAfter( $pageSearch );
	$lastPageId = $this->jobParams['lastPageId'];
	if ( $lastPageId > 0 ) {
		$cursor->initializeSearchAfter( [ $lastPageId ] );
	}

	return $cursor;
}
132 | |
/**
 * Rewinds the search-after cursor and returns the result set it then points at.
 *
 * @return ResultSet
 */
private function doSearch(): ResultSet {
	$cursor = $this->searchAfter;
	$cursor->rewind();

	return $cursor->current();
}
137 | |
/**
 * Processes one batch of search results: for every page that resolves to a title,
 * picks a user to notify, fetches the page's suggestions and creates one
 * notification per suggestion.
 *
 * Running state is kept in the job parameters: 'lastPageId' (id of the last search
 * hit seen), 'numPages' (hits that resolved to a title) and 'notifiedUserIds'
 * (user id => number of pages the user was notified about).
 *
 * @return array|null The updated job parameters, or null when the search returned
 *  no results
 */
public function run(): ?array {
	$searchResults = $this->doSearch();
	if ( count( $searchResults ) === 0 ) {
		// Running out of pages is how the job normally ends, not a failure
		$this->logger->info( 'No more articles with suggestions found' );
		return null;
	}

	// Make sure the per-user maps exist even when the caller did not pass them, so
	// they can be filtered while picking users and counted/summed in the summary
	// below even if this batch creates no notification at all.
	$this->jobParams['notifiedUserIds'] ??= [];
	$this->jobParams['optedInUserIds'] ??= [];

	// only hand the logger to the notification helper in verbose mode
	$notificationLogger = !empty( $this->jobParams['verbose'] ) ? $this->logger : null;
	$dryRun = $this->jobParams['dryRun'] ?? false;

	foreach ( $searchResults as $searchResult ) {
		$pageId = (int)$searchResult->getId();
		// remember how far we got, whether or not this page leads to a notification
		$this->jobParams['lastPageId'] = $pageId;
		$title = $this->titleFactory->newFromId( $pageId );
		if ( !$title ) {
			$this->logger->debug( 'No title found for ' . $pageId );
			continue;
		}

		$this->jobParams['numPages']++;
		$user = $this->getUserForTitle( $title );
		if ( !$user ) {
			$this->logger->debug( 'No user found for ' . $title->getDBkey() );
			continue;
		}

		$suggestions = $this->getSuggestions( $pageId );
		if ( !$suggestions ) {
			$this->logger->debug( 'No suggestions found for ' . $pageId );
			continue;
		}

		$userId = $user->getId();
		$this->jobParams['notifiedUserIds'][$userId] =
			( $this->jobParams['notifiedUserIds'][$userId] ?? 0 ) + 1;

		// If we have a bundle of notifications the newest ones are displayed first.
		// Reverse the order of the array so that the elements earlier in the array are
		// created later (and therefore are newer and get displayed earlier)
		foreach ( array_reverse( $suggestions ) as $suggestion ) {
			$this->notificationHelper->createNotification(
				$user,
				$title,
				$this->getMediaUrl( $suggestion ),
				$this->getSectionHeading( $suggestion ),
				$notificationLogger,
				$dryRun,
			);
		}
	}

	$numUsers = count( $this->jobParams['notifiedUserIds'] );
	$numNotifications = array_sum( $this->jobParams['notifiedUserIds'] );
	$numMissing = $this->jobParams['numPages'] - $numNotifications;
	$this->logger->info(
		"Finished job. " .
		"In total have notified {$numUsers} users about {$numNotifications} pages. " .
		"Notifications not sent for {$numMissing} pages as they had no available users " .
		"or the suggestions were excluded or didn't meet the confidence threshold."
	);

	return $this->jobParams;
}
197 | |
/**
 * Returns the section heading a suggestion row belongs to.
 *
 * @param array $suggestion Suggestion row
 * @return string|null The row's 'section_heading'; null when it is null or absent,
 *  which the rest of this class treats as an article-level suggestion
 */
private function getSectionHeading( array $suggestion ): ?string {
	// `??` yields the same null for a missing key, without the undefined-key warning
	return $suggestion['section_heading'] ?? null;
}
201 | |
/**
 * Tells whether a suggestion row applies to the article as a whole, i.e. it has
 * no section heading.
 *
 * @param array $suggestion Suggestion row
 * @return bool
 */
private function isArticleLevelSuggestion( array $suggestion ): bool {
	$sectionHeading = $this->getSectionHeading( $suggestion );

	return $sectionHeading === null;
}
205 | |
/**
 * Tells whether a suggestion row applies to a specific section, i.e. it has a
 * section heading.
 *
 * @param array $suggestion Suggestion row
 * @return bool
 */
private function isSectionLevelSuggestion( array $suggestion ): bool {
	return $this->getSectionHeading( $suggestion ) !== null;
}
209 | |
/**
 * Builds the URL of the suggested file's page on the wiki the image comes from.
 *
 * @param array $suggestion Suggestion row; its 'origin_wiki' and 'image' values are used
 * @return string
 */
private function getMediaUrl( array $suggestion ): string {
	$fileNamespace = $this->namespaceInfo->getCanonicalName( NS_FILE );
	$filePage = $fileNamespace . ':' . $suggestion['image'];

	return $this->wikiMapHelper->getForeignURL( $suggestion['origin_wiki'], $filePage );
}
217 | |
/**
 * Fetches the image suggestions for a page and reduces them to the ones worth
 * notifying about.
 *
 * @see https://www.mediawiki.org/wiki/Platform_Engineering_Team/Data_Value_Stream/Data_Gateway#Suggestions
 * @see https://www.mediawiki.org/wiki/Platform_Engineering_Team/Data_Value_Stream/Data_Gateway#Instanceof_(cache)
 * @param int $pageId
 * @return array of filtered suggestions
 *  - the first element is the first article-level suggestion (sorted by confidence), if one exists
 *  - followed by up to MAX_SECTION_SUGGESTIONS_PER_NOTIFICATION section-level suggestions
 *    - initially ordered by confidence, so we return the suggestions with the highest confidence
 *    - then re-ordered so section-suggestions are in the same order as the sections on the page
 *  - each value being a row structured as per 1st @see
 */
private function getSuggestions( int $pageId ): array {
	$currentWikiId = WikiMap::getCurrentWikiId();

	// The exclusion list is optional: normalise "absent" and "empty" to [] so that it
	// can safely be handed to array_intersect() further down.
	$excludeInstanceOf = $this->jobParams['excludeInstanceOf'] ?? [];
	$excludeInstanceOf = $excludeInstanceOf ? (array)$excludeInstanceOf : [];

	$requests = [ [ 'method' => 'GET', 'url' => sprintf( $this->suggestionsUri, $currentWikiId, $pageId ) ] ];
	if ( $excludeInstanceOf ) {
		// only ask what the page is an instance of when there is something to exclude
		$requests[] = [ 'method' => 'GET', 'url' => sprintf( $this->instanceOfUri, $currentWikiId, $pageId ) ];
	}

	$responses = $this->multiHttpClient->runMulti( $requests );
	$results = array_map(
		static function ( $response ) {
			// anything that does not decode to an array (failed request, invalid or
			// scalar JSON) is treated as an empty result
			$decoded = json_decode( $response['response']['body'] ?? '', true );
			return is_array( $decoded ) ? $decoded : [];
		},
		$responses
	);

	// if the page is an instance of an entity we wish to exclude, then filter out *article*
	// level suggestions only; section-level suggestions are still eligible.
	// $results[1] only exists when the instance-of request was made.
	$instanceOf = (array)( $results[1]['rows'][0]['instance_of'] ?? [] );
	$filterArticleSuggestions = (bool)array_intersect( $excludeInstanceOf, $instanceOf );

	$rows = $results[0]['rows'] ?? [];
	if ( !is_array( $rows ) ) {
		$rows = [];
	}

	// drop excluded article-level rows and everything below the applicable threshold
	$rows = array_filter(
		$rows,
		function ( array $row ) use ( $filterArticleSuggestions ) {
			if ( $this->isArticleLevelSuggestion( $row ) ) {
				return !$filterArticleSuggestions &&
					$row['confidence'] >= $this->jobParams['minConfidence'];
			}
			return $row['confidence'] >= $this->jobParams['minConfidenceSection'];
		}
	);

	// highest confidence first
	usort(
		$rows,
		static function ( array $a, array $b ) {
			return $b['confidence'] <=> $a['confidence'];
		}
	);

	// only 1 suggestion per section: rows are sorted by confidence, so the first row
	// seen for a heading is the best one. Article-level rows (null heading) share the
	// '' key; the same string key is used for both the lookup and the write.
	$bestPerSection = [];
	foreach ( $rows as $row ) {
		$sectionKey = (string)( $row['section_heading'] ?? '' );
		$bestPerSection[$sectionKey] ??= $row;
	}
	$rows = array_values( $bestPerSection );

	$articleSuggestion = array_slice(
		array_filter( $rows, fn ( array $row ): bool => $this->isArticleLevelSuggestion( $row ) ),
		0,
		1
	);
	$sectionSuggestions = array_slice(
		array_filter( $rows, fn ( array $row ): bool => $this->isSectionLevelSuggestion( $row ) ),
		0,
		self::MAX_SECTION_SUGGESTIONS_PER_NOTIFICATION
	);
	// the section suggestions were picked by confidence; present them in page order
	usort(
		$sectionSuggestions,
		static function ( array $a, array $b ) {
			return (int)( $a['section_index'] ?? 0 ) <=> (int)( $b['section_index'] ?? 0 );
		}
	);

	return array_merge( $articleSuggestion, $sectionSuggestions );
}
299 | |
/**
 * Picks the user who should be notified about suggestions for a page.
 *
 * Candidates are users watching the page whose edit count is at least
 * jobParams['minEditCount']. Excluded are users who already received an image
 * suggestion notification for this page, users who reached
 * jobParams['maxNotificationsPerUser'], and users already known to have opted out.
 * Candidates are ordered by the timestamp of their revisions to the page, newest
 * first, and the first one subscribed to web, email or push notifications for this
 * event is returned. Opt-in lookups are remembered in jobParams['optedInUserIds'].
 *
 * @param Title $title
 * @return UserIdentity|null The user to notify, or null if no candidate qualifies
 */
private function getUserForTitle( Title $title ): ?UserIdentity {
	// tolerate job parameters that do not carry the per-user maps yet
	$notifiedUserIds = $this->jobParams['notifiedUserIds'] ?? [];
	$optedInUserIds = $this->jobParams['optedInUserIds'] ?? [];

	// list of users who have already received an image suggestion notification for this page
	$previouslyNotifiedUserIds = $this->dbrEcho->newSelectQueryBuilder()
		->select( 'notification_user' )
		->distinct()
		->from( 'echo_notification' )
		->join( 'echo_event', null, 'notification_event = event_id' )
		->where( [
			'event_type' => Hooks::EVENT_NAME,
			'event_page_id' => $title->getId()
		] )
		->caller( __METHOD__ )
		->fetchFieldValues();

	// list of users who've already been notified a certain amount of times in this run
	$maxNotifiedUserIds = array_keys(
		array_filter(
			$notifiedUserIds,
			function ( $amount ) {
				return $amount >= $this->jobParams['maxNotificationsPerUser'];
			}
		)
	);

	// list of users who have opted out of receiving any kind of image suggestions notification
	$optedOutUserIds = array_keys(
		array_filter(
			$optedInUserIds,
			static function ( $value ) {
				return $value !== true;
			}
		)
	);

	$excludeUserIds = array_merge( $previouslyNotifiedUserIds, $maxNotifiedUserIds, $optedOutUserIds );

	$userIds = $this->dbr->newSelectQueryBuilder()
		->select( 'wl_user' )
		->distinct()
		->from( 'watchlist' )
		->join( 'user', null, 'user_id = wl_user' )
		->join( 'actor', null, 'actor_user = wl_user' )
		->join( 'page', null, [ 'page_namespace = wl_namespace', 'page_title = wl_title' ] )
		// Both conditions must be raw SQL strings: a 'rev_actor' => 'actor_id' entry would
		// be rendered as a comparison with the quoted string 'actor_id' instead of the
		// actor_id column, so no revision would ever join and the ordering below would
		// have no effect.
		->leftJoin( 'revision', null, [ 'rev_page = page_id', 'rev_actor = actor_id' ] )
		->where( $excludeUserIds ? [ $this->dbr->expr( 'wl_user', '!=', $excludeUserIds ) ] : [] )
		->andWhere( [
			'wl_namespace' => $title->getNamespace(),
			'wl_title' => $title->getDBkey(),
			$this->dbr->expr( 'user_editcount', '>=', (int)$this->jobParams['minEditCount'] ),
		] )
		// watchers with the most recent revisions to the page come first
		->orderBy( 'rev_timestamp', SelectQueryBuilder::SORT_DESC )
		->limit( 1000 )
		->caller( __METHOD__ )
		->fetchFieldValues();

	// iterate users to figure out whether they've opted in to any type of notifications
	// for this event, and store the known results in $this->jobParams['optedInUserIds'] so we can
	// easily exclude these for the next result right away.
	// we can't do this in the query because not all these options are available in the
	// same database: GlobalPreferences may live elsewhere
	foreach ( $userIds as $userId ) {
		// field values come back from the database as strings
		$user = $this->userFactory->newFromId( (int)$userId );

		// check whether user is already known to have opted in
		if ( $this->jobParams['optedInUserIds'][$userId] ?? false ) {
			return $user;
		}

		foreach ( [ 'web', 'email', 'push' ] as $type ) {
			$optionName = "echo-subscriptions-$type-" . Hooks::EVENT_NAME;
			if ( $this->userOptionsLookup->getOption( $user, $optionName ) ) {
				$this->jobParams['optedInUserIds'][$userId] = true;
				return $user;
			}
		}
		// not subscribed through any channel: remember that, so the user is excluded
		// in the query for the next page
		$this->jobParams['optedInUserIds'][$userId] = false;
	}

	return null;
}
384 | } |