Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
10.67% covered (danger)
10.67%
24 / 225
33.33% covered (danger)
33.33%
1 / 3
CRAP
0.00% covered (danger)
0.00%
0 / 1
ChoiceDataProvider
10.67% covered (danger)
10.67%
24 / 225
33.33% covered (danger)
33.33%
1 / 3
223.03
0.00% covered (danger)
0.00%
0 / 1
 invalidateCache
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 getChoices
100.00% covered (success)
100.00%
24 / 24
100.00% covered (success)
100.00%
1 / 1
2
 fetchChoices
0.00% covered (danger)
0.00%
0 / 198
0.00% covered (danger)
0.00%
0 / 1
210
1<?php
2
3use MediaWiki\MediaWikiServices;
4use Wikimedia\Rdbms\Database;
5use Wikimedia\Rdbms\IDatabase;
6
/**
 * Provides a set of campaign and banner choices based on allocations for a
 * given project and language combination.
 *
 * Choice data is read from the replica database and stored in the main WAN
 * object cache under a key built from the project and language. All entries
 * share one check key, so invalidateCache() invalidates every entry at once.
 */
class ChoiceDataProvider {

    /** @var string Common prefix for choice data cache keys */
    private const CACHE_KEY_NAMESPACE = 'CentralNoticeChoiceData';

    /** @var int Time-to-live for choice data cache entries, in seconds */
    private const CACHE_TTL = 3600;

    /**
     * Invalidate the shared global cache.
     *
     * Touches the check key that every choice data cache entry is registered
     * against in getChoices().
     *
     * @return bool The result of touching the check key
     */
    public static function invalidateCache() {
        $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
        return $cache->touchCheckKey(
            $cache->makeGlobalKey( self::CACHE_KEY_NAMESPACE, 'check' ) );
    }

    /**
     * Get a data structure with the allocation choices.
     *
     * @param string $project The project to get choices for
     * @param string $language The language to get choices for
     * @return array A structure of arrays. The outer array contains associative
     *   arrays that represent campaigns. One campaign property is 'banners',
     *   which has as its value an array of associative arrays that represent
     *   banners. Note that only some properties of campaigns and banners
     *   are provided.
     */
    public static function getChoices( $project, $language ) {
        $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

        $dataKey = $cache->makeGlobalKey( self::CACHE_KEY_NAMESPACE, $project, $language );
        $checkKey = $cache->makeGlobalKey( self::CACHE_KEY_NAMESPACE, 'check' );

        $choices = $cache->getWithSetCallback(
            $dataKey,
            self::CACHE_TTL,
            function ( $oldValue, &$ttl, array &$setOpts )
                use ( $project, $language ) {
                $dbr = CNDatabase::getDb( DB_REPLICA );

                // Account for replica lag to prevent a race condition when
                // campaigns are updated, the cache is invalidated, and
                // a client queries a yet-unsynced replica DB.
                $setOpts += Database::getCacheSetOptions( $dbr );

                return self::fetchChoices( $project, $language, $dbr );
            },
            [
                // We don't bother with the lockTSE option because the only
                // potentially high-volume requests that would ask for this
                // object are heavily cached by Varnish, for all users. (Those
                // requests are for load.php.)
                'checkKeys' => [ $checkKey ],
                'pcTTL' => $cache::TTL_PROC_LONG,
            ]
        );

        // Filter out campaigns that have ended since we last queried the
        // database or which have not started yet. array_values re-orders numeric
        // keys, in case elements were removed.
        $now = time();
        return array_values( array_filter(
            $choices,
            static function ( $choice ) use ( $now ) {
                return $choice['end'] >= $now && $choice['start'] <= $now;
            }
        ) );
    }

    /**
     * Query the database for the campaigns and banners allocated to a project
     * and language, and assemble them into the structure that is cached.
     *
     * Campaigns starting within the next CACHE_TTL seconds are included, since
     * the result may be served from cache for that long; getChoices() filters
     * by the current time on every call.
     *
     * @param string $project The project to get choices for
     * @param string $language The language to get choices for
     * @param IDatabase $dbr Database connection to read from
     * @return array[] List of campaigns ordered by name. Each campaign's
     *   'banners' is a list ordered by name, and each banner's 'devices' (as
     *   well as a geotargeted campaign's 'countries' and 'regions') is a
     *   sorted list without duplicates.
     */
    private static function fetchChoices( $project, $language,
        IDatabase $dbr
    ) {
        // For speed, we'll do our own queries instead of using methods in
        // Campaign and Banner.

        // Set up conditions
        // Choice data will be cached for up to an hour, so we want to include
        // campaigns that will start during that interval.
        $start = $dbr->timestamp( time() + self::CACHE_TTL );
        $end = $dbr->timestamp();
        $conds = [
            'notices.not_start <= ' . $dbr->addQuotes( $start ),
            'notices.not_end >= ' . $dbr->addQuotes( $end ),
            'notices.not_enabled' => 1,
            'notices.not_archived' => 0,
            'notice_projects.np_project' => $project,
            'notice_languages.nl_language' => $language
        ];

        // Query campaigns and banners at once
        $dbRows = $dbr->select(
            [
                'notices' => 'cn_notices',
                'assignments' => 'cn_assignments',
                'templates' => 'cn_templates',
                'notice_projects' => 'cn_notice_projects',
                'notice_languages' => 'cn_notice_languages',
            ],
            [
                'notices.not_id',
                'notices.not_name',
                'notices.not_start',
                'notices.not_end',
                'notices.not_preferred',
                'notices.not_throttle',
                'notices.not_geo',
                'notices.not_buckets',
                'notices.not_type',
                'assignments.tmp_weight',
                'assignments.asn_bucket',
                'templates.tmp_id',
                'templates.tmp_name',
                'templates.tmp_display_anon',
                'templates.tmp_display_account',
                'templates.tmp_category'
            ],
            $conds,
            __METHOD__,
            [],
            [
                'assignments' => [
                    'INNER JOIN', 'notices.not_id = assignments.not_id'
                ],
                'templates' => [
                    'INNER JOIN', 'assignments.tmp_id = templates.tmp_id'
                ],
                'notice_projects' => [
                    'INNER JOIN', 'notices.not_id = notice_projects.np_notice_id'
                ],
                'notice_languages' => [
                    'INNER JOIN', 'notices.not_id = notice_languages.nl_notice_id'
                ]
            ]
        );

        // Pare it down into a nicer data structure and prepare the next queries.
        // We'll create a structure with keys that are useful for piecing the
        // data together. But before returning it, we'll change associative
        // arrays to indexed ones at levels where the keys are not needed by the
        // client.
        $choices = [];
        $bannerIds = [];
        $assignmentKeysByBannerIdAndCampaignId = [];

        foreach ( $dbRows as $dbRow ) {
            $campaignId = $dbRow->not_id;
            $campaignName = $dbRow->not_name;
            $bannerId = $dbRow->tmp_id;
            $bannerName = $dbRow->tmp_name;
            $bucket = $dbRow->asn_bucket;

            // FIXME Temporary hack to substitute the magic words {{{campaign}}}
            // and {{{banner}}} in banner categories. (These are the magic
            // words mentioned in the CN Admin UI.)
            $category = $dbRow->tmp_category;
            $category = str_replace( '{{{campaign}}}', $campaignName, $category );
            $category = str_replace( '{{{banner}}}', $bannerName, $category );
            $category = Banner::sanitizeRenderedCategory( $category );

            // The first time we see any campaign, create the corresponding
            // outer K/V entry. The campaign-specific properties should be
            // repeated on every row for any campaign. Note that these
            // keys don't make it into data structure we return.
            if ( !isset( $choices[$campaignId] ) ) {
                $choices[$campaignId] = [
                    'name' => $campaignName,
                    'start' => intval( wfTimestamp( TS_UNIX, $dbRow->not_start ) ),
                    'end' => intval( wfTimestamp( TS_UNIX, $dbRow->not_end ) ),
                    'preferred' => intval( $dbRow->not_preferred ),
                    'throttle' => intval( $dbRow->not_throttle ),
                    'bucket_count' => intval( $dbRow->not_buckets ),
                    'geotargeted' => (bool)$dbRow->not_geo,
                    'type' => $dbRow->not_type,
                    'banners' => []
                ];
            }

            // A temporary assignment key so we can get back to this part of the
            // data structure quickly and add in devices.
            $assignmentKey = $bannerId . ':' . $bucket;

            $choices[$campaignId]['banners'][$assignmentKey] = [
                'name' => $bannerName,
                'bucket' => intval( $bucket ),
                'weight' => intval( $dbRow->tmp_weight ),
                'category' => $category,
                'display_anon' => (bool)$dbRow->tmp_display_anon,
                'display_account' => (bool)$dbRow->tmp_display_account,
                'devices' => [] // To be filled by the last query
            ];

            $bannerIds[] = $bannerId;

            // Add to the index so we can get back here.
            // Note that PHP creates arrays here as needed.
            $assignmentKeysByBannerIdAndCampaignId[$bannerId][$campaignId][] =
                $assignmentKey;
        }

        // If there's nothing, return the empty array now
        if ( count( $choices ) === 0 ) {
            return $choices;
        }

        // Fetch countries.
        // We have to eliminate notices that are not geotargeted, since they
        // may have residual data in the cn_notice_countries table.
        $dbRows = $dbr->select(
            [
                'notices' => 'cn_notices',
                'notice_countries' => 'cn_notice_countries',
            ],
            [
                'notices.not_id',
                'notice_countries.nc_country'
            ],
            [
                'notices.not_geo' => 1,
                'notices.not_id' => array_keys( $choices )
            ],
            __METHOD__,
            [],
            [
                'notice_countries' => [
                    'INNER JOIN', 'notices.not_id = notice_countries.nc_notice_id'
                ]
            ]
        );

        // Add countries to our data structure. They are de-duplicated and
        // sorted for every campaign in normalizeChoices().
        // Note that PHP creates an empty array for countries as needed.
        foreach ( $dbRows as $dbRow ) {
            $choices[$dbRow->not_id]['countries'][] = $dbRow->nc_country;
        }

        // Fetch regions.
        // We have to eliminate notices that are not geotargeted, since they
        // may have residual data in the cn_notice_regions table.
        $dbRows = $dbr->select(
            [
                'notices' => 'cn_notices',
                'notice_regions' => 'cn_notice_regions',
            ],
            [
                'notices.not_id',
                'notice_regions.nr_region'
            ],
            [
                'notices.not_geo' => 1,
                'notices.not_id' => array_keys( $choices )
            ],
            __METHOD__,
            [],
            [
                'notice_regions' => [
                    'INNER JOIN', 'notices.not_id = notice_regions.nr_notice_id'
                ]
            ]
        );

        // Add regions to our data structure. They are de-duplicated and
        // sorted for every campaign in normalizeChoices().
        // Note that PHP creates an empty array for regions as needed.
        foreach ( $dbRows as $dbRow ) {
            $choices[$dbRow->not_id]['regions'][] = $dbRow->nr_region;
        }

        // Add campaign-associated mixins to the data structure. Iterate by
        // key rather than by reference so no reference into $choices is left
        // dangling after the loop.
        foreach ( array_keys( $choices ) as $campaignId ) {
            // Get info for enabled mixins for this campaign
            $choices[$campaignId]['mixins'] =
                Campaign::getCampaignMixins( $choices[$campaignId]['name'], true );
        }

        // Fetch the devices. A banner ID was recorded once per assignment, so
        // drop the repeats before building the IN() condition.
        $dbRows = $dbr->select(
            [
                'template_devices' => 'cn_template_devices',
                'known_devices' => 'cn_known_devices',
            ],
            [
                'template_devices.tmp_id',
                'known_devices.dev_name'
            ],
            [
                'template_devices.tmp_id' => array_values( array_unique( $bannerIds ) )
            ],
            __METHOD__,
            [],
            [
                'known_devices' => [
                    'INNER JOIN', 'template_devices.dev_id = known_devices.dev_id'
                ]
            ]
        );

        // Add devices to the data structure: each device row applies to every
        // assignment (campaign and bucket) of its banner. Ordering is handled
        // for all assignments in normalizeChoices().
        foreach ( $dbRows as $dbRow ) {
            $assignmentKeysByCampaignId =
                $assignmentKeysByBannerIdAndCampaignId[$dbRow->tmp_id] ?? [];

            foreach ( $assignmentKeysByCampaignId
                as $campaignId => $assignmentKeys
            ) {
                foreach ( $assignmentKeys as $assignmentKey ) {
                    $choices[$campaignId]['banners'][$assignmentKey]['devices'][] =
                        $dbRow->dev_name;
                }
            }
        }

        return self::normalizeChoices( $choices );
    }

    /**
     * Turn the keyed working structure built by fetchChoices() into the plain
     * lists handed to clients.
     *
     * Makes arrays that are associative into plain indexed ones, since the
     * keys aren't used by the clients. Also makes very sure there are no
     * duplicate devices, countries or regions. Finally, ensures consistent
     * ordering at every level, since it's needed for
     * CNChoiceDataResourceLoaderModule for consistent RL module hashes.
     *
     * @param array[] $choices Campaigns keyed by campaign ID, whose 'banners'
     *   are keyed by temporary assignment key
     * @return array[] Campaigns as a list ordered by name
     */
    private static function normalizeChoices( array $choices ) {
        // Byte-wise comparison: unlike the loose == and < operators it never
        // treats numeric-looking names as numbers, so the order is total and
        // the result of usort() is deterministic.
        $compareNames = static function ( array $a, array $b ) {
            return strcmp( $a['name'], $b['name'] );
        };

        $normalized = [];

        foreach ( $choices as $campaign ) {
            $banners = [];
            foreach ( $campaign['banners'] as $banner ) {
                $banner['devices'] = self::uniqueSorted( $banner['devices'] );
                $banners[] = $banner;
            }
            usort( $banners, $compareNames );
            $campaign['banners'] = $banners;

            // Only geotargeted campaigns carry location lists; a geotargeted
            // campaign without any rows gets empty lists.
            if ( $campaign['geotargeted'] ) {
                $campaign['countries'] = self::uniqueSorted( $campaign['countries'] ?? [] );
                $campaign['regions'] = self::uniqueSorted( $campaign['regions'] ?? [] );
            }

            $normalized[] = $campaign;
        }

        usort( $normalized, $compareNames );

        return $normalized;
    }

    /**
     * Remove duplicate values and return the rest as a sorted list.
     *
     * array_unique() preserves keys, which would leave gaps in the list;
     * sort() re-indexes the values from zero.
     *
     * @param array $values
     * @return array Sorted, sequentially indexed values without duplicates
     */
    private static function uniqueSorted( array $values ) {
        $values = array_unique( $values );
        sort( $values );
        return $values;
    }
}