Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
98.91% |
182 / 184 |
|
66.67% |
2 / 3 |
CRAP | |
0.00% |
0 / 1 |
| ChoiceDataProvider | |
98.91% |
182 / 184 |
|
66.67% |
2 / 3 |
18 | |
0.00% |
0 / 1 |
| invalidateCache | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| getChoices | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
2 | |||
| fetchChoices | |
98.74% |
157 / 159 |
|
0.00% |
0 / 1 |
15 | |||
| 1 | <?php |
| 2 | |
| 3 | use MediaWiki\MediaWikiServices; |
| 4 | use Wikimedia\Rdbms\Database; |
| 5 | use Wikimedia\Rdbms\IReadableDatabase; |
| 6 | |
| 7 | /** |
| 8 | * Provides a set of campaign and banner choices based on allocations for a |
| 9 | * given project and language combination. |
| 10 | */ |
class ChoiceDataProvider {

	/** @var string Common prefix for choice data cache keys */
	private const CACHE_KEY_NAMESPACE = 'CentralNoticeChoiceData';

	/** @var int Time-to-live for choice data cache entries, in seconds */
	private const CACHE_TTL = 3600;

	/**
	 * Invalidate the shared global cache.
	 *
	 * Touches the global "check" key that getChoices() registers as a
	 * checkKey for every per-project/per-language cache entry.
	 *
	 * @return bool
	 */
	public static function invalidateCache() {
		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
		return $cache->touchCheckKey(
			$cache->makeGlobalKey( self::CACHE_KEY_NAMESPACE, 'check' ) );
	}

	/**
	 * Get a data structure with the allocation choices.
	 *
	 * The structure is read through the WAN object cache (keyed by project
	 * and language) and then filtered so that only campaigns whose
	 * start/end interval contains the current time are returned.
	 *
	 * @param string $project The project to get choices for
	 * @param string $language The language to get choices for
	 * @return array A structure of arrays. The outer array contains associative
	 *   arrays that represent campaigns. One campaign property is 'banners',
	 *   which has as its value an array of associative arrays that represent
	 *   banners. Note that only some properties of campaigns and banners
	 *   are provided.
	 */
	public static function getChoices( $project, $language ) {
		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

		$dataKey = $cache->makeGlobalKey( self::CACHE_KEY_NAMESPACE, $project, $language );
		$checkKey = $cache->makeGlobalKey( self::CACHE_KEY_NAMESPACE, 'check' );

		$choices = $cache->getWithSetCallback(
			$dataKey,
			self::CACHE_TTL,
			function ( $oldValue, &$ttl, array &$setOpts )
				use ( $project, $language ) {
				// TODO: Should this instead be a primary query, given the concern about replag?
				$dbr = CNDatabase::getReplicaDb();

				// Account for replica lag to prevent a race condition when
				// campaigns are updated, the cache is invalidated, and
				// a client queries a yet-unsynced replica DB.
				$setOpts += Database::getCacheSetOptions( $dbr );

				return self::fetchChoices( $project, $language, $dbr );
			},
			[
				// We don't bother with the lockTSE option because the only
				// potentially high-volume requests that would ask for this
				// object are heavily cached by Varnish, for all users. (Those
				// requests are for load.php.)
				'checkKeys' => [ $checkKey ],
				'pcTTL' => $cache::TTL_PROC_LONG,
			]
		);

		// Filter out campaigns that have ended since we last queried the
		// database or which have not started yet. array_values re-orders numeric
		// keys, in case elements were removed.
		$now = time();
		return array_values( array_filter(
			$choices,
			static fn ( array $choice ) => $choice['end'] >= $now && $choice['start'] <= $now
		) );
	}

	/**
	 * Query the database and build the choice data for a project/language pair.
	 *
	 * Each returned campaign has the keys 'name', 'start', 'end', 'preferred',
	 * 'throttle', 'bucket_count', 'geotargeted', 'type', 'banners' and
	 * 'mixins'; geotargeted campaigns additionally have 'countries' and
	 * 'regions'. Each banner has 'name', 'bucket', 'weight', 'category',
	 * 'display_anon', 'display_account' and 'devices'.
	 *
	 * Campaigns and banners are ordered by name, and the country, region and
	 * device lists are de-duplicated, sorted and reindexed, so that equal
	 * database contents produce an identically ordered structure.
	 *
	 * @param string $project
	 * @param string $language
	 * @param IReadableDatabase $dbr
	 *
	 * @return array Indexed array of campaigns; empty if nothing matches
	 */
	private static function fetchChoices( $project, $language,
		IReadableDatabase $dbr
	) {
		// For speed, we'll do our own queries instead of using methods in
		// Campaign and Banner.

		// Set up conditions
		// Choice data will be cached for up to an hour, so we want to include
		// campaigns that will start during that interval.
		$start = $dbr->timestamp( time() + self::CACHE_TTL );
		$end = $dbr->timestamp();
		$conds = [
			$dbr->expr( 'notices.not_start', '<=', $start ),
			$dbr->expr( 'notices.not_end', '>=', $end ),
			'notices.not_enabled' => 1,
			'notices.not_archived' => 0,
			'notice_projects.np_project' => $project,
			'notice_languages.nl_language' => $language
		];

		// Query campaigns and banners at once
		$queryBuilder = $dbr->newSelectQueryBuilder()
			->select( [
				'notices.not_id',
				'notices.not_name',
				'notices.not_start',
				'notices.not_end',
				'notices.not_preferred',
				'notices.not_throttle',
				'notices.not_geo',
				'notices.not_buckets',
				'notices.not_type',
				'assignments.tmp_weight',
				'assignments.asn_bucket',
				'templates.tmp_id',
				'templates.tmp_name',
				'templates.tmp_display_anon',
				'templates.tmp_display_account',
				'templates.tmp_category'
			] );
		// Could be null if database is disabled e.g. during tests
		if ( $queryBuilder === null ) {
			return [];
		}
		$dbRows = $queryBuilder->from( 'cn_notices', 'notices' )
			->join( 'cn_assignments', 'assignments', 'notices.not_id = assignments.not_id' )
			->join( 'cn_templates', 'templates', 'assignments.tmp_id = templates.tmp_id' )
			->join( 'cn_notice_projects', 'notice_projects', 'notices.not_id = notice_projects.np_notice_id' )
			->join( 'cn_notice_languages', 'notice_languages', 'notices.not_id = notice_languages.nl_notice_id' )
			->where( $conds )
			->caller( __METHOD__ )
			->fetchResultSet();

		// Pare it down into a nicer data structure and prepare the next queries.
		// We'll create a structure with keys that are useful for piecing the
		// data together. But before returning it, we'll change associative
		// arrays to indexed ones at levels where the keys are not needed by the
		// client.
		$choices = [];
		$bannerIds = [];
		$assignmentKeysByBannerIdAndCampaignId = [];

		foreach ( $dbRows as $dbRow ) {
			$campaignId = $dbRow->not_id;
			$campaignName = $dbRow->not_name;
			$bannerId = $dbRow->tmp_id;
			$bannerName = $dbRow->tmp_name;
			$bucket = $dbRow->asn_bucket;

			// FIXME Temporary hack to substitute the magic words {{{campaign}}}
			// and {{{banner}}} in banner categories. (These are the magic
			// words mentioned in the CN Admin UI.)
			$category = $dbRow->tmp_category;
			$category = str_replace( '{{{campaign}}}', $campaignName, $category );
			$category = str_replace( '{{{banner}}}', $bannerName, $category );
			$category = Banner::sanitizeRenderedCategory( $category );

			// The first time we see any campaign, create the corresponding
			// outer K/V entry. The campaign-specific properties should be
			// repeated on every row for any campaign. Note that these
			// keys don't make it into data structure we return.
			if ( !isset( $choices[$campaignId] ) ) {
				$choices[$campaignId] = [
					'name' => $campaignName,
					'start' => intval( wfTimestamp( TS_UNIX, $dbRow->not_start ) ),
					'end' => intval( wfTimestamp( TS_UNIX, $dbRow->not_end ) ),
					'preferred' => intval( $dbRow->not_preferred ),
					'throttle' => intval( $dbRow->not_throttle ),
					'bucket_count' => intval( $dbRow->not_buckets ),
					'geotargeted' => (bool)$dbRow->not_geo,
					'type' => $dbRow->not_type,
					'banners' => []
				];
			}

			// A temporary assignment key so we can get back to this part of the
			// data structure quickly and add in devices.
			$assignmentKey = $bannerId . ':' . $bucket;

			$choices[$campaignId]['banners'][$assignmentKey] = [
				'name' => $bannerName,
				'bucket' => intval( $bucket ),
				'weight' => intval( $dbRow->tmp_weight ),
				'category' => $category,
				'display_anon' => (bool)$dbRow->tmp_display_anon,
				'display_account' => (bool)$dbRow->tmp_display_account,
				// To be filled by the last query
				'devices' => []
			];

			$bannerIds[] = $bannerId;

			// Add to the index so we can get back here.
			// Note that PHP creates arrays here as needed.
			$assignmentKeysByBannerIdAndCampaignId[$bannerId][$campaignId][] =
				$assignmentKey;
		}

		// If there's nothing, return the empty array now
		if ( count( $choices ) === 0 ) {
			return $choices;
		}

		$campaignIds = array_keys( $choices );

		// Fetch countries.
		// We have to eliminate notices that are not geotargeted, since they
		// may have residual data in the cn_notice_countries table.
		$dbRows = $dbr->newSelectQueryBuilder()
			->select( [
				'notices.not_id',
				'notice_countries.nc_country'
			] )
			->from( 'cn_notices', 'notices' )
			->join( 'cn_notice_countries', 'notice_countries', 'notices.not_id = notice_countries.nc_notice_id' )
			->where( [
				'notices.not_geo' => 1,
				'notices.not_id' => $campaignIds
			] )
			->caller( __METHOD__ )
			->fetchResultSet();

		// Add countries to our data structure.
		// Note that PHP creates an empty array for countries as needed.
		// Sorting and de-duplication happen once for every campaign in the
		// normalization pass at the end of this method.
		foreach ( $dbRows as $dbRow ) {
			$choices[$dbRow->not_id]['countries'][] = $dbRow->nc_country;
		}

		// Fetch regions.
		// We have to eliminate notices that are not geotargeted, since they
		// may have residual data in the cn_notice_regions table.
		$dbRows = $dbr->newSelectQueryBuilder()
			->select( [
				'notices.not_id',
				'notice_regions.nr_region'
			] )
			->from( 'cn_notices', 'notices' )
			->join( 'cn_notice_regions', 'notice_regions', 'notices.not_id = notice_regions.nr_notice_id' )
			->where( [
				'notices.not_geo' => 1,
				'notices.not_id' => $campaignIds
			] )
			->caller( __METHOD__ )
			->fetchResultSet();

		// Add regions to our data structure.
		// Note that PHP creates an empty array for regions as needed.
		foreach ( $dbRows as $dbRow ) {
			$choices[$dbRow->not_id]['regions'][] = $dbRow->nr_region;
		}

		// Add campaign-associated mixins to the data structure. Assign through
		// the key rather than iterating by reference, so no reference into
		// $choices outlives the loop.
		foreach ( $choices as $campaignId => $campaignInfo ) {
			// Get info for enabled mixins for this campaign
			$choices[$campaignId]['mixins'] =
				Campaign::getCampaignMixins( $campaignInfo['name'], true );
		}

		// Fetch the devices. $bannerIds has one entry per assignment row, so
		// de-duplicate it to keep the IN list minimal.
		$dbRows = $dbr->newSelectQueryBuilder()
			->select( [
				'template_devices.tmp_id',
				'known_devices.dev_name'
			] )
			->from( 'cn_template_devices', 'template_devices' )
			->join( 'cn_known_devices', 'known_devices', 'template_devices.dev_id = known_devices.dev_id' )
			->where( [
				'template_devices.tmp_id' => array_values( array_unique( $bannerIds ) )
			] )
			->caller( __METHOD__ )
			->fetchResultSet();

		// Add devices to the data structure: every assignment of a banner, in
		// every campaign it is assigned to, receives the banner's devices.
		foreach ( $dbRows as $dbRow ) {
			$assignmentKeysByCampaignId =
				$assignmentKeysByBannerIdAndCampaignId[$dbRow->tmp_id];

			foreach ( $assignmentKeysByCampaignId as $campaignId => $assignmentKeys ) {
				foreach ( $assignmentKeys as $assignmentKey ) {
					$choices[$campaignId]['banners'][$assignmentKey]['devices'][] =
						$dbRow->dev_name;
				}
			}
		}

		// Make arrays that are associative into plain indexed ones, since the
		// keys aren't used by the clients.
		// Also make very sure we don't have duplicate devices or countries.
		// Finally, ensure consistent ordering, since it's needed for
		// CNChoiceDataResourceLoaderModule for consistent RL module hashes.

		// array_unique() preserves keys, so removing a duplicate leaves a gap;
		// sort() afterwards both orders the list and reindexes it from 0.
		$normalizeBanner = static function ( array $banner ) {
			$banner['devices'] = array_unique( $banner['devices'] );
			sort( $banner['devices'] );
			return $banner;
		};

		$compareNames = static fn ( array $a, array $b ) => $a['name'] <=> $b['name'];

		$normalizeCampaign = static function ( array $campaign ) use ( $normalizeBanner, $compareNames ) {
			$campaign['banners'] = array_map( $normalizeBanner, array_values( $campaign['banners'] ) );
			usort( $campaign['banners'], $compareNames );

			if ( $campaign['geotargeted'] ) {
				$campaign['countries'] = array_unique( $campaign['countries'] ?? [] );
				sort( $campaign['countries'] );
				$campaign['regions'] = array_unique( $campaign['regions'] ?? [] );
				sort( $campaign['regions'] );
			}

			return $campaign;
		};

		$choices = array_map( $normalizeCampaign, array_values( $choices ) );
		usort( $choices, $compareNames );

		return $choices;
	}
}