Code Coverage

| | Lines | Functions and Methods | CRAP | Classes and Traits |
|---|---|---|---|---|
| Total | 98.53% (67 / 68) | 83.33% (5 / 6) | | 0.00% (0 / 1) |
| UserImpactFormatter | 98.53% (67 / 68) | 83.33% (5 / 6) | 17 | 0.00% (0 / 1) |
| __construct | 100.00% (1 / 1) | 100.00% (1 / 1) | 1 | |
| format | 85.71% (6 / 7) | 0.00% (0 / 1) | 2.01 | |
| sortAndFilter | 100.00% (36 / 36) | 100.00% (1 / 1) | 6 | |
| getModifiedDailyArticleViews | 100.00% (7 / 7) | 100.00% (1 / 1) | 4 | |
| fillDailyArticleViewsWithPageViewToolsUrl | 100.00% (5 / 5) | 100.00% (1 / 1) | 2 | |
| getPageViewToolsUrl | 100.00% (12 / 12) | 100.00% (1 / 1) | 2 | |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\UserImpact; |
4 | |
5 | use DateTime; |
6 | use Exception; |
7 | use MediaWiki\Utils\MWTimestamp; |
8 | use stdClass; |
9 | |
/**
 * Formats an ExpensiveUserImpact object to be more suitable for frontend use.
 *
 * The raw 'dailyArticleViews' data is reduced to a handful of derived fields
 * (top viewed articles, recent edits without pageviews, pageview totals), and each
 * top viewed article gets a link to the pageviews tool.
 */
class UserImpactFormatter {

	private const PAGEVIEW_TOOL_BASE_URL = 'https://pageviews.wmcloud.org/';

	private stdClass $AQSConfig;

	/**
	 * @param stdClass $AQSConfig Configuration object; its 'project' property is used as the
	 *   'project' parameter of the generated pageviews tool URLs.
	 */
	public function __construct(
		stdClass $AQSConfig
	) {
		$this->AQSConfig = $AQSConfig;
	}

	/**
	 * Format a (possibly already serialized) ExpensiveUserImpact for frontend use.
	 *
	 * The fields computed by sortAndFilter() are added with the array union operator, so any
	 * field of the same name that is already present in the input takes precedence over the
	 * computed one. The 'dailyArticleViews' field is removed from the result, and every entry of
	 * 'topViewedArticles' gets a 'pageviewsUrl' field.
	 *
	 * @param array|ExpensiveUserImpact $userImpact
	 * @param string $languageCode The requesting user's language code to use in the pageviews url construction.
	 * @return array
	 * @throws Exception
	 */
	public function format( $userImpact, string $languageCode ): array {
		if ( $userImpact instanceof ExpensiveUserImpact ) {
			$jsonData = $userImpact->jsonSerialize();
		} else {
			$jsonData = $userImpact;
		}
		// Array union: keys already present in $jsonData are kept as they are.
		$jsonData += $this->sortAndFilter( $jsonData );
		unset( $jsonData['dailyArticleViews'] );
		$this->fillDailyArticleViewsWithPageViewToolsUrl( $jsonData, $languageCode );
		return $jsonData;
	}

	/**
	 * Calculate the topViewedArticles, topViewedArticlesCount, totalPageviewsCount and
	 * recentEditsWithoutPageviews fields by sorting and filtering the daily article views data:
	 * - Get the top 5 most viewed articles, in descending order of views
	 * - Get up to 5 of the most recently edited articles with no page view
	 *   data available yet, in descending order of recency
	 *
	 * Note that, in some situations, views/viewsCount information doesn't exist, because this
	 * method removed those fields from data, and that data was used as the basis for constructing
	 * a new ExpensiveUserImpact object. We can work around that with a few checks for the
	 * existence of the properties, before using them.
	 *
	 * If $jsonData has no 'dailyArticleViews' field at all, the lists are empty and the counts
	 * are 0.
	 *
	 * @param array $jsonData
	 * @return array Array with the keys 'recentEditsWithoutPageviews', 'topViewedArticles',
	 *   'topViewedArticlesCount' and 'totalPageviewsCount'.
	 */
	public function sortAndFilter( array $jsonData ): array {
		// Compute the adjusted views once; both the per-article split and the total use them.
		$dailyArticleViews = $this->getModifiedDailyArticleViews( $jsonData );

		$topViewedArticles = $recentEditsWithoutPageviews = [];
		foreach ( $dailyArticleViews as $title => $data ) {
			$lastDayWithPageViewData = array_key_last( $data['views'] ?? [] );
			// See if we have pageview data for the page.
			$noPageviewDataYet = $data['firstEditDate'] > $lastDayWithPageViewData
				// The last day actually might or might not have data (T217286) so allow equality
				// if there's no data for that day. (We can't differentiate between no data and
				// legitimately 0 views, but it's not really possible to edit a page and not
				// generate any pageviews.)
				|| ( $data['firstEditDate'] === $lastDayWithPageViewData
					&& $data['views'][$lastDayWithPageViewData] === 0 );

			if ( $noPageviewDataYet ) {
				$recentEditsWithoutPageviews[$title] = $data;
			} else {
				$topViewedArticles[$title] = $data;
			}
		}

		// Order the articles by most views to fewest
		uasort( $topViewedArticles, static function ( $a, $b ) {
			return $b['viewsCount'] <=> $a['viewsCount'];
		} );
		// Get the top five articles in the list that have page views
		$topViewedArticles = array_slice( array_filter( $topViewedArticles, static function ( $item ) {
			return ( $item['viewsCount'] ?? 0 ) > 0;
		} ), 0, 5, true );

		$topViewedArticlesCount = array_sum( array_column( $topViewedArticles, 'viewsCount' ) );
		// The total covers every article, not only the top five.
		$totalPageviewsCount = array_sum( array_column( $dailyArticleViews, 'viewsCount' ) );

		// Order the articles by date, most recent edit to oldest, and get the most recent 5.
		uasort( $recentEditsWithoutPageviews, static function ( $a, $b ) {
			return $b['newestEdit'] <=> $a['newestEdit'];
		} );
		$recentEditsWithoutPageviews = array_slice( $recentEditsWithoutPageviews, 0, 5, true );
		// Remove the 'viewsCount' key - the frontend will show this as still waiting for pageview data.
		// Also unset 'views' as a micro-optimization.
		foreach ( $recentEditsWithoutPageviews as $title => $_ ) {
			unset( $recentEditsWithoutPageviews[$title]['viewsCount'] );
			unset( $recentEditsWithoutPageviews[$title]['views'] );
		}

		return [
			'recentEditsWithoutPageviews' => $recentEditsWithoutPageviews,
			'topViewedArticles' => $topViewedArticles,
			'topViewedArticlesCount' => $topViewedArticlesCount,
			'totalPageviewsCount' => $totalPageviewsCount
		];
	}

	/**
	 * Returns dailyArticleViews field, with views set to 0 on days before the user's first edit
	 * to the article, and a 'viewsCount' field added to each title with the sum of the
	 * (remaining) views.
	 *
	 * A missing 'dailyArticleViews' field is treated as an empty list. format() removes that
	 * field from its output, so this keeps already formatted data from causing a TypeError.
	 *
	 * @param array $jsonData
	 * @return array
	 */
	private function getModifiedDailyArticleViews( array $jsonData ): array {
		$dailyArticleViews = $jsonData['dailyArticleViews'] ?? [];
		foreach ( $dailyArticleViews as $title => $data ) {
			foreach ( $data['views'] ?? [] as $date => $dailyViews ) {
				if ( $date < $data['firstEditDate'] ) {
					// Note this is unreliable for established users, as we look at the user's
					// last 1000 edits to determine firstEditDate. We ignore that issue here.
					$dailyArticleViews[$title]['views'][$date] = 0;
				}
			}
			$dailyArticleViews[$title]['viewsCount'] = array_sum( $dailyArticleViews[$title]['views'] ?? [] );
		}
		return $dailyArticleViews;
	}

	/**
	 * Add a 'pageviewsUrl' field to every entry of $jsonData['topViewedArticles'].
	 *
	 * The URL ends at the last date present in the article's 'views' data.
	 *
	 * @param array &$jsonData Modified in place.
	 * @param string $languageCode The requesting user's language code to use in the pageviews url
	 * @return void
	 * @throws Exception
	 */
	private function fillDailyArticleViewsWithPageViewToolsUrl(
		array &$jsonData,
		string $languageCode
	): void {
		foreach ( $jsonData['topViewedArticles'] as $title => $articleData ) {
			$latestPageViewDate = array_key_last( $articleData['views'] );
			$jsonData['topViewedArticles'][$title]['pageviewsUrl'] = $this->getPageViewToolsUrl(
				$title, $articleData['firstEditDate'], $latestPageViewDate, $languageCode
			);
		}
	}

	/**
	 * Build the pageviews tool URL for an article.
	 *
	 * The start date is the later of the first edit date and the date
	 * ComputedUserImpactLookup::PAGEVIEW_DAYS days before the current time.
	 *
	 * @param string $title
	 * @param string $firstEditDate Date of the first edit to the article in Y-m-d format.
	 * @param string $latestPageViewDate Date of the last page view data entry available for this article.
	 *   Used for constructing the 'end' parameter for the URL, to avoid confusion with timezones and what "latest"
	 *   means in the context of the pageviews application and Analytics Query Service.
	 * @param string $languageCode The requesting user's language code to use in the pageviews url
	 * @return string Full URL for the PageViews tool for the given title and start date
	 * @throws Exception
	 */
	private function getPageViewToolsUrl(
		string $title,
		string $firstEditDate,
		string $latestPageViewDate,
		string $languageCode
	): string {
		$daysAgo = ComputedUserImpactLookup::PAGEVIEW_DAYS;
		$dtiAgo = new DateTime( '@' . strtotime( "-$daysAgo days", MWTimestamp::time() ) );
		$startDate = $dtiAgo->format( 'Y-m-d' );
		// Y-m-d strings sort chronologically, so a plain string comparison is enough here.
		if ( $firstEditDate > $startDate ) {
			$startDate = $firstEditDate;
		}
		return wfAppendQuery( self::PAGEVIEW_TOOL_BASE_URL, [
			'project' => $this->AQSConfig->project,
			'userlang' => $languageCode,
			'start' => $startDate,
			'end' => $latestPageViewDate,
			'pages' => $title,
		] );
	}

}