Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
98.53% covered (success)
98.53%
67 / 68
83.33% covered (warning)
83.33%
5 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
UserImpactFormatter
98.53% covered (success)
98.53%
67 / 68
83.33% covered (warning)
83.33%
5 / 6
17
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 format
85.71% covered (warning)
85.71%
6 / 7
0.00% covered (danger)
0.00%
0 / 1
2.01
 sortAndFilter
100.00% covered (success)
100.00%
36 / 36
100.00% covered (success)
100.00%
1 / 1
6
 getModifiedDailyArticleViews
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
4
 fillDailyArticleViewsWithPageViewToolsUrl
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 getPageViewToolsUrl
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3namespace GrowthExperiments\UserImpact;
4
5use DateTime;
6use Exception;
7use MediaWiki\Utils\MWTimestamp;
8use stdClass;
9
/**
 * Formats an ExpensiveUserImpact object to be more suitable for frontend use.
 *
 * The formatter derives the "top viewed articles" and "recent edits without
 * pageviews" lists from the raw per-day article view data, and attaches a
 * pageviews tool URL to every top viewed article.
 */
class UserImpactFormatter {

    private const PAGEVIEW_TOOL_BASE_URL = 'https://pageviews.wmcloud.org/';

    /** Maximum number of entries kept in each of the derived article lists. */
    private const MAX_ARTICLES = 5;

    private stdClass $AQSConfig;

    /**
     * @param stdClass $AQSConfig Configuration object; its 'project' property is used as the
     *   'project' parameter of the pageviews tool URL.
     */
    public function __construct(
        stdClass $AQSConfig
    ) {
        $this->AQSConfig = $AQSConfig;
    }

    /**
     * Format user impact data for frontend use.
     *
     * Adds the fields computed by sortAndFilter(), removes the raw 'dailyArticleViews' field and
     * adds a 'pageviewsUrl' to each entry of 'topViewedArticles'.
     *
     * The computed fields are merged with the array union operator, so a field that is already
     * present in the input data is kept as is rather than being overwritten.
     *
     * @param array|ExpensiveUserImpact $userImpact An ExpensiveUserImpact, or its serialized
     *   (array) form.
     * @param string $languageCode The requesting user's language code to use in the pageviews url construction.
     * @return array
     * @throws Exception
     */
    public function format( $userImpact, string $languageCode ): array {
        $jsonData = $userImpact instanceof ExpensiveUserImpact
            ? $userImpact->jsonSerialize()
            : $userImpact;

        // Array union: keys already present in $jsonData win over the computed ones.
        $jsonData += $this->sortAndFilter( $jsonData );
        unset( $jsonData['dailyArticleViews'] );
        $this->fillDailyArticleViewsWithPageViewToolsUrl( $jsonData, $languageCode );
        return $jsonData;
    }

    /**
     * Calculate the topViewedArticles, topViewedArticlesCount, totalPageviewsCount and
     * recentEditsWithoutPageviews fields by sorting and filtering the daily article views data:
     * - Get the top 5 most viewed articles, in descending order of views
     * - Get up to 5 of the most recently edited articles with no page view
     *   data available yet, in descending order of recency
     *
     * Note that, in some situations, views/viewsCount information doesn't exist, because this
     * method removed those fields from data, and that data was used as the basis for constructing
     * a new ExpensiveUserImpact object. We can work around that with a few checks for the
     * existence of the properties, before using them.
     *
     * @param array $jsonData Serialized user impact data. A missing 'dailyArticleViews' field is
     *   treated as an empty list of articles.
     * @return array Array with the keys 'recentEditsWithoutPageviews', 'topViewedArticles',
     *   'topViewedArticlesCount' and 'totalPageviewsCount'.
     */
    public function sortAndFilter( array $jsonData ): array {
        // Computed once and reused for both the classification loop and the grand total.
        $dailyArticleViews = $this->getModifiedDailyArticleViews( $jsonData );

        $topViewedArticles = $recentEditsWithoutPageviews = [];
        foreach ( $dailyArticleViews as $title => $data ) {
            // null when there is no 'views' data at all; any firstEditDate string compares
            // as greater than null, so such articles count as "no pageview data yet".
            $lastDayWithPageViewData = array_key_last( $data['views'] ?? [] );
            // See if we have pageview data for the page.
            $noPageviewDataYet = $data['firstEditDate'] > $lastDayWithPageViewData
                // The last day actually might or might not have data (T217286) so allow equality
                // if there's no data for that day. (We can't differentiate between no data and
                // legitimately 0 views, but it's not really possible to edit a page and not
                // generate any pageviews.)
                || ( $data['firstEditDate'] === $lastDayWithPageViewData
                    && $data['views'][$lastDayWithPageViewData] === 0 );

            if ( $noPageviewDataYet ) {
                $recentEditsWithoutPageviews[$title] = $data;
            } else {
                $topViewedArticles[$title] = $data;
            }
        }

        // Order the articles by most views to fewest
        uasort(
            $topViewedArticles,
            static fn ( $a, $b ) => $b['viewsCount'] <=> $a['viewsCount']
        );
        // Get the top five articles in the list that have page views
        $topViewedArticles = array_slice(
            array_filter(
                $topViewedArticles,
                static fn ( $item ) => ( $item['viewsCount'] ?? 0 ) > 0
            ),
            0,
            self::MAX_ARTICLES,
            true
        );

        $topViewedArticlesCount = array_sum( array_column( $topViewedArticles, 'viewsCount' ) );
        // Unlike the count above, this covers every article, not just the top ones.
        $totalPageviewsCount = array_sum( array_column( $dailyArticleViews, 'viewsCount' ) );

        // Order the articles by date, most recent edit to oldest, and get the most recent 5.
        uasort(
            $recentEditsWithoutPageviews,
            static fn ( $a, $b ) => $b['newestEdit'] <=> $a['newestEdit']
        );
        $recentEditsWithoutPageviews = array_slice(
            $recentEditsWithoutPageviews, 0, self::MAX_ARTICLES, true
        );
        // Remove the 'viewsCount' key - the frontend will show this as still waiting for pageview data.
        // Also unset 'views' as a micro-optimization.
        foreach ( $recentEditsWithoutPageviews as $title => $_ ) {
            unset(
                $recentEditsWithoutPageviews[$title]['viewsCount'],
                $recentEditsWithoutPageviews[$title]['views']
            );
        }

        return [
            'recentEditsWithoutPageviews' => $recentEditsWithoutPageviews,
            'topViewedArticles' => $topViewedArticles,
            'topViewedArticlesCount' => $topViewedArticlesCount,
            'totalPageviewsCount' => $totalPageviewsCount
        ];
    }

    /**
     * Returns dailyArticleViews field, with views set to 0 on days before the user's first edit
     * to the article, and a 'viewsCount' field added to each title with the sum of the
     * (remaining) views.
     *
     * A missing 'dailyArticleViews' field yields an empty array; this is the case for data
     * that has already been through format(), which removes that field.
     *
     * @param array $jsonData
     * @return array
     */
    private function getModifiedDailyArticleViews( array $jsonData ): array {
        $dailyArticleViews = $jsonData['dailyArticleViews'] ?? [];
        foreach ( $dailyArticleViews as $title => $data ) {
            foreach ( $data['views'] ?? [] as $date => $dailyViews ) {
                if ( $date < $data['firstEditDate'] ) {
                    // Note this is unreliable for established users, as we look at the user's
                    // last 1000 edits to determine firstEditDate. We ignore that issue here.
                    $dailyArticleViews[$title]['views'][$date] = 0;
                }
            }
            $dailyArticleViews[$title]['viewsCount'] = array_sum( $dailyArticleViews[$title]['views'] ?? [] );
        }
        return $dailyArticleViews;
    }

    /**
     * Add a 'pageviewsUrl' field to every entry of $jsonData['topViewedArticles'].
     *
     * @param array &$jsonData Modified in place; must contain a 'topViewedArticles' field whose
     *   entries have 'views' and 'firstEditDate' fields.
     * @param string $languageCode The requesting user's language code to use in the pageviews url
     * @return void
     * @throws Exception
     */
    private function fillDailyArticleViewsWithPageViewToolsUrl(
        array &$jsonData,
        string $languageCode
    ): void {
        foreach ( $jsonData['topViewedArticles'] as $title => $articleData ) {
            $latestPageViewDate = array_key_last( $articleData['views'] );
            // PHP turns numeric-looking array keys (e.g. the title "1984") into integers,
            // so cast back to string explicitly.
            $jsonData['topViewedArticles'][$title]['pageviewsUrl'] = $this->getPageViewToolsUrl(
                (string)$title, $articleData['firstEditDate'], $latestPageViewDate, $languageCode
            );
        }
    }

    /**
     * Build the pageviews tool URL for an article.
     *
     * The start of the date range is the article's first edit date, but never earlier than
     * ComputedUserImpactLookup::PAGEVIEW_DAYS days before the current time.
     *
     * @param string $title
     * @param string $firstEditDate Date of the first edit to the article in Y-m-d format.
     * @param string $latestPageViewDate Date of the last page view data entry available for this article.
     *   Used for constructing the 'end' parameter for the URL, to avoid confusion with timezones and what "latest"
     *   means in the context of the pageviews application and Analytics Query Service.
     * @param string $languageCode The requesting user's language code to use in the pageviews url
     * @return string Full URL for the PageViews tool for the given title and start date
     * @throws Exception
     */
    private function getPageViewToolsUrl(
        string $title,
        string $firstEditDate,
        string $latestPageViewDate,
        string $languageCode
    ): string {
        $daysAgo = ComputedUserImpactLookup::PAGEVIEW_DAYS;
        $dtiAgo = new DateTime( '@' . strtotime( "-$daysAgo days", MWTimestamp::time() ) );
        $startDate = $dtiAgo->format( 'Y-m-d' );
        // Y-m-d strings order chronologically under plain string comparison.
        if ( $firstEditDate > $startDate ) {
            $startDate = $firstEditDate;
        }
        return wfAppendQuery( self::PAGEVIEW_TOOL_BASE_URL, [
            'project' => $this->AQSConfig->project,
            'userlang' => $languageCode,
            'start' => $startDate,
            'end' => $latestPageViewDate,
            'pages' => $title,
        ] );
    }

}