Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 124
0.00% covered (danger)
0.00%
0 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
ExportWelcomeSurveyMailingListData
0.00% covered (danger)
0.00%
0 / 118
0.00% covered (danger)
0.00%
0 / 4
506
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 23
0.00% covered (danger)
0.00%
0 / 1
2
 execute
0.00% covered (danger)
0.00%
0 / 84
0.00% covered (danger)
0.00%
0 / 1
306
 getLastUserIdBeforeRegistrationDate
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
6
 writeToHandle
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace GrowthExperiments\Maintenance;
4
5use FormatJson;
6use GrowthExperiments\HomepageHooks;
7use GrowthExperiments\WelcomeSurvey;
8use Maintenance;
9use MediaWiki\MediaWikiServices;
10use MediaWiki\User\UserIdentityValue;
11use Wikimedia\Rdbms\IDatabase;
12use Wikimedia\Rdbms\SelectQueryBuilder;
13
14$IP = getenv( 'MW_INSTALL_PATH' );
15if ( $IP === false ) {
16    $IP = __DIR__ . '/../../..';
17}
18require_once "$IP/maintenance/Maintenance.php";
19
20/**
21 * One-off script to export data from the welcome survey for users who opt-in to mailing list.
22 */
23class ExportWelcomeSurveyMailingListData extends Maintenance {
24
25    /** @var string */
26    private $outputFormat = 'text';
27    /** @var resource */
28    private $handle;
29
30    public function __construct() {
31        parent::__construct();
32        $this->addOption( 'from',
33            'Export data starting from this registration timestamp, e.g. 20220301000000', false, true );
34        $this->addOption( 'to',
35            'Export date up to this registration timestamp, e.g. 20220316000000', false, true );
36        $this->addOption(
37            'question',
38            'Welcome survey checkbox question to be used',
39            true,
40            true
41        );
42        $this->addOption(
43            'group',
44            'Welcome survey group to process (if omitted, all groups are processed)',
45            false,
46            true
47        );
48        $this->addOption(
49            'output-format',
50            'Output format for the results, "text" or "csv"',
51            false
52        );
53        $this->addOption( 'debug', 'Show debug output' );
54    }
55
56    public function execute() {
57        $dbr = $this->getDB( DB_REPLICA );
58
59        $from = wfTimestampOrNull( TS_MW, $this->getOption( 'from' ) );
60        $to = wfTimestampOrNull( TS_MW, $this->getOption( 'to' ) );
61        if ( !$from || !$to ) {
62            $this->fatalError( "--from and --to have to be provided and be valid timestamps" . PHP_EOL );
63        }
64        $this->outputFormat = $this->getOption( 'output-format', 'text' );
65        if ( !in_array( $this->outputFormat, [ 'text', 'csv' ] ) ) {
66            $this->fatalError( "--output-format must be one of 'text' or 'csv'" );
67        }
68
69        $services = MediaWikiServices::getInstance();
70        $fromId = $this->getLastUserIdBeforeRegistrationDate( $dbr, $from );
71        $toId = $this->getLastUserIdBeforeRegistrationDate( $dbr, $to );
72        if ( $this->hasOption( 'debug' ) ) {
73            $this->error( "Converting registration timestamps:" );
74            foreach ( [
75                [ 'From (exclusive)', $from, $fromId ],
76                [ 'To (inclusive)', $to, $toId ]
77            ] as [ $dir, $ts, $id ] ) {
78                $text = 'any';
79                if ( $id ) {
80                    $registered = $services->getUserFactory()->newFromId( $id )->getRegistration();
81                    $text = "UID $id (registered: $registered)";
82                }
83                $this->error( "\t$dir$ts -> $text" );
84            }
85        }
86        if ( $fromId === $toId ) {
87            // There weren't any users between those two timestamps.
88            return;
89        }
90
91        $queryBuilderTemplate = $dbr->newSelectQueryBuilder()
92            ->table( 'user' )
93            ->join( 'user_properties', 'survey_prop', [
94                'user_id = survey_prop.up_user',
95                'survey_prop.up_property' => WelcomeSurvey::SURVEY_PROP,
96            ] )
97            ->leftJoin( 'user_properties', 'homepage_prop', [
98                'user_id = homepage_prop.up_user',
99                'homepage_prop.up_property' => HomepageHooks::HOMEPAGE_PREF_ENABLE,
100            ] )
101            ->fields( [
102                'user_id',
103                'user_name',
104                'user_registration',
105                'user_email',
106                'user_email_authenticated',
107                'survey_data' => 'survey_prop.up_value',
108                'homepage_enabled' => 'homepage_prop.up_value',
109            ] )
110            // need to order by ID so we can use ID ranges for query continuation
111            ->orderBy( 'user_id', SelectQueryBuilder::SORT_ASC )
112            ->limit( $this->getBatchSize() )
113            ->caller( __METHOD__ );
114        if ( $toId ) {
115            $queryBuilderTemplate->where( $dbr->expr( 'user_id', '<=', $toId ) );
116        }
117
118        $userOptionsLookup = $services->getUserOptionsLookup();
119        $this->handle = fopen( 'php://output', 'w' );
120        $headers = [ 'Email Address', 'Opt-in date', 'Group', 'User ID', 'Is email address confirmed' ];
121        $this->writeToHandle( $headers );
122
123        $question = $this->getOption( 'question' );
124        $group = $this->getOption( 'group' );
125        do {
126            $queryBuilder = clone $queryBuilderTemplate;
127            $queryBuilder->andWhere( $dbr->expr( 'user_id', '>', $fromId ?? 0 ) );
128            $result = $queryBuilder->fetchResultSet();
129            foreach ( $result as $row ) {
130                $fromId = $row->user_id;
131                $homepageEnabled = (bool)(
132                    $row->homepage_enabled ??
133                    $userOptionsLookup->getDefaultOption(
134                        HomepageHooks::HOMEPAGE_PREF_ENABLE,
135                        new UserIdentityValue( $row->user_id, $row->user_name )
136                    )
137                );
138                $welcomeSurveyResponse = FormatJson::decode( (string)$row->survey_data, true );
139                // We only want to export survey responses in the T303240_mailinglist group,
140                // see https://gerrit.wikimedia.org/r/c/operations/mediawiki-config/+/775951
141                // and only when the user gets the Growth features
142                if ( !(
143                    $homepageEnabled &&
144                    $welcomeSurveyResponse &&
145                    ( $group === null || $welcomeSurveyResponse['_group'] === $group )
146                ) ) {
147                    continue;
148                }
149                if ( isset( $welcomeSurveyResponse[$question] ) && $welcomeSurveyResponse[$question] ) {
150                    // user_email_authenticated is the timestamp of when the email was confirmed; we want a 1 or 0
151                    // to indicate if the email is confirmed.
152                    $outputData = [
153                        $row->user_email,
154                        wfTimestamp( TS_MW, $row->user_registration ),
155                        $welcomeSurveyResponse['_group'],
156                        $row->user_id,
157                        $row->user_email_authenticated ? 1 : 0
158                    ];
159                } else {
160                    continue;
161                }
162                $this->writeToHandle( $outputData );
163            }
164        } while ( $result->numRows() );
165    }
166
167    /**
168     * Given a registration date, return the ID of the user who last registered before that date.
169     * @param IDatabase $dbr
170     * @param string $registrationDate
171     * @return int|null
172     */
173    private function getLastUserIdBeforeRegistrationDate( IDatabase $dbr, string $registrationDate ): ?int {
174        $res = $dbr->newSelectQueryBuilder()
175            ->select( [ 'user_id' => 'max(user_id)' ] )
176            ->from( 'user' )
177            // Old user records have no registration date. We won't use 'from' dates old enough
178            // to encounter those so we can ignore them here.
179            ->where( $dbr->expr( 'user_registration', '<=', $registrationDate ) )
180            ->fetchField();
181        return is_numeric( $res ) ? intval( $res ) : null;
182    }
183
184    /**
185     * @param array $data
186     * @return void
187     */
188    private function writeToHandle( array $data ): void {
189        if ( $this->outputFormat === 'text' ) {
190            fputs( $this->handle, implode( "\t", $data ) . PHP_EOL );
191        } else {
192            fputcsv( $this->handle, $data );
193        }
194    }
195}
196
197$maintClass = ExportWelcomeSurveyMailingListData::class;
198require_once RUN_MAINTENANCE_IF_MAIN;