Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 124 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 1 |
ExportWelcomeSurveyMailingListData | |
0.00% |
0 / 118 |
|
0.00% |
0 / 4 |
506 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 84 |
|
0.00% |
0 / 1 |
306 | |||
getLastUserIdBeforeRegistrationDate | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
writeToHandle | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\Maintenance; |
4 | |
5 | use FormatJson; |
6 | use GrowthExperiments\HomepageHooks; |
7 | use GrowthExperiments\WelcomeSurvey; |
8 | use Maintenance; |
9 | use MediaWiki\MediaWikiServices; |
10 | use MediaWiki\User\UserIdentityValue; |
11 | use Wikimedia\Rdbms\IDatabase; |
12 | use Wikimedia\Rdbms\SelectQueryBuilder; |
13 | |
14 | $IP = getenv( 'MW_INSTALL_PATH' ); |
15 | if ( $IP === false ) { |
16 | $IP = __DIR__ . '/../../..'; |
17 | } |
18 | require_once "$IP/maintenance/Maintenance.php"; |
19 | |
20 | /** |
21 | * One-off script to export data from the welcome survey for users who opt-in to mailing list. |
22 | */ |
23 | class ExportWelcomeSurveyMailingListData extends Maintenance { |
24 | |
25 | /** @var string */ |
26 | private $outputFormat = 'text'; |
27 | /** @var resource */ |
28 | private $handle; |
29 | |
30 | public function __construct() { |
31 | parent::__construct(); |
32 | $this->addOption( 'from', |
33 | 'Export data starting from this registration timestamp, e.g. 20220301000000', false, true ); |
34 | $this->addOption( 'to', |
35 | 'Export date up to this registration timestamp, e.g. 20220316000000', false, true ); |
36 | $this->addOption( |
37 | 'question', |
38 | 'Welcome survey checkbox question to be used', |
39 | true, |
40 | true |
41 | ); |
42 | $this->addOption( |
43 | 'group', |
44 | 'Welcome survey group to process (if omitted, all groups are processed)', |
45 | false, |
46 | true |
47 | ); |
48 | $this->addOption( |
49 | 'output-format', |
50 | 'Output format for the results, "text" or "csv"', |
51 | false |
52 | ); |
53 | $this->addOption( 'debug', 'Show debug output' ); |
54 | } |
55 | |
56 | public function execute() { |
57 | $dbr = $this->getDB( DB_REPLICA ); |
58 | |
59 | $from = wfTimestampOrNull( TS_MW, $this->getOption( 'from' ) ); |
60 | $to = wfTimestampOrNull( TS_MW, $this->getOption( 'to' ) ); |
61 | if ( !$from || !$to ) { |
62 | $this->fatalError( "--from and --to have to be provided and be valid timestamps" . PHP_EOL ); |
63 | } |
64 | $this->outputFormat = $this->getOption( 'output-format', 'text' ); |
65 | if ( !in_array( $this->outputFormat, [ 'text', 'csv' ] ) ) { |
66 | $this->fatalError( "--output-format must be one of 'text' or 'csv'" ); |
67 | } |
68 | |
69 | $services = MediaWikiServices::getInstance(); |
70 | $fromId = $this->getLastUserIdBeforeRegistrationDate( $dbr, $from ); |
71 | $toId = $this->getLastUserIdBeforeRegistrationDate( $dbr, $to ); |
72 | if ( $this->hasOption( 'debug' ) ) { |
73 | $this->error( "Converting registration timestamps:" ); |
74 | foreach ( [ |
75 | [ 'From (exclusive)', $from, $fromId ], |
76 | [ 'To (inclusive)', $to, $toId ] |
77 | ] as [ $dir, $ts, $id ] ) { |
78 | $text = 'any'; |
79 | if ( $id ) { |
80 | $registered = $services->getUserFactory()->newFromId( $id )->getRegistration(); |
81 | $text = "UID $id (registered: $registered)"; |
82 | } |
83 | $this->error( "\t$dir: $ts -> $text" ); |
84 | } |
85 | } |
86 | if ( $fromId === $toId ) { |
87 | // There weren't any users between those two timestamps. |
88 | return; |
89 | } |
90 | |
91 | $queryBuilderTemplate = $dbr->newSelectQueryBuilder() |
92 | ->table( 'user' ) |
93 | ->join( 'user_properties', 'survey_prop', [ |
94 | 'user_id = survey_prop.up_user', |
95 | 'survey_prop.up_property' => WelcomeSurvey::SURVEY_PROP, |
96 | ] ) |
97 | ->leftJoin( 'user_properties', 'homepage_prop', [ |
98 | 'user_id = homepage_prop.up_user', |
99 | 'homepage_prop.up_property' => HomepageHooks::HOMEPAGE_PREF_ENABLE, |
100 | ] ) |
101 | ->fields( [ |
102 | 'user_id', |
103 | 'user_name', |
104 | 'user_registration', |
105 | 'user_email', |
106 | 'user_email_authenticated', |
107 | 'survey_data' => 'survey_prop.up_value', |
108 | 'homepage_enabled' => 'homepage_prop.up_value', |
109 | ] ) |
110 | // need to order by ID so we can use ID ranges for query continuation |
111 | ->orderBy( 'user_id', SelectQueryBuilder::SORT_ASC ) |
112 | ->limit( $this->getBatchSize() ) |
113 | ->caller( __METHOD__ ); |
114 | if ( $toId ) { |
115 | $queryBuilderTemplate->where( $dbr->expr( 'user_id', '<=', $toId ) ); |
116 | } |
117 | |
118 | $userOptionsLookup = $services->getUserOptionsLookup(); |
119 | $this->handle = fopen( 'php://output', 'w' ); |
120 | $headers = [ 'Email Address', 'Opt-in date', 'Group', 'User ID', 'Is email address confirmed' ]; |
121 | $this->writeToHandle( $headers ); |
122 | |
123 | $question = $this->getOption( 'question' ); |
124 | $group = $this->getOption( 'group' ); |
125 | do { |
126 | $queryBuilder = clone $queryBuilderTemplate; |
127 | $queryBuilder->andWhere( $dbr->expr( 'user_id', '>', $fromId ?? 0 ) ); |
128 | $result = $queryBuilder->fetchResultSet(); |
129 | foreach ( $result as $row ) { |
130 | $fromId = $row->user_id; |
131 | $homepageEnabled = (bool)( |
132 | $row->homepage_enabled ?? |
133 | $userOptionsLookup->getDefaultOption( |
134 | HomepageHooks::HOMEPAGE_PREF_ENABLE, |
135 | new UserIdentityValue( $row->user_id, $row->user_name ) |
136 | ) |
137 | ); |
138 | $welcomeSurveyResponse = FormatJson::decode( (string)$row->survey_data, true ); |
139 | // We only want to export survey responses in the T303240_mailinglist group, |
140 | // see https://gerrit.wikimedia.org/r/c/operations/mediawiki-config/+/775951 |
141 | // and only when the user gets the Growth features |
142 | if ( !( |
143 | $homepageEnabled && |
144 | $welcomeSurveyResponse && |
145 | ( $group === null || $welcomeSurveyResponse['_group'] === $group ) |
146 | ) ) { |
147 | continue; |
148 | } |
149 | if ( isset( $welcomeSurveyResponse[$question] ) && $welcomeSurveyResponse[$question] ) { |
150 | // user_email_authenticated is the timestamp of when the email was confirmed; we want a 1 or 0 |
151 | // to indicate if the email is confirmed. |
152 | $outputData = [ |
153 | $row->user_email, |
154 | wfTimestamp( TS_MW, $row->user_registration ), |
155 | $welcomeSurveyResponse['_group'], |
156 | $row->user_id, |
157 | $row->user_email_authenticated ? 1 : 0 |
158 | ]; |
159 | } else { |
160 | continue; |
161 | } |
162 | $this->writeToHandle( $outputData ); |
163 | } |
164 | } while ( $result->numRows() ); |
165 | } |
166 | |
167 | /** |
168 | * Given a registration date, return the ID of the user who last registered before that date. |
169 | * @param IDatabase $dbr |
170 | * @param string $registrationDate |
171 | * @return int|null |
172 | */ |
173 | private function getLastUserIdBeforeRegistrationDate( IDatabase $dbr, string $registrationDate ): ?int { |
174 | $res = $dbr->newSelectQueryBuilder() |
175 | ->select( [ 'user_id' => 'max(user_id)' ] ) |
176 | ->from( 'user' ) |
177 | // Old user records have no registration date. We won't use 'from' dates old enough |
178 | // to encounter those so we can ignore them here. |
179 | ->where( $dbr->expr( 'user_registration', '<=', $registrationDate ) ) |
180 | ->fetchField(); |
181 | return is_numeric( $res ) ? intval( $res ) : null; |
182 | } |
183 | |
184 | /** |
185 | * @param array $data |
186 | * @return void |
187 | */ |
188 | private function writeToHandle( array $data ): void { |
189 | if ( $this->outputFormat === 'text' ) { |
190 | fputs( $this->handle, implode( "\t", $data ) . PHP_EOL ); |
191 | } else { |
192 | fputcsv( $this->handle, $data ); |
193 | } |
194 | } |
195 | } |
196 | |
197 | $maintClass = ExportWelcomeSurveyMailingListData::class; |
198 | require_once RUN_MAINTENANCE_IF_MAIN; |