Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
153 / 153 |
|
100.00% |
8 / 8 |
CRAP | |
100.00% |
1 / 1 |
UserAgentClientHintsManager | |
100.00% |
153 / 153 |
|
100.00% |
8 / 8 |
36 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
insertClientHintValues | |
100.00% |
26 / 26 |
|
100.00% |
1 / 1 |
4 | |||
getMapIdByType | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
5 | |||
insertMappingRows | |
100.00% |
29 / 29 |
|
100.00% |
1 / 1 |
5 | |||
deleteMappingRows | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
4 | |||
deleteOrphanedMapRows | |
100.00% |
31 / 31 |
|
100.00% |
1 / 1 |
5 | |||
isMapRowOrphaned | |
100.00% |
23 / 23 |
|
100.00% |
1 / 1 |
8 | |||
excludeExistingClientHintData | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
4 |
1 | <?php |
2 | |
3 | namespace MediaWiki\CheckUser\Services; |
4 | |
5 | use DatabaseLogEntry; |
6 | use LogicException; |
7 | use MediaWiki\CheckUser\ClientHints\ClientHintsData; |
8 | use MediaWiki\CheckUser\ClientHints\ClientHintsReferenceIds; |
9 | use MediaWiki\Config\ServiceOptions; |
10 | use MediaWiki\Revision\RevisionLookup; |
11 | use Psr\Log\LoggerInterface; |
12 | use StatusValue; |
13 | use Wikimedia\Rdbms\IConnectionProvider; |
14 | use Wikimedia\Rdbms\IDatabase; |
15 | use Wikimedia\Rdbms\IReadableDatabase; |
16 | use Wikimedia\Timestamp\ConvertibleTimestamp; |
17 | |
18 | /** |
19 | * Service to insert and delete user-agent client hint values and their associations with rows in cu_changes, |
20 | * cu_log_event and cu_private_event. |
21 | */ |
22 | class UserAgentClientHintsManager { |
23 | |
24 | public const CONSTRUCTOR_OPTIONS = [ |
25 | 'CUDMaxAge', |
26 | ]; |
27 | |
28 | public const SUPPORTED_TYPES = [ |
29 | 'revision', |
30 | ]; |
31 | |
32 | /** |
33 | * TINYINT references for use in cu_useragent_clienthints_map.uachm_reference_type |
34 | */ |
35 | // Identifier for the cu_changes table |
36 | public const IDENTIFIER_CU_CHANGES = 0; |
37 | // Identifier for the cu_log_event table |
38 | public const IDENTIFIER_CU_LOG_EVENT = 1; |
39 | // Identifier for the cu_private_event table |
40 | public const IDENTIFIER_CU_PRIVATE_EVENT = 2; |
41 | |
42 | public const IDENTIFIER_TO_TABLE_NAME_MAP = [ |
43 | self::IDENTIFIER_CU_CHANGES => 'cu_changes', |
44 | self::IDENTIFIER_CU_LOG_EVENT => 'cu_log_event', |
45 | self::IDENTIFIER_CU_PRIVATE_EVENT => 'cu_private_event', |
46 | ]; |
47 | public const IDENTIFIER_TO_COLUMN_NAME_MAP = [ |
48 | self::IDENTIFIER_CU_CHANGES => 'cuc_this_oldid', |
49 | self::IDENTIFIER_CU_LOG_EVENT => 'cule_log_id', |
50 | self::IDENTIFIER_CU_PRIVATE_EVENT => 'cupe_id', |
51 | ]; |
52 | private IDatabase $dbw; |
53 | private IReadableDatabase $dbr; |
54 | private RevisionLookup $revisionLookup; |
55 | private ServiceOptions $options; |
56 | private LoggerInterface $logger; |
57 | |
58 | /** |
59 | * @param IConnectionProvider $connectionProvider |
60 | * @param RevisionLookup $revisionLookup |
61 | * @param ServiceOptions $options |
62 | * @param LoggerInterface $logger |
63 | */ |
64 | public function __construct(
65 | IConnectionProvider $connectionProvider,
66 | RevisionLookup $revisionLookup,
67 | ServiceOptions $options,
68 | LoggerInterface $logger
69 | ) {
70 | $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
71 | $this->logger = $logger;
72 | $this->revisionLookup = $revisionLookup;
73 | $this->options = $options;
74 | $this->dbw = $connectionProvider->getPrimaryDatabase();
75 | $this->dbr = $connectionProvider->getReplicaDatabase();
76 | }
77 | |
78 | /** |
79 | * Given an array of client hint data, a reference ID, and an identifier type, record the data to the |
80 | * cu_useragent_clienthints and cu_useragent_clienthints_map tables. |
81 | * |
82 | * @param ClientHintsData $clientHintsData |
83 | * @param int $referenceId An ID to use in `uachm_reference_id` column in the |
84 | * cu_useragent_clienthints_map table |
85 | * @param string $type The type of event this data is associated with. Valid values are: |
86 | * - revision |
87 | * @param bool $usePrimary If true, use the primary DB for SELECT queries. |
88 | * @return StatusValue |
89 | */ |
90 | public function insertClientHintValues(
91 | ClientHintsData $clientHintsData, int $referenceId, string $type, bool $usePrimary = false
92 | ): StatusValue {
93 | // Get the name/value rows described by the client hints data.
94 | $hintRows = $clientHintsData->toDatabaseRows();
95 | if ( count( $hintRows ) === 0 ) {
96 | // No rows means there is nothing to store. That is not an error, so
97 | // return a good StatusValue. For example, a browser could choose to provide
98 | // no Client Hints data but still send an empty API request.
99 | return StatusValue::newGood();
100 | }
101 | 
102 | // Refuse to add mappings if this reference already has map rows.
103 | $mapId = $this->getMapIdByType( $type );
104 | $existingMapRowCount = $this->dbr->newSelectQueryBuilder()
105 | ->table( 'cu_useragent_clienthints_map' )
106 | ->where( [
107 | 'uachm_reference_type' => $mapId,
108 | 'uachm_reference_id' => $referenceId
109 | ] )
110 | ->caller( __METHOD__ )
111 | ->fetchRowCount();
112 | if ( $existingMapRowCount ) {
113 | return StatusValue::newFatal(
114 | 'checkuser-api-useragent-clienthints-mappings-exist',
115 | [ $type, $referenceId ]
116 | );
117 | }
118 | 
119 | // Skip name/value pairs that cu_useragent_clienthints already stores.
120 | $newHintRows = $this->excludeExistingClientHintData( $hintRows, $usePrimary );
121 | if ( count( $newHintRows ) !== 0 ) {
122 | $this->dbw->newInsertQueryBuilder()
123 | ->insertInto( 'cu_useragent_clienthints' )
124 | ->ignore()
125 | ->rows( $newHintRows )
126 | ->caller( __METHOD__ )
127 | ->execute();
128 | // Rows were just inserted to cu_useragent_clienthints, so
129 | // use the primary DB for the SELECT queries that follow.
130 | $usePrimary = true;
131 | }
132 | return $this->insertMappingRows( $clientHintsData, $referenceId, $type, $usePrimary );
133 | }
134 | |
135 | /** |
136 | * Given an identifier for the type of event (e.g. 'revision'), return the relevant TINYINT |
137 | * for the table that the database entry for cu_useragent_clienthints_map refers to |
138 | * |
139 | * @param string $type |
140 | * @return int One of self::IDENTIFIER_* constants |
141 | */ |
142 | private function getMapIdByType( string $type ): int {
143 | // Lookup table from the event type to the TINYINT stored in uachm_reference_type.
144 | $mapIdsByType = [
145 | 'revision' => self::IDENTIFIER_CU_CHANGES,
146 | 'log' => self::IDENTIFIER_CU_LOG_EVENT,
147 | 'privatelog' => self::IDENTIFIER_CU_PRIVATE_EVENT,
148 | ];
149 | if ( !isset( $mapIdsByType[$type] ) ) {
150 | throw new LogicException( "Invalid type $type" );
151 | }
152 | return $mapIdsByType[$type];
153 | }
154 | |
155 | /** |
156 | * Insert rows into the cu_useragent_clienthints_map table. |
157 | * |
158 | * This links a foreign ID (e.g. "revision 1234") with client hint data values stored in cu_useragent_clienthints. |
159 | * |
160 | * @param ClientHintsData $clientHintsData |
161 | * @param int $foreignId |
162 | * @param string $type |
163 | * @param bool $usePrimary If true, use the primary DB for SELECT queries. |
164 | * @return StatusValue |
165 | * @see insertClientHintValues, which invokes this method. |
166 | * |
167 | */ |
168 | private function insertMappingRows(
169 | ClientHintsData $clientHintsData, int $foreignId, string $type, bool $usePrimary = false
170 | ): StatusValue {
171 | $rows = $clientHintsData->toDatabaseRows();
172 | // We might need the primary DB if the call is happening in the context of a server-side hook.
173 | $db = $usePrimary ? $this->dbw : $this->dbr;
174 | 
175 | // TINYINT reference to cu_changes, cu_log_event or cu_private_event.
176 | $idType = $this->getMapIdByType( $type );
177 | $mapRows = [];
178 | foreach ( $rows as $row ) {
179 | $result = $db->newSelectQueryBuilder()
180 | ->table( 'cu_useragent_clienthints' )
181 | ->field( 'uach_id' )
182 | ->where( $row )
183 | ->caller( __METHOD__ )
184 | ->fetchField();
185 | if ( $result !== false ) {
186 | $mapRows[] = [
187 | 'uachm_uach_id' => (int)$result,
188 | 'uachm_reference_type' => $idType,
189 | 'uachm_reference_id' => $foreignId,
190 | ];
191 | } else {
192 | // The context keys must match the {name} and {value} placeholders to be interpolated.
193 | $this->logger->warning(
194 | "Lookup failed for cu_useragent_clienthints row with name {name} and value {value}.",
195 | [ 'name' => $row['uach_name'], 'value' => $row['uach_value'] ]
196 | );
197 | }
198 | }
199 | if ( count( $mapRows ) ) {
200 | $this->dbw->newInsertQueryBuilder()
201 | ->insertInto( 'cu_useragent_clienthints_map' )
202 | ->ignore()
203 | ->rows( $mapRows )
204 | ->caller( __METHOD__ )
205 | ->execute();
206 | }
207 | return StatusValue::newGood();
208 | }
209 | |
210 | /** |
211 | * Given reference IDs this method finds and deletes |
212 | * the mapping entries for these reference IDs. |
213 | * |
214 | * @param ClientHintsReferenceIds $clientHintsReferenceIds |
215 | * @return int The number of mapping rows deleted. |
216 | */ |
217 | public function deleteMappingRows( ClientHintsReferenceIds $clientHintsReferenceIds ): int {
218 | // Running total of the mapping rows removed across all reference types.
219 | $deletedRowCount = 0;
220 | foreach ( $clientHintsReferenceIds->getReferenceIds() as $mapId => $referenceIds ) {
221 | if ( count( $referenceIds ) === 0 ) {
222 | continue;
223 | }
224 | // Remove the cu_useragent_clienthints_map rows for these reference IDs and this type.
225 | $this->dbw->newDeleteQueryBuilder()
226 | ->table( 'cu_useragent_clienthints_map' )
227 | ->where( [
228 | 'uachm_reference_id' => $referenceIds,
229 | 'uachm_reference_type' => $mapId
230 | ] )
231 | ->caller( __METHOD__ )
232 | ->execute();
233 | $deletedRowCount += $this->dbw->affectedRows();
234 | }
235 | if ( $deletedRowCount ) {
236 | $this->logger->debug(
237 | "Deleted {mapping_rows_deleted} mapping rows.",
238 | [ 'mapping_rows_deleted' => $deletedRowCount ]
239 | );
240 | } else {
241 | $this->logger->info( "No mapping rows deleted." );
242 | }
243 | return $deletedRowCount;
244 | }
245 | |
246 | /** |
247 | * Checks 100 rows with the smallest uachm_reference_id |
248 | * for each uachm_reference_type value to see whether their |
249 | * associated entry referenced by the uachm_reference_id |
250 | * value has been already purged. |
251 | * |
252 | * If it reaches an entry that is not orphaned, the checks are |
253 | * stopped as items with a larger reference ID are unlikely to |
254 | * be orphaned. |
255 | * |
256 | * This catches rows that have been left without deletion |
257 | * due to unforeseen circumstances, as described in T350681. |
258 | * |
259 | * @return int The number of orphaned map rows deleted. |
260 | */ |
261 | public function deleteOrphanedMapRows(): int {
262 | // Running total of the orphaned mapping rows removed for every reference type.
263 | $deletedRowCount = 0;
264 | foreach ( array_keys( self::IDENTIFIER_TO_TABLE_NAME_MAP ) as $mappingId ) {
265 | // Fetch up to 100 distinct reference IDs that use this mapping ID.
266 | $referenceIdRows = $this->dbr->newSelectQueryBuilder()
267 | ->select( 'uachm_reference_id' )
268 | ->table( 'cu_useragent_clienthints_map' )
269 | ->where( [ 'uachm_reference_type' => $mappingId ] )
270 | ->orderBy( 'uachm_reference_id' )
271 | ->groupBy( 'uachm_reference_id' )
272 | ->limit( 100 )
273 | ->caller( __METHOD__ )
274 | ->fetchResultSet();
275 | foreach ( $referenceIdRows as $referenceIdRow ) {
276 | // The rows are ordered by reference ID, so the scan for this
277 | // mapping ID ends at the first reference that the
278 | // ::isMapRowOrphaned method does not report as orphaned.
279 | $referenceId = $referenceIdRow->uachm_reference_id;
280 | $isOrphaned = $this->isMapRowOrphaned( $referenceId, $mappingId );
281 | if ( !$isOrphaned ) {
282 | // If the map row is probably not orphaned, then just stop processing
283 | // the rows for this mapping ID.
284 | break;
285 | }
286 | // The map row is orphaned, so perform the deletion
287 | // and add the affected rows count to the return count.
288 | $this->dbw->newDeleteQueryBuilder()
289 | ->deleteFrom( 'cu_useragent_clienthints_map' )
290 | ->where( [
291 | 'uachm_reference_id' => $referenceId,
292 | 'uachm_reference_type' => $mappingId,
293 | ] )
294 | ->caller( __METHOD__ )
295 | ->execute();
296 | $deletedRowCount += $this->dbw->affectedRows();
297 | }
298 | }
299 | if ( $deletedRowCount ) {
300 | $this->logger->info(
301 | "Deleted {mapping_rows_deleted} orphaned mapping rows.",
302 | [ 'mapping_rows_deleted' => $deletedRowCount ]
303 | );
304 | }
305 | return $deletedRowCount;
306 | }
307 | |
308 | /** |
309 | * Returns whether rows with the given $referenceId and $mappingId |
310 | * in cu_useragent_clienthints_map are likely orphaned. |
311 | * |
312 | * @param int $referenceId |
313 | * @param int $mappingId |
314 | * @return bool |
315 | */ |
316 | private function isMapRowOrphaned( int $referenceId, int $mappingId ): bool {
317 | // Reject mapping IDs that have no entry in the table name map.
318 | if ( !isset( self::IDENTIFIER_TO_TABLE_NAME_MAP[$mappingId] ) ) {
319 | throw new LogicException( "Unrecognised map ID '$mappingId'" );
320 | }
321 | // Only cu_changes and cu_log_event references are checked by timestamp.
322 | $isChangesType = $mappingId === self::IDENTIFIER_CU_CHANGES;
323 | $isLogEventType = $mappingId === self::IDENTIFIER_CU_LOG_EVENT;
324 | if ( !$isChangesType && !$isLogEventType ) {
325 | // For any other mapping ID, query the referenced table directly:
326 | // the map rows are orphaned when no row has the reference ID.
327 | $referencedRowExists = $this->dbr->newSelectQueryBuilder()
328 | ->field( '1' )
329 | ->table( self::IDENTIFIER_TO_TABLE_NAME_MAP[$mappingId] )
330 | ->where( [ self::IDENTIFIER_TO_COLUMN_NAME_MAP[$mappingId] => $referenceId ] )
331 | ->caller( __METHOD__ )
332 | ->fetchField();
333 | return !$referencedRowExists;
334 | }
335 | // If the mapping ID is for cu_changes or cu_log_event,
336 | // then look up the revision or the log entry respectively
337 | // for the associated timestamp to determine if the map
338 | // row should have already been deleted.
339 | if ( $isChangesType ) {
340 | // Get the timestamp from the revision lookup service
341 | $revisionRecord = $this->revisionLookup->getRevisionById( $referenceId );
342 | $associatedTimestamp = $revisionRecord ? $revisionRecord->getTimestamp() : false;
343 | } else {
344 | // Get the timestamp using DatabaseLogEntry::newFromId
345 | $logObject = DatabaseLogEntry::newFromId( $referenceId, $this->dbr );
346 | $associatedTimestamp = $logObject ? $logObject->getTimestamp() : false;
347 | }
348 | // The map rows are considered orphaned if any of the following apply:
349 | // * No such reference ID exists (i.e. no such revision ID or log ID)
350 | // * There is no timestamp for the revision or log event (should be
351 | // generally impossible for this to be the case).
352 | // * The timestamp associated with the revision or log event is older
353 | // than wgCUDMaxAge + 100 seconds before the current time.
354 | if ( !$associatedTimestamp ) {
355 | return true;
356 | }
357 | // The 100 seconds are added to wgCUDMaxAge to prevent attempting to delete map rows
358 | // that would have been normally deleted. This code is intended to catch map rows that
359 | // were not deleted normally.
360 | $now = ConvertibleTimestamp::time();
361 | $cutoffUnixTime = $now - ( $this->options->get( 'CUDMaxAge' ) + 100 );
362 | $cutoffTimestamp = ConvertibleTimestamp::convert( TS_MW, $cutoffUnixTime );
363 | return $associatedTimestamp < $cutoffTimestamp;
364 | }
365 | |
366 | /** |
367 | * Helper method to avoid duplicate INSERT for existing client hint values. |
368 | * |
369 | * E.g. if "architecture: arm" already exists as a name/value pair, exclude this from the set of rows to insert. |
370 | * |
371 | * @param array[] $rows An array of arrays, where each array contains a key/value pair: |
372 | * uach_name => "some name", |
373 | * uach_value => "some value" |
374 | * @param bool $usePrimary If true, use the primary DB for SELECT queries. |
375 | * @return array[] An array of arrays to insert to the cu_useragent_clienthints table, see the @param $rows |
376 | * documentation for the format. |
377 | */ |
378 | private function excludeExistingClientHintData( array $rows, bool $usePrimary = false ): array {
379 | $db = $usePrimary ? $this->dbw : $this->dbr;
380 | $missingRows = [];
381 | foreach ( $rows as $candidateRow ) {
382 | $matchCount = $db->newSelectQueryBuilder()
383 | ->table( 'cu_useragent_clienthints' )
384 | ->where( $candidateRow )
385 | ->caller( __METHOD__ )
386 | ->fetchRowCount();
387 | if ( $matchCount === 0 ) {
388 | $missingRows[] = $candidateRow;
389 | }
390 | }
391 | return $missingRows;
392 | }
393 | |
394 | } |