Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
153 / 153
100.00% covered (success)
100.00%
8 / 8
CRAP
100.00% covered (success)
100.00%
1 / 1
UserAgentClientHintsManager
100.00% covered (success)
100.00%
153 / 153
100.00% covered (success)
100.00%
8 / 8
36
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 insertClientHintValues
100.00% covered (success)
100.00%
26 / 26
100.00% covered (success)
100.00%
1 / 1
4
 getMapIdByType
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
5
 insertMappingRows
100.00% covered (success)
100.00%
29 / 29
100.00% covered (success)
100.00%
1 / 1
5
 deleteMappingRows
100.00% covered (success)
100.00%
20 / 20
100.00% covered (success)
100.00%
1 / 1
4
 deleteOrphanedMapRows
100.00% covered (success)
100.00%
31 / 31
100.00% covered (success)
100.00%
1 / 1
5
 isMapRowOrphaned
100.00% covered (success)
100.00%
23 / 23
100.00% covered (success)
100.00%
1 / 1
8
 excludeExistingClientHintData
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
4
1<?php
2
3namespace MediaWiki\CheckUser\Services;
4
5use DatabaseLogEntry;
6use LogicException;
7use MediaWiki\CheckUser\ClientHints\ClientHintsData;
8use MediaWiki\CheckUser\ClientHints\ClientHintsReferenceIds;
9use MediaWiki\Config\ServiceOptions;
10use MediaWiki\Revision\RevisionLookup;
11use Psr\Log\LoggerInterface;
12use StatusValue;
13use Wikimedia\Rdbms\IConnectionProvider;
14use Wikimedia\Rdbms\IDatabase;
15use Wikimedia\Rdbms\IReadableDatabase;
16use Wikimedia\Timestamp\ConvertibleTimestamp;
17
/**
 * Service to insert and delete user-agent client hint values and their associations with rows in cu_changes,
 * cu_log_event and cu_private_event.
 *
 * Client hint name/value pairs are stored once in cu_useragent_clienthints; the
 * cu_useragent_clienthints_map table links those pairs to a reference ID and a
 * reference type (one of the self::IDENTIFIER_* constants).
 */
class UserAgentClientHintsManager {

    /**
     * Options that must be present in the ServiceOptions object passed to the constructor.
     */
    public const CONSTRUCTOR_OPTIONS = [
        'CUDMaxAge',
    ];

    /**
     * Event types listed as supported.
     *
     * NOTE(review): ::getMapIdByType also accepts 'log' and 'privatelog'; presumably this
     * list is what external callers validate against - confirm before extending it.
     */
    public const SUPPORTED_TYPES = [
        'revision',
    ];

    /**
     * TINYINT references for use in cu_useragent_clienthints_map.uachm_reference_type
     */
    // Identifier for the cu_changes table
    public const IDENTIFIER_CU_CHANGES = 0;
    // Identifier for the cu_log_event table
    public const IDENTIFIER_CU_LOG_EVENT = 1;
    // Identifier for the cu_private_event table
    public const IDENTIFIER_CU_PRIVATE_EVENT = 2;

    /**
     * Maps each IDENTIFIER_* value to the name of the table it refers to.
     */
    public const IDENTIFIER_TO_TABLE_NAME_MAP = [
        self::IDENTIFIER_CU_CHANGES => 'cu_changes',
        self::IDENTIFIER_CU_LOG_EVENT => 'cu_log_event',
        self::IDENTIFIER_CU_PRIVATE_EVENT => 'cu_private_event',
    ];

    /**
     * Maps each IDENTIFIER_* value to the column in the associated table that
     * holds the value stored in cu_useragent_clienthints_map.uachm_reference_id.
     */
    public const IDENTIFIER_TO_COLUMN_NAME_MAP = [
        self::IDENTIFIER_CU_CHANGES => 'cuc_this_oldid',
        self::IDENTIFIER_CU_LOG_EVENT => 'cule_log_id',
        self::IDENTIFIER_CU_PRIVATE_EVENT => 'cupe_id',
    ];

    private IDatabase $dbw;
    private IReadableDatabase $dbr;
    private RevisionLookup $revisionLookup;
    private ServiceOptions $options;
    private LoggerInterface $logger;

    /**
     * @param IConnectionProvider $connectionProvider Used to obtain the primary and replica connections
     * @param RevisionLookup $revisionLookup
     * @param ServiceOptions $options Must provide every option in self::CONSTRUCTOR_OPTIONS
     * @param LoggerInterface $logger
     */
    public function __construct(
        IConnectionProvider $connectionProvider,
        RevisionLookup $revisionLookup,
        ServiceOptions $options,
        LoggerInterface $logger
    ) {
        $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
        $this->options = $options;
        $this->dbw = $connectionProvider->getPrimaryDatabase();
        $this->dbr = $connectionProvider->getReplicaDatabase();
        $this->revisionLookup = $revisionLookup;
        $this->logger = $logger;
    }

    /**
     * Given an array of client hint data, a reference ID, and an identifier type, record the data to the
     * cu_useragent_clienthints and cu_useragent_clienthints_map tables.
     *
     * The check for already existing mapping rows always reads from the replica DB,
     * independently of $usePrimary.
     *
     * @param ClientHintsData $clientHintsData
     * @param int $referenceId An ID to use in `uachm_reference_id` column in the
     *   cu_useragent_clienthints_map table
     * @param string $type The type of event this data is associated with. Values understood
     *   by ::getMapIdByType are:
     *  - revision
     *  - log
     *  - privatelog
     * @param bool $usePrimary If true, use the primary DB for the SELECT queries that look up
     *   rows in cu_useragent_clienthints.
     * @return StatusValue Good if there was nothing to insert or the insert was attempted; fatal if
     *   mapping rows already exist for the given reference ID and type.
     * @throws LogicException If $type is not recognised and there is client hint data to insert
     */
    public function insertClientHintValues(
        ClientHintsData $clientHintsData, int $referenceId, string $type, bool $usePrimary = false
    ): StatusValue {
        // Check if there are rows to insert to the map table.
        $rows = $clientHintsData->toDatabaseRows();
        if ( !count( $rows ) ) {
            // Nothing to insert, so return early.
            // Having nothing to insert isn't considered "bad", so return a new good
            // For example, a browser could choose to provide no Client Hints data but
            // still send an empty API request.
            return StatusValue::newGood();
        }

        // Check for existing entry.
        $existingRecord = $this->dbr->newSelectQueryBuilder()
            ->table( 'cu_useragent_clienthints_map' )
            ->where( [
                'uachm_reference_type' => $this->getMapIdByType( $type ),
                'uachm_reference_id' => $referenceId
            ] )
            ->caller( __METHOD__ )
            ->fetchRowCount();
        if ( $existingRecord ) {
            return StatusValue::newFatal(
                'checkuser-api-useragent-clienthints-mappings-exist',
                [ $type, $referenceId ]
            );
        }

        // Only name/value pairs that are not yet stored need to be inserted.
        $rows = $this->excludeExistingClientHintData( $rows, $usePrimary );

        if ( count( $rows ) ) {
            $this->dbw->newInsertQueryBuilder()
                ->insertInto( 'cu_useragent_clienthints' )
                ->ignore()
                ->rows( $rows )
                ->caller( __METHOD__ )
                ->execute();
            // We just inserted rows to cu_useragent_clienthints, so
            // use the primary DB for subsequent SELECT queries.
            $usePrimary = true;
        }
        return $this->insertMappingRows( $clientHintsData, $referenceId, $type, $usePrimary );
    }

    /**
     * Given an identifier for the type of event (e.g. 'revision'), return the relevant TINYINT
     * for the table that the database entry for cu_useragent_clienthints_map refers to
     *
     * @param string $type One of 'revision', 'log' or 'privatelog'
     * @return int One of self::IDENTIFIER_* constants
     * @throws LogicException If $type is not one of the recognised values
     */
    private function getMapIdByType( string $type ): int {
        switch ( $type ) {
            case 'revision':
                return self::IDENTIFIER_CU_CHANGES;
            case 'log':
                return self::IDENTIFIER_CU_LOG_EVENT;
            case 'privatelog':
                return self::IDENTIFIER_CU_PRIVATE_EVENT;
            default:
                throw new LogicException( "Invalid type $type" );
        }
    }

    /**
     * Insert rows into the cu_useragent_clienthints_map table.
     *
     * This links a foreign ID (e.g. "revision 1234") with client hint data values stored in cu_useragent_clienthints.
     *
     * A name/value pair that cannot be found in cu_useragent_clienthints is skipped and
     * a warning is logged; the returned status is still good in that case.
     *
     * @param ClientHintsData $clientHintsData
     * @param int $foreignId
     * @param string $type
     * @param bool $usePrimary If true, use the primary DB for SELECT queries.
     * @return StatusValue
     * @throws LogicException If $type is not recognised
     * @see insertClientHintValues, which invokes this method.
     */
    private function insertMappingRows(
        ClientHintsData $clientHintsData, int $foreignId, string $type, bool $usePrimary = false
    ): StatusValue {
        $rows = $clientHintsData->toDatabaseRows();
        // We might need primary DB if the call is happening in the context of a server-side hook,
        $db = $usePrimary ? $this->dbw : $this->dbr;

        // TINYINT reference to cu_changes, cu_log_event or cu_private_event.
        $idType = $this->getMapIdByType( $type );
        $mapRows = [];
        foreach ( $rows as $row ) {
            $result = $db->newSelectQueryBuilder()
                ->table( 'cu_useragent_clienthints' )
                ->field( 'uach_id' )
                ->where( $row )
                ->caller( __METHOD__ )
                ->fetchField();
            if ( $result !== false ) {
                $mapRows[] = [
                    'uachm_uach_id' => (int)$result,
                    'uachm_reference_type' => $idType,
                    'uachm_reference_id' => $foreignId,
                ];
            } else {
                // The context keys must match the {placeholders} in the message for
                // PSR-3 interpolation and structured logging to work.
                $this->logger->warning(
                    "Lookup failed for cu_useragent_clienthints row with name {name} and value {value}.",
                    [
                        'name' => $row['uach_name'],
                        'value' => $row['uach_value'],
                    ]
                );
            }
        }

        if ( count( $mapRows ) ) {
            $this->dbw->newInsertQueryBuilder()
                ->insertInto( 'cu_useragent_clienthints_map' )
                ->ignore()
                ->rows( $mapRows )
                ->caller( __METHOD__ )
                ->execute();
        }
        return StatusValue::newGood();
    }

    /**
     * Given reference IDs this method finds and deletes
     * the mapping entries for these reference IDs.
     *
     * @param ClientHintsReferenceIds $clientHintsReferenceIds
     * @return int The number of mapping rows deleted.
     */
    public function deleteMappingRows( ClientHintsReferenceIds $clientHintsReferenceIds ): int {
        // Keep a track of the number of mapping rows that are deleted.
        $mappingRowsDeleted = 0;
        foreach ( $clientHintsReferenceIds->getReferenceIds() as $mapId => $referenceIds ) {
            if ( !count( $referenceIds ) ) {
                // No reference IDs for this reference type, so there is nothing to delete.
                continue;
            }
            // Delete the rows in cu_useragent_clienthints_map associated with these reference IDs
            $this->dbw->newDeleteQueryBuilder()
                ->deleteFrom( 'cu_useragent_clienthints_map' )
                ->where( [
                    'uachm_reference_id' => $referenceIds,
                    'uachm_reference_type' => $mapId
                ] )
                ->caller( __METHOD__ )
                ->execute();
            $mappingRowsDeleted += $this->dbw->affectedRows();
        }
        if ( !$mappingRowsDeleted ) {
            $this->logger->info( "No mapping rows deleted." );
        } else {
            $this->logger->debug(
                "Deleted {mapping_rows_deleted} mapping rows.",
                [ 'mapping_rows_deleted' => $mappingRowsDeleted ]
            );
        }
        return $mappingRowsDeleted;
    }

    /**
     * Checks up to 100 distinct uachm_reference_id values, smallest first,
     * for each uachm_reference_type value to see whether their
     * associated entry referenced by the uachm_reference_id
     * value has been already purged.
     *
     * If it reaches an entry that is not orphaned, the checks are
     * stopped as items with a larger reference ID are unlikely to
     * be orphaned.
     *
     * This catches rows that have been left without deletion
     * due to unforeseen circumstances, as described in T350681.
     *
     * @return int The number of orphaned map rows deleted.
     */
    public function deleteOrphanedMapRows(): int {
        // Keep a track of the number of mapping rows that are deleted.
        $mappingRowsDeleted = 0;
        foreach ( self::IDENTIFIER_TO_TABLE_NAME_MAP as $mappingId => $table ) {
            // Get up to 100 distinct reference IDs with the given mapping ID
            $resultSet = $this->dbr->newSelectQueryBuilder()
                ->select( 'uachm_reference_id' )
                ->table( 'cu_useragent_clienthints_map' )
                ->where( [ 'uachm_reference_type' => $mappingId ] )
                ->orderBy( 'uachm_reference_id' )
                ->groupBy( 'uachm_reference_id' )
                ->limit( 100 )
                ->caller( __METHOD__ )
                ->fetchResultSet();
            foreach ( $resultSet as $row ) {
                // Cast explicitly rather than relying on implicit coercion when
                // passing the value to ::isMapRowOrphaned, which requires an int.
                $referenceId = (int)$row->uachm_reference_id;
                if ( !$this->isMapRowOrphaned( $referenceId, $mappingId ) ) {
                    // If the map row is probably not orphaned, then just stop processing
                    // the rows in this table.
                    break;
                }
                // The map row is orphaned, so perform the deletion
                // and add the affected rows count to the return count.
                $this->dbw->newDeleteQueryBuilder()
                    ->deleteFrom( 'cu_useragent_clienthints_map' )
                    ->where( [
                        'uachm_reference_id' => $referenceId,
                        'uachm_reference_type' => $mappingId,
                    ] )
                    ->caller( __METHOD__ )
                    ->execute();
                $mappingRowsDeleted += $this->dbw->affectedRows();
            }
        }
        if ( $mappingRowsDeleted ) {
            $this->logger->info(
                "Deleted {mapping_rows_deleted} orphaned mapping rows.",
                [ 'mapping_rows_deleted' => $mappingRowsDeleted ]
            );
        }
        return $mappingRowsDeleted;
    }

    /**
     * Returns whether rows with the given $referenceId and $mappingId
     * in cu_useragent_clienthints_map are likely orphaned.
     *
     * @param int $referenceId
     * @param int $mappingId One of self::IDENTIFIER_* constants
     * @return bool
     * @throws LogicException If $mappingId is not a key of self::IDENTIFIER_TO_TABLE_NAME_MAP
     */
    private function isMapRowOrphaned( int $referenceId, int $mappingId ): bool {
        if ( !array_key_exists( $mappingId, self::IDENTIFIER_TO_TABLE_NAME_MAP ) ) {
            throw new LogicException( "Unrecognised map ID '$mappingId'" );
        }
        if ( !in_array( $mappingId, [ self::IDENTIFIER_CU_LOG_EVENT, self::IDENTIFIER_CU_CHANGES ], true ) ) {
            // If the mapping ID is not for cu_changes or cu_log_event,
            // query the table directly to check if the associated reference ID
            // exists in the table.
            return !$this->dbr->newSelectQueryBuilder()
                ->field( '1' )
                ->table( self::IDENTIFIER_TO_TABLE_NAME_MAP[$mappingId] )
                ->where( [ self::IDENTIFIER_TO_COLUMN_NAME_MAP[$mappingId] => $referenceId ] )
                ->caller( __METHOD__ )
                ->fetchField();
        }
        // If the mapping ID is for cu_changes or cu_log_event,
        // then query the revision table or logging table respectively
        // for the associated timestamp to determine if the map
        // row should have already been deleted.
        $associatedTimestamp = false;
        if ( $mappingId === self::IDENTIFIER_CU_CHANGES ) {
            // Get the timestamp from the revision lookup service
            $revisionRecord = $this->revisionLookup->getRevisionById( $referenceId );
            if ( $revisionRecord ) {
                $associatedTimestamp = $revisionRecord->getTimestamp();
            }
        } elseif ( $mappingId === self::IDENTIFIER_CU_LOG_EVENT ) {
            // Get the timestamp from using DatabaseLogEntry::newFromId
            $logObject = DatabaseLogEntry::newFromId( $referenceId, $this->dbr );
            if ( $logObject ) {
                $associatedTimestamp = $logObject->getTimestamp();
            }
        }
        // The map rows are considered orphaned if any of the following apply:
        // * There is no timestamp for the revision or log event (should be generally impossible for this
        //   to be the case).
        // * No such reference ID exists (i.e. no such revision ID or log ID)
        // * The timestamp associated with the revision or log event is before the
        //   wgCUDMaxAge + 100 seconds ago to the current time.
        //
        // The 100 seconds are added to wgCUDMaxAge to prevent attempting to delete map rows
        // that would have been normally deleted. This code is intended to catch map rows that
        // were not deleted normally.
        if ( !$associatedTimestamp ) {
            return true;
        }
        $cutoff = ConvertibleTimestamp::convert(
            TS_MW,
            ConvertibleTimestamp::time() - ( $this->options->get( 'CUDMaxAge' ) + 100 )
        );
        return $associatedTimestamp < $cutoff;
    }

    /**
     * Helper method to avoid duplicate INSERT for existing client hint values.
     *
     * E.g. if "architecture: arm" already exists as a name/value pair, exclude this from the set of rows to insert.
     *
     * @param array[] $rows An array of arrays, where each array contains a key/value pair:
     *  uach_name => "some name",
     *  uach_value => "some value"
     * @param bool $usePrimary If true, use the primary DB for SELECT queries.
     * @return array[] An array of arrays to insert to the cu_useragent_clienthints table, see the @param $rows
     *  documentation for the format.
     */
    private function excludeExistingClientHintData( array $rows, bool $usePrimary = false ): array {
        $rowsToInsert = [];
        $db = $usePrimary ? $this->dbw : $this->dbr;
        foreach ( $rows as $row ) {
            // Each row is used directly as the WHERE conditions, so this counts
            // the stored rows with exactly this name/value pair.
            $result = $db->newSelectQueryBuilder()
                ->table( 'cu_useragent_clienthints' )
                ->where( $row )
                ->caller( __METHOD__ )
                ->fetchRowCount();
            if ( $result === 0 ) {
                $rowsToInsert[] = $row;
            }
        }
        return $rowsToInsert;
    }

}