Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
55 / 55 |
|
100.00% |
7 / 7 |
CRAP | |
100.00% |
1 / 1 |
MediaModerationDatabaseLookup | |
100.00% |
55 / 55 |
|
100.00% |
7 / 7 |
15 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
fileExistsInScanTable | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
getMatchStatusForSha1 | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
getDb | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
getDateFromTimestamp | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
newSelectQueryBuilderForScan | |
100.00% |
25 / 25 |
|
100.00% |
1 / 1 |
6 | |||
getSha1ValuesForScan | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\MediaModeration\Services; |
4 | |
5 | use ArchivedFile; |
6 | use File; |
7 | use Wikimedia\Rdbms\IConnectionProvider; |
8 | use Wikimedia\Rdbms\IDBAccessObject; |
9 | use Wikimedia\Rdbms\IReadableDatabase; |
10 | use Wikimedia\Rdbms\SelectQueryBuilder; |
11 | use Wikimedia\Timestamp\ConvertibleTimestamp; |
12 | use Wikimedia\Timestamp\TimestampException; |
13 | |
/**
 * Read-only lookup service for the mediamoderation_scan table, accessed through the
 * 'virtual-mediamoderation' database domain.
 */
class MediaModerationDatabaseLookup {

	/** Match status filter value meaning "do not filter on mms_is_match at all". */
	public const ANY_MATCH_STATUS = 'any';
	/** Match status filter value selecting rows where mms_is_match is '1'. */
	public const POSITIVE_MATCH_STATUS = '1';
	/** Match status filter value selecting rows where mms_is_match is '0'. */
	public const NEGATIVE_MATCH_STATUS = '0';
	/** Match status filter value selecting rows where mms_is_match is NULL. */
	public const NULL_MATCH_STATUS = null;

	private IConnectionProvider $connectionProvider;

	/**
	 * @param IConnectionProvider $connectionProvider Used to obtain primary and replica connections
	 *   for the 'virtual-mediamoderation' domain.
	 */
	public function __construct( IConnectionProvider $connectionProvider ) {
		$this->connectionProvider = $connectionProvider;
	}

	/**
	 * Returns whether the given $file exists in the mediamoderation_scan table.
	 *
	 * The lookup is performed using the SHA-1 of the file, as returned by $file->getSha1().
	 *
	 * @param File|ArchivedFile $file
	 * @param int $flags IDBAccessObject flags. Does not support READ_LOCKING or READ_EXCLUSIVE
	 * @return bool
	 */
	public function fileExistsInScanTable( $file, int $flags = IDBAccessObject::READ_NORMAL ): bool {
		$rowCount = $this->getDb( $flags )
			->newSelectQueryBuilder()
			->select( 'COUNT(*)' )
			->from( 'mediamoderation_scan' )
			->where( [ 'mms_sha1' => $file->getSha1() ] )
			->caller( __METHOD__ )
			->fetchField();
		// The count is cast to a boolean, so a count of zero means the file is not in the table.
		return (bool)$rowCount;
	}

	/**
	 * Returns the match status for a given SHA-1. If the SHA-1 does not
	 * exist in the mediamoderation_scan table, this method will return null.
	 *
	 * @param string $sha1
	 * @param int $flags IDBAccessObject flags. Does not support READ_LOCKING or READ_EXCLUSIVE
	 * @return bool|null The match status (null indicates the SHA-1 hasn't been scanned)
	 */
	public function getMatchStatusForSha1( string $sha1, int $flags = IDBAccessObject::READ_NORMAL ): ?bool {
		$rawMatchStatus = $this->getDb( $flags )
			->newSelectQueryBuilder()
			->select( 'mms_is_match' )
			->from( 'mediamoderation_scan' )
			->where( [ 'mms_sha1' => $sha1 ] )
			->caller( __METHOD__ )
			->fetchField();
		// Only a string result is treated as a stored match status. Any other result
		// (such as no row being found or the column being NULL) is reported as null.
		if ( !is_string( $rawMatchStatus ) ) {
			return null;
		}
		return boolval( $rawMatchStatus );
	}

	/**
	 * Gets the IReadableDatabase object for the virtual-mediamoderation DB domain
	 * for the given $flags.
	 *
	 * If the IDBAccessObject::READ_LATEST bit is set in $flags the primary database is returned,
	 * otherwise a replica database is returned.
	 *
	 * @param int $flags IDBAccessObject flags.
	 * @return IReadableDatabase
	 */
	public function getDb( int $flags ): IReadableDatabase {
		if ( $flags & IDBAccessObject::READ_LATEST ) {
			return $this->connectionProvider->getPrimaryDatabase( 'virtual-mediamoderation' );
		}
		return $this->connectionProvider->getReplicaDatabase( 'virtual-mediamoderation' );
	}

	/**
	 * Converts a given timestamp to a string representing the date in the format YYYYMMDD.
	 *
	 * @param ConvertibleTimestamp|string|int $timestamp A ConvertibleTimestamp or timestamp recognised by
	 *   ConvertibleTimestamp.
	 * @return string The timestamp as a date in the form YYYYMMDD
	 * @throws TimestampException If the $timestamp cannot be parsed
	 */
	public function getDateFromTimestamp( $timestamp ): string {
		// Wrap anything that is not already a ConvertibleTimestamp instance.
		if ( !( $timestamp instanceof ConvertibleTimestamp ) ) {
			$timestamp = new ConvertibleTimestamp( $timestamp );
		}
		// Get the timestamp in TS_MW form (YYYYMMDDHHMMSS) and keep only the
		// first 8 characters, which is the YYYYMMDD part.
		return substr( $timestamp->getTimestamp( TS_MW ), 0, 8 );
	}

	/**
	 * Returns a SelectQueryBuilder that can be used to query SHA-1 values for a scan.
	 *
	 * The parameters to this method allow filtering for rows with a specific match status and/or rows that were
	 * last checked before or at a particular date. The fields to select are not set here and must be
	 * set by the caller.
	 *
	 * @param ConvertibleTimestamp|int|string|null $lastChecked See ::getSha1ValuesForScan
	 * @param string $direction See ::getSha1ValuesForScan
	 * @param array $excludedSha1Values See ::getSha1ValuesForScan
	 * @param string|null $matchStatus See ::getSha1ValuesForScan
	 * @return SelectQueryBuilder
	 * @throws TimestampException If the $lastChecked timestamp could not be parsed as a valid timestamp.
	 */
	protected function newSelectQueryBuilderForScan(
		$lastChecked, string $direction, array $excludedSha1Values, ?string $matchStatus = self::ANY_MATCH_STATUS
	): SelectQueryBuilder {
		// Get a replica DB connection.
		$dbr = $this->getDb( IDBAccessObject::READ_NORMAL );
		// Create a SelectQueryBuilder that reads from the mediamoderation_scan table.
		$selectQueryBuilder = $dbr->newSelectQueryBuilder()
			->from( 'mediamoderation_scan' );

		if ( $lastChecked === null ) {
			// If $lastChecked is null, then only get rows with the last checked value as null.
			$selectQueryBuilder->where( [ 'mms_last_checked' => null ] );
		} else {
			// Otherwise treat $lastChecked as a timestamp, reduce it to a date in the
			// form YYYYMMDD and filter for rows last checked on or before that date,
			// or which have never been checked (last checked as null).
			$lastCheckedDate = $this->getDateFromTimestamp( $lastChecked );
			$selectQueryBuilder->where(
				$dbr->expr( 'mms_last_checked', '<=', $lastCheckedDate )
					->or( 'mms_last_checked', '=', null )
			);
		}

		$orderDirection = $direction;
		if ( $dbr->getType() === 'postgres' ) {
			// Postgres DBs treat NULLs by default as larger than non-NULL values.
			// This is the opposite for Mariadb / SQLite. Postgres should have the same
			// behaviour as Mariadb / SQLite. By using NULLS FIRST and NULLS LAST
			// we can control where the NULL comes in the results list for postgres DBs
			if ( $direction === SelectQueryBuilder::SORT_ASC ) {
				$orderDirection .= ' NULLS FIRST';
			} else {
				$orderDirection .= ' NULLS LAST';
			}
		}

		// Filter by match status if $matchStatus does not indicate to
		// allow rows with any match status.
		if ( $matchStatus !== self::ANY_MATCH_STATUS ) {
			$selectQueryBuilder->where( [ 'mms_is_match' => $matchStatus ] );
		}

		// Exclude the SHA-1 values specified by the caller, if any are provided.
		if ( $excludedSha1Values !== [] ) {
			// Pass through array_values to remove any existing keys, which results in numeric keys that are
			// acceptable to IReadableDatabase::expr.
			$selectQueryBuilder->where(
				$dbr->expr( 'mms_sha1', '!=', array_values( $excludedSha1Values ) )
			);
		}

		// Return the constructed SelectQueryBuilder after adding the order by field.
		return $selectQueryBuilder->orderBy( 'mms_last_checked', $orderDirection );
	}

	/**
	 * Gets up to $limit SHA-1 values from the mediamoderation_scan table for rows that were last checked
	 * on or before the date of the timestamp in $lastChecked, or which have never been checked.
	 * The rows are ordered by last checked date in $direction.
	 *
	 * @param int $limit The maximum number of scan rows to return
	 * @param ConvertibleTimestamp|int|string|null $lastChecked Filters for scan rows that have been last checked
	 *   on or before this date. If null, only include files which have never been checked. If not null, treats
	 *   this as a timestamp that can be parsed by ConvertibleTimestamp.
	 * @param string $direction Either SelectQueryBuilder::SORT_ASC or ::SORT_DESC. Used to control
	 *   whether to start at the rows with the newest or oldest last checked timestamp. No-op if $lastChecked
	 *   is null, because every selected row then has a null last checked value.
	 * @param array $excludedSha1Values SHA-1 values to exclude from the returned array.
	 * @param string|null $matchStatus Filter for rows that have this match status. Any constants of this
	 *   service in the format ::*_MATCH_STATUS can be passed in this parameter. The default is to not filter
	 *   by match status (using ::ANY_MATCH_STATUS).
	 * @return array The SHA-1 values from the selected rows
	 * @throws TimestampException If the $lastChecked timestamp could not be parsed as a valid timestamp.
	 */
	public function getSha1ValuesForScan(
		int $limit, $lastChecked, string $direction,
		array $excludedSha1Values, ?string $matchStatus = self::ANY_MATCH_STATUS
	): array {
		// Return up to $limit SHA-1 values that match the given criteria.
		return $this->newSelectQueryBuilderForScan( $lastChecked, $direction, $excludedSha1Values, $matchStatus )
			->select( 'mms_sha1' )
			->limit( $limit )
			->caller( __METHOD__ )
			->fetchFieldValues();
	}
}