Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
150 / 150 |
|
100.00% |
15 / 15 |
CRAP | |
100.00% |
1 / 1 |
NukeQueryBuilder | |
100.00% |
150 / 150 |
|
100.00% |
15 / 15 |
38 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
fromRevisionTableQuery | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
1 | |||
fromRecentChangesTableQuery | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
1 | |||
limit | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
filterActor | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
filterNamespaces | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
filterPattern | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
filterByMinPageSize | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
filterByMaxPageSize | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
getOverriddenNamespaces | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
5 | |||
filterSimplePattern | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
2 | |||
filterPatternWithOverriddenNamespaces | |
100.00% |
30 / 30 |
|
100.00% |
1 / 1 |
7 | |||
filterFromTimestamp | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
filterToTimestamp | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
build | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\Nuke; |
4 | |
5 | use LogicException; |
6 | use MediaWiki\Config\Config; |
7 | use MediaWiki\Language\Language; |
8 | use MediaWiki\MainConfigNames; |
9 | use MediaWiki\Title\NamespaceInfo; |
10 | use Wikimedia\Rdbms\IExpression; |
11 | use Wikimedia\Rdbms\IReadableDatabase; |
12 | use Wikimedia\Rdbms\LikeMatch; |
13 | use Wikimedia\Rdbms\LikeValue; |
14 | use Wikimedia\Rdbms\SelectQueryBuilder; |
15 | |
16 | class NukeQueryBuilder { |
17 | |
18 | /** |
19 | * Constant to run queries on the revision table. |
20 | */ |
21 | public const TABLE_REVISION = 'revision'; |
22 | /** |
23 | * Constant to run queries on the recentchanges table. |
24 | */ |
25 | public const TABLE_RECENTCHANGES = 'recentchanges'; |
26 | |
27 | /** |
28 | * Default fields to include in the result set. Must be fields that can be found |
29 | * in both revision and recentchanges queries. |
30 | */ |
31 | private const DEFAULT_FIELDS = [ 'page_id', 'page_title', 'page_namespace', 'actor_name' ]; |
32 | |
33 | private IReadableDatabase $readableDatabase; |
34 | private Config $config; |
35 | private NamespaceInfo $namespaceInfo; |
36 | private Language $contentLanguage; |
37 | |
38 | /** |
39 | * The query builder for this query. Set once in the constructor, and should never be |
40 | * reassigned afterward. |
41 | * |
42 | * @var SelectQueryBuilder|null |
43 | */ |
44 | private ?SelectQueryBuilder $selectQueryBuilder = null; |
45 | /** |
46 | * The table being used. |
47 | * |
48 | * @var string |
49 | */ |
50 | private string $table; |
51 | |
52 | /** |
53 | * @param IReadableDatabase $readableDatabase |
54 | * @param Config $config |
55 | * @param NamespaceInfo $namespaceInfo |
56 | * @param Language $contentLanguage |
57 | * @param string $table The table to use. Must be one of |
58 | * {@link TABLE_REVISION} or {@link TABLE_RECENTCHANGES}. |
59 | */ |
60 | public function __construct( |
61 | IReadableDatabase $readableDatabase, |
62 | Config $config, |
63 | NamespaceInfo $namespaceInfo, |
64 | Language $contentLanguage, |
65 | string $table |
66 | ) { |
67 | $this->readableDatabase = $readableDatabase; |
68 | $this->config = $config; |
69 | $this->namespaceInfo = $namespaceInfo; |
70 | $this->contentLanguage = $contentLanguage; |
71 | |
72 | switch ( $table ) { |
73 | case self::TABLE_REVISION: |
74 | $this->fromRevisionTableQuery(); |
75 | break; |
76 | case self::TABLE_RECENTCHANGES: |
77 | $this->fromRecentChangesTableQuery(); |
78 | break; |
79 | default: |
80 | throw new LogicException( "Invalid Nuke table target: $table" ); |
81 | } |
82 | $this->table = $table; |
83 | } |
84 | |
85 | private function fromRevisionTableQuery() { |
86 | $dbr = $this->readableDatabase; |
87 | $this->selectQueryBuilder = $dbr->newSelectQueryBuilder() |
88 | ->select( self::DEFAULT_FIELDS ) |
89 | ->distinct() |
90 | ->from( self::TABLE_REVISION ) |
91 | ->join( 'actor', null, 'actor_id=rev_actor' ) |
92 | ->join( 'page', null, 'page_id=rev_page' ) |
93 | ->where( [ |
94 | $dbr->expr( 'rev_parent_id', '=', 0 ) |
95 | ] ) |
96 | ->orderBy( 'rev_timestamp', SelectQueryBuilder::SORT_DESC ) |
97 | ->setMaxExecutionTime( |
98 | $this->config->get( MainConfigNames::MaxExecutionTimeForExpensiveQueries ) |
99 | ); |
100 | } |
101 | |
102 | private function fromRecentChangesTableQuery() { |
103 | $dbr = $this->readableDatabase; |
104 | $this->selectQueryBuilder = $dbr->newSelectQueryBuilder() |
105 | ->select( self::DEFAULT_FIELDS ) |
106 | ->from( self::TABLE_RECENTCHANGES ) |
107 | ->join( 'actor', null, 'actor_id=rc_actor' ) |
108 | ->join( 'page', null, 'page_id=rc_cur_id' ) |
109 | ->where( [ |
110 | $dbr->expr( 'rc_source', '=', 'mw.new' )->orExpr( |
111 | $dbr->expr( 'rc_log_type', '=', 'upload' ) |
112 | ->and( 'rc_log_action', '=', 'upload' ) |
113 | ) |
114 | ] ) |
115 | ->orderBy( 'rc_timestamp', SelectQueryBuilder::SORT_DESC ) |
116 | ->setMaxExecutionTime( |
117 | $this->config->get( MainConfigNames::MaxExecutionTimeForExpensiveQueries ) |
118 | ); |
119 | } |
120 | |
121 | /** |
122 | * Limit the number of rows returned. |
123 | * |
124 | * @param int $limit The limit to follow |
125 | * @return $this |
126 | */ |
127 | public function limit( int $limit ): self { |
128 | $this->selectQueryBuilder->limit( $limit ); |
129 | return $this; |
130 | } |
131 | |
132 | /** |
133 | * Filter based on one or multiple actor names. |
134 | * |
135 | * If `$actors` is an empty array, this is a no-op. |
136 | * |
137 | * @param string|string[] $actors The actor names to filter to |
138 | * @return self |
139 | */ |
140 | public function filterActor( $actors ): self { |
141 | if ( !is_array( $actors ) ) { |
142 | $actors = [ $actors ]; |
143 | } elseif ( !count( $actors ) ) { |
144 | return $this; |
145 | } |
146 | |
147 | $this->selectQueryBuilder->andWhere( [ 'actor_name' => $actors ] ); |
148 | return $this; |
149 | } |
150 | |
151 | /** |
152 | * Filter based on namespaces. |
153 | * |
154 | * If `$namespaces` is null, this is a no-op. |
155 | * |
156 | * @param int[]|null $namespaces The namespace IDs to filter to |
157 | * @return $this |
158 | */ |
159 | public function filterNamespaces( ?array $namespaces ): self { |
160 | if ( $namespaces == null ) { |
161 | return $this; |
162 | } |
163 | |
164 | $dbr = $this->readableDatabase; |
165 | $namespaceConditions = array_map( static function ( $ns ) use ( $dbr ) { |
166 | return $dbr->expr( 'page_namespace', '=', $ns ); |
167 | }, $namespaces ); |
168 | $this->selectQueryBuilder->andWhere( $dbr->orExpr( $namespaceConditions ) ); |
169 | return $this; |
170 | } |
171 | |
172 | /** |
173 | * Filter based on a page title pattern. |
174 | * |
175 | * If `$pattern` is an empty string, this is a no-op. |
176 | * |
177 | * @param string $pattern The pattern to use |
178 | * @param int[]|null $namespaces The namespace IDs to filter to |
179 | * @return $this |
180 | */ |
181 | public function filterPattern( string $pattern, ?array $namespaces = null ): self { |
182 | $pattern = trim( $pattern ); |
183 | |
184 | if ( $pattern == '' ) { |
185 | return $this; |
186 | } |
187 | |
188 | $pattern = preg_replace( '/ +/', '`_', $pattern ); |
189 | $pattern = preg_replace( '/\\\\([%_])/', '`$1', $pattern ); |
190 | |
191 | $overriddenNamespaces = $this->getOverriddenNamespaces( $namespaces ); |
192 | if ( count( $overriddenNamespaces ) ) { |
193 | $this->filterPatternWithOverriddenNamespaces( $pattern, $namespaces, $overriddenNamespaces ); |
194 | } else { |
195 | $this->filterSimplePattern( $pattern ); |
196 | } |
197 | |
198 | return $this; |
199 | } |
200 | |
201 | /** |
202 | * Filter based on a minimum page size. |
203 | * |
204 | * If `$minPageSize` is 0 or negative, this is a no-op. |
205 | * |
206 | * @param int $minPageSize The minimum size (in bytes) that a page must be to be included |
207 | * @return $this |
208 | */ |
209 | public function filterByMinPageSize( int $minPageSize ): self { |
210 | if ( $minPageSize <= 0 ) { |
211 | // No filtering if minPageSize is 0 or negative, because |
212 | // this would do nothing if we added it to the query |
213 | // anyway. |
214 | return $this; |
215 | } |
216 | |
217 | $dbr = $this->readableDatabase; |
218 | // Add a condition to filter by page length if minPageSize is greater than 0 |
219 | $this->selectQueryBuilder->andWhere( |
220 | $dbr->expr( 'page_len', '>=', $minPageSize ) |
221 | ); |
222 | |
223 | return $this; |
224 | } |
225 | |
226 | /** |
227 | * Filter based on a maximum page size. |
228 | * |
229 | * If `$maxPageSize` is negative, this is a no-op. |
230 | * It is possible for a page to exist with 0 bytes, so having a |
231 | * max of 0 is allowed. |
232 | * |
233 | * @param int $maxPageSize The maximum size (in bytes) that a page must be to be included |
234 | * @return $this |
235 | */ |
236 | public function filterByMaxPageSize( int $maxPageSize ): self { |
237 | if ( $maxPageSize < 0 ) { |
238 | // No filtering if maxPageSize is negative as this doesn't make sense |
239 | // The user is told this will be ignored in the UI |
240 | return $this; |
241 | } |
242 | |
243 | $dbr = $this->readableDatabase; |
244 | // Add a condition to filter by page length if maxPageSize is greater than 0 |
245 | $this->selectQueryBuilder->andWhere( |
246 | $dbr->expr( 'page_len', '<=', $maxPageSize ) |
247 | ); |
248 | |
249 | return $this; |
250 | } |
251 | |
252 | /** |
253 | * Get an array of all namespaces in `$namespaces` (or all namespaces, if `$namespaces` is |
254 | * null) where their `$wgCapitalLinksOverride` configuration does not match the wiki's |
255 | * `$wgCapitalLinks` option. Used to determine whether the pattern should be capitalized |
256 | * for some namespaces. |
257 | * |
258 | * When `$namespaces` is set, the return value will always be a subset of it. |
259 | * |
260 | * @param int[]|null $namespaces The namespaces to check for |
261 | * @return int[] |
262 | */ |
263 | protected function getOverriddenNamespaces( ?array $namespaces ): array { |
264 | $overriddenNamespaces = []; |
265 | $capitalLinks = $this->config->get( MainConfigNames::CapitalLinks ); |
266 | $capitalLinkOverrides = $this->config->get( MainConfigNames::CapitalLinkOverrides ); |
267 | // If there are any capital-overridden namespaces, keep track of them. "overridden" |
268 | // here means the namespace-specific value is not equal to $wgCapitalLinks. |
269 | foreach ( $capitalLinkOverrides as $nsId => $nsOverridden ) { |
270 | if ( $nsOverridden !== $capitalLinks && ( |
271 | $namespaces == null || in_array( $nsId, $namespaces ) |
272 | ) ) { |
273 | $overriddenNamespaces[] = $nsId; |
274 | } |
275 | } |
276 | |
277 | return $overriddenNamespaces; |
278 | } |
279 | |
280 | /** |
281 | * Add a WHERE condition on the query, filtering pages by a given pattern. |
282 | * This function performs capitalization as needed, to ensure that the pattern has a capital |
283 | * first character when `$wgCapitalLinks` is not disabled (since all titles are stored in the |
284 | * database with the first letter capitalized). |
285 | * |
286 | * @param string $pattern The pattern to use |
287 | * @return void |
288 | */ |
289 | protected function filterSimplePattern( string $pattern ) { |
290 | $pattern = $this->namespaceInfo->isCapitalized( NS_MAIN ) ? |
291 | $this->contentLanguage->ucfirst( $pattern ) : $pattern; |
292 | |
293 | $this->selectQueryBuilder->andWhere( |
294 | $this->readableDatabase->expr( |
295 | 'page_title', |
296 | IExpression::LIKE, |
297 | new LikeValue( |
298 | new LikeMatch( $pattern ) |
299 | ) |
300 | ) |
301 | ); |
302 | } |
303 | |
304 | /** |
305 | * Add a WHERE condition on the query, filtering pages by a given pattern. |
306 | * Depending on the namespaces selected (or all of them, if applicable), some |
307 | * namespaces may be case-sensitive (configured via `$wgCapitalLinkOverrides`). |
308 | * This function performs conversion on the pattern as needed to ensure that |
309 | * matching is always case-sensitive for namespaces which are also case-sensitive, |
310 | * or capitalizes the first character of the pattern if it isn't (since all titles |
311 | * are stored in the database with the first letter capitalized). |
312 | * |
313 | * @param string $pattern The pattern to use |
314 | * @param int[]|null $namespaces The namespaces selected by the user |
315 | * @param int[] $overriddenNamespaces The list of namespaces which have entries in the |
316 | * `$wgCapitalLinkOverrides` configuration variable that varies from the default. |
317 | * @return void |
318 | */ |
319 | protected function filterPatternWithOverriddenNamespaces( |
320 | string $pattern, |
321 | ?array $namespaces, |
322 | array $overriddenNamespaces |
323 | ) { |
324 | $dbr = $this->readableDatabase; |
325 | |
326 | // If there are overridden namespaces, they have to be converted |
327 | // on a case-by-case basis. |
328 | |
329 | // Our scope should only be limited to the namespaces selected by the user, |
330 | // or all namespaces (when $namespaces == null). |
331 | $validNamespaces = $namespaces == null ? |
332 | $this->namespaceInfo->getValidNamespaces() : |
333 | $namespaces; |
334 | $nonOverriddenNamespaces = []; |
335 | foreach ( $validNamespaces as $ns ) { |
336 | if ( !in_array( $ns, $overriddenNamespaces ) ) { |
337 | // Put all namespaces that aren't overridden in $nonOverriddenNamespaces. |
338 | $nonOverriddenNamespaces[] = $ns; |
339 | } |
340 | } |
341 | |
342 | $patternSpecific = $this->namespaceInfo->isCapitalized( $overriddenNamespaces[0] ) ? |
343 | $this->contentLanguage->ucfirst( $pattern ) : $pattern; |
344 | $orConditions = [ |
345 | $dbr->expr( |
346 | 'page_title', IExpression::LIKE, new LikeValue( |
347 | new LikeMatch( $patternSpecific ) |
348 | ) |
349 | )->and( |
350 | // IN condition |
351 | 'page_namespace', '=', $overriddenNamespaces |
352 | ) |
353 | ]; |
354 | if ( count( $nonOverriddenNamespaces ) ) { |
355 | $patternStandard = $this->namespaceInfo->isCapitalized( $nonOverriddenNamespaces[0] ) ? |
356 | $this->contentLanguage->ucfirst( $pattern ) : $pattern; |
357 | $orConditions[] = $dbr->expr( |
358 | 'page_title', IExpression::LIKE, new LikeValue( |
359 | new LikeMatch( $patternStandard ) |
360 | ) |
361 | )->and( |
362 | // IN condition, with the non-overridden namespaces. |
363 | // If the default is case-sensitive namespaces, $pattern's first |
364 | // character is turned lowercase. Otherwise, it is turned uppercase. |
365 | 'page_namespace', '=', $nonOverriddenNamespaces |
366 | ); |
367 | } |
368 | $this->selectQueryBuilder->andWhere( $dbr->orExpr( $orConditions ) ); |
369 | } |
370 | |
371 | /** |
372 | * Filter based on timestamp, only allowing creations which are after `$timestamp` (inclusive). |
373 | * |
374 | * @param int $timestamp The minimum timestamp that a page creation must be to be included |
375 | * @return $this |
376 | */ |
377 | public function filterFromTimestamp( int $timestamp ): self { |
378 | $dbr = $this->readableDatabase; |
379 | if ( $this->table === 'revision' ) { |
380 | $this->selectQueryBuilder->andWhere( |
381 | $dbr->expr( 'rev_timestamp', '>=', $dbr->timestamp( $timestamp ) ) |
382 | ); |
383 | } else { |
384 | $this->selectQueryBuilder->andWhere( |
385 | $dbr->expr( 'rc_timestamp', '>=', $dbr->timestamp( $timestamp ) ) |
386 | ); |
387 | } |
388 | return $this; |
389 | } |
390 | |
391 | /** |
392 | * Filter based on timestamp, only allowing creations which are before `$timestamp` (exclusive). |
393 | * |
394 | * @param int $timestamp The maximum timestamp that a page creation must be to be included |
395 | * @return $this |
396 | */ |
397 | public function filterToTimestamp( int $timestamp ): self { |
398 | $dbr = $this->readableDatabase; |
399 | if ( $this->table === 'revision' ) { |
400 | $this->selectQueryBuilder->andWhere( |
401 | $dbr->expr( 'rev_timestamp', '<', $dbr->timestamp( $timestamp ) ) |
402 | ); |
403 | } else { |
404 | $this->selectQueryBuilder->andWhere( |
405 | $dbr->expr( 'rc_timestamp', '<', $dbr->timestamp( $timestamp ) ) |
406 | ); |
407 | } |
408 | return $this; |
409 | } |
410 | |
411 | /** |
412 | * Get a copy of the {@link SelectQueryBuilder} for this instance. |
413 | * |
414 | * @return SelectQueryBuilder |
415 | */ |
416 | public function build(): SelectQueryBuilder { |
417 | return clone $this->selectQueryBuilder; |
418 | } |
419 | |
420 | } |