Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
42.75% |
59 / 138 |
|
25.00% |
3 / 12 |
CRAP | |
0.00% |
0 / 1 |
DenyListManager | |
42.75% |
59 / 138 |
|
25.00% |
3 / 12 |
559.28 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
singleton | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
isIpDenyListed | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
getCachedIpDenyList | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
purgeCachedIpDenyList | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getIpDenyList | |
96.77% |
30 / 31 |
|
0.00% |
0 / 1 |
7 | |||
getIpDenyListSet | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
3 | |||
getDenyListKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
fetchFlatDenyListHexIps | |
44.44% |
4 / 9 |
|
0.00% |
0 / 1 |
4.54 | |||
fetchFlatDenyListHexIpsLocal | |
83.33% |
15 / 18 |
|
0.00% |
0 / 1 |
9.37 | |||
fetchFlatDenyListHexIpsRemote | |
0.00% |
0 / 50 |
|
0.00% |
0 / 1 |
342 | |||
fetchRemoteFile | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * https://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | namespace MediaWiki\Extension\StopForumSpam; |
22 | |
23 | use DomainException; |
24 | use MediaWiki\Http\HttpRequestFactory; |
25 | use MediaWiki\Logger\LoggerFactory; |
26 | use MediaWiki\MediaWikiServices; |
27 | use Psr\Log\LoggerInterface; |
28 | use Psr\Log\NullLogger; |
29 | use RuntimeException; |
30 | use Wikimedia\IPSet; |
31 | use Wikimedia\IPUtils; |
32 | use Wikimedia\ObjectCache\BagOStuff; |
33 | use Wikimedia\ObjectCache\IStoreKeyEncoder; |
34 | use Wikimedia\ObjectCache\WANObjectCache; |
35 | |
36 | /** |
37 | * @internal |
38 | */ |
/**
 * Loads, caches and queries the StopForumSpam (SFS) IP deny list.
 *
 * The list is read either from a local CSV file or from a remote gzipped CSV
 * file, stored in the WAN cache as newline-separated hexadecimal addresses and
 * in the local server cache as newline-separated formatted addresses, and
 * exposed for membership checks through an IPSet.
 *
 * @internal
 */
class DenyListManager {

	private const CACHE_VERSION = 1;

	/** @var HttpRequestFactory */
	private $http;
	/** @var BagOStuff */
	private $srvCache;
	/** @var WANObjectCache */
	private $wanCache;
	/** @var LoggerInterface */
	private $logger;

	/** @var IPSet|null Lazily built by getIpDenyListSet() */
	private $denyListIPSet;

	/** @var self|null Instance handed out by singleton() */
	private static $instance = null;

	/**
	 * @param HttpRequestFactory $http
	 * @param BagOStuff $srvCache
	 * @param WANObjectCache $wanCache
	 * @param LoggerInterface|null $logger Falls back to a NullLogger when null
	 */
	public function __construct(
		HttpRequestFactory $http,
		BagOStuff $srvCache,
		WANObjectCache $wanCache,
		?LoggerInterface $logger
	) {
		$this->http = $http;
		$this->srvCache = $srvCache;
		$this->wanCache = $wanCache;
		$this->logger = $logger ?? new NullLogger();
	}

	/**
	 * Get the shared instance, creating it from MediaWikiServices on first use
	 *
	 * @todo use MediaWikiServices
	 * @return DenyListManager
	 */
	public static function singleton() {
		if ( self::$instance === null ) {
			$services = MediaWikiServices::getInstance();

			self::$instance = new self(
				$services->getHttpRequestFactory(),
				$services->getLocalServerObjectCache(),
				$services->getMainWANObjectCache(),
				LoggerFactory::getInstance( 'DenyList' )
			);
		}

		return self::$instance;
	}

	/**
	 * Check whether the IP address is deny-listed
	 *
	 * @param string $ip An IP address
	 * @return bool
	 */
	public function isIpDenyListed( $ip ) {
		// IPUtils::isIPAddress() returns a boolean, so the former "=== null"
		// comparison could never be true and the deny list was loaded even
		// for input that is not an IP address.
		if ( !is_string( $ip ) || !IPUtils::isIPAddress( $ip ) ) {
			return false;
		}

		return $this->getIpDenyListSet()->match( $ip );
	}

	/**
	 * Get the list of deny-listed IPs
	 *
	 * NOTE: despite its name, this currently delegates to getIpDenyList(),
	 * which regenerates the cache when it is empty, so this implementation
	 * always returns an array. "false" is kept in the return type only for
	 * callers written against the older "cache only" contract.
	 *
	 * @return string[]|false List of deny-listed IP addresses
	 */
	public function getCachedIpDenyList() {
		return $this->getIpDenyList();
	}

	/**
	 * Purge cache of deny-list IPs
	 *
	 * Only the WAN cache entry is deleted here; the local server cache entry
	 * and the IPSet already built by this instance are left untouched.
	 *
	 * @return bool Success
	 */
	public function purgeCachedIpDenyList() {
		$wanCache = $this->wanCache;

		return $wanCache->delete( $this->getDenyListKey( $wanCache ) );
	}

	/**
	 * Fetch the list of IPs from cache, regenerating the cache as needed
	 *
	 * The local server cache is consulted first; on a miss (or forced recache)
	 * the hexadecimal list is taken from the WAN cache, converted to formatted
	 * addresses and written back to the local server cache.
	 *
	 * @param string|null $recache Use 'recache' to force a recache
	 * @return string[] List of deny-listed IP addresses
	 */
	public function getIpDenyList( $recache = null ): array {
		global $wgSFSDenyListCacheDuration;

		$srvCache = $this->srvCache;
		$srvCacheKey = $this->getDenyListKey( $srvCache );
		if ( $recache === 'recache' ) {
			$flatIpList = false;
		} else {
			$flatIpList = $srvCache->get( $srvCacheKey );
		}

		if ( $flatIpList === false ) {
			$wanCache = $this->wanCache;
			$flatHexIpList = $wanCache->getWithSetCallback(
				$this->getDenyListKey( $wanCache ),
				$wgSFSDenyListCacheDuration,
				function () {
					// This uses hexadecimal IP addresses to reduce network I/O
					return $this->fetchFlatDenyListHexIps();
				},
				[
					'lockTSE' => $wgSFSDenyListCacheDuration,
					'staleTTL' => $wgSFSDenyListCacheDuration,
					// placeholder
					'busyValue' => '',
					'minAsOf' => ( $recache === 'recache' ) ? INF : $wanCache::MIN_TIMESTAMP_NONE
				]
			);

			// Convert every hexadecimal entry back to a formatted IP address
			$ips = [];
			for ( $hex = strtok( $flatHexIpList, "\n" ); $hex !== false; $hex = strtok( "\n" ) ) {
				$ips[] = IPUtils::formatHex( $hex );
			}

			$flatIpList = implode( "\n", $ips );

			// Refill the local server cache if the list is not empty nor a placeholder
			if ( $flatIpList !== '' ) {
				$srvCache->set(
					$srvCacheKey,
					$flatIpList,
					// Randomised TTL between one hour and one day
					mt_rand( $srvCache::TTL_HOUR, $srvCache::TTL_DAY )
				);
			}
		}

		return ( $flatIpList != '' ) ? explode( "\n", $flatIpList ) : [];
	}

	/**
	 * Get the deny list as an IPSet, building it once per instance
	 *
	 * @param string|null $recache Use 'recache' to force a recache
	 * @return IPSet
	 */
	public function getIpDenyListSet( $recache = null ) {
		if ( $this->denyListIPSet === null || $recache === "recache" ) {
			$this->denyListIPSet = new IPSet( $this->getIpDenyList( $recache ) );
		}

		return $this->denyListIPSet;
	}

	/**
	 * @param IStoreKeyEncoder $cache
	 * @return string Cache key for primary deny list
	 */
	private function getDenyListKey( IStoreKeyEncoder $cache ) {
		return $cache->makeGlobalKey( 'sfs-denylist-set', self::CACHE_VERSION );
	}

	/**
	 * Load the deny list from the configured location, which may be a local
	 * file path or a remote URL
	 *
	 * @return string Newline separated list of SFS deny-listed IP addresses
	 * @throws DomainException If $wgSFSIPListLocation has not been configured
	 */
	private function fetchFlatDenyListHexIps(): string {
		global $wgSFSIPListLocation, $wgSFSValidateIPListLocationMD5;

		if ( $wgSFSIPListLocation === false ) {
			throw new DomainException( '$wgSFSIPListLocation has not been configured.' );
		}

		if ( is_file( $wgSFSIPListLocation ) ) {
			return $this->fetchFlatDenyListHexIpsLocal( $wgSFSIPListLocation );
		}

		return $this->fetchFlatDenyListHexIpsRemote(
			$wgSFSIPListLocation,
			$wgSFSValidateIPListLocationMD5
		);
	}

	/**
	 * Fetch gunzipped/unzipped SFS deny list from local file
	 *
	 * Rows whose second column is below $wgSFSIPThreshold and rows whose first
	 * column is not a valid IP address are skipped.
	 *
	 * @param string $listFilePath Local file path
	 * @return string Newline separated list of SFS deny-listed IP addresses
	 * @throws DomainException If the file cannot be opened
	 */
	private function fetchFlatDenyListHexIpsLocal( string $listFilePath ): string {
		global $wgSFSIPThreshold;

		$fh = fopen( $listFilePath, 'rb' );
		if ( !$fh ) {
			throw new DomainException( "wgSFSIPListLocation file handle could not be obtained." );
		}

		$ipList = [];

		try {
			while ( !feof( $fh ) ) {
				// The escape character is passed explicitly (same value as the
				// historical default) because relying on the default is
				// deprecated as of PHP 8.4.
				$ipData = fgetcsv( $fh, 4096, ',', '"', '\\' );
				if ( $ipData === false ) {
					break;
				}

				// Blank lines are reported as an array holding a single null
				if ( $ipData === null || $ipData === [ null ] ) {
					continue;
				}
				if ( isset( $ipData[1] ) && $ipData[1] < $wgSFSIPThreshold ) {
					continue;
				}

				$hex = IPUtils::toHex( (string)$ipData[0] );
				if ( $hex === false ) {
					// invalid address
					continue;
				}

				$ipList[] = $hex;
			}
		} finally {
			// Always release the handle, even if parsing throws
			fclose( $fh );
		}

		return implode( "\n", $ipList );
	}

	/**
	 * Fetch SFS IP deny list file from SFS site and return its entries
	 * (https://www.stopforumspam.com/downloads - use gz files)
	 *
	 * @param string $uri SFS vendor or third-party URL to the list
	 * @param string|null $md5uri SFS vendor URL to the MD5 of the list
	 * @return string Newline-separated list of SFS deny-listed IP addresses;
	 *  empty string if the list could not be fetched, validated or decoded
	 * @throws DomainException If $uri is not a valid URL
	 * @throws RuntimeException If zlib is unavailable or a download fails
	 */
	private function fetchFlatDenyListHexIpsRemote( string $uri, ?string $md5uri ): string {
		global $wgSFSProxy, $wgSFSIPThreshold;

		// Hacky, but needed to keep a sensible default value of $wgSFSIPListLocation for
		// users, whilst also preventing HTTP requests for other extension when they call
		// permission related hooks that mean the code here gets executed too...
		// So, if we have a URL, and try and do a HTTP request whilst in MW_PHPUNIT_TEST,
		// just fallback to loading sample_denylist_all.txt as a file...
		// See also: T262443, T265628.
		if ( defined( 'MW_PHPUNIT_TEST' ) ) {
			$filePath = dirname( __DIR__ ) . '/tests/phpunit/sample_denylist_all.txt';
			return $this->fetchFlatDenyListHexIpsLocal( $filePath );
		}

		// T353001 - this should be an ok way to determine that we're in a quibble context
		if ( getenv( 'ZUUL_PROJECT' ) && getenv( 'MW_INSTALL_PATH' ) ) {
			$filePath = getenv( 'MW_INSTALL_PATH' ) . '/extensions/StopForumSpam/tests/phpunit/sample_denylist_all.txt';
			return $this->fetchFlatDenyListHexIpsLocal( $filePath );
		}

		if ( !filter_var( $uri, FILTER_VALIDATE_URL ) ) {
			throw new DomainException( "wgSFSIPListLocation does not appear to be a valid URL." );
		}

		// check for zlib function for later processing
		if ( !function_exists( 'gzdecode' ) ) {
			throw new RuntimeException( "Zlib does not appear to be configured for php!" );
		}

		$options = [ 'followRedirects' => true ];
		if ( $wgSFSProxy !== false ) {
			$options['proxy'] = $wgSFSProxy;
		}

		$fileData = $this->fetchRemoteFile( $uri, $options );
		if ( $fileData === '' ) {
			$this->logger->error( __METHOD__ . ": SFS IP list could not be fetched!" );

			return '';
		}

		if ( is_string( $md5uri ) && $md5uri !== '' ) {
			// check vendor-provided md5; surrounding whitespace (such as a trailing
			// newline) and letter case are normalised before comparing, since
			// md5() yields bare lower-case hexadecimal
			$fileDataMD5 = strtolower( trim( $this->fetchRemoteFile( $md5uri, $options ) ) );
			if ( $fileDataMD5 === '' ) {
				$this->logger->error( __METHOD__ . ": SFS IP list MD5 could not be fetched!" );
				return '';
			}

			if ( md5( $fileData ) !== $fileDataMD5 ) {
				$this->logger->error( __METHOD__ . ": SFS IP list has an unexpected MD5!" );
				return '';
			}
		}

		// ungzip and process vendor file
		$csvTable = gzdecode( $fileData );
		if ( $csvTable === false ) {
			$this->logger->error( __METHOD__ . ": SFS IP file contents could not be decoded!" );
			return '';
		}

		$ipList = [];
		$scoreSkipped = 0;
		$rows = 0;

		for ( $line = strtok( $csvTable, "\n" ); $line !== false; $line = strtok( "\n" ) ) {
			$rows++;

			$ipData = str_getcsv( $line, ",", "\"", "\\" );
			// Rows may lack a score column; default to 0 (never skipped by score)
			// instead of triggering an undefined array key warning.
			$ip = (string)( $ipData[0] ?? '' );
			$score = (int)( $ipData[1] ?? 0 );

			if ( $score && ( $score < $wgSFSIPThreshold ) ) {
				$scoreSkipped++;
				continue;
			}

			$hex = IPUtils::toHex( $ip );
			if ( $hex === false ) {
				// invalid address
				continue;
			}

			$ipList[] = $hex;
		}

		if ( $scoreSkipped > 0 ) {
			$this->logger->info(
				__METHOD__ . ": {$rows} rows were processed. "
				. "{$scoreSkipped} were skipped because their score was less than {$wgSFSIPThreshold}."
			);
		}

		return implode( "\n", $ipList );
	}

	/**
	 * Fetch a network file's contents via HttpRequestFactory
	 *
	 * @param string $fileUrl
	 * @param array $httpOptions
	 * @return string Response body
	 * @throws RuntimeException If the request fails or the response status is not 200
	 */
	private function fetchRemoteFile( string $fileUrl, array $httpOptions ): string {
		$req = $this->http->create( $fileUrl, $httpOptions, __METHOD__ );

		$status = $req->execute();
		if ( !$status->isOK() ) {
			throw new RuntimeException( "Failed to download resource at {$fileUrl}" );
		}

		$code = $req->getStatus();
		if ( $code !== 200 ) {
			throw new RuntimeException( "Unexpected HTTP {$code} response from {$fileUrl}" );
		}

		return (string)$req->getContent();
	}
}