Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 109
0.00% covered (danger)
0.00%
0 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
IPReputationHooks
0.00% covered (danger)
0.00%
0 / 109
0.00% covered (danger)
0.00%
0 / 6
702
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
2
 onLocalUserCreated
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
6
 onPageSaveComplete
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
2
 getIPoidDataForIp
0.00% covered (danger)
0.00%
0 / 47
0.00% covered (danger)
0.00%
0 / 1
56
 convertIPoidDataToEventLoggingFormat
0.00% covered (danger)
0.00%
0 / 26
0.00% covered (danger)
0.00%
0 / 1
182
 recordEvent
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace WikimediaEvents;
4
5use MediaWiki\Auth\Hook\LocalUserCreatedHook;
6use MediaWiki\Config\Config;
7use MediaWiki\Context\RequestContext;
8use MediaWiki\Deferred\DeferredUpdates;
9use MediaWiki\Extension\EventBus\EventFactory;
10use MediaWiki\Extension\EventBus\Serializers\MediaWiki\UserEntitySerializer;
11use MediaWiki\Extension\EventLogging\EventSubmitter\EventSubmitter;
12use MediaWiki\Http\HttpRequestFactory;
13use MediaWiki\Language\FormatterFactory;
14use MediaWiki\Logger\LoggerFactory;
15use MediaWiki\Storage\Hook\PageSaveCompleteHook;
16use MediaWiki\User\UserFactory;
17use MediaWiki\User\UserGroupManager;
18use MediaWiki\User\UserIdentity;
19use MediaWiki\WikiMap\WikiMap;
20use Psr\Log\LoggerInterface;
21use WANObjectCache;
22use Wikimedia\IPUtils;
23
/**
 * Hooks for logging IP reputation data with an event (edit, account creation, etc.)
 *
 * Note: these hook implementations will eventually move to Extension:IPReputation, when
 * that is running in production.
 */
class IPReputationHooks implements PageSaveCompleteHook, LocalUserCreatedHook {

    /** Name of the event stream the IP reputation events are submitted to. */
    private const STREAM = 'mediawiki.ip_reputation.score';
    /** Schema identifier attached to every submitted event. */
    private const SCHEMA = '/analytics/mediawiki/ip_reputation/score/1.1.0';

    private FormatterFactory $formatterFactory;
    private HttpRequestFactory $httpRequestFactory;
    private WANObjectCache $cache;

    private LoggerInterface $logger;
    private Config $config;
    private EventFactory $eventFactory;
    private UserFactory $userFactory;
    private UserGroupManager $userGroupManager;
    private EventSubmitter $eventSubmitter;

    public function __construct(
        Config $config,
        FormatterFactory $formatterFactory,
        HttpRequestFactory $httpRequestFactory,
        WANObjectCache $cache,
        UserFactory $userFactory,
        UserGroupManager $userGroupManager,
        EventFactory $eventFactory,
        EventSubmitter $eventSubmitter
    ) {
        $this->config = $config;
        $this->formatterFactory = $formatterFactory;
        $this->httpRequestFactory = $httpRequestFactory;
        $this->cache = $cache;
        $this->logger = LoggerFactory::getInstance( 'WikimediaEvents' );
        $this->userFactory = $userFactory;
        $this->userGroupManager = $userGroupManager;
        $this->eventFactory = $eventFactory;
        $this->eventSubmitter = $eventSubmitter;
    }

    /**
     * Record an IP reputation event for an account creation.
     *
     * The IP is read from the main request immediately; the IPoid lookup and the event
     * submission run later, inside a deferred update.
     *
     * @inheritDoc
     */
    public function onLocalUserCreated( $user, $autocreated ) {
        $ip = RequestContext::getMain()->getRequest()->getIP();
        DeferredUpdates::addCallableUpdate( function () use ( $ip, $user, $autocreated ) {
            $action = $autocreated ? 'autocreateaccount' : 'createaccount';
            // For account creations the event identifier is the new user's ID.
            $this->recordEvent( $ip, $action, $user, $user->getId() );
        } );
    }

    /**
     * Record an IP reputation event for a saved edit.
     *
     * The IP is read from the main request immediately; the IPoid lookup and the event
     * submission run later, inside a deferred update.
     *
     * @inheritDoc
     */
    public function onPageSaveComplete( $wikiPage, $user, $summary, $flags, $revisionRecord, $editResult ) {
        $ip = RequestContext::getMain()->getRequest()->getIP();
        DeferredUpdates::addCallableUpdate( function () use (
            $ip,
            $user,
            $revisionRecord
        ) {
            // For edits the event identifier is the ID of the saved revision.
            $this->recordEvent( $ip, 'edit', $user, $revisionRecord->getId() );
        } );
    }

    /**
     * Look up IPoid data for an IP address, going through the WAN cache.
     *
     * @param string $ip
     *
     * @return array|null IPoid data for the specific address, or null if there is no data
     */
    private function getIPoidDataForIp( string $ip ): ?array {
        $baseUrl = $this->config->get( 'WikimediaEventsIPoidUrl' );
        if ( !$baseUrl ) {
            // If IPoid URL isn't configured, don't do any lookup.
            return null;
        }
        $sanitizedIp = IPUtils::sanitizeIP( $ip );
        // sanitizeIP() produces an uppercase IPv6 form, but IPoid stores addresses in
        // lowercase, so use the prettified (lowercase) form when talking to IPoid.
        // For IPv4 addresses both forms are the same string.
        // NOTE(review): prettifyIP() also compresses zero runs in IPv6 addresses; confirm
        // that this matches the form IPoid uses for its keys.
        $lookupIp = IPUtils::prettifyIP( $ip ) ?? $sanitizedIp;
        $data = $this->cache->getWithSetCallback(
            $this->cache->makeGlobalKey( 'wikimediaevents-ipoid', $sanitizedIp ),
            // IPoid data is refreshed every 24 hours and roughly 10% of its IPs drop out
            // of the database each 24-hour cycle. A one hour TTL seems reasonable to allow
            // no longer problematic IPs to get evicted from the cache relatively quickly,
            // and also means that IPs for e.g. residential proxies are updated in our cache
            // relatively quickly.
            $this->cache::TTL_HOUR,
            function () use ( $baseUrl, $sanitizedIp, $lookupIp ) {
                $timeout = $this->config->get( 'WikimediaEventsIPoidRequestTimeoutSeconds' );
                $url = $baseUrl . '/feed/v1/ip/' . $lookupIp;
                $request = $this->httpRequestFactory->create( $url, [
                    'method' => 'GET',
                    'timeout' => $timeout,
                    'connectTimeout' => $timeout,
                ] );
                $response = $request->execute();
                if ( !$response->isOK() ) {
                    // Probably a 404, which means IPoid doesn't know about the IP.
                    // If not a 404, log it, so we can figure out what happened.
                    if ( $request->getStatus() !== 404 ) {
                        $statusFormatter = $this->formatterFactory->getStatusFormatter( RequestContext::getMain() );
                        [ $errorText, $context ] = $statusFormatter->getPsr3MessageAndContext( $response );
                        $this->logger->error( $errorText, $context );
                    }
                    // NOTE(review): null is stored in the cache for the full TTL, so a
                    // non-404 failure is remembered as "no data" for this IP as well.
                    // Confirm that this is intended rather than returning false.
                    return null;
                }

                $content = $request->getContent();
                $data = json_decode( $content, true );

                if ( !is_array( $data ) || !$data ) {
                    // Malformed data (undecodable, empty, or not a JSON object/array).
                    $this->logger->error(
                        'Got invalid JSON data while checking IP {ip}',
                        [
                            'ip' => $sanitizedIp,
                            'response' => $content
                        ]
                    );
                    return null;
                }

                // Accept the entry under either spelling of the address, so the lookup does
                // not depend on which form IPoid echoes back as the key.
                $ipData = $data[$lookupIp] ?? $data[$sanitizedIp] ?? null;
                if ( !is_array( $ipData ) ) {
                    // IP should always be set in the data array, but just to be safe.
                    // A non-array entry is rejected too, since this method must return an array.
                    $this->logger->error(
                        'Got JSON data with no IP {ip} present',
                        [
                            'ip' => $sanitizedIp,
                            'response' => $content
                        ]
                    );
                    return null;
                }

                // We have a match and valid data structure;
                // return the values for this IP for storage in the cache.
                return $ipData;
            }
        );

        // Unlike null, false tells cache not to cache something. Normalize both to null before returning.
        return $data === false ? null : $data;
    }

    /**
     * Map the fields of an IPoid record onto the field names of the event schema.
     *
     * Only fields that are set (and not null) in the input are copied to the result.
     *
     * @param array $data Array returned from IPoid service
     * @return array Array of data suitable for use with ip_reputation.score stream
     */
    private function convertIPoidDataToEventLoggingFormat( array $data ): array {
        $event = [];
        // See IPoid repo's generateInsertActorQueries for mapping of ipoid fields
        // to Spur data field names.
        if ( isset( $data['risks'] ) ) {
            $event['risks'] = $data['risks'];
        }
        if ( isset( $data['proxies'] ) ) {
            $event['client_proxies'] = $data['proxies'];
        }
        if ( isset( $data['org'] ) ) {
            $event['organization'] = $data['org'];
        }
        if ( isset( $data['client_count'] ) ) {
            $event['client_count'] = $data['client_count'];
        }
        if ( isset( $data['types'] ) ) {
            $event['client_types'] = $data['types'];
        }
        if ( isset( $data['conc_city'] ) ) {
            $event['location_city'] = $data['conc_city'];
        }
        // Prefer client.concentration.country, otherwise fallback to location.country
        if ( !empty( $data['conc_country'] ) ) {
            $event['location_country'] = $data['conc_country'];
        } elseif ( isset( $data['location_country'] ) ) {
            $event['location_country'] = $data['location_country'];
        }
        if ( isset( $data['countries'] ) ) {
            $event['client_countries'] = $data['countries'];
        }
        if ( isset( $data['behaviors'] ) ) {
            $event['client_behaviors'] = $data['behaviors'];
        }
        // IPoid's "tunnels" property is a list of tunnel operator strings.
        if ( isset( $data['tunnels'] ) ) {
            $event['tunnels_operators'] = $data['tunnels'];
        }
        // n.b. there are other properties in the ip_reputation.score stream, but
        // they rely on raw Spur data which is not currently accessible via IPoid.
        return $event;
    }

    /**
     * Attempt to fetch data from ipoid, and submit an appropriate event if data is found.
     *
     * Nothing is submitted when the lookup yields no data (null or an empty array).
     *
     * @param string $ip
     * @param string $action
     * @param UserIdentity $user
     * @param int $identifier
     * @return void
     */
    private function recordEvent( string $ip, string $action, UserIdentity $user, int $identifier ): void {
        $data = $this->getIPoidDataForIp( $ip );
        if ( !$data ) {
            return;
        }
        $event = $this->convertIPoidDataToEventLoggingFormat( $data );
        $userEntitySerializer = new UserEntitySerializer( $this->userFactory, $this->userGroupManager );
        // Array union: keys already present from the IPoid conversion are kept as they are.
        $event += [
            '$schema' => self::SCHEMA,
            'wiki_id' => WikiMap::getCurrentWikiId(),
            'http' => [ 'client_ip' => $ip ],
            'performer' => $userEntitySerializer->toArray( $user ),
            'action' => $action,
            'identifier' => $identifier,
        ];
        $this->eventSubmitter->submit( self::STREAM, $event );
    }
}