Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 109 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
IPReputationHooks | |
0.00% |
0 / 109 |
|
0.00% |
0 / 6 |
702 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
2 | |||
onLocalUserCreated | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
onPageSaveComplete | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
getIPoidDataForIp | |
0.00% |
0 / 47 |
|
0.00% |
0 / 1 |
56 | |||
convertIPoidDataToEventLoggingFormat | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
182 | |||
recordEvent | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace WikimediaEvents; |
4 | |
5 | use MediaWiki\Auth\Hook\LocalUserCreatedHook; |
6 | use MediaWiki\Config\Config; |
7 | use MediaWiki\Context\RequestContext; |
8 | use MediaWiki\Deferred\DeferredUpdates; |
9 | use MediaWiki\Extension\EventBus\EventFactory; |
10 | use MediaWiki\Extension\EventBus\Serializers\MediaWiki\UserEntitySerializer; |
11 | use MediaWiki\Extension\EventLogging\EventSubmitter\EventSubmitter; |
12 | use MediaWiki\Http\HttpRequestFactory; |
13 | use MediaWiki\Language\FormatterFactory; |
14 | use MediaWiki\Logger\LoggerFactory; |
15 | use MediaWiki\Storage\Hook\PageSaveCompleteHook; |
16 | use MediaWiki\User\UserFactory; |
17 | use MediaWiki\User\UserGroupManager; |
18 | use MediaWiki\User\UserIdentity; |
19 | use MediaWiki\WikiMap\WikiMap; |
20 | use Psr\Log\LoggerInterface; |
21 | use WANObjectCache; |
22 | use Wikimedia\IPUtils; |
23 | |
/**
 * Hooks for logging IP reputation data with an event (edit, account creation, etc.)
 *
 * When a page save completes or a local account is created, a deferred update looks up
 * the request IP in IPoid and, if data is found, submits it to the
 * mediawiki.ip_reputation.score stream.
 *
 * Note: these hook implementations will eventually move to Extension:IPReputation, when
 * that is running in production.
 */
class IPReputationHooks implements PageSaveCompleteHook, LocalUserCreatedHook {

	/** Event stream that IP reputation events are submitted to. */
	private const STREAM = 'mediawiki.ip_reputation.score';
	/** Value of the `$schema` field set on every submitted event. */
	private const SCHEMA = '/analytics/mediawiki/ip_reputation/score/1.1.0';

	/** Used to format failed HTTP statuses for logging. */
	private FormatterFactory $formatterFactory;
	/** Used to create the HTTP requests sent to IPoid. */
	private HttpRequestFactory $httpRequestFactory;
	/** Caches per-IP lookup results. */
	private WANObjectCache $cache;

	/** Logger for the 'WikimediaEvents' channel. */
	private LoggerInterface $logger;
	/** Source of the WikimediaEventsIPoid* settings. */
	private Config $config;
	private EventFactory $eventFactory;
	/** Passed to UserEntitySerializer when serializing the performer. */
	private UserFactory $userFactory;
	/** Passed to UserEntitySerializer when serializing the performer. */
	private UserGroupManager $userGroupManager;
	/** Submits the finished event to the stream. */
	private EventSubmitter $eventSubmitter;

	/**
	 * @param Config $config
	 * @param FormatterFactory $formatterFactory
	 * @param HttpRequestFactory $httpRequestFactory
	 * @param WANObjectCache $cache
	 * @param UserFactory $userFactory
	 * @param UserGroupManager $userGroupManager
	 * @param EventFactory $eventFactory
	 * @param EventSubmitter $eventSubmitter
	 */
	public function __construct(
		Config $config,
		FormatterFactory $formatterFactory,
		HttpRequestFactory $httpRequestFactory,
		WANObjectCache $cache,
		UserFactory $userFactory,
		UserGroupManager $userGroupManager,
		EventFactory $eventFactory,
		EventSubmitter $eventSubmitter
	) {
		$this->config = $config;
		$this->formatterFactory = $formatterFactory;
		$this->httpRequestFactory = $httpRequestFactory;
		$this->cache = $cache;
		$this->logger = LoggerFactory::getInstance( 'WikimediaEvents' );
		$this->userFactory = $userFactory;
		$this->userGroupManager = $userGroupManager;
		$this->eventFactory = $eventFactory;
		$this->eventSubmitter = $eventSubmitter;
	}

	/**
	 * Record an IP reputation event for an account creation or autocreation.
	 *
	 * @inheritDoc
	 */
	public function onLocalUserCreated( $user, $autocreated ) {
		// Read the IP now; the lookup and event submission run later in a deferred update.
		$ip = RequestContext::getMain()->getRequest()->getIP();
		DeferredUpdates::addCallableUpdate( function () use ( $ip, $user, $autocreated ) {
			$action = $autocreated ? 'autocreateaccount' : 'createaccount';
			$this->recordEvent( $ip, $action, $user, $user->getId() );
		} );
	}

	/**
	 * Record an IP reputation event for a completed page save.
	 *
	 * @inheritDoc
	 */
	public function onPageSaveComplete( $wikiPage, $user, $summary, $flags, $revisionRecord, $editResult ) {
		// Read the IP now; the lookup and event submission run later in a deferred update.
		$ip = RequestContext::getMain()->getRequest()->getIP();
		DeferredUpdates::addCallableUpdate( function () use ( $ip, $user, $revisionRecord ) {
			$this->recordEvent( $ip, 'edit', $user, $revisionRecord->getId() );
		} );
	}

	/**
	 * Look up IPoid data for an IP address, going through the cache.
	 *
	 * @param string $ip
	 *
	 * @return array|null IPoid data for the specific address, or null if there is no data
	 */
	private function getIPoidDataForIp( string $ip ): ?array {
		// If the IPoid URL isn't configured, there is nothing to query.
		$baseUrl = $this->config->get( 'WikimediaEventsIPoidUrl' );
		if ( !$baseUrl ) {
			return null;
		}

		// NOTE(review): the sanitized form is used unchanged for the cache key, the request
		// URL and the response lookup key. An earlier comment here said IPv6 addresses are
		// lowercased to match IPoid's storage format, but no lowercasing is performed.
		// Confirm which form IPoid expects for IPv6.
		$sanitizedIp = IPUtils::sanitizeIP( $ip );

		$data = $this->cache->getWithSetCallback(
			$this->cache->makeGlobalKey( 'wikimediaevents-ipoid', $sanitizedIp ),
			// IPoid data is refreshed every 24 hours and roughly 10% of its IPs drop out
			// of the database each 24-hour cycle. A one hour TTL seems reasonable to allow
			// no longer problematic IPs to get evicted from the cache relatively quickly,
			// and also means that IPs for e.g. residential proxies are updated in our cache
			// relatively quickly.
			WANObjectCache::TTL_HOUR,
			function () use ( $baseUrl, $sanitizedIp ) {
				$timeout = $this->config->get( 'WikimediaEventsIPoidRequestTimeoutSeconds' );
				$request = $this->httpRequestFactory->create(
					$baseUrl . '/feed/v1/ip/' . $sanitizedIp,
					[
						'method' => 'GET',
						'timeout' => $timeout,
						'connectTimeout' => $timeout,
					]
				);
				$response = $request->execute();
				if ( !$response->isOK() ) {
					// Probably a 404, which means IPoid doesn't know about the IP.
					// If not a 404, log it, so we can figure out what happened.
					if ( $request->getStatus() !== 404 ) {
						$statusFormatter = $this->formatterFactory->getStatusFormatter( RequestContext::getMain() );
						[ $errorText, $context ] = $statusFormatter->getPsr3MessageAndContext( $response );
						$this->logger->error( $errorText, $context );
					}
					return null;
				}

				$body = $request->getContent();
				$decoded = json_decode( $body, true );

				if ( !$decoded ) {
					// Malformed (or empty) data.
					$this->logger->error(
						'Got invalid JSON data while checking IP {ip}',
						[
							'ip' => $sanitizedIp,
							'response' => $body
						]
					);
					return null;
				}

				if ( !isset( $decoded[$sanitizedIp] ) ) {
					// IP should always be set in the data array, but just to be safe.
					$this->logger->error(
						'Got JSON data with no IP {ip} present',
						[
							'ip' => $sanitizedIp,
							'response' => $body
						]
					);
					return null;
				}

				// We have a match and valid data structure;
				// return the values for this IP for storage in the cache.
				return $decoded[$sanitizedIp];
			}
		);

		// Unlike null, false tells cache not to cache something. Normalize both to null before returning.
		return $data === false ? null : $data;
	}

	/**
	 * Rename the IPoid fields present in $data to their ip_reputation.score stream names.
	 * Fields that are not set in $data are left out of the result.
	 *
	 * @param array $data Array returned from IPoid service
	 * @return array Array of data suitable for use with ip_reputation.score stream
	 */
	private function convertIPoidDataToEventLoggingFormat( array $data ): array {
		$event = [];
		// See IPoid repo's generateInsertActorQueries for mapping of ipoid fields
		// to Spur data field names.
		if ( isset( $data['risks'] ) ) {
			$event['risks'] = $data['risks'];
		}
		if ( isset( $data['proxies'] ) ) {
			$event['client_proxies'] = $data['proxies'];
		}
		if ( isset( $data['org'] ) ) {
			$event['organization'] = $data['org'];
		}
		if ( isset( $data['client_count'] ) ) {
			$event['client_count'] = $data['client_count'];
		}
		if ( isset( $data['types'] ) ) {
			$event['client_types'] = $data['types'];
		}
		if ( isset( $data['conc_city'] ) ) {
			$event['location_city'] = $data['conc_city'];
		}
		// Prefer client.concentration.country, otherwise fallback to location.country
		if ( !empty( $data['conc_country'] ) ) {
			$event['location_country'] = $data['conc_country'];
		} elseif ( isset( $data['location_country'] ) ) {
			$event['location_country'] = $data['location_country'];
		}
		if ( isset( $data['countries'] ) ) {
			$event['client_countries'] = $data['countries'];
		}
		if ( isset( $data['behaviors'] ) ) {
			$event['client_behaviors'] = $data['behaviors'];
		}
		// IPoid's "tunnels" property is a list of tunnel operator strings.
		if ( isset( $data['tunnels'] ) ) {
			$event['tunnels_operators'] = $data['tunnels'];
		}
		// n.b. there are other properties in the ip_reputation.score stream, but
		// they rely on raw Spur data which is not currently accessible via IPoid.
		return $event;
	}

	/**
	 * Attempt to fetch data from ipoid, and submit an appropriate event if data is found.
	 *
	 * @param string $ip
	 * @param string $action One of 'edit', 'createaccount' or 'autocreateaccount'
	 * @param UserIdentity $user User who performed the action
	 * @param int $identifier Revision ID for edits, user ID for account creations
	 * @return void
	 */
	private function recordEvent( string $ip, string $action, UserIdentity $user, int $identifier ): void {
		$data = $this->getIPoidDataForIp( $ip );
		if ( !$data ) {
			// No (or empty) IPoid data for this IP: nothing to report.
			return;
		}
		$event = $this->convertIPoidDataToEventLoggingFormat( $data );
		$userEntitySerializer = new UserEntitySerializer( $this->userFactory, $this->userGroupManager );
		// Array union: keys already present in $event are kept as they are.
		$event += [
			'$schema' => self::SCHEMA,
			'wiki_id' => WikiMap::getCurrentWikiId(),
			'http' => [ 'client_ip' => $ip ],
			'performer' => $userEntitySerializer->toArray( $user ),
			'action' => $action,
			'identifier' => $identifier,
		];
		$this->eventSubmitter->submit( self::STREAM, $event );
	}
}