Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
97.98% |
242 / 247 |
|
89.66% |
26 / 29 |
CRAP | |
0.00% |
0 / 1 |
IPUtils | |
97.98% |
242 / 247 |
|
89.66% |
26 / 29 |
114 | |
0.00% |
0 / 1 |
isIPAddress | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isIPv6 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isIPv4 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isValid | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
isValidIPv4 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isValidIPv6 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isValidIPv4Range | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isValidIPv6Range | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isValidRange | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
sanitizeIP | |
100.00% |
30 / 30 |
|
100.00% |
1 / 1 |
9 | |||
prettifyIP | |
100.00% |
23 / 23 |
|
100.00% |
1 / 1 |
8 | |||
splitHostAndPort | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
8 | |||
combineHostAndPort | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
4 | |||
formatHex | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
hexToOctet | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
hexToQuad | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
isPublic | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
2 | |||
toHex | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
6 | |||
IPv6ToRawHex | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
parseCIDR | |
93.75% |
15 / 16 |
|
0.00% |
0 / 1 |
9.02 | |||
parseRange | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
12 | |||
parseCIDR6 | |
78.57% |
11 / 14 |
|
0.00% |
0 / 1 |
7.48 | |||
parseRange6 | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
7 | |||
isInRange | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
isInRanges | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
canonicalize | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
5 | |||
sanitizeRange | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
getSubnet | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
getIPsInRange | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
6 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | * @author Antoine Musso "<hashar at free dot fr>" |
20 | */ |
21 | |
22 | namespace Wikimedia; |
23 | |
24 | use InvalidArgumentException; |
25 | |
26 | /** |
27 | * Play with IP addresses and IP ranges. |
28 | */ |
29 | class IPUtils { |
30 | |
/**
 * Regex fragment (one capture group) for a single IPv4 byte.
 * An IPv4 address is made of 4 bytes from x00 to xFF which is d0 to d255.
 * Up to three digits are accepted, so a byte may carry leading zeros ("007").
 */
public const RE_IP_BYTE = '(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[0-9])';

/**
 * A dotted-quad IPv4 address: four RE_IP_BYTE fragments separated by literal
 * dots. Each byte is its own capture group.
 */
private const RE_IP_ADD = self::RE_IP_BYTE . '\.' . self::RE_IP_BYTE . '\.'
	. self::RE_IP_BYTE . '\.' . self::RE_IP_BYTE;
/**
 * An IPv4 CIDR prefix length (d0 to d32), as one capture group
 */
private const RE_IP_PREFIX = '(3[0-2]|[12][0-9]|[0-9])';

/**
 * An IPv4 range is either an IP address and a prefix ("a.b.c.d/n"), or two
 * addresses joined by a hyphen with at most one space on each side of it.
 */
private const RE_IP_RANGE = '(' . self::RE_IP_ADD . '\/' . self::RE_IP_PREFIX . '|'
	. self::RE_IP_ADD . ' ?\- ?' . self::RE_IP_ADD . ')';

/**
 * An IPv6 address is made up of 8 words (each x0000 to xFFFF).
 * However, the "::" abbreviation can be used on consecutive x0000 words.
 * This fragment matches one word of 1 to 4 hex digits (either case).
 */
private const RE_IPV6_WORD = '([0-9A-Fa-f]{1,4})';
/**
 * An IPv6 CIDR prefix length (d0 to d128), as one capture group
 */
private const RE_IPV6_PREFIX = '(12[0-8]|1[01][0-9]|[1-9][0-9]|[0-9])';

/**
 * An IPv6 address in colon-separated word notation, in any of the four
 * shapes listed in the alternatives below.
 */
private const RE_IPV6_ADD =
	'(?:' .
		// starts with "::" (including "::")
		':(?::|(?::' . self::RE_IPV6_WORD . '){1,7})' .
	'|' .
		// ends with "::" (except "::")
		self::RE_IPV6_WORD . '(?::' . self::RE_IPV6_WORD . '){0,6}::' .
	'|' .
		// contains one "::" in the middle (the ^ makes the test fail if none found)
		self::RE_IPV6_WORD . '(?::((?(-1)|:))?' . self::RE_IPV6_WORD . '){1,6}(?(-2)|^)' .
	'|' .
		// contains no "::"
		self::RE_IPV6_WORD . '(?::' . self::RE_IPV6_WORD . '){7}' .
	')';
/**
 * An IPv6 range is either an IP address and a prefix (d0 to d128, see
 * RE_IPV6_PREFIX), or two addresses joined by a hyphen with at most one
 * space on each side of it.
 */
private const RE_IPV6_RANGE = '(' . self::RE_IPV6_ADD . '\/' . self::RE_IPV6_PREFIX
	. '|' . self::RE_IPV6_ADD . ' ?\- ?' . self::RE_IPV6_ADD . ')';
/**
 * For IPv6 canonicalization (NOT for strict validation; these are quite lax!)
 * Matches a colon-delimited gap made only of all-zero words.
 */
public const RE_IPV6_GAP = ':(?:0+:)*(?::(?:0+:)*)?';
/**
 * Lax prefix that canonicalize() strips from in front of an embedded IPv4
 * address: optional zeros, a zero gap, then an optional "ffff:" word.
 */
private const RE_IPV6_V4_PREFIX = '0*' . self::RE_IPV6_GAP . '(?:ffff:)?';

/**
 * Matches any IPv4 or IPv6 address, optionally followed by a CIDR prefix
 * (i.e. an address or a network). Used by isIPAddress().
 */
private const RE_IP_ADDRESS_STRING =
	'(?:' .
		// IPv4
		self::RE_IP_ADD . '(?:\/' . self::RE_IP_PREFIX . ')?' .
	'|' .
		// IPv6
		self::RE_IPV6_ADD . '(?:\/' . self::RE_IPV6_PREFIX . ')?' .
	')';

/**
 * Maximum number of IP addresses that can be retrieved from a given range.
 */
private const MAXIMUM_IPS_FROM_RANGE = 2 ** 16;
98 | |
/**
 * Check whether a string is a valid IPv4 or IPv6 address, optionally
 * followed by a CIDR prefix (i.e. a network).
 * SIIT IPv4-translated addresses are rejected.
 * @note canonicalize() tries to convert translated addresses to IPv4.
 *
 * @param string $ip Candidate IP address or network
 * @return bool
 */
public static function isIPAddress( $ip ) {
	$pattern = '/^' . self::RE_IP_ADDRESS_STRING . '$/';

	return preg_match( $pattern, $ip ) === 1;
}
110 | |
/**
 * Check whether a string is a valid IPv6 address or IPv6 network.
 * @note Unlike isValid(), this accepts a trailing CIDR prefix too.
 *
 * @param string $ip Candidate IP address or network
 * @return bool
 */
public static function isIPv6( $ip ) {
	$optionalPrefix = '(?:\/' . self::RE_IPV6_PREFIX . ')?';
	$pattern = '/^' . self::RE_IPV6_ADD . $optionalPrefix . '$/';

	return preg_match( $pattern, $ip ) === 1;
}
121 | |
/**
 * Check whether a string is a valid IPv4 address or IPv4 network.
 * @note Unlike isValid(), this accepts a trailing CIDR prefix too.
 *
 * @param string $ip Candidate IP address or network
 * @return bool
 */
public static function isIPv4( $ip ) {
	$optionalPrefix = '(?:\/' . self::RE_IP_PREFIX . ')?';
	$pattern = '/^' . self::RE_IP_ADD . $optionalPrefix . '$/';

	return preg_match( $pattern, $ip ) === 1;
}
132 | |
/**
 * Check whether a string is a single valid IP address (IPv4 or IPv6).
 * Ranges and networks are NOT considered valid.
 * SIIT IPv4-translated addresses are rejected.
 * @note canonicalize() tries to convert translated addresses to IPv4.
 *
 * @param string $ip
 * @return bool True if it is valid
 */
public static function isValid( $ip ) {
	// IPv4 is checked first because it is the more common case.
	if ( self::isValidIPv4( $ip ) ) {
		return true;
	}

	return self::isValidIPv6( $ip );
}
145 | |
/**
 * Check whether a string is a single valid IPv4 address.
 * Ranges and networks are NOT considered valid.
 *
 * @param string $ip
 * @return bool True if it is valid
 */
public static function isValidIPv4( $ip ) {
	$pattern = '/^' . self::RE_IP_ADD . '$/';

	return preg_match( $pattern, $ip ) === 1;
}
155 | |
/**
 * Check whether a string is a single valid IPv6 address.
 * Ranges and networks are NOT considered valid.
 * SIIT IPv4-translated addresses are rejected.
 * @note canonicalize() tries to convert translated addresses to IPv4.
 *
 * @param string $ip
 * @return bool True if it is valid
 */
public static function isValidIPv6( $ip ) {
	$pattern = '/^' . self::RE_IPV6_ADD . '$/';

	return preg_match( $pattern, $ip ) === 1;
}
167 | |
/**
 * Check whether a string is an IPv4 range: either an address with a CIDR
 * prefix or an explicit "start - end" pair of addresses.
 *
 * @param string $ipRange
 * @return bool True if input is valid
 */
private static function isValidIPv4Range( $ipRange ) {
	$pattern = '/^' . self::RE_IP_RANGE . '$/';

	return preg_match( $pattern, $ipRange ) === 1;
}
177 | |
/**
 * Check whether a string is an IPv6 range: either an address with a CIDR
 * prefix or an explicit "start - end" pair of addresses.
 *
 * @param string $ipRange
 * @return bool True if input is valid
 */
private static function isValidIPv6Range( $ipRange ) {
	$pattern = '/^' . self::RE_IPV6_RANGE . '$/';

	return preg_match( $pattern, $ipRange ) === 1;
}
187 | |
/**
 * Check whether a string is an IP range, in either IPv4 or IPv6, written
 * with a CIDR prefix or in explicit "start - end" notation.
 * SIIT IPv4-translated addresses are rejected.
 * @note canonicalize() tries to convert translated addresses to IPv4.
 *
 * @param string $ipRange
 * @return bool True if it is valid
 */
public static function isValidRange( $ipRange ) {
	// IPv4 is checked first because it is the more common case.
	if ( self::isValidIPv4Range( $ipRange ) ) {
		return true;
	}

	return self::isValidIPv6Range( $ipRange );
}
200 | |
/**
 * Convert an IP into a verbose, uppercase, normalized form.
 * Both IPv4 and IPv6 addresses are trimmed. Additionally,
 * IPv6 addresses in octet notation are expanded to 8 words;
 * IPv4 addresses have leading zeros, in each octet, removed.
 *
 * Input that is not an IP address (or network) is returned trimmed but
 * otherwise untouched, because callers also pass usernames through here.
 *
 * @param string $ip IP address in quad or octet form (CIDR or not).
 * @return string|null Null only when the trimmed input is empty
 */
public static function sanitizeIP( $ip ) {
	$ip = trim( $ip );
	if ( $ip === '' ) {
		return null;
	}
	// If not an IP, just return trimmed value, since sanitizeIP() is called
	// in a number of contexts where usernames are supplied as input.
	if ( !self::isIPAddress( $ip ) ) {
		return $ip;
	}
	if ( self::isIPv4( $ip ) ) {
		// Remove leading 0's from octet representation of IPv4 address
		return preg_replace( '!(?:^|(?<=\.))0+(?=[1-9]|0[./]|0$)!', '', $ip );
	}
	// From here on this is a valid IPv6 address or network; upper-case it
	$ip = strtoupper( $ip );
	// Expand zero abbreviations
	$abbrevPos = strpos( $ip, '::' );
	if ( $abbrevPos !== false ) {
		// Find the last index of the address part, i.e. before any CIDR
		// number (e.g. "a:b:c::/24").
		$CIDRStart = strpos( $ip, '/' );
		$addressEnd = ( $CIDRStart !== false )
			? $CIDRStart - 1
			: strlen( $ip ) - 1;
		if ( $abbrevPos === 0 ) {
			// The '::' is at the beginning...
			$repeat = '0:';
			// When the address part is exactly '::' (with or without a CIDR
			// suffix, e.g. '::/0') there is no word after the abbreviation,
			// so the last word has to be supplied explicitly.
			$extra = ( $addressEnd === 1 ) ? '0' : '';
			// 7+2 (due to '::')
			$pad = 9;
		} elseif ( $abbrevPos === $addressEnd - 1 ) {
			// The '::' is at the end...
			$repeat = ':0';
			$extra = '';
			// 7+2 (due to '::')
			$pad = 9;
		} else {
			// The '::' is in the middle...
			$repeat = ':0';
			$extra = ':';
			// 6+2 (due to '::')
			$pad = 8;
		}
		$ip = str_replace( '::',
			str_repeat( $repeat, $pad - substr_count( $ip, ':' ) ) . $extra,
			$ip
		);
	}
	// Remove leading zeros from each bloc as needed
	return preg_replace( '/(^|:)0+(' . self::RE_IPV6_WORD . ')/', '$1$2', $ip );
}
263 | |
/**
 * Produce a compact, lower-case form of an IP for display to end users.
 * For IPv6 the longest run of two or more zero words is collapsed to '::'.
 * Anything that is not IPv6 is returned as sanitizeIP() produced it.
 *
 * @param string $ip
 * @return string|null Null when sanitizeIP() returns null
 */
public static function prettifyIP( $ip ) {
	// Normalize first; this expands any existing '::'
	$normalized = self::sanitizeIP( $ip );
	if ( $normalized === null ) {
		return null;
	}
	if ( !self::isIPv6( $normalized ) ) {
		return $normalized;
	}

	// Separate the address from an optional CIDR suffix
	$pieces = explode( '/', $normalized, 2 );
	$address = $pieces[0];
	$cidr = $pieces[1] ?? '';

	// Scan left to right for runs of zero words, remembering the longest
	$bestRun = '';
	$bestPos = 0;
	$searchFrom = 0;
	while ( preg_match(
		'!(?:^|:)0(?::0)+(?:$|:)!', $address, $found, PREG_OFFSET_CAPTURE, $searchFrom
	) ) {
		[ $run, $runPos ] = $found[0];
		if ( strlen( (string)$run ) > strlen( $bestRun ) ) {
			$bestRun = $run;
			$bestPos = $runPos;
		}
		// Continue right after this run
		$searchFrom = $runPos + strlen( $run );
	}
	if ( $bestRun !== '' ) {
		// Swap the longest run for the '::' abbreviation
		$address = substr_replace( $address, '::', $bestPos, strlen( $bestRun ) );
	}

	// Re-attach the CIDR suffix, if there was one
	if ( $cidr !== '' ) {
		$address = "{$address}/{$cidr}";
	}

	// Lower case is easier to read
	return strtolower( $address );
}
313 | |
/**
 * Given a host/port string, like one might find in the host part of a URL
 * per RFC 2732, split the hostname part and the port part and return an
 * array with an element for each. If there is no port part, the array will
 * have false in place of the port. If the string was invalid in some way,
 * false is returned.
 *
 * This was easy with IPv4 and was generally done in an ad-hoc way, but
 * with IPv6 it's somewhat more complicated due to the need to parse the
 * square brackets and colons.
 *
 * A bare IPv6 address is accepted despite the lack of square brackets.
 *
 * The port of a "host:port" string must consist of digits only; a value
 * such as "80abc" is rejected rather than truncated to 80.
 *
 * @param string $both The string with the host (or IPv4/IPv6 address) and port
 * @return array|false Array [ host, port|false ] normally, false on certain failures
 */
public static function splitHostAndPort( $both ) {
	if ( substr( $both, 0, 1 ) === '[' ) {
		if ( preg_match( '/^\[(' . self::RE_IPV6_ADD . ')\](?::(?P<port>\d+))?$/', $both, $m ) ) {
			if ( isset( $m['port'] ) ) {
				return [ $m[1], intval( $m['port'] ) ];
			}

			return [ $m[1], false ];
		}

		// Square bracket found but no IPv6
		return false;
	}
	$numColons = substr_count( $both, ':' );
	if ( $numColons >= 2 ) {
		// Is it a bare IPv6 address?
		if ( preg_match( '/^' . self::RE_IPV6_ADD . '$/', $both ) ) {
			return [ $both, false ];
		}

		// Not valid IPv6, but too many colons for anything else
		return false;
	}
	if ( $numColons >= 1 ) {
		// Host:port? Exactly one colon here, so there are exactly two bits.
		$bits = explode( ':', $both );
		// Anchor at both ends so that trailing garbage is not silently dropped
		if ( preg_match( '/^\d+$/', $bits[1] ) ) {
			return [ $bits[0], intval( $bits[1] ) ];
		}

		// Not a valid port
		return false;
	}

	// Plain hostname
	return [ $both, false ];
}
367 | |
/**
 * Build a host/port string of the kind found in a URL from a host name and
 * a port. A host that contains a colon is wrapped in square brackets as in
 * RFC 2732. The port is left out when a default port is given and the port
 * is identical to it (strict comparison).
 *
 * @param string $host
 * @param int $port
 * @param bool|int $defaultPort
 * @return string
 */
public static function combineHostAndPort( $host, $port, $defaultPort = false ) {
	$hostPart = strpos( $host, ':' ) === false ? $host : "[$host]";
	$portIsDefault = $defaultPort !== false && $port === $defaultPort;

	return $portIsDefault ? $hostPart : "$hostPart:$port";
}
389 | |
/**
 * Turn the hexadecimal representation of an IPv4 or IPv6 address back into
 * readable notation. A leading "v6-" marks the value as IPv6.
 *
 * @param string $hex Number, with "v6-" prefix if it is IPv6
 * @return string Quad-dotted (IPv4) or octet notation (IPv6)
 */
public static function formatHex( $hex ) {
	$isV6 = substr( $hex, 0, 3 ) === 'v6-';

	return $isV6
		// IPv6: drop the marker and format the remaining hex
		? self::hexToOctet( substr( $hex, 3 ) )
		// IPv4
		: self::hexToQuad( $hex );
}
405 | |
/**
 * Format a hexadecimal number as an IPv6 address in octet notation.
 *
 * @param string $ip_hex Pure hex (no v6- prefix)
 * @return string (of format a:b:c:d:e:f:g:h)
 */
public static function hexToOctet( $ip_hex ) {
	// Left-pad to 32 hex digits (128 bits)
	$padded = str_pad( strtoupper( $ip_hex ), 32, '0', STR_PAD_LEFT );
	// Cut into 8 words of 4 digits, joined by colons
	$octet = '';
	for ( $word = 0; $word < 8; $word++ ) {
		$separator = $word === 0 ? '' : ':';
		$octet .= $separator . substr( $padded, $word * 4, 4 );
	}

	// Strip leading zeros from every word
	return preg_replace( '/(^|:)0+(' . self::RE_IPV6_WORD . ')/', '$1$2', $octet );
}
423 | |
/**
 * Format a hexadecimal number as an IPv4 address in quad-dotted notation.
 *
 * @param string $ip_hex Pure hex
 * @return string (of format a.b.c.d)
 */
public static function hexToQuad( $ip_hex ) {
	// Left-pad to 8 hex digits (32 bits)
	$padded = str_pad( strtoupper( $ip_hex ), 8, '0', STR_PAD_LEFT );
	// First byte, then the remaining three each preceded by a dot
	$quad = base_convert( substr( $padded, 0, 2 ), 16, 10 );
	for ( $byte = 1; $byte < 4; $byte++ ) {
		$quad .= '.' . base_convert( substr( $padded, $byte * 2, 2 ), 16, 10 );
	}

	return $quad;
}
444 | |
/**
 * Report whether an address falls outside a fixed list of private,
 * loopback, link-local and "this network" ranges (RFC 1918 and similar).
 * The IPSet holding the list is built on first use and kept in a static.
 *
 * @param string $ip
 * @return bool False when the IPSet matches $ip, true otherwise
 */
public static function isPublic( $ip ) {
	static $privateSet = null;
	if ( $privateSet === null ) {
		$nonPublicRanges = [
			// RFC 1918 (private)
			'10.0.0.0/8',
			// RFC 1918 (private)
			'172.16.0.0/12',
			// RFC 1918 (private)
			'192.168.0.0/16',
			// this network
			'0.0.0.0/8',
			// loopback
			'127.0.0.0/8',
			// RFC 4193 (local)
			'fc00::/7',
			// loopback
			'0:0:0:0:0:0:0:1',
			// link-local
			'169.254.0.0/16',
			// link-local
			'fe80::/10',
		];
		$privateSet = new IPSet( $nonPublicRanges );
	}

	return !$privateSet->match( $ip );
}
478 | |
/**
 * Return a zero-padded upper case hexadecimal representation of an IP address.
 *
 * Hexadecimal is used because it extends naturally to IPv6. To keep the two
 * families apart, the value returned for an IPv6 address is prefixed with
 * "v6-", a non-hexadecimal string which sorts after the IPv4 addresses.
 *
 * @param string $ip Quad dotted/octet IP address.
 * @return string|bool False on failure
 */
public static function toHex( $ip ) {
	if ( self::isIPv6( $ip ) ) {
		return 'v6-' . self::IPv6ToRawHex( $ip );
	}
	if ( !self::isIPv4( $ip ) ) {
		return false;
	}

	// T62035/T97897: An IP with leading 0's fails in ip2long sometimes (e.g. *.08),
	// also double/triple 0 needs to be changed to just a single 0 for ip2long.
	$long = ip2long( self::sanitizeIP( $ip ) );
	if ( $long === false ) {
		return false;
	}
	if ( $long < 0 ) {
		// We don't run code coverage on a 32-bit OS or Windows, so this will never be exercised
		// @codeCoverageIgnoreStart
		$long += 2 ** 32;
		// On 32-bit platforms (and on Windows), 2^32 does not fit into an int,
		// so the sum becomes a float. We convert it to string instead.
		if ( is_float( $long ) ) {
			$long = (string)$long;
		}
		// @codeCoverageIgnoreEnd
	}

	// Floating points can handle the conversion; faster than \Wikimedia\base_convert()
	$hex = base_convert( $long, 10, 16 );

	return strtoupper( str_pad( $hex, 8, '0', STR_PAD_LEFT ) );
}
519 | |
/**
 * Turn an IPv6 address in octet notation into a pure hex string by
 * expanding it with sanitizeIP() and left-padding every word to 4 digits.
 *
 * @param string $ip Octet ipv6 IP address.
 * @return string|bool Pure hex (uppercase); false on failure
 */
private static function IPv6ToRawHex( $ip ) {
	$expanded = self::sanitizeIP( $ip );
	if ( !$expanded ) {
		return false;
	}

	$rawHex = '';
	$words = explode( ':', $expanded );
	foreach ( $words as $word ) {
		$rawHex .= str_pad( $word, 4, '0', STR_PAD_LEFT );
	}

	return $rawHex;
}
538 | |
/**
 * Convert a network specification in CIDR notation
 * to an integer network and a number of bits.
 *
 * IPv6 input is delegated to parseCIDR6(). For IPv4 the host bits of the
 * address are cleared, so "1.2.3.4/24" yields the network 1.2.3.0.
 *
 * @param string $range IP with CIDR prefix
 * @return array [int|string, int] on success, [false, false] when the input
 *  has no "/" part, the address is not parseable or the prefix is not a
 *  number between 0 and 32
 */
public static function parseCIDR( $range ) {
	if ( self::isIPv6( $range ) ) {
		return self::parseCIDR6( $range );
	}
	$parts = explode( '/', $range, 2 );
	if ( count( $parts ) !== 2 ) {
		return [ false, false ];
	}
	[ $network, $bits ] = $parts;
	$network = ip2long( $network );
	if ( $network === false || !is_numeric( $bits ) || $bits < 0 || $bits > 32 ) {
		return [ false, false ];
	}

	// explode() yields strings; normalise to int so that the strict
	// comparison below can match and callers get the documented type.
	$bits = (int)$bits;
	if ( $bits === 0 ) {
		$network = 0;
	} else {
		// Clear the (32 - $bits) host bits
		$network &= ~( ( 1 << ( 32 - $bits ) ) - 1 );
	}
	// Convert to unsigned
	if ( $network < 0 ) {
		$network += 2 ** 32;
	}

	return [ $network, $bits ];
}
574 | |
/**
 * Given a string range in a number of formats,
 * return the start and end of the range in hexadecimal.
 *
 * Formats are:
 *     1.2.3.4/24          CIDR
 *     1.2.3.4 - 1.2.3.5   Explicit range
 *     1.2.3.4             Single IP
 *
 *     2001:0db8:85a3::7344/96                       CIDR
 *     2001:0db8:85a3::7344 - 2001:0db8:85a3::7344   Explicit range
 *     2001:0db8:85a3::7344                          Single IP
 *
 * An explicit range whose start sorts after its end is treated as invalid.
 *
 * @param string $range IP range
 * @return array{string,string}|array{false,false} If the start or end of the range
 * is invalid, then array `[false, false]` is returned
 */
public static function parseRange( $range ) {
	if ( strpos( $range, '/' ) !== false ) {
		// CIDR notation
		if ( self::isIPv6( $range ) ) {
			return self::parseRange6( $range );
		}
		[ $network, $bits ] = self::parseCIDR( $range );
		if ( $network === false ) {
			$start = $end = false;
		} else {
			$start = sprintf( '%08X', $network );
			$end = sprintf( '%08X', $network + 2 ** ( 32 - $bits ) - 1 );
		}
	} elseif ( strpos( $range, '-' ) !== false ) {
		// Explicit range
		[ $start, $end ] = array_map( 'trim', explode( '-', $range, 2 ) );
		if ( self::isIPv6( $start ) && self::isIPv6( $end ) ) {
			return self::parseRange6( $range );
		}
		if ( self::isIPv4( $start ) && self::isIPv4( $end ) ) {
			$start = self::toHex( $start );
			$end = self::toHex( $end );
			// Compare as strings, never with < or >: both values are
			// fixed-width hex, and PHP would compare numeric-looking ones
			// such as '1E000000' (read as 1e0) and '00000200' as numbers.
			if ( $start === false || $end === false || strcmp( $start, $end ) > 0 ) {
				$start = $end = false;
			}
		} else {
			$start = $end = false;
		}
	} else {
		// Single IP
		$start = $end = self::toHex( $range );
	}
	if ( $start === false || $end === false ) {
		return [ false, false ];
	}

	return [ $start, $end ];
}
629 | |
/**
 * Convert a network specification in IPv6 CIDR notation to an
 * integer network and a number of bits.
 *
 * The network is returned as a decimal string because 128-bit values do
 * not fit into a native integer.
 *
 * @param string $range
 *
 * @return array{string,int}|array{false,false} `[false, false]` when the
 *  input has no "/" part, the address cannot be converted or the prefix is
 *  not a number between 0 and 128
 */
private static function parseCIDR6( $range ) {
	// Explode into <expanded IP,range>; the cast covers sanitizeIP()
	// returning null for empty input
	$parts = explode( '/', (string)self::sanitizeIP( $range ), 2 );
	if ( count( $parts ) !== 2 ) {
		return [ false, false ];
	}
	[ $network, $bits ] = $parts;
	$network = self::IPv6ToRawHex( $network );
	if ( $network === false || !is_numeric( $bits ) || $bits < 0 || $bits > 128 ) {
		return [ false, false ];
	}

	// explode() yields strings; normalise to int so that the strict
	// comparison below can match.
	$bits = (int)$bits;
	if ( $bits === 0 ) {
		return [ '0', 0 ];
	}

	// Native 32 bit functions WONT work here!!!
	// Convert to a padded binary number
	$network = \Wikimedia\base_convert( $network, 16, 2, 128 );
	// Truncate the last (128-$bits) bits and replace them with zeros
	$network = str_pad( substr( $network, 0, $bits ), 128, '0', STR_PAD_RIGHT );
	// Convert back to an integer
	$network = \Wikimedia\base_convert( $network, 2, 10 );

	return [ $network, $bits ];
}
666 | |
/**
 * IPv6 counterpart of parseRange(): return the start and end of a range
 * in hexadecimal, each prefixed with "v6-".
 *
 * Handled formats are:
 *     2001:0db8:85a3::7344/96                       CIDR
 *     2001:0db8:85a3::7344 - 2001:0db8:85a3::7344   Explicit range
 *
 * Input containing neither "/" nor "-" yields `[false, false]`.
 *
 * @param string $range
 *
 * @return array [string, string]|array [false, false] If the start or end of the range
 * is invalid, then array [false, false] is returned
 */
private static function parseRange6( $range ) {
	// Expand any IPv6 IP
	$expanded = self::sanitizeIP( $range );

	if ( strpos( $expanded, '/' ) !== false ) {
		// CIDR notation...
		[ $network, $bits ] = self::parseCIDR6( $expanded );
		if ( $network === false ) {
			return [ false, false ];
		}
		$first = \Wikimedia\base_convert( $network, 10, 16, 32, false );
		// Turn network to binary (again), keep the leading $bits bits
		// and fill the remainder with ones to get the last address
		$lastBinary = \Wikimedia\base_convert( $network, 10, 2, 128 );
		$lastBinary = str_pad( substr( $lastBinary, 0, $bits ), 128, '1', STR_PAD_RIGHT );
		$last = \Wikimedia\base_convert( $lastBinary, 2, 16, 32, false );

		// see toHex() comment
		return [ "v6-$first", "v6-$last" ];
	}

	if ( strpos( $expanded, '-' ) !== false ) {
		// Explicit range notation...
		[ $low, $high ] = array_map( 'trim', explode( '-', $expanded, 2 ) );
		$first = self::toHex( $low );
		$last = self::toHex( $high );
		if ( $first > $last || $first === false || $last === false ) {
			return [ false, false ];
		}

		return [ $first, $last ];
	}

	return [ false, false ];
}
719 | |
/**
 * Determine if a given IPv4/IPv6 address is in a given range
 * (any format understood by parseRange()).
 *
 * @param string $addr The address to check against the given range.
 * @param string $range The range to check the given address against.
 * @return bool Whether or not the given address is in the given range.
 *  Always false when the address or the range cannot be parsed.
 */
public static function isInRange( $addr, $range ) {
	$hexIP = self::toHex( $addr );
	[ $start, $end ] = self::parseRange( $range );

	// Without this guard an unparseable address checked against an
	// unparseable range compares false with false and is reported as
	// being "in range".
	if ( $hexIP === false || $start === false || $end === false ) {
		return false;
	}

	return strcmp( $hexIP, $start ) >= 0 &&
		strcmp( $hexIP, $end ) <= 0;
}
737 | |
/**
 * Determine whether an IP address lies within at least one of the given
 * ranges. Each range is checked with isInRange(), in order, stopping at
 * the first hit.
 *
 * @param string $ip the IP to check
 * @param array $ranges the IP ranges, each element a range
 *
 * @return bool true if the specified address belongs to one of the ranges; otherwise, false.
 */
public static function isInRanges( $ip, $ranges ) {
	$found = false;
	foreach ( $ranges as $candidate ) {
		if ( self::isInRange( $ip, $candidate ) ) {
			$found = true;
			break;
		}
	}

	return $found;
}
754 | |
/**
 * Convert some unusual representations of IPv4 addresses to their
 * canonical dotted quad representation.
 *
 * Only a few IPv4-in-IPv6 cases are recognised at present; valid
 * addresses that match none of them are returned unchanged (minus any
 * zone suffix).
 *
 * @param string $addr Something that might be an IP address
 * @return string|null Valid IP address or null
 * @return-taint none
 */
public static function canonicalize( $addr ) {
	// Drop zone info, i.e. everything from the first "%" on (T37738)
	$candidate = preg_replace( '/\%.*/', '', $addr );

	// A valid IPv4 address is already canonical
	if ( self::isValidIPv4( $candidate ) ) {
		return $candidate;
	}

	// https://en.wikipedia.org/wiki/IPv6#IPv4-mapped_IPv6_addresses
	// Dotted quad behind an IPv6 prefix, e.g.
	//   ::ffff:1.2.3.4 => 1.2.3.4 (IPv4-mapped IPv6 address)
	//   ::1.2.3.4      => 1.2.3.4 (IPv4-compatible IPv6 address)
	// IPv4-compatible IPv6 addresses are now deprecated https://tools.ietf.org/html/rfc4291#section-2.5.5.1
	$dottedForm = '/^' . self::RE_IPV6_V4_PREFIX . '(' . self::RE_IP_ADD . ')$/i';
	if ( preg_match( $dottedForm, $candidate, $dotted ) ) {
		return $dotted[1];
	}

	// Same prefix followed by two hex words holding the 32 address bits,
	// e.g. :ffff:1F => 255.255.0.31
	// Is this actually used/needed?
	$hexForm = '/^' . self::RE_IPV6_V4_PREFIX . self::RE_IPV6_WORD .
		':' . self::RE_IPV6_WORD . '$/i';
	if ( preg_match( $hexForm, $candidate, $words ) ) {
		$highWord = hexdec( $words[1] );
		$lowWord = hexdec( $words[2] );

		return long2ip( ( $highWord << 16 ) + $lowWord );
	}

	// A valid IPv6 address that was not converted is returned as it is;
	// anything else is not a valid IP address
	return self::isValidIPv6( $candidate ) ? $candidate : null;
}
800 | |
/**
 * Drop the host part from a quad-dotted/octet network string.
 * For example, 127.111.113.151/24 -> 127.111.113.0/24
 * Input for which parseCIDR() yields no bit count comes back as the
 * formatted start address alone.
 *
 * @param string $range IP address to normalize
 * @return string
 */
public static function sanitizeRange( $range ) {
	$bits = self::parseCIDR( $range )[1];
	$firstHex = self::parseRange( $range )[0];
	$firstAddress = self::formatHex( $firstHex );

	// No bit count means the input wasn't actually a range
	return $bits === false ? $firstAddress : "$firstAddress/$bits";
}
818 | |
/**
 * Return the subnet of a given IP: for IPv6 the hexadecimal start of the
 * enclosing /64 as computed by parseRange(), for IPv4 the first three
 * octets joined by dots.
 *
 * @param string $ip
 * @return string|false False when $ip is neither IPv6 nor a plain IPv4 address
 */
public static function getSubnet( $ip ) {
	if ( self::isIPv6( $ip ) ) {
		[ $rangeStart ] = self::parseRange( "$ip/64" );

		return $rangeStart;
	}

	$octets = [];
	if ( preg_match( '/^' . self::RE_IP_ADD . '$/', $ip, $octets ) ) {
		// IPv4
		return "{$octets[1]}.{$octets[2]}.{$octets[3]}";
	}

	return false;
}
837 | |
/**
 * Return all the addresses in a given range
 *
 * This currently does not support IPv6 ranges and is limited to the size of
 * a /16 block (MAXIMUM_IPS_FROM_RANGE = 65536 addresses).
 *
 * @param string $range IP ranges to get the IPs within
 * @return string[] Array of addresses in the range
 * @throws InvalidArgumentException If input uses IPv6
 * @throws InvalidArgumentException If input cannot be parsed or covers a single address only
 * @throws InvalidArgumentException If input range is too large
 */
public static function getIPsInRange( $range ): array {
	// No IPv6 for now.
	if ( self::isValidIPv6( $range ) || self::isValidIPv6Range( $range ) ) {
		throw new InvalidArgumentException( 'Cannot retrieve addresses for IPv6 range: ' . $range );
	}

	[ $start, $end ] = self::parseRange( $range );
	if ( $start === false || $start === $end ) {
		throw new InvalidArgumentException( 'Invalid range given: ' . $range );
	}

	// Both bounds are inclusive, hence the + 1 when counting addresses
	$addressCount = hexdec( $end ) - hexdec( $start ) + 1;
	if ( $addressCount > self::MAXIMUM_IPS_FROM_RANGE ) {
		throw new InvalidArgumentException( "Range {$range} is too large, it contains more than "
			. self::MAXIMUM_IPS_FROM_RANGE . ' addresses' );
	}

	$start = ip2long( self::formatHex( $start ) );
	$end = ip2long( self::formatHex( $end ) );

	return array_map( 'long2ip', range( $start, $end ) );
}
869 | } |