Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
77 / 77 |
|
100.00% |
5 / 5 |
CRAP | |
100.00% |
1 / 1 |
IPSet | |
100.00% |
77 / 77 |
|
100.00% |
5 / 5 |
29 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
addCidr | |
100.00% |
48 / 48 |
|
100.00% |
1 / 1 |
18 | |||
match | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
7 | |||
newFromJson | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
jsonSerialize | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | /** |
3 | * Copyright 2014, 2015 Brandon Black <blblack@gmail.com> |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @author Brandon Black <blblack@gmail.com> |
22 | */ |
23 | namespace Wikimedia; |
24 | |
25 | use JsonSerializable; |
26 | |
27 | /** |
28 | * Matches IP addresses against a set of CIDR specifications |
29 | * |
30 | * Usage: |
31 | * |
32 | * use Wikimedia\IPSet; |
33 | * // At startup, calculate the optimized data structure for the set: |
34 | * $ipset = new IPSet( [ |
35 | * '208.80.154.0/26', |
36 | * '2620:0:861:1::/64', |
37 | * '10.64.0.0/22', |
38 | * ] ); |
39 | * |
40 | * // Runtime check against cached set (returns bool): |
41 | * $allowme = $ipset->match( $ip ); |
42 | * |
43 | * In rough benchmarking, this takes about 80% more time than |
44 | * in_array() checks on a short (a couple hundred at most) array |
45 | * of addresses. It's fast either way at those levels, though, |
46 | * and IPSet would scale better than in_array if the array were |
47 | * much larger. |
48 | * |
49 | * For mixed-family CIDR sets, however, this code gives well over |
50 | * 100x speedup vs iterating Wikimedia\IPUtils::isInRange() over an array |
51 | * of CIDR specs. |
52 | * |
53 | * The basic implementation is two separate binary trees |
54 | * (IPv4 and IPv6) as nested php arrays with keys named 0 and 1. |
55 | * The values false and true are terminal match-fail and match-success, |
56 | * otherwise the value is a deeper node in the tree. |
57 | * |
58 | * A simple depth-compression scheme is also implemented: whole-byte |
59 | * tree compression at whole-byte boundaries only, where no branching |
60 | * occurs during that whole byte of depth. A compressed node has |
61 | * keys 'comp' (the byte to compare) and 'next' (the next node to |
62 | * recurse into if 'comp' matched successfully). |
63 | * |
64 | * For example, given these inputs: |
65 | * |
66 | * 25.0.0.0/9 |
67 | * 25.192.0.0/10 |
68 | * |
69 | * The v4 tree would look like: |
70 | * |
71 | * root4 => [ |
72 | * 'comp' => 25, |
73 | * 'next' => [ |
74 | * 0 => true, |
75 | * 1 => [ |
76 | * 0 => false, |
77 | * 1 => true, |
78 | * ], |
79 | * ], |
80 | * ]; |
81 | * |
82 | * (multi-byte compression nodes were attempted as well, but were |
83 | * a net loss in my test scenarios due to additional match complexity) |
84 | */ |
85 | class IPSet implements JsonSerializable { |
86 | /** @var array|bool The root of the IPv4 matching tree */ |
87 | private $root4 = false; |
88 | |
89 | /** @var array|bool The root of the IPv6 matching tree */ |
90 | private $root6 = false; |
91 | |
92 | /** |
93 | * Instantiate the object from an array of CIDR specs |
94 | * |
95 | * Invalid input network/mask values in $cfg will result in issuing |
96 | * E_WARNING and/or E_USER_WARNING and the bad values being ignored. |
97 | * |
98 | * @param array $cfg Array of IPv[46] CIDR specs as strings |
99 | */ |
100 | public function __construct( array $cfg ) { |
101 | foreach ( $cfg as $cidr ) { |
102 | $this->addCidr( $cidr ); |
103 | } |
104 | } |
105 | |
106 | /** |
107 | * Add a single CIDR spec to the internal matching trees |
108 | * |
109 | * @param string $cidr String CIDR spec, IPv[46], optional /mask (def all-1's) |
110 | * @return bool Returns true on success, false on failure |
111 | */ |
112 | private function addCidr( $cidr ): bool { |
113 | // v4 or v6 check |
114 | if ( strpos( $cidr, ':' ) === false ) { |
115 | $node =& $this->root4; |
116 | $defMask = '32'; |
117 | } else { |
118 | $node =& $this->root6; |
119 | $defMask = '128'; |
120 | } |
121 | |
122 | // Default to all-1's mask if no netmask in the input |
123 | if ( strpos( $cidr, '/' ) === false ) { |
124 | $net = $cidr; |
125 | $mask = $defMask; |
126 | } else { |
127 | [ $net, $mask ] = explode( '/', $cidr, 2 ); |
128 | if ( (int)$mask > $defMask || !ctype_digit( $mask ) ) { |
129 | trigger_error( "IPSet: Bad mask '$mask' from '$cidr', ignored", E_USER_WARNING ); |
130 | return false; |
131 | } |
132 | } |
133 | // explicit integer convert, checked above |
134 | $mask = (int)$mask; |
135 | |
136 | // convert $net to an array of integer bytes, length 4 or 16 |
137 | // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged |
138 | $raw = @inet_pton( $net ); |
139 | if ( $raw === false ) { |
140 | return false; |
141 | } |
142 | $rawOrd = array_map( 'ord', str_split( $raw ) ); |
143 | |
144 | // iterate the bits of the address while walking the tree structure for inserts |
145 | // at the end, $snode will point to the highest node that could only lead to a |
146 | // successful match (and thus can be set to true) |
147 | $snode =& $node; |
148 | $curBit = 0; |
149 | while ( 1 ) { |
150 | if ( $node === true ) { |
151 | // already added a larger supernet, no need to go deeper |
152 | return true; |
153 | } |
154 | |
155 | if ( $curBit === $mask ) { |
156 | // this may wipe out deeper subnets from earlier |
157 | $snode = true; |
158 | return true; |
159 | } |
160 | |
161 | if ( $node === false ) { |
162 | // create new subarray to go deeper |
163 | if ( !( $curBit & 7 ) && $curBit <= $mask - 8 ) { |
164 | $node = [ 'comp' => $rawOrd[$curBit >> 3], 'next' => false ]; |
165 | } else { |
166 | $node = [ false, false ]; |
167 | } |
168 | } |
169 | |
170 | if ( isset( $node['comp'] ) ) { |
171 | $comp = $node['comp']; |
172 | if ( $rawOrd[$curBit >> 3] === $comp && $curBit <= $mask - 8 ) { |
173 | // whole byte matches, skip over the compressed node |
174 | $node =& $node['next']; |
175 | $snode =& $node; |
176 | $curBit += 8; |
177 | continue; |
178 | } |
179 | |
180 | // have to decompress the node and check individual bits |
181 | $unode = $node['next']; |
182 | for ( $i = 0; $i < 8; ++$i ) { |
183 | $unode = ( $comp & ( 1 << $i ) ) |
184 | ? [ false, $unode ] |
185 | : [ $unode, false ]; |
186 | } |
187 | $node = $unode; |
188 | } |
189 | |
190 | $maskShift = 7 - ( $curBit & 7 ); |
191 | $index = ( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift; |
192 | if ( $node[$index ^ 1] !== true ) { |
193 | // no adjacent subnet, can't form a supernet at this level |
194 | $snode =& $node[$index]; |
195 | } |
196 | $node =& $node[$index]; |
197 | ++$curBit; |
198 | } |
199 | } |
200 | |
201 | /** |
202 | * Match an IP address against the set |
203 | * |
204 | * If $ip is unparseable, inet_pton may issue an E_WARNING to that effect |
205 | * |
206 | * @param string $ip string IPv[46] address |
207 | * @return bool True is match success, false is match failure |
208 | */ |
209 | public function match( $ip ): bool { |
210 | // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged |
211 | $raw = @inet_pton( $ip ); |
212 | if ( $raw === false ) { |
213 | return false; |
214 | } |
215 | |
216 | $rawOrd = array_map( 'ord', str_split( $raw ) ); |
217 | if ( count( $rawOrd ) === 4 ) { |
218 | $node =& $this->root4; |
219 | } else { |
220 | $node =& $this->root6; |
221 | } |
222 | |
223 | $curBit = 0; |
224 | while ( $node !== true && $node !== false ) { |
225 | if ( isset( $node['comp'] ) ) { |
226 | // compressed node, matches 1 whole byte on a byte boundary |
227 | if ( $rawOrd[$curBit >> 3] !== $node['comp'] ) { |
228 | return false; |
229 | } |
230 | $curBit += 8; |
231 | $node =& $node['next']; |
232 | } else { |
233 | // uncompressed node, walk in the correct direction for the current bit-value |
234 | $maskShift = 7 - ( $curBit & 7 ); |
235 | $node =& $node[( $rawOrd[$curBit >> 3] & ( 1 << $maskShift ) ) >> $maskShift]; |
236 | ++$curBit; |
237 | } |
238 | } |
239 | |
240 | return $node; |
241 | } |
242 | |
243 | /** |
244 | * @param string $json |
245 | * |
246 | * @return IPSet |
247 | */ |
248 | public static function newFromJson( string $json ): IPSet { |
249 | $ipset = new IPSet( [] ); |
250 | $decoded = json_decode( $json, true ); |
251 | $ipset->root4 = $decoded['ipv4'] ?? false; |
252 | $ipset->root6 = $decoded['ipv6'] ?? false; |
253 | |
254 | return $ipset; |
255 | } |
256 | |
257 | public function jsonSerialize(): array { |
258 | return [ |
259 | 'ipv4' => $this->root4, |
260 | 'ipv6' => $this->root6, |
261 | ]; |
262 | } |
263 | } |