Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
87.33% |
131 / 150 |
|
44.44% |
8 / 18 |
CRAP | |
0.00% |
0 / 1 |
HashRing | |
87.33% |
131 / 150 |
|
44.44% |
8 / 18 |
66.07 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
init | |
72.73% |
8 / 11 |
|
0.00% |
0 / 1 |
4.32 | |||
getLocation | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLocations | |
90.00% |
18 / 20 |
|
0.00% |
0 / 1 |
9.08 | |||
findNodeIndexForPosition | |
86.96% |
20 / 23 |
|
0.00% |
0 / 1 |
9.18 | |||
getLocationWeights | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
ejectFromLiveRing | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
2.02 | |||
getLiveLocation | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLiveLocations | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLiveLocationWeights | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
buildLocationRing | |
92.59% |
25 / 27 |
|
0.00% |
0 / 1 |
8.03 | |||
getItemPosition | |
71.43% |
5 / 7 |
|
0.00% |
0 / 1 |
3.21 | |||
getNodePositionQuartet | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
getNextClockwiseNodeIndex | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 | |||
getLiveRing | |
85.71% |
18 / 21 |
|
0.00% |
0 / 1 |
8.19 | |||
getCurrentTime | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
__serialize | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
__unserialize | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 |
1 | <?php |
2 | /** |
3 | * Convenience class for weighted consistent hash rings. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | */ |
22 | |
23 | /** |
24 | * Convenience class for weighted consistent hash rings |
25 | * |
26 | * This deterministically maps "keys" to a set of "locations" while avoiding clumping |
27 | * |
28 | * Each location is represented by a number of nodes on a ring proportionate to the ratio |
29 | * of its weight compared to the total location weight. Note positions are deterministically |
30 | * derived from the hash of the location name. Nodes are responsible for the portion of the |
31 | * ring, counter-clockwise, up until the next node. Locations are responsible for all portions |
32 | * of the ring that the location's nodes are responsible for. |
33 | * |
34 | * A location that is temporarily "ejected" is said to be absent from the "live" ring. |
35 | * If no location ejections are active, then the base ring and live ring are identical. |
36 | * |
37 | * This class is designed in a way that using the "md5" algorithm will make it compatible |
38 | * with libketama, e.g. OPT_LIBKETAMA_COMPATIBLE from the PECL memcached extension or "ketama" |
39 | * from twemproxy. This can simplify the process of switching client libraries. However, note |
40 | * that different clients might use incompatible 32-bit memcached value flag conventions. |
41 | * |
42 | * @since 1.22 |
43 | */ |
44 | class HashRing { |
45 | /** @var string Hashing algorithm for hash() */ |
46 | protected $algo; |
47 | /** @var int[] Non-empty (location => integer weight) */ |
48 | protected $weightByLocation; |
49 | /** @var int[] Map of (location => UNIX timestamp) */ |
50 | protected $ejectExpiryByLocation; |
51 | |
52 | /** @var array[] Non-empty position-ordered list of (position, location name) */ |
53 | protected $baseRing; |
54 | /** @var array[]|null Non-empty position-ordered list of (position, location name) */ |
55 | protected $liveRing; |
56 | |
57 | /** @var integer Overall number of node groups per server */ |
58 | private const HASHES_PER_LOCATION = 40; |
59 | /** @var integer Number of nodes in a node group */ |
60 | private const SECTORS_PER_HASH = 4; |
61 | |
62 | public const KEY_POS = 0; |
63 | public const KEY_LOCATION = 1; |
64 | |
65 | /** @var int Consider all locations */ |
66 | public const RING_ALL = 0; |
67 | /** @var int Only consider "live" locations */ |
68 | public const RING_LIVE = 1; |
69 | |
70 | /** |
71 | * Make a consistent hash ring given a set of locations and their weight values |
72 | * |
73 | * @param int[] $map Map of (location => weight) |
74 | * @param string $algo Hashing algorithm listed in hash_algos() [optional] |
75 | * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries |
76 | * @since 1.31 |
77 | */ |
78 | public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) { |
79 | $this->init( $map, $algo, $ejections ); |
80 | } |
81 | |
82 | /** |
83 | * @param int[] $map Map of (location => integer) |
84 | * @param string $algo Hashing algorithm |
85 | * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expires |
86 | */ |
87 | protected function init( array $map, $algo, array $ejections ) { |
88 | if ( !in_array( $algo, hash_algos(), true ) ) { |
89 | throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." ); |
90 | } |
91 | |
92 | $weightByLocation = array_filter( $map ); |
93 | if ( $weightByLocation === [] ) { |
94 | throw new UnexpectedValueException( "No locations with non-zero weight." ); |
95 | } elseif ( min( $map ) < 0 ) { |
96 | throw new InvalidArgumentException( "Location weight cannot be negative." ); |
97 | } |
98 | |
99 | $this->algo = $algo; |
100 | $this->weightByLocation = $weightByLocation; |
101 | $this->ejectExpiryByLocation = $ejections; |
102 | $this->baseRing = $this->buildLocationRing( $this->weightByLocation ); |
103 | } |
104 | |
105 | /** |
106 | * Get the location of an item on the ring |
107 | * |
108 | * @param string $item |
109 | * @return string Location |
110 | * @throws UnexpectedValueException |
111 | */ |
112 | final public function getLocation( $item ) { |
113 | return $this->getLocations( $item, 1 )[0]; |
114 | } |
115 | |
116 | /** |
117 | * Get the location of an item on the ring followed by the next ring locations |
118 | * |
119 | * @param string $item |
120 | * @param int $limit Maximum number of locations to return |
121 | * @param int $from One of the RING_* class constants |
122 | * @return string[] List of locations |
123 | * @throws InvalidArgumentException |
124 | * @throws UnexpectedValueException |
125 | */ |
126 | public function getLocations( $item, $limit, $from = self::RING_ALL ) { |
127 | if ( $from === self::RING_ALL ) { |
128 | $ring = $this->baseRing; |
129 | } elseif ( $from === self::RING_LIVE ) { |
130 | $ring = $this->getLiveRing(); |
131 | } else { |
132 | throw new InvalidArgumentException( "Invalid ring source specified." ); |
133 | } |
134 | |
135 | // Short-circuit for the common single-location case. Note that if there was only one |
136 | // location and it was ejected from the live ring, getLiveRing() would have error out. |
137 | if ( count( $this->weightByLocation ) == 1 ) { |
138 | return ( $limit > 0 ) ? [ $ring[0][self::KEY_LOCATION] ] : []; |
139 | } |
140 | |
141 | // Locate the node index for this item's position on the hash ring |
142 | $itemIndex = $this->findNodeIndexForPosition( $this->getItemPosition( $item ), $ring ); |
143 | |
144 | $locations = []; |
145 | $currentIndex = null; |
146 | while ( count( $locations ) < $limit ) { |
147 | if ( $currentIndex === null ) { |
148 | $currentIndex = $itemIndex; |
149 | } else { |
150 | $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring ); |
151 | if ( $currentIndex === $itemIndex ) { |
152 | break; // all nodes visited |
153 | } |
154 | } |
155 | // @phan-suppress-next-line PhanTypeMismatchDimFetchNullable False positive |
156 | $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION]; |
157 | if ( !in_array( $nodeLocation, $locations, true ) ) { |
158 | // Ignore other nodes for the same locations already added |
159 | $locations[] = $nodeLocation; |
160 | } |
161 | } |
162 | |
163 | return $locations; |
164 | } |
165 | |
166 | /** |
167 | * @param float $position |
168 | * @param array[] $ring Either the base or live ring |
169 | * @return int|null |
170 | */ |
171 | private function findNodeIndexForPosition( $position, $ring ) { |
172 | $count = count( $ring ); |
173 | if ( $count === 0 ) { |
174 | return null; |
175 | } |
176 | |
177 | $index = null; |
178 | $lowPos = 0; |
179 | $highPos = $count; |
180 | while ( true ) { |
181 | $midPos = (int)( ( $lowPos + $highPos ) / 2 ); |
182 | if ( $midPos === $count ) { |
183 | $index = 0; |
184 | break; |
185 | } |
186 | |
187 | $midVal = $ring[$midPos][self::KEY_POS]; |
188 | $midMinusOneVal = ( $midPos === 0 ) ? 0 : $ring[$midPos - 1][self::KEY_POS]; |
189 | if ( $position <= $midVal && $position > $midMinusOneVal ) { |
190 | $index = $midPos; |
191 | break; |
192 | } |
193 | |
194 | if ( $midVal < $position ) { |
195 | $lowPos = $midPos + 1; |
196 | } else { |
197 | $highPos = $midPos - 1; |
198 | } |
199 | |
200 | if ( $lowPos > $highPos ) { |
201 | $index = 0; |
202 | break; |
203 | } |
204 | } |
205 | |
206 | return $index; |
207 | } |
208 | |
209 | /** |
210 | * Get the map of locations to weight (does not include zero weight items) |
211 | * |
212 | * @return int[] |
213 | */ |
214 | public function getLocationWeights() { |
215 | return $this->weightByLocation; |
216 | } |
217 | |
218 | /** |
219 | * Remove a location from the "live" hash ring |
220 | * |
221 | * @param string $location |
222 | * @param int $ttl Seconds |
223 | * @return bool Whether some non-ejected locations are left |
224 | * @throws UnexpectedValueException |
225 | */ |
226 | public function ejectFromLiveRing( $location, $ttl ) { |
227 | if ( !isset( $this->weightByLocation[$location] ) ) { |
228 | throw new UnexpectedValueException( "No location '$location' in the ring." ); |
229 | } |
230 | |
231 | $expiry = $this->getCurrentTime() + $ttl; |
232 | $this->ejectExpiryByLocation[$location] = $expiry; |
233 | |
234 | $this->liveRing = null; // invalidate ring cache |
235 | |
236 | return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) ); |
237 | } |
238 | |
239 | /** |
240 | * Get the location of an item on the "live" ring |
241 | * |
242 | * @param string $item |
243 | * @return string Location |
244 | * @throws UnexpectedValueException |
245 | */ |
246 | final public function getLiveLocation( $item ) { |
247 | return $this->getLocations( $item, 1, self::RING_LIVE )[0]; |
248 | } |
249 | |
250 | /** |
251 | * Get the location of an item on the "live" ring, as well as the next locations |
252 | * |
253 | * @param string $item |
254 | * @param int $limit Maximum number of locations to return |
255 | * @return string[] List of locations |
256 | * @throws UnexpectedValueException |
257 | */ |
258 | final public function getLiveLocations( $item, $limit ) { |
259 | return $this->getLocations( $item, $limit, self::RING_LIVE ); |
260 | } |
261 | |
262 | /** |
263 | * Get the map of "live" locations to weight (does not include zero weight items) |
264 | * |
265 | * @return int[] |
266 | * @throws UnexpectedValueException |
267 | */ |
268 | public function getLiveLocationWeights() { |
269 | $now = $this->getCurrentTime(); |
270 | |
271 | return array_diff_key( |
272 | $this->weightByLocation, |
273 | array_filter( |
274 | $this->ejectExpiryByLocation, |
275 | static function ( $expiry ) use ( $now ) { |
276 | return ( $expiry > $now ); |
277 | } |
278 | ) |
279 | ); |
280 | } |
281 | |
282 | /** |
283 | * @param int[] $weightByLocation |
284 | * @return array[] |
285 | */ |
286 | private function buildLocationRing( array $weightByLocation ) { |
287 | $locationCount = count( $weightByLocation ); |
288 | $totalWeight = array_sum( $weightByLocation ); |
289 | |
290 | $ring = []; |
291 | // Assign nodes to all locations based on location weight |
292 | $claimed = []; // (position as string => (node, index)) |
293 | foreach ( $weightByLocation as $location => $weight ) { |
294 | $ratio = $weight / $totalWeight; |
295 | // There $locationCount * (HASHES_PER_LOCATION * 4) nodes available; |
296 | // assign a few groups of nodes to this location based on its weight. |
297 | $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount ); |
298 | for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) { |
299 | // For efficiency, get 4 points per hash call and 4X node count. |
300 | // If $algo is MD5, then this matches that of with libketama. |
301 | // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c |
302 | $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" ); |
303 | foreach ( $positions as $gi => $position ) { |
304 | $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location"; |
305 | $posKey = (string)$position; // large integer |
306 | if ( isset( $claimed[$posKey] ) ) { |
307 | // Disallow duplicates (name decides precedence) |
308 | if ( $claimed[$posKey]['node'] > $node ) { |
309 | continue; |
310 | } else { |
311 | unset( $ring[$claimed[$posKey]['index']] ); |
312 | } |
313 | } |
314 | $ring[] = [ |
315 | self::KEY_POS => $position, |
316 | self::KEY_LOCATION => $location |
317 | ]; |
318 | $claimed[$posKey] = [ 'node' => $node, 'index' => count( $ring ) - 1 ]; |
319 | } |
320 | } |
321 | } |
322 | // Sort the locations into clockwise order based on the hash ring position |
323 | usort( $ring, static function ( $a, $b ) { |
324 | if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) { |
325 | throw new UnexpectedValueException( 'Duplicate node positions.' ); |
326 | } |
327 | |
328 | return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 ); |
329 | } ); |
330 | |
331 | return $ring; |
332 | } |
333 | |
334 | /** |
335 | * @param string $item Key |
336 | * @return float Ring position; integral number in [0, 4294967296] (2^32) |
337 | */ |
338 | private function getItemPosition( $item ) { |
339 | // If $algo is MD5, then this matches that of with libketama. |
340 | // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c |
341 | $octets = substr( hash( $this->algo, (string)$item, true ), 0, 4 ); |
342 | if ( strlen( $octets ) != 4 ) { |
343 | throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." ); |
344 | } |
345 | |
346 | $pos = unpack( 'V', $octets )[1]; |
347 | if ( $pos < 0 ) { |
348 | // Most-significant-bit is set, causing unpack() to return a negative integer due |
349 | // to the fact that it returns a signed int. Cast it to an unsigned integer string. |
350 | $pos = sprintf( '%u', $pos ); |
351 | } |
352 | |
353 | return (float)$pos; |
354 | } |
355 | |
356 | /** |
357 | * @param string $nodeGroupName |
358 | * @return float[] Four ring positions on [0, 4294967296] (2^32) |
359 | */ |
360 | private function getNodePositionQuartet( $nodeGroupName ) { |
361 | $octets = substr( hash( $this->algo, (string)$nodeGroupName, true ), 0, 16 ); |
362 | if ( strlen( $octets ) != 16 ) { |
363 | throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." ); |
364 | } |
365 | |
366 | $positions = []; |
367 | foreach ( unpack( 'V4', $octets ) as $signed ) { |
368 | $positions[] = (float)sprintf( '%u', $signed ); |
369 | } |
370 | |
371 | return $positions; |
372 | } |
373 | |
374 | /** |
375 | * @param int $i Valid index for a node in the ring |
376 | * @param array[] $ring Either the base or live ring |
377 | * @return int Valid index for a node in the ring |
378 | */ |
379 | private function getNextClockwiseNodeIndex( $i, $ring ) { |
380 | if ( !isset( $ring[$i] ) ) { |
381 | throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." ); |
382 | } |
383 | |
384 | $next = $i + 1; |
385 | |
386 | return ( $next < count( $ring ) ) ? $next : 0; |
387 | } |
388 | |
389 | /** |
390 | * Get the "live" hash ring (which does not include ejected locations) |
391 | * |
392 | * @return array[] |
393 | * @throws UnexpectedValueException |
394 | */ |
395 | protected function getLiveRing() { |
396 | if ( !$this->ejectExpiryByLocation ) { |
397 | return $this->baseRing; // nothing ejected |
398 | } |
399 | |
400 | $now = $this->getCurrentTime(); |
401 | |
402 | if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) { |
403 | // Live ring needs to be regenerated... |
404 | $this->ejectExpiryByLocation = array_filter( |
405 | $this->ejectExpiryByLocation, |
406 | static function ( $expiry ) use ( $now ) { |
407 | return ( $expiry > $now ); |
408 | } |
409 | ); |
410 | |
411 | if ( count( $this->ejectExpiryByLocation ) ) { |
412 | // Some locations are still ejected from the ring |
413 | $liveRing = []; |
414 | foreach ( $this->baseRing as $nodeInfo ) { |
415 | $location = $nodeInfo[self::KEY_LOCATION]; |
416 | if ( !isset( $this->ejectExpiryByLocation[$location] ) ) { |
417 | $liveRing[] = $nodeInfo; |
418 | } |
419 | } |
420 | } else { |
421 | $liveRing = $this->baseRing; |
422 | } |
423 | |
424 | $this->liveRing = $liveRing; |
425 | } |
426 | |
427 | if ( !$this->liveRing ) { |
428 | throw new UnexpectedValueException( "The live ring is currently empty." ); |
429 | } |
430 | |
431 | return $this->liveRing; |
432 | } |
433 | |
434 | /** |
435 | * @return int UNIX timestamp |
436 | */ |
437 | protected function getCurrentTime() { |
438 | return time(); |
439 | } |
440 | |
441 | public function __serialize() { |
442 | return [ |
443 | 'algorithm' => $this->algo, |
444 | 'locations' => $this->weightByLocation, |
445 | 'ejections' => $this->ejectExpiryByLocation |
446 | ]; |
447 | } |
448 | |
449 | public function __unserialize( $data ) { |
450 | if ( is_array( $data ) ) { |
451 | $this->init( $data['locations'] ?? [], $data['algorithm'] ?? 'sha1', $data['ejections'] ?? [] ); |
452 | } else { |
453 | throw new UnexpectedValueException( __METHOD__ . ": unable to decode JSON." ); |
454 | } |
455 | } |
456 | } |