Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
86.75% |
131 / 151 |
|
44.44% |
8 / 18 |
CRAP | |
0.00% |
0 / 1 |
| HashRing | |
87.33% |
131 / 150 |
|
44.44% |
8 / 18 |
66.07 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| init | |
72.73% |
8 / 11 |
|
0.00% |
0 / 1 |
4.32 | |||
| getLocation | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getLocations | |
90.00% |
18 / 20 |
|
0.00% |
0 / 1 |
9.08 | |||
| findNodeIndexForPosition | |
86.96% |
20 / 23 |
|
0.00% |
0 / 1 |
9.18 | |||
| getLocationWeights | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| ejectFromLiveRing | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
2.02 | |||
| getLiveLocation | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getLiveLocations | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getLiveLocationWeights | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
| buildLocationRing | |
92.59% |
25 / 27 |
|
0.00% |
0 / 1 |
8.03 | |||
| getItemPosition | |
71.43% |
5 / 7 |
|
0.00% |
0 / 1 |
3.21 | |||
| getNodePositionQuartet | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
| getNextClockwiseNodeIndex | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 | |||
| getLiveRing | |
85.71% |
18 / 21 |
|
0.00% |
0 / 1 |
8.19 | |||
| getCurrentTime | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| __serialize | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
| __unserialize | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Convenience class for weighted consistent hash rings. |
| 4 | * |
| 5 | * @license GPL-2.0-or-later |
| 6 | * @file |
| 7 | */ |
| 8 | |
| 9 | namespace Wikimedia\HashRing; |
| 10 | |
| 11 | use InvalidArgumentException; |
| 12 | use RuntimeException; |
| 13 | use UnexpectedValueException; |
| 14 | |
| 15 | /** |
| 16 | * Convenience class for weighted consistent hash rings |
| 17 | * |
| 18 | * This deterministically maps "keys" to a set of "locations" while avoiding clumping |
| 19 | * |
| 20 | * Each location is represented by a number of nodes on a ring proportionate to the ratio |
| 21 | * of its weight compared to the total location weight. Note positions are deterministically |
| 22 | * derived from the hash of the location name. Nodes are responsible for the portion of the |
| 23 | * ring, counter-clockwise, up until the next node. Locations are responsible for all portions |
| 24 | * of the ring that the location's nodes are responsible for. |
| 25 | * |
| 26 | * A location that is temporarily "ejected" is said to be absent from the "live" ring. |
| 27 | * If no location ejections are active, then the base ring and live ring are identical. |
| 28 | * |
| 29 | * This class is designed in a way that using the "md5" algorithm will make it compatible |
| 30 | * with libketama, e.g. OPT_LIBKETAMA_COMPATIBLE from the PECL memcached extension or "ketama" |
| 31 | * from twemproxy. This can simplify the process of switching client libraries. However, note |
| 32 | * that different clients might use incompatible 32-bit memcached value flag conventions. |
| 33 | * |
| 34 | * @since 1.22 |
| 35 | */ |
| 36 | class HashRing { |
| 37 | /** @var string Hashing algorithm for hash() */ |
| 38 | protected $algo; |
| 39 | /** @var int[] Non-empty (location => integer weight) */ |
| 40 | protected $weightByLocation; |
| 41 | /** @var int[] Map of (location => UNIX timestamp) */ |
| 42 | protected $ejectExpiryByLocation; |
| 43 | |
| 44 | /** @var array[] Non-empty position-ordered list of (position, location name) */ |
| 45 | protected $baseRing; |
| 46 | /** @var array[]|null Non-empty position-ordered list of (position, location name) */ |
| 47 | protected $liveRing; |
| 48 | |
| 49 | /** @var integer Overall number of node groups per server */ |
| 50 | private const HASHES_PER_LOCATION = 40; |
| 51 | /** @var integer Number of nodes in a node group */ |
| 52 | private const SECTORS_PER_HASH = 4; |
| 53 | |
| 54 | public const KEY_POS = 0; |
| 55 | public const KEY_LOCATION = 1; |
| 56 | |
| 57 | /** @var int Consider all locations */ |
| 58 | public const RING_ALL = 0; |
| 59 | /** @var int Only consider "live" locations */ |
| 60 | public const RING_LIVE = 1; |
| 61 | |
| 62 | /** |
| 63 | * Make a consistent hash ring given a set of locations and their weight values |
| 64 | * |
| 65 | * @param int[] $map Map of (location => weight) |
| 66 | * @param string $algo Hashing algorithm listed in hash_algos() [optional] |
| 67 | * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries |
| 68 | * @since 1.31 |
| 69 | */ |
| 70 | public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) { |
| 71 | $this->init( $map, $algo, $ejections ); |
| 72 | } |
| 73 | |
| 74 | /** |
| 75 | * @param int[] $map Map of (location => integer) |
| 76 | * @param string $algo Hashing algorithm |
| 77 | * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expires |
| 78 | */ |
| 79 | protected function init( array $map, $algo, array $ejections ) { |
| 80 | if ( !in_array( $algo, hash_algos(), true ) ) { |
| 81 | throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." ); |
| 82 | } |
| 83 | |
| 84 | $weightByLocation = array_filter( $map ); |
| 85 | if ( $weightByLocation === [] ) { |
| 86 | throw new UnexpectedValueException( "No locations with non-zero weight." ); |
| 87 | } elseif ( min( $map ) < 0 ) { |
| 88 | throw new InvalidArgumentException( "Location weight cannot be negative." ); |
| 89 | } |
| 90 | |
| 91 | $this->algo = $algo; |
| 92 | $this->weightByLocation = $weightByLocation; |
| 93 | $this->ejectExpiryByLocation = $ejections; |
| 94 | $this->baseRing = $this->buildLocationRing( $this->weightByLocation ); |
| 95 | } |
| 96 | |
| 97 | /** |
| 98 | * Get the location of an item on the ring |
| 99 | * |
| 100 | * @param string $item |
| 101 | * @return string Location |
| 102 | * @throws UnexpectedValueException |
| 103 | */ |
| 104 | final public function getLocation( $item ) { |
| 105 | return $this->getLocations( $item, 1 )[0]; |
| 106 | } |
| 107 | |
| 108 | /** |
| 109 | * Get the location of an item on the ring followed by the next ring locations |
| 110 | * |
| 111 | * @param string $item |
| 112 | * @param int $limit Maximum number of locations to return |
| 113 | * @param int $from One of the RING_* class constants |
| 114 | * @return string[] List of locations |
| 115 | * @throws UnexpectedValueException |
| 116 | */ |
| 117 | public function getLocations( $item, $limit, $from = self::RING_ALL ) { |
| 118 | if ( $from === self::RING_ALL ) { |
| 119 | $ring = $this->baseRing; |
| 120 | } elseif ( $from === self::RING_LIVE ) { |
| 121 | $ring = $this->getLiveRing(); |
| 122 | } else { |
| 123 | throw new InvalidArgumentException( "Invalid ring source specified." ); |
| 124 | } |
| 125 | |
| 126 | // Short-circuit for the common single-location case. Note that if there was only one |
| 127 | // location and it was ejected from the live ring, getLiveRing() would have error out. |
| 128 | if ( count( $this->weightByLocation ) == 1 ) { |
| 129 | return ( $limit > 0 ) ? [ $ring[0][self::KEY_LOCATION] ] : []; |
| 130 | } |
| 131 | |
| 132 | // Locate the node index for this item's position on the hash ring |
| 133 | $itemIndex = $this->findNodeIndexForPosition( $this->getItemPosition( $item ), $ring ); |
| 134 | |
| 135 | $locations = []; |
| 136 | $currentIndex = null; |
| 137 | while ( count( $locations ) < $limit ) { |
| 138 | if ( $currentIndex === null ) { |
| 139 | $currentIndex = $itemIndex; |
| 140 | } else { |
| 141 | $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring ); |
| 142 | if ( $currentIndex === $itemIndex ) { |
| 143 | break; // all nodes visited |
| 144 | } |
| 145 | } |
| 146 | // @phan-suppress-next-line PhanTypeMismatchDimFetchNullable False positive |
| 147 | $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION]; |
| 148 | if ( !in_array( $nodeLocation, $locations, true ) ) { |
| 149 | // Ignore other nodes for the same locations already added |
| 150 | $locations[] = $nodeLocation; |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | return $locations; |
| 155 | } |
| 156 | |
| 157 | /** |
| 158 | * @param float $position |
| 159 | * @param array[] $ring Either the base or live ring |
| 160 | * @return int|null |
| 161 | */ |
| 162 | private function findNodeIndexForPosition( $position, $ring ) { |
| 163 | $count = count( $ring ); |
| 164 | if ( $count === 0 ) { |
| 165 | return null; |
| 166 | } |
| 167 | |
| 168 | $index = null; |
| 169 | $lowPos = 0; |
| 170 | $highPos = $count; |
| 171 | while ( true ) { |
| 172 | $midPos = (int)( ( $lowPos + $highPos ) / 2 ); |
| 173 | if ( $midPos === $count ) { |
| 174 | $index = 0; |
| 175 | break; |
| 176 | } |
| 177 | |
| 178 | $midVal = $ring[$midPos][self::KEY_POS]; |
| 179 | $midMinusOneVal = ( $midPos === 0 ) ? 0 : $ring[$midPos - 1][self::KEY_POS]; |
| 180 | if ( $position <= $midVal && $position > $midMinusOneVal ) { |
| 181 | $index = $midPos; |
| 182 | break; |
| 183 | } |
| 184 | |
| 185 | if ( $midVal < $position ) { |
| 186 | $lowPos = $midPos + 1; |
| 187 | } else { |
| 188 | $highPos = $midPos - 1; |
| 189 | } |
| 190 | |
| 191 | if ( $lowPos > $highPos ) { |
| 192 | $index = 0; |
| 193 | break; |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | return $index; |
| 198 | } |
| 199 | |
| 200 | /** |
| 201 | * Get the map of locations to weight (does not include zero weight items) |
| 202 | * |
| 203 | * @return int[] |
| 204 | */ |
| 205 | public function getLocationWeights() { |
| 206 | return $this->weightByLocation; |
| 207 | } |
| 208 | |
| 209 | /** |
| 210 | * Remove a location from the "live" hash ring |
| 211 | * |
| 212 | * @param string $location |
| 213 | * @param int $ttl Seconds |
| 214 | * @return bool Whether some non-ejected locations are left |
| 215 | * @throws UnexpectedValueException |
| 216 | */ |
| 217 | public function ejectFromLiveRing( $location, $ttl ) { |
| 218 | if ( !isset( $this->weightByLocation[$location] ) ) { |
| 219 | throw new UnexpectedValueException( "No location '$location' in the ring." ); |
| 220 | } |
| 221 | |
| 222 | $expiry = $this->getCurrentTime() + $ttl; |
| 223 | $this->ejectExpiryByLocation[$location] = $expiry; |
| 224 | |
| 225 | $this->liveRing = null; // invalidate ring cache |
| 226 | |
| 227 | return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) ); |
| 228 | } |
| 229 | |
| 230 | /** |
| 231 | * Get the location of an item on the "live" ring |
| 232 | * |
| 233 | * @param string $item |
| 234 | * @return string Location |
| 235 | * @throws UnexpectedValueException |
| 236 | */ |
| 237 | final public function getLiveLocation( $item ) { |
| 238 | return $this->getLocations( $item, 1, self::RING_LIVE )[0]; |
| 239 | } |
| 240 | |
| 241 | /** |
| 242 | * Get the location of an item on the "live" ring, as well as the next locations |
| 243 | * |
| 244 | * @param string $item |
| 245 | * @param int $limit Maximum number of locations to return |
| 246 | * @return string[] List of locations |
| 247 | * @throws UnexpectedValueException |
| 248 | */ |
| 249 | final public function getLiveLocations( $item, $limit ) { |
| 250 | return $this->getLocations( $item, $limit, self::RING_LIVE ); |
| 251 | } |
| 252 | |
| 253 | /** |
| 254 | * Get the map of "live" locations to weight (does not include zero weight items) |
| 255 | * |
| 256 | * @return int[] |
| 257 | * @throws UnexpectedValueException |
| 258 | */ |
| 259 | public function getLiveLocationWeights() { |
| 260 | $now = $this->getCurrentTime(); |
| 261 | |
| 262 | return array_diff_key( |
| 263 | $this->weightByLocation, |
| 264 | array_filter( |
| 265 | $this->ejectExpiryByLocation, |
| 266 | static function ( $expiry ) use ( $now ) { |
| 267 | return ( $expiry > $now ); |
| 268 | } |
| 269 | ) |
| 270 | ); |
| 271 | } |
| 272 | |
| 273 | /** |
| 274 | * @param int[] $weightByLocation |
| 275 | * @return array[] |
| 276 | */ |
| 277 | private function buildLocationRing( array $weightByLocation ) { |
| 278 | $locationCount = count( $weightByLocation ); |
| 279 | $totalWeight = array_sum( $weightByLocation ); |
| 280 | |
| 281 | $ring = []; |
| 282 | // Assign nodes to all locations based on location weight |
| 283 | $claimed = []; // (position as string => (node, index)) |
| 284 | foreach ( $weightByLocation as $location => $weight ) { |
| 285 | $ratio = $weight / $totalWeight; |
| 286 | // There $locationCount * (HASHES_PER_LOCATION * 4) nodes available; |
| 287 | // assign a few groups of nodes to this location based on its weight. |
| 288 | $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount ); |
| 289 | for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) { |
| 290 | // For efficiency, get 4 points per hash call and 4X node count. |
| 291 | // If $algo is MD5, then this matches that of with libketama. |
| 292 | // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c |
| 293 | $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" ); |
| 294 | foreach ( $positions as $gi => $position ) { |
| 295 | $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location"; |
| 296 | $posKey = (string)$position; // large integer |
| 297 | if ( isset( $claimed[$posKey] ) ) { |
| 298 | // Disallow duplicates (name decides precedence) |
| 299 | if ( $claimed[$posKey]['node'] > $node ) { |
| 300 | continue; |
| 301 | } else { |
| 302 | unset( $ring[$claimed[$posKey]['index']] ); |
| 303 | } |
| 304 | } |
| 305 | $ring[] = [ |
| 306 | self::KEY_POS => $position, |
| 307 | self::KEY_LOCATION => $location |
| 308 | ]; |
| 309 | $claimed[$posKey] = [ 'node' => $node, 'index' => count( $ring ) - 1 ]; |
| 310 | } |
| 311 | } |
| 312 | } |
| 313 | // Sort the locations into clockwise order based on the hash ring position |
| 314 | usort( $ring, static function ( $a, $b ) { |
| 315 | if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) { |
| 316 | throw new UnexpectedValueException( 'Duplicate node positions.' ); |
| 317 | } |
| 318 | |
| 319 | return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 ); |
| 320 | } ); |
| 321 | |
| 322 | return $ring; |
| 323 | } |
| 324 | |
| 325 | /** |
| 326 | * @param string $item Key |
| 327 | * @return float Ring position; integral number in [0, 4294967296] (2^32) |
| 328 | */ |
| 329 | private function getItemPosition( $item ) { |
| 330 | // If $algo is MD5, then this matches that of with libketama. |
| 331 | // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c |
| 332 | $octets = substr( hash( $this->algo, (string)$item, true ), 0, 4 ); |
| 333 | if ( strlen( $octets ) != 4 ) { |
| 334 | throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." ); |
| 335 | } |
| 336 | |
| 337 | $pos = unpack( 'V', $octets )[1]; |
| 338 | if ( $pos < 0 ) { |
| 339 | // Most-significant-bit is set, causing unpack() to return a negative integer due |
| 340 | // to the fact that it returns a signed int. Cast it to an unsigned integer string. |
| 341 | $pos = sprintf( '%u', $pos ); |
| 342 | } |
| 343 | |
| 344 | return (float)$pos; |
| 345 | } |
| 346 | |
| 347 | /** |
| 348 | * @param string $nodeGroupName |
| 349 | * @return float[] Four ring positions on [0, 4294967296] (2^32) |
| 350 | */ |
| 351 | private function getNodePositionQuartet( $nodeGroupName ) { |
| 352 | $octets = substr( hash( $this->algo, (string)$nodeGroupName, true ), 0, 16 ); |
| 353 | if ( strlen( $octets ) != 16 ) { |
| 354 | throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." ); |
| 355 | } |
| 356 | |
| 357 | $positions = []; |
| 358 | foreach ( unpack( 'V4', $octets ) as $signed ) { |
| 359 | $positions[] = (float)sprintf( '%u', $signed ); |
| 360 | } |
| 361 | |
| 362 | return $positions; |
| 363 | } |
| 364 | |
| 365 | /** |
| 366 | * @param int $i Valid index for a node in the ring |
| 367 | * @param array[] $ring Either the base or live ring |
| 368 | * @return int Valid index for a node in the ring |
| 369 | */ |
| 370 | private function getNextClockwiseNodeIndex( $i, $ring ) { |
| 371 | if ( !isset( $ring[$i] ) ) { |
| 372 | throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." ); |
| 373 | } |
| 374 | |
| 375 | $next = $i + 1; |
| 376 | |
| 377 | return ( $next < count( $ring ) ) ? $next : 0; |
| 378 | } |
| 379 | |
| 380 | /** |
| 381 | * Get the "live" hash ring (which does not include ejected locations) |
| 382 | * |
| 383 | * @return array[] |
| 384 | * @throws UnexpectedValueException |
| 385 | */ |
| 386 | protected function getLiveRing() { |
| 387 | if ( !$this->ejectExpiryByLocation ) { |
| 388 | return $this->baseRing; // nothing ejected |
| 389 | } |
| 390 | |
| 391 | $now = $this->getCurrentTime(); |
| 392 | |
| 393 | if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) { |
| 394 | // Live ring needs to be regenerated... |
| 395 | $this->ejectExpiryByLocation = array_filter( |
| 396 | $this->ejectExpiryByLocation, |
| 397 | static function ( $expiry ) use ( $now ) { |
| 398 | return ( $expiry > $now ); |
| 399 | } |
| 400 | ); |
| 401 | |
| 402 | if ( count( $this->ejectExpiryByLocation ) ) { |
| 403 | // Some locations are still ejected from the ring |
| 404 | $liveRing = []; |
| 405 | foreach ( $this->baseRing as $nodeInfo ) { |
| 406 | $location = $nodeInfo[self::KEY_LOCATION]; |
| 407 | if ( !isset( $this->ejectExpiryByLocation[$location] ) ) { |
| 408 | $liveRing[] = $nodeInfo; |
| 409 | } |
| 410 | } |
| 411 | } else { |
| 412 | $liveRing = $this->baseRing; |
| 413 | } |
| 414 | |
| 415 | $this->liveRing = $liveRing; |
| 416 | } |
| 417 | |
| 418 | if ( !$this->liveRing ) { |
| 419 | throw new UnexpectedValueException( "The live ring is currently empty." ); |
| 420 | } |
| 421 | |
| 422 | return $this->liveRing; |
| 423 | } |
| 424 | |
| 425 | /** |
| 426 | * @return int UNIX timestamp |
| 427 | */ |
| 428 | protected function getCurrentTime() { |
| 429 | return time(); |
| 430 | } |
| 431 | |
| 432 | public function __serialize() { |
| 433 | return [ |
| 434 | 'algorithm' => $this->algo, |
| 435 | 'locations' => $this->weightByLocation, |
| 436 | 'ejections' => $this->ejectExpiryByLocation |
| 437 | ]; |
| 438 | } |
| 439 | |
| 440 | public function __unserialize( $data ) { |
| 441 | if ( is_array( $data ) ) { |
| 442 | $this->init( $data['locations'] ?? [], $data['algorithm'] ?? 'sha1', $data['ejections'] ?? [] ); |
| 443 | } else { |
| 444 | throw new UnexpectedValueException( __METHOD__ . ": unable to decode JSON." ); |
| 445 | } |
| 446 | } |
| 447 | } |
| 448 | |
| 449 | /** @deprecated class alias since 1.44 */ |
| 450 | class_alias( HashRing::class, 'HashRing' ); |