MediaWiki master
HashRing.php
Go to the documentation of this file.
1<?php
44class HashRing {
46 protected $algo;
51
53 protected $baseRing;
55 protected $liveRing;
56
58 private const HASHES_PER_LOCATION = 40;
60 private const SECTORS_PER_HASH = 4;
61
62 public const KEY_POS = 0;
63 public const KEY_LOCATION = 1;
64
66 public const RING_ALL = 0;
68 public const RING_LIVE = 1;
69
78 public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) {
79 $this->init( $map, $algo, $ejections );
80 }
81
87 protected function init( array $map, $algo, array $ejections ) {
88 if ( !in_array( $algo, hash_algos(), true ) ) {
89 throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." );
90 }
91
92 $weightByLocation = array_filter( $map );
93 if ( $weightByLocation === [] ) {
94 throw new UnexpectedValueException( "No locations with non-zero weight." );
95 } elseif ( min( $map ) < 0 ) {
96 throw new InvalidArgumentException( "Location weight cannot be negative." );
97 }
98
99 $this->algo = $algo;
100 $this->weightByLocation = $weightByLocation;
101 $this->ejectExpiryByLocation = $ejections;
102 $this->baseRing = $this->buildLocationRing( $this->weightByLocation );
103 }
104
112 final public function getLocation( $item ) {
113 return $this->getLocations( $item, 1 )[0];
114 }
115
126 public function getLocations( $item, $limit, $from = self::RING_ALL ) {
127 if ( $from === self::RING_ALL ) {
128 $ring = $this->baseRing;
129 } elseif ( $from === self::RING_LIVE ) {
130 $ring = $this->getLiveRing();
131 } else {
132 throw new InvalidArgumentException( "Invalid ring source specified." );
133 }
134
135 // Short-circuit for the common single-location case. Note that if there was only one
136 // location and it was ejected from the live ring, getLiveRing() would have error out.
137 if ( count( $this->weightByLocation ) == 1 ) {
138 return ( $limit > 0 ) ? [ $ring[0][self::KEY_LOCATION] ] : [];
139 }
140
141 // Locate the node index for this item's position on the hash ring
142 $itemIndex = $this->findNodeIndexForPosition( $this->getItemPosition( $item ), $ring );
143
144 $locations = [];
145 $currentIndex = null;
146 while ( count( $locations ) < $limit ) {
147 if ( $currentIndex === null ) {
148 $currentIndex = $itemIndex;
149 } else {
150 $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring );
151 if ( $currentIndex === $itemIndex ) {
152 break; // all nodes visited
153 }
154 }
155 // @phan-suppress-next-line PhanTypeMismatchDimFetchNullable False positive
156 $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION];
157 if ( !in_array( $nodeLocation, $locations, true ) ) {
158 // Ignore other nodes for the same locations already added
159 $locations[] = $nodeLocation;
160 }
161 }
162
163 return $locations;
164 }
165
171 private function findNodeIndexForPosition( $position, $ring ) {
172 $count = count( $ring );
173 if ( $count === 0 ) {
174 return null;
175 }
176
177 $index = null;
178 $lowPos = 0;
179 $highPos = $count;
180 while ( true ) {
181 $midPos = (int)( ( $lowPos + $highPos ) / 2 );
182 if ( $midPos === $count ) {
183 $index = 0;
184 break;
185 }
186
187 $midVal = $ring[$midPos][self::KEY_POS];
188 $midMinusOneVal = ( $midPos === 0 ) ? 0 : $ring[$midPos - 1][self::KEY_POS];
189 if ( $position <= $midVal && $position > $midMinusOneVal ) {
190 $index = $midPos;
191 break;
192 }
193
194 if ( $midVal < $position ) {
195 $lowPos = $midPos + 1;
196 } else {
197 $highPos = $midPos - 1;
198 }
199
200 if ( $lowPos > $highPos ) {
201 $index = 0;
202 break;
203 }
204 }
205
206 return $index;
207 }
208
214 public function getLocationWeights() {
215 return $this->weightByLocation;
216 }
217
226 public function ejectFromLiveRing( $location, $ttl ) {
227 if ( !isset( $this->weightByLocation[$location] ) ) {
228 throw new UnexpectedValueException( "No location '$location' in the ring." );
229 }
230
231 $expiry = $this->getCurrentTime() + $ttl;
232 $this->ejectExpiryByLocation[$location] = $expiry;
233
234 $this->liveRing = null; // invalidate ring cache
235
236 return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) );
237 }
238
246 final public function getLiveLocation( $item ) {
247 return $this->getLocations( $item, 1, self::RING_LIVE )[0];
248 }
249
258 final public function getLiveLocations( $item, $limit ) {
259 return $this->getLocations( $item, $limit, self::RING_LIVE );
260 }
261
268 public function getLiveLocationWeights() {
269 $now = $this->getCurrentTime();
270
271 return array_diff_key(
272 $this->weightByLocation,
273 array_filter(
274 $this->ejectExpiryByLocation,
275 static function ( $expiry ) use ( $now ) {
276 return ( $expiry > $now );
277 }
278 )
279 );
280 }
281
286 private function buildLocationRing( array $weightByLocation ) {
287 $locationCount = count( $weightByLocation );
288 $totalWeight = array_sum( $weightByLocation );
289
290 $ring = [];
291 // Assign nodes to all locations based on location weight
292 $claimed = []; // (position as string => (node, index))
293 foreach ( $weightByLocation as $location => $weight ) {
294 $ratio = $weight / $totalWeight;
295 // There $locationCount * (HASHES_PER_LOCATION * 4) nodes available;
296 // assign a few groups of nodes to this location based on its weight.
297 $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount );
298 for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) {
299 // For efficiency, get 4 points per hash call and 4X node count.
300 // If $algo is MD5, then this matches that of with libketama.
301 // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
302 $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" );
303 foreach ( $positions as $gi => $position ) {
304 $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location";
305 $posKey = (string)$position; // large integer
306 if ( isset( $claimed[$posKey] ) ) {
307 // Disallow duplicates (name decides precedence)
308 if ( $claimed[$posKey]['node'] > $node ) {
309 continue;
310 } else {
311 unset( $ring[$claimed[$posKey]['index']] );
312 }
313 }
314 $ring[] = [
315 self::KEY_POS => $position,
316 self::KEY_LOCATION => $location
317 ];
318 $claimed[$posKey] = [ 'node' => $node, 'index' => count( $ring ) - 1 ];
319 }
320 }
321 }
322 // Sort the locations into clockwise order based on the hash ring position
323 usort( $ring, static function ( $a, $b ) {
324 if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) {
325 throw new UnexpectedValueException( 'Duplicate node positions.' );
326 }
327
328 return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 );
329 } );
330
331 return $ring;
332 }
333
338 private function getItemPosition( $item ) {
339 // If $algo is MD5, then this matches that of with libketama.
340 // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
341 $octets = substr( hash( $this->algo, (string)$item, true ), 0, 4 );
342 if ( strlen( $octets ) != 4 ) {
343 throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." );
344 }
345
346 $pos = unpack( 'V', $octets )[1];
347 if ( $pos < 0 ) {
348 // Most-significant-bit is set, causing unpack() to return a negative integer due
349 // to the fact that it returns a signed int. Cast it to an unsigned integer string.
350 $pos = sprintf( '%u', $pos );
351 }
352
353 return (float)$pos;
354 }
355
360 private function getNodePositionQuartet( $nodeGroupName ) {
361 $octets = substr( hash( $this->algo, (string)$nodeGroupName, true ), 0, 16 );
362 if ( strlen( $octets ) != 16 ) {
363 throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." );
364 }
365
366 $positions = [];
367 foreach ( unpack( 'V4', $octets ) as $signed ) {
368 $positions[] = (float)sprintf( '%u', $signed );
369 }
370
371 return $positions;
372 }
373
379 private function getNextClockwiseNodeIndex( $i, $ring ) {
380 if ( !isset( $ring[$i] ) ) {
381 throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." );
382 }
383
384 $next = $i + 1;
385
386 return ( $next < count( $ring ) ) ? $next : 0;
387 }
388
395 protected function getLiveRing() {
396 if ( !$this->ejectExpiryByLocation ) {
397 return $this->baseRing; // nothing ejected
398 }
399
400 $now = $this->getCurrentTime();
401
402 if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) {
403 // Live ring needs to be regenerated...
404 $this->ejectExpiryByLocation = array_filter(
405 $this->ejectExpiryByLocation,
406 static function ( $expiry ) use ( $now ) {
407 return ( $expiry > $now );
408 }
409 );
410
411 if ( count( $this->ejectExpiryByLocation ) ) {
412 // Some locations are still ejected from the ring
413 $liveRing = [];
414 foreach ( $this->baseRing as $nodeInfo ) {
415 $location = $nodeInfo[self::KEY_LOCATION];
416 if ( !isset( $this->ejectExpiryByLocation[$location] ) ) {
417 $liveRing[] = $nodeInfo;
418 }
419 }
420 } else {
421 $liveRing = $this->baseRing;
422 }
423
424 $this->liveRing = $liveRing;
425 }
426
427 if ( !$this->liveRing ) {
428 throw new UnexpectedValueException( "The live ring is currently empty." );
429 }
430
431 return $this->liveRing;
432 }
433
437 protected function getCurrentTime() {
438 return time();
439 }
440
441 public function __serialize() {
442 return [
443 'algorithm' => $this->algo,
444 'locations' => $this->weightByLocation,
445 'ejections' => $this->ejectExpiryByLocation
446 ];
447 }
448
449 public function __unserialize( $data ) {
450 if ( is_array( $data ) ) {
451 $this->init( $data['locations'] ?? [], $data['algorithm'] ?? 'sha1', $data['ejections'] ?? [] );
452 } else {
453 throw new UnexpectedValueException( __METHOD__ . ": unable to decode JSON." );
454 }
455 }
456}
Convenience class for weighted consistent hash rings.
Definition HashRing.php:44
getLiveLocations( $item, $limit)
Get the location of an item on the "live" ring, as well as the next locations.
Definition HashRing.php:258
int[] $ejectExpiryByLocation
Map of (location => UNIX timestamp)
Definition HashRing.php:50
const KEY_LOCATION
Definition HashRing.php:63
array[] null $liveRing
Non-empty position-ordered list of (position, location name)
Definition HashRing.php:55
getLiveLocationWeights()
Get the map of "live" locations to weight (does not include zero weight items)
Definition HashRing.php:268
array[] $baseRing
Non-empty position-ordered list of (position, location name)
Definition HashRing.php:53
init(array $map, $algo, array $ejections)
Definition HashRing.php:87
getLiveLocation( $item)
Get the location of an item on the "live" ring.
Definition HashRing.php:246
int[] $weightByLocation
Non-empty (location => integer weight)
Definition HashRing.php:48
getCurrentTime()
Definition HashRing.php:437
getLocation( $item)
Get the location of an item on the ring.
Definition HashRing.php:112
__unserialize( $data)
Definition HashRing.php:449
const KEY_POS
Definition HashRing.php:62
__serialize()
Definition HashRing.php:441
getLocationWeights()
Get the map of locations to weight (does not include zero weight items)
Definition HashRing.php:214
getLocations( $item, $limit, $from=self::RING_ALL)
Get the location of an item on the ring followed by the next ring locations.
Definition HashRing.php:126
getLiveRing()
Get the "live" hash ring (which does not include ejected locations)
Definition HashRing.php:395
string $algo
Hashing algorithm for hash()
Definition HashRing.php:46
__construct(array $map, $algo='sha1', array $ejections=[])
Make a consistent hash ring given a set of locations and their weight values.
Definition HashRing.php:78
ejectFromLiveRing( $location, $ttl)
Remove a location from the "live" hash ring.
Definition HashRing.php:226