Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
76.75% |
175 / 228 |
|
50.00% |
11 / 22 |
CRAP | |
0.00% |
0 / 1 |
| GlobalIdGenerator | |
76.75% |
175 / 228 |
|
50.00% |
11 / 22 |
123.39 | |
0.00% |
0 / 1 |
| __construct | |
55.00% |
11 / 20 |
|
0.00% |
0 / 1 |
11.47 | |||
| newTimestampedUID88 | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
| getTimestampedID88 | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
2.01 | |||
| newTimestampedUID128 | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
| getTimestampedID128 | |
90.00% |
9 / 10 |
|
0.00% |
0 / 1 |
2.00 | |||
| newUUIDv1 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getUUIDv1 | |
95.65% |
22 / 23 |
|
0.00% |
0 / 1 |
2 | |||
| newRawUUIDv1 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| newUUIDv4 | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
| newRawUUIDv4 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| newSequentialPerNodeID | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| newSequentialPerNodeIDs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getTimestampFromUUIDv1 | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
| getSequentialPerNodeIDs | |
81.48% |
22 / 27 |
|
0.00% |
0 / 1 |
9.51 | |||
| getTimeAndDelay | |
79.17% |
38 / 48 |
|
0.00% |
0 / 1 |
9.73 | |||
| timeWaitUntil | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
2.15 | |||
| millisecondsSinceEpochBinary | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
2.15 | |||
| intervalsSinceGregorianBinary | |
31.58% |
6 / 19 |
|
0.00% |
0 / 1 |
9.12 | |||
| load | |
65.22% |
15 / 23 |
|
0.00% |
0 / 1 |
12.41 | |||
| getNodeId32 | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getNodeId48 | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| deleteCacheFiles | n/a |
0 / 0 |
n/a |
0 / 0 |
5 | |||||
| unitTestTearDown | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
| __destruct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * @license GPL-2.0-or-later |
| 4 | * @file |
| 5 | */ |
| 6 | |
| 7 | namespace Wikimedia\UUID; |
| 8 | |
| 9 | use InvalidArgumentException; |
| 10 | use RuntimeException; |
| 11 | use Wikimedia\Assert\Assert; |
| 12 | use Wikimedia\AtEase\AtEase; |
| 13 | use Wikimedia\Timestamp\ConvertibleTimestamp; |
| 14 | |
| 15 | /** |
| 16 | * Class for getting statistically unique IDs without a central coordinator |
| 17 | * |
| 18 | * @since 1.35 |
| 19 | */ |
| 20 | class GlobalIdGenerator { |
| 21 | /** @var callable Callback for running shell commands */ |
| 22 | protected $shellCallback; |
| 23 | |
| 24 | /** @var string Temporary directory */ |
| 25 | protected $tmpDir; |
| 26 | /** @var string |
| 27 | * File prefix containing user ID to prevent collisions |
| 28 | * if multiple users run MediaWiki (T268420) and getmyuid() is enabled |
| 29 | */ |
| 30 | protected $uniqueFilePrefix; |
| 31 | /** @var string Local file path */ |
| 32 | protected $nodeIdFile; |
| 33 | /** @var string Node ID in binary (32 bits) */ |
| 34 | protected $nodeId32; |
| 35 | /** @var string Node ID in binary (48 bits) */ |
| 36 | protected $nodeId48; |
| 37 | |
| 38 | /** @var bool Whether initialization completed */ |
| 39 | protected $loaded = false; |
| 40 | /** @var string Local file path */ |
| 41 | protected $lockFile88; |
| 42 | /** @var string Local file path */ |
| 43 | protected $lockFile128; |
| 44 | /** @var string Local file path */ |
| 45 | protected $lockFileUUID; |
| 46 | |
| 47 | /** @var array Cached file handles */ |
| 48 | protected $fileHandles = []; |
| 49 | |
| 50 | /** |
| 51 | * Avoid using __CLASS__ so namespace separators aren't interpreted |
| 52 | * as path components on Windows (T259693) |
| 53 | */ |
| 54 | private const FILE_PREFIX = 'mw-GlobalIdGenerator'; |
| 55 | |
| 56 | /** Key used in the serialized clock state map that is stored on disk */ |
| 57 | private const CLOCK_TIME = 'time'; |
| 58 | /** Key used in the serialized clock state map that is stored on disk */ |
| 59 | private const CLOCK_COUNTER = 'counter'; |
| 60 | /** Key used in the serialized clock state map that is stored on disk */ |
| 61 | private const CLOCK_SEQUENCE = 'clkSeq'; |
| 62 | /** Key used in the serialized clock state map that is stored on disk */ |
| 63 | private const CLOCK_OFFSET = 'offset'; |
| 64 | /** Key used in the serialized clock state map that is stored on disk */ |
| 65 | private const CLOCK_OFFSET_COUNTER = 'offsetCounter'; |
| 66 | |
| 67 | /** |
| 68 | * @param string|bool $tempDirectory A writable temporary directory |
| 69 | * @param callback $shellCallback A callback that takes a shell command and returns the output |
| 70 | */ |
| 71 | public function __construct( $tempDirectory, $shellCallback ) { |
| 72 | if ( func_num_args() >= 3 && !is_callable( $shellCallback ) ) { |
| 73 | trigger_error( |
| 74 | __CLASS__ . ' with a BagOStuff instance was deprecated in MediaWiki 1.37.', |
| 75 | E_USER_DEPRECATED |
| 76 | ); |
| 77 | $shellCallback = func_get_arg( 2 ); |
| 78 | } |
| 79 | if ( $tempDirectory === false || $tempDirectory === '' ) { |
| 80 | throw new InvalidArgumentException( "No temp directory provided" ); |
| 81 | } |
| 82 | $this->tmpDir = $tempDirectory; |
| 83 | // Include the UID in the filename (T268420, T358768) |
| 84 | if ( function_exists( 'posix_geteuid' ) ) { |
| 85 | $fileSuffix = posix_geteuid(); |
| 86 | } elseif ( function_exists( 'getmyuid' ) ) { |
| 87 | $fileSuffix = getmyuid(); |
| 88 | } else { |
| 89 | $fileSuffix = ''; |
| 90 | } |
| 91 | $this->uniqueFilePrefix = self::FILE_PREFIX . $fileSuffix; |
| 92 | $this->nodeIdFile = $tempDirectory . '/' . $this->uniqueFilePrefix . '-UID-nodeid'; |
| 93 | // If different processes run as different users, they may have different temp dirs. |
| 94 | // This is dealt with by initializing the clock sequence number and counters randomly. |
| 95 | $this->lockFile88 = $tempDirectory . '/' . $this->uniqueFilePrefix . '-UID-88'; |
| 96 | $this->lockFile128 = $tempDirectory . '/' . $this->uniqueFilePrefix . '-UID-128'; |
| 97 | $this->lockFileUUID = $tempDirectory . '/' . $this->uniqueFilePrefix . '-UUID-128'; |
| 98 | |
| 99 | $this->shellCallback = $shellCallback; |
| 100 | } |
| 101 | |
| 102 | /** |
| 103 | * Get a statistically unique 88-bit unsigned integer ID string. |
| 104 | * The bits of the UID are prefixed with the time (down to the millisecond). |
| 105 | * |
| 106 | * These IDs are suitable as values for the shard key of distributed data. |
| 107 | * If a column uses these as values, it should be declared UNIQUE to handle collisions. |
| 108 | * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast. |
| 109 | * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL. |
| 110 | * |
| 111 | * UID generation is serialized on each server (as the node ID is for the whole machine). |
| 112 | * |
| 113 | * @param int $base Specifies a base other than 10 |
| 114 | * @return string Number |
| 115 | * @throws RuntimeException |
| 116 | */ |
| 117 | public function newTimestampedUID88( int $base = 10 ) { |
| 118 | Assert::parameter( $base <= 36, '$base', 'must be <= 36' ); |
| 119 | Assert::parameter( $base >= 2, '$base', 'must be >= 2' ); |
| 120 | |
| 121 | $info = $this->getTimeAndDelay( 'lockFile88', 1, 1024, 1024 ); |
| 122 | $info[self::CLOCK_OFFSET_COUNTER] %= 1024; |
| 123 | |
| 124 | return \Wikimedia\base_convert( $this->getTimestampedID88( $info ), 2, $base ); |
| 125 | } |
| 126 | |
| 127 | /** |
| 128 | * @param array $info result of GlobalIdGenerator::getTimeAndDelay() |
| 129 | * @return string 88 bits |
| 130 | * @throws RuntimeException |
| 131 | */ |
| 132 | protected function getTimestampedID88( array $info ) { |
| 133 | $time = $info[self::CLOCK_TIME]; |
| 134 | $counter = $info[self::CLOCK_OFFSET_COUNTER]; |
| 135 | // Take the 46 LSBs of "milliseconds since epoch" |
| 136 | $id_bin = $this->millisecondsSinceEpochBinary( $time ); |
| 137 | // Add a 10 bit counter resulting in 56 bits total |
| 138 | $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT ); |
| 139 | // Add the 32 bit node ID resulting in 88 bits total |
| 140 | $id_bin .= $this->getNodeId32(); |
| 141 | // Convert to a 1-27 digit integer string |
| 142 | if ( strlen( $id_bin ) !== 88 ) { |
| 143 | throw new RuntimeException( "Detected overflow for millisecond timestamp." ); |
| 144 | } |
| 145 | |
| 146 | return $id_bin; |
| 147 | } |
| 148 | |
| 149 | /** |
| 150 | * Get a statistically unique 128-bit unsigned integer ID string. |
| 151 | * The bits of the UID are prefixed with the time (down to the millisecond). |
| 152 | * |
| 153 | * These IDs are suitable as globally unique IDs, without any enforced uniqueness. |
| 154 | * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast. |
| 155 | * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL. |
| 156 | * |
| 157 | * UID generation is serialized on each server (as the node ID is for the whole machine). |
| 158 | * |
| 159 | * @param int $base Specifies a base other than 10 |
| 160 | * @return string Number |
| 161 | * @throws RuntimeException |
| 162 | */ |
| 163 | public function newTimestampedUID128( int $base = 10 ) { |
| 164 | Assert::parameter( $base <= 36, '$base', 'must be <= 36' ); |
| 165 | Assert::parameter( $base >= 2, '$base', 'must be >= 2' ); |
| 166 | |
| 167 | $info = $this->getTimeAndDelay( 'lockFile128', 16384, 1_048_576, 1_048_576 ); |
| 168 | $info[self::CLOCK_OFFSET_COUNTER] %= 1_048_576; |
| 169 | |
| 170 | return \Wikimedia\base_convert( $this->getTimestampedID128( $info ), 2, $base ); |
| 171 | } |
| 172 | |
| 173 | /** |
| 174 | * @param array $info The result of GlobalIdGenerator::getTimeAndDelay() |
| 175 | * @return string 128 bits |
| 176 | * @throws RuntimeException |
| 177 | */ |
| 178 | protected function getTimestampedID128( array $info ) { |
| 179 | $time = $info[self::CLOCK_TIME]; |
| 180 | $counter = $info[self::CLOCK_OFFSET_COUNTER]; |
| 181 | $clkSeq = $info[self::CLOCK_SEQUENCE]; |
| 182 | // Take the 46 LSBs of "milliseconds since epoch" |
| 183 | $id_bin = $this->millisecondsSinceEpochBinary( $time ); |
| 184 | // Add a 20 bit counter resulting in 66 bits total |
| 185 | $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT ); |
| 186 | // Add a 14 bit clock sequence number resulting in 80 bits total |
| 187 | $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT ); |
| 188 | // Add the 48 bit node ID resulting in 128 bits total |
| 189 | $id_bin .= $this->getNodeId48(); |
| 190 | // Convert to a 1-39 digit integer string |
| 191 | if ( strlen( $id_bin ) !== 128 ) { |
| 192 | throw new RuntimeException( "Detected overflow for millisecond timestamp." ); |
| 193 | } |
| 194 | |
| 195 | return $id_bin; |
| 196 | } |
| 197 | |
| 198 | /** |
| 199 | * Return an RFC4122 compliant v1 UUID |
| 200 | * |
| 201 | * @return string |
| 202 | * @throws RuntimeException |
| 203 | */ |
| 204 | public function newUUIDv1() { |
| 205 | // There can be up to 10000 intervals for the same millisecond timestamp. |
| 206 | // [0,4999] counter + [0,5000] offset is in [0,9999] for the offset counter. |
| 207 | // Add this onto the timestamp to allow making up to 5000 IDs per second. |
| 208 | return $this->getUUIDv1( $this->getTimeAndDelay( 'lockFileUUID', 16384, 5000, 5001 ) ); |
| 209 | } |
| 210 | |
| 211 | /** |
| 212 | * @param array $info Result of GlobalIdGenerator::getTimeAndDelay() |
| 213 | * @return string 128 bits |
| 214 | */ |
| 215 | protected function getUUIDv1( array $info ) { |
| 216 | $clkSeq_bin = \Wikimedia\base_convert( $info[self::CLOCK_SEQUENCE], 10, 2, 14 ); |
| 217 | $time_bin = $this->intervalsSinceGregorianBinary( |
| 218 | $info[self::CLOCK_TIME], |
| 219 | $info[self::CLOCK_OFFSET_COUNTER] |
| 220 | ); |
| 221 | // Take the 32 bits of "time low" |
| 222 | $id_bin = substr( $time_bin, 28, 32 ); |
| 223 | // Add 16 bits of "time mid" resulting in 48 bits total |
| 224 | $id_bin .= substr( $time_bin, 12, 16 ); |
| 225 | // Add 4 bit version resulting in 52 bits total |
| 226 | $id_bin .= '0001'; |
| 227 | // Add 12 bits of "time high" resulting in 64 bits total |
| 228 | $id_bin .= substr( $time_bin, 0, 12 ); |
| 229 | // Add 2 bits of "variant" resulting in 66 bits total |
| 230 | $id_bin .= '10'; |
| 231 | // Add 6 bits of "clock seq high" resulting in 72 bits total |
| 232 | $id_bin .= substr( $clkSeq_bin, 0, 6 ); |
| 233 | // Add 8 bits of "clock seq low" resulting in 80 bits total |
| 234 | $id_bin .= substr( $clkSeq_bin, 6, 8 ); |
| 235 | // Add the 48 bit node ID resulting in 128 bits total |
| 236 | $id_bin .= $this->getNodeId48(); |
| 237 | // Convert to a 32 char hex string with dashes |
| 238 | if ( strlen( $id_bin ) !== 128 ) { |
| 239 | throw new RuntimeException( "Detected overflow for millisecond timestamp." ); |
| 240 | } |
| 241 | $hex = \Wikimedia\base_convert( $id_bin, 2, 16, 32 ); |
| 242 | return sprintf( '%s-%s-%s-%s-%s', |
| 243 | // "time_low" (32 bits) |
| 244 | substr( $hex, 0, 8 ), |
| 245 | // "time_mid" (16 bits) |
| 246 | substr( $hex, 8, 4 ), |
| 247 | // "time_hi_and_version" (16 bits) |
| 248 | substr( $hex, 12, 4 ), |
| 249 | // "clk_seq_hi_res" (8 bits) and "clk_seq_low" (8 bits) |
| 250 | substr( $hex, 16, 4 ), |
| 251 | // "node" (48 bits) |
| 252 | substr( $hex, 20, 12 ) |
| 253 | ); |
| 254 | } |
| 255 | |
| 256 | /** |
| 257 | * Return an RFC4122 compliant v1 UUID |
| 258 | * |
| 259 | * @return string 32 hex characters with no hyphens |
| 260 | * @throws RuntimeException |
| 261 | */ |
| 262 | public function newRawUUIDv1() { |
| 263 | return str_replace( '-', '', $this->newUUIDv1() ); |
| 264 | } |
| 265 | |
| 266 | /** |
| 267 | * Return an RFC4122 compliant v4 UUID |
| 268 | * |
| 269 | * @return string |
| 270 | * @throws RuntimeException |
| 271 | */ |
| 272 | public function newUUIDv4() { |
| 273 | $hex = bin2hex( random_bytes( 32 / 2 ) ); |
| 274 | |
| 275 | return sprintf( '%s-%s-%s-%s-%s', |
| 276 | // "time_low" (32 bits) |
| 277 | substr( $hex, 0, 8 ), |
| 278 | // "time_mid" (16 bits) |
| 279 | substr( $hex, 8, 4 ), |
| 280 | // "time_hi_and_version" (16 bits) |
| 281 | '4' . substr( $hex, 12, 3 ), |
| 282 | // "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits) |
| 283 | dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ), |
| 284 | // "node" (48 bits) |
| 285 | substr( $hex, 19, 12 ) |
| 286 | ); |
| 287 | } |
| 288 | |
| 289 | /** |
| 290 | * Return an RFC4122 compliant v4 UUID |
| 291 | * |
| 292 | * @return string 32 hex characters with no hyphens |
| 293 | * @throws RuntimeException |
| 294 | */ |
| 295 | public function newRawUUIDv4() { |
| 296 | return str_replace( '-', '', $this->newUUIDv4() ); |
| 297 | } |
| 298 | |
| 299 | /** |
| 300 | * Return an ID that is sequential *only* for this node and bucket |
| 301 | * |
| 302 | * These IDs are suitable for per-host sequence numbers, e.g. for some packet protocols. |
| 303 | * |
| 304 | * @param string $bucket Arbitrary bucket name (should be ASCII) |
| 305 | * @param int $bits Bit size (<=48) of resulting numbers before wrap-around |
| 306 | * @return float Integer value as float |
| 307 | */ |
| 308 | public function newSequentialPerNodeID( $bucket, $bits = 48 ) { |
| 309 | return current( $this->newSequentialPerNodeIDs( $bucket, $bits, 1 ) ); |
| 310 | } |
| 311 | |
| 312 | /** |
| 313 | * Return IDs that are sequential *only* for this node and bucket |
| 314 | * |
| 315 | * @param string $bucket Arbitrary bucket name (should be ASCII) |
| 316 | * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around |
| 317 | * @param int $count Number of IDs to return |
| 318 | * @return array Ordered list of float integer values |
| 319 | * @see GlobalIdGenerator::newSequentialPerNodeID() |
| 320 | */ |
| 321 | public function newSequentialPerNodeIDs( $bucket, $bits, $count ) { |
| 322 | return $this->getSequentialPerNodeIDs( $bucket, $bits, $count ); |
| 323 | } |
| 324 | |
| 325 | /** |
| 326 | * Get timestamp in a specified format from UUIDv1 |
| 327 | * |
| 328 | * @param string $uuid the UUID to get the timestamp from |
| 329 | * @param int $format the format to convert the timestamp to. Default: TS_MW |
| 330 | * @return string|false timestamp in requested format or false |
| 331 | */ |
| 332 | public function getTimestampFromUUIDv1( string $uuid, int $format = TS_MW ) { |
| 333 | $components = []; |
| 334 | if ( !preg_match( |
| 335 | '/^([0-9a-f]{8})-([0-9a-f]{4})-(1[0-9a-f]{3})-([89ab][0-9a-f]{3})-([0-9a-f]{12})$/', |
| 336 | $uuid, |
| 337 | $components |
| 338 | ) ) { |
| 339 | throw new InvalidArgumentException( "Invalid UUIDv1 {$uuid}" ); |
| 340 | } |
| 341 | |
| 342 | $timestamp = hexdec( substr( $components[3], 1 ) . $components[2] . $components[1] ); |
| 343 | // The 60 bit timestamp value is constructed from fields of this UUID. |
| 344 | // The timestamp is measured in 100-nanosecond units since midnight, October 15, 1582 UTC. |
| 345 | $unixTime = ( $timestamp - 0x01b21dd213814000 ) / 1e7; |
| 346 | |
| 347 | return ConvertibleTimestamp::convert( $format, $unixTime ); |
| 348 | } |
| 349 | |
| 350 | /** |
| 351 | * Return IDs that are sequential *only* for this node and bucket |
| 352 | * |
| 353 | * @param string $bucket Arbitrary bucket name (should be ASCII) |
| 354 | * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around |
| 355 | * @param int $count Number of IDs to return |
| 356 | * @return array Ordered list of float integer values |
| 357 | * @throws RuntimeException |
| 358 | * @see GlobalIdGenerator::newSequentialPerNodeID() |
| 359 | */ |
| 360 | protected function getSequentialPerNodeIDs( $bucket, $bits, $count ) { |
| 361 | if ( $count <= 0 ) { |
| 362 | return []; |
| 363 | } |
| 364 | if ( $bits < 16 || $bits > 48 ) { |
| 365 | throw new RuntimeException( "Requested bit size ($bits) is out of range." ); |
| 366 | } |
| 367 | |
| 368 | $path = $this->tmpDir . '/' . $this->uniqueFilePrefix . '-' . rawurlencode( $bucket ) . '-48'; |
| 369 | // Get the UID lock file handle |
| 370 | if ( isset( $this->fileHandles[$path] ) ) { |
| 371 | $handle = $this->fileHandles[$path]; |
| 372 | } else { |
| 373 | $handle = fopen( $path, 'cb+' ); |
| 374 | $this->fileHandles[$path] = $handle ?: null; |
| 375 | } |
| 376 | // Acquire the UID lock file |
| 377 | if ( $handle === false ) { |
| 378 | throw new RuntimeException( "Could not open '{$path}'." ); |
| 379 | } |
| 380 | if ( !flock( $handle, LOCK_EX ) ) { |
| 381 | fclose( $handle ); |
| 382 | throw new RuntimeException( "Could not acquire '{$path}'." ); |
| 383 | } |
| 384 | // Fetch the counter value and increment it... |
| 385 | rewind( $handle ); |
| 386 | |
| 387 | // fetch as float |
| 388 | $counter = floor( (float)trim( fgets( $handle ) ) ) + $count; |
| 389 | |
| 390 | // Write back the new counter value |
| 391 | ftruncate( $handle, 0 ); |
| 392 | rewind( $handle ); |
| 393 | |
| 394 | // Use fmod() to avoid "division by zero" on 32 bit machines |
| 395 | // warp-around as needed |
| 396 | fwrite( $handle, (string)fmod( $counter, 2 ** 48 ) ); |
| 397 | fflush( $handle ); |
| 398 | |
| 399 | // Release the UID lock file |
| 400 | flock( $handle, LOCK_UN ); |
| 401 | |
| 402 | $ids = []; |
| 403 | $divisor = 2 ** $bits; |
| 404 | |
| 405 | // pre-increment counter value |
| 406 | $currentId = floor( $counter - $count ); |
| 407 | for ( $i = 0; $i < $count; ++$i ) { |
| 408 | // Use fmod() to avoid "division by zero" on 32 bit machines |
| 409 | $ids[] = fmod( ++$currentId, $divisor ); |
| 410 | } |
| 411 | |
| 412 | return $ids; |
| 413 | } |
| 414 | |
| 415 | /** |
| 416 | * Get a (time,counter,clock sequence) where (time,counter) is higher |
| 417 | * than any previous (time,counter) value for the given clock sequence. |
| 418 | * This is useful for making UIDs sequential on a per-node basis. |
| 419 | * |
| 420 | * @param string $lockFile Name of a local lock file |
| 421 | * @param int $clockSeqSize The number of possible clock sequence values |
| 422 | * @param int $counterSize The number of possible counter values |
| 423 | * @param int $offsetSize The number of possible offset values |
| 424 | * @return array Array with the following keys: |
| 425 | * - GlobalIdGenerator::CLOCK_TIME: (integer seconds, integer milliseconds) array |
| 426 | * - GlobalIdGenerator::CLOCK_COUNTER: integer millisecond tie-breaking counter |
| 427 | * - GlobalIdGenerator::CLOCK_SEQUENCE: integer clock identifier that is local to the node |
| 428 | * - GlobalIdGenerator::CLOCK_OFFSET: integer offset for millisecond tie-breaking counter |
| 429 | * - GlobalIdGenerator::CLOCK_OFFSET_COUNTER: integer; CLOCK_COUNTER with CLOCK_OFFSET applied |
| 430 | * @throws RuntimeException |
| 431 | */ |
| 432 | protected function getTimeAndDelay( $lockFile, $clockSeqSize, $counterSize, $offsetSize ) { |
| 433 | // Get the UID lock file handle |
| 434 | if ( isset( $this->fileHandles[$this->$lockFile] ) ) { |
| 435 | $handle = $this->fileHandles[$this->$lockFile]; |
| 436 | } else { |
| 437 | $handle = fopen( $this->$lockFile, 'cb+' ); |
| 438 | $this->fileHandles[$this->$lockFile] = $handle ?: null; |
| 439 | } |
| 440 | // Acquire the UID lock file |
| 441 | if ( $handle === false ) { |
| 442 | throw new RuntimeException( "Could not open '{$this->$lockFile}'." ); |
| 443 | } |
| 444 | if ( !flock( $handle, LOCK_EX ) ) { |
| 445 | fclose( $handle ); |
| 446 | throw new RuntimeException( "Could not acquire '{$this->$lockFile}'." ); |
| 447 | } |
| 448 | |
| 449 | // The formatters that use this method expect a timestamp with millisecond |
| 450 | // precision and a counter upto a certain size. When more IDs than the counter |
| 451 | // size are generated during the same timestamp, an exception is thrown as we |
| 452 | // cannot increment further, because the formatted ID would not have enough |
| 453 | // bits to fit the counter. |
| 454 | // |
| 455 | // To orchestrate this between independent PHP processes on the same host, |
| 456 | // we must have a common sense of time so that we only have to maintain |
| 457 | // a single counter in a single lock file. |
| 458 | // |
| 459 | // Given that: |
| 460 | // * The system clock can be observed via time(), without milliseconds. |
| 461 | // * Some other clock can be observed via microtime(), which also offers |
| 462 | // millisecond precision. |
| 463 | // * microtime() drifts in-process further and further away from the system |
| 464 | // clock the longer a process runs for. |
| 465 | // For example, on 2018-10-03 an HHVM 3.18 JobQueue process at WMF, |
| 466 | // that ran for 9 min 55 sec, microtime drifted by 7 seconds. |
| 467 | // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659. |
| 468 | // |
| 469 | // We have two choices: |
| 470 | // |
| 471 | // 1. Use microtime() with the following caveats: |
| 472 | // - The last stored time may be in the future, or our current time may be in the |
| 473 | // past, in which case we'll frequently enter the slow timeWaitUntil() method to |
| 474 | // try and "sync" the current process with the previous process. |
| 475 | // We mustn't block for long though, max 10ms? |
| 476 | // - For any drift above 10ms, we pretend that the clock went backwards, and treat |
| 477 | // it the same way as after an NTP sync, by incrementing clock sequence instead. |
| 478 | // Given the sequence rolls over automatically, and silently, and is meant to be |
| 479 | // rare, this essentially sacrifices a reasonable guarantee of uniqueness. |
| 480 | // - For long running processes (e.g. longer than a few seconds) the drift can |
| 481 | // easily be more than 2 seconds. Because we only have a single lock file |
| 482 | // and don't want to keep too many counters and deal with clearing those, |
| 483 | // we fatal the user and refuse to make an ID. (T94522) |
| 484 | // - This offers terrible service availability. |
| 485 | // 2. Use time() instead, and expand the counter size by 1000x and use its |
| 486 | // digits as if they were the millisecond fraction of our timestamp. |
| 487 | // Known caveats or perf impact: None. We still need to read-write our |
| 488 | // lock file on each generation, so might as well make the most of it. |
| 489 | // |
| 490 | // We choose the latter. |
| 491 | $msecCounterSize = $counterSize * 1000; |
| 492 | |
| 493 | rewind( $handle ); |
| 494 | // Format of lock file contents: |
| 495 | // "<clk seq> <sec> <msec counter> <rand offset>" |
| 496 | $data = explode( ' ', fgets( $handle ) ); |
| 497 | |
| 498 | if ( count( $data ) === 4 ) { |
| 499 | // The UID lock file was already initialized |
| 500 | $clkSeq = (int)$data[0] % $clockSeqSize; |
| 501 | $prevSec = (int)$data[1]; |
| 502 | $prevMsecCounter = (int)$data[2] % $msecCounterSize; |
| 503 | $randOffset = (int)$data[3] % $counterSize; |
| 504 | // If the system clock moved back or inter-process clock drift caused the last |
| 505 | // writer process to record a higher time than the current process time, then |
| 506 | // briefly wait for the current process clock to catch up. |
| 507 | $sec = $this->timeWaitUntil( $prevSec ); |
| 508 | if ( $sec === false ) { |
| 509 | // There was too much clock drift to wait. Bump the clock sequence number to |
| 510 | // avoid collisions between new and already-generated IDs with the same time. |
| 511 | $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize; |
| 512 | $sec = time(); |
| 513 | $msecCounter = 0; |
| 514 | $randOffset = random_int( 0, $offsetSize - 1 ); |
| 515 | trigger_error( "Clock was set back; sequence number incremented." ); |
| 516 | } elseif ( $sec === $prevSec ) { |
| 517 | // The time matches the last ID. Bump the tie-breaking counter. |
| 518 | $msecCounter = $prevMsecCounter + 1; |
| 519 | if ( $msecCounter >= $msecCounterSize ) { |
| 520 | // More IDs generated with the same time than counterSize can accommodate |
| 521 | flock( $handle, LOCK_UN ); |
| 522 | throw new RuntimeException( "Counter overflow for timestamp value." ); |
| 523 | } |
| 524 | } else { |
| 525 | // The time is higher than the last ID. Reset the tie-breaking counter. |
| 526 | $msecCounter = 0; |
| 527 | } |
| 528 | } else { |
| 529 | // Initialize UID lock file information |
| 530 | $clkSeq = random_int( 0, $clockSeqSize - 1 ); |
| 531 | $sec = time(); |
| 532 | $msecCounter = 0; |
| 533 | $randOffset = random_int( 0, $offsetSize - 1 ); |
| 534 | } |
| 535 | |
| 536 | // Update and release the UID lock file |
| 537 | ftruncate( $handle, 0 ); |
| 538 | rewind( $handle ); |
| 539 | fwrite( $handle, "{$clkSeq} {$sec} {$msecCounter} {$randOffset}" ); |
| 540 | fflush( $handle ); |
| 541 | flock( $handle, LOCK_UN ); |
| 542 | |
| 543 | // Split msecCounter back into msec and counter |
| 544 | $msec = (int)( $msecCounter / 1000 ); |
| 545 | $counter = $msecCounter % 1000; |
| 546 | |
| 547 | return [ |
| 548 | self::CLOCK_TIME => [ $sec, $msec ], |
| 549 | self::CLOCK_COUNTER => $counter, |
| 550 | self::CLOCK_SEQUENCE => $clkSeq, |
| 551 | self::CLOCK_OFFSET => $randOffset, |
| 552 | self::CLOCK_OFFSET_COUNTER => $counter + $randOffset, |
| 553 | ]; |
| 554 | } |
| 555 | |
| 556 | /** |
| 557 | * Wait till the current timestamp reaches $time and return the current |
| 558 | * timestamp. This returns false if it would have to wait more than 10ms. |
| 559 | * |
| 560 | * @param int $time Result of time() |
| 561 | * @return int|bool Timestamp or false |
| 562 | */ |
| 563 | protected function timeWaitUntil( $time ) { |
| 564 | $start = microtime( true ); |
| 565 | do { |
| 566 | $ct = time(); |
| 567 | // https://www.php.net/manual/en/language.operators.comparison.php |
| 568 | if ( $ct >= $time ) { |
| 569 | // current time is higher than or equal to than $time |
| 570 | return $ct; |
| 571 | } |
| 572 | // up to 10ms |
| 573 | } while ( ( microtime( true ) - $start ) <= 0.010 ); |
| 574 | |
| 575 | return false; |
| 576 | } |
| 577 | |
| 578 | /** |
| 579 | * @param array $time Array of second and millisecond integers |
| 580 | * @return string 46 LSBs of "milliseconds since epoch" in binary (rolls over in 4201) |
| 581 | * @throws RuntimeException |
| 582 | */ |
| 583 | protected function millisecondsSinceEpochBinary( array $time ) { |
| 584 | [ $sec, $msec ] = $time; |
| 585 | $ts = 1000 * $sec + $msec; |
| 586 | if ( $ts > 2 ** 52 ) { |
| 587 | throw new RuntimeException( __METHOD__ . |
| 588 | ': sorry, this function doesn\'t work after the year 144680' ); |
| 589 | } |
| 590 | |
| 591 | return substr( \Wikimedia\base_convert( (string)$ts, 10, 2, 46 ), -46 ); |
| 592 | } |
| 593 | |
| 594 | /** |
| 595 | * @param array{0:int,1:int} $time Array of second and millisecond integers |
| 596 | * @param int $delta Number of intervals to add on to the timestamp |
| 597 | * @return string 60 bits of "100ns intervals since 15 October 1582" (rolls over in 3400) |
| 598 | * @throws RuntimeException |
| 599 | */ |
| 600 | protected function intervalsSinceGregorianBinary( array $time, $delta = 0 ) { |
| 601 | [ $sec, $msec ] = $time; |
| 602 | $offset = '122192928000000000'; |
| 603 | |
| 604 | // 64 bit integers |
| 605 | if ( PHP_INT_SIZE >= 8 ) { |
| 606 | $ts = ( 1000 * $sec + $msec ) * 10000 + (int)$offset + $delta; |
| 607 | $id_bin = str_pad( decbin( $ts % ( 2 ** 60 ) ), 60, '0', STR_PAD_LEFT ); |
| 608 | } elseif ( extension_loaded( 'gmp' ) ) { |
| 609 | // ms |
| 610 | $ts = gmp_add( gmp_mul( (string)$sec, '1000' ), (string)$msec ); |
| 611 | // 100ns intervals |
| 612 | $ts = gmp_add( gmp_mul( $ts, '10000' ), $offset ); |
| 613 | $ts = gmp_add( $ts, (string)$delta ); |
| 614 | // wrap around |
| 615 | $ts = gmp_mod( $ts, gmp_pow( '2', 60 ) ); |
| 616 | $id_bin = str_pad( gmp_strval( $ts, 2 ), 60, '0', STR_PAD_LEFT ); |
| 617 | } elseif ( extension_loaded( 'bcmath' ) ) { |
| 618 | // ms |
| 619 | $ts = bcadd( bcmul( (string)$sec, '1000' ), (string)$msec ); |
| 620 | // 100ns intervals |
| 621 | $ts = bcadd( bcmul( $ts, '10000' ), $offset ); |
| 622 | $ts = bcadd( $ts, (string)$delta ); |
| 623 | // wrap around |
| 624 | $ts = bcmod( $ts, bcpow( '2', '60' ) ); |
| 625 | $id_bin = \Wikimedia\base_convert( $ts, 10, 2, 60 ); |
| 626 | } else { |
| 627 | throw new RuntimeException( 'bcmath or gmp extension required for 32 bit machines.' ); |
| 628 | } |
| 629 | return $id_bin; |
| 630 | } |
| 631 | |
| 632 | /** |
| 633 | * Load the node ID information |
| 634 | */ |
| 635 | private function load() { |
| 636 | if ( $this->loaded ) { |
| 637 | return; |
| 638 | } |
| 639 | |
| 640 | $this->loaded = true; |
| 641 | |
| 642 | // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged |
| 643 | $nodeId = @file_get_contents( $this->nodeIdFile ) ?: ''; |
| 644 | // Try to get some ID that uniquely identifies this machine (RFC 4122)... |
| 645 | if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) { |
| 646 | AtEase::suppressWarnings(); |
| 647 | if ( PHP_OS_FAMILY === 'Windows' ) { |
| 648 | // https://technet.microsoft.com/en-us/library/bb490913.aspx |
| 649 | $csv = trim( ( $this->shellCallback )( 'getmac /NH /FO CSV' ) ); |
| 650 | $line = substr( $csv, 0, strcspn( $csv, "\n" ) ); |
| 651 | $info = str_getcsv( $line, ",", "\"", "\\" ); |
| 652 | // @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal False positive |
| 653 | $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : ''; |
| 654 | } elseif ( is_executable( '/sbin/ifconfig' ) ) { |
| 655 | // Linux/BSD/Solaris/OS X |
| 656 | // See https://linux.die.net/man/8/ifconfig |
| 657 | $m = []; |
| 658 | preg_match( '/\s([0-9a-f]{2}(?::[0-9a-f]{2}){5})\s/', |
| 659 | ( $this->shellCallback )( '/sbin/ifconfig -a' ), $m ); |
| 660 | $nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : ''; |
| 661 | } |
| 662 | AtEase::restoreWarnings(); |
| 663 | if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) { |
| 664 | $nodeId = bin2hex( random_bytes( 12 / 2 ) ); |
| 665 | // set multicast bit |
| 666 | $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); |
| 667 | } |
| 668 | file_put_contents( $this->nodeIdFile, $nodeId ); |
| 669 | } |
| 670 | $this->nodeId32 = \Wikimedia\base_convert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 ); |
| 671 | $this->nodeId48 = \Wikimedia\base_convert( $nodeId, 16, 2, 48 ); |
| 672 | } |
| 673 | |
| 674 | /** |
| 675 | * @return string |
| 676 | */ |
| 677 | private function getNodeId32() { |
| 678 | $this->load(); |
| 679 | |
| 680 | return $this->nodeId32; |
| 681 | } |
| 682 | |
| 683 | /** |
| 684 | * @return string |
| 685 | */ |
| 686 | private function getNodeId48() { |
| 687 | $this->load(); |
| 688 | |
| 689 | return $this->nodeId48; |
| 690 | } |
| 691 | |
| 692 | /** |
| 693 | * Delete all cache files that have been created (T46850) |
| 694 | * |
| 695 | * This is a cleanup method primarily meant to be used from unit tests to |
| 696 | * avoid polluting the local filesystem. If used outside of a unit test |
| 697 | * environment it should be used with caution as it may destroy state saved |
| 698 | * in the files. |
| 699 | * |
| 700 | * @see unitTestTearDown |
| 701 | * @codeCoverageIgnore |
| 702 | */ |
| 703 | private function deleteCacheFiles() { |
| 704 | foreach ( $this->fileHandles as $path => $handle ) { |
| 705 | if ( $handle !== null ) { |
| 706 | fclose( $handle ); |
| 707 | } |
| 708 | if ( is_file( $path ) ) { |
| 709 | unlink( $path ); |
| 710 | } |
| 711 | unset( $this->fileHandles[$path] ); |
| 712 | } |
| 713 | if ( is_file( $this->nodeIdFile ) ) { |
| 714 | unlink( $this->nodeIdFile ); |
| 715 | } |
| 716 | } |
| 717 | |
| 718 | /** |
| 719 | * Cleanup resources when tearing down after a unit test (T46850) |
| 720 | * |
| 721 | * This is a cleanup method primarily meant to be used from unit tests to |
| 722 | * avoid polluting the local filesystem. If used outside of a unit test |
| 723 | * environment it should be used with caution as it may destroy state saved |
| 724 | * in the files. |
| 725 | * |
| 726 | * @internal For use by unit tests |
| 727 | * @see deleteCacheFiles |
| 728 | * @codeCoverageIgnore |
| 729 | */ |
| 730 | public function unitTestTearDown() { |
| 731 | $this->deleteCacheFiles(); |
| 732 | } |
| 733 | |
| 734 | public function __destruct() { |
| 735 | // @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown |
| 736 | array_map( 'fclose', array_filter( $this->fileHandles ) ); |
| 737 | } |
| 738 | } |