MediaWiki REL1_34
UIDGenerator.php
Go to the documentation of this file.
1<?php
22use Wikimedia\Assert\Assert;
24
32 protected static $instance = null;
34 protected $nodeIdFile;
36 protected $nodeId32;
38 protected $nodeId48;
39
41 protected $lockFile88;
43 protected $lockFile128;
45 protected $lockFileUUID;
46
48 protected $fileHandles = []; // cached file handles
49
50 const QUICK_RAND = 1; // get randomness from fast and insecure sources
51 const QUICK_VOLATILE = 2; // use an APC like in-memory counter if available
52
53 protected function __construct() {
54 $this->nodeIdFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
55 $nodeId = '';
56 if ( is_file( $this->nodeIdFile ) ) {
57 $nodeId = file_get_contents( $this->nodeIdFile );
58 }
59 // Try to get some ID that uniquely identifies this machine (RFC 4122)...
60 if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
61 Wikimedia\suppressWarnings();
62 if ( wfIsWindows() ) {
63 // https://technet.microsoft.com/en-us/library/bb490913.aspx
64 $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
65 $line = substr( $csv, 0, strcspn( $csv, "\n" ) );
66 $info = str_getcsv( $line );
67 $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
68 } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
69 // See https://linux.die.net/man/8/ifconfig
70 $m = [];
71 preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
72 wfShellExec( '/sbin/ifconfig -a' ), $m );
73 $nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
74 }
75 Wikimedia\restoreWarnings();
76 if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
77 $nodeId = MWCryptRand::generateHex( 12 );
78 $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
79 }
80 file_put_contents( $this->nodeIdFile, $nodeId ); // cache
81 }
82 $this->nodeId32 = Wikimedia\base_convert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
83 $this->nodeId48 = Wikimedia\base_convert( $nodeId, 16, 2, 48 );
84 // If different processes run as different users, they may have different temp dirs.
85 // This is dealt with by initializing the clock sequence number and counters randomly.
86 $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
87 $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
88 $this->lockFileUUID = wfTempDir() . '/mw-' . __CLASS__ . '-UUID-128';
89 }
90
95 protected static function singleton() {
96 if ( self::$instance === null ) {
97 self::$instance = new self();
98 }
99
100 return self::$instance;
101 }
102
118 public static function newTimestampedUID88( $base = 10 ) {
119 Assert::parameterType( 'integer', $base, '$base' );
120 Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
121 Assert::parameter( $base >= 2, '$base', 'must be >= 2' );
122
123 $gen = self::singleton();
124 $info = $gen->getTimeAndDelay( 'lockFile88', 1, 1024, 1024 );
125 $info['offsetCounter'] = $info['offsetCounter'] % 1024;
126 return Wikimedia\base_convert( $gen->getTimestampedID88( $info ), 2, $base );
127 }
128
135 protected function getTimestampedID88( array $info ) {
136 if ( isset( $info['time'] ) ) {
137 $time = $info['time'];
138 $counter = $info['offsetCounter'];
139 } else {
140 list( $time, $counter ) = $info;
141 }
142 // Take the 46 LSBs of "milliseconds since epoch"
143 $id_bin = $this->millisecondsSinceEpochBinary( $time );
144 // Add a 10 bit counter resulting in 56 bits total
145 $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
146 // Add the 32 bit node ID resulting in 88 bits total
147 $id_bin .= $this->nodeId32;
148 // Convert to a 1-27 digit integer string
149 if ( strlen( $id_bin ) !== 88 ) {
150 throw new RuntimeException( "Detected overflow for millisecond timestamp." );
151 }
152
153 return $id_bin;
154 }
155
170 public static function newTimestampedUID128( $base = 10 ) {
171 Assert::parameterType( 'integer', $base, '$base' );
172 Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
173 Assert::parameter( $base >= 2, '$base', 'must be >= 2' );
174
175 $gen = self::singleton();
176 $info = $gen->getTimeAndDelay( 'lockFile128', 16384, 1048576, 1048576 );
177 $info['offsetCounter'] = $info['offsetCounter'] % 1048576;
178
179 return Wikimedia\base_convert( $gen->getTimestampedID128( $info ), 2, $base );
180 }
181
188 protected function getTimestampedID128( array $info ) {
189 if ( isset( $info['time'] ) ) {
190 $time = $info['time'];
191 $counter = $info['offsetCounter'];
192 $clkSeq = $info['clkSeq'];
193 } else {
194 list( $time, $counter, $clkSeq ) = $info;
195 }
196 // Take the 46 LSBs of "milliseconds since epoch"
197 $id_bin = $this->millisecondsSinceEpochBinary( $time );
198 // Add a 20 bit counter resulting in 66 bits total
199 $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
200 // Add a 14 bit clock sequence number resulting in 80 bits total
201 $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
202 // Add the 48 bit node ID resulting in 128 bits total
203 $id_bin .= $this->nodeId48;
204 // Convert to a 1-39 digit integer string
205 if ( strlen( $id_bin ) !== 128 ) {
206 throw new RuntimeException( "Detected overflow for millisecond timestamp." );
207 }
208
209 return $id_bin;
210 }
211
219 public static function newUUIDv1() {
220 $gen = self::singleton();
221 // There can be up to 10000 intervals for the same millisecond timestamp.
222 // [0,4999] counter + [0,5000] offset is in [0,9999] for the offset counter.
223 // Add this onto the timestamp to allow making up to 5000 IDs per second.
224 return $gen->getUUIDv1( $gen->getTimeAndDelay( 'lockFileUUID', 16384, 5000, 5001 ) );
225 }
226
234 public static function newRawUUIDv1() {
235 return str_replace( '-', '', self::newUUIDv1() );
236 }
237
242 protected function getUUIDv1( array $info ) {
243 $clkSeq_bin = Wikimedia\base_convert( $info['clkSeq'], 10, 2, 14 );
244 $time_bin = $this->intervalsSinceGregorianBinary( $info['time'], $info['offsetCounter'] );
245 // Take the 32 bits of "time low"
246 $id_bin = substr( $time_bin, 28, 32 );
247 // Add 16 bits of "time mid" resulting in 48 bits total
248 $id_bin .= substr( $time_bin, 12, 16 );
249 // Add 4 bit version resulting in 52 bits total
250 $id_bin .= '0001';
251 // Add 12 bits of "time high" resulting in 64 bits total
252 $id_bin .= substr( $time_bin, 0, 12 );
253 // Add 2 bits of "variant" resulting in 66 bits total
254 $id_bin .= '10';
255 // Add 6 bits of "clock seq high" resulting in 72 bits total
256 $id_bin .= substr( $clkSeq_bin, 0, 6 );
257 // Add 8 bits of "clock seq low" resulting in 80 bits total
258 $id_bin .= substr( $clkSeq_bin, 6, 8 );
259 // Add the 48 bit node ID resulting in 128 bits total
260 $id_bin .= $this->nodeId48;
261 // Convert to a 32 char hex string with dashes
262 if ( strlen( $id_bin ) !== 128 ) {
263 throw new RuntimeException( "Detected overflow for millisecond timestamp." );
264 }
265 $hex = Wikimedia\base_convert( $id_bin, 2, 16, 32 );
266 return sprintf( '%s-%s-%s-%s-%s',
267 // "time_low" (32 bits)
268 substr( $hex, 0, 8 ),
269 // "time_mid" (16 bits)
270 substr( $hex, 8, 4 ),
271 // "time_hi_and_version" (16 bits)
272 substr( $hex, 12, 4 ),
273 // "clk_seq_hi_res" (8 bits) and "clk_seq_low" (8 bits)
274 substr( $hex, 16, 4 ),
275 // "node" (48 bits)
276 substr( $hex, 20, 12 )
277 );
278 }
279
287 public static function newUUIDv4( $flags = 0 ) {
288 $hex = ( $flags & self::QUICK_RAND )
289 ? wfRandomString( 31 )
291
292 return sprintf( '%s-%s-%s-%s-%s',
293 // "time_low" (32 bits)
294 substr( $hex, 0, 8 ),
295 // "time_mid" (16 bits)
296 substr( $hex, 8, 4 ),
297 // "time_hi_and_version" (16 bits)
298 '4' . substr( $hex, 12, 3 ),
299 // "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
300 dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ),
301 // "node" (48 bits)
302 substr( $hex, 19, 12 )
303 );
304 }
305
313 public static function newRawUUIDv4( $flags = 0 ) {
314 return str_replace( '-', '', self::newUUIDv4( $flags ) );
315 }
316
329 public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
330 return current( self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags ) );
331 }
332
344 public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
345 $gen = self::singleton();
346 return $gen->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
347 }
348
360 protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
361 if ( $count <= 0 ) {
362 return []; // nothing to do
363 }
364 if ( $bits < 16 || $bits > 48 ) {
365 throw new RuntimeException( "Requested bit size ($bits) is out of range." );
366 }
367
368 $counter = null; // post-increment persistent counter value
369
370 // Use APC/etc if requested, available, and not in CLI mode;
371 // Counter values would not survive across script instances in CLI mode.
372 $cache = null;
373 if ( ( $flags & self::QUICK_VOLATILE ) && !wfIsCLI() ) {
374 $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
375 }
376 if ( $cache ) {
377 $counter = $cache->incrWithInit( $bucket, $cache::TTL_INDEFINITE, $count, $count );
378 if ( $counter === false ) {
379 throw new RuntimeException( 'Unable to set value to ' . get_class( $cache ) );
380 }
381 }
382
383 // Note: use of fmod() avoids "division by zero" on 32 bit machines
384 if ( $counter === null ) {
385 $path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
386 // Get the UID lock file handle
387 if ( isset( $this->fileHandles[$path] ) ) {
388 $handle = $this->fileHandles[$path];
389 } else {
390 $handle = fopen( $path, 'cb+' );
391 $this->fileHandles[$path] = $handle ?: null; // cache
392 }
393 // Acquire the UID lock file
394 if ( $handle === false ) {
395 throw new RuntimeException( "Could not open '{$path}'." );
396 }
397 if ( !flock( $handle, LOCK_EX ) ) {
398 fclose( $handle );
399 throw new RuntimeException( "Could not acquire '{$path}'." );
400 }
401 // Fetch the counter value and increment it...
402 rewind( $handle );
403 $counter = floor( trim( fgets( $handle ) ) ) + $count; // fetch as float
404 // Write back the new counter value
405 ftruncate( $handle, 0 );
406 rewind( $handle );
407 fwrite( $handle, fmod( $counter, 2 ** 48 ) ); // warp-around as needed
408 fflush( $handle );
409 // Release the UID lock file
410 flock( $handle, LOCK_UN );
411 }
412
413 $ids = [];
414 $divisor = 2 ** $bits;
415 $currentId = floor( $counter - $count ); // pre-increment counter value
416 for ( $i = 0; $i < $count; ++$i ) {
417 $ids[] = fmod( ++$currentId, $divisor );
418 }
419
420 return $ids;
421 }
422
440 protected function getTimeAndDelay( $lockFile, $clockSeqSize, $counterSize, $offsetSize ) {
441 // Get the UID lock file handle
442 if ( isset( $this->fileHandles[$lockFile] ) ) {
443 $handle = $this->fileHandles[$lockFile];
444 } else {
445 $handle = fopen( $this->$lockFile, 'cb+' );
446 $this->fileHandles[$lockFile] = $handle ?: null; // cache
447 }
448 // Acquire the UID lock file
449 if ( $handle === false ) {
450 throw new RuntimeException( "Could not open '{$this->$lockFile}'." );
451 }
452 if ( !flock( $handle, LOCK_EX ) ) {
453 fclose( $handle );
454 throw new RuntimeException( "Could not acquire '{$this->$lockFile}'." );
455 }
456
457 // The formatters that use this method expect a timestamp with millisecond
458 // precision and a counter upto a certain size. When more IDs than the counter
459 // size are generated during the same timestamp, an exception is thrown as we
460 // cannot increment further, because the formatted ID would not have enough
461 // bits to fit the counter.
462 //
463 // To orchestrate this between independant PHP processes on the same hosts,
464 // we must have a common sense of time so that we only have to maintain
465 // a single counter in a single lock file.
466 //
467 // Given that:
468 // * The system clock can be observed via time(), without milliseconds.
469 // * Some other clock can be observed via microtime(), which also offers
470 // millisecond precision.
471 // * microtime() drifts in-process further and further away from the system
472 // clock the longer the longer the process runs for.
473 // For example, on 2018-10-03 an HHVM 3.18 JobQueue process at WMF,
474 // that ran for 9 min 55 sec, drifted 7 seconds.
475 // The drift is immediate for processes running while the system clock changes.
476 // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
477 //
478 // We have two choices:
479 //
480 // 1. Use microtime() with the following caveats:
481 // - The last stored time may be in the future, or our current time
482 // may be in the past, in which case we'll frequently enter the slow
483 // timeWaitUntil() method to try and "sync" the current process with
484 // the previous process. We mustn't block for long though, max 10ms?
485 // - For any drift above 10ms, we pretend that the clock went backwards,
486 // and treat it the same way as after an NTP sync, by incrementing clock
487 // sequence instead. Given this rolls over automatically and silently
488 // and is meant to be rare, this is essentially sacrifices a reasonable
489 // guarantee of uniqueness.
490 // - For long running processes (e.g. longer than a few seconds) the drift
491 // can easily be more than 2 seconds. Because we only have a single lock
492 // file and don't want to keep too many counters and deal with clearing
493 // those, we fatal the user and refuse to make an ID. (T94522)
494 // 2. Use time() and expand the counter by 1000x and use the first digits
495 // as if they are the millisecond fraction of the timestamp.
496 // Known caveats or perf impact: None.
497 //
498 // We choose the latter.
499 $msecCounterSize = $counterSize * 1000;
500
501 rewind( $handle );
502 // Format of lock file contents:
503 // "<clk seq> <sec> <msec counter> <rand offset>"
504 $data = explode( ' ', fgets( $handle ) );
505
506 if ( count( $data ) === 4 ) {
507 // The UID lock file was already initialized
508 $clkSeq = (int)$data[0] % $clockSeqSize;
509 $prevSec = (int)$data[1];
510 // Counter for UIDs with the same timestamp,
511 $msecCounter = 0;
512 $randOffset = (int)$data[3] % $counterSize;
513
514 // If the system clock moved backwards by an NTP sync,
515 // or if the last writer process had its clock drift ahead,
516 // Try to catch up if the gap is small, so that we can keep a single
517 // monotonic logic file.
518 $sec = $this->timeWaitUntil( $prevSec );
519 if ( $sec === false ) {
520 // Gap is too big. Looks like the clock got moved back significantly.
521 // Start a new clock sequence, and re-randomize the extra offset,
522 // which is useful for UIDs that do not include the clock sequence number.
523 $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
524 $sec = time();
525 $randOffset = mt_rand( 0, $offsetSize - 1 );
526 trigger_error( "Clock was set back; sequence number incremented." );
527 } elseif ( $sec === $prevSec ) {
528 // Sanity check, only keep remainder if a previous writer wrote
529 // something here that we don't accept.
530 $msecCounter = (int)$data[2] % $msecCounterSize;
531 // Bump the counter if the time has not changed yet
532 if ( ++$msecCounter >= $msecCounterSize ) {
533 // More IDs generated with the same time than counterSize can accomodate
534 flock( $handle, LOCK_UN );
535 throw new RuntimeException( "Counter overflow for timestamp value." );
536 }
537 }
538 } else {
539 // Initialize UID lock file information
540 $clkSeq = mt_rand( 0, $clockSeqSize - 1 );
541 $sec = time();
542 $msecCounter = 0;
543 $randOffset = mt_rand( 0, $offsetSize - 1 );
544 }
545
546 // Update and release the UID lock file
547 ftruncate( $handle, 0 );
548 rewind( $handle );
549 fwrite( $handle, "{$clkSeq} {$sec} {$msecCounter} {$randOffset}" );
550 fflush( $handle );
551 flock( $handle, LOCK_UN );
552
553 // Split msecCounter back into msec and counter
554 $msec = (int)( $msecCounter / 1000 );
555 $counter = $msecCounter % 1000;
556
557 return [
558 'time' => [ $sec, $msec ],
559 'counter' => $counter,
560 'clkSeq' => $clkSeq,
561 'offset' => $randOffset,
562 'offsetCounter' => $counter + $randOffset,
563 ];
564 }
565
573 protected function timeWaitUntil( $time ) {
574 $start = microtime( true );
575 do {
576 $ct = time();
577 // https://www.php.net/manual/en/language.operators.comparison.php
578 if ( $ct >= $time ) {
579 // current time is higher than or equal to than $time
580 return $ct;
581 }
582 } while ( ( microtime( true ) - $start ) <= 0.010 ); // upto 10ms
583
584 return false;
585 }
586
592 protected function millisecondsSinceEpochBinary( array $time ) {
593 list( $sec, $msec ) = $time;
594 $ts = 1000 * $sec + $msec;
595 if ( $ts > 2 ** 52 ) {
596 throw new RuntimeException( __METHOD__ .
597 ': sorry, this function doesn\'t work after the year 144680' );
598 }
599
600 return substr( Wikimedia\base_convert( $ts, 10, 2, 46 ), -46 );
601 }
602
609 protected function intervalsSinceGregorianBinary( array $time, $delta = 0 ) {
610 list( $sec, $msec ) = $time;
611 $offset = '122192928000000000';
612 if ( PHP_INT_SIZE >= 8 ) { // 64 bit integers
613 $ts = ( 1000 * $sec + $msec ) * 10000 + (int)$offset + $delta;
614 $id_bin = str_pad( decbin( $ts % ( 2 ** 60 ) ), 60, '0', STR_PAD_LEFT );
615 } elseif ( extension_loaded( 'gmp' ) ) {
616 $ts = gmp_add( gmp_mul( (string)$sec, '1000' ), (string)$msec ); // ms
617 $ts = gmp_add( gmp_mul( $ts, '10000' ), $offset ); // 100ns intervals
618 $ts = gmp_add( $ts, (string)$delta );
619 $ts = gmp_mod( $ts, gmp_pow( '2', '60' ) ); // wrap around
620 $id_bin = str_pad( gmp_strval( $ts, 2 ), 60, '0', STR_PAD_LEFT );
621 } elseif ( extension_loaded( 'bcmath' ) ) {
622 $ts = bcadd( bcmul( $sec, 1000 ), $msec ); // ms
623 $ts = bcadd( bcmul( $ts, 10000 ), $offset ); // 100ns intervals
624 $ts = bcadd( $ts, $delta );
625 $ts = bcmod( $ts, bcpow( 2, 60 ) ); // wrap around
626 $id_bin = Wikimedia\base_convert( $ts, 10, 2, 60 );
627 } else {
628 throw new RuntimeException( 'bcmath or gmp extension required for 32 bit machines.' );
629 }
630 return $id_bin;
631 }
632
645 private function deleteCacheFiles() {
646 // T46850
647 foreach ( $this->fileHandles as $path => $handle ) {
648 if ( $handle !== null ) {
649 fclose( $handle );
650 }
651 if ( is_file( $path ) ) {
652 unlink( $path );
653 }
654 unset( $this->fileHandles[$path] );
655 }
656 if ( is_file( $this->nodeIdFile ) ) {
657 unlink( $this->nodeIdFile );
658 }
659 }
660
674 public static function unitTestTearDown() {
675 // T46850
676 $gen = self::singleton();
677 $gen->deleteCacheFiles();
678 }
679
680 function __destruct() {
681 array_map( 'fclose', array_filter( $this->fileHandles ) );
682 }
683}
wfTempDir()
Tries to get the system directory for temporary files.
wfRandomString( $length=32)
Get a random string containing a number of pseudo-random hex characters.
wfShellExec( $cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
wfIsWindows()
Check if the operating system is Windows.
wfIsCLI()
Check if we are running from the commandline.
$line
Definition cdb.php:59
static generateHex( $chars)
Generate a run of cryptographically random data and return it in hexadecimal string format.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Class for getting statistically unique IDs.
getUUIDv1(array $info)
static newRawUUIDv1()
Return an RFC4122 compliant v1 UUID.
string $nodeIdFile
Local file path.
deleteCacheFiles()
Delete all cache files that have been created.
static newSequentialPerNodeID( $bucket, $bits=48, $flags=0)
Return an ID that is sequential only for this node and bucket.
array $fileHandles
Cached file handles.
millisecondsSinceEpochBinary(array $time)
getTimestampedID88(array $info)
string $lockFile128
Local file path.
const QUICK_VOLATILE
static newSequentialPerNodeIDs( $bucket, $bits, $count, $flags=0)
Return IDs that are sequential only for this node and bucket.
string $nodeId32
Node ID in binary (32 bits)
getTimeAndDelay( $lockFile, $clockSeqSize, $counterSize, $offsetSize)
Get a (time,counter,clock sequence) where (time,counter) is higher than any previous (time,...
string $nodeId48
Node ID in binary (48 bits)
string $lockFile88
Local file path.
static UIDGenerator $instance
static newRawUUIDv4( $flags=0)
Return an RFC4122 compliant v4 UUID.
getSequentialPerNodeIDs( $bucket, $bits, $count, $flags)
Return IDs that are sequential only for this node and bucket.
timeWaitUntil( $time)
Wait till the current timestamp reaches $time and return the current timestamp.
getTimestampedID128(array $info)
intervalsSinceGregorianBinary(array $time, $delta=0)
static newUUIDv4( $flags=0)
Return an RFC4122 compliant v4 UUID.
string $lockFileUUID
Local file path.
static singleton()
static newUUIDv1()
Return an RFC4122 compliant v1 UUID.
static newTimestampedUID128( $base=10)
Get a statistically unique 128-bit unsigned integer ID string.
static unitTestTearDown()
Cleanup resources when tearing down after a unit test.
static newTimestampedUID88( $base=10)
Get a statistically unique 88-bit unsigned integer ID string.
$cache
Definition mcc.php:33
This program is free software; you can redistribute it and/or modify it under the terms of the GNU Ge...