MediaWiki REL1_30
UIDGenerator.php
Go to the documentation of this file.
1<?php
22use Wikimedia\Assert\Assert;
24
32 protected static $instance = null;
33
34 protected $nodeIdFile; // string; local file path
35 protected $nodeId32; // string; node ID in binary (32 bits)
36 protected $nodeId48; // string; node ID in binary (48 bits)
37
38 protected $lockFile88; // string; local file path
39 protected $lockFile128; // string; local file path
40 protected $lockFileUUID; // string; local file path
41
43 protected $fileHandles = []; // cache file handles
44
45 const QUICK_RAND = 1; // get randomness from fast and insecure sources
46 const QUICK_VOLATILE = 2; // use an APC like in-memory counter if available
47
48 protected function __construct() {
49 $this->nodeIdFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
50 $nodeId = '';
51 if ( is_file( $this->nodeIdFile ) ) {
52 $nodeId = file_get_contents( $this->nodeIdFile );
53 }
54 // Try to get some ID that uniquely identifies this machine (RFC 4122)...
55 if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
56 MediaWiki\suppressWarnings();
57 if ( wfIsWindows() ) {
58 // https://technet.microsoft.com/en-us/library/bb490913.aspx
59 $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
60 $line = substr( $csv, 0, strcspn( $csv, "\n" ) );
61 $info = str_getcsv( $line );
62 $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
63 } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
64 // See https://linux.die.net/man/8/ifconfig
65 $m = [];
66 preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
67 wfShellExec( '/sbin/ifconfig -a' ), $m );
68 $nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
69 }
70 MediaWiki\restoreWarnings();
71 if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
72 $nodeId = MWCryptRand::generateHex( 12, true );
73 $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
74 }
75 file_put_contents( $this->nodeIdFile, $nodeId ); // cache
76 }
77 $this->nodeId32 = Wikimedia\base_convert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
78 $this->nodeId48 = Wikimedia\base_convert( $nodeId, 16, 2, 48 );
79 // If different processes run as different users, they may have different temp dirs.
80 // This is dealt with by initializing the clock sequence number and counters randomly.
81 $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
82 $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
83 $this->lockFileUUID = wfTempDir() . '/mw-' . __CLASS__ . '-UUID-128';
84 }
85
90 protected static function singleton() {
91 if ( self::$instance === null ) {
92 self::$instance = new self();
93 }
94
95 return self::$instance;
96 }
97
113 public static function newTimestampedUID88( $base = 10 ) {
114 Assert::parameterType( 'integer', $base, '$base' );
115 Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
116 Assert::parameter( $base >= 2, '$base', 'must be >= 2' );
117
118 $gen = self::singleton();
119 $info = $gen->getTimeAndDelay( 'lockFile88', 1, 1024, 1024 );
120 $info['offsetCounter'] = $info['offsetCounter'] % 1024;
121 return Wikimedia\base_convert( $gen->getTimestampedID88( $info ), 2, $base );
122 }
123
130 protected function getTimestampedID88( array $info ) {
131 if ( isset( $info['time'] ) ) {
132 $time = $info['time'];
133 $counter = $info['offsetCounter'];
134 } else {
135 $time = $info[0];
136 $counter = $info[1];
137 }
138 // Take the 46 LSBs of "milliseconds since epoch"
139 $id_bin = $this->millisecondsSinceEpochBinary( $time );
140 // Add a 10 bit counter resulting in 56 bits total
141 $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
142 // Add the 32 bit node ID resulting in 88 bits total
143 $id_bin .= $this->nodeId32;
144 // Convert to a 1-27 digit integer string
145 if ( strlen( $id_bin ) !== 88 ) {
146 throw new RuntimeException( "Detected overflow for millisecond timestamp." );
147 }
148
149 return $id_bin;
150 }
151
166 public static function newTimestampedUID128( $base = 10 ) {
167 Assert::parameterType( 'integer', $base, '$base' );
168 Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
169 Assert::parameter( $base >= 2, '$base', 'must be >= 2' );
170
171 $gen = self::singleton();
172 $info = $gen->getTimeAndDelay( 'lockFile128', 16384, 1048576, 1048576 );
173 $info['offsetCounter'] = $info['offsetCounter'] % 1048576;
174
175 return Wikimedia\base_convert( $gen->getTimestampedID128( $info ), 2, $base );
176 }
177
184 protected function getTimestampedID128( array $info ) {
185 if ( isset( $info['time'] ) ) {
186 $time = $info['time'];
187 $counter = $info['offsetCounter'];
188 $clkSeq = $info['clkSeq'];
189 } else {
190 $time = $info[0];
191 $counter = $info[1];
192 $clkSeq = $info[2];
193 }
194 // Take the 46 LSBs of "milliseconds since epoch"
195 $id_bin = $this->millisecondsSinceEpochBinary( $time );
196 // Add a 20 bit counter resulting in 66 bits total
197 $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
198 // Add a 14 bit clock sequence number resulting in 80 bits total
199 $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
200 // Add the 48 bit node ID resulting in 128 bits total
201 $id_bin .= $this->nodeId48;
202 // Convert to a 1-39 digit integer string
203 if ( strlen( $id_bin ) !== 128 ) {
204 throw new RuntimeException( "Detected overflow for millisecond timestamp." );
205 }
206
207 return $id_bin;
208 }
209
217 public static function newUUIDv1() {
218 $gen = self::singleton();
219 // There can be up to 10000 intervals for the same millisecond timestamp.
220 // [0,4999] counter + [0,5000] offset is in [0,9999] for the offset counter.
221 // Add this onto the timestamp to allow making up to 5000 IDs per second.
222 return $gen->getUUIDv1( $gen->getTimeAndDelay( 'lockFileUUID', 16384, 5000, 5001 ) );
223 }
224
232 public static function newRawUUIDv1() {
233 return str_replace( '-', '', self::newUUIDv1() );
234 }
235
240 protected function getUUIDv1( array $info ) {
241 $clkSeq_bin = Wikimedia\base_convert( $info['clkSeq'], 10, 2, 14 );
242 $time_bin = $this->intervalsSinceGregorianBinary( $info['time'], $info['offsetCounter'] );
243 // Take the 32 bits of "time low"
244 $id_bin = substr( $time_bin, 28, 32 );
245 // Add 16 bits of "time mid" resulting in 48 bits total
246 $id_bin .= substr( $time_bin, 12, 16 );
247 // Add 4 bit version resulting in 52 bits total
248 $id_bin .= '0001';
249 // Add 12 bits of "time high" resulting in 64 bits total
250 $id_bin .= substr( $time_bin, 0, 12 );
251 // Add 2 bits of "variant" resulting in 66 bits total
252 $id_bin .= '10';
253 // Add 6 bits of "clock seq high" resulting in 72 bits total
254 $id_bin .= substr( $clkSeq_bin, 0, 6 );
255 // Add 8 bits of "clock seq low" resulting in 80 bits total
256 $id_bin .= substr( $clkSeq_bin, 6, 8 );
257 // Add the 48 bit node ID resulting in 128 bits total
258 $id_bin .= $this->nodeId48;
259 // Convert to a 32 char hex string with dashes
260 if ( strlen( $id_bin ) !== 128 ) {
261 throw new RuntimeException( "Detected overflow for millisecond timestamp." );
262 }
263 $hex = Wikimedia\base_convert( $id_bin, 2, 16, 32 );
264 return sprintf( '%s-%s-%s-%s-%s',
265 // "time_low" (32 bits)
266 substr( $hex, 0, 8 ),
267 // "time_mid" (16 bits)
268 substr( $hex, 8, 4 ),
269 // "time_hi_and_version" (16 bits)
270 substr( $hex, 12, 4 ),
271 // "clk_seq_hi_res" (8 bits) and "clk_seq_low" (8 bits)
272 substr( $hex, 16, 4 ),
273 // "node" (48 bits)
274 substr( $hex, 20, 12 )
275 );
276 }
277
285 public static function newUUIDv4( $flags = 0 ) {
286 $hex = ( $flags & self::QUICK_RAND )
287 ? wfRandomString( 31 )
289
290 return sprintf( '%s-%s-%s-%s-%s',
291 // "time_low" (32 bits)
292 substr( $hex, 0, 8 ),
293 // "time_mid" (16 bits)
294 substr( $hex, 8, 4 ),
295 // "time_hi_and_version" (16 bits)
296 '4' . substr( $hex, 12, 3 ),
297 // "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
298 dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ),
299 // "node" (48 bits)
300 substr( $hex, 19, 12 )
301 );
302 }
303
311 public static function newRawUUIDv4( $flags = 0 ) {
312 return str_replace( '-', '', self::newUUIDv4( $flags ) );
313 }
314
327 public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
328 return current( self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags ) );
329 }
330
342 public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
343 $gen = self::singleton();
344 return $gen->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
345 }
346
358 protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
359 if ( $count <= 0 ) {
360 return []; // nothing to do
361 } elseif ( $bits < 16 || $bits > 48 ) {
362 throw new RuntimeException( "Requested bit size ($bits) is out of range." );
363 }
364
365 $counter = null; // post-increment persistent counter value
366
367 // Use APC/eAccelerator/xcache if requested, available, and not in CLI mode;
368 // Counter values would not survive accross script instances in CLI mode.
369 $cache = null;
370 if ( ( $flags & self::QUICK_VOLATILE ) && PHP_SAPI !== 'cli' ) {
371 $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
372 }
373 if ( $cache ) {
374 $counter = $cache->incrWithInit( $bucket, $cache::TTL_INDEFINITE, $count, $count );
375 if ( $counter === false ) {
376 throw new RuntimeException( 'Unable to set value to ' . get_class( $cache ) );
377 }
378 }
379
380 // Note: use of fmod() avoids "division by zero" on 32 bit machines
381 if ( $counter === null ) {
382 $path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
383 // Get the UID lock file handle
384 if ( isset( $this->fileHandles[$path] ) ) {
385 $handle = $this->fileHandles[$path];
386 } else {
387 $handle = fopen( $path, 'cb+' );
388 $this->fileHandles[$path] = $handle ?: null; // cache
389 }
390 // Acquire the UID lock file
391 if ( $handle === false ) {
392 throw new RuntimeException( "Could not open '{$path}'." );
393 } elseif ( !flock( $handle, LOCK_EX ) ) {
394 fclose( $handle );
395 throw new RuntimeException( "Could not acquire '{$path}'." );
396 }
397 // Fetch the counter value and increment it...
398 rewind( $handle );
399 $counter = floor( trim( fgets( $handle ) ) ) + $count; // fetch as float
400 // Write back the new counter value
401 ftruncate( $handle, 0 );
402 rewind( $handle );
403 fwrite( $handle, fmod( $counter, pow( 2, 48 ) ) ); // warp-around as needed
404 fflush( $handle );
405 // Release the UID lock file
406 flock( $handle, LOCK_UN );
407 }
408
409 $ids = [];
410 $divisor = pow( 2, $bits );
411 $currentId = floor( $counter - $count ); // pre-increment counter value
412 for ( $i = 0; $i < $count; ++$i ) {
413 $ids[] = fmod( ++$currentId, $divisor );
414 }
415
416 return $ids;
417 }
418
431 protected function getTimeAndDelay( $lockFile, $clockSeqSize, $counterSize, $offsetSize ) {
432 // Get the UID lock file handle
433 if ( isset( $this->fileHandles[$lockFile] ) ) {
434 $handle = $this->fileHandles[$lockFile];
435 } else {
436 $handle = fopen( $this->$lockFile, 'cb+' );
437 $this->fileHandles[$lockFile] = $handle ?: null; // cache
438 }
439 // Acquire the UID lock file
440 if ( $handle === false ) {
441 throw new RuntimeException( "Could not open '{$this->$lockFile}'." );
442 } elseif ( !flock( $handle, LOCK_EX ) ) {
443 fclose( $handle );
444 throw new RuntimeException( "Could not acquire '{$this->$lockFile}'." );
445 }
446 // Get the current timestamp, clock sequence number, last time, and counter
447 rewind( $handle );
448 $data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>"
449 $clockChanged = false; // clock set back significantly?
450 if ( count( $data ) == 5 ) { // last UID info already initialized
451 $clkSeq = (int)$data[0] % $clockSeqSize;
452 $prevTime = [ (int)$data[1], (int)$data[2] ];
453 $offset = (int)$data[4] % $counterSize; // random counter offset
454 $counter = 0; // counter for UIDs with the same timestamp
455 // Delay until the clock reaches the time of the last ID.
456 // This detects any microtime() drift among processes.
457 $time = $this->timeWaitUntil( $prevTime );
458 if ( !$time ) { // too long to delay?
459 $clockChanged = true; // bump clock sequence number
461 } elseif ( $time == $prevTime ) {
462 // Bump the counter if there are timestamp collisions
463 $counter = (int)$data[3] % $counterSize;
464 if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
465 flock( $handle, LOCK_UN ); // abort
466 throw new RuntimeException( "Counter overflow for timestamp value." );
467 }
468 }
469 } else { // last UID info not initialized
470 $clkSeq = mt_rand( 0, $clockSeqSize - 1 );
471 $counter = 0;
472 $offset = mt_rand( 0, $offsetSize - 1 );
474 }
475 // microtime() and gettimeofday() can drift from time() at least on Windows.
476 // The drift is immediate for processes running while the system clock changes.
477 // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
478 if ( abs( time() - $time[0] ) >= 2 ) {
479 // We don't want processes using too high or low timestamps to avoid duplicate
480 // UIDs and clock sequence number churn. This process should just be restarted.
481 flock( $handle, LOCK_UN ); // abort
482 throw new RuntimeException( "Process clock is outdated or drifted." );
483 }
484 // If microtime() is synced and a clock change was detected, then the clock went back
485 if ( $clockChanged ) {
486 // Bump the clock sequence number and also randomize the counter offset,
487 // which is useful for UIDs that do not include the clock sequence number.
488 $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
489 $offset = mt_rand( 0, $offsetSize - 1 );
490 trigger_error( "Clock was set back; sequence number incremented." );
491 }
492 // Update the (clock sequence number, timestamp, counter)
493 ftruncate( $handle, 0 );
494 rewind( $handle );
495 fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
496 fflush( $handle );
497 // Release the UID lock file
498 flock( $handle, LOCK_UN );
499
500 return [
501 'time' => $time,
502 'counter' => $counter,
503 'clkSeq' => $clkSeq,
504 'offset' => $offset,
505 'offsetCounter' => $counter + $offset
506 ];
507 }
508
516 protected function timeWaitUntil( array $time ) {
517 do {
518 $ct = self::millitime();
519 if ( $ct >= $time ) { // https://secure.php.net/manual/en/language.operators.comparison.php
520 return $ct; // current timestamp is higher than $time
521 }
522 } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );
523
524 return false;
525 }
526
532 protected function millisecondsSinceEpochBinary( array $time ) {
533 list( $sec, $msec ) = $time;
534 $ts = 1000 * $sec + $msec;
535 if ( $ts > pow( 2, 52 ) ) {
536 throw new RuntimeException( __METHOD__ .
537 ': sorry, this function doesn\'t work after the year 144680' );
538 }
539
540 return substr( Wikimedia\base_convert( $ts, 10, 2, 46 ), -46 );
541 }
542
549 protected function intervalsSinceGregorianBinary( array $time, $delta = 0 ) {
550 list( $sec, $msec ) = $time;
551 $offset = '122192928000000000';
552 if ( PHP_INT_SIZE >= 8 ) { // 64 bit integers
553 $ts = ( 1000 * $sec + $msec ) * 10000 + (int)$offset + $delta;
554 $id_bin = str_pad( decbin( $ts % pow( 2, 60 ) ), 60, '0', STR_PAD_LEFT );
555 } elseif ( extension_loaded( 'gmp' ) ) {
556 $ts = gmp_add( gmp_mul( (string)$sec, '1000' ), (string)$msec ); // ms
557 $ts = gmp_add( gmp_mul( $ts, '10000' ), $offset ); // 100ns intervals
558 $ts = gmp_add( $ts, (string)$delta );
559 $ts = gmp_mod( $ts, gmp_pow( '2', '60' ) ); // wrap around
560 $id_bin = str_pad( gmp_strval( $ts, 2 ), 60, '0', STR_PAD_LEFT );
561 } elseif ( extension_loaded( 'bcmath' ) ) {
562 $ts = bcadd( bcmul( $sec, 1000 ), $msec ); // ms
563 $ts = bcadd( bcmul( $ts, 10000 ), $offset ); // 100ns intervals
564 $ts = bcadd( $ts, $delta );
565 $ts = bcmod( $ts, bcpow( 2, 60 ) ); // wrap around
566 $id_bin = Wikimedia\base_convert( $ts, 10, 2, 60 );
567 } else {
568 throw new RuntimeException( 'bcmath or gmp extension required for 32 bit machines.' );
569 }
570 return $id_bin;
571 }
572
576 protected static function millitime() {
577 list( $msec, $sec ) = explode( ' ', microtime() );
578
579 return [ (int)$sec, (int)( $msec * 1000 ) ];
580 }
581
593 protected function deleteCacheFiles() {
594 // Bug: 44850
595 foreach ( $this->fileHandles as $path => $handle ) {
596 if ( $handle !== null ) {
597 fclose( $handle );
598 }
599 if ( is_file( $path ) ) {
600 unlink( $path );
601 }
602 unset( $this->fileHandles[$path] );
603 }
604 if ( is_file( $this->nodeIdFile ) ) {
605 unlink( $this->nodeIdFile );
606 }
607 }
608
620 public static function unitTestTearDown() {
621 // Bug: 44850
622 $gen = self::singleton();
623 $gen->deleteCacheFiles();
624 }
625
626 function __destruct() {
627 array_map( 'fclose', array_filter( $this->fileHandles ) );
628 }
629}
wfTempDir()
Tries to get the system directory for temporary files.
wfRandomString( $length=32)
Get a random string containing a number of pseudo-random hex characters.
wfShellExec( $cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
wfIsWindows()
Check if the operating system is Windows.
$line
Definition cdb.php:58
static generateHex( $chars, $forceStrong=false)
Generate a run of (ideally) cryptographically random data and return it in hexadecimal string format.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Class for getting statistically unique IDs.
getUUIDv1(array $info)
static newRawUUIDv1()
Return an RFC4122 compliant v1 UUID.
deleteCacheFiles()
Delete all cache files that have been created.
static newSequentialPerNodeID( $bucket, $bits=48, $flags=0)
Return an ID that is sequential only for this node and bucket.
millisecondsSinceEpochBinary(array $time)
getTimestampedID88(array $info)
const QUICK_VOLATILE
static newSequentialPerNodeIDs( $bucket, $bits, $count, $flags=0)
Return IDs that are sequential only for this node and bucket.
getTimeAndDelay( $lockFile, $clockSeqSize, $counterSize, $offsetSize)
Get a (time,counter,clock sequence) where (time,counter) is higher than any previous (time,...
static UIDGenerator $instance
static newRawUUIDv4( $flags=0)
Return an RFC4122 compliant v4 UUID.
timeWaitUntil(array $time)
Wait till the current timestamp reaches $time and return the current timestamp.
static millitime()
getSequentialPerNodeIDs( $bucket, $bits, $count, $flags)
Return IDs that are sequential only for this node and bucket.
getTimestampedID128(array $info)
intervalsSinceGregorianBinary(array $time, $delta=0)
static newUUIDv4( $flags=0)
Return an RFC4122 compliant v4 UUID.
static singleton()
static newUUIDv1()
Return an RFC4122 compliant v1 UUID.
static newTimestampedUID128( $base=10)
Get a statistically unique 128-bit unsigned integer ID string.
static unitTestTearDown()
Cleanup resources when tearing down after a unit test.
static newTimestampedUID88( $base=10)
Get a statistically unique 88-bit unsigned integer ID string.
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition hooks.txt:1778
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition hooks.txt:2805
$cache
Definition mcc.php:33