MediaWiki REL1_28
UIDGenerator.php
Go to the documentation of this file.
1<?php
23use Wikimedia\Assert\Assert;
25
33 protected static $instance = null;
34
35 protected $nodeIdFile; // string; local file path
36 protected $nodeId32; // string; node ID in binary (32 bits)
37 protected $nodeId48; // string; node ID in binary (48 bits)
38
39 protected $lockFile88; // string; local file path
40 protected $lockFile128; // string; local file path
41 protected $lockFileUUID; // string; local file path
42
44 protected $fileHandles = []; // cache file handles
45
46 const QUICK_RAND = 1; // get randomness from fast and insecure sources
47 const QUICK_VOLATILE = 2; // use an APC like in-memory counter if available
48
49 protected function __construct() {
50 $this->nodeIdFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
51 $nodeId = '';
52 if ( is_file( $this->nodeIdFile ) ) {
53 $nodeId = file_get_contents( $this->nodeIdFile );
54 }
55 // Try to get some ID that uniquely identifies this machine (RFC 4122)...
56 if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
57 MediaWiki\suppressWarnings();
58 if ( wfIsWindows() ) {
59 // http://technet.microsoft.com/en-us/library/bb490913.aspx
60 $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
61 $line = substr( $csv, 0, strcspn( $csv, "\n" ) );
62 $info = str_getcsv( $line );
63 $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
64 } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
65 // See http://linux.die.net/man/8/ifconfig
66 $m = [];
67 preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
68 wfShellExec( '/sbin/ifconfig -a' ), $m );
69 $nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
70 }
71 MediaWiki\restoreWarnings();
72 if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
73 $nodeId = MWCryptRand::generateHex( 12, true );
74 $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
75 }
76 file_put_contents( $this->nodeIdFile, $nodeId ); // cache
77 }
78 $this->nodeId32 = Wikimedia\base_convert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
79 $this->nodeId48 = Wikimedia\base_convert( $nodeId, 16, 2, 48 );
80 // If different processes run as different users, they may have different temp dirs.
81 // This is dealt with by initializing the clock sequence number and counters randomly.
82 $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
83 $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
84 $this->lockFileUUID = wfTempDir() . '/mw-' . __CLASS__ . '-UUID-128';
85 }
86
91 protected static function singleton() {
92 if ( self::$instance === null ) {
93 self::$instance = new self();
94 }
95
96 return self::$instance;
97 }
98
114 public static function newTimestampedUID88( $base = 10 ) {
115 Assert::parameterType( 'integer', $base, '$base' );
116 Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
117 Assert::parameter( $base >= 2, '$base', 'must be >= 2' );
118
119 $gen = self::singleton();
120 $info = $gen->getTimeAndDelay( 'lockFile88', 1, 1024, 1024 );
121 $info['offsetCounter'] = $info['offsetCounter'] % 1024;
122 return Wikimedia\base_convert( $gen->getTimestampedID88( $info ), 2, $base );
123 }
124
131 protected function getTimestampedID88( array $info ) {
132 if ( isset( $info['time'] ) ) {
133 $time = $info['time'];
134 $counter = $info['offsetCounter'];
135 } else {
136 $time = $info[0];
137 $counter = $info[1];
138 }
139 // Take the 46 LSBs of "milliseconds since epoch"
140 $id_bin = $this->millisecondsSinceEpochBinary( $time );
141 // Add a 10 bit counter resulting in 56 bits total
142 $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
143 // Add the 32 bit node ID resulting in 88 bits total
144 $id_bin .= $this->nodeId32;
145 // Convert to a 1-27 digit integer string
146 if ( strlen( $id_bin ) !== 88 ) {
147 throw new RuntimeException( "Detected overflow for millisecond timestamp." );
148 }
149
150 return $id_bin;
151 }
152
167 public static function newTimestampedUID128( $base = 10 ) {
168 Assert::parameterType( 'integer', $base, '$base' );
169 Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
170 Assert::parameter( $base >= 2, '$base', 'must be >= 2' );
171
172 $gen = self::singleton();
173 $info = $gen->getTimeAndDelay( 'lockFile128', 16384, 1048576, 1048576 );
174 $info['offsetCounter'] = $info['offsetCounter'] % 1048576;
175
176 return Wikimedia\base_convert( $gen->getTimestampedID128( $info ), 2, $base );
177 }
178
185 protected function getTimestampedID128( array $info ) {
186 if ( isset( $info['time'] ) ) {
187 $time = $info['time'];
188 $counter = $info['offsetCounter'];
189 $clkSeq = $info['clkSeq'];
190 } else {
191 $time = $info[0];
192 $counter = $info[1];
193 $clkSeq = $info[2];
194 }
195 // Take the 46 LSBs of "milliseconds since epoch"
196 $id_bin = $this->millisecondsSinceEpochBinary( $time );
197 // Add a 20 bit counter resulting in 66 bits total
198 $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
199 // Add a 14 bit clock sequence number resulting in 80 bits total
200 $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
201 // Add the 48 bit node ID resulting in 128 bits total
202 $id_bin .= $this->nodeId48;
203 // Convert to a 1-39 digit integer string
204 if ( strlen( $id_bin ) !== 128 ) {
205 throw new RuntimeException( "Detected overflow for millisecond timestamp." );
206 }
207
208 return $id_bin;
209 }
210
218 public static function newUUIDv1() {
219 $gen = self::singleton();
220 // There can be up to 10000 intervals for the same millisecond timestamp.
221 // [0,4999] counter + [0,5000] offset is in [0,9999] for the offset counter.
222 // Add this onto the timestamp to allow making up to 5000 IDs per second.
223 return $gen->getUUIDv1( $gen->getTimeAndDelay( 'lockFileUUID', 16384, 5000, 5001 ) );
224 }
225
233 public static function newRawUUIDv1() {
234 return str_replace( '-', '', self::newUUIDv1() );
235 }
236
241 protected function getUUIDv1( array $info ) {
242 $clkSeq_bin = Wikimedia\base_convert( $info['clkSeq'], 10, 2, 14 );
243 $time_bin = $this->intervalsSinceGregorianBinary( $info['time'], $info['offsetCounter'] );
244 // Take the 32 bits of "time low"
245 $id_bin = substr( $time_bin, 28, 32 );
246 // Add 16 bits of "time mid" resulting in 48 bits total
247 $id_bin .= substr( $time_bin, 12, 16 );
248 // Add 4 bit version resulting in 52 bits total
249 $id_bin .= '0001';
250 // Add 12 bits of "time high" resulting in 64 bits total
251 $id_bin .= substr( $time_bin, 0, 12 );
252 // Add 2 bits of "variant" resulting in 66 bits total
253 $id_bin .= '10';
254 // Add 6 bits of "clock seq high" resulting in 72 bits total
255 $id_bin .= substr( $clkSeq_bin, 0, 6 );
256 // Add 8 bits of "clock seq low" resulting in 80 bits total
257 $id_bin .= substr( $clkSeq_bin, 6, 8 );
258 // Add the 48 bit node ID resulting in 128 bits total
259 $id_bin .= $this->nodeId48;
260 // Convert to a 32 char hex string with dashes
261 if ( strlen( $id_bin ) !== 128 ) {
262 throw new RuntimeException( "Detected overflow for millisecond timestamp." );
263 }
264 $hex = Wikimedia\base_convert( $id_bin, 2, 16, 32 );
265 return sprintf( '%s-%s-%s-%s-%s',
266 // "time_low" (32 bits)
267 substr( $hex, 0, 8 ),
268 // "time_mid" (16 bits)
269 substr( $hex, 8, 4 ),
270 // "time_hi_and_version" (16 bits)
271 substr( $hex, 12, 4 ),
272 // "clk_seq_hi_res" (8 bits) and "clk_seq_low" (8 bits)
273 substr( $hex, 16, 4 ),
274 // "node" (48 bits)
275 substr( $hex, 20, 12 )
276 );
277 }
278
286 public static function newUUIDv4( $flags = 0 ) {
287 $hex = ( $flags & self::QUICK_RAND )
288 ? wfRandomString( 31 )
290
291 return sprintf( '%s-%s-%s-%s-%s',
292 // "time_low" (32 bits)
293 substr( $hex, 0, 8 ),
294 // "time_mid" (16 bits)
295 substr( $hex, 8, 4 ),
296 // "time_hi_and_version" (16 bits)
297 '4' . substr( $hex, 12, 3 ),
298 // "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
299 dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ),
300 // "node" (48 bits)
301 substr( $hex, 19, 12 )
302 );
303 }
304
312 public static function newRawUUIDv4( $flags = 0 ) {
313 return str_replace( '-', '', self::newUUIDv4( $flags ) );
314 }
315
328 public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
329 return current( self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags ) );
330 }
331
343 public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
344 $gen = self::singleton();
345 return $gen->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
346 }
347
359 protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
360 if ( $count <= 0 ) {
361 return []; // nothing to do
362 } elseif ( $bits < 16 || $bits > 48 ) {
363 throw new RuntimeException( "Requested bit size ($bits) is out of range." );
364 }
365
366 $counter = null; // post-increment persistent counter value
367
368 // Use APC/eAccelerator/xcache if requested, available, and not in CLI mode;
369 // Counter values would not survive accross script instances in CLI mode.
370 $cache = null;
371 if ( ( $flags & self::QUICK_VOLATILE ) && PHP_SAPI !== 'cli' ) {
372 $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
373 }
374 if ( $cache ) {
375 $counter = $cache->incrWithInit( $bucket, $cache::TTL_INDEFINITE, $count, $count );
376 if ( $counter === false ) {
377 throw new RuntimeException( 'Unable to set value to ' . get_class( $cache ) );
378 }
379 }
380
381 // Note: use of fmod() avoids "division by zero" on 32 bit machines
382 if ( $counter === null ) {
383 $path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
384 // Get the UID lock file handle
385 if ( isset( $this->fileHandles[$path] ) ) {
386 $handle = $this->fileHandles[$path];
387 } else {
388 $handle = fopen( $path, 'cb+' );
389 $this->fileHandles[$path] = $handle ?: null; // cache
390 }
391 // Acquire the UID lock file
392 if ( $handle === false ) {
393 throw new RuntimeException( "Could not open '{$path}'." );
394 } elseif ( !flock( $handle, LOCK_EX ) ) {
395 fclose( $handle );
396 throw new RuntimeException( "Could not acquire '{$path}'." );
397 }
398 // Fetch the counter value and increment it...
399 rewind( $handle );
400 $counter = floor( trim( fgets( $handle ) ) ) + $count; // fetch as float
401 // Write back the new counter value
402 ftruncate( $handle, 0 );
403 rewind( $handle );
404 fwrite( $handle, fmod( $counter, pow( 2, 48 ) ) ); // warp-around as needed
405 fflush( $handle );
406 // Release the UID lock file
407 flock( $handle, LOCK_UN );
408 }
409
410 $ids = [];
411 $divisor = pow( 2, $bits );
412 $currentId = floor( $counter - $count ); // pre-increment counter value
413 for ( $i = 0; $i < $count; ++$i ) {
414 $ids[] = fmod( ++$currentId, $divisor );
415 }
416
417 return $ids;
418 }
419
432 protected function getTimeAndDelay( $lockFile, $clockSeqSize, $counterSize, $offsetSize ) {
433 // Get the UID lock file handle
434 if ( isset( $this->fileHandles[$lockFile] ) ) {
435 $handle = $this->fileHandles[$lockFile];
436 } else {
437 $handle = fopen( $this->$lockFile, 'cb+' );
438 $this->fileHandles[$lockFile] = $handle ?: null; // cache
439 }
440 // Acquire the UID lock file
441 if ( $handle === false ) {
442 throw new RuntimeException( "Could not open '{$this->$lockFile}'." );
443 } elseif ( !flock( $handle, LOCK_EX ) ) {
444 fclose( $handle );
445 throw new RuntimeException( "Could not acquire '{$this->$lockFile}'." );
446 }
447 // Get the current timestamp, clock sequence number, last time, and counter
448 rewind( $handle );
449 $data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>"
450 $clockChanged = false; // clock set back significantly?
451 if ( count( $data ) == 5 ) { // last UID info already initialized
452 $clkSeq = (int)$data[0] % $clockSeqSize;
453 $prevTime = [ (int)$data[1], (int)$data[2] ];
454 $offset = (int)$data[4] % $counterSize; // random counter offset
455 $counter = 0; // counter for UIDs with the same timestamp
456 // Delay until the clock reaches the time of the last ID.
457 // This detects any microtime() drift among processes.
458 $time = $this->timeWaitUntil( $prevTime );
459 if ( !$time ) { // too long to delay?
460 $clockChanged = true; // bump clock sequence number
462 } elseif ( $time == $prevTime ) {
463 // Bump the counter if there are timestamp collisions
464 $counter = (int)$data[3] % $counterSize;
465 if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
466 flock( $handle, LOCK_UN ); // abort
467 throw new RuntimeException( "Counter overflow for timestamp value." );
468 }
469 }
470 } else { // last UID info not initialized
471 $clkSeq = mt_rand( 0, $clockSeqSize - 1 );
472 $counter = 0;
473 $offset = mt_rand( 0, $offsetSize - 1 );
475 }
476 // microtime() and gettimeofday() can drift from time() at least on Windows.
477 // The drift is immediate for processes running while the system clock changes.
478 // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
479 if ( abs( time() - $time[0] ) >= 2 ) {
480 // We don't want processes using too high or low timestamps to avoid duplicate
481 // UIDs and clock sequence number churn. This process should just be restarted.
482 flock( $handle, LOCK_UN ); // abort
483 throw new RuntimeException( "Process clock is outdated or drifted." );
484 }
485 // If microtime() is synced and a clock change was detected, then the clock went back
486 if ( $clockChanged ) {
487 // Bump the clock sequence number and also randomize the counter offset,
488 // which is useful for UIDs that do not include the clock sequence number.
489 $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
490 $offset = mt_rand( 0, $offsetSize - 1 );
491 trigger_error( "Clock was set back; sequence number incremented." );
492 }
493 // Update the (clock sequence number, timestamp, counter)
494 ftruncate( $handle, 0 );
495 rewind( $handle );
496 fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
497 fflush( $handle );
498 // Release the UID lock file
499 flock( $handle, LOCK_UN );
500
501 return [
502 'time' => $time,
503 'counter' => $counter,
504 'clkSeq' => $clkSeq,
505 'offset' => $offset,
506 'offsetCounter' => $counter + $offset
507 ];
508 }
509
517 protected function timeWaitUntil( array $time ) {
518 do {
519 $ct = self::millitime();
520 if ( $ct >= $time ) { // http://php.net/manual/en/language.operators.comparison.php
521 return $ct; // current timestamp is higher than $time
522 }
523 } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );
524
525 return false;
526 }
527
534 list( $sec, $msec ) = $time;
535 $ts = 1000 * $sec + $msec;
536 if ( $ts > pow( 2, 52 ) ) {
537 throw new RuntimeException( __METHOD__ .
538 ': sorry, this function doesn\'t work after the year 144680' );
539 }
540
541 return substr( Wikimedia\base_convert( $ts, 10, 2, 46 ), -46 );
542 }
543
550 protected function intervalsSinceGregorianBinary( array $time, $delta = 0 ) {
551 list( $sec, $msec ) = $time;
552 $offset = '122192928000000000';
553 if ( PHP_INT_SIZE >= 8 ) { // 64 bit integers
554 $ts = ( 1000 * $sec + $msec ) * 10000 + (int)$offset + $delta;
555 $id_bin = str_pad( decbin( $ts % pow( 2, 60 ) ), 60, '0', STR_PAD_LEFT );
556 } elseif ( extension_loaded( 'gmp' ) ) {
557 $ts = gmp_add( gmp_mul( (string) $sec, '1000' ), (string) $msec ); // ms
558 $ts = gmp_add( gmp_mul( $ts, '10000' ), $offset ); // 100ns intervals
559 $ts = gmp_add( $ts, (string) $delta );
560 $ts = gmp_mod( $ts, gmp_pow( '2', '60' ) ); // wrap around
561 $id_bin = str_pad( gmp_strval( $ts, 2 ), 60, '0', STR_PAD_LEFT );
562 } elseif ( extension_loaded( 'bcmath' ) ) {
563 $ts = bcadd( bcmul( $sec, 1000 ), $msec ); // ms
564 $ts = bcadd( bcmul( $ts, 10000 ), $offset ); // 100ns intervals
565 $ts = bcadd( $ts, $delta );
566 $ts = bcmod( $ts, bcpow( 2, 60 ) ); // wrap around
567 $id_bin = Wikimedia\base_convert( $ts, 10, 2, 60 );
568 } else {
569 throw new RuntimeException( 'bcmath or gmp extension required for 32 bit machines.' );
570 }
571 return $id_bin;
572 }
573
577 protected static function millitime() {
578 list( $msec, $sec ) = explode( ' ', microtime() );
579
580 return [ (int)$sec, (int)( $msec * 1000 ) ];
581 }
582
594 protected function deleteCacheFiles() {
595 // Bug: 44850
596 foreach ( $this->fileHandles as $path => $handle ) {
597 if ( $handle !== null ) {
598 fclose( $handle );
599 }
600 if ( is_file( $path ) ) {
601 unlink( $path );
602 }
603 unset( $this->fileHandles[$path] );
604 }
605 if ( is_file( $this->nodeIdFile ) ) {
606 unlink( $this->nodeIdFile );
607 }
608 }
609
621 public static function unitTestTearDown() {
622 // Bug: 44850
623 $gen = self::singleton();
624 $gen->deleteCacheFiles();
625 }
626
627 function __destruct() {
628 array_map( 'fclose', array_filter( $this->fileHandles ) );
629 }
630}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
wfTempDir()
Tries to get the system directory for temporary files.
wfRandomString( $length=32)
Get a random string containing a number of pseudo-random hex characters.
wfShellExec( $cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
wfIsWindows()
Check if the operating system is Windows.
$line
Definition cdb.php:59
static generateHex( $chars, $forceStrong=false)
Generate a run of (ideally) cryptographically random data and return it in hexadecimal string format.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Class for getting statistically unique IDs.
getUUIDv1(array $info)
static newRawUUIDv1()
Return an RFC4122 compliant v1 UUID.
deleteCacheFiles()
Delete all cache files that have been created.
static newSequentialPerNodeID( $bucket, $bits=48, $flags=0)
Return an ID that is sequential only for this node and bucket.
millisecondsSinceEpochBinary(array $time)
getTimestampedID88(array $info)
const QUICK_VOLATILE
static newSequentialPerNodeIDs( $bucket, $bits, $count, $flags=0)
Return IDs that are sequential only for this node and bucket.
getTimeAndDelay( $lockFile, $clockSeqSize, $counterSize, $offsetSize)
Get a (time,counter,clock sequence) where (time,counter) is higher than any previous (time,...
static UIDGenerator $instance
static newRawUUIDv4( $flags=0)
Return an RFC4122 compliant v4 UUID.
timeWaitUntil(array $time)
Wait till the current timestamp reaches $time and return the current timestamp.
static millitime()
getSequentialPerNodeIDs( $bucket, $bits, $count, $flags)
Return IDs that are sequential only for this node and bucket.
getTimestampedID128(array $info)
intervalsSinceGregorianBinary(array $time, $delta=0)
static newUUIDv4( $flags=0)
Return an RFC4122 compliant v4 UUID.
static singleton()
static newUUIDv1()
Return an RFC4122 compliant v1 UUID.
static newTimestampedUID128( $base=10)
Get a statistically unique 128-bit unsigned integer ID string.
static unitTestTearDown()
Cleanup resources when tearing down after a unit test.
static newTimestampedUID88( $base=10)
Get a statistically unique 88-bit unsigned integer ID string.
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
the array() calling protocol came about after MediaWiki 1.4rc1.
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition hooks.txt:1752
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition hooks.txt:2710
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
$cache
Definition mcc.php:33