Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
77.43% |
175 / 226 |
|
50.00% |
11 / 22 |
CRAP | |
0.00% |
0 / 1 |
GlobalIdGenerator | |
77.43% |
175 / 226 |
|
50.00% |
11 / 22 |
116.06 | |
0.00% |
0 / 1 |
__construct | |
55.00% |
11 / 20 |
|
0.00% |
0 / 1 |
9.28 | |||
newTimestampedUID88 | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getTimestampedID88 | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
2.01 | |||
newTimestampedUID128 | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getTimestampedID128 | |
90.00% |
9 / 10 |
|
0.00% |
0 / 1 |
2.00 | |||
newUUIDv1 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getUUIDv1 | |
95.65% |
22 / 23 |
|
0.00% |
0 / 1 |
2 | |||
newRawUUIDv1 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
newUUIDv4 | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
newRawUUIDv4 | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
newSequentialPerNodeID | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
newSequentialPerNodeIDs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getTimestampFromUUIDv1 | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
getSequentialPerNodeIDs | |
81.48% |
22 / 27 |
|
0.00% |
0 / 1 |
9.51 | |||
getTimeAndDelay | |
82.61% |
38 / 46 |
|
0.00% |
0 / 1 |
9.43 | |||
timeWaitUntil | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
2.15 | |||
millisecondsSinceEpochBinary | |
66.67% |
4 / 6 |
|
0.00% |
0 / 1 |
2.15 | |||
intervalsSinceGregorianBinary | |
31.58% |
6 / 19 |
|
0.00% |
0 / 1 |
9.12 | |||
load | |
65.22% |
15 / 23 |
|
0.00% |
0 / 1 |
12.41 | |||
getNodeId32 | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getNodeId48 | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
deleteCacheFiles | n/a |
0 / 0 |
n/a |
0 / 0 |
5 | |||||
unitTestTearDown | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
__destruct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | namespace Wikimedia\UUID; |
22 | |
23 | use InvalidArgumentException; |
24 | use RuntimeException; |
25 | use Wikimedia\Assert\Assert; |
26 | use Wikimedia\AtEase\AtEase; |
27 | use Wikimedia\Timestamp\ConvertibleTimestamp; |
28 | |
29 | /** |
30 | * Class for getting statistically unique IDs without a central coordinator |
31 | * |
32 | * @since 1.35 |
33 | */ |
34 | class GlobalIdGenerator { |
	/** @var callable Callback for running shell commands; invoked with the command string */
	protected $shellCallback;

	/** @var string Temporary directory in which the lock and node ID files are created */
	protected $tmpDir;
	/** @var string
	 * File prefix containing user ID to prevent collisions
	 * if multiple users run MediaWiki (T268420) and getmyuid() is enabled
	 */
	protected $uniqueFilePrefix;
	/** @var string Local file path of the file that persists the node ID */
	protected $nodeIdFile;
	/** @var string Node ID in binary (32 bits) */
	protected $nodeId32;
	/** @var string Node ID in binary (48 bits) */
	protected $nodeId48;

	/** @var bool Whether initialization completed */
	protected $loaded = false;
	/** @var string Local file path of the lock file used for 88-bit timestamped UIDs */
	protected $lockFile88;
	/** @var string Local file path of the lock file used for 128-bit timestamped UIDs */
	protected $lockFile128;
	/** @var string Local file path of the lock file used for v1 UUIDs */
	protected $lockFileUUID;

	/** @var array Cached file handles, keyed by file path (null when opening failed) */
	protected $fileHandles = [];

	/** @var int B/C constant (deprecated since 1.36) */
	public const QUICK_VOLATILE = 1;

	/**
	 * Avoid using __CLASS__ so namespace separators aren't interpreted
	 * as path components on Windows (T259693)
	 */
	private const FILE_PREFIX = 'mw-GlobalIdGenerator';

	/** Key used in the serialized clock state map that is stored on disk */
	private const CLOCK_TIME = 'time';
	/** Key used in the serialized clock state map that is stored on disk */
	private const CLOCK_COUNTER = 'counter';
	/** Key used in the serialized clock state map that is stored on disk */
	private const CLOCK_SEQUENCE = 'clkSeq';
	/** Key used in the serialized clock state map that is stored on disk */
	private const CLOCK_OFFSET = 'offset';
	/** Key used in the serialized clock state map that is stored on disk */
	private const CLOCK_OFFSET_COUNTER = 'offsetCounter';
83 | |
/**
 * Set up the file paths used for locking and for persisting the node ID.
 *
 * For backwards compatibility, when three or more arguments are given and the
 * second one is not callable, the third argument is used as the shell callback
 * and a deprecation notice is raised.
 *
 * @param string|bool $tempDirectory A writable temporary directory
 * @param callable $shellCallback A callback that takes a shell command and returns the output
 * @throws InvalidArgumentException If $tempDirectory is empty
 */
public function __construct( $tempDirectory, $shellCallback ) {
	$usesLegacySignature = func_num_args() >= 3 && !is_callable( $shellCallback );
	if ( $usesLegacySignature ) {
		trigger_error(
			__CLASS__ . ' with a BagOStuff instance was deprecated in MediaWiki 1.37.',
			E_USER_DEPRECATED
		);
		$shellCallback = func_get_arg( 2 );
	}

	if ( strlen( $tempDirectory ) === 0 ) {
		throw new InvalidArgumentException( "No temp directory provided" );
	}
	$this->tmpDir = $tempDirectory;

	// Include the UID in the filename (T268420, T358768)
	$uidSuffix = '';
	if ( function_exists( 'posix_geteuid' ) ) {
		$uidSuffix = posix_geteuid();
	} elseif ( function_exists( 'getmyuid' ) ) {
		$uidSuffix = getmyuid();
	}
	$this->uniqueFilePrefix = self::FILE_PREFIX . $uidSuffix;

	$pathBase = $tempDirectory . '/' . $this->uniqueFilePrefix;
	$this->nodeIdFile = $pathBase . '-UID-nodeid';
	// If different processes run as different users, they may have different temp dirs.
	// This is dealt with by initializing the clock sequence number and counters randomly.
	$this->lockFile88 = $pathBase . '-UID-88';
	$this->lockFile128 = $pathBase . '-UID-128';
	$this->lockFileUUID = $pathBase . '-UUID-128';

	$this->shellCallback = $shellCallback;
}
118 | |
/**
 * Get a statistically unique 88-bit unsigned integer ID string.
 * The bits of the UID are prefixed with the time (down to the millisecond).
 *
 * These IDs are suitable as values for the shard key of distributed data.
 * If a column uses these as values, it should be declared UNIQUE to handle collisions.
 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
 * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL.
 *
 * UID generation is serialized on each server (as the node ID is for the whole machine).
 *
 * @param int $base Numeric base of the returned string (2 to 36); defaults to 10
 * @return string Number
 * @throws RuntimeException
 */
public function newTimestampedUID88( int $base = 10 ) {
	Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
	Assert::parameter( $base >= 2, '$base', 'must be >= 2' );

	$clock = $this->getTimeAndDelay( 'lockFile88', 1, 1024, 1024 );
	// Keep the offset counter within the 10 bits reserved for it
	$clock[self::CLOCK_OFFSET_COUNTER] = $clock[self::CLOCK_OFFSET_COUNTER] % 1024;

	$binaryId = $this->getTimestampedID88( $clock );

	return \Wikimedia\base_convert( $binaryId, 2, $base );
}
143 | |
/**
 * Assemble the 88-bit binary string: 46 bits of time, a 10 bit counter
 * and the 32 bit node ID.
 *
 * @param array $info result of GlobalIdGenerator::getTimeAndDelay()
 * @return string 88 bits
 * @throws RuntimeException If the assembled string is not exactly 88 characters long
 */
protected function getTimestampedID88( array $info ) {
	$clockTime = $info[self::CLOCK_TIME];
	$offsetCounter = $info[self::CLOCK_OFFSET_COUNTER];

	$segments = [
		// The 46 LSBs of "milliseconds since epoch"
		$this->millisecondsSinceEpochBinary( $clockTime ),
		// A 10 bit counter, for 56 bits so far
		str_pad( decbin( $offsetCounter ), 10, '0', STR_PAD_LEFT ),
		// The 32 bit node ID, for 88 bits in total
		$this->getNodeId32(),
	];
	$bits = implode( '', $segments );

	if ( strlen( $bits ) !== 88 ) {
		throw new RuntimeException( "Detected overflow for millisecond timestamp." );
	}

	return $bits;
}
165 | |
/**
 * Get a statistically unique 128-bit unsigned integer ID string.
 * The bits of the UID are prefixed with the time (down to the millisecond).
 *
 * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
 * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL.
 *
 * UID generation is serialized on each server (as the node ID is for the whole machine).
 *
 * @param int $base Numeric base of the returned string (2 to 36); defaults to 10
 * @return string Number
 * @throws RuntimeException
 */
public function newTimestampedUID128( int $base = 10 ) {
	Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
	Assert::parameter( $base >= 2, '$base', 'must be >= 2' );

	$clock = $this->getTimeAndDelay( 'lockFile128', 16384, 1_048_576, 1_048_576 );
	// Keep the offset counter within the 20 bits reserved for it
	$clock[self::CLOCK_OFFSET_COUNTER] = $clock[self::CLOCK_OFFSET_COUNTER] % 1_048_576;

	$binaryId = $this->getTimestampedID128( $clock );

	return \Wikimedia\base_convert( $binaryId, 2, $base );
}
189 | |
/**
 * Assemble the 128-bit binary string: 46 bits of time, a 20 bit counter,
 * a 14 bit clock sequence number and the 48 bit node ID.
 *
 * @param array $info The result of GlobalIdGenerator::getTimeAndDelay()
 * @return string 128 bits
 * @throws RuntimeException If the assembled string is not exactly 128 characters long
 */
protected function getTimestampedID128( array $info ) {
	$clockTime = $info[self::CLOCK_TIME];
	$offsetCounter = $info[self::CLOCK_OFFSET_COUNTER];
	$clockSequence = $info[self::CLOCK_SEQUENCE];

	$segments = [
		// The 46 LSBs of "milliseconds since epoch"
		$this->millisecondsSinceEpochBinary( $clockTime ),
		// A 20 bit counter, for 66 bits so far
		str_pad( decbin( $offsetCounter ), 20, '0', STR_PAD_LEFT ),
		// A 14 bit clock sequence number, for 80 bits so far
		str_pad( decbin( $clockSequence ), 14, '0', STR_PAD_LEFT ),
		// The 48 bit node ID, for 128 bits in total
		$this->getNodeId48(),
	];
	$bits = implode( '', $segments );

	if ( strlen( $bits ) !== 128 ) {
		throw new RuntimeException( "Detected overflow for millisecond timestamp." );
	}

	return $bits;
}
214 | |
/**
 * Return an RFC4122 compliant v1 UUID
 *
 * @return string
 * @throws RuntimeException
 */
public function newUUIDv1() {
	// There can be up to 10000 intervals for the same millisecond timestamp.
	// [0,4999] counter + [0,5000] offset is in [0,9999] for the offset counter.
	// Add this onto the timestamp to allow making up to 5000 IDs per second.
	$clock = $this->getTimeAndDelay( 'lockFileUUID', 16384, 5000, 5001 );

	return $this->getUUIDv1( $clock );
}
227 | |
/**
 * Lay out the v1 UUID fields from the clock state and format them as a
 * dash-separated hexadecimal string.
 *
 * @param array $info Result of GlobalIdGenerator::getTimeAndDelay()
 * @return string Hexadecimal UUID with dashes (encodes 128 bits)
 * @throws RuntimeException If the assembled bit string is not exactly 128 characters long
 */
protected function getUUIDv1( array $info ) {
	$clockSeqBits = \Wikimedia\base_convert( $info[self::CLOCK_SEQUENCE], 10, 2, 14 );
	$timeBits = $this->intervalsSinceGregorianBinary(
		$info[self::CLOCK_TIME],
		$info[self::CLOCK_OFFSET_COUNTER]
	);

	$bits = implode( '', [
		// 32 bits of "time low"
		substr( $timeBits, 28, 32 ),
		// 16 bits of "time mid" (48 bits so far)
		substr( $timeBits, 12, 16 ),
		// 4 bit version (52 bits so far)
		'0001',
		// 12 bits of "time high" (64 bits so far)
		substr( $timeBits, 0, 12 ),
		// 2 bits of "variant" (66 bits so far)
		'10',
		// 6 bits of "clock seq high" (72 bits so far)
		substr( $clockSeqBits, 0, 6 ),
		// 8 bits of "clock seq low" (80 bits so far)
		substr( $clockSeqBits, 6, 8 ),
		// 48 bit node ID (128 bits in total)
		$this->getNodeId48(),
	] );

	if ( strlen( $bits ) !== 128 ) {
		throw new RuntimeException( "Detected overflow for millisecond timestamp." );
	}

	// Convert to a 32 char hex string and insert the dashes
	$hex = \Wikimedia\base_convert( $bits, 2, 16, 32 );

	return implode( '-', [
		// "time_low" (32 bits)
		substr( $hex, 0, 8 ),
		// "time_mid" (16 bits)
		substr( $hex, 8, 4 ),
		// "time_hi_and_version" (16 bits)
		substr( $hex, 12, 4 ),
		// "clk_seq_hi_res" (8 bits) and "clk_seq_low" (8 bits)
		substr( $hex, 16, 4 ),
		// "node" (48 bits)
		substr( $hex, 20, 12 ),
	] );
}
272 | |
/**
 * Return an RFC4122 compliant v1 UUID with the hyphens removed
 *
 * @return string 32 hex characters with no hyphens
 * @throws RuntimeException
 */
public function newRawUUIDv1() {
	$dashedUuid = $this->newUUIDv1();

	return str_replace( '-', '', $dashedUuid );
}
282 | |
/**
 * Return an RFC4122 compliant v4 UUID
 *
 * Built from 16 random bytes; the version digit is forced to 4 and the
 * two top bits of the clock sequence field are forced to binary 10.
 *
 * @return string
 * @throws RuntimeException
 */
public function newUUIDv4() {
	$hex = bin2hex( random_bytes( 16 ) );

	// Variant: top two bits are "10", lower two bits come from the random data
	$variantDigit = dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) );

	$fields = [
		// "time_low" (32 bits)
		substr( $hex, 0, 8 ),
		// "time_mid" (16 bits)
		substr( $hex, 8, 4 ),
		// "time_hi_and_version" (16 bits)
		'4' . substr( $hex, 12, 3 ),
		// "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
		$variantDigit . substr( $hex, 16, 3 ),
		// "node" (48 bits)
		substr( $hex, 19, 12 ),
	];

	return implode( '-', $fields );
}
305 | |
/**
 * Return an RFC4122 compliant v4 UUID with the hyphens removed
 *
 * @return string 32 hex characters with no hyphens
 * @throws RuntimeException
 */
public function newRawUUIDv4() {
	$dashedUuid = $this->newUUIDv4();

	return str_replace( '-', '', $dashedUuid );
}
315 | |
/**
 * Return an ID that is sequential *only* for this node and bucket
 *
 * These IDs are suitable for per-host sequence numbers, e.g. for some packet protocols.
 * If GlobalIdGenerator::QUICK_VOLATILE is used the counter might reset on
 * server restart.
 *
 * @param string $bucket Arbitrary bucket name (should be ASCII)
 * @param int $bits Bit size (<=48) of resulting numbers before wrap-around
 * @param int $flags (supports GlobalIdGenerator::QUICK_VOLATILE)
 * @return float Integer value as float
 */
public function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
	// Ask for a batch of exactly one ID and hand back its only element
	$batch = $this->newSequentialPerNodeIDs( $bucket, $bits, 1, $flags );

	return current( $batch );
}
331 | |
/**
 * Return IDs that are sequential *only* for this node and bucket
 *
 * Public entry point that forwards to getSequentialPerNodeIDs().
 *
 * @param string $bucket Arbitrary bucket name (should be ASCII)
 * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
 * @param int $count Number of IDs to return
 * @param int $flags (supports GlobalIdGenerator::QUICK_VOLATILE)
 * @return array Ordered list of float integer values
 * @see GlobalIdGenerator::newSequentialPerNodeID()
 */
public function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
	$ids = $this->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );

	return $ids;
}
345 | |
/**
 * Get timestamp in a specified format from UUIDv1
 *
 * @param string $uuid the UUID to get the timestamp from
 * @param int $format the format to convert the timestamp to. Default: TS_MW
 * @return string|false timestamp in requested format or false
 * @throws InvalidArgumentException If $uuid is not a lowercase, dash-separated v1 UUID
 */
public function getTimestampFromUUIDv1( string $uuid, int $format = TS_MW ) {
	$pattern =
		'/^([0-9a-f]{8})-([0-9a-f]{4})-(1[0-9a-f]{3})-([89ab][0-9a-f]{3})-([0-9a-f]{12})$/';
	$fields = [];
	if ( !preg_match( $pattern, $uuid, $fields ) ) {
		throw new InvalidArgumentException( "Invalid UUIDv1 {$uuid}" );
	}
	[ , $timeLow, $timeMid, $timeHiAndVersion ] = $fields;

	// The 60 bit timestamp value is constructed from fields of this UUID:
	// "time high" (with the version digit dropped), then "time mid", then "time low".
	$intervals = hexdec( substr( $timeHiAndVersion, 1 ) . $timeMid . $timeLow );
	// The timestamp is measured in 100-nanosecond units since midnight, October 15, 1582 UTC.
	$unixTime = ( $intervals - 0x01b21dd213814000 ) / 1e7;

	return ConvertibleTimestamp::convert( $format, $unixTime );
}
370 | |
/**
 * Return IDs that are sequential *only* for this node and bucket
 *
 * The running counter is kept in a per-bucket file inside the temporary
 * directory and is read, advanced by $count and written back while holding
 * an exclusive lock on that file. The stored counter wraps around at 2^48;
 * the returned IDs wrap around at 2^$bits.
 *
 * @param string $bucket Arbitrary bucket name (should be ASCII)
 * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
 * @param int $count Number of IDs to return
 * @param int $flags (supports GlobalIdGenerator::QUICK_VOLATILE); not read by this method
 * @return array Ordered list of float integer values
 * @throws RuntimeException If $bits is out of range or the file cannot be opened or locked
 * @see GlobalIdGenerator::newSequentialPerNodeID()
 */
protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
	if ( $count <= 0 ) {
		return [];
	}
	if ( $bits < 16 || $bits > 48 ) {
		throw new RuntimeException( "Requested bit size ($bits) is out of range." );
	}

	$path = $this->tmpDir . '/' . $this->uniqueFilePrefix . '-' . rawurlencode( $bucket ) . '-48';
	// Get the UID lock file handle, reusing a cached one when available
	$handle = $this->fileHandles[$path] ?? null;
	if ( $handle === null ) {
		$handle = fopen( $path, 'cb+' );
		$this->fileHandles[$path] = $handle ?: null;
	}
	// Acquire the UID lock file
	if ( $handle === false ) {
		throw new RuntimeException( "Could not open '{$path}'." );
	}
	if ( !flock( $handle, LOCK_EX ) ) {
		fclose( $handle );
		// Drop the closed handle from the cache; otherwise later calls and
		// __destruct() would operate on an invalid (closed) stream resource.
		unset( $this->fileHandles[$path] );
		throw new RuntimeException( "Could not acquire '{$path}'." );
	}

	// Fetch the counter value (as float) and increment it...
	rewind( $handle );
	$line = fgets( $handle );
	// An empty (newly created) file yields false; treat that as a counter of zero
	$previous = $line === false ? 0.0 : floor( (float)trim( $line ) );
	$counter = $previous + $count;

	// Write back the new counter value
	ftruncate( $handle, 0 );
	rewind( $handle );
	// Use fmod() to avoid "division by zero" on 32 bit machines;
	// wrap around as needed
	fwrite( $handle, (string)fmod( $counter, 2 ** 48 ) );
	fflush( $handle );

	// Release the UID lock file
	flock( $handle, LOCK_UN );

	$ids = [];
	$divisor = 2 ** $bits;

	// Start from the pre-increment counter value
	$currentId = floor( $counter - $count );
	for ( $i = 0; $i < $count; ++$i ) {
		// Use fmod() to avoid "division by zero" on 32 bit machines
		$ids[] = fmod( ++$currentId, $divisor );
	}

	return $ids;
}
436 | |
/**
 * Get a (time,counter,clock sequence) where (time,counter) is higher
 * than any previous (time,counter) value for the given clock sequence.
 * This is useful for making UIDs sequential on a per-node basis.
 *
 * The clock state is persisted in the lock file as
 * "<clk seq> <sec> <msec counter> <rand offset>" and is read and rewritten
 * while holding an exclusive lock on that file.
 *
 * @param string $lockFile Name of the property holding the local lock file path
 * @param int $clockSeqSize The number of possible clock sequence values
 * @param int $counterSize The number of possible counter values
 * @param int $offsetSize The number of possible offset values
 * @return array Array with the following keys:
 *  - GlobalIdGenerator::CLOCK_TIME: (integer seconds, integer milliseconds) array
 *  - GlobalIdGenerator::CLOCK_COUNTER: integer millisecond tie-breaking counter
 *  - GlobalIdGenerator::CLOCK_SEQUENCE: integer clock identifier that is local to the node
 *  - GlobalIdGenerator::CLOCK_OFFSET: integer offset for millisecond tie-breaking counter
 *  - GlobalIdGenerator::CLOCK_OFFSET_COUNTER: integer; CLOCK_COUNTER with CLOCK_OFFSET applied
 * @throws RuntimeException If the lock file cannot be opened or locked, or the counter overflows
 */
protected function getTimeAndDelay( $lockFile, $clockSeqSize, $counterSize, $offsetSize ) {
	// $lockFile is the name of one of this object's lock file path properties
	$path = $this->$lockFile;

	// Get the UID lock file handle, reusing a cached one when available
	if ( isset( $this->fileHandles[$path] ) ) {
		$handle = $this->fileHandles[$path];
	} else {
		$handle = fopen( $path, 'cb+' );
		$this->fileHandles[$path] = $handle ?: null;
	}
	// Acquire the UID lock file
	if ( $handle === false ) {
		throw new RuntimeException( "Could not open '{$path}'." );
	}
	if ( !flock( $handle, LOCK_EX ) ) {
		fclose( $handle );
		// Drop the closed handle from the cache; otherwise later calls and
		// __destruct() would operate on an invalid (closed) stream resource.
		unset( $this->fileHandles[$path] );
		throw new RuntimeException( "Could not acquire '{$path}'." );
	}

	// The formatters that use this method expect a timestamp with millisecond
	// precision and a counter up to a certain size. When more IDs than the counter
	// size are generated during the same timestamp, an exception is thrown as we
	// cannot increment further, because the formatted ID would not have enough
	// bits to fit the counter.
	//
	// To orchestrate this between independent PHP processes on the same host,
	// we must have a common sense of time so that we only have to maintain
	// a single counter in a single lock file.
	//
	// Given that:
	// * The system clock can be observed via time(), without milliseconds.
	// * Some other clock can be observed via microtime(), which also offers
	//   millisecond precision.
	// * microtime() drifts in-process further and further away from the system
	//   clock the longer a process runs for.
	//   For example, on 2018-10-03 an HHVM 3.18 JobQueue process at WMF,
	//   that ran for 9 min 55 sec, microtime drifted by 7 seconds.
	//   time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
	//
	// We have two choices:
	//
	// 1. Use microtime() with the following caveats:
	//    - The last stored time may be in the future, or our current time may be in the
	//      past, in which case we'll frequently enter the slow timeWaitUntil() method to
	//      try and "sync" the current process with the previous process.
	//      We mustn't block for long though, max 10ms?
	//    - For any drift above 10ms, we pretend that the clock went backwards, and treat
	//      it the same way as after an NTP sync, by incrementing clock sequence instead.
	//      Given the sequence rolls over automatically, and silently, and is meant to be
	//      rare, this essentially sacrifices a reasonable guarantee of uniqueness.
	//    - For long running processes (e.g. longer than a few seconds) the drift can
	//      easily be more than 2 seconds. Because we only have a single lock file
	//      and don't want to keep too many counters and deal with clearing those,
	//      we fatal the user and refuse to make an ID. (T94522)
	//    - This offers terrible service availability.
	// 2. Use time() instead, and expand the counter size by 1000x and use its
	//    digits as if they were the millisecond fraction of our timestamp.
	//    Known caveats or perf impact: None. We still need to read-write our
	//    lock file on each generation, so might as well make the most of it.
	//
	// We choose the latter.
	$msecCounterSize = $counterSize * 1000;

	rewind( $handle );
	// Format of lock file contents:
	// "<clk seq> <sec> <msec counter> <rand offset>"
	$data = explode( ' ', fgets( $handle ) );

	if ( count( $data ) === 4 ) {
		// The UID lock file was already initialized
		$clkSeq = (int)$data[0] % $clockSeqSize;
		$prevSec = (int)$data[1];
		// Counter for UIDs with the same timestamp
		$msecCounter = 0;
		// NOTE(review): the offset is generated with $offsetSize below but reduced
		// modulo $counterSize here; confirm this asymmetry is intended.
		$randOffset = (int)$data[3] % $counterSize;

		// If the system clock moved backwards by an NTP sync,
		// or if the last writer process had its clock drift ahead,
		// try to catch up if the gap is small, so that we can keep a single
		// monotonic lock file.
		$sec = $this->timeWaitUntil( $prevSec );
		if ( $sec === false ) {
			// Gap is too big. Looks like the clock got moved back significantly.
			// Start a new clock sequence, and re-randomize the extra offset,
			// which is useful for UIDs that do not include the clock sequence number.
			$clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
			$sec = time();
			$randOffset = mt_rand( 0, $offsetSize - 1 );
			trigger_error( "Clock was set back; sequence number incremented." );
		} elseif ( $sec === $prevSec ) {
			// Double check, only keep remainder if a previous writer wrote
			// something here that we don't accept.
			$msecCounter = (int)$data[2] % $msecCounterSize;
			// Bump the counter if the time has not changed yet
			if ( ++$msecCounter >= $msecCounterSize ) {
				// More IDs generated with the same time than counterSize can accommodate
				flock( $handle, LOCK_UN );
				throw new RuntimeException( "Counter overflow for timestamp value." );
			}
		}
	} else {
		// Initialize UID lock file information
		$clkSeq = mt_rand( 0, $clockSeqSize - 1 );
		$sec = time();
		$msecCounter = 0;
		$randOffset = mt_rand( 0, $offsetSize - 1 );
	}

	// Update and release the UID lock file
	ftruncate( $handle, 0 );
	rewind( $handle );
	fwrite( $handle, "{$clkSeq} {$sec} {$msecCounter} {$randOffset}" );
	fflush( $handle );
	flock( $handle, LOCK_UN );

	// Split msecCounter back into msec and counter
	$msec = (int)( $msecCounter / 1000 );
	$counter = $msecCounter % 1000;

	return [
		self::CLOCK_TIME => [ $sec, $msec ],
		self::CLOCK_COUNTER => $counter,
		self::CLOCK_SEQUENCE => $clkSeq,
		self::CLOCK_OFFSET => $randOffset,
		self::CLOCK_OFFSET_COUNTER => $counter + $randOffset,
	];
}
579 | |
/**
 * Wait till the current timestamp reaches $time and return the current
 * timestamp. This returns false if it would have to wait more than 10ms.
 *
 * The wait is a polling loop on time(); the elapsed time is measured
 * with microtime().
 *
 * @param int $time Result of time()
 * @return int|bool Timestamp or false
 */
protected function timeWaitUntil( $time ) {
	$startedAt = microtime( true );

	while ( true ) {
		$now = time();
		// https://www.php.net/manual/en/language.operators.comparison.php
		if ( $now >= $time ) {
			// The current time has reached (or passed) $time
			return $now;
		}
		// Give up once more than 10ms have elapsed
		if ( ( microtime( true ) - $startedAt ) > 0.010 ) {
			return false;
		}
	}
}
601 | |
/**
 * Convert a (seconds, milliseconds) pair into a binary string.
 *
 * @param array $time Array of second and millisecond integers
 * @return string 46 LSBs of "milliseconds since epoch" in binary (rolls over in 4201)
 * @throws RuntimeException If the millisecond value exceeds 2^52
 */
protected function millisecondsSinceEpochBinary( array $time ) {
	$sec = $time[0];
	$msec = $time[1];

	$millis = 1000 * $sec + $msec;
	if ( $millis > 2 ** 52 ) {
		throw new RuntimeException( __METHOD__ .
			': sorry, this function doesn\'t work after the year 144680' );
	}

	// Pad to at least 46 binary digits, then keep only the lowest 46
	$bits = \Wikimedia\base_convert( (string)$millis, 10, 2, 46 );

	return substr( $bits, -46 );
}
617 | |
/**
 * Convert a (seconds, milliseconds) pair plus an interval delta into the
 * 60 bit UUIDv1 timestamp, using native integers when they are 64 bit and
 * falling back to the gmp or bcmath extension otherwise.
 *
 * @param array $time Array of second and millisecond integers
 * @param int $delta Number of intervals to add on to the timestamp
 * @return string 60 bits of "100ns intervals since 15 October 1582" (rolls over in 3400)
 * @throws RuntimeException If integers are narrower than 64 bit and neither extension is loaded
 */
protected function intervalsSinceGregorianBinary( array $time, $delta = 0 ) {
	$sec = $time[0];
	$msec = $time[1];
	// Offset added to the 100ns interval count derived from $time
	$offset = '122192928000000000';

	if ( PHP_INT_SIZE >= 8 ) {
		// 64 bit integers
		$intervals = ( 1000 * $sec + $msec ) * 10000 + (int)$offset + $delta;

		return str_pad( decbin( $intervals % ( 2 ** 60 ) ), 60, '0', STR_PAD_LEFT );
	}

	if ( extension_loaded( 'gmp' ) ) {
		// ms
		$intervals = gmp_add( gmp_mul( (string)$sec, '1000' ), (string)$msec );
		// 100ns intervals
		$intervals = gmp_add( gmp_mul( $intervals, '10000' ), $offset );
		$intervals = gmp_add( $intervals, (string)$delta );
		// wrap around
		$intervals = gmp_mod( $intervals, gmp_pow( '2', 60 ) );

		return str_pad( gmp_strval( $intervals, 2 ), 60, '0', STR_PAD_LEFT );
	}

	if ( extension_loaded( 'bcmath' ) ) {
		// ms
		$intervals = bcadd( bcmul( $sec, '1000' ), $msec );
		// 100ns intervals
		$intervals = bcadd( bcmul( $intervals, '10000' ), $offset );
		$intervals = bcadd( $intervals, (string)$delta );
		// wrap around
		$intervals = bcmod( $intervals, bcpow( '2', '60' ) );

		return \Wikimedia\base_convert( $intervals, 10, 2, 60 );
	}

	throw new RuntimeException( 'bcmath or gmp extension required for 32 bit machines.' );
}
655 | |
/**
 * Load the node ID information
 *
 * The node ID is read from the node ID file. When that file does not hold
 * 12 hexadecimal digits, a MAC address is looked up through the shell
 * callback ("getmac" on Windows, "/sbin/ifconfig" where that is executable);
 * if that does not yield 12 hexadecimal digits either, a random ID with the
 * multicast bit set is generated. A newly determined ID is written back to
 * the node ID file. The 32 bit and 48 bit binary forms are then derived.
 *
 * The object is only marked as loaded once all of this has succeeded, so a
 * failure (e.g. an exception from the shell callback) is retried on the next call.
 */
private function load() {
	if ( $this->loaded ) {
		return;
	}

	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	$nodeId = @file_get_contents( $this->nodeIdFile ) ?: '';
	// Try to get some ID that uniquely identifies this machine (RFC 4122)...
	if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
		AtEase::suppressWarnings();
		try {
			if ( PHP_OS_FAMILY === 'Windows' ) {
				// https://technet.microsoft.com/en-us/library/bb490913.aspx
				$output = ( $this->shellCallback )( 'getmac /NH /FO CSV' );
				$csv = trim( (string)$output );
				// Only the first line of the output is examined
				$line = substr( $csv, 0, strcspn( $csv, "\n" ) );
				$info = str_getcsv( $line );
				// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal False positive
				$nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
			} elseif ( is_executable( '/sbin/ifconfig' ) ) {
				// Linux/BSD/Solaris/OS X
				// See https://linux.die.net/man/8/ifconfig
				$output = ( $this->shellCallback )( '/sbin/ifconfig -a' );
				$m = [];
				preg_match( '/\s([0-9a-f]{2}(?::[0-9a-f]{2}){5})\s/', (string)$output, $m );
				$nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
			}
		} finally {
			// Always undo suppressWarnings(), even if the shell callback throws
			AtEase::restoreWarnings();
		}
		if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
			$nodeId = bin2hex( random_bytes( 12 / 2 ) );
			// set multicast bit
			$nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 );
		}
		file_put_contents( $this->nodeIdFile, $nodeId );
	}
	// 32 bit form: the first 8 hex digits of the SHA-1 of the node ID
	$this->nodeId32 = \Wikimedia\base_convert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
	// 48 bit form: the node ID itself
	$this->nodeId48 = \Wikimedia\base_convert( $nodeId, 16, 2, 48 );

	$this->loaded = true;
}
697 | |
/**
 * Get the 32 bit node ID, loading the node ID information first if needed.
 *
 * @return string Node ID in binary (32 bits)
 */
private function getNodeId32() {
	$this->load();
	$nodeId = $this->nodeId32;

	return $nodeId;
}
706 | |
/**
 * Get the 48 bit node ID, loading the node ID information first if needed.
 *
 * @return string Node ID in binary (48 bits)
 */
private function getNodeId48() {
	$this->load();
	$nodeId = $this->nodeId48;

	return $nodeId;
}
715 | |
/**
 * Delete all cache files that have been created (T46850)
 *
 * Closes every cached file handle, removes the file behind it, forgets the
 * cache entry, and finally removes the node ID file.
 *
 * This is a cleanup method primarily meant to be used from unit tests to
 * avoid polluting the local filesystem. If used outside of a unit test
 * environment it should be used with caution as it may destroy state saved
 * in the files.
 *
 * @see unitTestTearDown
 * @codeCoverageIgnore
 */
private function deleteCacheFiles() {
	// Iterate over a snapshot, since entries are removed along the way
	$cachedHandles = $this->fileHandles;
	foreach ( $cachedHandles as $filePath => $fileHandle ) {
		if ( $fileHandle !== null ) {
			fclose( $fileHandle );
		}
		if ( is_file( $filePath ) ) {
			unlink( $filePath );
		}
		unset( $this->fileHandles[$filePath] );
	}

	if ( is_file( $this->nodeIdFile ) ) {
		unlink( $this->nodeIdFile );
	}
}
741 | |
/**
 * Cleanup resources when tearing down after a unit test (T46850)
 *
 * Public wrapper around deleteCacheFiles().
 *
 * This is a cleanup method primarily meant to be used from unit tests to
 * avoid polluting the local filesystem. If used outside of a unit test
 * environment it should be used with caution as it may destroy state saved
 * in the files.
 *
 * @internal For use by unit tests
 * @see deleteCacheFiles
 * @codeCoverageIgnore
 * @return void
 */
public function unitTestTearDown() {
	$this->deleteCacheFiles();
}
757 | |
/**
 * Close any cached file handles that are still open.
 *
 * Entries that are null (the file could not be opened) or that refer to a
 * stream which has already been closed are skipped; calling fclose() on a
 * closed stream resource raises a TypeError on PHP 8.
 */
public function __destruct() {
	foreach ( $this->fileHandles as $handle ) {
		// is_resource() is false for null and for already-closed resources
		if ( is_resource( $handle ) ) {
			fclose( $handle );
		}
	}
}
762 | } |